llvm.org GIT mirror llvm / ea7a0c0
AMDGPU: Add a shader calling convention

This makes it possible to distinguish between mesa shaders and other kernels even in the presence of compute shaders.

Patch By: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Differential Revision: http://reviews.llvm.org/D18559
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@265589 91177308-0d34-0410-b5e6-96231b3b80d8

Nicolai Haehnle, 3 years ago
105 changed file(s) with 547 addition(s) and 605 deletion(s). Raw diff Collapse all Expand all
177177 /// which have an "optimized" convention to preserve registers.
178178 AVR_BUILTIN = 86,
179179
180 /// Calling convention used for Mesa vertex shaders.
181 AMDGPU_VS = 87,
182
183 /// Calling convention used for Mesa geometry shaders.
184 AMDGPU_GS = 88,
185
186 /// Calling convention used for Mesa pixel shaders.
187 AMDGPU_PS = 89,
188
189 /// Calling convention used for Mesa compute shaders.
190 AMDGPU_CS = 90,
191
180192 /// The highest possible calling convention ID. Must be some 2^k - 1.
181193 MaxID = 1023
182194 };
598598 KEYWORD(hhvmcc);
599599 KEYWORD(hhvm_ccc);
600600 KEYWORD(cxx_fast_tlscc);
601 KEYWORD(amdgpu_vs);
602 KEYWORD(amdgpu_gs);
603 KEYWORD(amdgpu_ps);
604 KEYWORD(amdgpu_cs);
601605
602606 KEYWORD(cc);
603607 KEYWORD(c);
16151615 /// ::= 'hhvmcc'
16161616 /// ::= 'hhvm_ccc'
16171617 /// ::= 'cxx_fast_tlscc'
1618 /// ::= 'amdgpu_vs'
1621 /// ::= 'amdgpu_gs'
1622 /// ::= 'amdgpu_ps'
1623 /// ::= 'amdgpu_cs'
16181624 /// ::= 'cc' UINT
16191625 ///
16201626 bool LLParser::ParseOptionalCallingConv(unsigned &CC) {
16501656 case lltok::kw_hhvmcc: CC = CallingConv::HHVM; break;
16511657 case lltok::kw_hhvm_ccc: CC = CallingConv::HHVM_C; break;
16521658 case lltok::kw_cxx_fast_tlscc: CC = CallingConv::CXX_FAST_TLS; break;
1659 case lltok::kw_amdgpu_vs: CC = CallingConv::AMDGPU_VS; break;
1660 case lltok::kw_amdgpu_gs: CC = CallingConv::AMDGPU_GS; break;
1661 case lltok::kw_amdgpu_ps: CC = CallingConv::AMDGPU_PS; break;
1662 case lltok::kw_amdgpu_cs: CC = CallingConv::AMDGPU_CS; break;
16531663 case lltok::kw_cc: {
16541664 Lex.Lex();
16551665 return ParseUInt32(CC);
105105 kw_x86_intrcc,
106106 kw_hhvmcc, kw_hhvm_ccc,
107107 kw_cxx_fast_tlscc,
108 kw_amdgpu_vs,
109 kw_amdgpu_gs,
110 kw_amdgpu_ps,
111 kw_amdgpu_cs,
108112
109113 // Attributes:
110114 kw_attributes,
318318 case CallingConv::X86_INTR: Out << "x86_intrcc"; break;
319319 case CallingConv::HHVM: Out << "hhvmcc"; break;
320320 case CallingConv::HHVM_C: Out << "hhvm_ccc"; break;
321 case CallingConv::AMDGPU_VS: Out << "amdgpu_vs"; break;
322 case CallingConv::AMDGPU_GS: Out << "amdgpu_gs"; break;
323 case CallingConv::AMDGPU_PS: Out << "amdgpu_ps"; break;
324 case CallingConv::AMDGPU_CS: Out << "amdgpu_cs"; break;
321325 }
322326 }
323327
122122
123123 } // End namespace llvm
124124
125 namespace ShaderType {
126 enum Type {
127 PIXEL = 0,
128 VERTEX = 1,
129 GEOMETRY = 2,
130 COMPUTE = 3
131 };
132 }
133
134125 /// OpenCL uses address spaces to differentiate between
135126 /// various memory regions on the hardware. On the CPU
136127 /// all of the address spaces point to the same memory,
302302 unsigned RsrcReg;
303303 if (STM.getGeneration() >= AMDGPUSubtarget::EVERGREEN) {
304304 // Evergreen / Northern Islands
305 switch (MFI->getShaderType()) {
305 switch (MF.getFunction()->getCallingConv()) {
306306 default: // Fall through
307 case ShaderType::COMPUTE: RsrcReg = R_0288D4_SQ_PGM_RESOURCES_LS; break;
308 case ShaderType::GEOMETRY: RsrcReg = R_028878_SQ_PGM_RESOURCES_GS; break;
309 case ShaderType::PIXEL: RsrcReg = R_028844_SQ_PGM_RESOURCES_PS; break;
310 case ShaderType::VERTEX: RsrcReg = R_028860_SQ_PGM_RESOURCES_VS; break;
307 case CallingConv::AMDGPU_CS: RsrcReg = R_0288D4_SQ_PGM_RESOURCES_LS; break;
308 case CallingConv::AMDGPU_GS: RsrcReg = R_028878_SQ_PGM_RESOURCES_GS; break;
309 case CallingConv::AMDGPU_PS: RsrcReg = R_028844_SQ_PGM_RESOURCES_PS; break;
310 case CallingConv::AMDGPU_VS: RsrcReg = R_028860_SQ_PGM_RESOURCES_VS; break;
311311 }
312312 } else {
313313 // R600 / R700
314 switch (MFI->getShaderType()) {
314 switch (MF.getFunction()->getCallingConv()) {
315315 default: // Fall through
316 case ShaderType::GEOMETRY: // Fall through
317 case ShaderType::COMPUTE: // Fall through
318 case ShaderType::VERTEX: RsrcReg = R_028868_SQ_PGM_RESOURCES_VS; break;
319 case ShaderType::PIXEL: RsrcReg = R_028850_SQ_PGM_RESOURCES_PS; break;
316 case CallingConv::AMDGPU_GS: // Fall through
317 case CallingConv::AMDGPU_CS: // Fall through
318 case CallingConv::AMDGPU_VS: RsrcReg = R_028868_SQ_PGM_RESOURCES_VS; break;
319 case CallingConv::AMDGPU_PS: RsrcReg = R_028850_SQ_PGM_RESOURCES_PS; break;
320320 }
321321 }
322322
326326 OutStreamer->EmitIntValue(R_02880C_DB_SHADER_CONTROL, 4);
327327 OutStreamer->EmitIntValue(S_02880C_KILL_ENABLE(killPixel), 4);
328328
329 if (MFI->getShaderType() == ShaderType::COMPUTE) {
329 if (AMDGPU::isCompute(MF.getFunction()->getCallingConv())) {
330330 OutStreamer->EmitIntValue(R_0288E8_SQ_LDS_ALLOC, 4);
331331 OutStreamer->EmitIntValue(alignTo(MFI->LDSSize, 4) >> 2, 4);
332332 }
545545 S_00B84C_EXCP_EN(0);
546546 }
547547
548 static unsigned getRsrcReg(unsigned ShaderType) {
549 switch (ShaderType) {
548 static unsigned getRsrcReg(CallingConv::ID CallConv) {
549 switch (CallConv) {
550550 default: // Fall through
551 case ShaderType::COMPUTE: return R_00B848_COMPUTE_PGM_RSRC1;
552 case ShaderType::GEOMETRY: return R_00B228_SPI_SHADER_PGM_RSRC1_GS;
553 case ShaderType::PIXEL: return R_00B028_SPI_SHADER_PGM_RSRC1_PS;
554 case ShaderType::VERTEX: return R_00B128_SPI_SHADER_PGM_RSRC1_VS;
551 case CallingConv::AMDGPU_CS: return R_00B848_COMPUTE_PGM_RSRC1;
552 case CallingConv::AMDGPU_GS: return R_00B228_SPI_SHADER_PGM_RSRC1_GS;
553 case CallingConv::AMDGPU_PS: return R_00B028_SPI_SHADER_PGM_RSRC1_PS;
554 case CallingConv::AMDGPU_VS: return R_00B128_SPI_SHADER_PGM_RSRC1_VS;
555555 }
556556 }
557557
559559 const SIProgramInfo &KernelInfo) {
560560 const AMDGPUSubtarget &STM = MF.getSubtarget();
561561 const SIMachineFunctionInfo *MFI = MF.getInfo();
562 unsigned RsrcReg = getRsrcReg(MFI->getShaderType());
563
564 if (MFI->getShaderType() == ShaderType::COMPUTE) {
562 unsigned RsrcReg = getRsrcReg(MF.getFunction()->getCallingConv());
563
564 if (AMDGPU::isCompute(MF.getFunction()->getCallingConv())) {
565565 OutStreamer->EmitIntValue(R_00B848_COMPUTE_PGM_RSRC1, 4);
566566
567567 OutStreamer->EmitIntValue(KernelInfo.ComputePGMRSrc1, 4);
578578 OutStreamer->EmitIntValue(RsrcReg, 4);
579579 OutStreamer->EmitIntValue(S_00B028_VGPRS(KernelInfo.VGPRBlocks) |
580580 S_00B028_SGPRS(KernelInfo.SGPRBlocks), 4);
581 if (STM.isVGPRSpillingEnabled(MFI)) {
581 if (STM.isVGPRSpillingEnabled(*MF.getFunction())) {
582582 OutStreamer->EmitIntValue(R_0286E8_SPI_TMPRING_SIZE, 4);
583583 OutStreamer->EmitIntValue(S_0286E8_WAVESIZE(KernelInfo.ScratchBlocks), 4);
584584 }
585585 }
586586
587 if (MFI->getShaderType() == ShaderType::PIXEL) {
587 if (MF.getFunction()->getCallingConv() == CallingConv::AMDGPU_PS) {
588588 OutStreamer->EmitIntValue(R_00B02C_SPI_SHADER_PGM_RSRC2_PS, 4);
589589 OutStreamer->EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(KernelInfo.LDSBlocks), 4);
590590 OutStreamer->EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4);
116116 CCIf<"static_cast"
117117 "(State.getMachineFunction().getSubtarget()).getGeneration() >="
118118 "AMDGPUSubtarget::SOUTHERN_ISLANDS && "
119 "State.getMachineFunction().getInfo()"
120 "->getShaderType() == ShaderType::COMPUTE",
119 "!AMDGPU::isShader(State.getCallingConv())",
121120 CCDelegateTo>,
122121 CCIf<"static_cast"
123122 "(State.getMachineFunction().getSubtarget()).getGeneration() < "
124123 "AMDGPUSubtarget::SOUTHERN_ISLANDS && "
125 "State.getMachineFunction().getInfo()"
126 "->getShaderType() == ShaderType::COMPUTE",
124 "!AMDGPU::isShader(State.getCallingConv())",
127125 CCDelegateTo>,
128126 CCIf<"static_cast"
129127 "(State.getMachineFunction().getSubtarget()).getGeneration() >= "
99
1010 AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) :
1111 MachineFunctionInfo(),
12 ShaderType(ShaderType::COMPUTE),
1312 LDSSize(0),
1413 ABIArgOffset(0),
1514 ScratchSize(0),
1615 IsKernel(true) {
17
18 ShaderType = AMDGPU::getShaderType(*MF.getFunction());
1916 }
1616
1717 class AMDGPUMachineFunction : public MachineFunctionInfo {
1818 virtual void anchor();
19 unsigned ShaderType;
2019
2120 public:
2221 AMDGPUMachineFunction(const MachineFunction &MF);
2928 /// Start of implicit kernel args
3029 unsigned ABIArgOffset;
3130
32 unsigned getShaderType() const {
33 return ShaderType;
34 }
35
3631 bool isKernel() const {
3732 // FIXME: Assume everything is a kernel until function calls are supported.
3833 return true;
135135 return AMDGPU::getIsaVersion(getFeatureBits());
136136 }
137137
138 bool AMDGPUSubtarget::isVGPRSpillingEnabled(
139 const SIMachineFunctionInfo *MFI) const {
140 return MFI->getShaderType() == ShaderType::COMPUTE || EnableVGPRSpilling;
138 bool AMDGPUSubtarget::isVGPRSpillingEnabled(const Function& F) const {
139 return !AMDGPU::isShader(F.getCallingConv()) || EnableVGPRSpilling;
141140 }
142141
143142 void AMDGPUSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
304304 bool isAmdHsaOS() const {
305305 return TargetTriple.getOS() == Triple::AMDHSA;
306306 }
307 bool isVGPRSpillingEnabled(const SIMachineFunctionInfo *MFI) const;
307 bool isVGPRSpillingEnabled(const Function& F) const;
308308
309309 bool isXNACKEnabled() const {
310310 return EnableXNACK;
264264
265265 static bool isArgPassedInSGPR(const Argument *A) {
266266 const Function *F = A->getParent();
267 unsigned ShaderType = AMDGPU::getShaderType(*F);
268267
269268 // Arguments to kernels (functions without a shader calling convention) are never a source of divergence.
270 if (ShaderType == ShaderType::COMPUTE)
269 if (!AMDGPU::isShader(F->getCallingConv()))
271270 return true;
272271
273272 // For functions with a shader calling convention, SGPR inputs are marked with either inreg or byval.
4545 unsigned CurrentEntries;
4646 unsigned CurrentSubEntries;
4747
48 CFStack(const AMDGPUSubtarget *st, unsigned ShaderType) : ST(st),
48 CFStack(const AMDGPUSubtarget *st, CallingConv::ID cc) : ST(st),
4949 // We need to reserve a stack entry for CALL_FS in vertex shaders.
50 MaxStackSize(ShaderType == ShaderType::VERTEX ? 1 : 0),
50 MaxStackSize(cc == CallingConv::AMDGPU_VS ? 1 : 0),
5151 CurrentEntries(0), CurrentSubEntries(0) { }
5252
5353 unsigned getLoopDepth();
477477 TRI = static_cast(ST->getRegisterInfo());
478478 R600MachineFunctionInfo *MFI = MF.getInfo();
479479
480 CFStack CFStack(ST, MFI->getShaderType());
480 CFStack CFStack(ST, MF.getFunction()->getCallingConv());
481481 for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME;
482482 ++MB) {
483483 MachineBasicBlock &MBB = *MB;
484484 unsigned CfCount = 0;
485485 std::vector > > LoopStack;
486486 std::vector IfThenElseStack;
487 if (MFI->getShaderType() == ShaderType::VERTEX) {
487 if (MF.getFunction()->getCallingConv() == CallingConv::AMDGPU_VS) {
488488 BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
489489 getHWInstrDesc(CF_CALL_FS));
490490 CfCount++;
17581758 MemVT = MemVT.getVectorElementType();
17591759 }
17601760
1761 if (MFI->getShaderType() != ShaderType::COMPUTE) {
1761 if (AMDGPU::isShader(CallConv)) {
17621762 unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
17631763 SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
17641764 InVals.push_back(Register);
203203
204204 bool R600InstrInfo::usesVertexCache(const MachineInstr *MI) const {
205205 const MachineFunction *MF = MI->getParent()->getParent();
206 const R600MachineFunctionInfo *MFI = MF->getInfo();
207 return MFI->getShaderType() != ShaderType::COMPUTE &&
206 return !AMDGPU::isCompute(MF->getFunction()->getCallingConv()) &&
208207 usesVertexCache(MI->getOpcode());
209208 }
210209
214213
215214 bool R600InstrInfo::usesTextureCache(const MachineInstr *MI) const {
216215 const MachineFunction *MF = MI->getParent()->getParent();
217 const R600MachineFunctionInfo *MFI = MF->getInfo();
218 return (MFI->getShaderType() == ShaderType::COMPUTE &&
216 return (AMDGPU::isCompute(MF->getFunction()->getCallingConv()) &&
219217 usesVertexCache(MI->getOpcode())) ||
220218 usesTextureCache(MI->getOpcode());
221219 }
605605 SIMachineFunctionInfo *Info = MF.getInfo();
606606 const AMDGPUSubtarget &ST = MF.getSubtarget();
607607
608 if (Subtarget->isAmdHsaOS() && Info->getShaderType() != ShaderType::COMPUTE) {
608 if (Subtarget->isAmdHsaOS() && AMDGPU::isShader(CallConv)) {
609609 const Function *Fn = MF.getFunction();
610610 DiagnosticInfoUnsupported NoGraphicsHSA(
611611 *Fn, "unsupported non-compute shaders with HSA", DL.getDebugLoc());
613613 return SDValue();
614614 }
615615
616 // FIXME: We currently assume all calling conventions are kernels.
617
618616 SmallVector Splits;
619617 BitVector Skipped(Ins.size());
620618
622620 const ISD::InputArg &Arg = Ins[i];
623621
624622 // First check if it's a PS input addr
625 if (Info->getShaderType() == ShaderType::PIXEL && !Arg.Flags.isInReg() &&
623 if (CallConv == CallingConv::AMDGPU_PS && !Arg.Flags.isInReg() &&
626624 !Arg.Flags.isByVal() && PSInputNum <= 15) {
627625
628626 if (!Arg.Used && !Info->isPSInputAllocated(PSInputNum)) {
640638 }
641639
642640 // Second split vertices into their elements
643 if (Info->getShaderType() != ShaderType::COMPUTE && Arg.VT.isVector()) {
641 if (AMDGPU::isShader(CallConv) &&
642 Arg.VT.isVector()) {
644643 ISD::InputArg NewArg = Arg;
645644 NewArg.Flags.setSplit();
646645 NewArg.VT = Arg.VT.getVectorElementType();
656655 NewArg.PartOffset += NewArg.VT.getStoreSize();
657656 }
658657
659 } else if (Info->getShaderType() != ShaderType::COMPUTE) {
658 } else if (AMDGPU::isShader(CallConv)) {
660659 Splits.push_back(Arg);
661660 }
662661 }
677676 // - At least one of PERSP_* (0xF) or LINEAR_* (0x70) must be enabled.
678677 // - If POS_W_FLOAT (11) is enabled, at least one of PERSP_* must be
679678 // enabled too.
680 if (Info->getShaderType() == ShaderType::PIXEL &&
679 if (CallConv == CallingConv::AMDGPU_PS &&
681680 ((Info->getPSInputAddr() & 0x7F) == 0 ||
682681 ((Info->getPSInputAddr() & 0xF) == 0 &&
683682 Info->isPSInputAllocated(11)))) {
687686 Info->PSInputEna |= 1;
688687 }
689688
690 if (Info->getShaderType() == ShaderType::COMPUTE) {
689 if (!AMDGPU::isShader(CallConv)) {
691690 getOriginalFunctionArgs(DAG, DAG.getMachineFunction().getFunction(), Ins,
692691 Splits);
693692 }
931930 MachineFunction &MF = DAG.getMachineFunction();
932931 SIMachineFunctionInfo *Info = MF.getInfo();
933932
934 if (Info->getShaderType() == ShaderType::COMPUTE)
933 if (!AMDGPU::isShader(CallConv))
935934 return AMDGPUTargetLowering::LowerReturn(Chain, CallConv, isVarArg, Outs,
936935 OutVals, DL, DAG);
937936
595595 return;
596596 }
597597
598 if (!ST.isVGPRSpillingEnabled(MFI)) {
598 if (!ST.isVGPRSpillingEnabled(*MF->getFunction())) {
599599 LLVMContext &Ctx = MF->getFunction()->getContext();
600600 Ctx.emitError("SIInstrInfo::storeRegToStackSlot - Do not know how to"
601601 " spill register");
681681 return;
682682 }
683683
684 if (!ST.isVGPRSpillingEnabled(MFI)) {
684 if (!ST.isVGPRSpillingEnabled(*MF->getFunction())) {
685685 LLVMContext &Ctx = MF->getFunction()->getContext();
686686 Ctx.emitError("SIInstrInfo::loadRegFromStackSlot - Do not know how to"
687687 " restore register");
727727 return TIDReg;
728728
729729
730 if (MFI->getShaderType() == ShaderType::COMPUTE &&
730 if (!AMDGPU::isShader(MF->getFunction()->getCallingConv()) &&
731731 WorkGroupSize > WavefrontSize) {
732732
733733 unsigned TIDIGXReg
168168 MachineBasicBlock &MBB = *MI.getParent();
169169 DebugLoc DL = MI.getDebugLoc();
170170
171 if (MBB.getParent()->getInfo()->getShaderType() !=
172 ShaderType::PIXEL ||
171 if (MBB.getParent()->getFunction()->getCallingConv() != CallingConv::AMDGPU_PS ||
173172 !shouldSkip(&MBB, &MBB.getParent()->back()))
174173 return;
175174
327326 const MachineOperand &Op = MI.getOperand(0);
328327
329328 #ifndef NDEBUG
330 const SIMachineFunctionInfo *MFI
331 = MBB.getParent()->getInfo();
329 CallingConv::ID CallConv = MBB.getParent()->getFunction()->getCallingConv();
332330 // Kill is only allowed in pixel / geometry shaders.
333 assert(MFI->getShaderType() == ShaderType::PIXEL ||
334 MFI->getShaderType() == ShaderType::GEOMETRY);
331 assert(CallConv == CallingConv::AMDGPU_PS ||
332 CallConv == CallingConv::AMDGPU_GS);
335333 #endif
336334
337335 // Clear this thread from the exec mask if the operand is negative
7979
8080 const MachineFrameInfo *FrameInfo = MF.getFrameInfo();
8181
82 if (getShaderType() == ShaderType::COMPUTE)
82 if (!AMDGPU::isShader(F->getCallingConv()))
8383 KernargSegmentPtr = true;
8484
8585 if (F->hasFnAttribute("amdgpu-work-group-id-y"))
9999 if (WorkItemIDZ)
100100 WorkItemIDY = true;
101101
102 bool MaySpill = ST.isVGPRSpillingEnabled(this);
102 bool MaySpill = ST.isVGPRSpillingEnabled(*F);
103103 bool HasStackObjects = FrameInfo->hasStackObjects();
104104
105105 if (HasStackObjects || MaySpill)
201201 const AMDGPUSubtarget &ST = MF.getSubtarget();
202202 // FIXME: We should get this information from kernel attributes if it
203203 // is available.
204 return getShaderType() == ShaderType::COMPUTE ? 256 : ST.getWavefrontSize();
205 }
204 if (AMDGPU::isCompute(MF.getFunction()->getCallingConv()))
205 return 256;
206 return ST.getWavefrontSize();
207 }
6161 }
6262
6363 bool SITypeRewriter::runOnFunction(Function &F) {
64 if (AMDGPU::getShaderType(F) == ShaderType::COMPUTE)
64 if (!AMDGPU::isShader(F.getCallingConv()))
6565 return false;
6666
6767 visit(F);
424424 }
425425
426426 bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
427 SIMachineFunctionInfo *MFI = MF.getInfo();
428
429 if (MFI->getShaderType() != ShaderType::PIXEL)
427 if (MF.getFunction()->getCallingConv() != CallingConv::AMDGPU_PS)
430428 return false;
431429
432430 Instructions.clear();
123123 return Result;
124124 }
125125
126 unsigned getShaderType(const Function &F) {
127 return getIntegerAttribute(F, "ShaderType", ShaderType::COMPUTE);
126 unsigned getInitialPSInputAddr(const Function &F) {
127 return getIntegerAttribute(F, "InitialPSInputAddr", 0);
128128 }
129129
130 unsigned getInitialPSInputAddr(const Function &F) {
131 return getIntegerAttribute(F, "InitialPSInputAddr", 0);
130 bool isShader(CallingConv::ID cc) {
131 switch(cc) {
132 case CallingConv::AMDGPU_VS:
133 case CallingConv::AMDGPU_GS:
134 case CallingConv::AMDGPU_PS:
135 case CallingConv::AMDGPU_CS:
136 return true;
137 default:
138 return false;
139 }
140 }
141
142 bool isCompute(CallingConv::ID cc) {
143 return !isShader(cc) || cc == CallingConv::AMDGPU_CS;
132144 }
133145
134146 bool isSI(const MCSubtargetInfo &STI) {
1010 #define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
1111
1212 #include "AMDKernelCodeT.h"
13 #include "llvm/IR/CallingConv.h"
1314
1415 namespace llvm {
1516
4344 bool isGlobalSegment(const GlobalValue *GV);
4445 bool isReadOnlySegment(const GlobalValue *GV);
4546
46 unsigned getShaderType(const Function &F);
4747 unsigned getInitialPSInputAddr(const Function &F);
4848
49 bool isShader(CallingConv::ID cc);
50 bool isCompute(CallingConv::ID cc);
4951
5052 bool isSI(const MCSubtargetInfo &STI);
5153 bool isCI(const MCSubtargetInfo &STI);
88 ; CHECK: DIVERGENT: float %arg5
99 ; CHECK: DIVERGENT: i32 %arg6
1010
11 define void @main([4 x <16 x i8>] addrspace(2)* byval %arg0, float inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <3 x i32> %arg4, float %arg5, i32 %arg6) #0 {
11 define cc 87 void @main([4 x <16 x i8>] addrspace(2)* byval %arg0, float inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <3 x i32> %arg4, float %arg5, i32 %arg6) #0 {
1212 ret void
1313 }
1414
0 ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
1 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
2
3
4 ; GCN-LABEL: {{^}}shader_cc:
5 ; GCN: v_add_i32_e32 v0, vcc, s8, v0
6 define amdgpu_cs float @shader_cc(<4 x i32> inreg, <4 x i32> inreg, i32 inreg %w, float %v) {
7 %vi = bitcast float %v to i32
8 %x = add i32 %vi, %w
9 %xf = bitcast i32 %x to float
10 ret float %xf
11 }
12
13 ; GCN-LABEL: {{^}}kernel_cc:
14 ; GCN: s_endpgm
15 define float @kernel_cc(<4 x i32> inreg, <4 x i32> inreg, i32 inreg %w, float %v) {
16 %vi = bitcast float %v to i32
17 %x = add i32 %vi, %w
18 %xf = bitcast i32 %x to float
19 ret float %xf
20 }
22 ; This test ensures that the R600 backend can handle ifcvt properly
33 ; and does not generate ALU clauses with more than 128 instructions.
44
5 define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2, <4 x float> inreg %reg3, <4 x float> inreg %reg4, <4 x float> inreg %reg5, <4 x float> inreg %reg6, <4 x float> inreg %reg7, <4 x float> inreg %reg8, <4 x float> inreg %reg9) #1 {
5 define amdgpu_ps void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2, <4 x float> inreg %reg3, <4 x float> inreg %reg4, <4 x float> inreg %reg5, <4 x float> inreg %reg6, <4 x float> inreg %reg7, <4 x float> inreg %reg8, <4 x float> inreg %reg9) {
66 main_body:
77 %0 = extractelement <4 x float> %reg0, i32 0
88 %1 = extractelement <4 x float> %reg0, i32 1
12961296 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
12971297
12981298 attributes #0 = { alwaysinline nounwind readnone }
1299 attributes #1 = { "ShaderType"="0" }
13001299 attributes #2 = { readnone }
13011300 attributes #3 = { nounwind readnone }
13021301 attributes #4 = { readonly }
66
77 ; FUNC-LABEL: {{^}}v32i8_to_v8i32:
88 ; SI: s_endpgm
9 define void @v32i8_to_v8i32(<32 x i8> addrspace(2)* inreg) #0 {
9 define amdgpu_ps void @v32i8_to_v8i32(<32 x i8> addrspace(2)* inreg) #0 {
1010 entry:
1111 %1 = load <32 x i8>, <32 x i8> addrspace(2)* %0
1212 %2 = bitcast <32 x i8> %1 to <8 x i32>
7474 store <2 x i32> %bc, <2 x i32> addrspace(1)* %out, align 8
7575 ret void
7676 }
77
78 attributes #0 = { "ShaderType"="0" }
99 ; R600:CALL_FS ; encoding: [0x00,0x00,0x00,0x00,0x00,0x00,0x80,0x89]
1010
1111
12 define void @call_fs() #0 {
12 define amdgpu_vs void @call_fs() {
1313 ret void
1414 }
15
16 attributes #0 = { "ShaderType"="1" } ; Vertex Shader
77 ; CHECK-NOT: ALU_PUSH_BEFORE
88 ; CHECK: END_LOOP
99 ; CHECK: END_LOOP
10 define void @main (<4 x float> inreg %reg0) #0 {
10 define amdgpu_ps void @main (<4 x float> inreg %reg0) {
1111 entry:
1212 br label %outer_loop
1313 outer_loop:
2727 exit:
2828 ret void
2929 }
30
31 attributes #0 = { "ShaderType"="0" }
33 ; GCN-LABEL: {{^}}main:
44 ; SI: v_lshl_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
55 ; VI: v_lshlrev_b32_e64 v{{[0-9]+}}, v{{[0-9]+}}, 1
6 define void @main() #0 {
6 define amdgpu_ps void @main() #0 {
77 bb:
88 %tmp = fptosi float undef to i32
99 %tmp1 = call <4 x float> @llvm.SI.image.load.v4i32(<4 x i32> undef, <8 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
2424 declare i32 @llvm.SI.packf16(float, float) #1
2525 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
2626
27 attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
27 attributes #0 = { "enable-no-nans-fp-math"="true" }
2828 attributes #1 = { nounwind readnone }
11
22 ; CHECK: {{^}}main:
33 ; CHECK-NOT: MOV
4 define void @main(<4 x float> inreg %reg0) #0 {
4 define amdgpu_ps void @main(<4 x float> inreg %reg0) {
55 entry:
66 %0 = extractelement <4 x float> %reg0, i32 0
77 %1 = call float @fabs(float %0)
1414
1515 declare float @fabs(float ) readnone
1616 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
17
18 attributes #0 = { "ShaderType"="0" }
2323 ; TONGA-NEXT: .long 576
2424 ; CONFIG: .p2align 8
2525 ; CONFIG: test:
26 define void @test(i32 %p) #0 {
26 define amdgpu_ps void @test(i32 %p) {
2727 %i = add i32 %p, 2
2828 %r = bitcast i32 %i to float
2929 call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %r, float %r, float %r, float %r)
3131 }
3232
3333 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
34
35 attributes #0 = { "ShaderType"="0" } ; Pixel Shader
66 ; CHECK: Fetch clause
77 ; CHECK: Fetch clause
88
9 define void @fetch_limits_r600() #0 {
9 define amdgpu_ps void @fetch_limits_r600() {
1010 entry:
1111 %0 = load <4 x float>, <4 x float> addrspace(8)* null
1212 %1 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
4141 ret void
4242 }
4343
44 attributes #0 = { "ShaderType"="0" } ; Pixel Shader
45
4644 declare <4 x float> @llvm.AMDGPU.tex(<4 x float>, i32, i32, i32) readnone
4745 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
1515 ; CHECK: Fetch clause
1616 ; CHECK: Fetch clause
1717
18 define void @fetch_limits_r700() #0 {
18 define amdgpu_ps void @fetch_limits_r700() {
1919 entry:
2020 %0 = load <4 x float>, <4 x float> addrspace(8)* null
2121 %1 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
7474 ret void
7575 }
7676
77 attributes #0 = { "ShaderType"="0" } ; Pixel Shader
78
7977 declare <4 x float> @llvm.AMDGPU.tex(<4 x float>, i32, i32, i32) readnone
8078 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
0 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s
11
22 ; CHECK: FLOOR * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
3 define void @test(<4 x float> inreg %reg0) #0 {
3 define amdgpu_ps void @test(<4 x float> inreg %reg0) {
44 %r0 = extractelement <4 x float> %reg0, i32 0
55 %r1 = call float @floor(float %r0)
66 %vec = insertelement <4 x float> undef, float %r1, i32 0
1111 declare float @floor(float) readonly
1212 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
1313
14 attributes #0 = { "ShaderType"="0" }
11
22 ;CHECK: MULADD_IEEE * {{T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
33
4 define void @test(<4 x float> inreg %reg0) #0 {
4 define amdgpu_ps void @test(<4 x float> inreg %reg0) {
55 %r0 = extractelement <4 x float> %reg0, i32 0
66 %r1 = extractelement <4 x float> %reg0, i32 1
77 %r2 = extractelement <4 x float> %reg0, i32 2
1414
1515 declare float @fabs(float ) readnone
1616 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
17
18 attributes #0 = { "ShaderType"="0" }
11
22 ;CHECK: MAX * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
33
4 define void @test(<4 x float> inreg %reg0) #0 {
4 define amdgpu_ps void @test(<4 x float> inreg %reg0) {
55 %r0 = extractelement <4 x float> %reg0, i32 0
66 %r1 = extractelement <4 x float> %reg0, i32 1
77 %r2 = fcmp oge float %r0, %r1
1212 }
1313
1414 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
15
16 attributes #0 = { "ShaderType"="0" }
11
22 ;CHECK: MIN * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
33
4 define void @test(<4 x float> inreg %reg0) #0 {
4 define amdgpu_ps void @test(<4 x float> inreg %reg0) {
55 %r0 = extractelement <4 x float> %reg0, i32 0
66 %r1 = extractelement <4 x float> %reg0, i32 1
77 %r2 = fcmp uge float %r0, %r1
1212 }
1313
1414 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
15
16 attributes #0 = { "ShaderType"="0" }
1313 ; CHECK: {{^}}inline_asm_shader:
1414 ; CHECK: s_endpgm
1515 ; CHECK: s_endpgm
16 define void @inline_asm_shader() #0 {
16 define amdgpu_ps void @inline_asm_shader() {
1717 entry:
1818 call void asm sideeffect "s_endpgm", ""()
1919 ret void
2020 }
21
22 attributes #0 = { "ShaderType"="0" }
2321
2422
2523 ; CHECK: {{^}}branch_on_asm:
88 ;CM: EXP_IEEE T{{[0-9]+}}.Z (MASKED), -|T{{[0-9]+}}.X|
99 ;CM: EXP_IEEE * T{{[0-9]+}}.W (MASKED), -|T{{[0-9]+}}.X|
1010
11 define void @test(<4 x float> inreg %reg0) #0 {
11 define amdgpu_ps void @test(<4 x float> inreg %reg0) {
1212 %r0 = extractelement <4 x float> %reg0, i32 0
1313 %r1 = call float @llvm.fabs.f32(float %r0)
1414 %r2 = fsub float -0.000000e+00, %r1
2121 declare float @llvm.exp2.f32(float) readnone
2222 declare float @llvm.fabs.f32(float) readnone
2323 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
24
25 attributes #0 = { "ShaderType"="0" }
33 ; CHECK: EXPORT
44 ; CHECK-NOT: EXPORT
55
6 define void @main() #0 {
6 define amdgpu_ps void @main() {
77 main_body:
88 %0 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
99 %1 = extractelement <4 x float> %0, i32 0
4747 }
4848
4949 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
50
51 attributes #0 = { "ShaderType"="0" }
1111 ; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s1 offen
1212
1313 ; ALL: ; ScratchSize: 32772
14 define void @large_alloca_pixel_shader(i32 %x, i32 %y) #1 {
14 define amdgpu_ps void @large_alloca_pixel_shader(i32 %x, i32 %y) #0 {
1515 %large = alloca [8192 x i32], align 4
1616 %gep = getelementptr [8192 x i32], [8192 x i32]* %large, i32 0, i32 8191
1717 store volatile i32 %x, i32* %gep
3232 ; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s1 offen
3333
3434 ; ALL: ; ScratchSize: 32772
35 define void @large_alloca_pixel_shader_inreg(i32 inreg %x, i32 inreg %y) #1 {
35 define amdgpu_ps void @large_alloca_pixel_shader_inreg(i32 inreg %x, i32 inreg %y) #0 {
3636 %large = alloca [8192 x i32], align 4
3737 %gep = getelementptr [8192 x i32], [8192 x i32]* %large, i32 0, i32 8191
3838 store volatile i32 %x, i32* %gep
4343 }
4444
4545 attributes #0 = { nounwind }
46 attributes #1 = { nounwind "ShaderType"="0" }
44 ; CHECK: CUBE T{{[0-9]}}.Y
55 ; CHECK: CUBE T{{[0-9]}}.Z
66 ; CHECK: CUBE * T{{[0-9]}}.W
7 define void @cube() #0 {
7 define amdgpu_ps void @cube() {
88 main_body:
99 %0 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
1010 %1 = extractelement <4 x float> %0, i32 3
4242 }
4343
4444 ; Function Attrs: readnone
45 declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #1
45 declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #0
4646
4747 ; Function Attrs: readnone
48 declare float @fabs(float) #1
48 declare float @fabs(float) #0
4949
5050 ; Function Attrs: readnone
51 declare <4 x float> @llvm.AMDGPU.tex(<4 x float>, i32, i32, i32) #1
51 declare <4 x float> @llvm.AMDGPU.tex(<4 x float>, i32, i32, i32) #0
5252
5353 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
5454
55 attributes #0 = { "ShaderType"="0" }
56 attributes #1 = { readnone }
55 attributes #0 = { readnone }
5756
44 ; SI-NOT: v_cmpx_le_f32
55 ; SI: s_mov_b64 exec, 0
66
7 define void @kill_gs_const() #0 {
7 define amdgpu_gs void @kill_gs_const() {
88 main_body:
99 %0 = icmp ule i32 0, 3
1010 %1 = select i1 %0, float 1.000000e+00, float -1.000000e+00
2020 ; SI: v_cmp_gt_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], 0, v{{[0-9]+}}
2121 ; SI: v_cmpx_le_f32_e32 vcc, 0, v{{[0-9]+}}
2222 ; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1.0, [[CMP]]
23 define void @kill_vcc_implicit_def([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #1 {
23 define amdgpu_ps void @kill_vcc_implicit_def([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) {
2424 entry:
2525 %tmp0 = fcmp olt float %13, 0.0
2626 call void @llvm.AMDGPU.kill(float %14)
3232 declare void @llvm.AMDGPU.kill(float)
3333 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
3434
35 attributes #0 = { "ShaderType"="2" }
36 attributes #1 = { "ShaderType"="0" }
37
3835 !0 = !{!"const", null, i32 1}
99 ;GCN: v_interp_p1_f32
1010 ;GCN: v_interp_p2_f32
1111
12 define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>) #0 {
12 define amdgpu_ps void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>) {
1313 main_body:
1414 %5 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %3)
1515 %6 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %4)
2424 ; 16BANK-LABEL: {{^}}v_interp_p1_bank16_bug:
2525 ; 16BANK-NOT: v_interp_p1_f32 [[DST:v[0-9]+]], [[DST]]
2626
27 define void @v_interp_p1_bank16_bug([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
27 define amdgpu_ps void @v_interp_p1_bank16_bug([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) {
2828 main_body:
2929 %22 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
3030 %23 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
4141 }
4242
4343 ; Function Attrs: readnone
44 declare float @fabs(float) #2
44 declare float @fabs(float) #1
4545
4646 ; Function Attrs: nounwind readnone
47 declare i32 @llvm.SI.packf16(float, float) #1
47 declare i32 @llvm.SI.packf16(float, float) #0
4848
4949 ; Function Attrs: nounwind readnone
50 declare float @llvm.SI.fs.constant(i32, i32, i32) #1
50 declare float @llvm.SI.fs.constant(i32, i32, i32) #0
5151
5252 ; Function Attrs: nounwind readnone
53 declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
53 declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #0
5454
5555 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
5656
57 attributes #0 = { "ShaderType"="0" }
58 attributes #1 = { nounwind readnone }
59 attributes #2 = { readnone }
57 attributes #0 = { nounwind readnone }
58 attributes #1 = { readnone }
22
33 ;CHECK-LABEL: {{^}}gather4_v2:
44 ;CHECK: image_gather4 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
5 define void @gather4_v2() #0 {
5 define amdgpu_ps void @gather4_v2() {
66 main_body:
77 %r = call <4 x float> @llvm.SI.gather4.v2i32(<2 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
88 %r0 = extractelement <4 x float> %r, i32 0
1515
1616 ;CHECK-LABEL: {{^}}gather4:
1717 ;CHECK: image_gather4 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
18 define void @gather4() #0 {
18 define amdgpu_ps void @gather4() {
1919 main_body:
2020 %r = call <4 x float> @llvm.SI.gather4.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
2121 %r0 = extractelement <4 x float> %r, i32 0
2828
2929 ;CHECK-LABEL: {{^}}gather4_cl:
3030 ;CHECK: image_gather4_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
31 define void @gather4_cl() #0 {
31 define amdgpu_ps void @gather4_cl() {
3232 main_body:
3333 %r = call <4 x float> @llvm.SI.gather4.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
3434 %r0 = extractelement <4 x float> %r, i32 0
4141
4242 ;CHECK-LABEL: {{^}}gather4_l:
4343 ;CHECK: image_gather4_l {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
44 define void @gather4_l() #0 {
44 define amdgpu_ps void @gather4_l() {
4545 main_body:
4646 %r = call <4 x float> @llvm.SI.gather4.l.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
4747 %r0 = extractelement <4 x float> %r, i32 0
5454
5555 ;CHECK-LABEL: {{^}}gather4_b:
5656 ;CHECK: image_gather4_b {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
57 define void @gather4_b() #0 {
57 define amdgpu_ps void @gather4_b() {
5858 main_body:
5959 %r = call <4 x float> @llvm.SI.gather4.b.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
6060 %r0 = extractelement <4 x float> %r, i32 0
6767
6868 ;CHECK-LABEL: {{^}}gather4_b_cl:
6969 ;CHECK: image_gather4_b_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
70 define void @gather4_b_cl() #0 {
70 define amdgpu_ps void @gather4_b_cl() {
7171 main_body:
7272 %r = call <4 x float> @llvm.SI.gather4.b.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
7373 %r0 = extractelement <4 x float> %r, i32 0
8080
8181 ;CHECK-LABEL: {{^}}gather4_b_cl_v8:
8282 ;CHECK: image_gather4_b_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
83 define void @gather4_b_cl_v8() #0 {
83 define amdgpu_ps void @gather4_b_cl_v8() {
8484 main_body:
8585 %r = call <4 x float> @llvm.SI.gather4.b.cl.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
8686 %r0 = extractelement <4 x float> %r, i32 0
9393
9494 ;CHECK-LABEL: {{^}}gather4_lz_v2:
9595 ;CHECK: image_gather4_lz {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
96 define void @gather4_lz_v2() #0 {
96 define amdgpu_ps void @gather4_lz_v2() {
9797 main_body:
9898 %r = call <4 x float> @llvm.SI.gather4.lz.v2i32(<2 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
9999 %r0 = extractelement <4 x float> %r, i32 0
106106
107107 ;CHECK-LABEL: {{^}}gather4_lz:
108108 ;CHECK: image_gather4_lz {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
109 define void @gather4_lz() #0 {
109 define amdgpu_ps void @gather4_lz() {
110110 main_body:
111111 %r = call <4 x float> @llvm.SI.gather4.lz.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
112112 %r0 = extractelement <4 x float> %r, i32 0
121121
122122 ;CHECK-LABEL: {{^}}gather4_o:
123123 ;CHECK: image_gather4_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
124 define void @gather4_o() #0 {
124 define amdgpu_ps void @gather4_o() {
125125 main_body:
126126 %r = call <4 x float> @llvm.SI.gather4.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
127127 %r0 = extractelement <4 x float> %r, i32 0
134134
135135 ;CHECK-LABEL: {{^}}gather4_cl_o:
136136 ;CHECK: image_gather4_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
137 define void @gather4_cl_o() #0 {
137 define amdgpu_ps void @gather4_cl_o() {
138138 main_body:
139139 %r = call <4 x float> @llvm.SI.gather4.cl.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
140140 %r0 = extractelement <4 x float> %r, i32 0
147147
148148 ;CHECK-LABEL: {{^}}gather4_cl_o_v8:
149149 ;CHECK: image_gather4_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
150 define void @gather4_cl_o_v8() #0 {
150 define amdgpu_ps void @gather4_cl_o_v8() {
151151 main_body:
152152 %r = call <4 x float> @llvm.SI.gather4.cl.o.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
153153 %r0 = extractelement <4 x float> %r, i32 0
160160
161161 ;CHECK-LABEL: {{^}}gather4_l_o:
162162 ;CHECK: image_gather4_l_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
163 define void @gather4_l_o() #0 {
163 define amdgpu_ps void @gather4_l_o() {
164164 main_body:
165165 %r = call <4 x float> @llvm.SI.gather4.l.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
166166 %r0 = extractelement <4 x float> %r, i32 0
173173
174174 ;CHECK-LABEL: {{^}}gather4_l_o_v8:
175175 ;CHECK: image_gather4_l_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
176 define void @gather4_l_o_v8() #0 {
176 define amdgpu_ps void @gather4_l_o_v8() {
177177 main_body:
178178 %r = call <4 x float> @llvm.SI.gather4.l.o.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
179179 %r0 = extractelement <4 x float> %r, i32 0
186186
187187 ;CHECK-LABEL: {{^}}gather4_b_o:
188188 ;CHECK: image_gather4_b_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
189 define void @gather4_b_o() #0 {
189 define amdgpu_ps void @gather4_b_o() {
190190 main_body:
191191 %r = call <4 x float> @llvm.SI.gather4.b.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
192192 %r0 = extractelement <4 x float> %r, i32 0
199199
200200 ;CHECK-LABEL: {{^}}gather4_b_o_v8:
201201 ;CHECK: image_gather4_b_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
202 define void @gather4_b_o_v8() #0 {
202 define amdgpu_ps void @gather4_b_o_v8() {
203203 main_body:
204204 %r = call <4 x float> @llvm.SI.gather4.b.o.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
205205 %r0 = extractelement <4 x float> %r, i32 0
212212
213213 ;CHECK-LABEL: {{^}}gather4_b_cl_o:
214214 ;CHECK: image_gather4_b_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
215 define void @gather4_b_cl_o() #0 {
215 define amdgpu_ps void @gather4_b_cl_o() {
216216 main_body:
217217 %r = call <4 x float> @llvm.SI.gather4.b.cl.o.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
218218 %r0 = extractelement <4 x float> %r, i32 0
225225
226226 ;CHECK-LABEL: {{^}}gather4_lz_o:
227227 ;CHECK: image_gather4_lz_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
228 define void @gather4_lz_o() #0 {
228 define amdgpu_ps void @gather4_lz_o() {
229229 main_body:
230230 %r = call <4 x float> @llvm.SI.gather4.lz.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
231231 %r0 = extractelement <4 x float> %r, i32 0
240240
241241 ;CHECK-LABEL: {{^}}gather4_c:
242242 ;CHECK: image_gather4_c {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
243 define void @gather4_c() #0 {
243 define amdgpu_ps void @gather4_c() {
244244 main_body:
245245 %r = call <4 x float> @llvm.SI.gather4.c.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
246246 %r0 = extractelement <4 x float> %r, i32 0
253253
254254 ;CHECK-LABEL: {{^}}gather4_c_cl:
255255 ;CHECK: image_gather4_c_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
256 define void @gather4_c_cl() #0 {
256 define amdgpu_ps void @gather4_c_cl() {
257257 main_body:
258258 %r = call <4 x float> @llvm.SI.gather4.c.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
259259 %r0 = extractelement <4 x float> %r, i32 0
266266
267267 ;CHECK-LABEL: {{^}}gather4_c_cl_v8:
268268 ;CHECK: image_gather4_c_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
269 define void @gather4_c_cl_v8() #0 {
269 define amdgpu_ps void @gather4_c_cl_v8() {
270270 main_body:
271271 %r = call <4 x float> @llvm.SI.gather4.c.cl.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
272272 %r0 = extractelement <4 x float> %r, i32 0
279279
280280 ;CHECK-LABEL: {{^}}gather4_c_l:
281281 ;CHECK: image_gather4_c_l {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
282 define void @gather4_c_l() #0 {
282 define amdgpu_ps void @gather4_c_l() {
283283 main_body:
284284 %r = call <4 x float> @llvm.SI.gather4.c.l.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
285285 %r0 = extractelement <4 x float> %r, i32 0
292292
293293 ;CHECK-LABEL: {{^}}gather4_c_l_v8:
294294 ;CHECK: image_gather4_c_l {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
295 define void @gather4_c_l_v8() #0 {
295 define amdgpu_ps void @gather4_c_l_v8() {
296296 main_body:
297297 %r = call <4 x float> @llvm.SI.gather4.c.l.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
298298 %r0 = extractelement <4 x float> %r, i32 0
305305
306306 ;CHECK-LABEL: {{^}}gather4_c_b:
307307 ;CHECK: image_gather4_c_b {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
308 define void @gather4_c_b() #0 {
308 define amdgpu_ps void @gather4_c_b() {
309309 main_body:
310310 %r = call <4 x float> @llvm.SI.gather4.c.b.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
311311 %r0 = extractelement <4 x float> %r, i32 0
318318
319319 ;CHECK-LABEL: {{^}}gather4_c_b_v8:
320320 ;CHECK: image_gather4_c_b {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
321 define void @gather4_c_b_v8() #0 {
321 define amdgpu_ps void @gather4_c_b_v8() {
322322 main_body:
323323 %r = call <4 x float> @llvm.SI.gather4.c.b.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
324324 %r0 = extractelement <4 x float> %r, i32 0
331331
332332 ;CHECK-LABEL: {{^}}gather4_c_b_cl:
333333 ;CHECK: image_gather4_c_b_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
334 define void @gather4_c_b_cl() #0 {
334 define amdgpu_ps void @gather4_c_b_cl() {
335335 main_body:
336336 %r = call <4 x float> @llvm.SI.gather4.c.b.cl.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
337337 %r0 = extractelement <4 x float> %r, i32 0
344344
345345 ;CHECK-LABEL: {{^}}gather4_c_lz:
346346 ;CHECK: image_gather4_c_lz {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
347 define void @gather4_c_lz() #0 {
347 define amdgpu_ps void @gather4_c_lz() {
348348 main_body:
349349 %r = call <4 x float> @llvm.SI.gather4.c.lz.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
350350 %r0 = extractelement <4 x float> %r, i32 0
359359
360360 ;CHECK-LABEL: {{^}}gather4_c_o:
361361 ;CHECK: image_gather4_c_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
362 define void @gather4_c_o() #0 {
362 define amdgpu_ps void @gather4_c_o() {
363363 main_body:
364364 %r = call <4 x float> @llvm.SI.gather4.c.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
365365 %r0 = extractelement <4 x float> %r, i32 0
372372
373373 ;CHECK-LABEL: {{^}}gather4_c_o_v8:
374374 ;CHECK: image_gather4_c_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
375 define void @gather4_c_o_v8() #0 {
375 define amdgpu_ps void @gather4_c_o_v8() {
376376 main_body:
377377 %r = call <4 x float> @llvm.SI.gather4.c.o.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
378378 %r0 = extractelement <4 x float> %r, i32 0
385385
386386 ;CHECK-LABEL: {{^}}gather4_c_cl_o:
387387 ;CHECK: image_gather4_c_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
388 define void @gather4_c_cl_o() #0 {
388 define amdgpu_ps void @gather4_c_cl_o() {
389389 main_body:
390390 %r = call <4 x float> @llvm.SI.gather4.c.cl.o.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
391391 %r0 = extractelement <4 x float> %r, i32 0
398398
399399 ;CHECK-LABEL: {{^}}gather4_c_l_o:
400400 ;CHECK: image_gather4_c_l_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
401 define void @gather4_c_l_o() #0 {
401 define amdgpu_ps void @gather4_c_l_o() {
402402 main_body:
403403 %r = call <4 x float> @llvm.SI.gather4.c.l.o.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
404404 %r0 = extractelement <4 x float> %r, i32 0
411411
412412 ;CHECK-LABEL: {{^}}gather4_c_b_o:
413413 ;CHECK: image_gather4_c_b_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
414 define void @gather4_c_b_o() #0 {
414 define amdgpu_ps void @gather4_c_b_o() {
415415 main_body:
416416 %r = call <4 x float> @llvm.SI.gather4.c.b.o.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
417417 %r0 = extractelement <4 x float> %r, i32 0
424424
425425 ;CHECK-LABEL: {{^}}gather4_c_b_cl_o:
426426 ;CHECK: image_gather4_c_b_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
427 define void @gather4_c_b_cl_o() #0 {
427 define amdgpu_ps void @gather4_c_b_cl_o() {
428428 main_body:
429429 %r = call <4 x float> @llvm.SI.gather4.c.b.cl.o.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
430430 %r0 = extractelement <4 x float> %r, i32 0
437437
438438 ;CHECK-LABEL: {{^}}gather4_c_lz_o:
439439 ;CHECK: image_gather4_c_lz_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
440 define void @gather4_c_lz_o() #0 {
440 define amdgpu_ps void @gather4_c_lz_o() {
441441 main_body:
442442 %r = call <4 x float> @llvm.SI.gather4.c.lz.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
443443 %r0 = extractelement <4 x float> %r, i32 0
450450
451451 ;CHECK-LABEL: {{^}}gather4_c_lz_o_v8:
452452 ;CHECK: image_gather4_c_lz_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x1 da
453 define void @gather4_c_lz_o_v8() #0 {
453 define amdgpu_ps void @gather4_c_lz_o_v8() {
454454 main_body:
455455 %r = call <4 x float> @llvm.SI.gather4.c.lz.o.v8i32(<8 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
456456 %r0 = extractelement <4 x float> %r, i32 0
463463
464464
465465
466 declare <4 x float> @llvm.SI.gather4.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
467 declare <4 x float> @llvm.SI.gather4.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
468 declare <4 x float> @llvm.SI.gather4.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
469 declare <4 x float> @llvm.SI.gather4.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
470 declare <4 x float> @llvm.SI.gather4.b.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
471 declare <4 x float> @llvm.SI.gather4.b.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
472 declare <4 x float> @llvm.SI.gather4.b.cl.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
473 declare <4 x float> @llvm.SI.gather4.lz.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
474 declare <4 x float> @llvm.SI.gather4.lz.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
475
476 declare <4 x float> @llvm.SI.gather4.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
477 declare <4 x float> @llvm.SI.gather4.cl.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
478 declare <4 x float> @llvm.SI.gather4.cl.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
479 declare <4 x float> @llvm.SI.gather4.l.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
480 declare <4 x float> @llvm.SI.gather4.l.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
481 declare <4 x float> @llvm.SI.gather4.b.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
482 declare <4 x float> @llvm.SI.gather4.b.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
483 declare <4 x float> @llvm.SI.gather4.b.cl.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
484 declare <4 x float> @llvm.SI.gather4.lz.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
485
486 declare <4 x float> @llvm.SI.gather4.c.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
487 declare <4 x float> @llvm.SI.gather4.c.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
488 declare <4 x float> @llvm.SI.gather4.c.cl.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
489 declare <4 x float> @llvm.SI.gather4.c.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
490 declare <4 x float> @llvm.SI.gather4.c.l.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
491 declare <4 x float> @llvm.SI.gather4.c.b.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
492 declare <4 x float> @llvm.SI.gather4.c.b.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
493 declare <4 x float> @llvm.SI.gather4.c.b.cl.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
494 declare <4 x float> @llvm.SI.gather4.c.lz.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
495
496 declare <4 x float> @llvm.SI.gather4.c.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
497 declare <4 x float> @llvm.SI.gather4.c.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
498 declare <4 x float> @llvm.SI.gather4.c.cl.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
499 declare <4 x float> @llvm.SI.gather4.c.l.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
500 declare <4 x float> @llvm.SI.gather4.c.b.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
501 declare <4 x float> @llvm.SI.gather4.c.b.cl.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
502 declare <4 x float> @llvm.SI.gather4.c.lz.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
503 declare <4 x float> @llvm.SI.gather4.c.lz.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
466 declare <4 x float> @llvm.SI.gather4.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
467 declare <4 x float> @llvm.SI.gather4.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
468 declare <4 x float> @llvm.SI.gather4.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
469 declare <4 x float> @llvm.SI.gather4.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
470 declare <4 x float> @llvm.SI.gather4.b.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
471 declare <4 x float> @llvm.SI.gather4.b.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
472 declare <4 x float> @llvm.SI.gather4.b.cl.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
473 declare <4 x float> @llvm.SI.gather4.lz.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
474 declare <4 x float> @llvm.SI.gather4.lz.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
475
476 declare <4 x float> @llvm.SI.gather4.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
477 declare <4 x float> @llvm.SI.gather4.cl.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
478 declare <4 x float> @llvm.SI.gather4.cl.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
479 declare <4 x float> @llvm.SI.gather4.l.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
480 declare <4 x float> @llvm.SI.gather4.l.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
481 declare <4 x float> @llvm.SI.gather4.b.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
482 declare <4 x float> @llvm.SI.gather4.b.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
483 declare <4 x float> @llvm.SI.gather4.b.cl.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
484 declare <4 x float> @llvm.SI.gather4.lz.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
485
486 declare <4 x float> @llvm.SI.gather4.c.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
487 declare <4 x float> @llvm.SI.gather4.c.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
488 declare <4 x float> @llvm.SI.gather4.c.cl.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
489 declare <4 x float> @llvm.SI.gather4.c.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
490 declare <4 x float> @llvm.SI.gather4.c.l.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
491 declare <4 x float> @llvm.SI.gather4.c.b.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
492 declare <4 x float> @llvm.SI.gather4.c.b.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
493 declare <4 x float> @llvm.SI.gather4.c.b.cl.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
494 declare <4 x float> @llvm.SI.gather4.c.lz.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
495
496 declare <4 x float> @llvm.SI.gather4.c.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
497 declare <4 x float> @llvm.SI.gather4.c.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
498 declare <4 x float> @llvm.SI.gather4.c.cl.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
499 declare <4 x float> @llvm.SI.gather4.c.l.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
500 declare <4 x float> @llvm.SI.gather4.c.b.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
501 declare <4 x float> @llvm.SI.gather4.c.b.cl.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
502 declare <4 x float> @llvm.SI.gather4.c.lz.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
503 declare <4 x float> @llvm.SI.gather4.c.lz.o.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
504504
505505 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
506506
507 attributes #0 = { "ShaderType"="0" }
508 attributes #1 = { nounwind readnone }
507 attributes #0 = { nounwind readnone }
22
33 ;CHECK-LABEL: {{^}}getlod:
44 ;CHECK: image_get_lod {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x3 da
5 define void @getlod() #0 {
5 define amdgpu_ps void @getlod() {
66 main_body:
77 %r = call <4 x float> @llvm.SI.getlod.i32(i32 undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
88 %r0 = extractelement <4 x float> %r, i32 0
1313
1414 ;CHECK-LABEL: {{^}}getlod_v2:
1515 ;CHECK: image_get_lod {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x3 da
16 define void @getlod_v2() #0 {
16 define amdgpu_ps void @getlod_v2() {
1717 main_body:
1818 %r = call <4 x float> @llvm.SI.getlod.v2i32(<2 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
1919 %r0 = extractelement <4 x float> %r, i32 0
2424
2525 ;CHECK-LABEL: {{^}}getlod_v4:
2626 ;CHECK: image_get_lod {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x3 da
27 define void @getlod_v4() #0 {
27 define amdgpu_ps void @getlod_v4() {
2828 main_body:
2929 %r = call <4 x float> @llvm.SI.getlod.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
3030 %r0 = extractelement <4 x float> %r, i32 0
3434 }
3535
3636
37 declare <4 x float> @llvm.SI.getlod.i32(i32, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
38 declare <4 x float> @llvm.SI.getlod.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
39 declare <4 x float> @llvm.SI.getlod.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
37 declare <4 x float> @llvm.SI.getlod.i32(i32, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
38 declare <4 x float> @llvm.SI.getlod.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
39 declare <4 x float> @llvm.SI.getlod.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
4040
4141 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
4242
43 attributes #0 = { "ShaderType"="0" }
44 attributes #1 = { nounwind readnone }
43 attributes #0 = { nounwind readnone }
22
33 ;CHECK-LABEL: {{^}}image_load:
44 ;CHECK: image_load {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
5 define void @image_load() #0 {
5 define amdgpu_ps void @image_load() {
66 main_body:
77 %r = call <4 x float> @llvm.SI.image.load.v4i32(<4 x i32> undef, <8 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
88 %r0 = extractelement <4 x float> %r, i32 0
1515
1616 ;CHECK-LABEL: {{^}}image_load_mip:
1717 ;CHECK: image_load_mip {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
18 define void @image_load_mip() #0 {
18 define amdgpu_ps void @image_load_mip() {
1919 main_body:
2020 %r = call <4 x float> @llvm.SI.image.load.mip.v4i32(<4 x i32> undef, <8 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
2121 %r0 = extractelement <4 x float> %r, i32 0
2828
2929 ;CHECK-LABEL: {{^}}getresinfo:
3030 ;CHECK: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
31 define void @getresinfo() #0 {
31 define amdgpu_ps void @getresinfo() {
3232 main_body:
3333 %r = call <4 x float> @llvm.SI.getresinfo.i32(i32 undef, <8 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
3434 %r0 = extractelement <4 x float> %r, i32 0
3939 ret void
4040 }
4141
42 declare <4 x float> @llvm.SI.image.load.v4i32(<4 x i32>, <8 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
43 declare <4 x float> @llvm.SI.image.load.mip.v4i32(<4 x i32>, <8 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
44 declare <4 x float> @llvm.SI.getresinfo.i32(i32, <8 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
42 declare <4 x float> @llvm.SI.image.load.v4i32(<4 x i32>, <8 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
43 declare <4 x float> @llvm.SI.image.load.mip.v4i32(<4 x i32>, <8 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
44 declare <4 x float> @llvm.SI.getresinfo.i32(i32, <8 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
4545
4646 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
4747
48 attributes #0 = { "ShaderType"="0" }
49 attributes #1 = { nounwind readnone }
48 attributes #0 = { nounwind readnone }
22
33 ; CHECK-LABEL: {{^}}v1:
44 ; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xd
5 define void @v1(i32 %a1) #0 {
5 define amdgpu_ps void @v1(i32 %a1) {
66 entry:
77 %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
88 %1 = call <4 x float> @llvm.SI.image.sample.v1i32(<1 x i32> %0, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
1515
1616 ; CHECK-LABEL: {{^}}v2:
1717 ; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xb
18 define void @v2(i32 %a1) #0 {
18 define amdgpu_ps void @v2(i32 %a1) {
1919 entry:
2020 %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
2121 %1 = call <4 x float> @llvm.SI.image.sample.v1i32(<1 x i32> %0, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
2828
2929 ; CHECK-LABEL: {{^}}v3:
3030 ; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xe
31 define void @v3(i32 %a1) #0 {
31 define amdgpu_ps void @v3(i32 %a1) {
3232 entry:
3333 %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
3434 %1 = call <4 x float> @llvm.SI.image.sample.v1i32(<1 x i32> %0, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
4141
4242 ; CHECK-LABEL: {{^}}v4:
4343 ; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x7
44 define void @v4(i32 %a1) #0 {
44 define amdgpu_ps void @v4(i32 %a1) {
4545 entry:
4646 %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
4747 %1 = call <4 x float> @llvm.SI.image.sample.v1i32(<1 x i32> %0, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
5454
5555 ; CHECK-LABEL: {{^}}v5:
5656 ; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xa
57 define void @v5(i32 %a1) #0 {
57 define amdgpu_ps void @v5(i32 %a1) {
5858 entry:
5959 %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
6060 %1 = call <4 x float> @llvm.SI.image.sample.v1i32(<1 x i32> %0, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
6666
6767 ; CHECK-LABEL: {{^}}v6:
6868 ; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x6
69 define void @v6(i32 %a1) #0 {
69 define amdgpu_ps void @v6(i32 %a1) {
7070 entry:
7171 %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
7272 %1 = call <4 x float> @llvm.SI.image.sample.v1i32(<1 x i32> %0, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
7878
7979 ; CHECK-LABEL: {{^}}v7:
8080 ; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0x9
81 define void @v7(i32 %a1) #0 {
81 define amdgpu_ps void @v7(i32 %a1) {
8282 entry:
8383 %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
8484 %1 = call <4 x float> @llvm.SI.image.sample.v1i32(<1 x i32> %0, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
9191 declare <4 x float> @llvm.SI.image.sample.v1i32(<1 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) readnone
9292
9393 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
94
95 attributes #0 = { "ShaderType"="0" }
33 ;CHECK-LABEL: {{^}}sample:
44 ;CHECK: s_wqm
55 ;CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
6 define void @sample() #0 {
6 define amdgpu_ps void @sample() {
77 main_body:
88 %r = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
99 %r0 = extractelement <4 x float> %r, i32 0
1717 ;CHECK-LABEL: {{^}}sample_cl:
1818 ;CHECK: s_wqm
1919 ;CHECK: image_sample_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
20 define void @sample_cl() #0 {
20 define amdgpu_ps void @sample_cl() {
2121 main_body:
2222 %r = call <4 x float> @llvm.SI.image.sample.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
2323 %r0 = extractelement <4 x float> %r, i32 0
3131 ;CHECK-LABEL: {{^}}sample_d:
3232 ;CHECK-NOT: s_wqm
3333 ;CHECK: image_sample_d {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
34 define void @sample_d() #0 {
34 define amdgpu_ps void @sample_d() {
3535 main_body:
3636 %r = call <4 x float> @llvm.SI.image.sample.d.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
3737 %r0 = extractelement <4 x float> %r, i32 0
4545 ;CHECK-LABEL: {{^}}sample_d_cl:
4646 ;CHECK-NOT: s_wqm
4747 ;CHECK: image_sample_d_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
48 define void @sample_d_cl() #0 {
48 define amdgpu_ps void @sample_d_cl() {
4949 main_body:
5050 %r = call <4 x float> @llvm.SI.image.sample.d.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
5151 %r0 = extractelement <4 x float> %r, i32 0
5959 ;CHECK-LABEL: {{^}}sample_l:
6060 ;CHECK-NOT: s_wqm
6161 ;CHECK: image_sample_l {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
62 define void @sample_l() #0 {
62 define amdgpu_ps void @sample_l() {
6363 main_body:
6464 %r = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
6565 %r0 = extractelement <4 x float> %r, i32 0
7373 ;CHECK-LABEL: {{^}}sample_b:
7474 ;CHECK: s_wqm
7575 ;CHECK: image_sample_b {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
76 define void @sample_b() #0 {
76 define amdgpu_ps void @sample_b() {
7777 main_body:
7878 %r = call <4 x float> @llvm.SI.image.sample.b.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
7979 %r0 = extractelement <4 x float> %r, i32 0
8787 ;CHECK-LABEL: {{^}}sample_b_cl:
8888 ;CHECK: s_wqm
8989 ;CHECK: image_sample_b_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
90 define void @sample_b_cl() #0 {
90 define amdgpu_ps void @sample_b_cl() {
9191 main_body:
9292 %r = call <4 x float> @llvm.SI.image.sample.b.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
9393 %r0 = extractelement <4 x float> %r, i32 0
101101 ;CHECK-LABEL: {{^}}sample_lz:
102102 ;CHECK-NOT: s_wqm
103103 ;CHECK: image_sample_lz {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
104 define void @sample_lz() #0 {
104 define amdgpu_ps void @sample_lz() {
105105 main_body:
106106 %r = call <4 x float> @llvm.SI.image.sample.lz.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
107107 %r0 = extractelement <4 x float> %r, i32 0
115115 ;CHECK-LABEL: {{^}}sample_cd:
116116 ;CHECK-NOT: s_wqm
117117 ;CHECK: image_sample_cd {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
118 define void @sample_cd() #0 {
118 define amdgpu_ps void @sample_cd() {
119119 main_body:
120120 %r = call <4 x float> @llvm.SI.image.sample.cd.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
121121 %r0 = extractelement <4 x float> %r, i32 0
129129 ;CHECK-LABEL: {{^}}sample_cd_cl:
130130 ;CHECK-NOT: s_wqm
131131 ;CHECK: image_sample_cd_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
132 define void @sample_cd_cl() #0 {
132 define amdgpu_ps void @sample_cd_cl() {
133133 main_body:
134134 %r = call <4 x float> @llvm.SI.image.sample.cd.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
135135 %r0 = extractelement <4 x float> %r, i32 0
143143 ;CHECK-LABEL: {{^}}sample_c:
144144 ;CHECK: s_wqm
145145 ;CHECK: image_sample_c {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
146 define void @sample_c() #0 {
146 define amdgpu_ps void @sample_c() {
147147 main_body:
148148 %r = call <4 x float> @llvm.SI.image.sample.c.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
149149 %r0 = extractelement <4 x float> %r, i32 0
157157 ;CHECK-LABEL: {{^}}sample_c_cl:
158158 ;CHECK: s_wqm
159159 ;CHECK: image_sample_c_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
160 define void @sample_c_cl() #0 {
160 define amdgpu_ps void @sample_c_cl() {
161161 main_body:
162162 %r = call <4 x float> @llvm.SI.image.sample.c.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
163163 %r0 = extractelement <4 x float> %r, i32 0
171171 ;CHECK-LABEL: {{^}}sample_c_d:
172172 ;CHECK-NOT: s_wqm
173173 ;CHECK: image_sample_c_d {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
174 define void @sample_c_d() #0 {
174 define amdgpu_ps void @sample_c_d() {
175175 main_body:
176176 %r = call <4 x float> @llvm.SI.image.sample.c.d.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
177177 %r0 = extractelement <4 x float> %r, i32 0
185185 ;CHECK-LABEL: {{^}}sample_c_d_cl:
186186 ;CHECK-NOT: s_wqm
187187 ;CHECK: image_sample_c_d_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
188 define void @sample_c_d_cl() #0 {
188 define amdgpu_ps void @sample_c_d_cl() {
189189 main_body:
190190 %r = call <4 x float> @llvm.SI.image.sample.c.d.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
191191 %r0 = extractelement <4 x float> %r, i32 0
199199 ;CHECK-LABEL: {{^}}sample_c_l:
200200 ;CHECK-NOT: s_wqm
201201 ;CHECK: image_sample_c_l {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
202 define void @sample_c_l() #0 {
202 define amdgpu_ps void @sample_c_l() {
203203 main_body:
204204 %r = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
205205 %r0 = extractelement <4 x float> %r, i32 0
213213 ;CHECK-LABEL: {{^}}sample_c_b:
214214 ;CHECK: s_wqm
215215 ;CHECK: image_sample_c_b {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
216 define void @sample_c_b() #0 {
216 define amdgpu_ps void @sample_c_b() {
217217 main_body:
218218 %r = call <4 x float> @llvm.SI.image.sample.c.b.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
219219 %r0 = extractelement <4 x float> %r, i32 0
227227 ;CHECK-LABEL: {{^}}sample_c_b_cl:
228228 ;CHECK: s_wqm
229229 ;CHECK: image_sample_c_b_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
230 define void @sample_c_b_cl() #0 {
230 define amdgpu_ps void @sample_c_b_cl() {
231231 main_body:
232232 %r = call <4 x float> @llvm.SI.image.sample.c.b.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
233233 %r0 = extractelement <4 x float> %r, i32 0
241241 ;CHECK-LABEL: {{^}}sample_c_lz:
242242 ;CHECK-NOT: s_wqm
243243 ;CHECK: image_sample_c_lz {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
244 define void @sample_c_lz() #0 {
244 define amdgpu_ps void @sample_c_lz() {
245245 main_body:
246246 %r = call <4 x float> @llvm.SI.image.sample.c.lz.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
247247 %r0 = extractelement <4 x float> %r, i32 0
255255 ;CHECK-LABEL: {{^}}sample_c_cd:
256256 ;CHECK-NOT: s_wqm
257257 ;CHECK: image_sample_c_cd {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
258 define void @sample_c_cd() #0 {
258 define amdgpu_ps void @sample_c_cd() {
259259 main_body:
260260 %r = call <4 x float> @llvm.SI.image.sample.c.cd.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
261261 %r0 = extractelement <4 x float> %r, i32 0
269269 ;CHECK-LABEL: {{^}}sample_c_cd_cl:
270270 ;CHECK-NOT: s_wqm
271271 ;CHECK: image_sample_c_cd_cl {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
272 define void @sample_c_cd_cl() #0 {
272 define amdgpu_ps void @sample_c_cd_cl() {
273273 main_body:
274274 %r = call <4 x float> @llvm.SI.image.sample.c.cd.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
275275 %r0 = extractelement <4 x float> %r, i32 0
281281 }
282282
283283
284 declare <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
285 declare <4 x float> @llvm.SI.image.sample.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
286 declare <4 x float> @llvm.SI.image.sample.d.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
287 declare <4 x float> @llvm.SI.image.sample.d.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
288 declare <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
289 declare <4 x float> @llvm.SI.image.sample.b.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
290 declare <4 x float> @llvm.SI.image.sample.b.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
291 declare <4 x float> @llvm.SI.image.sample.lz.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
292 declare <4 x float> @llvm.SI.image.sample.cd.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
293 declare <4 x float> @llvm.SI.image.sample.cd.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
294
295 declare <4 x float> @llvm.SI.image.sample.c.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
296 declare <4 x float> @llvm.SI.image.sample.c.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
297 declare <4 x float> @llvm.SI.image.sample.c.d.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
298 declare <4 x float> @llvm.SI.image.sample.c.d.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
299 declare <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
300 declare <4 x float> @llvm.SI.image.sample.c.b.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
301 declare <4 x float> @llvm.SI.image.sample.c.b.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
302 declare <4 x float> @llvm.SI.image.sample.c.lz.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
303 declare <4 x float> @llvm.SI.image.sample.c.cd.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
304 declare <4 x float> @llvm.SI.image.sample.c.cd.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
284 declare <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
285 declare <4 x float> @llvm.SI.image.sample.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
286 declare <4 x float> @llvm.SI.image.sample.d.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
287 declare <4 x float> @llvm.SI.image.sample.d.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
288 declare <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
289 declare <4 x float> @llvm.SI.image.sample.b.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
290 declare <4 x float> @llvm.SI.image.sample.b.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
291 declare <4 x float> @llvm.SI.image.sample.lz.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
292 declare <4 x float> @llvm.SI.image.sample.cd.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
293 declare <4 x float> @llvm.SI.image.sample.cd.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
294
295 declare <4 x float> @llvm.SI.image.sample.c.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
296 declare <4 x float> @llvm.SI.image.sample.c.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
297 declare <4 x float> @llvm.SI.image.sample.c.d.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
298 declare <4 x float> @llvm.SI.image.sample.c.d.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
299 declare <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
300 declare <4 x float> @llvm.SI.image.sample.c.b.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
301 declare <4 x float> @llvm.SI.image.sample.c.b.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
302 declare <4 x float> @llvm.SI.image.sample.c.lz.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
303 declare <4 x float> @llvm.SI.image.sample.c.cd.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
304 declare <4 x float> @llvm.SI.image.sample.c.cd.cl.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
305305
306306 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
307307
308 attributes #0 = { "ShaderType"="0" }
309 attributes #1 = { nounwind readnone }
308 attributes #0 = { nounwind readnone }
33 ;CHECK-LABEL: {{^}}sample:
44 ;CHECK: s_wqm
55 ;CHECK: image_sample_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
6 define void @sample() #0 {
6 define amdgpu_ps void @sample() {
77 main_body:
88 %r = call <4 x float> @llvm.SI.image.sample.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
99 %r0 = extractelement <4 x float> %r, i32 0
1717 ;CHECK-LABEL: {{^}}sample_cl:
1818 ;CHECK: s_wqm
1919 ;CHECK: image_sample_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
20 define void @sample_cl() #0 {
20 define amdgpu_ps void @sample_cl() {
2121 main_body:
2222 %r = call <4 x float> @llvm.SI.image.sample.cl.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
2323 %r0 = extractelement <4 x float> %r, i32 0
3131 ;CHECK-LABEL: {{^}}sample_d:
3232 ;CHECK-NOT: s_wqm
3333 ;CHECK: image_sample_d_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
34 define void @sample_d() #0 {
34 define amdgpu_ps void @sample_d() {
3535 main_body:
3636 %r = call <4 x float> @llvm.SI.image.sample.d.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
3737 %r0 = extractelement <4 x float> %r, i32 0
4545 ;CHECK-LABEL: {{^}}sample_d_cl:
4646 ;CHECK-NOT: s_wqm
4747 ;CHECK: image_sample_d_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
48 define void @sample_d_cl() #0 {
48 define amdgpu_ps void @sample_d_cl() {
4949 main_body:
5050 %r = call <4 x float> @llvm.SI.image.sample.d.cl.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
5151 %r0 = extractelement <4 x float> %r, i32 0
5959 ;CHECK-LABEL: {{^}}sample_l:
6060 ;CHECK-NOT: s_wqm
6161 ;CHECK: image_sample_l_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
62 define void @sample_l() #0 {
62 define amdgpu_ps void @sample_l() {
6363 main_body:
6464 %r = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
6565 %r0 = extractelement <4 x float> %r, i32 0
7373 ;CHECK-LABEL: {{^}}sample_b:
7474 ;CHECK: s_wqm
7575 ;CHECK: image_sample_b_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
76 define void @sample_b() #0 {
76 define amdgpu_ps void @sample_b() {
7777 main_body:
7878 %r = call <4 x float> @llvm.SI.image.sample.b.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
7979 %r0 = extractelement <4 x float> %r, i32 0
8787 ;CHECK-LABEL: {{^}}sample_b_cl:
8888 ;CHECK: s_wqm
8989 ;CHECK: image_sample_b_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
90 define void @sample_b_cl() #0 {
90 define amdgpu_ps void @sample_b_cl() {
9191 main_body:
9292 %r = call <4 x float> @llvm.SI.image.sample.b.cl.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
9393 %r0 = extractelement <4 x float> %r, i32 0
101101 ;CHECK-LABEL: {{^}}sample_lz:
102102 ;CHECK-NOT: s_wqm
103103 ;CHECK: image_sample_lz_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
104 define void @sample_lz() #0 {
104 define amdgpu_ps void @sample_lz() {
105105 main_body:
106106 %r = call <4 x float> @llvm.SI.image.sample.lz.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
107107 %r0 = extractelement <4 x float> %r, i32 0
115115 ;CHECK-LABEL: {{^}}sample_cd:
116116 ;CHECK-NOT: s_wqm
117117 ;CHECK: image_sample_cd_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
118 define void @sample_cd() #0 {
118 define amdgpu_ps void @sample_cd() {
119119 main_body:
120120 %r = call <4 x float> @llvm.SI.image.sample.cd.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
121121 %r0 = extractelement <4 x float> %r, i32 0
129129 ;CHECK-LABEL: {{^}}sample_cd_cl:
130130 ;CHECK-NOT: s_wqm
131131 ;CHECK: image_sample_cd_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
132 define void @sample_cd_cl() #0 {
132 define amdgpu_ps void @sample_cd_cl() {
133133 main_body:
134134 %r = call <4 x float> @llvm.SI.image.sample.cd.cl.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
135135 %r0 = extractelement <4 x float> %r, i32 0
143143 ;CHECK-LABEL: {{^}}sample_c:
144144 ;CHECK: s_wqm
145145 ;CHECK: image_sample_c_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
146 define void @sample_c() #0 {
146 define amdgpu_ps void @sample_c() {
147147 main_body:
148148 %r = call <4 x float> @llvm.SI.image.sample.c.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
149149 %r0 = extractelement <4 x float> %r, i32 0
157157 ;CHECK-LABEL: {{^}}sample_c_cl:
158158 ;CHECK: s_wqm
159159 ;CHECK: image_sample_c_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
160 define void @sample_c_cl() #0 {
160 define amdgpu_ps void @sample_c_cl() {
161161 main_body:
162162 %r = call <4 x float> @llvm.SI.image.sample.c.cl.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
163163 %r0 = extractelement <4 x float> %r, i32 0
171171 ;CHECK-LABEL: {{^}}sample_c_d:
172172 ;CHECK-NOT: s_wqm
173173 ;CHECK: image_sample_c_d_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
174 define void @sample_c_d() #0 {
174 define amdgpu_ps void @sample_c_d() {
175175 main_body:
176176 %r = call <4 x float> @llvm.SI.image.sample.c.d.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
177177 %r0 = extractelement <4 x float> %r, i32 0
185185 ;CHECK-LABEL: {{^}}sample_c_d_cl:
186186 ;CHECK-NOT: s_wqm
187187 ;CHECK: image_sample_c_d_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
188 define void @sample_c_d_cl() #0 {
188 define amdgpu_ps void @sample_c_d_cl() {
189189 main_body:
190190 %r = call <4 x float> @llvm.SI.image.sample.c.d.cl.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
191191 %r0 = extractelement <4 x float> %r, i32 0
199199 ;CHECK-LABEL: {{^}}sample_c_l:
200200 ;CHECK-NOT: s_wqm
201201 ;CHECK: image_sample_c_l_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
202 define void @sample_c_l() #0 {
202 define amdgpu_ps void @sample_c_l() {
203203 main_body:
204204 %r = call <4 x float> @llvm.SI.image.sample.c.l.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
205205 %r0 = extractelement <4 x float> %r, i32 0
213213 ;CHECK-LABEL: {{^}}sample_c_b:
214214 ;CHECK: s_wqm
215215 ;CHECK: image_sample_c_b_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
216 define void @sample_c_b() #0 {
216 define amdgpu_ps void @sample_c_b() {
217217 main_body:
218218 %r = call <4 x float> @llvm.SI.image.sample.c.b.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
219219 %r0 = extractelement <4 x float> %r, i32 0
227227 ;CHECK-LABEL: {{^}}sample_c_b_cl:
228228 ;CHECK: s_wqm
229229 ;CHECK: image_sample_c_b_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
230 define void @sample_c_b_cl() #0 {
230 define amdgpu_ps void @sample_c_b_cl() {
231231 main_body:
232232 %r = call <4 x float> @llvm.SI.image.sample.c.b.cl.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
233233 %r0 = extractelement <4 x float> %r, i32 0
241241 ;CHECK-LABEL: {{^}}sample_c_lz:
242242 ;CHECK-NOT: s_wqm
243243 ;CHECK: image_sample_c_lz_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
244 define void @sample_c_lz() #0 {
244 define amdgpu_ps void @sample_c_lz() {
245245 main_body:
246246 %r = call <4 x float> @llvm.SI.image.sample.c.lz.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
247247 %r0 = extractelement <4 x float> %r, i32 0
255255 ;CHECK-LABEL: {{^}}sample_c_cd:
256256 ;CHECK-NOT: s_wqm
257257 ;CHECK: image_sample_c_cd_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
258 define void @sample_c_cd() #0 {
258 define amdgpu_ps void @sample_c_cd() {
259259 main_body:
260260 %r = call <4 x float> @llvm.SI.image.sample.c.cd.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
261261 %r0 = extractelement <4 x float> %r, i32 0
269269 ;CHECK-LABEL: {{^}}sample_c_cd_cl:
270270 ;CHECK-NOT: s_wqm
271271 ;CHECK: image_sample_c_cd_cl_o {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
272 define void @sample_c_cd_cl() #0 {
272 define amdgpu_ps void @sample_c_cd_cl() {
273273 main_body:
274274 %r = call <4 x float> @llvm.SI.image.sample.c.cd.cl.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
275275 %r0 = extractelement <4 x float> %r, i32 0
281281 }
282282
283283
284 declare <4 x float> @llvm.SI.image.sample.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
285 declare <4 x float> @llvm.SI.image.sample.cl.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
286 declare <4 x float> @llvm.SI.image.sample.d.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
287 declare <4 x float> @llvm.SI.image.sample.d.cl.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
288 declare <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
289 declare <4 x float> @llvm.SI.image.sample.b.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
290 declare <4 x float> @llvm.SI.image.sample.b.cl.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
291 declare <4 x float> @llvm.SI.image.sample.lz.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
292 declare <4 x float> @llvm.SI.image.sample.cd.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
293 declare <4 x float> @llvm.SI.image.sample.cd.cl.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
294
295 declare <4 x float> @llvm.SI.image.sample.c.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
296 declare <4 x float> @llvm.SI.image.sample.c.cl.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
297 declare <4 x float> @llvm.SI.image.sample.c.d.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
298 declare <4 x float> @llvm.SI.image.sample.c.d.cl.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
299 declare <4 x float> @llvm.SI.image.sample.c.l.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
300 declare <4 x float> @llvm.SI.image.sample.c.b.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
301 declare <4 x float> @llvm.SI.image.sample.c.b.cl.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
302 declare <4 x float> @llvm.SI.image.sample.c.lz.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
303 declare <4 x float> @llvm.SI.image.sample.c.cd.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
304 declare <4 x float> @llvm.SI.image.sample.c.cd.cl.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
284 declare <4 x float> @llvm.SI.image.sample.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
285 declare <4 x float> @llvm.SI.image.sample.cl.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
286 declare <4 x float> @llvm.SI.image.sample.d.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
287 declare <4 x float> @llvm.SI.image.sample.d.cl.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
288 declare <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
289 declare <4 x float> @llvm.SI.image.sample.b.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
290 declare <4 x float> @llvm.SI.image.sample.b.cl.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
291 declare <4 x float> @llvm.SI.image.sample.lz.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
292 declare <4 x float> @llvm.SI.image.sample.cd.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
293 declare <4 x float> @llvm.SI.image.sample.cd.cl.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
294
295 declare <4 x float> @llvm.SI.image.sample.c.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
296 declare <4 x float> @llvm.SI.image.sample.c.cl.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
297 declare <4 x float> @llvm.SI.image.sample.c.d.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
298 declare <4 x float> @llvm.SI.image.sample.c.d.cl.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
299 declare <4 x float> @llvm.SI.image.sample.c.l.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
300 declare <4 x float> @llvm.SI.image.sample.c.b.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
301 declare <4 x float> @llvm.SI.image.sample.c.b.cl.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
302 declare <4 x float> @llvm.SI.image.sample.c.lz.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
303 declare <4 x float> @llvm.SI.image.sample.c.cd.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
304 declare <4 x float> @llvm.SI.image.sample.c.cd.cl.o.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
305305
306306 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
307307
308 attributes #0 = { "ShaderType"="0" }
309 attributes #1 = { nounwind readnone }
308 attributes #0 = { nounwind readnone }
1313 ; CHECK: s_movk_i32 [[K:s[0-9]+]], 0x4d2 ; encoding
1414 ; CHECK: buffer_load_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, [[K]] idxen offen offset:65535 glc slc
1515
16 define void @main([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <32 x i8>] addrspace(2)* byval %arg2, [2 x <16 x i8>] addrspace(2)* byval %arg3, [17 x <16 x i8>] addrspace(2)* inreg %arg4, [17 x <16 x i8>] addrspace(2)* inreg %arg5, i32 %arg6, i32 %arg7, i32 %arg8, i32 %arg9) #0 {
16 define amdgpu_vs void @main([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <32 x i8>] addrspace(2)* byval %arg2, [2 x <16 x i8>] addrspace(2)* byval %arg3, [17 x <16 x i8>] addrspace(2)* inreg %arg4, [17 x <16 x i8>] addrspace(2)* inreg %arg5, i32 %arg6, i32 %arg7, i32 %arg8, i32 %arg9) {
1717 main_body:
1818 %tmp = getelementptr [2 x <16 x i8>], [2 x <16 x i8>] addrspace(2)* %arg3, i64 0, i32 1
1919 %tmp10 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
3939 }
4040
4141 ; Function Attrs: nounwind readonly
42 declare i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
42 declare i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #0
4343
4444 ; Function Attrs: nounwind readonly
45 declare i32 @llvm.SI.buffer.load.dword.i32.v2i32(<16 x i8>, <2 x i32>, i32, i32, i32, i32, i32, i32, i32) #1
45 declare i32 @llvm.SI.buffer.load.dword.i32.v2i32(<16 x i8>, <2 x i32>, i32, i32, i32, i32, i32, i32, i32) #0
4646
4747 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
4848
49 attributes #0 = { "ShaderType"="1" }
50 attributes #1 = { nounwind readonly }
49 attributes #0 = { nounwind readonly }
5150
5251 !0 = !{!"const", null, i32 1}
55 ; GCN: v_cvt_pkrtz_f16_f32
66 ; GCN-NOT: v_cvt_pkrtz_f16_f32
77
8 define void @main(float %src) #0 {
8 define amdgpu_ps void @main(float %src) {
99 main_body:
1010 %p1 = call i32 @llvm.SI.packf16(float undef, float %src)
1111 %p2 = call i32 @llvm.SI.packf16(float %src, float undef)
2020 }
2121
2222 ; Function Attrs: nounwind readnone
23 declare i32 @llvm.SI.packf16(float, float) #1
23 declare i32 @llvm.SI.packf16(float, float) #0
2424
2525 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
2626
27 attributes #0 = { "ShaderType"="0" }
28 attributes #1 = { nounwind readnone }
27 attributes #0 = { nounwind readnone }
66 ; BOTH-NEXT: s_sendmsg Gs_done(nop)
77 ; BOTH-NEXT: s_endpgm
88
9 define void @main(i32 inreg %a) #0 {
9 define amdgpu_gs void @main(i32 inreg %a) #0 {
1010 main_body:
1111 call void @llvm.SI.sendmsg(i32 3, i32 %a)
1212 ret void
1515 ; Function Attrs: nounwind
1616 declare void @llvm.SI.sendmsg(i32, i32) #1
1717
18 attributes #0 = { "ShaderType"="2" "unsafe-fp-math"="true" }
18 attributes #0 = { "unsafe-fp-math"="true" }
1919 attributes #1 = { nounwind }
22
33 ;CHECK-LABEL: {{^}}test1:
44 ;CHECK: tbuffer_store_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, 0x20, -1, 0, -1, 0, 14, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0
5 define void @test1(i32 %a1, i32 %vaddr) #0 {
5 define amdgpu_vs void @test1(i32 %a1, i32 %vaddr) {
66 %vdata = insertelement <4 x i32> undef, i32 %a1, i32 0
77 call void @llvm.SI.tbuffer.store.v4i32(<16 x i8> undef, <4 x i32> %vdata,
88 i32 4, i32 %vaddr, i32 0, i32 32, i32 14, i32 4, i32 1, i32 0, i32 1,
1212
1313 ;CHECK-LABEL: {{^}}test2:
1414 ;CHECK: tbuffer_store_format_xyz {{v\[[0-9]+:[0-9]+\]}}, 0x18, -1, 0, -1, 0, 13, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0
15 define void @test2(i32 %a1, i32 %vaddr) #0 {
15 define amdgpu_vs void @test2(i32 %a1, i32 %vaddr) {
1616 %vdata = insertelement <4 x i32> undef, i32 %a1, i32 0
1717 call void @llvm.SI.tbuffer.store.v4i32(<16 x i8> undef, <4 x i32> %vdata,
1818 i32 3, i32 %vaddr, i32 0, i32 24, i32 13, i32 4, i32 1, i32 0, i32 1,
2222
2323 ;CHECK-LABEL: {{^}}test3:
2424 ;CHECK: tbuffer_store_format_xy {{v\[[0-9]+:[0-9]+\]}}, 0x10, -1, 0, -1, 0, 11, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0
25 define void @test3(i32 %a1, i32 %vaddr) #0 {
25 define amdgpu_vs void @test3(i32 %a1, i32 %vaddr) {
2626 %vdata = insertelement <2 x i32> undef, i32 %a1, i32 0
2727 call void @llvm.SI.tbuffer.store.v2i32(<16 x i8> undef, <2 x i32> %vdata,
2828 i32 2, i32 %vaddr, i32 0, i32 16, i32 11, i32 4, i32 1, i32 0, i32 1,
3232
3333 ;CHECK-LABEL: {{^}}test4:
3434 ;CHECK: tbuffer_store_format_x {{v[0-9]+}}, 0x8, -1, 0, -1, 0, 4, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0
35 define void @test4(i32 %vdata, i32 %vaddr) #0 {
35 define amdgpu_vs void @test4(i32 %vdata, i32 %vaddr) {
3636 call void @llvm.SI.tbuffer.store.i32(<16 x i8> undef, i32 %vdata,
3737 i32 1, i32 %vaddr, i32 0, i32 8, i32 4, i32 4, i32 1, i32 0, i32 1,
3838 i32 1, i32 0)
4242 declare void @llvm.SI.tbuffer.store.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
4343 declare void @llvm.SI.tbuffer.store.v2i32(<16 x i8>, <2 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
4444 declare void @llvm.SI.tbuffer.store.v4i32(<16 x i8>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
45
46 attributes #0 = { "ShaderType"="1" }
44 ;SI: v_mbcnt_hi_u32_b32_e32
55 ;VI: v_mbcnt_hi_u32_b32_e64
66
7 define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg) "ShaderType"="0" {
7 define amdgpu_ps void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg) {
88 main_body:
99 %4 = call i32 @llvm.SI.tid()
1010 %5 = bitcast i32 %4 to float
1515 ;CHECK: buffer_atomic_swap v0, s[0:3], [[SOFS]] offset:1 glc
1616 ;CHECK: s_waitcnt vmcnt(0)
1717 ;CHECK: buffer_atomic_swap v0, s[0:3], 0{{$}}
18 define float @test1(<4 x i32> inreg %rsrc, i32 %data, i32 %vindex, i32 %voffset) #0 {
18 define amdgpu_ps float @test1(<4 x i32> inreg %rsrc, i32 %data, i32 %vindex, i32 %voffset) {
1919 main_body:
2020 %o1 = call i32 @llvm.amdgcn.buffer.atomic.swap(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
2121 %o2 = call i32 @llvm.amdgcn.buffer.atomic.swap(i32 %o1, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0)
4747 ;CHECK: buffer_atomic_or v0, v1, s[0:3], 0 idxen glc
4848 ;CHECK: s_waitcnt vmcnt(0)
4949 ;CHECK: buffer_atomic_xor v0, v1, s[0:3], 0 idxen glc
50 define float @test2(<4 x i32> inreg %rsrc, i32 %data, i32 %vindex) #0 {
50 define amdgpu_ps float @test2(<4 x i32> inreg %rsrc, i32 %data, i32 %vindex) {
5151 main_body:
5252 %t1 = call i32 @llvm.amdgcn.buffer.atomic.add(i32 %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0)
5353 %t2 = call i32 @llvm.amdgcn.buffer.atomic.sub(i32 %t1, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0)
7979 ;CHECK-DAG: s_waitcnt vmcnt(0)
8080 ;CHECK-DAG: s_movk_i32 [[SOFS:s[0-9]+]], 0x1fff
8181 ;CHECK: buffer_atomic_cmpswap {{v\[[0-9]+:[0-9]+\]}}, s[0:3], [[SOFS]] offset:1 glc
82 define float @test3(<4 x i32> inreg %rsrc, i32 %data, i32 %cmp, i32 %vindex, i32 %voffset) #0 {
82 define amdgpu_ps float @test3(<4 x i32> inreg %rsrc, i32 %data, i32 %cmp, i32 %vindex, i32 %voffset) {
8383 main_body:
8484 %o1 = call i32 @llvm.amdgcn.buffer.atomic.cmpswap(i32 %data, i32 %cmp, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
8585 %o2 = call i32 @llvm.amdgcn.buffer.atomic.cmpswap(i32 %o1, i32 %cmp, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0)
9999 ret float %out
100100 }
101101
102 declare i32 @llvm.amdgcn.buffer.atomic.swap(i32, <4 x i32>, i32, i32, i1) #1
103 declare i32 @llvm.amdgcn.buffer.atomic.add(i32, <4 x i32>, i32, i32, i1) #1
104 declare i32 @llvm.amdgcn.buffer.atomic.sub(i32, <4 x i32>, i32, i32, i1) #1
105 declare i32 @llvm.amdgcn.buffer.atomic.smin(i32, <4 x i32>, i32, i32, i1) #1
106 declare i32 @llvm.amdgcn.buffer.atomic.umin(i32, <4 x i32>, i32, i32, i1) #1
107 declare i32 @llvm.amdgcn.buffer.atomic.smax(i32, <4 x i32>, i32, i32, i1) #1
108 declare i32 @llvm.amdgcn.buffer.atomic.umax(i32, <4 x i32>, i32, i32, i1) #1
109 declare i32 @llvm.amdgcn.buffer.atomic.and(i32, <4 x i32>, i32, i32, i1) #1
110 declare i32 @llvm.amdgcn.buffer.atomic.or(i32, <4 x i32>, i32, i32, i1) #1
111 declare i32 @llvm.amdgcn.buffer.atomic.xor(i32, <4 x i32>, i32, i32, i1) #1
112 declare i32 @llvm.amdgcn.buffer.atomic.cmpswap(i32, i32, <4 x i32>, i32, i32, i1) #1
102 declare i32 @llvm.amdgcn.buffer.atomic.swap(i32, <4 x i32>, i32, i32, i1) #0
103 declare i32 @llvm.amdgcn.buffer.atomic.add(i32, <4 x i32>, i32, i32, i1) #0
104 declare i32 @llvm.amdgcn.buffer.atomic.sub(i32, <4 x i32>, i32, i32, i1) #0
105 declare i32 @llvm.amdgcn.buffer.atomic.smin(i32, <4 x i32>, i32, i32, i1) #0
106 declare i32 @llvm.amdgcn.buffer.atomic.umin(i32, <4 x i32>, i32, i32, i1) #0
107 declare i32 @llvm.amdgcn.buffer.atomic.smax(i32, <4 x i32>, i32, i32, i1) #0
108 declare i32 @llvm.amdgcn.buffer.atomic.umax(i32, <4 x i32>, i32, i32, i1) #0
109 declare i32 @llvm.amdgcn.buffer.atomic.and(i32, <4 x i32>, i32, i32, i1) #0
110 declare i32 @llvm.amdgcn.buffer.atomic.or(i32, <4 x i32>, i32, i32, i1) #0
111 declare i32 @llvm.amdgcn.buffer.atomic.xor(i32, <4 x i32>, i32, i32, i1) #0
112 declare i32 @llvm.amdgcn.buffer.atomic.cmpswap(i32, i32, <4 x i32>, i32, i32, i1) #0
113113
114 attributes #0 = { "ShaderType"="0" }
115 attributes #1 = { nounwind }
114 attributes #0 = { nounwind }
55 ;CHECK: buffer_load_format_xyzw v[4:7], s[0:3], 0 glc
66 ;CHECK: buffer_load_format_xyzw v[8:11], s[0:3], 0 slc
77 ;CHECK: s_waitcnt
8 define {<4 x float>, <4 x float>, <4 x float>} @buffer_load(<4 x i32> inreg) #0 {
8 define amdgpu_ps {<4 x float>, <4 x float>, <4 x float>} @buffer_load(<4 x i32> inreg) {
99 main_body:
1010 %data = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 0, i1 0, i1 0)
1111 %data_glc = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 0, i1 1, i1 0)
1919 ;CHECK-LABEL: {{^}}buffer_load_immoffs:
2020 ;CHECK: buffer_load_format_xyzw v[0:3], s[0:3], 0 offset:42
2121 ;CHECK: s_waitcnt
22 define <4 x float> @buffer_load_immoffs(<4 x i32> inreg) #0 {
22 define amdgpu_ps <4 x float> @buffer_load_immoffs(<4 x i32> inreg) {
2323 main_body:
2424 %data = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 42, i1 0, i1 0)
2525 ret <4 x float> %data
3232 ;CHECK: s_mov_b32 [[OFS2:s[0-9]+]], 0x8fff
3333 ;CHECK: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, s[0:3], [[OFS2]] offset:1
3434 ;CHECK: s_waitcnt
35 define <4 x float> @buffer_load_immoffs_large(<4 x i32> inreg) #0 {
35 define amdgpu_ps <4 x float> @buffer_load_immoffs_large(<4 x i32> inreg) {
3636 main_body:
3737 %d.0 = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 4156, i1 0, i1 0)
3838 %d.1 = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 36860, i1 0, i1 0)
4848 ;CHECK-NOT: s_mov
4949 ;CHECK: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, s[0:3], [[OFS]] offset:81
5050 ;CHECK: s_waitcnt
51 define <4 x float> @buffer_load_immoffs_reuse(<4 x i32> inreg) #0 {
51 define amdgpu_ps <4 x float> @buffer_load_immoffs_reuse(<4 x i32> inreg) {
5252 main_body:
5353 %d.0 = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 4160, i1 0, i1 0)
5454 %d.1 = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 4176, i1 0, i1 0)
5959 ;CHECK-LABEL: {{^}}buffer_load_idx:
6060 ;CHECK: buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen
6161 ;CHECK: s_waitcnt
62 define <4 x float> @buffer_load_idx(<4 x i32> inreg, i32) #0 {
62 define amdgpu_ps <4 x float> @buffer_load_idx(<4 x i32> inreg, i32) {
6363 main_body:
6464 %data = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 %1, i32 0, i1 0, i1 0)
6565 ret <4 x float> %data
6868 ;CHECK-LABEL: {{^}}buffer_load_ofs:
6969 ;CHECK: buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 offen
7070 ;CHECK: s_waitcnt
71 define <4 x float> @buffer_load_ofs(<4 x i32> inreg, i32) #0 {
71 define amdgpu_ps <4 x float> @buffer_load_ofs(<4 x i32> inreg, i32) {
7272 main_body:
7373 %data = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 %1, i1 0, i1 0)
7474 ret <4 x float> %data
7777 ;CHECK-LABEL: {{^}}buffer_load_ofs_imm:
7878 ;CHECK: buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 offen offset:58
7979 ;CHECK: s_waitcnt
80 define <4 x float> @buffer_load_ofs_imm(<4 x i32> inreg, i32) #0 {
80 define amdgpu_ps <4 x float> @buffer_load_ofs_imm(<4 x i32> inreg, i32) {
8181 main_body:
8282 %ofs = add i32 %1, 58
8383 %data = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 %ofs, i1 0, i1 0)
8787 ;CHECK-LABEL: {{^}}buffer_load_both:
8888 ;CHECK: buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen
8989 ;CHECK: s_waitcnt
90 define <4 x float> @buffer_load_both(<4 x i32> inreg, i32, i32) #0 {
90 define amdgpu_ps <4 x float> @buffer_load_both(<4 x i32> inreg, i32, i32) {
9191 main_body:
9292 %data = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 %1, i32 %2, i1 0, i1 0)
9393 ret <4 x float> %data
9797 ;CHECK: v_mov_b32_e32 v2, v0
9898 ;CHECK: buffer_load_format_xyzw v[0:3], v[1:2], s[0:3], 0 idxen offen
9999 ;CHECK: s_waitcnt
100 define <4 x float> @buffer_load_both_reversed(<4 x i32> inreg, i32, i32) #0 {
100 define amdgpu_ps <4 x float> @buffer_load_both_reversed(<4 x i32> inreg, i32, i32) {
101101 main_body:
102102 %data = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 %2, i32 %1, i1 0, i1 0)
103103 ret <4 x float> %data
106106 ;CHECK-LABEL: {{^}}buffer_load_x:
107107 ;CHECK: buffer_load_format_x v0, s[0:3], 0
108108 ;CHECK: s_waitcnt
109 define float @buffer_load_x(<4 x i32> inreg %rsrc) #0 {
109 define amdgpu_ps float @buffer_load_x(<4 x i32> inreg %rsrc) {
110110 main_body:
111111 %data = call float @llvm.amdgcn.buffer.load.format.f32(<4 x i32> %rsrc, i32 0, i32 0, i1 0, i1 0)
112112 ret float %data
115115 ;CHECK-LABEL: {{^}}buffer_load_xy:
116116 ;CHECK: buffer_load_format_xy v[0:1], s[0:3], 0
117117 ;CHECK: s_waitcnt
118 define <2 x float> @buffer_load_xy(<4 x i32> inreg %rsrc) #0 {
118 define amdgpu_ps <2 x float> @buffer_load_xy(<4 x i32> inreg %rsrc) {
119119 main_body:
120120 %data = call <2 x float> @llvm.amdgcn.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 0, i32 0, i1 0, i1 0)
121121 ret <2 x float> %data
122122 }
123123
124 declare float @llvm.amdgcn.buffer.load.format.f32(<4 x i32>, i32, i32, i1, i1) #1
125 declare <2 x float> @llvm.amdgcn.buffer.load.format.v2f32(<4 x i32>, i32, i32, i1, i1) #1
126 declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #1
124 declare float @llvm.amdgcn.buffer.load.format.f32(<4 x i32>, i32, i32, i1, i1) #0
125 declare <2 x float> @llvm.amdgcn.buffer.load.format.v2f32(<4 x i32>, i32, i32, i1, i1) #0
126 declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #0
127127
128 attributes #0 = { "ShaderType"="0" }
129 attributes #1 = { nounwind readonly }
128 attributes #0 = { nounwind readonly }
44 ;CHECK: buffer_store_format_xyzw v[0:3], s[0:3], 0
55 ;CHECK: buffer_store_format_xyzw v[4:7], s[0:3], 0 glc
66 ;CHECK: buffer_store_format_xyzw v[8:11], s[0:3], 0 slc
7 define void @buffer_store(<4 x i32> inreg, <4 x float>, <4 x float>, <4 x float>) #0 {
7 define amdgpu_ps void @buffer_store(<4 x i32> inreg, <4 x float>, <4 x float>, <4 x float>) {
88 main_body:
99 call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %1, <4 x i32> %0, i32 0, i32 0, i1 0, i1 0)
1010 call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %2, <4 x i32> %0, i32 0, i32 0, i1 1, i1 0)
1414
1515 ;CHECK-LABEL: {{^}}buffer_store_immoffs:
1616 ;CHECK: buffer_store_format_xyzw v[0:3], s[0:3], 0 offset:42
17 define void @buffer_store_immoffs(<4 x i32> inreg, <4 x float>) #0 {
17 define amdgpu_ps void @buffer_store_immoffs(<4 x i32> inreg, <4 x float>) {
1818 main_body:
1919 call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %1, <4 x i32> %0, i32 0, i32 42, i1 0, i1 0)
2020 ret void
2222
2323 ;CHECK-LABEL: {{^}}buffer_store_idx:
2424 ;CHECK: buffer_store_format_xyzw v[0:3], v4, s[0:3], 0 idxen
25 define void @buffer_store_idx(<4 x i32> inreg, <4 x float>, i32) #0 {
25 define amdgpu_ps void @buffer_store_idx(<4 x i32> inreg, <4 x float>, i32) {
2626 main_body:
2727 call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %1, <4 x i32> %0, i32 %2, i32 0, i1 0, i1 0)
2828 ret void
3030
3131 ;CHECK-LABEL: {{^}}buffer_store_ofs:
3232 ;CHECK: buffer_store_format_xyzw v[0:3], v4, s[0:3], 0 offen
33 define void @buffer_store_ofs(<4 x i32> inreg, <4 x float>, i32) #0 {
33 define amdgpu_ps void @buffer_store_ofs(<4 x i32> inreg, <4 x float>, i32) {
3434 main_body:
3535 call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %1, <4 x i32> %0, i32 0, i32 %2, i1 0, i1 0)
3636 ret void
3838
3939 ;CHECK-LABEL: {{^}}buffer_store_both:
4040 ;CHECK: buffer_store_format_xyzw v[0:3], v[4:5], s[0:3], 0 idxen offen
41 define void @buffer_store_both(<4 x i32> inreg, <4 x float>, i32, i32) #0 {
41 define amdgpu_ps void @buffer_store_both(<4 x i32> inreg, <4 x float>, i32, i32) {
4242 main_body:
4343 call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %1, <4 x i32> %0, i32 %2, i32 %3, i1 0, i1 0)
4444 ret void
4747 ;CHECK-LABEL: {{^}}buffer_store_both_reversed:
4848 ;CHECK: v_mov_b32_e32 v6, v4
4949 ;CHECK: buffer_store_format_xyzw v[0:3], v[5:6], s[0:3], 0 idxen offen
50 define void @buffer_store_both_reversed(<4 x i32> inreg, <4 x float>, i32, i32) #0 {
50 define amdgpu_ps void @buffer_store_both_reversed(<4 x i32> inreg, <4 x float>, i32, i32) {
5151 main_body:
5252 call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %1, <4 x i32> %0, i32 %3, i32 %2, i1 0, i1 0)
5353 ret void
6161 ;CHECK: buffer_load_format_xyzw v[0:3], v5, s[0:3], 0 idxen
6262 ;CHECK: s_waitcnt vmcnt(0)
6363 ;CHECK: buffer_store_format_xyzw v[0:3], v6, s[0:3], 0 idxen
64 define void @buffer_store_wait(<4 x i32> inreg, <4 x float>, i32, i32, i32) #0 {
64 define amdgpu_ps void @buffer_store_wait(<4 x i32> inreg, <4 x float>, i32, i32, i32) {
6565 main_body:
6666 call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %1, <4 x i32> %0, i32 %2, i32 0, i1 0, i1 0)
6767 %data = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 %3, i32 0, i1 0, i1 0)
6969 ret void
7070 }
7171
72 declare void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i1, i1) #1
73 declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #2
72 declare void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i1, i1) #0
73 declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #1
7474
75 attributes #0 = { "ShaderType"="0" }
76 attributes #1 = { nounwind }
77 attributes #2 = { nounwind readonly }
75 attributes #0 = { nounwind }
76 attributes #1 = { nounwind readonly }
44 ;SI: image_atomic_swap v4, v[0:3], s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x3c,0xf0,0x00,0x04,0x00,0x00]
55 ;VI: image_atomic_swap v4, v[0:3], s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x40,0xf0,0x00,0x04,0x00,0x00]
66 ;CHECK: s_waitcnt vmcnt(0)
7 define float @image_atomic_swap(<8 x i32> inreg, <4 x i32>, i32) #0 {
7 define amdgpu_ps float @image_atomic_swap(<8 x i32> inreg, <4 x i32>, i32) {
88 main_body:
99 %orig = call i32 @llvm.amdgcn.image.atomic.swap.v4i32(i32 %2, <4 x i32> %1, <8 x i32> %0, i1 0, i1 0, i1 0)
1010 %orig.f = bitcast i32 %orig to float
1515 ;SI: image_atomic_swap v2, v[0:1], s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x3c,0xf0,0x00,0x02,0x00,0x00]
1616 ;VI: image_atomic_swap v2, v[0:1], s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x40,0xf0,0x00,0x02,0x00,0x00]
1717 ;CHECK: s_waitcnt vmcnt(0)
18 define float @image_atomic_swap_v2i32(<8 x i32> inreg, <2 x i32>, i32) #0 {
18 define amdgpu_ps float @image_atomic_swap_v2i32(<8 x i32> inreg, <2 x i32>, i32) {
1919 main_body:
2020 %orig = call i32 @llvm.amdgcn.image.atomic.swap.v2i32(i32 %2, <2 x i32> %1, <8 x i32> %0, i1 0, i1 0, i1 0)
2121 %orig.f = bitcast i32 %orig to float
2626 ;SI: image_atomic_swap v1, v0, s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x3c,0xf0,0x00,0x01,0x00,0x00]
2727 ;VI: image_atomic_swap v1, v0, s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x40,0xf0,0x00,0x01,0x00,0x00]
2828 ;CHECK: s_waitcnt vmcnt(0)
29 define float @image_atomic_swap_i32(<8 x i32> inreg, i32, i32) #0 {
29 define amdgpu_ps float @image_atomic_swap_i32(<8 x i32> inreg, i32, i32) {
3030 main_body:
3131 %orig = call i32 @llvm.amdgcn.image.atomic.swap.i32(i32 %2, i32 %1, <8 x i32> %0, i1 0, i1 0, i1 0)
3232 %orig.f = bitcast i32 %orig to float
3838 ;VI: image_atomic_cmpswap v[4:5], v[0:3], s[0:7] dmask:0x3 unorm glc ; encoding: [0x00,0x33,0x44,0xf0,0x00,0x04,0x00,0x00]
3939 ;CHECK: s_waitcnt vmcnt(0)
4040 ;CHECK: v_mov_b32_e32 v0, v4
41 define float @image_atomic_cmpswap(<8 x i32> inreg, <4 x i32>, i32, i32) #0 {
41 define amdgpu_ps float @image_atomic_cmpswap(<8 x i32> inreg, <4 x i32>, i32, i32) {
4242 main_body:
4343 %orig = call i32 @llvm.amdgcn.image.atomic.cmpswap.v4i32(i32 %2, i32 %3, <4 x i32> %1, <8 x i32> %0, i1 0, i1 0, i1 0)
4444 %orig.f = bitcast i32 %orig to float
4949 ;SI: image_atomic_add v4, v[0:3], s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x44,0xf0,0x00,0x04,0x00,0x00]
5050 ;VI: image_atomic_add v4, v[0:3], s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x48,0xf0,0x00,0x04,0x00,0x00]
5151 ;CHECK: s_waitcnt vmcnt(0)
52 define float @image_atomic_add(<8 x i32> inreg, <4 x i32>, i32) #0 {
52 define amdgpu_ps float @image_atomic_add(<8 x i32> inreg, <4 x i32>, i32) {
5353 main_body:
5454 %orig = call i32 @llvm.amdgcn.image.atomic.add.v4i32(i32 %2, <4 x i32> %1, <8 x i32> %0, i1 0, i1 0, i1 0)
5555 %orig.f = bitcast i32 %orig to float
6060 ;SI: image_atomic_sub v4, v[0:3], s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x48,0xf0,0x00,0x04,0x00,0x00]
6161 ;VI: image_atomic_sub v4, v[0:3], s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x4c,0xf0,0x00,0x04,0x00,0x00]
6262 ;CHECK: s_waitcnt vmcnt(0)
63 define float @image_atomic_sub(<8 x i32> inreg, <4 x i32>, i32) #0 {
63 define amdgpu_ps float @image_atomic_sub(<8 x i32> inreg, <4 x i32>, i32) {
6464 main_body:
6565 %orig = call i32 @llvm.amdgcn.image.atomic.sub.v4i32(i32 %2, <4 x i32> %1, <8 x i32> %0, i1 0, i1 0, i1 0)
6666 %orig.f = bitcast i32 %orig to float
8686 ;CHECK: s_waitcnt vmcnt(0)
8787 ;CHECK: image_atomic_dec v4, v[0:3], s[0:7] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x70,0xf0,0x00,0x04,0x00,0x00]
8888 ;CHECK: s_waitcnt vmcnt(0)
89 define float @image_atomic_unchanged(<8 x i32> inreg, <4 x i32>, i32) #0 {
89 define amdgpu_ps float @image_atomic_unchanged(<8 x i32> inreg, <4 x i32>, i32) {
9090 main_body:
9191 %t0 = call i32 @llvm.amdgcn.image.atomic.smin.v4i32(i32 %2, <4 x i32> %1, <8 x i32> %0, i1 0, i1 0, i1 0)
9292 %t1 = call i32 @llvm.amdgcn.image.atomic.umin.v4i32(i32 %t0, <4 x i32> %1, <8 x i32> %0, i1 0, i1 0, i1 0)
101101 ret float %out
102102 }
103103
104 declare i32 @llvm.amdgcn.image.atomic.swap.i32(i32, i32, <8 x i32>, i1, i1, i1) #1
105 declare i32 @llvm.amdgcn.image.atomic.swap.v2i32(i32, <2 x i32>, <8 x i32>, i1, i1, i1) #1
106 declare i32 @llvm.amdgcn.image.atomic.swap.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #1
104 declare i32 @llvm.amdgcn.image.atomic.swap.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
105 declare i32 @llvm.amdgcn.image.atomic.swap.v2i32(i32, <2 x i32>, <8 x i32>, i1, i1, i1) #0
106 declare i32 @llvm.amdgcn.image.atomic.swap.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #0
107107
108 declare i32 @llvm.amdgcn.image.atomic.cmpswap.v4i32(i32, i32, <4 x i32>, <8 x i32>,i1, i1, i1) #1
108 declare i32 @llvm.amdgcn.image.atomic.cmpswap.v4i32(i32, i32, <4 x i32>, <8 x i32>,i1, i1, i1) #0
109109
110 declare i32 @llvm.amdgcn.image.atomic.add.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #1
111 declare i32 @llvm.amdgcn.image.atomic.sub.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #1
112 declare i32 @llvm.amdgcn.image.atomic.smin.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #1
113 declare i32 @llvm.amdgcn.image.atomic.umin.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #1
114 declare i32 @llvm.amdgcn.image.atomic.smax.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #1
115 declare i32 @llvm.amdgcn.image.atomic.umax.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #1
116 declare i32 @llvm.amdgcn.image.atomic.and.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #1
117 declare i32 @llvm.amdgcn.image.atomic.or.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #1
118 declare i32 @llvm.amdgcn.image.atomic.xor.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #1
119 declare i32 @llvm.amdgcn.image.atomic.inc.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #1
120 declare i32 @llvm.amdgcn.image.atomic.dec.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #1
110 declare i32 @llvm.amdgcn.image.atomic.add.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #0
111 declare i32 @llvm.amdgcn.image.atomic.sub.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #0
112 declare i32 @llvm.amdgcn.image.atomic.smin.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #0
113 declare i32 @llvm.amdgcn.image.atomic.umin.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #0
114 declare i32 @llvm.amdgcn.image.atomic.smax.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #0
115 declare i32 @llvm.amdgcn.image.atomic.umax.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #0
116 declare i32 @llvm.amdgcn.image.atomic.and.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #0
117 declare i32 @llvm.amdgcn.image.atomic.or.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #0
118 declare i32 @llvm.amdgcn.image.atomic.xor.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #0
119 declare i32 @llvm.amdgcn.image.atomic.inc.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #0
120 declare i32 @llvm.amdgcn.image.atomic.dec.v4i32(i32, <4 x i32>, <8 x i32>, i1, i1, i1) #0
121121
122 attributes #0 = { "ShaderType"="0" }
123 attributes #1 = { nounwind }
122 attributes #0 = { nounwind }
33 ;CHECK-LABEL: {{^}}image_load_v4i32:
44 ;CHECK: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm
55 ;CHECK: s_waitcnt vmcnt(0)
6 define <4 x float> @image_load_v4i32(<8 x i32> inreg %rsrc, <4 x i32> %c) #0 {
6 define amdgpu_ps <4 x float> @image_load_v4i32(<8 x i32> inreg %rsrc, <4 x i32> %c) {
77 main_body:
88 %tex = call <4 x float> @llvm.amdgcn.image.load.v4i32(<4 x i32> %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
99 ret <4 x float> %tex
1212 ;CHECK-LABEL: {{^}}image_load_v2i32:
1313 ;CHECK: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm
1414 ;CHECK: s_waitcnt vmcnt(0)
15 define <4 x float> @image_load_v2i32(<8 x i32> inreg %rsrc, <2 x i32> %c) #0 {
15 define amdgpu_ps <4 x float> @image_load_v2i32(<8 x i32> inreg %rsrc, <2 x i32> %c) {
1616 main_body:
1717 %tex = call <4 x float> @llvm.amdgcn.image.load.v2i32(<2 x i32> %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
1818 ret <4 x float> %tex
2121 ;CHECK-LABEL: {{^}}image_load_i32:
2222 ;CHECK: image_load v[0:3], v0, s[0:7] dmask:0xf unorm
2323 ;CHECK: s_waitcnt vmcnt(0)
24 define <4 x float> @image_load_i32(<8 x i32> inreg %rsrc, i32 %c) #0 {
24 define amdgpu_ps <4 x float> @image_load_i32(<8 x i32> inreg %rsrc, i32 %c) {
2525 main_body:
2626 %tex = call <4 x float> @llvm.amdgcn.image.load.i32(i32 %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
2727 ret <4 x float> %tex
3030 ;CHECK-LABEL: {{^}}image_load_mip:
3131 ;CHECK: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm
3232 ;CHECK: s_waitcnt vmcnt(0)
33 define <4 x float> @image_load_mip(<8 x i32> inreg %rsrc, <4 x i32> %c) #0 {
33 define amdgpu_ps <4 x float> @image_load_mip(<8 x i32> inreg %rsrc, <4 x i32> %c) {
3434 main_body:
3535 %tex = call <4 x float> @llvm.amdgcn.image.load.mip.v4i32(<4 x i32> %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
3636 ret <4 x float> %tex
3939 ;CHECK-LABEL: {{^}}image_load_1:
4040 ;CHECK: image_load v0, v[0:3], s[0:7] dmask:0x1 unorm
4141 ;CHECK: s_waitcnt vmcnt(0)
42 define float @image_load_1(<8 x i32> inreg %rsrc, <4 x i32> %c) #0 {
42 define amdgpu_ps float @image_load_1(<8 x i32> inreg %rsrc, <4 x i32> %c) {
4343 main_body:
4444 %tex = call <4 x float> @llvm.amdgcn.image.load.v4i32(<4 x i32> %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
4545 %elt = extractelement <4 x float> %tex, i32 0
4949
5050 ;CHECK-LABEL: {{^}}image_store_v4i32:
5151 ;CHECK: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm
52 define void @image_store_v4i32(<8 x i32> inreg %rsrc, <4 x float> %data, <4 x i32> %coords) #0 {
52 define amdgpu_ps void @image_store_v4i32(<8 x i32> inreg %rsrc, <4 x float> %data, <4 x i32> %coords) {
5353 main_body:
5454 call void @llvm.amdgcn.image.store.v4i32(<4 x float> %data, <4 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
5555 ret void
5757
5858 ;CHECK-LABEL: {{^}}image_store_v2i32:
5959 ;CHECK: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm
60 define void @image_store_v2i32(<8 x i32> inreg %rsrc, <4 x float> %data, <2 x i32> %coords) #0 {
60 define amdgpu_ps void @image_store_v2i32(<8 x i32> inreg %rsrc, <4 x float> %data, <2 x i32> %coords) {
6161 main_body:
6262 call void @llvm.amdgcn.image.store.v2i32(<4 x float> %data, <2 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
6363 ret void
6565
6666 ;CHECK-LABEL: {{^}}image_store_i32:
6767 ;CHECK: image_store v[0:3], v4, s[0:7] dmask:0xf unorm
68 define void @image_store_i32(<8 x i32> inreg %rsrc, <4 x float> %data, i32 %coords) #0 {
68 define amdgpu_ps void @image_store_i32(<8 x i32> inreg %rsrc, <4 x float> %data, i32 %coords) {
6969 main_body:
7070 call void @llvm.amdgcn.image.store.i32(<4 x float> %data, i32 %coords, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
7171 ret void
7373
7474 ;CHECK-LABEL: {{^}}image_store_mip:
7575 ;CHECK: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm
76 define void @image_store_mip(<8 x i32> inreg %rsrc, <4 x float> %data, <4 x i32> %coords) #0 {
76 define amdgpu_ps void @image_store_mip(<8 x i32> inreg %rsrc, <4 x float> %data, <4 x i32> %coords) {
7777 main_body:
7878 call void @llvm.amdgcn.image.store.mip.v4i32(<4 x float> %data, <4 x i32> %coords, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
7979 ret void
8787 ;CHECK: image_load v[0:3], v4, s[8:15] dmask:0xf unorm
8888 ;CHECK: s_waitcnt vmcnt(0)
8989 ;CHECK: image_store v[0:3], v4, s[16:23] dmask:0xf unorm
90 define void @image_store_wait(<8 x i32> inreg, <8 x i32> inreg, <8 x i32> inreg, <4 x float>, i32) #0 {
90 define amdgpu_ps void @image_store_wait(<8 x i32> inreg, <8 x i32> inreg, <8 x i32> inreg, <4 x float>, i32) {
9191 main_body:
9292 call void @llvm.amdgcn.image.store.i32(<4 x float> %3, i32 %4, <8 x i32> %0, i32 15, i1 0, i1 0, i1 0, i1 0)
9393 %data = call <4 x float> @llvm.amdgcn.image.load.i32(i32 %4, <8 x i32> %1, i32 15, i1 0, i1 0, i1 0, i1 0)
9595 ret void
9696 }
9797
98 declare void @llvm.amdgcn.image.store.i32(<4 x float>, i32, <8 x i32>, i32, i1, i1, i1, i1) #1
99 declare void @llvm.amdgcn.image.store.v2i32(<4 x float>, <2 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
100 declare void @llvm.amdgcn.image.store.v4i32(<4 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
101 declare void @llvm.amdgcn.image.store.mip.v4i32(<4 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
98 declare void @llvm.amdgcn.image.store.i32(<4 x float>, i32, <8 x i32>, i32, i1, i1, i1, i1) #0
99 declare void @llvm.amdgcn.image.store.v2i32(<4 x float>, <2 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #0
100 declare void @llvm.amdgcn.image.store.v4i32(<4 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #0
101 declare void @llvm.amdgcn.image.store.mip.v4i32(<4 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #0
102102
103 declare <4 x float> @llvm.amdgcn.image.load.i32(i32, <8 x i32>, i32, i1, i1, i1, i1) #2
104 declare <4 x float> @llvm.amdgcn.image.load.v2i32(<2 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #2
105 declare <4 x float> @llvm.amdgcn.image.load.v4i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #2
106 declare <4 x float> @llvm.amdgcn.image.load.mip.v4i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #2
103 declare <4 x float> @llvm.amdgcn.image.load.i32(i32, <8 x i32>, i32, i1, i1, i1, i1) #1
104 declare <4 x float> @llvm.amdgcn.image.load.v2i32(<2 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
105 declare <4 x float> @llvm.amdgcn.image.load.v4i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
106 declare <4 x float> @llvm.amdgcn.image.load.mip.v4i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
107107
108 attributes #0 = { "ShaderType"="0" }
109 attributes #1 = { nounwind }
110 attributes #2 = { nounwind readonly }
108 attributes #0 = { nounwind }
109 attributes #1 = { nounwind readonly }
55 ;GCN: s_mov_b32 m0, s{{[0-9]+}}
66 ;GCN: v_interp_p1_f32
77 ;GCN: v_interp_p2_f32
8 define void @v_interp(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>) #0 {
8 define amdgpu_ps void @v_interp(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>) {
99 main_body:
1010 %i = extractelement <2 x i32> %4, i32 0
1111 %j = extractelement <2 x i32> %4, i32 1
1818 }
1919
2020 ; Function Attrs: nounwind readnone
21 declare float @llvm.amdgcn.interp.p1(i32, i32, i32, i32) #1
21 declare float @llvm.amdgcn.interp.p1(i32, i32, i32, i32) #0
2222
2323 ; Function Attrs: nounwind readnone
24 declare float @llvm.amdgcn.interp.p2(float, i32, i32, i32, i32) #1
24 declare float @llvm.amdgcn.interp.p2(float, i32, i32, i32, i32) #0
2525
2626 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
2727
28 attributes #0 = { "ShaderType"="0" }
29 attributes #1 = { nounwind readnone }
28 attributes #0 = { nounwind readnone }
55 ;SI: v_mbcnt_hi_u32_b32_e32 {{v[0-9]+}}, -1, [[LO]]
66 ;VI: v_mbcnt_hi_u32_b32_e64 {{v[0-9]+}}, -1, [[LO]]
77
8 define void @mbcnt_intrinsics(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg) "ShaderType"="0" {
8 define amdgpu_ps void @mbcnt_intrinsics(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg) {
99 main_body:
1010 %lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #1
1111 %hi = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %lo) #1
22
33 ; SI-LABEL: {{^}}kilp_gs_const:
44 ; SI: s_mov_b64 exec, 0
5 define void @kilp_gs_const() #0 {
5 define amdgpu_gs void @kilp_gs_const() {
66 main_body:
77 %0 = icmp ule i32 0, 3
88 %1 = select i1 %0, float 1.000000e+00, float -1.000000e+00
1515
1616 declare void @llvm.AMDGPU.kilp(float)
1717
18 attributes #0 = { "ShaderType"="2" }
19
2018 !0 = !{!"const", null, i32 1}
3636
3737 declare float @llvm.cos.f32(float) readnone
3838 declare <4 x float> @llvm.cos.v4f32(<4 x float>) readnone
39
40 attributes #0 = { "ShaderType"="0" }
44 ;CHECK-NEXT: MUL NON-IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}},
55 ;CHECK-NEXT: EXP_IEEE * T{{[0-9]+\.[XYZW], PV\.[XYZW]}},
66
7 define void @test1(<4 x float> inreg %reg0) #0 {
7 define amdgpu_ps void @test1(<4 x float> inreg %reg0) {
88 %r0 = extractelement <4 x float> %reg0, i32 0
99 %r1 = extractelement <4 x float> %reg0, i32 1
1010 %r2 = call float @llvm.pow.f32( float %r0, float %r1)
2626 ;CHECK-NEXT: MUL NON-IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}},
2727 ;CHECK-NEXT: EXP_IEEE * T{{[0-9]+\.[XYZW], PV\.[XYZW]}},
2828 ;CHECK-NEXT: EXP_IEEE * T{{[0-9]+\.[XYZW], PV\.[XYZW]}},
29 define void @test2(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 {
29 define amdgpu_ps void @test2(<4 x float> inreg %reg0, <4 x float> inreg %reg1) {
3030 %vec = call <4 x float> @llvm.pow.v4f32( <4 x float> %reg0, <4 x float> %reg1)
3131 call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
3232 ret void
3535 declare float @llvm.pow.f32(float ,float ) readonly
3636 declare <4 x float> @llvm.pow.v4f32(<4 x float> ,<4 x float> ) readonly
3737 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
38
39 attributes #0 = { "ShaderType"="0" }
8787
8888 declare float @llvm.sin.f32(float) readnone
8989 declare <4 x float> @llvm.sin.v4f32(<4 x float>) readnone
90
91 attributes #0 = { "ShaderType"="0" }
0 ;RUN: llc < %s -march=r600 -mcpu=cayman
11
2 define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2, <4 x float> inreg %reg3) #0 {
2 define amdgpu_vs void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2, <4 x float> inreg %reg3) {
33 main_body:
44 %0 = extractelement <4 x float> %reg1, i32 0
55 %1 = extractelement <4 x float> %reg1, i32 1
110110
111111 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
112112
113 attributes #0 = { "ShaderType"="1" }
114113 attributes #1 = { readnone }
115114 attributes #2 = { readonly }
116115 attributes #3 = { nounwind readonly }
44
55 ; CHECK-LABEL: {{^}}main:
66 ; CHECK-NOT: v_readlane_b32 m0
7 define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg) "ShaderType"="0" {
7 define amdgpu_ps void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg) {
88 main_body:
99 %4 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %3)
1010 %cmp = fcmp ueq float 0.0, %4
22 ; CHECK-LABEL: {{^}}main:
33 ; CHECK: ADD *
44
5 define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2) #0 {
5 define amdgpu_vs void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2) {
66 main_body:
77 %0 = extractelement <4 x float> %reg1, i32 0
88 %1 = extractelement <4 x float> %reg1, i32 1
3131 ; CHECK-LABEL: {{^}}main2:
3232 ; CHECK-NOT: ADD *
3333
34 define void @main2(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2) #0 {
34 define amdgpu_vs void @main2(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2) {
3535 main_body:
3636 %0 = extractelement <4 x float> %reg1, i32 0
3737 %1 = extractelement <4 x float> %reg1, i32 1
6262
6363 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
6464
65 attributes #0 = { "ShaderType"="1" }
6665 attributes #1 = { readnone }
5454
5555 ; CHECK-LABEL: {{^}}soffset_max_imm:
5656 ; CHECK: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 64 offen glc
57 define void @soffset_max_imm([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) #1 {
57 define amdgpu_gs void @soffset_max_imm([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) {
5858 main_body:
5959 %tmp0 = getelementptr [6 x <16 x i8>], [6 x <16 x i8>] addrspace(2)* %0, i32 0, i32 0
6060 %tmp1 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp0
7373 ; CHECK-LABEL: {{^}}soffset_no_fold:
7474 ; CHECK: s_movk_i32 [[SOFFSET:s[0-9]+]], 0x41
7575 ; CHECK: buffer_load_dword v{{[0-9+]}}, v{{[0-9+]}}, s[{{[0-9]+}}:{{[0-9]+}}], [[SOFFSET]] offen glc
76 define void @soffset_no_fold([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) #1 {
76 define amdgpu_gs void @soffset_no_fold([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) {
7777 main_body:
7878 %tmp0 = getelementptr [6 x <16 x i8>], [6 x <16 x i8>] addrspace(2)* %0, i32 0, i32 0
7979 %tmp1 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp0
178178 declare i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #3
179179 declare void @llvm.SI.tbuffer.store.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
180180
181 attributes #1 = { "ShaderType"="2" "unsafe-fp-math"="true" }
181 attributes #1 = { "unsafe-fp-math"="true" }
182182 attributes #3 = { nounwind readonly }
0 ; RUN: not llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa < %s 2>&1 | FileCheck %s
11
22 ; CHECK: in function pixel_s{{.*}}: unsupported non-compute shaders with HSA
3 define void @pixel_shader() #0 {
3 define amdgpu_ps void @pixel_shader() #0 {
44 ret void
55 }
66
7 define void @vertex_shader() #1 {
7 define amdgpu_vs void @vertex_shader() #0 {
88 ret void
99 }
1010
11 define void @geometry_shader() #2 {
11 define amdgpu_gs void @geometry_shader() #0 {
1212 ret void
1313 }
14
15 attributes #0 = { nounwind "ShaderType"="0" }
16 attributes #1 = { nounwind "ShaderType"="1" }
17 attributes #2 = { nounwind "ShaderType"="2" }
22 ; CHECK-LABEL: {{^}}main:
33 ; CHECK: PRED_SETE_INT * Pred,
44 ; CHECK: DOT4 T{{[0-9]+}}.X, T0.X, T0.X, Pred_sel_one
5 define void @main(<4 x float> inreg) #0 {
5 define amdgpu_ps void @main(<4 x float> inreg) {
66 main_body:
77 %1 = extractelement <4 x float> %0, i32 0
88 %2 = bitcast float %1 to i32
2323 declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1
2424 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
2525 attributes #1 = { readnone }
26 attributes #0 = { "ShaderType"="0" }
22 ;CHECK: DOT4 T{{[0-9]\.X}}
33 ;CHECK: MULADD_IEEE * T{{[0-9]\.W}}
44
5 define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2, <4 x float> inreg %reg3) #0 {
5 define amdgpu_vs void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2, <4 x float> inreg %reg3) {
66 main_body:
77 %0 = extractelement <4 x float> %reg1, i32 0
88 %1 = extractelement <4 x float> %reg1, i32 1
4040
4141 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
4242
43 attributes #0 = { "ShaderType"="1" }
4443 attributes #1 = { readnone }
22 ; CHECK: DOT4 * T{{[0-9]\.W}} (MASKED)
33 ; CHECK: MAX T{{[0-9].[XYZW]}}, 0.0, PV.X
44
5 define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2, <4 x float> inreg %reg3, <4 x float> inreg %reg4, <4 x float> inreg %reg5, <4 x float> inreg %reg6, <4 x float> inreg %reg7) #0 {
5 define amdgpu_vs void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2, <4 x float> inreg %reg3, <4 x float> inreg %reg4, <4 x float> inreg %reg5, <4 x float> inreg %reg6, <4 x float> inreg %reg7) {
66 main_body:
77 %0 = extractelement <4 x float> %reg1, i32 0
88 %1 = extractelement <4 x float> %reg1, i32 1
234234
235235 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
236236
237 attributes #0 = { "ShaderType"="1" }
238237 attributes #1 = { readnone }
239238 attributes #2 = { readonly }
240239 attributes #3 = { nounwind readonly }
99 ; R600: {{^}}test:
1010 ; R600: MUL_IEEE {{[ *TXYZWPVxyzw.,0-9]+}} ; encoding: [{{0x[0-9a-f]+,0x[0-9a-f]+,0x[0-9a-f]+,0x[0-9a-f]+,0x10,0x02,0x[0-9a-f]+,0x[0-9a-f]+}}]
1111
12 define void @test(<4 x float> inreg %reg0) #0 {
12 define amdgpu_ps void @test(<4 x float> inreg %reg0) {
1313 entry:
1414 %r0 = extractelement <4 x float> %reg0, i32 0
1515 %r1 = extractelement <4 x float> %reg0, i32 1
2020 }
2121
2222 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
23
24 attributes #0 = { "ShaderType"="0" }
99 ;CHECK: EXPORT T{{[0-9]}}.0000
1010
1111
12 define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 {
12 define amdgpu_vs void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) {
1313 main_body:
1414 %0 = extractelement <4 x float> %reg1, i32 0
1515 %1 = extractelement <4 x float> %reg1, i32 1
137137 }
138138
139139 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
140
141 attributes #0 = { "ShaderType"="1" }
0 ;RUN: llc < %s -march=r600 -mcpu=cayman
11
2 define void @main(<4 x float> inreg, <4 x float> inreg) #0 {
2 define amdgpu_ps void @main(<4 x float> inreg, <4 x float> inreg) {
33 main_body:
44 %2 = extractelement <4 x float> %0, i32 0
55 %3 = extractelement <4 x float> %0, i32 1
5353
5454 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
5555
56 attributes #0 = { "ShaderType"="0" }
5756 attributes #1 = { readnone }
0 ;RUN: llc < %s -march=r600 -mcpu=redwood
11
2 define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 {
2 define amdgpu_vs void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) {
33 main_body:
44 %0 = extractelement <4 x float> %reg1, i32 0
55 %1 = extractelement <4 x float> %reg1, i32 1
114114 declare void @llvm.R600.store.stream.output(<4 x float>, i32, i32, i32)
115115
116116 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
117
118 attributes #0 = { "ShaderType"="1" }
11
22 ;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
33
4 define void @test(<4 x float> inreg %reg0) #0 {
4 define amdgpu_ps void @test(<4 x float> inreg %reg0) {
55 %r0 = extractelement <4 x float> %reg0, i32 0
66 %r1 = fdiv float 1.0, %r0
77 %vec = insertelement <4 x float> undef, float %r1, i32 0
1010 }
1111
1212 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
13
14 attributes #0 = { "ShaderType"="0" }
0 ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
11 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
2
3 attributes #0 = { "ShaderType"="1" }
42
53 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
64
108 ; GCN-DAG: exp 15, 0, 1, 1, 1, v1, v1, v1, v1
119 ; GCN: s_waitcnt expcnt(0)
1210 ; GCN-NOT: s_endpgm
13 define {float, float} @vgpr([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) #0 {
11 define amdgpu_vs {float, float} @vgpr([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
1412 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %3, float %3, float %3, float %3)
1513 %x = fadd float %3, 1.0
1614 %a = insertvalue {float, float} undef, float %x, 0
2725 ; GCN: exp 15, 0, 1, 1, 1, v4, v4, v4, v4
2826 ; GCN: s_waitcnt expcnt(0)
2927 ; GCN-NOT: s_endpgm
30 define {float, float, float, float} @vgpr_literal([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) #0 {
28 define amdgpu_vs {float, float, float, float} @vgpr_literal([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
3129 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %3, float %3, float %3, float %3)
3230 ret {float, float, float, float} {float 1.0, float 2.0, float 4.0, float -1.0}
3331 }
4442 ; GCN: v_mov_b32_e32 v3, v4
4543 ; GCN: v_mov_b32_e32 v4, v6
4644 ; GCN-NOT: s_endpgm
47 attributes #1 = { "ShaderType"="0" "InitialPSInputAddr"="0" }
48 define {float, float, float, float, float} @vgpr_ps_addr0([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #1 {
45 attributes #0 = { "InitialPSInputAddr"="0" }
46 define amdgpu_ps {float, float, float, float, float} @vgpr_ps_addr0([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
4947 %i0 = extractelement <2 x i32> %4, i32 0
5048 %i1 = extractelement <2 x i32> %4, i32 1
5149 %i2 = extractelement <2 x i32> %7, i32 0
7068 ; GCN-LABEL: {{^}}ps_input_ena_no_inputs:
7169 ; GCN: v_mov_b32_e32 v0, 1.0
7270 ; GCN-NOT: s_endpgm
73 define float @ps_input_ena_no_inputs([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #1 {
71 define amdgpu_ps float @ps_input_ena_no_inputs([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
7472 ret float 1.0
7573 }
7674
8482 ; GCN-DAG: v_mov_b32_e32 v1, v2
8583 ; GCN: v_mov_b32_e32 v2, v3
8684 ; GCN-NOT: s_endpgm
87 define {float, <2 x float>} @ps_input_ena_pos_w([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #1 {
85 define amdgpu_ps {float, <2 x float>} @ps_input_ena_pos_w([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
8886 %f = bitcast <2 x i32> %8 to <2 x float>
8987 %s = insertvalue {float, <2 x float>} undef, float %14, 0
9088 %s1 = insertvalue {float, <2 x float>} %s, <2 x float> %f, 1
103101 ; GCN-DAG: v_mov_b32_e32 v3, v6
104102 ; GCN-DAG: v_mov_b32_e32 v4, v8
105103 ; GCN-NOT: s_endpgm
106 attributes #2 = { "ShaderType"="0" "InitialPSInputAddr"="1" }
107 define {float, float, float, float, float} @vgpr_ps_addr1([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #2 {
104 attributes #1 = { "InitialPSInputAddr"="1" }
105 define amdgpu_ps {float, float, float, float, float} @vgpr_ps_addr1([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #1 {
108106 %i0 = extractelement <2 x i32> %4, i32 0
109107 %i1 = extractelement <2 x i32> %4, i32 1
110108 %i2 = extractelement <2 x i32> %7, i32 0
133131 ; GCN: v_mov_b32_e32 v3, v8
134132 ; GCN: v_mov_b32_e32 v4, v12
135133 ; GCN-NOT: s_endpgm
136 attributes #3 = { "ShaderType"="0" "InitialPSInputAddr"="119" }
137 define {float, float, float, float, float} @vgpr_ps_addr119([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #3 {
134 attributes #2 = { "InitialPSInputAddr"="119" }
135 define amdgpu_ps {float, float, float, float, float} @vgpr_ps_addr119([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #2 {
138136 %i0 = extractelement <2 x i32> %4, i32 0
139137 %i1 = extractelement <2 x i32> %4, i32 1
140138 %i2 = extractelement <2 x i32> %7, i32 0
163161 ; GCN: v_mov_b32_e32 v3, v4
164162 ; GCN: v_mov_b32_e32 v4, v8
165163 ; GCN-NOT: s_endpgm
166 attributes #4 = { "ShaderType"="0" "InitialPSInputAddr"="418" }
167 define {float, float, float, float, float} @vgpr_ps_addr418([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #4 {
164 attributes #3 = { "InitialPSInputAddr"="418" }
165 define amdgpu_ps {float, float, float, float, float} @vgpr_ps_addr418([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #3 {
168166 %i0 = extractelement <2 x i32> %4, i32 0
169167 %i1 = extractelement <2 x i32> %4, i32 1
170168 %i2 = extractelement <2 x i32> %7, i32 0
186184 ; GCN: s_add_i32 s0, s3, 2
187185 ; GCN: s_mov_b32 s2, s3
188186 ; GCN-NOT: s_endpgm
189 define {i32, i32, i32} @sgpr([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) #0 {
187 define amdgpu_vs {i32, i32, i32} @sgpr([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
190188 %x = add i32 %2, 2
191189 %a = insertvalue {i32, i32, i32} undef, i32 %x, 0
192190 %b = insertvalue {i32, i32, i32} %a, i32 %1, 1
202200 ; GCN-DAG: s_mov_b32 s2, 7
203201 ; GCN-DAG: s_mov_b32 s3, 8
204202 ; GCN-NOT: s_endpgm
205 define {i32, i32, i32, i32} @sgpr_literal([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) #0 {
203 define amdgpu_vs {i32, i32, i32, i32} @sgpr_literal([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
206204 %x = add i32 %2, 2
207205 ret {i32, i32, i32, i32} {i32 5, i32 6, i32 7, i32 8}
208206 }
217215 ; GCN: s_mov_b32 s2, s3
218216 ; GCN: s_waitcnt expcnt(0)
219217 ; GCN-NOT: s_endpgm
220 define {float, i32, float, i32, i32} @both([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) #0 {
218 define amdgpu_vs {float, i32, float, i32, i32} @both([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
221219 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %3, float %3, float %3, float %3)
222220 %v = fadd float %3, 1.0
223221 %s = add i32 %2, 2
238236 ; GCN-DAG: v_mov_b32_e32 v1, 2.0
239237 ; GCN-DAG: v_mov_b32_e32 v2, 4.0
240238 ; GCN-DAG: exp 15, 0, 1, 1, 1, v3, v3, v3, v3
241 define {{float, i32}, {i32, <2 x float>}} @structure_literal([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) #0 {
239 define amdgpu_vs {{float, i32}, {i32, <2 x float>}} @structure_literal([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
242240 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %3, float %3, float %3, float %3)
243241 ret {{float, i32}, {i32, <2 x float>}} {{float, i32} {float 1.0, i32 2}, {i32, <2 x float>} {i32 3, <2 x float> }}
244242 }
1111 ; ModuleID = 'bugpoint-reduced-simplified.bc'
1212 target triple = "amdgcn--"
1313
14 define <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <8 x i32>] addrspace(2)* byval, i32 addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 {
14 define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <8 x i32>] addrspace(2)* byval, i32 addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 {
1515 main_body:
1616 %p83 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
1717 %p87 = fmul float undef, %p83
5252 ; Function Attrs: nounwind readnone
5353 declare float @llvm.floor.f32(float) #1
5454
55 attributes #0 = { "InitialPSInputAddr"="36983" "ShaderType"="0" }
55 attributes #0 = { "InitialPSInputAddr"="36983" }
5656 attributes #1 = { nounwind readnone }
11
22 ; CHECK: TEX 9 @6 ; encoding: [0x06,0x00,0x00,0x00,0x00,0x04,0x88,0x80]
33
4 define void @test(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 {
4 define amdgpu_vs void @test(<4 x float> inreg %reg0, <4 x float> inreg %reg1) {
55 %1 = extractelement <4 x float> %reg1, i32 0
66 %2 = extractelement <4 x float> %reg1, i32 1
77 %3 = extractelement <4 x float> %reg1, i32 2
3636 declare <4 x float> @llvm.AMDGPU.tex(<4 x float>, i32, i32, i32) readnone
3737
3838 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
39
40 attributes #0 = { "ShaderType"="1" }
0 ;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched -verify-machineinstrs
11 ;REQUIRES: asserts
22
3 define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #1 {
3 define amdgpu_vs void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) {
44 main_body:
55 %0 = extractelement <4 x float> %reg1, i32 0
66 %1 = extractelement <4 x float> %reg1, i32 1
7878 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
7979
8080 attributes #0 = { readnone }
81 attributes #1 = { "ShaderType"="1" }
66
77
88 ; SI-LABEL: {{^}}main(
9 define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 {
9 define amdgpu_vs void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) {
1010 main_body:
1111 %0 = extractelement <4 x float> %reg1, i32 0
1212 %1 = extractelement <4 x float> %reg1, i32 2
158158 %115 = fadd float %temp4.0, 1.000000e+00
159159 br label %Flow1
160160 }
161
162 attributes #0 = { "ShaderType"="1" }
0 ;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched
11 ;REQUIRES: asserts
22
3 define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 {
3 define amdgpu_vs void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) {
44 main_body:
55 %0 = extractelement <4 x float> %reg1, i32 0
66 %1 = extractelement <4 x float> %reg1, i32 1
127127 }
128128
129129 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
130
131 attributes #0 = { "ShaderType"="1" }
1010 ; CHECK-LABEL: {{^}}phi1:
1111 ; CHECK: s_buffer_load_dword [[DST:s[0-9]]], {{s\[[0-9]+:[0-9]+\]}}, 0x0
1212 ; CHECK: v_mov_b32_e32 v{{[0-9]}}, [[DST]]
13 define void @phi1(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
13 define amdgpu_ps void @phi1(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
1414 main_body:
1515 %tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
1616 %tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
3434
3535 ; Make sure this program doesn't crash
3636 ; CHECK-LABEL: {{^}}phi2:
37 define void @phi2(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
37 define amdgpu_ps void @phi2(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
3838 main_body:
3939 %tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg, i32 0
4040 %tmp20 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
155155
156156 ; We just want to make sure the program doesn't crash
157157 ; CHECK-LABEL: {{^}}loop:
158 define void @loop(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <8 x i32> addrspace(2)* inreg %arg2, i32 inreg %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <3 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19) #0 {
158 define amdgpu_ps void @loop(<16 x i8> addrspace(2)*