llvm.org GIT mirror llvm / da360ea
[AMDGPU] Add support for a16 modifier for gfx9 Summary: Adding support for a16 for gfx9. A16 bit replaces r128 bit for gfx9. Change-Id: Ie8b881e4e6d2f023fb5e0150420893513e5f4841 Subscribers: arsenm, kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, jfb, llvm-commits Differential Revision: https://reviews.llvm.org/D50575 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@340831 91177308-0d34-0410-b5e6-96231b3b80d8 Ryan Taylor 1 year, 6 months ago
12 changed file(s) with 691 addition(s) and 45 deletion(s). Raw diff Collapse all Expand all
241241 "Support DPP (Data Parallel Primitives) extension"
242242 >;
243243
244 def FeatureR128A16 : SubtargetFeature<"r128-a16",
245 "HasR128A16",
246 "true",
247 "Support 16 bit coordinates/gradients/lod/clamp/mip types on gfx9"
248 >;
249
244250 def FeatureIntClamp : SubtargetFeature<"int-clamp-insts",
245251 "HasIntClamp",
246252 "true",
443449 FeatureFastFMAF32, FeatureDPP, FeatureIntClamp,
444450 FeatureSDWA, FeatureSDWAOmod, FeatureSDWAScalar, FeatureSDWASdst,
445451 FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts,
446 FeatureAddNoCarryInsts, FeatureScalarAtomics
452 FeatureAddNoCarryInsts, FeatureScalarAtomics, FeatureR128A16
447453 ]
448454 >;
449455
702708 def HasDPP : Predicate<"Subtarget->hasDPP()">,
703709 AssemblerPredicate<"FeatureDPP">;
704710
711 def HasR128A16 : Predicate<"Subtarget->hasR128A16()">,
712 AssemblerPredicate<"FeatureR128A16">;
713
705714 def HasIntClamp : Predicate<"Subtarget->hasIntClamp()">,
706715 AssemblerPredicate<"FeatureIntClamp">;
707716
196196 HasSDWAMac(false),
197197 HasSDWAOutModsVOPC(false),
198198 HasDPP(false),
199 HasR128A16(false),
199200 HasDLInsts(false),
200201 D16PreservesUnusedBits(false),
201202 FlatAddressSpace(false),
357357 bool HasSDWAMac;
358358 bool HasSDWAOutModsVOPC;
359359 bool HasDPP;
360 bool HasR128A16;
360361 bool HasDLInsts;
361362 bool D16PreservesUnusedBits;
362363 bool FlatAddressSpace;
788789
789790 bool hasDPP() const {
790791 return HasDPP;
792 }
793
794 bool hasR128A16() const {
795 return HasR128A16;
791796 }
792797
793798 bool enableSIScheduler() const {
155155 ImmTyDMask,
156156 ImmTyUNorm,
157157 ImmTyDA,
158 ImmTyR128,
158 ImmTyR128A16,
159159 ImmTyLWE,
160160 ImmTyExpTgt,
161161 ImmTyExpCompr,
289289 bool isDMask() const { return isImmTy(ImmTyDMask); }
290290 bool isUNorm() const { return isImmTy(ImmTyUNorm); }
291291 bool isDA() const { return isImmTy(ImmTyDA); }
292 bool isR128() const { return isImmTy(ImmTyR128); }
292 bool isR128A16() const { return isImmTy(ImmTyR128A16); }
293293 bool isLWE() const { return isImmTy(ImmTyLWE); }
294294 bool isOff() const { return isImmTy(ImmTyOff); }
295295 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
677677 case ImmTyDMask: OS << "DMask"; break;
678678 case ImmTyUNorm: OS << "UNorm"; break;
679679 case ImmTyDA: OS << "DA"; break;
680 case ImmTyR128: OS << "R128"; break;
680 case ImmTyR128A16: OS << "R128A16"; break;
681681 case ImmTyLWE: OS << "LWE"; break;
682682 case ImmTyOff: OS << "Off"; break;
683683 case ImmTyExpTgt: OS << "ExpTgt"; break;
10891089 bool validateMIMGAtomicDMask(const MCInst &Inst);
10901090 bool validateMIMGGatherDMask(const MCInst &Inst);
10911091 bool validateMIMGDataSize(const MCInst &Inst);
1092 bool validateMIMGR128(const MCInst &Inst);
10931092 bool validateMIMGD16(const MCInst &Inst);
10941093 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
10951094 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
24442443 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
24452444 }
24462445
2447 bool AMDGPUAsmParser::validateMIMGR128(const MCInst &Inst) {
2448
2449 const unsigned Opc = Inst.getOpcode();
2450 const MCInstrDesc &Desc = MII.get(Opc);
2451
2452 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2453 return true;
2454
2455 int Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::r128);
2456 assert(Idx != -1);
2457
2458 bool R128 = (Inst.getOperand(Idx).getImm() != 0);
2459
2460 return !R128 || hasMIMG_R128();
2461 }
2462
24632446 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
24642447
24652448 const unsigned Opc = Inst.getOpcode();
24922475 if (!validateIntClampSupported(Inst)) {
24932476 Error(IDLoc,
24942477 "integer clamping is not supported on this GPU");
2495 return false;
2496 }
2497 if (!validateMIMGR128(Inst)) {
2498 Error(IDLoc,
2499 "r128 modifier is not supported on this GPU");
25002478 return false;
25012479 }
25022480 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
34623440 case AsmToken::Identifier: {
34633441 StringRef Tok = Parser.getTok().getString();
34643442 if (Tok == Name) {
3443 if (Tok == "r128" && isGFX9())
3444 Error(S, "r128 modifier is not supported on this GPU");
3445 if (Tok == "a16" && !isGFX9())
3446 Error(S, "a16 modifier is not supported on this GPU");
34653447 Bit = 1;
34663448 Parser.Lex();
34673449 } else if (Tok.startswith("no") && Tok.endswith(Name)) {
47044686 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
47054687 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
47064688 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
4707 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128);
4689 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
47084690 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
47094691 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
47104692 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
48144796 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
48154797 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr},
48164798 {"da", AMDGPUOperand::ImmTyDA, true, nullptr},
4817 {"r128", AMDGPUOperand::ImmTyR128, true, nullptr},
4799 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr},
4800 {"a16", AMDGPUOperand::ImmTyR128A16, true, nullptr},
48184801 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr},
48194802 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr},
48204803 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr},
206206 printNamedBit(MI, OpNo, O, "da");
207207 }
208208
209 void AMDGPUInstPrinter::printR128(const MCInst *MI, unsigned OpNo,
209 void AMDGPUInstPrinter::printR128A16(const MCInst *MI, unsigned OpNo,
210210 const MCSubtargetInfo &STI, raw_ostream &O) {
211 printNamedBit(MI, OpNo, O, "r128");
211 if (STI.hasFeature(AMDGPU::FeatureR128A16))
212 printNamedBit(MI, OpNo, O, "a16");
213 else
214 printNamedBit(MI, OpNo, O, "r128");
212215 }
213216
214217 void AMDGPUInstPrinter::printLWE(const MCInst *MI, unsigned OpNo,
7979 raw_ostream &O);
8080 void printDA(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
8181 raw_ostream &O);
82 void printR128(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
82 void printR128A16(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
8383 raw_ostream &O);
8484 void printLWE(const MCInst *MI, unsigned OpNo,
8585 const MCSubtargetInfo &STI, raw_ostream &O);
140140
141141 let InOperandList = !con((ins addr_rc:$vaddr, SReg_256:$srsrc,
142142 DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
143 R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
143 R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
144144 !if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
145145 let AsmString = asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da"
146146 #!if(BaseOpcode.HasD16, "$d16", "");
198198
199199 let InOperandList = !con((ins data_rc:$vdata, addr_rc:$vaddr, SReg_256:$srsrc,
200200 DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
201 R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
201 R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
202202 !if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
203203 let AsmString = asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da"
204204 #!if(BaseOpcode.HasD16, "$d16", "");
251251
252252 let InOperandList = (ins data_rc:$vdata, addr_rc:$vaddr, SReg_256:$srsrc,
253253 DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
254 R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da);
254 R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da);
255255 let AsmString = asm#" $vdst, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da";
256256 }
257257
315315
316316 let InOperandList = !con((ins src_rc:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp,
317317 DMask:$dmask, UNorm:$unorm, GLC:$glc, SLC:$slc,
318 R128:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
318 R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
319319 !if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
320320 let AsmString = asm#" $vdata, $vaddr, $srsrc, $ssamp$dmask$unorm$glc$slc$r128$tfe$lwe$da"
321321 #!if(BaseOpcode.HasD16, "$d16", "");
45754575 const AMDGPU::ImageDimIntrinsicInfo *Intr,
45764576 SelectionDAG &DAG) const {
45774577 SDLoc DL(Op);
4578 MachineFunction &MF = DAG.getMachineFunction();
4579 const GCNSubtarget* ST = &MF.getSubtarget<GCNSubtarget>();
45784580 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
45794581 AMDGPU::getMIMGBaseOpcodeInfo(Intr->BaseOpcode);
45804582 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfo(Intr->Dim);
45844586
45854587 SmallVector<EVT, 3> ResultTypes(Op->value_begin(), Op->value_end());
45864588 bool IsD16 = false;
4589 bool IsA16 = false;
45874590 SDValue VData;
45884591 int NumVDataDwords;
45894592 unsigned AddrIdx; // Index of first address argument
46594662 }
46604663 }
46614664
4662 unsigned NumVAddrs = BaseOpcode->NumExtraArgs +
4663 (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
4664 (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
4665 (BaseOpcode->LodOrClampOrMip ? 1 : 0);
4665 unsigned NumGradients = BaseOpcode->Gradients ? DimInfo->NumGradients : 0;
4666 unsigned NumCoords = BaseOpcode->Coordinates ? DimInfo->NumCoords : 0;
4667 unsigned NumLCM = BaseOpcode->LodOrClampOrMip ? 1 : 0;
4668 unsigned NumVAddrs = BaseOpcode->NumExtraArgs + NumGradients +
4669 NumCoords + NumLCM;
4670 unsigned NumMIVAddrs = NumVAddrs;
4671
46664672 SmallVector<SDValue, 4> VAddrs;
4667 for (unsigned i = 0; i < NumVAddrs; ++i)
4668 VAddrs.push_back(Op.getOperand(AddrIdx + i));
46694673
46704674 // Optimize _L to _LZ when _L is zero
46714675 if (LZMappingInfo) {
46724676 if (auto ConstantLod =
4673 dyn_cast<ConstantFPSDNode>(VAddrs[NumVAddrs-1].getNode())) {
4677 dyn_cast<ConstantFPSDNode>(Op.getOperand(AddrIdx+NumVAddrs-1))) {
46744678 if (ConstantLod->isZero() || ConstantLod->isNegative()) {
46754679 IntrOpcode = LZMappingInfo->LZ; // set new opcode to _lz variant of _l
4676 VAddrs.pop_back(); // remove 'lod'
4680 NumMIVAddrs--; // remove 'lod'
46774681 }
46784682 }
4683 }
4684
4685 // Check for 16 bit addresses and pack if true.
4686 unsigned DimIdx = AddrIdx + BaseOpcode->NumExtraArgs;
4687 MVT VAddrVT = Op.getOperand(DimIdx).getSimpleValueType();
4688 if (VAddrVT.getScalarType() == MVT::f16 &&
4689 ST->hasFeature(AMDGPU::FeatureR128A16)) {
4690 IsA16 = true;
4691 for (unsigned i = AddrIdx; i < (AddrIdx + NumMIVAddrs); ++i) {
4692 SDValue AddrLo, AddrHi;
4693 // Push back extra arguments.
4694 if (i < DimIdx) {
4695 AddrLo = Op.getOperand(i);
4696 } else {
4697 AddrLo = Op.getOperand(i);
4698 // Dz/dh, dz/dv and the last odd coord are packed with undef. Also,
4699 // in 1D, derivatives dx/dh and dx/dv are packed with undef.
4700 if (((i + 1) >= (AddrIdx + NumMIVAddrs)) ||
4701 ((NumGradients / 2) % 2 == 1 &&
4702 (i == DimIdx + (NumGradients / 2) - 1 ||
4703 i == DimIdx + NumGradients - 1))) {
4704 AddrHi = DAG.getUNDEF(MVT::f16);
4705 } else {
4706 AddrHi = Op.getOperand(i + 1);
4707 i++;
4708 }
4709 AddrLo = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f16,
4710 {AddrLo, AddrHi});
4711 AddrLo = DAG.getBitcast(MVT::i32, AddrLo);
4712 }
4713 VAddrs.push_back(AddrLo);
4714 }
4715 } else {
4716 for (unsigned i = 0; i < NumMIVAddrs; ++i)
4717 VAddrs.push_back(Op.getOperand(AddrIdx + i));
46794718 }
46804719
46814720 SDValue VAddr = getBuildDwordsVector(DAG, DL, VAddrs);
47244763 Ops.push_back(Unorm);
47254764 Ops.push_back(GLC);
47264765 Ops.push_back(SLC);
4727 Ops.push_back(False); // r128
4766 Ops.push_back(IsA16 && // a16 or r128
4767 ST->hasFeature(AMDGPU::FeatureR128A16) ? True : False);
47284768 Ops.push_back(False); // tfe
47294769 Ops.push_back(False); // lwe
47304770 Ops.push_back(DimInfo->DA ? True : False);
753753 def TFE : NamedOperandBit<"TFE", NamedMatchClass<"TFE">>;
754754 def UNorm : NamedOperandBit<"UNorm", NamedMatchClass<"UNorm">>;
755755 def DA : NamedOperandBit<"DA", NamedMatchClass<"DA">>;
756 def R128 : NamedOperandBit<"R128", NamedMatchClass<"R128">>;
756 def R128A16 : NamedOperandBit<"R128A16", NamedMatchClass<"R128A16">>;
757757 def D16 : NamedOperandBit<"D16", NamedMatchClass<"D16">>;
758758 def LWE : NamedOperandBit<"LWE", NamedMatchClass<"LWE">>;
759759 def exp_compr : NamedOperandBit<"ExpCompr", NamedMatchClass<"ExpCompr">>;
0 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
1
2 ; GCN-LABEL: {{^}}gather4_2d:
3 ; GCN: image_gather4 v[0:3], v0, s[0:7], s[8:11] dmask:0x1 a16{{$}}
4 define amdgpu_ps <4 x float> @gather4_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
5 main_body:
6 %v = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f16(i32 1, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
7 ret <4 x float> %v
8 }
9
10 ; GCN-LABEL: {{^}}gather4_cube:
11 ; GCN: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16 da{{$}}
12 define amdgpu_ps <4 x float> @gather4_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %face) {
13 main_body:
14 %v = call <4 x float> @llvm.amdgcn.image.gather4.cube.v4f32.f16(i32 1, half %s, half %t, half %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
15 ret <4 x float> %v
16 }
17
18 ; GCN-LABEL: {{^}}gather4_2darray:
19 ; GCN: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16 da{{$}}
20 define amdgpu_ps <4 x float> @gather4_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %slice) {
21 main_body:
22 %v = call <4 x float> @llvm.amdgcn.image.gather4.2darray.v4f32.f16(i32 1, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
23 ret <4 x float> %v
24 }
25
26 ; GCN-LABEL: {{^}}gather4_c_2d:
27 ; GCN: image_gather4_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16{{$}}
28 define amdgpu_ps <4 x float> @gather4_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
29 main_body:
30 %v = call <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f32(i32 1, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
31 ret <4 x float> %v
32 }
33
34 ; GCN-LABEL: {{^}}gather4_cl_2d:
35 ; GCN: image_gather4_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16{{$}}
36 define amdgpu_ps <4 x float> @gather4_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %clamp) {
37 main_body:
38 %v = call <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f16(i32 1, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
39 ret <4 x float> %v
40 }
41
42 ; GCN-LABEL: {{^}}gather4_c_cl_2d:
43 ; GCN: image_gather4_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 a16{{$}}
44 define amdgpu_ps <4 x float> @gather4_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %clamp) {
45 main_body:
46 %v = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.2d.v4f32.f32(i32 1, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
47 ret <4 x float> %v
48 }
49
50 ; GCN-LABEL: {{^}}gather4_b_2d:
51 ; GCN: image_gather4_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16{{$}}
52 define amdgpu_ps <4 x float> @gather4_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t) {
53 main_body:
54 %v = call <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f32.f16(i32 1, float %bias, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
55 ret <4 x float> %v
56 }
57
58 ; GCN-LABEL: {{^}}gather4_c_b_2d:
59 ; GCN: image_gather4_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 a16{{$}}
60 define amdgpu_ps <4 x float> @gather4_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t) {
61 main_body:
62 %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f32.f16(i32 1, float %bias, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
63 ret <4 x float> %v
64 }
65
66 ; GCN-LABEL: {{^}}gather4_b_cl_2d:
67 ; GCN: image_gather4_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 a16{{$}}
68 define amdgpu_ps <4 x float> @gather4_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t, half %clamp) {
69 main_body:
70 %v = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f32.f16(i32 1, float %bias, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
71 ret <4 x float> %v
72 }
73
74 ; GCN-LABEL: {{^}}gather4_c_b_cl_2d:
75 ; GCN: image_gather4_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 a16{{$}}
76 define amdgpu_ps <4 x float> @gather4_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t, half %clamp) {
77 main_body:
78 %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f16(i32 1, float %bias, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
79 ret <4 x float> %v
80 }
81
82 ; GCN-LABEL: {{^}}gather4_l_2d:
83 ; GCN: image_gather4_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16{{$}}
84 define amdgpu_ps <4 x float> @gather4_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %lod) {
85 main_body:
86 %v = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f16(i32 1, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
87 ret <4 x float> %v
88 }
89
90 ; GCN-LABEL: {{^}}gather4_c_l_2d:
91 ; GCN: image_gather4_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 a16{{$}}
92 define amdgpu_ps <4 x float> @gather4_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %lod) {
93 main_body:
94 %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32 1, float %zcompare, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
95 ret <4 x float> %v
96 }
97
98 ; GCN-LABEL: {{^}}gather4_lz_2d:
99 ; GCN: image_gather4_lz v[0:3], v0, s[0:7], s[8:11] dmask:0x1 a16{{$}}
100 define amdgpu_ps <4 x float> @gather4_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
101 main_body:
102 %v = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16(i32 1, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
103 ret <4 x float> %v
104 }
105
106 ; GCN-LABEL: {{^}}gather4_c_lz_2d:
107 ; GCN: image_gather4_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16{{$}}
108 define amdgpu_ps <4 x float> @gather4_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
109 main_body:
110 %v = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f32(i32 1, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
111 ret <4 x float> %v
112 }
113
114 declare <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
115 declare <4 x float> @llvm.amdgcn.image.gather4.cube.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
116 declare <4 x float> @llvm.amdgcn.image.gather4.2darray.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
117
118 declare <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f32(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
119 declare <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
120 declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.2d.v4f32.f32(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
121
122 declare <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
123 declare <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f32.f16(i32, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
124 declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
125 declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f16(i32, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
126
127 declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
128 declare <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
129
130 declare <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
131 declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f32(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
132
133 attributes #0 = { nounwind }
134 attributes #1 = { nounwind readonly }
135 attributes #2 = { nounwind readnone }
0 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
1 ; GCN: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16{{$}}
2 define amdgpu_ps <4 x float> @sample_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) {
3 main_body:
4 %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
5 ret <4 x float> %v
6 }
7
8 ; GCN-LABEL: {{^}}sample_2d:
9 ; GCN: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16{{$}}
10 define amdgpu_ps <4 x float> @sample_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
11 main_body:
12 %v = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32 15, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
13 ret <4 x float> %v
14 }
15
16 ; GCN-LABEL: {{^}}sample_3d:
17 ; GCN: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
18 define amdgpu_ps <4 x float> @sample_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %r) {
19 main_body:
20 %v = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16(i32 15, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
21 ret <4 x float> %v
22 }
23
24 ; GCN-LABEL: {{^}}sample_cube:
25 ; GCN: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16 da{{$}}
26 define amdgpu_ps <4 x float> @sample_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %face) {
27 main_body:
28 %v = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f16(i32 15, half %s, half %t, half %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
29 ret <4 x float> %v
30 }
31
32 ; GCN-LABEL: {{^}}sample_1darray:
33 ; GCN: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16 da{{$}}
34 define amdgpu_ps <4 x float> @sample_1darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %slice) {
35 main_body:
36 %v = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f16(i32 15, half %s, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
37 ret <4 x float> %v
38 }
39
40 ; GCN-LABEL: {{^}}sample_2darray:
41 ; GCN: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16 da{{$}}
42 define amdgpu_ps <4 x float> @sample_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %slice) {
43 main_body:
44 %v = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f16(i32 15, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
45 ret <4 x float> %v
46 }
47
48 ; GCN-LABEL: {{^}}sample_c_1d:
49 ; GCN: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
50 define amdgpu_ps <4 x float> @sample_c_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s) {
51 main_body:
52 %v = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f16(i32 15, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
53 ret <4 x float> %v
54 }
55
56 ; GCN-LABEL: {{^}}sample_c_2d:
57 ; GCN: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
58 define amdgpu_ps <4 x float> @sample_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
59 main_body:
60 %v = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
61 ret <4 x float> %v
62 }
63
64 ; GCN-LABEL: {{^}}sample_cl_1d:
65 ; GCN: image_sample_cl v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16{{$}}
66 define amdgpu_ps <4 x float> @sample_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %clamp) {
67 main_body:
68 %v = call <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f16(i32 15, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
69 ret <4 x float> %v
70 }
71
72 ; GCN-LABEL: {{^}}sample_cl_2d:
73 ; GCN: image_sample_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
74 define amdgpu_ps <4 x float> @sample_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %clamp) {
75 main_body:
76 %v = call <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f16(i32 15, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
77 ret <4 x float> %v
78 }
79
80 ; GCN-LABEL: {{^}}sample_c_cl_1d:
81 ; GCN: image_sample_c_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
82 define amdgpu_ps <4 x float> @sample_c_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %clamp) {
83 main_body:
84 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f16(i32 15, float %zcompare, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
85 ret <4 x float> %v
86 }
87
88 ; GCN-LABEL: {{^}}sample_c_cl_2d:
89 ; GCN: image_sample_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
90 define amdgpu_ps <4 x float> @sample_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %clamp) {
91 main_body:
92 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
93 ret <4 x float> %v
94 }
95
96 ; GCN-LABEL: {{^}}sample_b_1d:
97 ; GCN: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
98 define amdgpu_ps <4 x float> @sample_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s) {
99 main_body:
100 %v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f16(i32 15, float %bias, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
101 ret <4 x float> %v
102 }
103
104 ; GCN-LABEL: {{^}}sample_b_2d:
105 ; GCN: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
106 define amdgpu_ps <4 x float> @sample_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t) {
107 main_body:
108 %v = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f16(i32 15, float %bias, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
109 ret <4 x float> %v
110 }
111
112 ; GCN-LABEL: {{^}}sample_c_b_1d:
113 ; GCN: image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
114 define amdgpu_ps <4 x float> @sample_c_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s) {
115 main_body:
116 %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f16(i32 15, float %bias, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
117 ret <4 x float> %v
118 }
119
120 ; GCN-LABEL: {{^}}sample_c_b_2d:
121 ; GCN: image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
122 define amdgpu_ps <4 x float> @sample_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t) {
123 main_body:
124 %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f16(i32 15, float %bias, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
125 ret <4 x float> %v
126 }
127
128 ; GCN-LABEL: {{^}}sample_b_cl_1d:
129 ; GCN: image_sample_b_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
130 define amdgpu_ps <4 x float> @sample_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %clamp) {
131 main_body:
132 %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f16(i32 15, float %bias, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
133 ret <4 x float> %v
134 }
135
136 ; GCN-LABEL: {{^}}sample_b_cl_2d:
137 ; GCN: image_sample_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
138 define amdgpu_ps <4 x float> @sample_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t, half %clamp) {
139 main_body:
140 %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f16(i32 15, float %bias, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
141 ret <4 x float> %v
142 }
143
144 ; GCN-LABEL: {{^}}sample_c_b_cl_1d:
145 ; GCN: image_sample_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
146 define amdgpu_ps <4 x float> @sample_c_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %clamp) {
147 main_body:
148 %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f16(i32 15, float %bias, float %zcompare, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
149 ret <4 x float> %v
150 }
151
152 ; GCN-LABEL: {{^}}sample_c_b_cl_2d:
153 ; GCN: image_sample_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
154 define amdgpu_ps <4 x float> @sample_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t, half %clamp) {
155 main_body:
156 %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f16(i32 15, float %bias, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
157 ret <4 x float> %v
158 }
159
160 ; GCN-LABEL: {{^}}sample_d_1d:
161 ; GCN: image_sample_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
162 define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) {
163 main_body:
164 %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
165 ret <4 x float> %v
166 }
167
168 ; GCN-LABEL: {{^}}sample_d_2d:
169 ; GCN: image_sample_d v[0:3], v[1:4], s[0:7], s[8:11] dmask:0xf a16{{$}}
170 define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
171 main_body:
172 %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
173 ret <4 x float> %v
174 }
175
176 ; GCN-LABEL: {{^}}sample_d_3d:
177 ; GCN: image_sample_d v[0:3], v[2:9], s[0:7], s[8:11] dmask:0xf a16{{$}}
178 define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r) {
179 main_body:
180 %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
181 ret <4 x float> %v
182 }
183
184 ; GCN-LABEL: {{^}}sample_c_d_1d:
185 ; GCN: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
186 define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s) {
187 main_body:
188 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
189 ret <4 x float> %v
190 }
191
192 ; GCN-LABEL: {{^}}sample_c_d_2d:
193 ; GCN: image_sample_c_d v[0:3], v[1:4], s[0:7], s[8:11] dmask:0xf a16{{$}}
194 define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
195 main_body:
196 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
197 ret <4 x float> %v
198 }
199
200 ; GCN-LABEL: {{^}}sample_d_cl_1d:
201 ; GCN: image_sample_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
202 define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s, half %clamp) {
203 main_body:
204 %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
205 ret <4 x float> %v
206 }
207
208 ; GCN-LABEL: {{^}}sample_d_cl_2d:
209 ; GCN: image_sample_d_cl v[0:3], v[2:5], s[0:7], s[8:11] dmask:0xf a16{{$}}
210 define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
211 main_body:
212 %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
213 ret <4 x float> %v
214 }
215
216 ; GCN-LABEL: {{^}}sample_c_d_cl_1d:
217 ; GCN: image_sample_c_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
218 define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp) {
219 main_body:
220 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
221 ret <4 x float> %v
222 }
223
224 ; GCN-LABEL: {{^}}sample_c_d_cl_2d:
225 ; GCN: image_sample_c_d_cl v[0:3], v[2:9], s[0:7], s[8:11] dmask:0xf a16{{$}}
226 define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
227 main_body:
228 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
229 ret <4 x float> %v
230 }
231
232 ; GCN-LABEL: {{^}}sample_cd_1d:
233 ; GCN: image_sample_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
234 define amdgpu_ps <4 x float> @sample_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) {
235 main_body:
236 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
237 ret <4 x float> %v
238 }
239
240 ; GCN-LABEL: {{^}}sample_cd_2d:
241 ; GCN: image_sample_cd v[0:3], v[1:4], s[0:7], s[8:11] dmask:0xf a16{{$}}
242 define amdgpu_ps <4 x float> @sample_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
243 main_body:
244 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
245 ret <4 x float> %v
246 }
247
248 ; GCN-LABEL: {{^}}sample_c_cd_1d:
249 ; GCN: image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
250 define amdgpu_ps <4 x float> @sample_c_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s) {
251 main_body:
252 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
253 ret <4 x float> %v
254 }
255
256 ; GCN-LABEL: {{^}}sample_c_cd_2d:
257 ; GCN: image_sample_c_cd v[0:3], v[1:4], s[0:7], s[8:11] dmask:0xf a16{{$}}
258 define amdgpu_ps <4 x float> @sample_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
259 main_body:
260 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
261 ret <4 x float> %v
262 }
263
264 ; GCN-LABEL: {{^}}sample_cd_cl_1d:
265 ; GCN: image_sample_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
266 define amdgpu_ps <4 x float> @sample_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s, half %clamp) {
267 main_body:
268 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
269 ret <4 x float> %v
270 }
271
272 ; GCN-LABEL: {{^}}sample_cd_cl_2d:
273 ; GCN: image_sample_cd_cl v[0:3], v[2:5], s[0:7], s[8:11] dmask:0xf a16{{$}}
274 define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
275 main_body:
276 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
277 ret <4 x float> %v
278 }
279
280 ; GCN-LABEL: {{^}}sample_c_cd_cl_1d:
281 ; GCN: image_sample_c_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
282 define amdgpu_ps <4 x float> @sample_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp) {
283 main_body:
284 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
285 ret <4 x float> %v
286 }
287
288 ; GCN-LABEL: {{^}}sample_c_cd_cl_2d:
289 ; GCN: image_sample_c_cd_cl v[0:3], v[2:9], s[0:7], s[8:11] dmask:0xf a16{{$}}
290 define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
291 main_body:
292 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
293 ret <4 x float> %v
294 }
295
296 ; GCN-LABEL: {{^}}sample_l_1d:
297 ; GCN: image_sample_l v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16{{$}}
298 define amdgpu_ps <4 x float> @sample_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %lod) {
299 main_body:
300 %v = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f16(i32 15, half %s, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
301 ret <4 x float> %v
302 }
303
304 ; GCN-LABEL: {{^}}sample_l_2d:
305 ; GCN: image_sample_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
306 define amdgpu_ps <4 x float> @sample_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %lod) {
307 main_body:
308 %v = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f16(i32 15, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
309 ret <4 x float> %v
310 }
311
312 ; GCN-LABEL: {{^}}sample_c_l_1d:
313 ; GCN: image_sample_c_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
314 define amdgpu_ps <4 x float> @sample_c_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %lod) {
315 main_body:
316 %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f16(i32 15, float %zcompare, half %s, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
317 ret <4 x float> %v
318 }
319
320 ; GCN-LABEL: {{^}}sample_c_l_2d:
321 ; GCN: image_sample_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16{{$}}
322 define amdgpu_ps <4 x float> @sample_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %lod) {
323 main_body:
324 %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
325 ret <4 x float> %v
326 }
327
328 ; GCN-LABEL: {{^}}sample_lz_1d:
329 ; GCN: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16{{$}}
330 define amdgpu_ps <4 x float> @sample_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) {
331 main_body:
332 %v = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f16(i32 15, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
333 ret <4 x float> %v
334 }
335
336 ; GCN-LABEL: {{^}}sample_lz_2d:
337 ; GCN: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16{{$}}
338 define amdgpu_ps <4 x float> @sample_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
339 main_body:
340 %v = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f16(i32 15, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
341 ret <4 x float> %v
342 }
343
344 ; GCN-LABEL: {{^}}sample_c_lz_1d:
345 ; GCN: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
346 define amdgpu_ps <4 x float> @sample_c_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s) {
347 main_body:
348 %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f16(i32 15, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
349 ret <4 x float> %v
350 }
351
352 ; GCN-LABEL: {{^}}sample_c_lz_2d:
353 ; GCN: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16{{$}}
354 define amdgpu_ps <4 x float> @sample_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
355 main_body:
356 %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
357 ret <4 x float> %v
358 }
359
360 ; GCN-LABEL: {{^}}sample_c_d_o_2darray_V1:
361 ; GCN: image_sample_c_d_o v0, v[2:9], s[0:7], s[8:11] dmask:0x4 a16 da{{$}}
362 define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice) {
363 main_body:
364 %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f16(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
365 ret float %v
366 }
367
368 ; GCN-LABEL: {{^}}sample_c_d_o_2darray_V2:
369 ; GCN: image_sample_c_d_o v[0:1], v[2:9], s[0:7], s[8:11] dmask:0x6 a16 da{{$}}
370 define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice) {
371 main_body:
372 %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
373 ret <2 x float> %v
374 }
375
376 declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
377 declare <8 x float> @llvm.amdgcn.image.sample.1d.v8f32.f16(i32, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
378 declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
379 declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
380 declare <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
381 declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
382 declare <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
383
384 declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f16(i32, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
385 declare <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
386 declare <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
387 declare <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
388 declare <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
389 declare <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
390
391 declare <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f16(i32, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
392 declare <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
393 declare <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f16(i32, float, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
394 declare <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f16(i32, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
395 declare <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
396 declare <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
397 declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f16(i32, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
398 declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f16(i32, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
399
400 declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
401 declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
402 declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32, half, half, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
403 declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
404 declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
405 declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f16(i32, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
406 declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
407 declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f16(i32, float, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
408 declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
409
410 declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
411 declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
412 declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
413 declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
414 declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f16(i32, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
415 declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
416 declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32, float, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
417 declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
418
419 declare <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
420 declare <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
421 declare <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
422 declare <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
423
424 declare <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f16(i32, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
425 declare <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
426 declare <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f16(i32, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
427 declare <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
428
429 declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f16(i32, i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
430 declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32, i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
431
432 attributes #0 = { nounwind }
433 attributes #1 = { nounwind readonly }
434 attributes #2 = { nounwind readnone }
267267 // GFX9: image_sample v[193:194], v[237:240], s[28:35], s[4:7] dmask:0x7 d16 ; encoding: [0x00,0x07,0x80,0xf0,0xed,0xc1,0x27,0x80]
268268
269269 //===----------------------------------------------------------------------===//
270 // Image Sample: a16
271 //===----------------------------------------------------------------------===//
272
273 image_sample v[193:196], v[237:240], s[28:35], s[4:7] dmask:0xf a16
274 // GFX9: image_sample v[193:196], v[237:240], s[28:35], s[4:7] dmask:0xf a16 ; encoding: [0x00,0x8f,0x80,0xf0,0xed,0xc1,0x27,0x00]
275 // NOSICI: error: a16 modifier is not supported on this GPU
276 // NOVI: error: a16 modifier is not supported on this GPU
277
278 image_sample_d v[193:196], v[237:240], s[28:35], s[4:7] dmask:0xf a16
279 // GFX9: image_sample_d v[193:196], v[237:240], s[28:35], s[4:7] dmask:0xf a16 ; encoding: [0x00,0x8f,0x88,0xf0,0xed,0xc1,0x27,0x00]
280 // NOSICI: error: a16 modifier is not supported on this GPU
281 // NOVI: error: a16 modifier is not supported on this GPU
282
283 image_sample_c_d v[193:196], v[237:240], s[28:35], s[4:7] dmask:0xf a16
284 // GFX9: image_sample_c_d v[193:196], v[237:240], s[28:35], s[4:7] dmask:0xf a16 ; encoding: [0x00,0x8f,0xa8,0xf0,0xed,0xc1,0x27,0x00]
285 // NOSICI: error: a16 modifier is not supported on this GPU
286 // NOVI: error: a16 modifier is not supported on this GPU
287
288 image_sample_c_d_cl v[193:196], v[237:240], s[28:35], s[4:7] dmask:0xf a16
289 // GFX9: image_sample_c_d_cl v[193:196], v[237:240], s[28:35], s[4:7] dmask:0xf a16 ; encoding: [0x00,0x8f,0xac,0xf0,0xed,0xc1,0x27,0x00]
290 // NOSICI: error: a16 modifier is not supported on this GPU
291 // NOVI: error: a16 modifier is not supported on this GPU
292
293 //===----------------------------------------------------------------------===//
270294 // Image Atomics
271295 //===----------------------------------------------------------------------===//
272296
371395 // NOGFX8_0: error: image data size does not match dmask and tfe
372396 // NOGFX8_1: error: image data size does not match dmask and tfe
373397 // NOGFX9: error: image data size does not match dmask and tfe
398
399 image_gather4 v[5:8], v1, s[8:15], s[12:15] dmask:0x1 a16
400 // GFX9: image_gather4 v[5:8], v1, s[8:15], s[12:15] dmask:0x1 a16 ; encoding: [0x00,0x81,0x00,0xf1,0x01,0x05,0x62,0x00]
401 // NOSICI: error: a16 modifier is not supported on this GPU
402 // NOVI: error: a16 modifier is not supported on this GPU
403
404 image_gather4_b_cl v[5:8], v[1:4], s[8:15], s[12:15] dmask:0x1 a16
405 // GFX9: image_gather4_b_cl v[5:8], v[1:4], s[8:15], s[12:15] dmask:0x1 a16 ; encoding: [0x00,0x81,0x18,0xf1,0x01,0x05,0x62,0x00]
406 // NOSICI: error: a16 modifier is not supported on this GPU
407 // NOVI: error: a16 modifier is not supported on this GPU