llvm.org GIT mirror llvm / df4626e
R600/SI: Assign a register class to the $vaddr operand for MIMG instructions The previous code declared the operand as unknown:$vaddr, which made it possible for scalar registers to be used instead of vector registers. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188425 91177308-0d34-0410-b5e6-96231b3b80d8 Tom Stellard 7 years ago
8 changed file(s) with 157 addition(s) and 64 deletion(s). Raw diff Collapse all Expand all
99
1010 #ifndef SIDEFINES_H_
1111 #define SIDEFINES_H_
12
13 namespace SIInstrFlags {
14 enum {
15 MIMG = 1 << 3
16 };
17 }
1218
1319 #define R_00B028_SPI_SHADER_PGM_RSRC1_PS 0x00B028
1420 #define R_00B02C_SPI_SHADER_PGM_RSRC2_PS 0x00B02C
10211021 /// \brief Fold the instructions after selecting them
10221022 SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
10231023 SelectionDAG &DAG) const {
1024 const SIInstrInfo *TII =
1025 static_cast(getTargetMachine().getInstrInfo());
10241026 Node = AdjustRegClass(Node, DAG);
10251027
1026 if (AMDGPU::isMIMG(Node->getMachineOpcode()) != -1)
1028 if (TII->isMIMG(Node->getMachineOpcode()))
10271029 adjustWritemask(Node, DAG);
10281030
10291031 return foldOperands(Node, DAG);
10331035 /// bits set in the writemask
10341036 void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
10351037 SDNode *Node) const {
1036 if (AMDGPU::isMIMG(MI->getOpcode()) == -1)
1038 const SIInstrInfo *TII =
1039 static_cast(getTargetMachine().getInstrInfo());
1040 if (!TII->isMIMG(MI->getOpcode()))
10371041 return;
10381042
10391043 unsigned VReg = MI->getOperand(0).getReg();
1616 field bits<1> VM_CNT = 0;
1717 field bits<1> EXP_CNT = 0;
1818 field bits<1> LGKM_CNT = 0;
19 field bits<1> MIMG = 0;
1920
2021 let TSFlags{0} = VM_CNT;
2122 let TSFlags{1} = EXP_CNT;
2223 let TSFlags{2} = LGKM_CNT;
24 let TSFlags{3} = MIMG;
2325 }
2426
2527 class Enc32 pattern> :
413415
414416 let VM_CNT = 1;
415417 let EXP_CNT = 1;
418 let MIMG = 1;
416419 }
417420
418421 def EXP : Enc64<
1414
1515 #include "SIInstrInfo.h"
1616 #include "AMDGPUTargetMachine.h"
17 #include "SIDefines.h"
1718 #include "llvm/CodeGen/MachineInstrBuilder.h"
1819 #include "llvm/CodeGen/MachineRegisterInfo.h"
1920 #include "llvm/MC/MCInstrDesc.h"
223224 return RC != &AMDGPU::EXECRegRegClass;
224225 }
225226
227 int SIInstrInfo::isMIMG(uint16_t Opcode) const {
228 return get(Opcode).TSFlags & SIInstrFlags::MIMG;
229 }
230
226231 //===----------------------------------------------------------------------===//
227232 // Indirect addressing callbacks
228233 //===----------------------------------------------------------------------===//
4646 virtual bool isMov(unsigned Opcode) const;
4747
4848 virtual bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const;
49 int isMIMG(uint16_t Opcode) const;
4950
5051 virtual int getIndirectIndexBegin(const MachineFunction &MF) const;
5152
7980 int getVOPe64(uint16_t Opcode);
8081 int getCommuteRev(uint16_t Opcode);
8182 int getCommuteOrig(uint16_t Opcode);
82 int isMIMG(uint16_t Opcode);
8383
8484 } // End namespace AMDGPU
8585
396396 let mayStore = 0;
397397 }
398398
399 class MIMG_NoSampler_Helper op, string asm> : MIMG <
399 class MIMG_NoSampler_Helper op, string asm,
400 RegisterClass src_rc> : MIMG <
400401 op,
401402 (outs VReg_128:$vdata),
402403 (ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128,
403 i1imm:$tfe, i1imm:$lwe, i1imm:$slc, unknown:$vaddr,
404 i1imm:$tfe, i1imm:$lwe, i1imm:$slc, src_rc:$vaddr,
404405 SReg_256:$srsrc),
405406 asm#" $vdata, $dmask, $unorm, $glc, $da, $r128,"
406407 #" $tfe, $lwe, $slc, $vaddr, $srsrc",
411412 let hasPostISelHook = 1;
412413 }
413414
414 class MIMG_Sampler_Helper op, string asm> : MIMG <
415 multiclass MIMG_NoSampler op, string asm> {
416 def _V1 : MIMG_NoSampler_Helper ;
417 def _V2 : MIMG_NoSampler_Helper ;
418 def _V4 : MIMG_NoSampler_Helper ;
419 }
420
421 class MIMG_Sampler_Helper op, string asm,
422 RegisterClass src_rc> : MIMG <
415423 op,
416424 (outs VReg_128:$vdata),
417425 (ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128,
418 i1imm:$tfe, i1imm:$lwe, i1imm:$slc, unknown:$vaddr,
426 i1imm:$tfe, i1imm:$lwe, i1imm:$slc, src_rc:$vaddr,
419427 SReg_256:$srsrc, SReg_128:$ssamp),
420428 asm#" $vdata, $dmask, $unorm, $glc, $da, $r128,"
421429 #" $tfe, $lwe, $slc, $vaddr, $srsrc, $ssamp",
423431 let mayLoad = 1;
424432 let mayStore = 0;
425433 let hasPostISelHook = 1;
434 }
435
436 multiclass MIMG_Sampler op, string asm> {
437 def _V1 : MIMG_Sampler_Helper ;
438 def _V2 : MIMG_Sampler_Helper ;
439 def _V4 : MIMG_Sampler_Helper ;
440 def _V8 : MIMG_Sampler_Helper ;
441 def _V16 : MIMG_Sampler_Helper ;
426442 }
427443
428444 //===----------------------------------------------------------------------===//
456472 let ValueCols = [["1"]];
457473 }
458474
459 // Test if the supplied opcode is an MIMG instruction
460 def isMIMG : InstrMapping {
461 let FilterClass = "MIMG";
462 let RowFields = ["Inst"];
463 let ColFields = ["Size"];
464 let KeyCol = ["8"];
465 let ValueCols = [["8"]];
466 }
467
468475 include "SIInstructions.td"
499499
500500 //def S_MEMTIME : SMRD_ <0x0000001e, "S_MEMTIME", []>;
501501 //def S_DCACHE_INV : SMRD_ <0x0000001f, "S_DCACHE_INV", []>;
502 def IMAGE_LOAD : MIMG_NoSampler_Helper <0x00000000, "IMAGE_LOAD">;
503 def IMAGE_LOAD_MIP : MIMG_NoSampler_Helper <0x00000001, "IMAGE_LOAD_MIP">;
502 defm IMAGE_LOAD : MIMG_NoSampler <0x00000000, "IMAGE_LOAD">;
503 defm IMAGE_LOAD_MIP : MIMG_NoSampler <0x00000001, "IMAGE_LOAD_MIP">;
504504 //def IMAGE_LOAD_PCK : MIMG_NoPattern_ <"IMAGE_LOAD_PCK", 0x00000002>;
505505 //def IMAGE_LOAD_PCK_SGN : MIMG_NoPattern_ <"IMAGE_LOAD_PCK_SGN", 0x00000003>;
506506 //def IMAGE_LOAD_MIP_PCK : MIMG_NoPattern_ <"IMAGE_LOAD_MIP_PCK", 0x00000004>;
509509 //def IMAGE_STORE_MIP : MIMG_NoPattern_ <"IMAGE_STORE_MIP", 0x00000009>;
510510 //def IMAGE_STORE_PCK : MIMG_NoPattern_ <"IMAGE_STORE_PCK", 0x0000000a>;
511511 //def IMAGE_STORE_MIP_PCK : MIMG_NoPattern_ <"IMAGE_STORE_MIP_PCK", 0x0000000b>;
512 def IMAGE_GET_RESINFO : MIMG_NoSampler_Helper <0x0000000e, "IMAGE_GET_RESINFO">;
512 def IMAGE_GET_RESINFO : MIMG_NoSampler_Helper <0x0000000e, "IMAGE_GET_RESINFO", VReg_32>;
513513 //def IMAGE_ATOMIC_SWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_SWAP", 0x0000000f>;
514514 //def IMAGE_ATOMIC_CMPSWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_CMPSWAP", 0x00000010>;
515515 //def IMAGE_ATOMIC_ADD : MIMG_NoPattern_ <"IMAGE_ATOMIC_ADD", 0x00000011>;
527527 //def IMAGE_ATOMIC_FCMPSWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_FCMPSWAP", 0x0000001d>;
528528 //def IMAGE_ATOMIC_FMIN : MIMG_NoPattern_ <"IMAGE_ATOMIC_FMIN", 0x0000001e>;
529529 //def IMAGE_ATOMIC_FMAX : MIMG_NoPattern_ <"IMAGE_ATOMIC_FMAX", 0x0000001f>;
530 def IMAGE_SAMPLE : MIMG_Sampler_Helper <0x00000020, "IMAGE_SAMPLE">;
530 defm IMAGE_SAMPLE : MIMG_Sampler <0x00000020, "IMAGE_SAMPLE">;
531531 //def IMAGE_SAMPLE_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_CL", 0x00000021>;
532 def IMAGE_SAMPLE_D : MIMG_Sampler_Helper <0x00000022, "IMAGE_SAMPLE_D">;
532 defm IMAGE_SAMPLE_D : MIMG_Sampler <0x00000022, "IMAGE_SAMPLE_D">;
533533 //def IMAGE_SAMPLE_D_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_D_CL", 0x00000023>;
534 def IMAGE_SAMPLE_L : MIMG_Sampler_Helper <0x00000024, "IMAGE_SAMPLE_L">;
535 def IMAGE_SAMPLE_B : MIMG_Sampler_Helper <0x00000025, "IMAGE_SAMPLE_B">;
534 defm IMAGE_SAMPLE_L : MIMG_Sampler <0x00000024, "IMAGE_SAMPLE_L">;
535 defm IMAGE_SAMPLE_B : MIMG_Sampler <0x00000025, "IMAGE_SAMPLE_B">;
536536 //def IMAGE_SAMPLE_B_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_B_CL", 0x00000026>;
537537 //def IMAGE_SAMPLE_LZ : MIMG_NoPattern_ <"IMAGE_SAMPLE_LZ", 0x00000027>;
538 def IMAGE_SAMPLE_C : MIMG_Sampler_Helper <0x00000028, "IMAGE_SAMPLE_C">;
538 defm IMAGE_SAMPLE_C : MIMG_Sampler <0x00000028, "IMAGE_SAMPLE_C">;
539539 //def IMAGE_SAMPLE_C_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CL", 0x00000029>;
540 def IMAGE_SAMPLE_C_D : MIMG_Sampler_Helper <0x0000002a, "IMAGE_SAMPLE_C_D">;
540 defm IMAGE_SAMPLE_C_D : MIMG_Sampler <0x0000002a, "IMAGE_SAMPLE_C_D">;
541541 //def IMAGE_SAMPLE_C_D_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D_CL", 0x0000002b>;
542 def IMAGE_SAMPLE_C_L : MIMG_Sampler_Helper <0x0000002c, "IMAGE_SAMPLE_C_L">;
543 def IMAGE_SAMPLE_C_B : MIMG_Sampler_Helper <0x0000002d, "IMAGE_SAMPLE_C_B">;
542 defm IMAGE_SAMPLE_C_L : MIMG_Sampler <0x0000002c, "IMAGE_SAMPLE_C_L">;
543 defm IMAGE_SAMPLE_C_B : MIMG_Sampler <0x0000002d, "IMAGE_SAMPLE_C_B">;
544544 //def IMAGE_SAMPLE_C_B_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_CL", 0x0000002e>;
545545 //def IMAGE_SAMPLE_C_LZ : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_LZ", 0x0000002f>;
546546 //def IMAGE_SAMPLE_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_O", 0x00000030>;
13261326 /* int_SI_sample for simple 1D texture lookup */
13271327 def : Pat <
13281328 (int_SI_sample v1i32:$addr, v32i8:$rsrc, v16i8:$sampler, imm),
1329 (IMAGE_SAMPLE 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
1329 (IMAGE_SAMPLE_V1 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
13301330 >;
13311331
13321332 class SamplePattern : Pat <
13571357 >;
13581358
13591359 /* int_SI_sample* for texture lookups consuming more address parameters */
1360 multiclass SamplePatterns {
1361 def : SamplePattern ;
1362 def : SampleRectPattern ;
1363 def : SampleArrayPattern ;
1364 def : SampleShadowPattern ;
1365 def : SampleShadowArrayPattern ;
1366
1367 def : SamplePattern ;
1368 def : SampleArrayPattern ;
1369 def : SampleShadowPattern ;
1370 def : SampleShadowArrayPattern ;
1371
1372 def : SamplePattern ;
1373 def : SampleArrayPattern ;
1374 def : SampleShadowPattern ;
1375 def : SampleShadowArrayPattern ;
1376
1377 def : SamplePattern ;
1378 def : SampleArrayPattern ;
1379 def : SampleShadowPattern ;
1380 def : SampleShadowArrayPattern ;
1381 }
1382
1383 defm : SamplePatterns;
1384 defm : SamplePatterns;
1385 defm : SamplePatterns;
1386 defm : SamplePatterns;
1360 multiclass SamplePatterns
1361 MIMG sample_c_l, MIMG sample_b, MIMG sample_c_b,
1362 MIMG sample_d, MIMG sample_c_d, ValueType addr_type> {
1363 def : SamplePattern ;
1364 def : SampleRectPattern ;
1365 def : SampleArrayPattern ;
1366 def : SampleShadowPattern ;
1367 def : SampleShadowArrayPattern ;
1368
1369 def : SamplePattern ;
1370 def : SampleArrayPattern ;
1371 def : SampleShadowPattern ;
1372 def : SampleShadowArrayPattern ;
1373
1374 def : SamplePattern ;
1375 def : SampleArrayPattern ;
1376 def : SampleShadowPattern ;
1377 def : SampleShadowArrayPattern ;
1378
1379 def : SamplePattern ;
1380 def : SampleArrayPattern ;
1381 def : SampleShadowPattern ;
1382 def : SampleShadowArrayPattern ;
1383 }
1384
1385 defm : SamplePatterns
1386 IMAGE_SAMPLE_L_V2, IMAGE_SAMPLE_C_L_V2,
1387 IMAGE_SAMPLE_B_V2, IMAGE_SAMPLE_C_B_V2,
1388 IMAGE_SAMPLE_D_V2, IMAGE_SAMPLE_C_D_V2,
1389 v2i32>;
1390 defm : SamplePatterns
1391 IMAGE_SAMPLE_L_V4, IMAGE_SAMPLE_C_L_V4,
1392 IMAGE_SAMPLE_B_V4, IMAGE_SAMPLE_C_B_V4,
1393 IMAGE_SAMPLE_D_V4, IMAGE_SAMPLE_C_D_V4,
1394 v4i32>;
1395 defm : SamplePatterns
1396 IMAGE_SAMPLE_L_V8, IMAGE_SAMPLE_C_L_V8,
1397 IMAGE_SAMPLE_B_V8, IMAGE_SAMPLE_C_B_V8,
1398 IMAGE_SAMPLE_D_V8, IMAGE_SAMPLE_C_D_V8,
1399 v8i32>;
1400 defm : SamplePatterns
1401 IMAGE_SAMPLE_L_V16, IMAGE_SAMPLE_C_L_V16,
1402 IMAGE_SAMPLE_B_V16, IMAGE_SAMPLE_C_B_V16,
1403 IMAGE_SAMPLE_D_V16, IMAGE_SAMPLE_C_D_V16,
1404 v16i32>;
13871405
13881406 /* int_SI_imageload for texture fetches consuming varying address parameters */
13891407 class ImageLoadPattern : Pat <
14061424 (opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc)
14071425 >;
14081426
1409 multiclass ImageLoadPatterns {
1410 def : ImageLoadPattern ;
1411 def : ImageLoadArrayPattern ;
1412 def : ImageLoadMSAAPattern ;
1413 def : ImageLoadArrayMSAAPattern ;
1414 }
1415
1416 defm : ImageLoadPatterns;
1417 defm : ImageLoadPatterns;
1427 multiclass ImageLoadPatterns {
1428 def : ImageLoadPattern ;
1429 def : ImageLoadArrayPattern ;
1430 }
1431
1432 multiclass ImageLoadMSAAPatterns {
1433 def : ImageLoadMSAAPattern ;
1434 def : ImageLoadArrayMSAAPattern ;
1435 }
1436
1437 defm : ImageLoadPatterns;
1438 defm : ImageLoadPatterns;
1439
1440 defm : ImageLoadMSAAPatterns;
1441 defm : ImageLoadMSAAPatterns;
14181442
14191443 /* Image resource information */
14201444 def : Pat <
8181 ret void
8282 }
8383
84 ; Test that coordinates are stored in vgprs and not sgprs
85 ; CHECK: vgpr_coords
86 ; CHECK: IMAGE_LOAD_MIP VGPR{{[0-9]}}_VGPR{{[0-9]}}_VGPR{{[0-9]}}_VGPR{{[0-9]}}, 15, 0, 0, 0, 0, 0, 0, 0, VGPR{{[0-9]}}_VGPR{{[0-9]}}_VGPR{{[0-9]}}_VGPR{{[0-9]}}
87 define void @vgpr_coords(float addrspace(2)* addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
88 main_body:
89 %20 = getelementptr float addrspace(2)* addrspace(2)* %0, i32 0
90 %21 = load float addrspace(2)* addrspace(2)* %20, !tbaa !0
91 %22 = getelementptr float addrspace(2)* %21, i32 0
92 %23 = load float addrspace(2)* %22, !tbaa !0, !invariant.load !1
93 %24 = getelementptr float addrspace(2)* %21, i32 1
94 %25 = load float addrspace(2)* %24, !tbaa !0, !invariant.load !1
95 %26 = getelementptr float addrspace(2)* %21, i32 4
96 %27 = load float addrspace(2)* %26, !tbaa !0, !invariant.load !1
97 %28 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
98 %29 = load <32 x i8> addrspace(2)* %28, !tbaa !0
99 %30 = bitcast float %27 to i32
100 %31 = bitcast float %23 to i32
101 %32 = bitcast float %25 to i32
102 %33 = insertelement <4 x i32> undef, i32 %31, i32 0
103 %34 = insertelement <4 x i32> %33, i32 %32, i32 1
104 %35 = insertelement <4 x i32> %34, i32 %30, i32 2
105 %36 = insertelement <4 x i32> %35, i32 undef, i32 3
106 %37 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> %36, <32 x i8> %29, i32 2)
107 %38 = extractelement <4 x i32> %37, i32 0
108 %39 = extractelement <4 x i32> %37, i32 1
109 %40 = extractelement <4 x i32> %37, i32 2
110 %41 = extractelement <4 x i32> %37, i32 3
111 %42 = bitcast i32 %38 to float
112 %43 = bitcast i32 %39 to float
113 %44 = bitcast i32 %40 to float
114 %45 = bitcast i32 %41 to float
115 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %42, float %43, float %44, float %45)
116 ret void
117 }
118
84119 declare <4 x i32> @llvm.SI.imageload.(<4 x i32>, <8 x i32>, i32) readnone
120 ; Function Attrs: nounwind readnone
121 declare <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32>, <32 x i8>, i32) #1
85122
86123 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
124
125 attributes #0 = { "ShaderType"="0" }
126 attributes #1 = { nounwind readnone }
127
128 !0 = metadata !{metadata !"const", null, i32 1}
129 !1 = metadata !{}
130