llvm.org GIT mirror llvm / a3c2bcf
R600/SI: expose TBUFFER_STORE_FORMAT_* for OpenGL transform feedback. For _XYZ, the type of VDATA is v4i32, because v3i32 doesn't exist. The ADDR64 bit is not exposed. A simpler intrinsic that doesn't take a resource descriptor might be nicer. The maximum number of input SGPRs is bumped to 17. Signed-off-by: Marek Olšák <marek.olsak@amd.com> Reviewed-by: Tom Stellard <thomas.stellard@amd.com> git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190575 91177308-0d34-0410-b5e6-96231b3b80d8 Tom Stellard 7 years ago
8 changed file(s) with 159 addition(s) and 7 deletion(s). Raw diff Collapse all Expand all
1818
1919 CCIfInReg
2020 SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7,
21 SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15
21 SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15,
22 SGPR16
2223 ]>>>,
2324
2425 CCIfInReg
723723 NODE_NAME_CASE(SAMPLED)
724724 NODE_NAME_CASE(SAMPLEL)
725725 NODE_NAME_CASE(STORE_MSKOR)
726 }
727 }
726 NODE_NAME_CASE(TBUFFER_STORE_FORMAT)
727 }
728 }
159159 FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,
160160 STORE_MSKOR,
161161 LOAD_CONSTANT,
162 TBUFFER_STORE_FORMAT,
162163 LAST_AMDGPU_ISD_NUMBER
163164 };
164165
8484 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f32, Custom);
8585 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v16i8, Custom);
8686 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom);
87
88 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
8789
8890 setLoadExtAction(ISD::SEXTLOAD, MVT::i32, Expand);
8991
462464 Op.getOperand(3));
463465 }
464466 }
467
468 case ISD::INTRINSIC_VOID: // custom lowering for void intrinsics
469 SDValue Chain = Op.getOperand(0); // operand 0 is forwarded as the chain of the new node
470 unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); // operand 1 holds the intrinsic ID as a constant
471
472 switch (IntrinsicID) {
473 case AMDGPUIntrinsic::SI_tbuffer_store: {
474 SDLoc DL(Op);
475 SDValue Ops [] = { // chain, converted resource descriptor, then intrinsic operands 3..14 unchanged
476 Chain,
477 ResourceDescriptorToi128(Op.getOperand(2), DAG),
478 Op.getOperand(3),
479 Op.getOperand(4),
480 Op.getOperand(5),
481 Op.getOperand(6),
482 Op.getOperand(7),
483 Op.getOperand(8),
484 Op.getOperand(9),
485 Op.getOperand(10),
486 Op.getOperand(11),
487 Op.getOperand(12),
488 Op.getOperand(13),
489 Op.getOperand(14)
490 };
491 EVT VT = Op.getOperand(3).getValueType(); // type of operand 3 (the stored data); used as the memory VT below
492
493 MachineMemOperand *MMO = MF.getMachineMemOperand(
494 MachinePointerInfo(),
495 MachineMemOperand::MOStore,
496 VT.getSizeInBits() / 8, 4); // size in bytes of VT; 4 is passed as the alignment argument
497 return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_STORE_FORMAT, DL,
498 Op->getVTList(), Ops,
499 sizeof(Ops)/sizeof(Ops[0]), VT, MMO);
500 }
501 default: // other void intrinsics are not handled here; falls out of the switch
502 break;
503 }
465504 }
466505 return SDValue();
467506 }
1818 def SIload_constant : SDNode<"AMDGPUISD::LOAD_CONSTANT",
1919 SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisVT<1, i128>, SDTCisVT<2, i32>]>,
2020 [SDNPMayLoad, SDNPMemOperand]
21 >;
22
23 def SItbuffer_store : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT",
24 SDTypeProfile<0, 13, // no results, 13 operands (typed one by one below)
25 [SDTCisVT<0, i128>, // rsrc(SGPR)
26 SDTCisVT<1, iAny>, // vdata(VGPR)
27 SDTCisVT<2, i32>, // num_channels(imm)
28 SDTCisVT<3, i32>, // vaddr(VGPR)
29 SDTCisVT<4, i32>, // soffset(SGPR)
30 SDTCisVT<5, i32>, // inst_offset(imm)
31 SDTCisVT<6, i32>, // dfmt(imm)
32 SDTCisVT<7, i32>, // nfmt(imm)
33 SDTCisVT<8, i32>, // offen(imm)
34 SDTCisVT<9, i32>, // idxen(imm)
35 SDTCisVT<10, i32>, // glc(imm)
36 SDTCisVT<11, i32>, // slc(imm)
37 SDTCisVT<12, i32> // tfe(imm)
38 ]>,
39 [SDNPMayStore, SDNPMemOperand, SDNPHasChain] // node may store, carries a memory operand, and has a chain
2140 >;
2241
2342 def SIload_input : SDNode<"AMDGPUISD::LOAD_INPUT",
6382 N->getZExtValue() >> 2, MVT::i32);
6483 }]>
6584 >;
85
86 def as_i1imm : SDNodeXForm<imm, [{ // re-emits the immediate's zero-extended value as an i1 target constant
87 return CurDAG->getTargetConstant(N->getZExtValue(), MVT::i1);
88 }]>;
89
90 def as_i8imm : SDNodeXForm<imm, [{ // re-emits the immediate's zero-extended value as an i8 target constant
91 return CurDAG->getTargetConstant(N->getZExtValue(), MVT::i8);
92 }]>;
6693
6794 def as_i16imm : SDNodeXForm
6895 return CurDAG->getTargetConstant(N->getSExtValue(), MVT::i16);
476476 //def TBUFFER_LOAD_FORMAT_XY : MTBUF_ <0x00000001, "TBUFFER_LOAD_FORMAT_XY", []>;
477477 //def TBUFFER_LOAD_FORMAT_XYZ : MTBUF_ <0x00000002, "TBUFFER_LOAD_FORMAT_XYZ", []>;
478478 def TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Load_Helper <0x00000003, "TBUFFER_LOAD_FORMAT_XYZW", VReg_128>;
479 //def TBUFFER_STORE_FORMAT_X : MTBUF_ <0x00000004, "TBUFFER_STORE_FORMAT_X", []>;
480 //def TBUFFER_STORE_FORMAT_XY : MTBUF_ <0x00000005, "TBUFFER_STORE_FORMAT_XY", []>;
481 //def TBUFFER_STORE_FORMAT_XYZ : MTBUF_ <0x00000006, "TBUFFER_STORE_FORMAT_XYZ", []>;
482 //def TBUFFER_STORE_FORMAT_XYZW : MTBUF_ <0x00000007, "TBUFFER_STORE_FORMAT_XYZW", []>;
479 def TBUFFER_STORE_FORMAT_X : MTBUF_Store_Helper <0x00000004, "TBUFFER_STORE_FORMAT_X", VReg_32>;
480 def TBUFFER_STORE_FORMAT_XY : MTBUF_Store_Helper <0x00000005, "TBUFFER_STORE_FORMAT_XY", VReg_64>;
481 def TBUFFER_STORE_FORMAT_XYZ : MTBUF_Store_Helper <0x00000006, "TBUFFER_STORE_FORMAT_XYZ", VReg_128>; // VReg_128 on purpose: vdata for _XYZ is v4i32 because v3i32 doesn't exist
482 def TBUFFER_STORE_FORMAT_XYZW : MTBUF_Store_Helper <0x00000007, "TBUFFER_STORE_FORMAT_XYZW", VReg_128>;
483483
484484 let mayLoad = 1 in {
485485
18801880 defm : MUBUFStore_Pattern ;
18811881 defm : MUBUFStore_Pattern ;
18821882
1883 //===----------------------------------------------------------------------===//
1884 // MTBUF Patterns
1885 //===----------------------------------------------------------------------===//
1886
1887 // TBUFFER_STORE_FORMAT_*, addr64=0 (the literal 0 in the output dag); vt = vdata type, num_channels = channel count to match, opcode = instruction to emit
1888 class MTBUF_StoreResource <ValueType vt, int num_channels, MTBUF_Store_Helper opcode> : Pat< // NOTE(review): template parameter list reconstructed from the names used below - confirm the type of opcode
1889 (SItbuffer_store i128:$rsrc, vt:$vdata, num_channels, i32:$vaddr,
1890 i32:$soffset, imm:$inst_offset, imm:$dfmt,
1891 imm:$nfmt, imm:$offen, imm:$idxen,
1892 imm:$glc, imm:$slc, imm:$tfe),
1893 (opcode
1894 $vdata, (as_i16imm $inst_offset), (as_i1imm $offen), (as_i1imm $idxen),
1895 (as_i1imm $glc), 0, (as_i8imm $dfmt), (as_i8imm $nfmt), $vaddr, $rsrc,
1896 (as_i1imm $slc), (as_i1imm $tfe), $soffset)
1897 >;
1898
1899 def : MTBUF_StoreResource <i32, 1, TBUFFER_STORE_FORMAT_X>;
1900 def : MTBUF_StoreResource <v2i32, 2, TBUFFER_STORE_FORMAT_XY>;
1901 def : MTBUF_StoreResource <v4i32, 3, TBUFFER_STORE_FORMAT_XYZ>; // v4i32 for XYZ: v3i32 doesn't exist
1902 def : MTBUF_StoreResource <v4i32, 4, TBUFFER_STORE_FORMAT_XYZW>;
1903
18831904 /********** ====================== **********/
18841905 /********** Indirect addressing **********/
18851906 /********** ====================== **********/
1818 def int_SI_export : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>;
1919 def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>;
2020 def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_anyint_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]> ;
21
22 // Fully-flexible TBUFFER_STORE_FORMAT_* except for the ADDR64 bit, which is not exposed
23 def int_SI_tbuffer_store : Intrinsic <
24 [], // no return values
25 [llvm_anyint_ty, // rsrc(SGPR)
26 llvm_anyint_ty, // vdata(VGPR), overloaded for types i32, v2i32, v4i32
27 llvm_i32_ty, // num_channels(imm), selects opcode suffix: 1=X, 2=XY, 3=XYZ, 4=XYZW
28 llvm_i32_ty, // vaddr(VGPR)
29 llvm_i32_ty, // soffset(SGPR)
30 llvm_i32_ty, // inst_offset(imm)
31 llvm_i32_ty, // dfmt(imm)
32 llvm_i32_ty, // nfmt(imm)
33 llvm_i32_ty, // offen(imm)
34 llvm_i32_ty, // idxen(imm)
35 llvm_i32_ty, // glc(imm)
36 llvm_i32_ty], // tfe(imm)
38 []>; // no properties listed (the neighboring load intrinsics list IntrNoMem; this one does not)
2139
2240 class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>;
2341
0 ;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
1
2 ;CHECK-LABEL: @test1
3 ;CHECK: TBUFFER_STORE_FORMAT_XYZW {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 32, -1, 0, -1, 0, 14, 4, {{VGPR[0-9]+}}, {{SGPR[0-9]+_SGPR[0-9]+_SGPR[0-9]+_SGPR[0-9]+}}, -1, 0, 0
4 define void @test1(i32 %a1, i32 %vaddr) { ; num_channels = 4 with <4 x i32> vdata: the CHECK line above expects the _XYZW opcode
5 %vdata = insertelement <4 x i32> undef, i32 %a1, i32 0
6 call void @llvm.SI.tbuffer.store.v4i32(<16 x i8> undef, <4 x i32> %vdata,
7 i32 4, i32 %vaddr, i32 0, i32 32, i32 14, i32 4, i32 1, i32 0, i32 1,
8 i32 1, i32 0)
9 ret void
10 }
11
12 ;CHECK-LABEL: @test2
13 ;CHECK: TBUFFER_STORE_FORMAT_XYZ {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 24, -1, 0, -1, 0, 13, 4, {{VGPR[0-9]+}}, {{SGPR[0-9]+_SGPR[0-9]+_SGPR[0-9]+_SGPR[0-9]+}}, -1, 0, 0
14 define void @test2(i32 %a1, i32 %vaddr) { ; num_channels = 3, still <4 x i32> vdata: the CHECK line above expects the _XYZ opcode with a 4-register operand
15 %vdata = insertelement <4 x i32> undef, i32 %a1, i32 0
16 call void @llvm.SI.tbuffer.store.v4i32(<16 x i8> undef, <4 x i32> %vdata,
17 i32 3, i32 %vaddr, i32 0, i32 24, i32 13, i32 4, i32 1, i32 0, i32 1,
18 i32 1, i32 0)
19 ret void
20 }
21
22 ;CHECK-LABEL: @test3
23 ;CHECK: TBUFFER_STORE_FORMAT_XY {{VGPR[0-9]+_VGPR[0-9]+}}, 16, -1, 0, -1, 0, 11, 4, {{VGPR[0-9]+}}, {{SGPR[0-9]+_SGPR[0-9]+_SGPR[0-9]+_SGPR[0-9]+}}, -1, 0, 0
24 define void @test3(i32 %a1, i32 %vaddr) { ; num_channels = 2 with <2 x i32> vdata: the CHECK line above expects the _XY opcode
25 %vdata = insertelement <2 x i32> undef, i32 %a1, i32 0
26 call void @llvm.SI.tbuffer.store.v2i32(<16 x i8> undef, <2 x i32> %vdata,
27 i32 2, i32 %vaddr, i32 0, i32 16, i32 11, i32 4, i32 1, i32 0, i32 1,
28 i32 1, i32 0)
29 ret void
30 }
31
32 ;CHECK-LABEL: @test4
33 ;CHECK: TBUFFER_STORE_FORMAT_X {{VGPR[0-9]+}}, 8, -1, 0, -1, 0, 4, 4, {{VGPR[0-9]+}}, {{SGPR[0-9]+_SGPR[0-9]+_SGPR[0-9]+_SGPR[0-9]+}}, -1, 0, 0
34 define void @test4(i32 %vdata, i32 %vaddr) { ; num_channels = 1 with scalar i32 vdata: the CHECK line above expects the _X opcode
35 call void @llvm.SI.tbuffer.store.i32(<16 x i8> undef, i32 %vdata,
36 i32 1, i32 %vaddr, i32 0, i32 8, i32 4, i32 4, i32 1, i32 0, i32 1,
37 i32 1, i32 0)
38 ret void
39 }
40
41 declare void @llvm.SI.tbuffer.store.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
42 declare void @llvm.SI.tbuffer.store.v2i32(<16 x i8>, <2 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
43 declare void @llvm.SI.tbuffer.store.v4i32(<16 x i8>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)