llvm.org GIT mirror llvm / 4d0e8a8
R600/SI: dynamically figure out the reg class of MIMG depending on the number of bits set in the writemask. Signed-off-by: Christian König <christian.koenig@amd.com> Reviewed-by: Michel Dänzer <michel.daenzer@amd.com> git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@179166 91177308-0d34-0410-b5e6-96231b3b80d8 Christian Konig 7 years ago
7 changed file(s) with 78 addition(s) and 17 deletion(s). Raw diff Collapse all Expand all
106106 } else if (AMDGPU::VReg_64RegClass.contains(reg)) {
107107 isSGPR = false;
108108 width = 2;
109 } else if (AMDGPU::VReg_96RegClass.contains(reg)) {
110 isSGPR = false;
111 width = 3;
109112 } else if (AMDGPU::SReg_128RegClass.contains(reg)) {
110113 isSGPR = true;
111114 width = 4;
719719 void SITargetLowering::adjustWritemask(MachineSDNode *&Node,
720720 SelectionDAG &DAG) const {
721721 SDNode *Users[4] = { };
722 unsigned Writemask = 0;
722 unsigned Writemask = 0, Lane = 0;
723723
724724 // Try to figure out the used register components
725725 for (SDNode::use_iterator I = Node->use_begin(), E = Node->use_end();
730730 I->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG)
731731 return;
732732
733 unsigned Lane = SubIdx2Lane(I->getConstantOperandVal(1));
733 Lane = SubIdx2Lane(I->getConstantOperandVal(1));
734734
735735 // Abort if we have more than one user per component
736736 if (Users[Lane])
751751 Ops.push_back(Node->getOperand(i));
752752 Node = (MachineSDNode*)DAG.UpdateNodeOperands(Node, Ops.data(), Ops.size());
753753
754 // If we only got one lane, replace it with a copy
755 if (Writemask == (1U << Lane)) {
756 SDValue RC = DAG.getTargetConstant(AMDGPU::VReg_32RegClassID, MVT::i32);
757 SDNode *Copy = DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
758 DebugLoc(), MVT::f32,
759 SDValue(Node, 0), RC);
760 DAG.ReplaceAllUsesWith(Users[Lane], Copy);
761 return;
762 }
763
754764 // Update the users of the node with the new indices
755765 for (unsigned i = 0, Idx = AMDGPU::sub0; i < 4; ++i) {
756766
779789
780790 return foldOperands(Node, DAG);
781791 }
792
793 /// \brief Assign the register class depending on the number of
794 /// bits set in the writemask
795 void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
796 SDNode *Node) const {
797 if (AMDGPU::isMIMG(MI->getOpcode()) == -1)
798 return;
799
800 unsigned VReg = MI->getOperand(0).getReg();
801 unsigned Writemask = MI->getOperand(1).getImm();
802 unsigned BitsSet = 0;
803 for (unsigned i = 0; i < 4; ++i)
804 BitsSet += Writemask & (1 << i) ? 1 : 0;
805
806 const TargetRegisterClass *RC;
807 switch (BitsSet) {
808 default: return;
809 case 1: RC = &AMDGPU::VReg_32RegClass; break;
810 case 2: RC = &AMDGPU::VReg_64RegClass; break;
811 case 3: RC = &AMDGPU::VReg_96RegClass; break;
812 }
813
814 MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
815 MRI.setRegClass(VReg, RC);
816 }
5252 virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
5353 virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
5454 virtual SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const;
55 virtual void AdjustInstrPostInstrSelection(MachineInstr *MI,
56 SDNode *Node) const;
5557
5658 int32_t analyzeImmediate(const SDNode *N) const;
5759 };
5757 AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, 0
5858 };
5959
// Sub-register index sequence used to copy a 96-bit register as three
// consecutive 32-bit moves; the list is zero-terminated.
const int16_t Sub0_2[] = {
  AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, 0
};
63
6064 const int16_t Sub0_1[] = {
6165 AMDGPU::sub0, AMDGPU::sub1, 0
6266 };
123127 AMDGPU::SReg_64RegClass.contains(SrcReg));
124128 Opcode = AMDGPU::V_MOV_B32_e32;
125129 SubIndices = Sub0_1;
130
131 } else if (AMDGPU::VReg_96RegClass.contains(DestReg)) {
132 assert(AMDGPU::VReg_96RegClass.contains(SrcReg));
133 Opcode = AMDGPU::V_MOV_B32_e32;
134 SubIndices = Sub0_2;
126135
127136 } else if (AMDGPU::VReg_128RegClass.contains(DestReg)) {
128137 assert(AMDGPU::VReg_128RegClass.contains(SrcReg) ||
345345 []> {
346346 let mayLoad = 1;
347347 let mayStore = 0;
348 let hasPostISelHook = 1;
348349 }
349350
350351 //===----------------------------------------------------------------------===//
378379 let ValueCols = [["1"]];
379380 }
380381
382 // Test if the supplied opcode is an MIMG instruction
381383 def isMIMG : InstrMapping {
382384 let FilterClass = "MIMG_Load_Helper";
383385 let RowFields = ["Inst"];
9393 [(add (trunc VGPR_32, 255)),
9494 (add (shl VGPR_32, 1))]>;
9595
// VGPR 96-bit registers: tuples of three consecutive 32-bit VGPRs
// (sub0 = VGPR_N, sub1 = VGPR_N+1, sub2 = VGPR_N+2; 254 tuples total).
def VGPR_96 : RegisterTuples<[sub0, sub1, sub2],
                            [(add (trunc VGPR_32, 254)),
                             (add (shl VGPR_32, 1)),
                             (add (shl VGPR_32, 2))]>;
101
96102 // VGPR 128-bit registers
97103 def VGPR_128 : RegisterTuples<[sub0, sub1, sub2, sub3],
98104 [(add (trunc VGPR_32, 253)),
161167
162168 def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32], 64, (add VGPR_64)>;
163169
// 96-bit VGPR register class, built from the VGPR_96 tuples. It is declared
// with value type 'untyped' — presumably because no legal 96-bit value type
// exists for it (TODO confirm) — so Size must be set explicitly.
def VReg_96 : RegisterClass<"AMDGPU", [untyped], 96, (add VGPR_96)> {
  let Size = 96;
}
173
164174 def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32], 128, (add VGPR_128)>;
165175
166176 def VReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 256, (add VGPR_256)>;
0 ;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
11
22 ;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 15
3 ;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 3
4 ;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 2
5 ;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 1
6 ;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 4
7 ;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 8
8 ;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 5
9 ;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 9
10 ;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 6
11 ;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 10
12 ;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 12
13 ;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 7
14 ;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 11
15 ;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 13
16 ;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 14
17 ;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 8
3 ;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+}}, 3
4 ;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+}}, 2
5 ;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+}}, 1
6 ;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+}}, 4
7 ;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+}}, 8
8 ;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+}}, 5
9 ;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+}}, 9
10 ;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+}}, 6
11 ;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+}}, 10
12 ;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+}}, 12
13 ;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 7
14 ;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 11
15 ;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 13
16 ;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 14
17 ;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+}}, 8
1818
1919 define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
2020 %v1 = insertelement <4 x i32> undef, i32 %a1, i32 0