llvm.org GIT mirror llvm / 6269b4f
AMDGPU/GlobalISel: Implement select() for >32-bit G_STORE

Reviewers: arsenm, nhaehnle

Subscribers: kzhuravl, wdng, yaxunl, rovka, kristof.beyls, dstuttard, tpr, llvm-commits, t-tye

Differential Revision: https://reviews.llvm.org/D46153

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@332154 91177308-0d34-0410-b5e6-96231b3b80d8

Tom Stellard, 2 years ago
3 changed file(s) with 47 addition(s) and 5 deletion(s). Raw diff Collapse all Expand all
159159
160160 bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const {
161161 MachineBasicBlock *BB = I.getParent();
162 MachineFunction *MF = BB->getParent();
163 MachineRegisterInfo &MRI = MF->getRegInfo();
162164 DebugLoc DL = I.getDebugLoc();
165 unsigned StoreSize = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);
166 unsigned Opcode;
163167
164168 // FIXME: Select store instruction based on address space
165 MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(AMDGPU::FLAT_STORE_DWORD))
169 switch (StoreSize) {
170 default:
171 return false;
172 case 32:
173 Opcode = AMDGPU::FLAT_STORE_DWORD;
174 break;
175 case 64:
176 Opcode = AMDGPU::FLAT_STORE_DWORDX2;
177 break;
178 case 96:
179 Opcode = AMDGPU::FLAT_STORE_DWORDX3;
180 break;
181 case 128:
182 Opcode = AMDGPU::FLAT_STORE_DWORDX4;
183 break;
184 }
185
186 MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
166187 .add(I.getOperand(1))
167188 .add(I.getOperand(0))
168189 .addImm(0) // offset
15781578 case 64:
15791579 return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_64RegClass :
15801580 &AMDGPU::SReg_64_XEXECRegClass;
1581 case 96:
1582 return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_96RegClass :
1583 nullptr;
1584 case 128:
1585 return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_128RegClass :
1586 &AMDGPU::SReg_128RegClass;
15811587 default:
15821588 llvm_unreachable("not implemented");
15831589 }
33 # REQUIRES: global-isel
44
55 --- |
6 define amdgpu_kernel void @global_addrspace(i32 addrspace(1)* %global0) { ret void }
6 define amdgpu_kernel void @global_addrspace(i32 addrspace(1)* %global0,
7 i64 addrspace(1)* %global1,
8 i96 addrspace(1)* %global2,
9 i128 addrspace(1)* %global3) { ret void }
710 ...
811 ---
912
1316
1417 # GCN: global_addrspace
1518 # GCN: [[PTR:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
16 # GCN: [[VAL:%[0-9]+]]:vgpr_32 = COPY $vgpr2
17 # GCN: FLAT_STORE_DWORD [[PTR]], [[VAL]], 0, 0, 0
19 # GCN: [[VAL4:%[0-9]+]]:vgpr_32 = COPY $vgpr2
20 # GCN: [[VAL8:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4
21 # GCN: [[VAL12:%[0-9]+]]:vreg_96 = COPY $vgpr5_vgpr6_vgpr7
22 # GCN: [[VAL16:%[0-9]+]]:vreg_128 = COPY $vgpr8_vgpr9_vgpr10_vgpr11
23 # GCN: FLAT_STORE_DWORD [[PTR]], [[VAL4]], 0, 0, 0
24 # GCN: FLAT_STORE_DWORDX2 [[PTR]], [[VAL8]], 0, 0, 0
25 # GCN: FLAT_STORE_DWORDX3 [[PTR]], [[VAL12]], 0, 0, 0
26 # GCN: FLAT_STORE_DWORDX4 [[PTR]], [[VAL16]], 0, 0, 0
1827
1928 body: |
2029 bb.0:
21 liveins: $vgpr0_vgpr1, $vgpr2
30 liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11
2231
2332 %0:vgpr(p1) = COPY $vgpr0_vgpr1
2433 %1:vgpr(s32) = COPY $vgpr2
34 %2:vgpr(s64) = COPY $vgpr3_vgpr4
35 %3:vgpr(s96) = COPY $vgpr5_vgpr6_vgpr7
36 %4:vgpr(s128) = COPY $vgpr8_vgpr9_vgpr10_vgpr11
2537 G_STORE %1, %0 :: (store 4 into %ir.global0)
38 G_STORE %2, %0 :: (store 8 into %ir.global1)
39 G_STORE %3, %0 :: (store 12 into %ir.global2, align 16)
40 G_STORE %4, %0 :: (store 16 into %ir.global3)
2641
2742 ...
2843 ---