llvm.org GIT mirror — llvm commit 124ad83
AMDGPU: Make m0 unallocatable

m0 may need to be written for spill code, so we don't want general code uses relying on the value stored in it. This introduces a few code quality regressions where copies from m0 are not coalesced into copies of a copy of m0.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@287841 91177308-0d34-0410-b5e6-96231b3b80d8

Matt Arsenault
13 changed files with 73 additions and 60 deletions.
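For context on why m0 must not be generally allocatable: when an SGPR is spilled (for example through scratch via SMEM on subtargets that support it), the spill sequence itself has to write the scratch offset into m0. A minimal sketch of such a spill sequence, mirroring the TOSMEM checks in the spill-m0.ll test updated below (register numbers are illustrative, not taken from the patch):

    s_mov_b32 s5, m0                      ; copy the user's m0 value aside first
    s_mov_b32 m0, s3                      ; spill code clobbers m0 with the scratch offset
    s_buffer_store_dword s5, s[84:87], m0 ; 4-byte Folded Spill

Any value the allocator had placed in m0 live across such a point would be silently destroyed, hence the new unallocatable M0_CLASS in this patch.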
@@ -252,7 +252,7 @@
 static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
   switch (NumVectorElts) {
   case 1:
-    return AMDGPU::SReg_32RegClassID;
+    return AMDGPU::SReg_32_XM0RegClassID;
   case 2:
     return AMDGPU::SReg_64RegClassID;
   case 4:
@@ -58,7 +58,7 @@
   addRegisterClass(MVT::i1, &AMDGPU::VReg_1RegClass);
   addRegisterClass(MVT::i64, &AMDGPU::SReg_64RegClass);

-  addRegisterClass(MVT::i32, &AMDGPU::SReg_32RegClass);
+  addRegisterClass(MVT::i32, &AMDGPU::SReg_32_XM0RegClass);
   addRegisterClass(MVT::f32, &AMDGPU::VGPR_32RegClass);

   addRegisterClass(MVT::f64, &AMDGPU::VReg_64RegClass);
@@ -78,8 +78,8 @@
   addRegisterClass(MVT::v16f32, &AMDGPU::VReg_512RegClass);

   if (Subtarget->has16BitInsts()) {
-    addRegisterClass(MVT::i16, &AMDGPU::SReg_32RegClass);
-    addRegisterClass(MVT::f16, &AMDGPU::SReg_32RegClass);
+    addRegisterClass(MVT::i16, &AMDGPU::SReg_32_XM0RegClass);
+    addRegisterClass(MVT::f16, &AMDGPU::SReg_32_XM0RegClass);
   }

   computeRegisterProperties(STI.getRegisterInfo());
@@ -940,25 +940,25 @@
   // Start adding system SGPRs.
   if (Info->hasWorkGroupIDX()) {
     unsigned Reg = Info->addWorkGroupIDX();
-    MF.addLiveIn(Reg, &AMDGPU::SReg_32RegClass);
+    MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass);
     CCInfo.AllocateReg(Reg);
   }

   if (Info->hasWorkGroupIDY()) {
     unsigned Reg = Info->addWorkGroupIDY();
-    MF.addLiveIn(Reg, &AMDGPU::SReg_32RegClass);
+    MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass);
     CCInfo.AllocateReg(Reg);
   }

   if (Info->hasWorkGroupIDZ()) {
     unsigned Reg = Info->addWorkGroupIDZ();
-    MF.addLiveIn(Reg, &AMDGPU::SReg_32RegClass);
+    MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass);
     CCInfo.AllocateReg(Reg);
   }

   if (Info->hasWorkGroupInfo()) {
     unsigned Reg = Info->addWorkGroupInfo();
-    MF.addLiveIn(Reg, &AMDGPU::SReg_32RegClass);
+    MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass);
     CCInfo.AllocateReg(Reg);
   }

@@ -2413,15 +2413,15 @@
                                   SI::KernelInputOffsets::LOCAL_SIZE_Z);
   case Intrinsic::amdgcn_workgroup_id_x:
   case Intrinsic::r600_read_tgid_x:
-    return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
+    return CreateLiveInRegister(DAG, &AMDGPU::SReg_32_XM0RegClass,
      TRI->getPreloadedValue(MF, SIRegisterInfo::WORKGROUP_ID_X), VT);
   case Intrinsic::amdgcn_workgroup_id_y:
   case Intrinsic::r600_read_tgid_y:
-    return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
+    return CreateLiveInRegister(DAG, &AMDGPU::SReg_32_XM0RegClass,
      TRI->getPreloadedValue(MF, SIRegisterInfo::WORKGROUP_ID_Y), VT);
   case Intrinsic::amdgcn_workgroup_id_z:
   case Intrinsic::r600_read_tgid_z:
-    return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
+    return CreateLiveInRegister(DAG, &AMDGPU::SReg_32_XM0RegClass,
      TRI->getPreloadedValue(MF, SIRegisterInfo::WORKGROUP_ID_Z), VT);
   case Intrinsic::amdgcn_workitem_id_x:
   case Intrinsic::r600_read_tidig_x:
@@ -4181,7 +4181,7 @@
     default:
       return std::make_pair(0U, nullptr);
     case 32:
-      return std::make_pair(0U, &AMDGPU::SReg_32RegClass);
+      return std::make_pair(0U, &AMDGPU::SReg_32_XM0RegClass);
     case 64:
       return std::make_pair(0U, &AMDGPU::SGPR_64RegClass);
    case 128:
@@ -363,7 +363,8 @@
     return;
   }

-  if (RC == &AMDGPU::SReg_32RegClass) {
+  if (RC == &AMDGPU::SReg_32_XM0RegClass ||
+      RC == &AMDGPU::SReg_32RegClass) {
     if (SrcReg == AMDGPU::SCC) {
       BuildMI(MBB, MI, DL, get(AMDGPU::S_CSELECT_B32), DestReg)
         .addImm(-1)
@@ -232,7 +232,7 @@

   MachineRegisterInfo &MRI = MF->getRegInfo();
   unsigned UnusedCarry = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
-  unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+  unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);

   BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
     .addImm(Offset);
@@ -116,6 +116,11 @@

 def SCC_CLASS : RegisterClass<"AMDGPU", [i1], 1, (add SCC)> {
   let CopyCost = -1;
+  let isAllocatable = 0;
+}
+
+def M0_CLASS : RegisterClass<"AMDGPU", [i32], 32, (add M0)> {
+  let CopyCost = 1;
   let isAllocatable = 0;
 }

@@ -258,8 +263,9 @@

 // Register class for all scalar registers (SGPRs + Special Registers)
 def SReg_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16], 32,
-  (add SReg_32_XM0, M0, VCC_LO, VCC_HI, EXEC_LO, EXEC_HI, FLAT_SCR_LO, FLAT_SCR_HI)> {
+  (add SReg_32_XM0, M0_CLASS)> {
   let AllocationPriority = 1;
+  let isAllocatable = 0;
 }

 def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64], 32, (add SGPR_64Regs)> {
@@ -436,7 +436,7 @@
 MachineBasicBlock::iterator
 SIWholeQuadMode::saveSCC(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator Before) {
-  unsigned SaveReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
+  unsigned SaveReg = MRI->createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);

   MachineInstr *Save =
       BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::COPY), SaveReg)
@@ -26,9 +26,9 @@
 # CHECK: S_NOP 0, implicit undef %5.sub0
 name: test0
 registers:
-  - { id: 0, class: sreg_32 }
-  - { id: 1, class: sreg_32 }
-  - { id: 2, class: sreg_32 }
+  - { id: 0, class: sreg_32_xm0 }
+  - { id: 1, class: sreg_32_xm0 }
+  - { id: 2, class: sreg_32_xm0 }
   - { id: 3, class: sreg_128 }
   - { id: 4, class: sreg_64 }
   - { id: 5, class: sreg_64 }
@@ -86,13 +86,13 @@
   - { id: 0, class: sreg_128 }
   - { id: 1, class: sreg_128 }
   - { id: 2, class: sreg_64 }
-  - { id: 3, class: sreg_32 }
+  - { id: 3, class: sreg_32_xm0 }
   - { id: 4, class: sreg_128 }
   - { id: 5, class: sreg_64 }
-  - { id: 6, class: sreg_32 }
-  - { id: 7, class: sreg_32 }
+  - { id: 6, class: sreg_32_xm0 }
+  - { id: 7, class: sreg_32_xm0 }
   - { id: 8, class: sreg_64 }
-  - { id: 9, class: sreg_32 }
+  - { id: 9, class: sreg_32_xm0 }
   - { id: 10, class: sreg_128 }
 body: |
   bb.0:
@@ -161,12 +161,12 @@

 name: test2
 registers:
-  - { id: 0, class: sreg_32 }
-  - { id: 1, class: sreg_32 }
+  - { id: 0, class: sreg_32_xm0 }
+  - { id: 1, class: sreg_32_xm0 }
   - { id: 2, class: sreg_64 }
   - { id: 3, class: sreg_128 }
-  - { id: 4, class: sreg_32 }
-  - { id: 5, class: sreg_32 }
+  - { id: 4, class: sreg_32_xm0 }
+  - { id: 5, class: sreg_32_xm0 }
   - { id: 6, class: sreg_64 }
   - { id: 7, class: sreg_128 }
   - { id: 8, class: sreg_64 }
@@ -259,7 +259,7 @@
 name: test5
 tracksRegLiveness: true
 registers:
-  - { id: 0, class: sreg_32 }
+  - { id: 0, class: sreg_32_xm0 }
   - { id: 1, class: sreg_64 }
 body: |
   bb.0:
@@ -285,9 +285,9 @@
 name: loop0
 tracksRegLiveness: true
 registers:
-  - { id: 0, class: sreg_32 }
-  - { id: 1, class: sreg_32 }
-  - { id: 2, class: sreg_32 }
+  - { id: 0, class: sreg_32_xm0 }
+  - { id: 1, class: sreg_32_xm0 }
+  - { id: 2, class: sreg_32_xm0 }
   - { id: 3, class: sreg_128 }
   - { id: 4, class: sreg_128 }
   - { id: 5, class: sreg_128 }
@@ -338,10 +338,10 @@
 name: loop1
 tracksRegLiveness: true
 registers:
-  - { id: 0, class: sreg_32 }
-  - { id: 1, class: sreg_32 }
-  - { id: 2, class: sreg_32 }
-  - { id: 3, class: sreg_32 }
+  - { id: 0, class: sreg_32_xm0 }
+  - { id: 1, class: sreg_32_xm0 }
+  - { id: 2, class: sreg_32_xm0 }
+  - { id: 3, class: sreg_32_xm0 }
   - { id: 4, class: sreg_128 }
   - { id: 5, class: sreg_128 }
   - { id: 6, class: sreg_128 }
@@ -389,7 +389,7 @@
 name: loop2
 tracksRegLiveness: true
 registers:
-  - { id: 0, class: sreg_32 }
+  - { id: 0, class: sreg_32_xm0 }
   - { id: 1, class: sreg_128 }
   - { id: 2, class: sreg_128 }
   - { id: 3, class: sreg_128 }
@@ -21,10 +21,11 @@
   ret void
 }

+; FIXME: Should be able to avoid copy
 ; GCN-LABEL: {{^}}inline_sreg_constraint_m0:
 ; GCN: s_mov_b32 m0, -1
-; GCN-NOT: s_mov_b32 s{{[0-9]+}}, m0
-; GCN: ; use m0
+; GCN: s_mov_b32 [[COPY_M0:s[0-9]+]], m0
+; GCN: ; use [[COPY_M0]]
 define void @inline_sreg_constraint_m0() {
   %m0 = tail call i32 asm sideeffect "s_mov_b32 m0, -1", "={M0}"()
   tail call void asm sideeffect "; use $0", "s"(i32 %m0)
@@ -21,7 +21,8 @@
 ; TODO: m0 should be folded.
 ; CHECK-LABEL: {{^}}test_readfirstlane_m0:
 ; CHECK: s_mov_b32 m0, -1
-; CHECK: v_mov_b32_e32 [[VVAL:v[0-9]]], m0
+; CHECK: s_mov_b32 [[COPY_M0:s[0-9]+]], m0
+; CHECK: v_mov_b32_e32 [[VVAL:v[0-9]]], [[COPY_M0]]
 ; CHECK: v_readfirstlane_b32 s{{[0-9]+}}, [[VVAL]]
 define void @test_readfirstlane_m0(i32 addrspace(1)* %out) #1 {
   %m0 = call i32 asm "s_mov_b32 m0, -1", "={M0}"()
@@ -21,7 +21,8 @@
 ; TODO: m0 should be folded.
 ; CHECK-LABEL: {{^}}test_readlane_m0_sreg:
 ; CHECK: s_mov_b32 m0, -1
-; CHECK: v_mov_b32_e32 [[VVAL:v[0-9]]], m0
+; CHECK: s_mov_b32 [[COPY_M0:s[0-9]+]], m0
+; CHECK: v_mov_b32_e32 [[VVAL:v[0-9]]], [[COPY_M0]]
 ; CHECK: v_readlane_b32 s{{[0-9]+}}, [[VVAL]], s{{[0-9]+}}
 define void @test_readlane_m0_sreg(i32 addrspace(1)* %out, i32 %src1) #1 {
   %m0 = call i32 asm "s_mov_b32 m0, -1", "={M0}"()
@@ -2,9 +2,11 @@
 declare i32 @llvm.read_register.i32(metadata) #0
 declare i64 @llvm.read_register.i64(metadata) #0

+; FIXME: Should be able to eliminate copy
 ; CHECK-LABEL: {{^}}test_read_m0:
 ; CHECK: s_mov_b32 m0, -1
-; CHECK: v_mov_b32_e32 [[COPY:v[0-9]+]], m0
+; CHECK: s_mov_b32 [[COPY_M0:s[0-9]+]], m0
+; CHECK: v_mov_b32_e32 [[COPY:v[0-9]+]], [[COPY_M0]]
 ; CHECK: buffer_store_dword [[COPY]]
 define void @test_read_m0(i32 addrspace(1)* %out) #0 {
   store volatile i32 0, i32 addrspace(3)* undef
@@ -8,38 +8,39 @@
 ; GCN-LABEL: {{^}}spill_m0:
 ; TOSMEM: s_mov_b32 s84, SCRATCH_RSRC_DWORD0

-; GCN: s_cmp_lg_u32
+; GCN-DAG: s_cmp_lg_u32

-; TOVGPR: s_mov_b32 vcc_hi, m0
-; TOVGPR: v_writelane_b32 [[SPILL_VREG:v[0-9]+]], vcc_hi, 0
+; TOVGPR-DAG: s_mov_b32 [[M0_COPY:s[0-9]+]], m0
+; TOVGPR: v_writelane_b32 [[SPILL_VREG:v[0-9]+]], [[M0_COPY]], 0

-; TOVMEM: v_mov_b32_e32 [[SPILL_VREG:v[0-9]+]], m0
+; TOVMEM-DAG: s_mov_b32 [[M0_COPY:s[0-9]+]], m0
+; TOVMEM-DAG: v_mov_b32_e32 [[SPILL_VREG:v[0-9]+]], [[M0_COPY]]
 ; TOVMEM: buffer_store_dword [[SPILL_VREG]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} ; 4-byte Folded Spill
 ; TOVMEM: s_waitcnt vmcnt(0)

-; TOSMEM: s_mov_b32 vcc_hi, m0
+; TOSMEM-DAG: s_mov_b32 [[M0_COPY:s[0-9]+]], m0
 ; TOSMEM: s_mov_b32 m0, s3{{$}}
-; TOSMEM-NOT: vcc_hi
-; TOSMEM: s_buffer_store_dword vcc_hi, s[84:87], m0 ; 4-byte Folded Spill
+; TOSMEM-NOT: [[M0_COPY]]
+; TOSMEM: s_buffer_store_dword [[M0_COPY]], s[84:87], m0 ; 4-byte Folded Spill
 ; TOSMEM: s_waitcnt lgkmcnt(0)

 ; GCN: s_cbranch_scc1 [[ENDIF:BB[0-9]+_[0-9]+]]

 ; GCN: [[ENDIF]]:
-; TOVGPR: v_readlane_b32 vcc_hi, [[SPILL_VREG]], 0
-; TOVGPR: s_mov_b32 m0, vcc_hi
+; TOVGPR: v_readlane_b32 [[M0_RESTORE:s[0-9]+]], [[SPILL_VREG]], 0
+; TOVGPR: s_mov_b32 m0, [[M0_RESTORE]]

 ; TOVMEM: buffer_load_dword [[RELOAD_VREG:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} ; 4-byte Folded Reload
 ; TOVMEM: s_waitcnt vmcnt(0)
-; TOVMEM: v_readfirstlane_b32 vcc_hi, [[RELOAD_VREG]]
-; TOVMEM: s_mov_b32 m0, vcc_hi
+; TOVMEM: v_readfirstlane_b32 [[M0_RESTORE:s[0-9]+]], [[RELOAD_VREG]]
+; TOVMEM: s_mov_b32 m0, [[M0_RESTORE]]

 ; TOSMEM: s_mov_b32 m0, s3{{$}}
-; TOSMEM: s_buffer_load_dword vcc_hi, s[84:87], m0 ; 4-byte Folded Reload
-; TOSMEM-NOT: vcc_hi
-; TOSMEM: s_mov_b32 m0, vcc_hi
+; TOSMEM: s_buffer_load_dword [[M0_RESTORE:s[0-9]+]], s[84:87], m0 ; 4-byte Folded Reload
+; TOSMEM-NOT: [[M0_RESTORE]]
+; TOSMEM: s_mov_b32 m0, [[M0_RESTORE]]

-; GCN: s_add_i32 m0, m0, 1
+; GCN: s_add_i32 s{{[0-9]+}}, m0, 1
 define void @spill_m0(i32 %cond, i32 addrspace(1)* %out) #0 {
 entry:
   %m0 = call i32 asm sideeffect "s_mov_b32 m0, 0", "={M0}"() #0
@@ -5,14 +5,14 @@
 name: phi_visit_order
 tracksRegLiveness: true
 registers:
-  - { id: 0, class: sreg_32 }
+  - { id: 0, class: sreg_32_xm0 }
   - { id: 1, class: sreg_64 }
-  - { id: 2, class: sreg_32 }
+  - { id: 2, class: sreg_32_xm0 }
   - { id: 7, class: vgpr_32 }
-  - { id: 8, class: sreg_32 }
+  - { id: 8, class: sreg_32_xm0 }
   - { id: 9, class: vgpr_32 }
   - { id: 10, class: sreg_64 }
-  - { id: 11, class: sreg_32 }
+  - { id: 11, class: sreg_32_xm0 }

 body: |
   ; GCN-LABEL: name: phi_visit_order