llvm.org GIT mirror llvm / 2acdc08
Revert "AMDGPU: Make m0 unallocatable"

This reverts commit 124ad83dae04514f943902446520c859adee0e96.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@287932 91177308-0d34-0410-b5e6-96231b3b80d8

Marek Olsak, 3 years ago
13 changed file(s) with 60 addition(s) and 73 deletion(s). Raw diff Collapse all Expand all
252252 static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
253253 switch (NumVectorElts) {
254254 case 1:
255 return AMDGPU::SReg_32_XM0RegClassID;
255 return AMDGPU::SReg_32RegClassID;
256256 case 2:
257257 return AMDGPU::SReg_64RegClassID;
258258 case 4:
5858 addRegisterClass(MVT::i1, &AMDGPU::VReg_1RegClass);
5959 addRegisterClass(MVT::i64, &AMDGPU::SReg_64RegClass);
6060
61 addRegisterClass(MVT::i32, &AMDGPU::SReg_32_XM0RegClass);
61 addRegisterClass(MVT::i32, &AMDGPU::SReg_32RegClass);
6262 addRegisterClass(MVT::f32, &AMDGPU::VGPR_32RegClass);
6363
6464 addRegisterClass(MVT::f64, &AMDGPU::VReg_64RegClass);
7878 addRegisterClass(MVT::v16f32, &AMDGPU::VReg_512RegClass);
7979
8080 if (Subtarget->has16BitInsts()) {
81 addRegisterClass(MVT::i16, &AMDGPU::SReg_32_XM0RegClass);
82 addRegisterClass(MVT::f16, &AMDGPU::SReg_32_XM0RegClass);
81 addRegisterClass(MVT::i16, &AMDGPU::SReg_32RegClass);
82 addRegisterClass(MVT::f16, &AMDGPU::SReg_32RegClass);
8383 }
8484
8585 computeRegisterProperties(STI.getRegisterInfo());
940940 // Start adding system SGPRs.
941941 if (Info->hasWorkGroupIDX()) {
942942 unsigned Reg = Info->addWorkGroupIDX();
943 MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass);
943 MF.addLiveIn(Reg, &AMDGPU::SReg_32RegClass);
944944 CCInfo.AllocateReg(Reg);
945945 }
946946
947947 if (Info->hasWorkGroupIDY()) {
948948 unsigned Reg = Info->addWorkGroupIDY();
949 MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass);
949 MF.addLiveIn(Reg, &AMDGPU::SReg_32RegClass);
950950 CCInfo.AllocateReg(Reg);
951951 }
952952
953953 if (Info->hasWorkGroupIDZ()) {
954954 unsigned Reg = Info->addWorkGroupIDZ();
955 MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass);
955 MF.addLiveIn(Reg, &AMDGPU::SReg_32RegClass);
956956 CCInfo.AllocateReg(Reg);
957957 }
958958
959959 if (Info->hasWorkGroupInfo()) {
960960 unsigned Reg = Info->addWorkGroupInfo();
961 MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass);
961 MF.addLiveIn(Reg, &AMDGPU::SReg_32RegClass);
962962 CCInfo.AllocateReg(Reg);
963963 }
964964
24132413 SI::KernelInputOffsets::LOCAL_SIZE_Z);
24142414 case Intrinsic::amdgcn_workgroup_id_x:
24152415 case Intrinsic::r600_read_tgid_x:
2416 return CreateLiveInRegister(DAG, &AMDGPU::SReg_32_XM0RegClass,
2416 return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
24172417 TRI->getPreloadedValue(MF, SIRegisterInfo::WORKGROUP_ID_X), VT);
24182418 case Intrinsic::amdgcn_workgroup_id_y:
24192419 case Intrinsic::r600_read_tgid_y:
2420 return CreateLiveInRegister(DAG, &AMDGPU::SReg_32_XM0RegClass,
2420 return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
24212421 TRI->getPreloadedValue(MF, SIRegisterInfo::WORKGROUP_ID_Y), VT);
24222422 case Intrinsic::amdgcn_workgroup_id_z:
24232423 case Intrinsic::r600_read_tgid_z:
2424 return CreateLiveInRegister(DAG, &AMDGPU::SReg_32_XM0RegClass,
2424 return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
24252425 TRI->getPreloadedValue(MF, SIRegisterInfo::WORKGROUP_ID_Z), VT);
24262426 case Intrinsic::amdgcn_workitem_id_x:
24272427 case Intrinsic::r600_read_tidig_x:
41814181 default:
41824182 return std::make_pair(0U, nullptr);
41834183 case 32:
4184 return std::make_pair(0U, &AMDGPU::SReg_32_XM0RegClass);
4184 return std::make_pair(0U, &AMDGPU::SReg_32RegClass);
41854185 case 64:
41864186 return std::make_pair(0U, &AMDGPU::SGPR_64RegClass);
41874187 case 128:
363363 return;
364364 }
365365
366 if (RC == &AMDGPU::SReg_32_XM0RegClass ||
367 RC == &AMDGPU::SReg_32RegClass) {
366 if (RC == &AMDGPU::SReg_32RegClass) {
368367 if (SrcReg == AMDGPU::SCC) {
369368 BuildMI(MBB, MI, DL, get(AMDGPU::S_CSELECT_B32), DestReg)
370369 .addImm(-1)
242242
243243 MachineRegisterInfo &MRI = MF->getRegInfo();
244244 unsigned UnusedCarry = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
245 unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
245 unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
246246
247247 BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
248248 .addImm(Offset);
116116
117117 def SCC_CLASS : RegisterClass<"AMDGPU", [i1], 1, (add SCC)> {
118118 let CopyCost = -1;
119 let isAllocatable = 0;
120 }
121
122 def M0_CLASS : RegisterClass<"AMDGPU", [i32], 32, (add M0)> {
123 let CopyCost = 1;
124119 let isAllocatable = 0;
125120 }
126121
263258
264259 // Register class for all scalar registers (SGPRs + Special Registers)
265260 def SReg_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16], 32,
266 (add SReg_32_XM0, M0_CLASS)> {
261 (add SReg_32_XM0, M0, VCC_LO, VCC_HI, EXEC_LO, EXEC_HI, FLAT_SCR_LO, FLAT_SCR_HI)> {
267262 let AllocationPriority = 1;
268 let isAllocatable = 0;
269263 }
270264
271265 def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64], 32, (add SGPR_64Regs)> {
436436 MachineBasicBlock::iterator
437437 SIWholeQuadMode::saveSCC(MachineBasicBlock &MBB,
438438 MachineBasicBlock::iterator Before) {
439 unsigned SaveReg = MRI->createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
439 unsigned SaveReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
440440
441441 MachineInstr *Save =
442442 BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::COPY), SaveReg)
2626 # CHECK: S_NOP 0, implicit undef %5.sub0
2727 name: test0
2828 registers:
29 - { id: 0, class: sreg_32_xm0 }
30 - { id: 1, class: sreg_32_xm0 }
31 - { id: 2, class: sreg_32_xm0 }
29 - { id: 0, class: sreg_32 }
30 - { id: 1, class: sreg_32 }
31 - { id: 2, class: sreg_32 }
3232 - { id: 3, class: sreg_128 }
3333 - { id: 4, class: sreg_64 }
3434 - { id: 5, class: sreg_64 }
8686 - { id: 0, class: sreg_128 }
8787 - { id: 1, class: sreg_128 }
8888 - { id: 2, class: sreg_64 }
89 - { id: 3, class: sreg_32_xm0 }
89 - { id: 3, class: sreg_32 }
9090 - { id: 4, class: sreg_128 }
9191 - { id: 5, class: sreg_64 }
92 - { id: 6, class: sreg_32_xm0 }
93 - { id: 7, class: sreg_32_xm0 }
92 - { id: 6, class: sreg_32 }
93 - { id: 7, class: sreg_32 }
9494 - { id: 8, class: sreg_64 }
95 - { id: 9, class: sreg_32_xm0 }
95 - { id: 9, class: sreg_32 }
9696 - { id: 10, class: sreg_128 }
9797 body: |
9898 bb.0:
161161
162162 name: test2
163163 registers:
164 - { id: 0, class: sreg_32_xm0 }
165 - { id: 1, class: sreg_32_xm0 }
164 - { id: 0, class: sreg_32 }
165 - { id: 1, class: sreg_32 }
166166 - { id: 2, class: sreg_64 }
167167 - { id: 3, class: sreg_128 }
168 - { id: 4, class: sreg_32_xm0 }
169 - { id: 5, class: sreg_32_xm0 }
168 - { id: 4, class: sreg_32 }
169 - { id: 5, class: sreg_32 }
170170 - { id: 6, class: sreg_64 }
171171 - { id: 7, class: sreg_128 }
172172 - { id: 8, class: sreg_64 }
259259 name: test5
260260 tracksRegLiveness: true
261261 registers:
262 - { id: 0, class: sreg_32_xm0 }
262 - { id: 0, class: sreg_32 }
263263 - { id: 1, class: sreg_64 }
264264 body: |
265265 bb.0:
285285 name: loop0
286286 tracksRegLiveness: true
287287 registers:
288 - { id: 0, class: sreg_32_xm0 }
289 - { id: 1, class: sreg_32_xm0 }
290 - { id: 2, class: sreg_32_xm0 }
288 - { id: 0, class: sreg_32 }
289 - { id: 1, class: sreg_32 }
290 - { id: 2, class: sreg_32 }
291291 - { id: 3, class: sreg_128 }
292292 - { id: 4, class: sreg_128 }
293293 - { id: 5, class: sreg_128 }
338338 name: loop1
339339 tracksRegLiveness: true
340340 registers:
341 - { id: 0, class: sreg_32_xm0 }
342 - { id: 1, class: sreg_32_xm0 }
343 - { id: 2, class: sreg_32_xm0 }
344 - { id: 3, class: sreg_32_xm0 }
341 - { id: 0, class: sreg_32 }
342 - { id: 1, class: sreg_32 }
343 - { id: 2, class: sreg_32 }
344 - { id: 3, class: sreg_32 }
345345 - { id: 4, class: sreg_128 }
346346 - { id: 5, class: sreg_128 }
347347 - { id: 6, class: sreg_128 }
389389 name: loop2
390390 tracksRegLiveness: true
391391 registers:
392 - { id: 0, class: sreg_32_xm0 }
392 - { id: 0, class: sreg_32 }
393393 - { id: 1, class: sreg_128 }
394394 - { id: 2, class: sreg_128 }
395395 - { id: 3, class: sreg_128 }
2121 ret void
2222 }
2323
24 ; FIXME: Should be able to avoid copy
2524 ; GCN-LABEL: {{^}}inline_sreg_constraint_m0:
2625 ; GCN: s_mov_b32 m0, -1
27 ; GCN: s_mov_b32 [[COPY_M0:s[0-9]+]], m0
28 ; GCN: ; use [[COPY_M0]]
26 ; GCN-NOT: s_mov_b32 s{{[0-9]+}}, m0
27 ; GCN: ; use m0
2928 define void @inline_sreg_constraint_m0() {
3029 %m0 = tail call i32 asm sideeffect "s_mov_b32 m0, -1", "={M0}"()
3130 tail call void asm sideeffect "; use $0", "s"(i32 %m0)
2121 ; TODO: m0 should be folded.
2222 ; CHECK-LABEL: {{^}}test_readfirstlane_m0:
2323 ; CHECK: s_mov_b32 m0, -1
24 ; CHECK: s_mov_b32 [[COPY_M0:s[0-9]+]], m0
25 ; CHECK: v_mov_b32_e32 [[VVAL:v[0-9]]], [[COPY_M0]]
24 ; CHECK: v_mov_b32_e32 [[VVAL:v[0-9]]], m0
2625 ; CHECK: v_readfirstlane_b32 s{{[0-9]+}}, [[VVAL]]
2726 define void @test_readfirstlane_m0(i32 addrspace(1)* %out) #1 {
2827 %m0 = call i32 asm "s_mov_b32 m0, -1", "={M0}"()
2121 ; TODO: m0 should be folded.
2222 ; CHECK-LABEL: {{^}}test_readlane_m0_sreg:
2323 ; CHECK: s_mov_b32 m0, -1
24 ; CHECK: s_mov_b32 [[COPY_M0:s[0-9]+]], m0
25 ; CHECK: v_mov_b32_e32 [[VVAL:v[0-9]]], [[COPY_M0]]
24 ; CHECK: v_mov_b32_e32 [[VVAL:v[0-9]]], m0
2625 ; CHECK: v_readlane_b32 s{{[0-9]+}}, [[VVAL]], s{{[0-9]+}}
2726 define void @test_readlane_m0_sreg(i32 addrspace(1)* %out, i32 %src1) #1 {
2827 %m0 = call i32 asm "s_mov_b32 m0, -1", "={M0}"()
22 declare i32 @llvm.read_register.i32(metadata) #0
33 declare i64 @llvm.read_register.i64(metadata) #0
44
5 ; FIXME: Should be able to eliminate copy
65 ; CHECK-LABEL: {{^}}test_read_m0:
76 ; CHECK: s_mov_b32 m0, -1
8 ; CHECK: s_mov_b32 [[COPY_M0:s[0-9]+]], m0
9 ; CHECK: v_mov_b32_e32 [[COPY:v[0-9]+]], [[COPY_M0]]
7 ; CHECK: v_mov_b32_e32 [[COPY:v[0-9]+]], m0
108 ; CHECK: buffer_store_dword [[COPY]]
119 define void @test_read_m0(i32 addrspace(1)* %out) #0 {
1210 store volatile i32 0, i32 addrspace(3)* undef
88 ; GCN-LABEL: {{^}}spill_m0:
99 ; TOSMEM: s_mov_b32 s84, SCRATCH_RSRC_DWORD0
1010
11 ; GCN-DAG: s_cmp_lg_u32
11 ; GCN: s_cmp_lg_u32
1212
13 ; TOVGPR-DAG: s_mov_b32 [[M0_COPY:s[0-9]+]], m0
14 ; TOVGPR: v_writelane_b32 [[SPILL_VREG:v[0-9]+]], [[M0_COPY]], 0
13 ; TOVGPR: s_mov_b32 vcc_hi, m0
14 ; TOVGPR: v_writelane_b32 [[SPILL_VREG:v[0-9]+]], vcc_hi, 0
1515
16 ; TOVMEM-DAG: s_mov_b32 [[M0_COPY:s[0-9]+]], m0
17 ; TOVMEM-DAG: v_mov_b32_e32 [[SPILL_VREG:v[0-9]+]], [[M0_COPY]]
16 ; TOVMEM: v_mov_b32_e32 [[SPILL_VREG:v[0-9]+]], m0
1817 ; TOVMEM: buffer_store_dword [[SPILL_VREG]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} ; 4-byte Folded Spill
1918 ; TOVMEM: s_waitcnt vmcnt(0)
2019
21 ; TOSMEM-DAG: s_mov_b32 [[M0_COPY:s[0-9]+]], m0
20 ; TOSMEM: s_mov_b32 vcc_hi, m0
2221 ; TOSMEM: s_mov_b32 m0, s3{{$}}
23 ; TOSMEM-NOT: [[M0_COPY]]
24 ; TOSMEM: s_buffer_store_dword [[M0_COPY]], s[84:87], m0 ; 4-byte Folded Spill
22 ; TOSMEM-NOT: vcc_hi
23 ; TOSMEM: s_buffer_store_dword vcc_hi, s[84:87], m0 ; 4-byte Folded Spill
2524 ; TOSMEM: s_waitcnt lgkmcnt(0)
2625
2726 ; GCN: s_cbranch_scc1 [[ENDIF:BB[0-9]+_[0-9]+]]
2827
2928 ; GCN: [[ENDIF]]:
30 ; TOVGPR: v_readlane_b32 [[M0_RESTORE:s[0-9]+]], [[SPILL_VREG]], 0
31 ; TOVGPR: s_mov_b32 m0, [[M0_RESTORE]]
29 ; TOVGPR: v_readlane_b32 vcc_hi, [[SPILL_VREG]], 0
30 ; TOVGPR: s_mov_b32 m0, vcc_hi
3231
3332 ; TOVMEM: buffer_load_dword [[RELOAD_VREG:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} ; 4-byte Folded Reload
3433 ; TOVMEM: s_waitcnt vmcnt(0)
35 ; TOVMEM: v_readfirstlane_b32 [[M0_RESTORE:s[0-9]+]], [[RELOAD_VREG]]
36 ; TOVMEM: s_mov_b32 m0, [[M0_RESTORE]]
34 ; TOVMEM: v_readfirstlane_b32 vcc_hi, [[RELOAD_VREG]]
35 ; TOVMEM: s_mov_b32 m0, vcc_hi
3736
3837 ; TOSMEM: s_mov_b32 m0, s3{{$}}
39 ; TOSMEM: s_buffer_load_dword [[M0_RESTORE:s[0-9]+]], s[84:87], m0 ; 4-byte Folded Reload
40 ; TOSMEM-NOT: [[M0_RESTORE]]
41 ; TOSMEM: s_mov_b32 m0, [[M0_RESTORE]]
38 ; TOSMEM: s_buffer_load_dword vcc_hi, s[84:87], m0 ; 4-byte Folded Reload
39 ; TOSMEM-NOT: vcc_hi
40 ; TOSMEM: s_mov_b32 m0, vcc_hi
4241
43 ; GCN: s_add_i32 s{{[0-9]+}}, m0, 1
42 ; GCN: s_add_i32 m0, m0, 1
4443 define void @spill_m0(i32 %cond, i32 addrspace(1)* %out) #0 {
4544 entry:
4645 %m0 = call i32 asm sideeffect "s_mov_b32 m0, 0", "={M0}"() #0
55 name: phi_visit_order
66 tracksRegLiveness: true
77 registers:
8 - { id: 0, class: sreg_32_xm0 }
8 - { id: 0, class: sreg_32 }
99 - { id: 1, class: sreg_64 }
10 - { id: 2, class: sreg_32_xm0 }
10 - { id: 2, class: sreg_32 }
1111 - { id: 7, class: vgpr_32 }
12 - { id: 8, class: sreg_32_xm0 }
12 - { id: 8, class: sreg_32 }
1313 - { id: 9, class: vgpr_32 }
1414 - { id: 10, class: sreg_64 }
15 - { id: 11, class: sreg_32_xm0 }
15 - { id: 11, class: sreg_32 }
1616
1717 body: |
1818 ; GCN-LABEL: name: phi_visit_order