llvm.org GIT mirror llvm / 47bf9db
Merging r262732: ------------------------------------------------------------------------ r262732 | thomas.stellard | 2016-03-04 10:31:18 -0800 (Fri, 04 Mar 2016) | 12 lines AMDGPU/SI: Add support for spilling SGPRs to scratch buffer Summary: This is necessary for when we run out of VGPRs and can no longer use v_{read,write}_lane for spilling SGPRs. Reviewers: arsenm Subscribers: arsenm, llvm-commits Differential Revision: http://reviews.llvm.org/D17592 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@271722 91177308-0d34-0410-b5e6-96231b3b80d8 Tom Stellard 3 years ago
6 changed file(s) with 138 addition(s) and 35 deletion(s). Raw diff Collapse all Expand all
588588 .addFrameIndex(FrameIndex) // frame_idx
589589 .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc
590590 .addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset
591 .addImm(0) // offset
591592 .addMemOperand(MMO);
592593 }
593594
670671 .addFrameIndex(FrameIndex) // frame_idx
671672 .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc
672673 .addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset
674 .addImm(0) // offset
673675 .addMemOperand(MMO);
674676 }
675677
20052005 def _SAVE : InstSI <
20062006 (outs),
20072007 (ins vgpr_class:$src, i32imm:$frame_idx, SReg_128:$scratch_rsrc,
2008 SReg_32:$scratch_offset),
2009 "", []
2010 > {
2008 SReg_32:$scratch_offset, i32imm:$offset),
2009 "", []> {
20112010 let mayStore = 1;
20122011 let mayLoad = 0;
20132012 }
20142013
20152014 def _RESTORE : InstSI <
20162015 (outs vgpr_class:$dst),
2017 (ins i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset),
2018 "", []
2019 > {
2016 (ins i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset,
2017 i32imm:$offset),
2018 "", []> {
20202019 let mayStore = 0;
20212020 let mayLoad = 1;
20222021 }
161161 MachineFunction *MF,
162162 unsigned FrameIndex,
163163 unsigned SubIdx) {
164 const MachineFrameInfo *FrameInfo = MF->getFrameInfo();
164 MachineFrameInfo *FrameInfo = MF->getFrameInfo();
165165 const SIRegisterInfo *TRI = static_cast(
166166 MF->getSubtarget().getRegisterInfo());
167167 MachineRegisterInfo &MRI = MF->getRegInfo();
172172 unsigned Lane = (Offset / 4) % 64;
173173
174174 struct SpilledReg Spill;
175 Spill.Lane = Lane;
175176
176177 if (!LaneVGPRs.count(LaneVGPRIdx)) {
177178 unsigned LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass);
178179
179 if (LaneVGPR == AMDGPU::NoRegister) {
180 LLVMContext &Ctx = MF->getFunction()->getContext();
181 Ctx.emitError("Ran out of VGPRs for spilling SGPR");
182
183 // When compiling from inside Mesa, the compilation continues.
184 // Select an arbitrary register to avoid triggering assertions
185 // during subsequent passes.
186 LaneVGPR = AMDGPU::VGPR0;
187 }
180 if (LaneVGPR == AMDGPU::NoRegister)
181 // We have no VGPRs left for spilling SGPRs.
182 return Spill;
183
188184
189185 LaneVGPRs[LaneVGPRIdx] = LaneVGPR;
190186
197193 }
198194
199195 Spill.VGPR = LaneVGPRs[LaneVGPRIdx];
200 Spill.Lane = Lane;
201196 return Spill;
202197 }
203198
112112 unsigned VGPR;
113113 int Lane;
114114 SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) { }
115 SpilledReg() : VGPR(0), Lane(-1) { }
115 SpilledReg() : VGPR(AMDGPU::NoRegister), Lane(-1) { }
116116 bool hasLane() { return Lane != -1;}
117 bool hasReg() { return VGPR != AMDGPU::NoRegister;}
117118 };
118119
119120 // SIMachineFunctionInfo definition
306306 case AMDGPU::SI_SPILL_S64_SAVE:
307307 case AMDGPU::SI_SPILL_S32_SAVE: {
308308 unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
309 unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
309310
310311 for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
311312 unsigned SubReg = getPhysRegSubReg(MI->getOperand(0).getReg(),
313314 struct SIMachineFunctionInfo::SpilledReg Spill =
314315 MFI->getSpilledReg(MF, Index, i);
315316
316 BuildMI(*MBB, MI, DL,
317 TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
318 Spill.VGPR)
319 .addReg(SubReg)
320 .addImm(Spill.Lane);
321
322 // FIXME: Since this spills to another register instead of an actual
323 // frame index, we should delete the frame index when all references to
324 // it are fixed.
317 if (Spill.hasReg()) {
318 BuildMI(*MBB, MI, DL,
319 TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
320 Spill.VGPR)
321 .addReg(SubReg)
322 .addImm(Spill.Lane);
323
324 // FIXME: Since this spills to another register instead of an actual
325 // frame index, we should delete the frame index when all references to
326 // it are fixed.
327 } else {
328 // Spill SGPR to a frame index.
329 // FIXME we should use S_STORE_DWORD here for VI.
330 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
331 .addReg(SubReg);
332
333 unsigned Size = FrameInfo->getObjectSize(Index);
334 unsigned Align = FrameInfo->getObjectAlignment(Index);
335 MachinePointerInfo PtrInfo
336 = MachinePointerInfo::getFixedStack(*MF, Index);
337 MachineMemOperand *MMO
338 = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
339 Size, Align);
340 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE))
341 .addReg(TmpReg) // src
342 .addFrameIndex(Index) // frame_idx
343 .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc
344 .addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset
345 .addImm(i * 4) // offset
346 .addMemOperand(MMO);
347 }
325348 }
326349 MI->eraseFromParent();
327350 break;
334357 case AMDGPU::SI_SPILL_S64_RESTORE:
335358 case AMDGPU::SI_SPILL_S32_RESTORE: {
336359 unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
360 unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
337361
338362 for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
339363 unsigned SubReg = getPhysRegSubReg(MI->getOperand(0).getReg(),
341365 struct SIMachineFunctionInfo::SpilledReg Spill =
342366 MFI->getSpilledReg(MF, Index, i);
343367
344 BuildMI(*MBB, MI, DL,
345 TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
346 SubReg)
347 .addReg(Spill.VGPR)
348 .addImm(Spill.Lane)
349 .addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
368 if (Spill.hasReg()) {
369 BuildMI(*MBB, MI, DL,
370 TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
371 SubReg)
372 .addReg(Spill.VGPR)
373 .addImm(Spill.Lane)
374 .addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
375 } else {
376 // Restore SGPR from a stack slot.
377 // FIXME: We should use S_LOAD_DWORD here for VI.
378
379 unsigned Align = FrameInfo->getObjectAlignment(Index);
380 unsigned Size = FrameInfo->getObjectSize(Index);
381
382 MachinePointerInfo PtrInfo
383 = MachinePointerInfo::getFixedStack(*MF, Index);
384
385 MachineMemOperand *MMO = MF->getMachineMemOperand(
386 PtrInfo, MachineMemOperand::MOLoad, Size, Align);
387
388 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg)
389 .addFrameIndex(Index) // frame_idx
390 .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc
391 .addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset
392 .addImm(i * 4) // offset
393 .addMemOperand(MMO);
394 BuildMI(*MBB, MI, DL,
395 TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32), SubReg)
396 .addReg(TmpReg)
397 .addImm(0)
398 .addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
399 }
350400 }
351401
352402 // TODO: only do this when it is needed
380430 TII->getNamedOperand(*MI, AMDGPU::OpName::src)->getReg(),
381431 TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(),
382432 TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
383 FrameInfo->getObjectOffset(Index));
433 FrameInfo->getObjectOffset(Index) +
434 TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm());
384435 MI->eraseFromParent();
385436 break;
386437 case AMDGPU::SI_SPILL_V32_RESTORE:
393444 TII->getNamedOperand(*MI, AMDGPU::OpName::dst)->getReg(),
394445 TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(),
395446 TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
396 FrameInfo->getObjectOffset(Index));
447 FrameInfo->getObjectOffset(Index) +
448 TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm());
397449 MI->eraseFromParent();
398450 break;
399451 }
0 ; RUN: llc -march=amdgcn -mcpu=fiji < %s | FileCheck %s
1
2 ; Make sure this doesn't crash.
3 ; CHECK: {{^}}test:
4 ; CHECK: s_endpgm
; The inline asm statements below mark every SGPR and every VGPR as
; clobbered. With no VGPR lanes available for v_writelane-based SGPR
; spilling, the backend must fall back to spilling SGPRs to the scratch
; buffer (the code path added by this commit). The test only verifies
; that compilation completes and emits s_endpgm — it asserts nothing
; about the spill code itself.
5 define void @test(i32 addrspace(1)* %out, i32 %in) {
; Clobber SGPR0-SGPR95 in groups of eight.
6 call void asm sideeffect "", "~{SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7}" ()
7 call void asm sideeffect "", "~{SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15}" ()
8 call void asm sideeffect "", "~{SGPR16_SGPR17_SGPR18_SGPR19_SGPR20_SGPR21_SGPR22_SGPR23}" ()
9 call void asm sideeffect "", "~{SGPR24_SGPR25_SGPR26_SGPR27_SGPR28_SGPR29_SGPR30_SGPR31}" ()
10 call void asm sideeffect "", "~{SGPR32_SGPR33_SGPR34_SGPR35_SGPR36_SGPR37_SGPR38_SGPR39}" ()
11 call void asm sideeffect "", "~{SGPR40_SGPR41_SGPR42_SGPR43_SGPR44_SGPR45_SGPR46_SGPR47}" ()
12 call void asm sideeffect "", "~{SGPR48_SGPR49_SGPR50_SGPR51_SGPR52_SGPR53_SGPR54_SGPR55}" ()
13 call void asm sideeffect "", "~{SGPR56_SGPR57_SGPR58_SGPR59_SGPR60_SGPR61_SGPR62_SGPR63}" ()
14 call void asm sideeffect "", "~{SGPR64_SGPR65_SGPR66_SGPR67_SGPR68_SGPR69_SGPR70_SGPR71}" ()
15 call void asm sideeffect "", "~{SGPR72_SGPR73_SGPR74_SGPR75_SGPR76_SGPR77_SGPR78_SGPR79}" ()
16 call void asm sideeffect "", "~{SGPR80_SGPR81_SGPR82_SGPR83_SGPR84_SGPR85_SGPR86_SGPR87}" ()
17 call void asm sideeffect "", "~{SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95}" ()
; Clobber VGPR0-VGPR255 in groups of eight, so no VGPR is left free for
; the writelane-based SGPR spill path.
18 call void asm sideeffect "", "~{VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7}" ()
19 call void asm sideeffect "", "~{VGPR8_VGPR9_VGPR10_VGPR11_VGPR12_VGPR13_VGPR14_VGPR15}" ()
20 call void asm sideeffect "", "~{VGPR16_VGPR17_VGPR18_VGPR19_VGPR20_VGPR21_VGPR22_VGPR23}" ()
21 call void asm sideeffect "", "~{VGPR24_VGPR25_VGPR26_VGPR27_VGPR28_VGPR29_VGPR30_VGPR31}" ()
22 call void asm sideeffect "", "~{VGPR32_VGPR33_VGPR34_VGPR35_VGPR36_VGPR37_VGPR38_VGPR39}" ()
23 call void asm sideeffect "", "~{VGPR40_VGPR41_VGPR42_VGPR43_VGPR44_VGPR45_VGPR46_VGPR47}" ()
24 call void asm sideeffect "", "~{VGPR48_VGPR49_VGPR50_VGPR51_VGPR52_VGPR53_VGPR54_VGPR55}" ()
25 call void asm sideeffect "", "~{VGPR56_VGPR57_VGPR58_VGPR59_VGPR60_VGPR61_VGPR62_VGPR63}" ()
26 call void asm sideeffect "", "~{VGPR64_VGPR65_VGPR66_VGPR67_VGPR68_VGPR69_VGPR70_VGPR71}" ()
27 call void asm sideeffect "", "~{VGPR72_VGPR73_VGPR74_VGPR75_VGPR76_VGPR77_VGPR78_VGPR79}" ()
28 call void asm sideeffect "", "~{VGPR80_VGPR81_VGPR82_VGPR83_VGPR84_VGPR85_VGPR86_VGPR87}" ()
29 call void asm sideeffect "", "~{VGPR88_VGPR89_VGPR90_VGPR91_VGPR92_VGPR93_VGPR94_VGPR95}" ()
30 call void asm sideeffect "", "~{VGPR96_VGPR97_VGPR98_VGPR99_VGPR100_VGPR101_VGPR102_VGPR103}" ()
31 call void asm sideeffect "", "~{VGPR104_VGPR105_VGPR106_VGPR107_VGPR108_VGPR109_VGPR110_VGPR111}" ()
32 call void asm sideeffect "", "~{VGPR112_VGPR113_VGPR114_VGPR115_VGPR116_VGPR117_VGPR118_VGPR119}" ()
33 call void asm sideeffect "", "~{VGPR120_VGPR121_VGPR122_VGPR123_VGPR124_VGPR125_VGPR126_VGPR127}" ()
34 call void asm sideeffect "", "~{VGPR128_VGPR129_VGPR130_VGPR131_VGPR132_VGPR133_VGPR134_VGPR135}" ()
35 call void asm sideeffect "", "~{VGPR136_VGPR137_VGPR138_VGPR139_VGPR140_VGPR141_VGPR142_VGPR143}" ()
36 call void asm sideeffect "", "~{VGPR144_VGPR145_VGPR146_VGPR147_VGPR148_VGPR149_VGPR150_VGPR151}" ()
37 call void asm sideeffect "", "~{VGPR152_VGPR153_VGPR154_VGPR155_VGPR156_VGPR157_VGPR158_VGPR159}" ()
38 call void asm sideeffect "", "~{VGPR160_VGPR161_VGPR162_VGPR163_VGPR164_VGPR165_VGPR166_VGPR167}" ()
39 call void asm sideeffect "", "~{VGPR168_VGPR169_VGPR170_VGPR171_VGPR172_VGPR173_VGPR174_VGPR175}" ()
40 call void asm sideeffect "", "~{VGPR176_VGPR177_VGPR178_VGPR179_VGPR180_VGPR181_VGPR182_VGPR183}" ()
41 call void asm sideeffect "", "~{VGPR184_VGPR185_VGPR186_VGPR187_VGPR188_VGPR189_VGPR190_VGPR191}" ()
42 call void asm sideeffect "", "~{VGPR192_VGPR193_VGPR194_VGPR195_VGPR196_VGPR197_VGPR198_VGPR199}" ()
43 call void asm sideeffect "", "~{VGPR200_VGPR201_VGPR202_VGPR203_VGPR204_VGPR205_VGPR206_VGPR207}" ()
44 call void asm sideeffect "", "~{VGPR208_VGPR209_VGPR210_VGPR211_VGPR212_VGPR213_VGPR214_VGPR215}" ()
45 call void asm sideeffect "", "~{VGPR216_VGPR217_VGPR218_VGPR219_VGPR220_VGPR221_VGPR222_VGPR223}" ()
46 call void asm sideeffect "", "~{VGPR224_VGPR225_VGPR226_VGPR227_VGPR228_VGPR229_VGPR230_VGPR231}" ()
47 call void asm sideeffect "", "~{VGPR232_VGPR233_VGPR234_VGPR235_VGPR236_VGPR237_VGPR238_VGPR239}" ()
48 call void asm sideeffect "", "~{VGPR240_VGPR241_VGPR242_VGPR243_VGPR244_VGPR245_VGPR246_VGPR247}" ()
49 call void asm sideeffect "", "~{VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255}" ()
50
; A real store keeps live values (%out, %in) across the clobbers, forcing
; them to be spilled rather than rematerialized or dead-code eliminated.
51 store i32 %in, i32 addrspace(1)* %out
52 ret void
53 }