llvm.org GIT mirror llvm / f957779
Merging r226586:
------------------------------------------------------------------------
r226586 | thomas.stellard | 2015-01-20 12:49:47 -0500 (Tue, 20 Jan 2015) | 6 lines

R600/SI: Use external symbols for scratch buffer

We were passing the scratch buffer address to the shaders via user sgprs,
but now we use external symbols and have the driver patch the shader
using reloc information.
------------------------------------------------------------------------

git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_36@226725 91177308-0d34-0410-b5e6-96231b3b80d8

Tom Stellard 4 years ago
9 changed file(s) with 95 addition(s) and 85 deletion(s). Raw diff Collapse all Expand all
7676
7777 namespace AMDGPU {
7878 enum TargetIndex {
79 TI_CONSTDATA_START
79 TI_CONSTDATA_START,
80 TI_SCRATCH_RSRC_DWORD0,
81 TI_SCRATCH_RSRC_DWORD1,
82 TI_SCRATCH_RSRC_DWORD2,
83 TI_SCRATCH_RSRC_DWORD3
8084 };
8185 }
8286
961961 const SITargetLowering& Lowering =
962962 *static_cast(getTargetLowering());
963963
964 unsigned ScratchPtrReg =
965 TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_PTR);
966964 unsigned ScratchOffsetReg =
967965 TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_WAVE_OFFSET);
968966 Lowering.CreateLiveInRegister(*CurDAG, &AMDGPU::SReg_32RegClass,
969967 ScratchOffsetReg, MVT::i32);
970
971 SDValue ScratchPtr =
972 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
973 MRI.getLiveInVirtReg(ScratchPtrReg), MVT::i64);
968 SDValue Sym0 = CurDAG->getExternalSymbol("SCRATCH_RSRC_DWORD0", MVT::i32);
969 SDValue ScratchRsrcDword0 =
970 SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, Sym0), 0);
971
972 SDValue Sym1 = CurDAG->getExternalSymbol("SCRATCH_RSRC_DWORD1", MVT::i32);
973 SDValue ScratchRsrcDword1 =
974 SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, Sym1), 0);
975
976 const SDValue RsrcOps[] = {
977 CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32),
978 ScratchRsrcDword0,
979 CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32),
980 ScratchRsrcDword1,
981 CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32),
982 };
983 SDValue ScratchPtr = SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
984 MVT::v2i32, RsrcOps), 0);
974985 Rsrc = SDValue(Lowering.buildScratchRSRC(*CurDAG, DL, ScratchPtr), 0);
975986 SOffset = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
976987 MRI.getLiveInVirtReg(ScratchOffsetReg), MVT::i32);
7979 MCOp = MCOperand::CreateExpr(Expr);
8080 break;
8181 }
82 case MachineOperand::MO_ExternalSymbol: {
83 MCSymbol *Sym = Ctx.GetOrCreateSymbol(StringRef(MO.getSymbolName()));
84 const MCSymbolRefExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx);
85 MCOp = MCOperand::CreateExpr(Expr);
86 break;
87 }
8288 }
8389 OutMI.addOperand(MCOp);
8490 }
162162 #define R_00B860_COMPUTE_TMPRING_SIZE 0x00B860
163163 #define S_00B860_WAVESIZE(x) (((x) & 0x1FFF) << 12)
164164
165
165166 #endif
481481 .addFrameIndex(FrameIndex)
482482 // Place-holder registers, these will be filled in by
483483 // SIPrepareScratchRegs.
484 .addReg(AMDGPU::SGPR0_SGPR1, RegState::Undef)
484 .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef)
485485 .addReg(AMDGPU::SGPR0, RegState::Undef);
486486 } else {
487487 LLVMContext &Ctx = MF->getFunction()->getContext();
527527 .addFrameIndex(FrameIndex)
528528 // Place-holder registers, these will be filled in by
529529 // SIPrepareScratchRegs.
530 .addReg(AMDGPU::SGPR0_SGPR1, RegState::Undef)
530 .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef)
531531 .addReg(AMDGPU::SGPR0, RegState::Undef);
532532
533533 } else {
19481948 let UseNamedOperandTable = 1 in {
19491949 def _SAVE : InstSI <
19501950 (outs),
1951 (ins sgpr_class:$src, i32imm:$frame_idx, SReg_64:$scratch_ptr,
1951 (ins sgpr_class:$src, i32imm:$frame_idx, SReg_128:$scratch_rsrc,
19521952 SReg_32:$scratch_offset),
19531953 "", []
19541954 >;
19551955
19561956 def _RESTORE : InstSI <
19571957 (outs sgpr_class:$dst),
1958 (ins i32imm:$frame_idx, SReg_64:$scratch_ptr, SReg_32:$scratch_offset),
1958 (ins i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset),
19591959 "", []
19601960 >;
19611961 } // End UseNamedOperandTable = 1
19711971 let UseNamedOperandTable = 1 in {
19721972 def _SAVE : InstSI <
19731973 (outs),
1974 (ins vgpr_class:$src, i32imm:$frame_idx, SReg_64:$scratch_ptr,
1974 (ins vgpr_class:$src, i32imm:$frame_idx, SReg_128:$scratch_rsrc,
19751975 SReg_32:$scratch_offset),
19761976 "", []
19771977 >;
19781978
19791979 def _RESTORE : InstSI <
19801980 (outs vgpr_class:$dst),
1981 (ins i32imm:$frame_idx, SReg_64:$scratch_ptr, SReg_32:$scratch_offset),
1981 (ins i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset),
19821982 "", []
19831983 >;
19841984 } // End UseNamedOperandTable = 1
2828 #include "llvm/IR/Function.h"
2929 #include "llvm/IR/LLVMContext.h"
3030
31 #include "llvm/Support/Debug.h"
3132 using namespace llvm;
3233
3334 namespace {
8384 if (!Entry->isLiveIn(ScratchOffsetPreloadReg))
8485 Entry->addLiveIn(ScratchOffsetPreloadReg);
8586
86 // Load the scratch pointer
87 unsigned ScratchPtrReg =
88 TRI->findUnusedRegister(MRI, &AMDGPU::SGPR_64RegClass);
89 int ScratchPtrFI = -1;
90
91 if (ScratchPtrReg != AMDGPU::NoRegister) {
92 // Found an SGPR to use.
93 MRI.setPhysRegUsed(ScratchPtrReg);
94 BuildMI(*Entry, I, DL, TII->get(AMDGPU::S_MOV_B64), ScratchPtrReg)
95 .addReg(ScratchPtrPreloadReg);
96 } else {
97 // No SGPR is available, we must spill.
98 ScratchPtrFI = FrameInfo->CreateSpillStackObject(8, 4);
99 BuildMI(*Entry, I, DL, TII->get(AMDGPU::SI_SPILL_S64_SAVE))
100 .addReg(ScratchPtrPreloadReg)
101 .addFrameIndex(ScratchPtrFI);
102 }
103
10487 // Load the scratch offset.
10588 unsigned ScratchOffsetReg =
10689 TRI->findUnusedRegister(MRI, &AMDGPU::SGPR_32RegClass);
107 int ScratchOffsetFI = ~0;
90 int ScratchOffsetFI = -1;
10891
10992 if (ScratchOffsetReg != AMDGPU::NoRegister) {
11093 // Found an SGPR to use
124107 // add them to all the SI_SPILL_V* instructions.
125108
126109 RegScavenger RS;
127 bool UseRegScavenger =
128 (ScratchPtrReg == AMDGPU::NoRegister ||
129 ScratchOffsetReg == AMDGPU::NoRegister);
110 unsigned ScratchRsrcFI = FrameInfo->CreateSpillStackObject(16, 4);
111 RS.addScavengingFrameIndex(ScratchRsrcFI);
112
130113 for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
131114 BI != BE; ++BI) {
132115
133116 MachineBasicBlock &MBB = *BI;
134 if (UseRegScavenger)
135 RS.enterBasicBlock(&MBB);
117 // Add the scratch offset reg as a live-in so that the register scavenger
118 // doesn't re-use it.
119 if (!MBB.isLiveIn(ScratchOffsetReg))
120 MBB.addLiveIn(ScratchOffsetReg);
121 RS.enterBasicBlock(&MBB);
136122
137123 for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
138124 I != E; ++I) {
139125 MachineInstr &MI = *I;
126 RS.forward(I);
140127 DebugLoc DL = MI.getDebugLoc();
141128 switch(MI.getOpcode()) {
142 default: break;;
129 default: break;
143130 case AMDGPU::SI_SPILL_V512_SAVE:
144131 case AMDGPU::SI_SPILL_V256_SAVE:
145132 case AMDGPU::SI_SPILL_V128_SAVE:
152139 case AMDGPU::SI_SPILL_V256_RESTORE:
153140 case AMDGPU::SI_SPILL_V512_RESTORE:
154141
155 // Scratch Pointer
156 if (ScratchPtrReg == AMDGPU::NoRegister) {
157 ScratchPtrReg = RS.scavengeRegister(&AMDGPU::SGPR_64RegClass, 0);
158 BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_SPILL_S64_RESTORE),
159 ScratchPtrReg)
160 .addFrameIndex(ScratchPtrFI)
161 .addReg(AMDGPU::NoRegister)
162 .addReg(AMDGPU::NoRegister);
163 } else if (!MBB.isLiveIn(ScratchPtrReg)) {
164 MBB.addLiveIn(ScratchPtrReg);
165 }
166
142 // Scratch resource
143 unsigned ScratchRsrcReg =
144 RS.scavengeRegister(&AMDGPU::SReg_128RegClass, 0);
145
146 uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT | AMDGPU::RSRC_TID_ENABLE |
147 0xffffffff; // Size
148
149 unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
150 unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
151 unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
152 unsigned Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
153
154 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc0)
155 .addExternalSymbol("SCRATCH_RSRC_DWORD0")
156 .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
157
158 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc1)
159 .addExternalSymbol("SCRATCH_RSRC_DWORD1")
160 .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
161
162 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc2)
163 .addImm(Rsrc & 0xffffffff)
164 .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
165
166 BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc3)
167 .addImm(Rsrc >> 32)
168 .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
169
170 // Scratch Offset
167171 if (ScratchOffsetReg == AMDGPU::NoRegister) {
168172 ScratchOffsetReg = RS.scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
169173 BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_SPILL_S32_RESTORE),
175179 MBB.addLiveIn(ScratchOffsetReg);
176180 }
177181
178 if (ScratchPtrReg == AMDGPU::NoRegister ||
182 if (ScratchRsrcReg == AMDGPU::NoRegister ||
179183 ScratchOffsetReg == AMDGPU::NoRegister) {
180184 LLVMContext &Ctx = MF.getFunction()->getContext();
181185 Ctx.emitError("ran out of SGPRs for spilling VGPRs");
182 ScratchPtrReg = AMDGPU::SGPR0;
186 ScratchRsrcReg = AMDGPU::SGPR0;
183187 ScratchOffsetReg = AMDGPU::SGPR0;
184188 }
185 MI.getOperand(2).setReg(ScratchPtrReg);
189 MI.getOperand(2).setReg(ScratchRsrcReg);
190 MI.getOperand(2).setIsKill(true);
191 MI.getOperand(2).setIsUndef(false);
186192 MI.getOperand(3).setReg(ScratchOffsetReg);
187
193 MI.getOperand(3).setIsUndef(false);
194 MI.addOperand(MachineOperand::CreateReg(Rsrc0, false, true, true));
195 MI.addOperand(MachineOperand::CreateReg(Rsrc1, false, true, true));
196 MI.addOperand(MachineOperand::CreateReg(Rsrc2, false, true, true));
197 MI.addOperand(MachineOperand::CreateReg(Rsrc3, false, true, true));
198
199 MI.dump();
188200 break;
189201 }
190 if (UseRegScavenger)
191 RS.forward();
192202 }
193203 }
194204 return true;
9797 void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI,
9898 unsigned LoadStoreOp,
9999 unsigned Value,
100 unsigned ScratchPtr,
100 unsigned ScratchRsrcReg,
101101 unsigned ScratchOffset,
102102 int64_t Offset,
103103 RegScavenger *RS) const {
112112 bool RanOutOfSGPRs = false;
113113 unsigned SOffset = ScratchOffset;
114114
115 unsigned RsrcReg = RS->scavengeRegister(&AMDGPU::SReg_128RegClass, MI, 0);
116 if (RsrcReg == AMDGPU::NoRegister) {
117 RanOutOfSGPRs = true;
118 RsrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
119 }
120
121115 unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
122116 unsigned Size = NumSubRegs * 4;
123117
124 uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT | AMDGPU::RSRC_TID_ENABLE |
125 0xffffffff; // Size
126
127 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B64),
128 getSubReg(RsrcReg, AMDGPU::sub0_sub1))
129 .addReg(ScratchPtr)
130 .addReg(RsrcReg, RegState::ImplicitDefine);
131
132 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32),
133 getSubReg(RsrcReg, AMDGPU::sub2))
134 .addImm(Rsrc & 0xffffffff)
135 .addReg(RsrcReg, RegState::ImplicitDefine);
136
137 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32),
138 getSubReg(RsrcReg, AMDGPU::sub3))
139 .addImm(Rsrc >> 32)
140 .addReg(RsrcReg, RegState::ImplicitDefine);
141
142118 if (!isUInt<12>(Offset + Size)) {
119 dbgs() << "Offset scavenge\n";
143120 SOffset = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0);
144121 if (SOffset == AMDGPU::NoRegister) {
145122 RanOutOfSGPRs = true;
162139
163140 BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
164141 .addReg(SubReg, getDefRegState(IsLoad))
165 .addReg(RsrcReg, getKillRegState(IsKill))
142 .addReg(ScratchRsrcReg, getKillRegState(IsKill))
166143 .addImm(Offset)
167144 .addReg(SOffset, getKillRegState(IsKill))
168145 .addImm(0) // glc
235212 }
236213
237214 if (isM0) {
215 dbgs() << "Scavenge M0\n";
238216 SubReg = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0);
239217 }
240218
261239 case AMDGPU::SI_SPILL_V32_SAVE:
262240 buildScratchLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
263241 TII->getNamedOperand(*MI, AMDGPU::OpName::src)->getReg(),
264 TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_ptr)->getReg(),
242 TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(),
265243 TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
266244 FrameInfo->getObjectOffset(Index), RS);
267245 MI->eraseFromParent();
273251 case AMDGPU::SI_SPILL_V512_RESTORE: {
274252 buildScratchLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
275253 TII->getNamedOperand(*MI, AMDGPU::OpName::dst)->getReg(),
276 TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_ptr)->getReg(),
254 TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(),
277255 TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
278256 FrameInfo->getObjectOffset(Index), RS);
279257 MI->eraseFromParent();
110110 private:
111111 void buildScratchLoadStore(MachineBasicBlock::iterator MI,
112112 unsigned LoadStoreOp, unsigned Value,
113 unsigned ScratchPtr, unsigned ScratchOffset,
113 unsigned ScratchRsrcReg, unsigned ScratchOffset,
114114 int64_t Offset, RegScavenger *RS) const;
115115 };
116116