llvm.org GIT mirror llvm / 9f7818d
R600: rework handling of the constants Remove Cxxx registers, add new special register - "ALU_CONST" and new operand for each alu src - "sel". ALU_CONST is used to designate that the new operand contains the value to override src.sel, src.kc_bank, src.chan for constants in the driver. Patch by: Vadim Girlin Vincent Lejeune: - Use pointers for constants - Fold CONST_ADDRESS when possible Tom Stellard: - Give CONSTANT_BUFFER_0 its own address space - Use integer types for constant loads Reviewed-by: Tom Stellard <thomas.stellard@amd.com> git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173222 91177308-0d34-0410-b5e6-96231b3b80d8 Tom Stellard 7 years ago
16 changed file(s) with 484 addition(s) and 105 deletion(s). Raw diff Collapse all Expand all
2222 // R600 Passes
2323 FunctionPass* createR600KernelParametersPass(const DataLayout *TD);
2424 FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
25 FunctionPass *createR600LowerConstCopy(TargetMachine &tm);
2526
2627 // SI Passes
2728 FunctionPass *createSIAnnotateControlFlowPass();
135135 addPass(createAMDGPUCFGPreparationPass(*TM));
136136 addPass(createAMDGPUCFGStructurizerPass(*TM));
137137 addPass(createR600ExpandSpecialInstrsPass(*TM));
138 addPass(createR600LowerConstCopy(*TM));
138139 addPass(&FinalizeMachineBundlesID);
139140 } else {
140141 addPass(createSILowerLiteralConstantsPass(*TM));
/// Address spaces used by the AMDGPU backend. The sixteen CONSTANT_BUFFER_*
/// entries give each R600 kc bank its own address space so constant loads can
/// be folded per-buffer (see ConstantAddressBlock in R600ISelLowering).
enum AddressSpaces {
  PRIVATE_ADDRESS   = 0, ///< Address space for private memory.
  GLOBAL_ADDRESS    = 1, ///< Address space for global memory (RAT0, VTX0).
  CONSTANT_ADDRESS  = 2, ///< Address space for constant memory
  LOCAL_ADDRESS     = 3, ///< Address space for local memory.
  REGION_ADDRESS    = 4, ///< Address space for region memory.
  ADDRESS_NONE      = 5, ///< Address space for unknown memory.
  PARAM_D_ADDRESS   = 6, ///< Address space for direct addressable parameter memory (CONST0)
  PARAM_I_ADDRESS   = 7, ///< Address space for indirect addressable parameter memory (VTX1)
  USER_SGPR_ADDRESS = 8, ///< Address space for USER_SGPRS on SI
  // Constant buffers 0-15 must stay consecutive: code elsewhere computes the
  // kc bank as (AS - CONSTANT_BUFFER_0).
  CONSTANT_BUFFER_0 = 9,
  CONSTANT_BUFFER_1 = 10,
  CONSTANT_BUFFER_2 = 11,
  CONSTANT_BUFFER_3 = 12,
  CONSTANT_BUFFER_4 = 13,
  CONSTANT_BUFFER_5 = 14,
  CONSTANT_BUFFER_6 = 15,
  CONSTANT_BUFFER_7 = 16,
  CONSTANT_BUFFER_8 = 17,
  CONSTANT_BUFFER_9 = 18,
  CONSTANT_BUFFER_10 = 19,
  CONSTANT_BUFFER_11 = 20,
  CONSTANT_BUFFER_12 = 21,
  CONSTANT_BUFFER_13 = 22,
  CONSTANT_BUFFER_14 = 23,
  CONSTANT_BUFFER_15 = 24,
  LAST_ADDRESS      = 25 ///< One past the last declared address space.
};
101117
102118 } // namespace AMDGPUAS
1919 #include "llvm/CodeGen/PseudoSourceValue.h"
2020 #include "llvm/CodeGen/SelectionDAGISel.h"
2121 #include "llvm/Support/Compiler.h"
22 #include "llvm/CodeGen/SelectionDAG.h"
2223 #include
2324 #include
2425
4445
4546 private:
4647 inline SDValue getSmallIPtrImm(unsigned Imm);
48 bool FoldOperands(unsigned, const R600InstrInfo *, std::vector &);
4749
4850 // Complex pattern selectors
4951 bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2);
6668 static bool isLocalLoad(const LoadSDNode *N);
6769 static bool isRegionLoad(const LoadSDNode *N);
6870
71 bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
72 bool SelectGlobalValueVariableOffset(SDValue Addr,
73 SDValue &BaseReg, SDValue& Offset);
6974 bool SelectADDR8BitOffset(SDValue Addr, SDValue& Base, SDValue& Offset);
7075 bool SelectADDRReg(SDValue Addr, SDValue& Base, SDValue& Offset);
7176 bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
258263 break;
259264 }
260265 }
261 return SelectCode(N);
266 SDNode *Result = SelectCode(N);
267
268 // Fold operands of selected node
269
270 const AMDGPUSubtarget &ST = TM.getSubtarget();
271 if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
272 const R600InstrInfo *TII =
273 static_cast(TM.getInstrInfo());
274 if (Result && TII->isALUInstr(Result->getMachineOpcode())) {
275 bool IsModified = false;
276 do {
277 std::vector Ops;
278 for(SDNode::op_iterator I = Result->op_begin(), E = Result->op_end();
279 I != E; ++I)
280 Ops.push_back(*I);
281 IsModified = FoldOperands(Result->getMachineOpcode(), TII, Ops);
282 if (IsModified) {
283 Result = CurDAG->MorphNodeTo(Result, Result->getOpcode(),
284 Result->getVTList(), Ops.data(), Ops.size());
285 }
286 } while (IsModified);
287 }
288 }
289
290 return Result;
291 }
292
293 bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode,
294 const R600InstrInfo *TII, std::vector &Ops) {
295 int OperandIdx[] = {
296 TII->getOperandIdx(Opcode, R600Operands::SRC0),
297 TII->getOperandIdx(Opcode, R600Operands::SRC1),
298 TII->getOperandIdx(Opcode, R600Operands::SRC2)
299 };
300 int SelIdx[] = {
301 TII->getOperandIdx(Opcode, R600Operands::SRC0_SEL),
302 TII->getOperandIdx(Opcode, R600Operands::SRC1_SEL),
303 TII->getOperandIdx(Opcode, R600Operands::SRC2_SEL)
304 };
305 for (unsigned i = 0; i < 3; i++) {
306 if (OperandIdx[i] < 0)
307 return false;
308 SDValue Operand = Ops[OperandIdx[i] - 1];
309 switch (Operand.getOpcode()) {
310 case AMDGPUISD::CONST_ADDRESS: {
311 SDValue CstOffset;
312 if (!Operand.getValueType().isVector() &&
313 SelectGlobalValueConstantOffset(Operand.getOperand(0), CstOffset)) {
314 Ops[OperandIdx[i] - 1] = CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32);
315 Ops[SelIdx[i] - 1] = CstOffset;
316 return true;
317 }
318 }
319 break;
320 default:
321 break;
322 }
323 }
324 return false;
262325 }
263326
264327 bool AMDGPUDAGToDAGISel::checkType(const Value *ptr, unsigned int addrspace) {
405468
406469 ///==== AMDGPU Functions ====///
407470
471 bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
472 SDValue& IntPtr) {
473 if (ConstantSDNode *Cst = dyn_cast(Addr)) {
474 IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, true);
475 return true;
476 }
477 return false;
478 }
479
480 bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
481 SDValue& BaseReg, SDValue &Offset) {
482 if (!dyn_cast(Addr)) {
483 BaseReg = Addr;
484 Offset = CurDAG->getIntPtrConstant(0, true);
485 return true;
486 }
487 return false;
488 }
489
408490 bool AMDGPUDAGToDAGISel::SelectADDR8BitOffset(SDValue Addr, SDValue& Base,
409491 SDValue& Offset) {
410492 if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
3535 R600ExpandSpecialInstrs.cpp
3636 R600InstrInfo.cpp
3737 R600ISelLowering.cpp
38 R600LowerConstCopy.cpp
3839 R600MachineFunctionInfo.cpp
3940 R600RegisterInfo.cpp
4041 SIAnnotateControlFlow.cpp
128128 }
129129 }
130130
131 void AMDGPUInstPrinter::printSel(const MCInst *MI, unsigned OpNo,
132 raw_ostream &O) {
133 const char * chans = "XYZW";
134 int sel = MI->getOperand(OpNo).getImm();
135
136 int chan = sel & 3;
137 sel >>= 2;
138
139 if (sel >= 512) {
140 sel -= 512;
141 int cb = sel >> 12;
142 sel &= 4095;
143 O << cb << "[" << sel << "]";
144 } else if (sel >= 448) {
145 sel -= 448;
146 O << sel;
147 } else if (sel >= 0){
148 O << sel;
149 }
150
151 if (sel >= 0)
152 O << "." << chans[chan];
153 }
154
131155 #include "AMDGPUGenAsmWriter.inc"
4444 void printUpdateExecMask(const MCInst *MI, unsigned OpNo, raw_ostream &O);
4545 void printUpdatePred(const MCInst *MI, unsigned OpNo, raw_ostream &O);
4646 void printWrite(const MCInst *MI, unsigned OpNo, raw_ostream &O);
47 void printSel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
4748 };
4849
4950 } // End namespace llvm
6262 void EmitALUInstr(const MCInst &MI, SmallVectorImpl &Fixups,
6363 raw_ostream &OS) const;
6464 void EmitSrc(const MCInst &MI, unsigned OpIdx, raw_ostream &OS) const;
65 void EmitSrcISA(const MCInst &MI, unsigned OpIdx, uint64_t &Value,
66 raw_ostream &OS) const;
65 void EmitSrcISA(const MCInst &MI, unsigned RegOpIdx, unsigned SelOpIdx,
66 raw_ostream &OS) const;
6767 void EmitDst(const MCInst &MI, raw_ostream &OS) const;
6868 void EmitTexInstr(const MCInst &MI, SmallVectorImpl &Fixups,
6969 raw_ostream &OS) const;
162162 case AMDGPU::VTX_READ_PARAM_32_eg:
163163 case AMDGPU::VTX_READ_GLOBAL_8_eg:
164164 case AMDGPU::VTX_READ_GLOBAL_32_eg:
165 case AMDGPU::VTX_READ_GLOBAL_128_eg: {
165 case AMDGPU::VTX_READ_GLOBAL_128_eg:
166 case AMDGPU::TEX_VTX_CONSTBUF: {
166167 uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups);
167168 uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset
168169
192193 SmallVectorImpl &Fixups,
193194 raw_ostream &OS) const {
194195 const MCInstrDesc &MCDesc = MCII.get(MI.getOpcode());
195 unsigned NumOperands = MI.getNumOperands();
196196
197197 // Emit instruction type
198198 EmitByte(INSTR_ALU, OS);
208208 InstWord01 |= ISAOpCode << 1;
209209 }
210210
211 unsigned SrcIdx = 0;
212 for (unsigned int OpIdx = 1; OpIdx < NumOperands; ++OpIdx) {
213 if (MI.getOperand(OpIdx).isImm() || MI.getOperand(OpIdx).isFPImm() ||
214 OpIdx == (unsigned)MCDesc.findFirstPredOperandIdx()) {
215 continue;
216 }
217 EmitSrcISA(MI, OpIdx, InstWord01, OS);
218 SrcIdx++;
219 }
220
221 // Emit zeros for unused sources
222 for ( ; SrcIdx < 3; SrcIdx++) {
223 EmitNullBytes(SRC_BYTE_COUNT - 6, OS);
211 unsigned SrcNum = MCDesc.TSFlags & R600_InstFlag::OP3 ? 3 :
212 MCDesc.TSFlags & R600_InstFlag::OP2 ? 2 : 1;
213
214 EmitByte(SrcNum, OS);
215
216 const unsigned SrcOps[3][2] = {
217 {R600Operands::SRC0, R600Operands::SRC0_SEL},
218 {R600Operands::SRC1, R600Operands::SRC1_SEL},
219 {R600Operands::SRC2, R600Operands::SRC2_SEL}
220 };
221
222 for (unsigned SrcIdx = 0; SrcIdx < SrcNum; ++SrcIdx) {
223 unsigned RegOpIdx = R600Operands::ALUOpTable[SrcNum-1][SrcOps[SrcIdx][0]];
224 unsigned SelOpIdx = R600Operands::ALUOpTable[SrcNum-1][SrcOps[SrcIdx][1]];
225 EmitSrcISA(MI, RegOpIdx, SelOpIdx, OS);
224226 }
225227
226228 Emit(InstWord01, OS);
291293
292294 }
293295
294 void R600MCCodeEmitter::EmitSrcISA(const MCInst &MI, unsigned OpIdx,
295 uint64_t &Value, raw_ostream &OS) const {
296 const MCOperand &MO = MI.getOperand(OpIdx);
296 void R600MCCodeEmitter::EmitSrcISA(const MCInst &MI, unsigned RegOpIdx,
297 unsigned SelOpIdx, raw_ostream &OS) const {
298 const MCOperand &RegMO = MI.getOperand(RegOpIdx);
299 const MCOperand &SelMO = MI.getOperand(SelOpIdx);
300
297301 union {
298302 float f;
299303 uint32_t i;
300304 } InlineConstant;
301305 InlineConstant.i = 0;
302 // Emit the source select (2 bytes). For GPRs, this is the register index.
303 // For other potential instruction operands, (e.g. constant registers) the
304 // value of the source select is defined in the r600isa docs.
305 if (MO.isReg()) {
306 unsigned Reg = MO.getReg();
307 if (AMDGPUMCRegisterClasses[AMDGPU::R600_CReg32RegClassID].contains(Reg)) {
308 EmitByte(1, OS);
306 // Emit source type (1 byte) and source select (4 bytes). For GPRs type is 0
307 // and select is 0 (GPR index is encoded in the instr encoding. For constants
308 // type is 1 and select is the original const select passed from the driver.
309 unsigned Reg = RegMO.getReg();
310 if (Reg == AMDGPU::ALU_CONST) {
311 EmitByte(1, OS);
312 uint32_t Sel = SelMO.getImm();
313 Emit(Sel, OS);
314 } else {
315 EmitByte(0, OS);
316 Emit((uint32_t)0, OS);
317 }
318
319 if (Reg == AMDGPU::ALU_LITERAL_X) {
320 unsigned ImmOpIndex = MI.getNumOperands() - 1;
321 MCOperand ImmOp = MI.getOperand(ImmOpIndex);
322 if (ImmOp.isFPImm()) {
323 InlineConstant.f = ImmOp.getFPImm();
309324 } else {
310 EmitByte(0, OS);
311 }
312
313 if (Reg == AMDGPU::ALU_LITERAL_X) {
314 unsigned ImmOpIndex = MI.getNumOperands() - 1;
315 MCOperand ImmOp = MI.getOperand(ImmOpIndex);
316 if (ImmOp.isFPImm()) {
317 InlineConstant.f = ImmOp.getFPImm();
318 } else {
319 assert(ImmOp.isImm());
320 InlineConstant.i = ImmOp.getImm();
321 }
325 assert(ImmOp.isImm());
326 InlineConstant.i = ImmOp.getImm();
322327 }
323328 }
324329
6161 SRC0_NEG,
6262 SRC0_REL,
6363 SRC0_ABS,
64 SRC0_SEL,
6465 SRC1,
6566 SRC1_NEG,
6667 SRC1_REL,
6768 SRC1_ABS,
69 SRC1_SEL,
6870 SRC2,
6971 SRC2_NEG,
7072 SRC2_REL,
73 SRC2_SEL,
7174 LAST,
7275 PRED_SEL,
7376 IMM,
7477 COUNT
7578 };
79
80 const static int ALUOpTable[3][R600Operands::COUNT] = {
81 // W C S S S S S S S S S S S
82 // R O D L S R R R R S R R R R S R R R L P
83 // D U I M R A R C C C C R C C C C R C C C A R I
84 // S E U T O E M C 0 0 0 0 C 1 1 1 1 C 2 2 2 S E M
85 // T M P E D L P 0 N R A S 1 N R A S 2 N R S T D M
86 {0,-1,-1, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1,-1,-1,-1,10,11,12},
87 {0, 1, 2, 3, 4 ,5 ,6 ,7, 8, 9,10,11,12,13,14,15,16,-1,-1,-1,-1,17,18,19},
88 {0,-1,-1,-1,-1, 1, 2, 3, 4, 5,-1, 6, 7, 8, 9,-1,10,11,12,13,14,15,16,17}
89 };
90
7691 }
7792
7893 #endif // R600DEFINES_H_
7373 setOperationAction(ISD::STORE, MVT::i32, Custom);
7474 setOperationAction(ISD::STORE, MVT::v4i32, Custom);
7575
76 setOperationAction(ISD::LOAD, MVT::i32, Custom);
77 setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
7678 setTargetDAGCombine(ISD::FP_ROUND);
79 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
7780
7881 setSchedulingPreference(Sched::VLIW);
7982 }
111114 MI->getOperand(0).getReg(),
112115 MI->getOperand(1).getReg());
113116 TII->addFlag(NewMI, 0, MO_FLAG_NEG);
114 break;
115 }
116
117 case AMDGPU::R600_LOAD_CONST: {
118 int64_t RegIndex = MI->getOperand(1).getImm();
119 unsigned ConstantReg = AMDGPU::R600_CReg32RegClass.getRegister(RegIndex);
120 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY))
121 .addOperand(MI->getOperand(0))
122 .addReg(ConstantReg);
123117 break;
124118 }
125119
363357 case ISD::SELECT: return LowerSELECT(Op, DAG);
364358 case ISD::SETCC: return LowerSETCC(Op, DAG);
365359 case ISD::STORE: return LowerSTORE(Op, DAG);
360 case ISD::LOAD: return LowerLOAD(Op, DAG);
366361 case ISD::FPOW: return LowerFPOW(Op, DAG);
367362 case ISD::INTRINSIC_VOID: {
368363 SDValue Chain = Op.getOperand(0);
526521 switch (N->getOpcode()) {
527522 default: return;
528523 case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
524 return;
525 case ISD::LOAD: {
526 SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
527 Results.push_back(SDValue(Node, 0));
528 Results.push_back(SDValue(Node, 1));
529 // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode
530 // function
531 DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
532 return;
533 }
529534 }
530535 }
531536
831836 return SDValue();
832837 }
833838
839 // return (512 + (kc_bank << 12)
840 static int
841 ConstantAddressBlock(unsigned AddressSpace) {
842 switch (AddressSpace) {
843 case AMDGPUAS::CONSTANT_BUFFER_0:
844 return 512;
845 case AMDGPUAS::CONSTANT_BUFFER_1:
846 return 512 + 4096;
847 case AMDGPUAS::CONSTANT_BUFFER_2:
848 return 512 + 4096 * 2;
849 case AMDGPUAS::CONSTANT_BUFFER_3:
850 return 512 + 4096 * 3;
851 case AMDGPUAS::CONSTANT_BUFFER_4:
852 return 512 + 4096 * 4;
853 case AMDGPUAS::CONSTANT_BUFFER_5:
854 return 512 + 4096 * 5;
855 case AMDGPUAS::CONSTANT_BUFFER_6:
856 return 512 + 4096 * 6;
857 case AMDGPUAS::CONSTANT_BUFFER_7:
858 return 512 + 4096 * 7;
859 case AMDGPUAS::CONSTANT_BUFFER_8:
860 return 512 + 4096 * 8;
861 case AMDGPUAS::CONSTANT_BUFFER_9:
862 return 512 + 4096 * 9;
863 case AMDGPUAS::CONSTANT_BUFFER_10:
864 return 512 + 4096 * 10;
865 case AMDGPUAS::CONSTANT_BUFFER_11:
866 return 512 + 4096 * 11;
867 case AMDGPUAS::CONSTANT_BUFFER_12:
868 return 512 + 4096 * 12;
869 case AMDGPUAS::CONSTANT_BUFFER_13:
870 return 512 + 4096 * 13;
871 case AMDGPUAS::CONSTANT_BUFFER_14:
872 return 512 + 4096 * 14;
873 case AMDGPUAS::CONSTANT_BUFFER_15:
874 return 512 + 4096 * 15;
875 default:
876 return -1;
877 }
878 }
879
880 SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
881 {
882 EVT VT = Op.getValueType();
883 DebugLoc DL = Op.getDebugLoc();
884 LoadSDNode *LoadNode = cast(Op);
885 SDValue Chain = Op.getOperand(0);
886 SDValue Ptr = Op.getOperand(1);
887 SDValue LoweredLoad;
888
889 int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
890 if (ConstantBlock > -1) {
891 SDValue Result;
892 if (dyn_cast(LoadNode->getSrcValue()) ||
893 dyn_cast(LoadNode->getSrcValue())) {
894 SDValue Slots[4];
895 for (unsigned i = 0; i < 4; i++) {
896 // We want Const position encoded with the following formula :
897 // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
898 // const_index is Ptr computed by llvm using an alignment of 16.
899 // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
900 // then div by 4 at the ISel step
901 SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
902 DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32));
903 Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
904 }
905 Result = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Slots, 4);
906 } else {
907 // non constant ptr cant be folded, keeps it as a v4f32 load
908 Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
909 DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32))
910 );
911 }
912
913 if (!VT.isVector()) {
914 Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
915 DAG.getConstant(0, MVT::i32));
916 }
917
918 SDValue MergedValues[2] = {
919 Result,
920 Chain
921 };
922 return DAG.getMergeValues(MergedValues, 2, DL);
923 }
924
925 return SDValue();
926 }
834927
835928 SDValue R600TargetLowering::LowerFPOW(SDValue Op,
836929 SelectionDAG &DAG) const {
903996 }
904997 break;
905998 }
999 // Extract_vec (Build_vector) generated by custom lowering
1000 // also needs to be customly combined
1001 case ISD::EXTRACT_VECTOR_ELT: {
1002 SDValue Arg = N->getOperand(0);
1003 if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1004 if (ConstantSDNode *Const = dyn_cast(N->getOperand(1))) {
1005 unsigned Element = Const->getZExtValue();
1006 return Arg->getOperand(Element);
1007 }
1008 }
1009 }
9061010 }
9071011 return SDValue();
9081012 }
6262 SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
6363 SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const;
6464 SDValue LowerFPOW(SDValue Op, SelectionDAG &DAG) const;
65 SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
6566
6667 bool isZero(SDValue Op) const;
6768 };
485485 .addReg(Src0Reg) // $src0
486486 .addImm(0) // $src0_neg
487487 .addImm(0) // $src0_rel
488 .addImm(0); // $src0_abs
488 .addImm(0) // $src0_abs
489 .addImm(-1); // $src0_sel
489490
490491 if (Src1Reg) {
491492 MIB.addReg(Src1Reg) // $src1
492493 .addImm(0) // $src1_neg
493494 .addImm(0) // $src1_rel
494 .addImm(0); // $src1_abs
495 .addImm(0) // $src1_abs
496 .addImm(-1); // $src1_sel
495497 }
496498
497499 //XXX: The r600g finalizer expects this to be 1, once we've moved the
520522
521523 int R600InstrInfo::getOperandIdx(unsigned Opcode,
522524 R600Operands::Ops Op) const {
523 const static int OpTable[3][R600Operands::COUNT] = {
524 // W C S S S S S S S S
525 // R O D L S R R R S R R R S R R L P
526 // D U I M R A R C C C C C C C R C C A R I
527 // S E U T O E M C 0 0 0 C 1 1 1 C 2 2 S E M
528 // T M P E D L P 0 N R A 1 N R A 2 N R T D M
529 {0,-1,-1, 1, 2, 3, 4, 5, 6, 7, 8,-1,-1,-1,-1,-1,-1,-1, 9,10,11},
530 {0, 1, 2, 3, 4 ,5 ,6 ,7, 8, 9,10,11,12,-1,-1,-1,13,14,15,16,17},
531 {0,-1,-1,-1,-1, 1, 2, 3, 4, 5,-1, 6, 7, 8,-1, 9,10,11,12,13,14}
532 };
533525 unsigned TargetFlags = get(Opcode).TSFlags;
534526 unsigned OpTableIdx;
535527
555547 OpTableIdx = 2;
556548 }
557549
558 return OpTable[OpTableIdx][Op];
550 return R600Operands::ALUOpTable[OpTableIdx][Op];
559551 }
560552
561553 void R600InstrInfo::setImmOperand(MachineInstr *MI, R600Operands::Ops Op,
6969 let PrintMethod = PM;
7070 }
7171
72 // src_sel for ALU src operands, see also ALU_CONST, ALU_PARAM registers
73 def SEL : OperandWithDefaultOps {
74 let PrintMethod = "printSel";
75 }
76
7277 def LITERAL : InstFlag<"printLiteral">;
7378
7479 def WRITE : InstFlag <"printWrite", 1>;
8893 def ADDRParam : ComplexPattern;
8994 def ADDRDWord : ComplexPattern;
9095 def ADDRVTX_READ : ComplexPattern;
96 def ADDRGA_CONST_OFFSET : ComplexPattern;
97 def ADDRGA_VAR_OFFSET : ComplexPattern;
9198
9299 class R600ALU_Word0 {
93100 field bits<32> Word0;
262269 InstR600 <0,
263270 (outs R600_Reg32:$dst),
264271 (ins WRITE:$write, OMOD:$omod, REL:$dst_rel, CLAMP:$clamp,
265 R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs,
272 R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel,
266273 LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
267274 !strconcat(opName,
268275 "$clamp $dst$write$dst_rel$omod, "
269 "$src0_neg$src0_abs$src0$src0_abs$src0_rel, "
276 "$src0_neg$src0_abs$src0$src0_sel$src0_abs$src0_rel, "
270277 "$literal $pred_sel$last"),
271278 pattern,
272279 itin>,
302309 (outs R600_Reg32:$dst),
303310 (ins UEM:$update_exec_mask, UP:$update_pred, WRITE:$write,
304311 OMOD:$omod, REL:$dst_rel, CLAMP:$clamp,
305 R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs,
306 R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, ABS:$src1_abs,
312 R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel,
313 R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, ABS:$src1_abs, SEL:$src1_sel,
307314 LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
308315 !strconcat(opName,
309316 "$clamp $update_exec_mask$update_pred$dst$write$dst_rel$omod, "
310 "$src0_neg$src0_abs$src0$src0_abs$src0_rel, "
311 "$src1_neg$src1_abs$src1$src1_abs$src1_rel, "
317 "$src0_neg$src0_abs$src0$src0_sel$src0_abs$src0_rel, "
318 "$src1_neg$src1_abs$src1$src1_sel$src1_abs$src1_rel, "
312319 "$literal $pred_sel$last"),
313320 pattern,
314321 itin>,
339346 InstR600 <0,
340347 (outs R600_Reg32:$dst),
341348 (ins REL:$dst_rel, CLAMP:$clamp,
342 R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel,
343 R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel,
344 R600_Reg32:$src2, NEG:$src2_neg, REL:$src2_rel,
349 R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, SEL:$src0_sel,
350 R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, SEL:$src1_sel,
351 R600_Reg32:$src2, NEG:$src2_neg, REL:$src2_rel, SEL:$src2_sel,
345352 LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
346353 !strconcat(opName, "$clamp $dst$dst_rel, "
347 "$src0_neg$src0$src0_rel, "
348 "$src1_neg$src1$src1_rel, "
349 "$src2_neg$src2$src2_rel, "
354 "$src0_neg$src0$src0_sel$src0_rel, "
355 "$src1_neg$src1$src1_sel$src1_rel, "
356 "$src2_neg$src2$src2_sel$src2_rel, "
350357 "$literal $pred_sel$last"),
351358 pattern,
352359 itin>,
481488 >;
482489
483490 def CONST_ADDRESS: SDNode<"AMDGPUISD::CONST_ADDRESS",
484 SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisPtrTy<1>]>,
491 SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisPtrTy<1>]>,
485492 [SDNPMayLoad]
486493 >;
487494
15371544
15381545 } // End mayLoad = 0, mayStore = 0, hasSideEffects = 1
15391546
1540 def R600_LOAD_CONST : AMDGPUShaderInst <
1541 (outs R600_Reg32:$dst),
1542 (ins i32imm:$src0),
1543 "R600_LOAD_CONST $dst, $src0",
1544 [(set R600_Reg32:$dst, (int_AMDGPU_load_const imm:$src0))]
1545 >;
15461547
15471548 def RESERVE_REG : AMDGPUShaderInst <
15481549 (outs),
15501551 "RESERVE_REG $src",
15511552 [(int_AMDGPU_reserve_reg imm:$src)]
15521553 >;
1553
15541554 def TXD: AMDGPUShaderInst <
15551555 (outs R600_Reg128:$dst),
15561556 (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget),
15791579 def RETURN : ILFormat<(outs), (ins variable_ops),
15801580 "RETURN", [(IL_retflag)]>;
15811581 }
1582
1583
1584 //===----------------------------------------------------------------------===//
1585 // Constant Buffer Addressing Support
1586 //===----------------------------------------------------------------------===//
1587
1588 let isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU" in {
1589 def CONST_COPY : Instruction {
1590 let OutOperandList = (outs R600_Reg32:$dst);
1591 let InOperandList = (ins i32imm:$src);
1592 let Pattern = [(set R600_Reg32:$dst, (CONST_ADDRESS ADDRGA_CONST_OFFSET:$src))];
1593 let AsmString = "CONST_COPY";
1594 let neverHasSideEffects = 1;
1595 let isAsCheapAsAMove = 1;
1596 let Itinerary = NullALU;
1597 }
1598 } // end isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU"
1599
1600 def TEX_VTX_CONSTBUF :
1601 InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr), "VTX_READ_eg $dst, $ptr",
1602 [(set R600_Reg128:$dst, (CONST_ADDRESS ADDRGA_VAR_OFFSET:$ptr))]>,
1603 VTX_WORD1_GPR, VTX_WORD0 {
1604
1605 let VC_INST = 0;
1606 let FETCH_TYPE = 2;
1607 let FETCH_WHOLE_QUAD = 0;
1608 let BUFFER_ID = 0;
1609 let SRC_REL = 0;
1610 let SRC_SEL_X = 0;
1611 let DST_REL = 0;
1612 let USE_CONST_FIELDS = 0;
1613 let NUM_FORMAT_ALL = 2;
1614 let FORMAT_COMP_ALL = 1;
1615 let SRF_MODE_ALL = 1;
1616 let MEGA_FETCH_COUNT = 16;
1617 let DST_SEL_X = 0;
1618 let DST_SEL_Y = 1;
1619 let DST_SEL_Z = 2;
1620 let DST_SEL_W = 3;
1621 let DATA_FORMAT = 35;
1622
1623 let Inst{31-0} = Word0;
1624 let Inst{63-32} = Word1;
1625
1626 // LLVM can only encode 64-bit instructions, so these fields are manually
1627 // encoded in R600CodeEmitter
1628 //
1629 // bits<16> OFFSET;
1630 // bits<2> ENDIAN_SWAP = 0;
1631 // bits<1> CONST_BUF_NO_STRIDE = 0;
1632 // bits<1> MEGA_FETCH = 0;
1633 // bits<1> ALT_CONST = 0;
1634 // bits<2> BUFFER_INDEX_MODE = 0;
1635
1636
1637
1638 // VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
1639 // is done in R600CodeEmitter
1640 //
1641 // Inst{79-64} = OFFSET;
1642 // Inst{81-80} = ENDIAN_SWAP;
1643 // Inst{82} = CONST_BUF_NO_STRIDE;
1644 // Inst{83} = MEGA_FETCH;
1645 // Inst{84} = ALT_CONST;
1646 // Inst{86-85} = BUFFER_INDEX_MODE;
1647 // Inst{95-86} = 0; Reserved
1648
1649 // VTX_WORD3 (Padding)
1650 //
1651 // Inst{127-96} = 0;
1652 }
1653
15821654
15831655 //===--------------------------------------------------------------------===//
15841656 // Instructions support
0 //===-- R600LowerConstCopy.cpp - Propagate ConstCopy / lower them to MOV---===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This pass is intended to handle remaining ConstCopy pseudo MachineInstr.
11 /// ISel will fold each Const Buffer read inside scalar ALU. However it cannot
12 /// fold them inside vector instruction, like DOT4 or Cube ; ISel emits
13 /// ConstCopy instead. This pass (executed after ExpandingSpecialInstr) will try
14 /// to fold them if possible or replace them by MOV otherwise.
15 /// TODO : Implement the folding part, using Copy Propagation algorithm.
16 //
17 //===----------------------------------------------------------------------===//
18
19 #include "AMDGPU.h"
20 #include "R600InstrInfo.h"
21 #include "llvm/CodeGen/MachineFunction.h"
22 #include "llvm/CodeGen/MachineFunctionPass.h"
23 #include "llvm/CodeGen/MachineInstrBuilder.h"
24 #include "llvm/IR/GlobalValue.h"
25
26 namespace llvm {
27
28 class R600LowerConstCopy : public MachineFunctionPass {
29 private:
30 static char ID;
31 const R600InstrInfo *TII;
32 public:
33 R600LowerConstCopy(TargetMachine &tm);
34 virtual bool runOnMachineFunction(MachineFunction &MF);
35
36 const char *getPassName() const { return "R600 Eliminate Symbolic Operand"; }
37 };
38
39 char R600LowerConstCopy::ID = 0;
40
41
42 R600LowerConstCopy::R600LowerConstCopy(TargetMachine &tm) :
43 MachineFunctionPass(ID),
44 TII (static_cast(tm.getInstrInfo()))
45 {
46 }
47
48 bool R600LowerConstCopy::runOnMachineFunction(MachineFunction &MF) {
49 for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
50 BB != BB_E; ++BB) {
51 MachineBasicBlock &MBB = *BB;
52 for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
53 I != E;) {
54 MachineInstr &MI = *I;
55 I = llvm::next(I);
56 if (MI.getOpcode() != AMDGPU::CONST_COPY)
57 continue;
58 MachineInstr *NewMI = TII->buildDefaultInstruction(MBB, I, AMDGPU::MOV,
59 MI.getOperand(0).getReg(), AMDGPU::ALU_CONST);
60 NewMI->getOperand(9).setImm(MI.getOperand(1).getImm());
61 MI.eraseFromParent();
62 }
63 }
64 return false;
65 }
66
67 FunctionPass *createR600LowerConstCopy(TargetMachine &tm) {
68 return new R600LowerConstCopy(tm);
69 }
70
71 }
72
73
3737 Reserved.set(AMDGPU::NEG_ONE);
3838 Reserved.set(AMDGPU::PV_X);
3939 Reserved.set(AMDGPU::ALU_LITERAL_X);
40 Reserved.set(AMDGPU::ALU_CONST);
4041 Reserved.set(AMDGPU::PREDICATE_BIT);
4142 Reserved.set(AMDGPU::PRED_SEL_OFF);
4243 Reserved.set(AMDGPU::PRED_SEL_ZERO);
4344 Reserved.set(AMDGPU::PRED_SEL_ONE);
44
45 for (TargetRegisterClass::iterator I = AMDGPU::R600_CReg32RegClass.begin(),
46 E = AMDGPU::R600_CReg32RegClass.end(); I != E; ++I) {
47 Reserved.set(*I);
48 }
4945
5046 for (std::vector::const_iterator I = MFI->ReservedRegs.begin(),
5147 E = MFI->ReservedRegs.end(); I != E; ++I) {
2626 foreach Chan = [ "X", "Y", "Z", "W" ] in {
2727 // 32-bit Temporary Registers
2828 def T#Index#_#Chan : R600RegWithChan <"T"#Index#"."#Chan, Index, Chan>;
29
30 // 32-bit Constant Registers (There are more than 128, this the number
31 // that is currently supported.
32 def C#Index#_#Chan : R600RegWithChan <"C"#Index#"."#Chan, Index, Chan>;
3329 }
3430 // 128-bit Temporary Registers
3531 def T#Index#_XYZW : R600Reg_128 <"T"#Index#".XYZW",
6359
6460 def R600_ArrayBase : RegisterClass <"AMDGPU", [f32, i32], 32,
6561 (add (sequence "ArrayBase%u", 448, 464))>;
66
67 def R600_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32,
68 (add (interleave
69 (interleave (sequence "C%u_X", 0, 127),
70 (sequence "C%u_Z", 0, 127)),
71 (interleave (sequence "C%u_Y", 0, 127),
72 (sequence "C%u_W", 0, 127))))>;
62 // special registers for ALU src operands
63 // const buffer reference, SRCx_SEL contains index
64 def ALU_CONST : R600Reg<"CBuf", 0>;
65 // interpolation param reference, SRCx_SEL contains index
66 def ALU_PARAM : R600Reg<"Param", 0>;
7367
7468 def R600_TReg32_X : RegisterClass <"AMDGPU", [f32, i32], 32,
7569 (add (sequence "T%u_X", 0, 127))>;
8478 (add (sequence "T%u_W", 0, 127))>;
8579
8680 def R600_TReg32 : RegisterClass <"AMDGPU", [f32, i32], 32,
87 (add (interleave
88 (interleave R600_TReg32_X, R600_TReg32_Z),
89 (interleave R600_TReg32_Y, R600_TReg32_W)))>;
81 (interleave R600_TReg32_X, R600_TReg32_Y,
82 R600_TReg32_Z, R600_TReg32_W)>;
9083
9184 def R600_Reg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add
9285 R600_TReg32,
93 R600_CReg32,
9486 R600_ArrayBase,
95 ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF)>;
87 ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF,
88 ALU_CONST, ALU_PARAM
89 )>;
9690
9791 def R600_Predicate : RegisterClass <"AMDGPU", [i32], 32, (add
9892 PRED_SEL_OFF, PRED_SEL_ZERO, PRED_SEL_ONE)>;