llvm.org GIT mirror llvm / 2774434
R600/SI: Remove explicit m0 operand from s_sendmsg Instead add m0 as an implicit operand. This allows us to avoid using the M0Reg register class and eliminates a number of unnecessary spills when using s_sendmsg instructions. This impacts one shader in the shader-db: SGPRS: 48 -> 40 (-16.67 %) VGPRS: 112 -> 108 (-3.57 %) Code Size: 40132 -> 38796 (-3.33 %) bytes LDS: 0 -> 0 (0.00 %) blocks Scratch: 2048 -> 0 (-100.00 %) bytes per wave git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@237133 91177308-0d34-0410-b5e6-96231b3b80d8 Tom Stellard 5 years ago
6 changed file(s) with 36 addition(s) and 8 deletion(s). Raw diff Collapse all Expand all
26762676 NODE_NAME_CASE(BUILD_VERTICAL_VECTOR)
26772677 NODE_NAME_CASE(CONST_DATA_PTR)
26782678 case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break;
2679 NODE_NAME_CASE(SENDMSG)
26792680 NODE_NAME_CASE(STORE_MSKOR)
26802681 NODE_NAME_CASE(TBUFFER_STORE_FORMAT)
26812682 case AMDGPUISD::LAST_AMDGPU_ISD_NUMBER: break;
287287 BUILD_VERTICAL_VECTOR,
288288 /// Pointer to the start of the shader's constant data.
289289 CONST_DATA_PTR,
290 SENDMSG,
290291 FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,
291292 STORE_MSKOR,
292293 LOAD_CONSTANT,
218218 []
219219 >;
220220
221 def AMDGPUsendmsg : SDNode<"AMDGPUISD::SENDMSG",
222 SDTypeProfile<0, 1, [SDTCisInt<0>]>,
223 [SDNPHasChain, SDNPInGlue]>;
224
221225 //===----------------------------------------------------------------------===//
222226 // Flow Control Profile Types
223227 //===----------------------------------------------------------------------===//
835835 PtrHi, DAG.getConstant(0, DL, MVT::i32),
836836 SDValue(Lo.getNode(), 1));
837837 return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
838 }
839
840 SDValue SITargetLowering::copyToM0(SelectionDAG &DAG, SDValue Chain, SDLoc DL,
841 SDValue V) const {
842 // We can't use CopyToReg, because MachineCSE won't combine COPY instructions,
843 // so we will end up with redundant moves to m0.
844 //
845 // We can't use S_MOV_B32, because there is no way to specify m0 as the
846 // destination register.
847 //
848 // We have to use them both. MachineCSE will combine all the S_MOV_B32
849 // instructions and the register coalescer will eliminate the extra copies.
850 SDNode *M0 = DAG.getMachineNode(AMDGPU::S_MOV_B32, DL, V.getValueType(), V);
851 return DAG.getCopyToReg(Chain, DL, DAG.getRegister(AMDGPU::M0, MVT::i32),
852 SDValue(M0, 0), SDValue()); // Glue
853 // A Null SDValue creates
854 // a glue result.
838855 }
839856
840857 SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
939956 SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
940957 SelectionDAG &DAG) const {
941958 MachineFunction &MF = DAG.getMachineFunction();
959 SDLoc DL(Op);
942960 SDValue Chain = Op.getOperand(0);
943961 unsigned IntrinsicID = cast(Op.getOperand(1))->getZExtValue();
944962
945963 switch (IntrinsicID) {
964 case AMDGPUIntrinsic::SI_sendmsg: {
965 Chain = copyToM0(DAG, Chain, DL, Op.getOperand(3));
966 SDValue Glue = Chain.getValue(1);
967 return DAG.getNode(AMDGPUISD::SENDMSG, DL, MVT::Other, Chain,
968 Op.getOperand(2), Glue);
969 }
946970 case AMDGPUIntrinsic::SI_tbuffer_store: {
947 SDLoc DL(Op);
948971 SDValue Ops[] = {
949972 Chain,
950973 Op.getOperand(2),
116116 std::pair getRegForInlineAsmConstraint(
117117 const TargetRegisterInfo *TRI,
118118 const std::string &Constraint, MVT VT) const override;
119 SDValue copyToM0(SelectionDAG &DAG, SDValue Chain, SDLoc DL, SDValue V) const;
119120 };
120121
121122 } // End namespace llvm
487487 def S_SLEEP : SOPP <0x0000000e, (ins i16imm:$simm16), "s_sleep $simm16">;
488488 def S_SETPRIO : SOPP <0x0000000f, (ins i16imm:$sim16), "s_setprio $sim16">;
489489
490 let Uses = [EXEC] in {
491 def S_SENDMSG : SOPP <0x00000010, (ins SendMsgImm:$simm16, M0Reg:$m0), "s_sendmsg $simm16",
492 [(int_SI_sendmsg imm:$simm16, M0Reg:$m0)]
493 > {
494 let DisableEncoding = "$m0";
495 }
496 } // End Uses = [EXEC]
490 let Uses = [EXEC, M0] in {
491 def S_SENDMSG : SOPP <0x00000010, (ins SendMsgImm:$simm16), "s_sendmsg $simm16",
492 [(AMDGPUsendmsg (i32 imm:$simm16))]
493 >;
494 } // End Uses = [EXEC, M0]
497495
498496 def S_SENDMSGHALT : SOPP <0x00000011, (ins i16imm:$simm16), "s_sendmsghalt $simm16">;
499497 def S_TRAP : SOPP <0x00000012, (ins i16imm:$simm16), "s_trap $simm16">;