llvm.org GIT mirror llvm / a89c183
R600/SI: Remove explicit m0 operand from v_interp instructions. Instead, add m0 as an implicit operand. This helps avoid spills of the m0 register in some cases. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@237140 91177308-0d34-0410-b5e6-96231b3b80d8 — Tom Stellard, 5 years ago
6 changed file(s) with 59 addition(s) and 33 deletion(s). Raw diff Collapse all Expand all
26772677 NODE_NAME_CASE(CONST_DATA_PTR)
26782678 case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break;
26792679 NODE_NAME_CASE(SENDMSG)
2680 NODE_NAME_CASE(INTERP_MOV)
2681 NODE_NAME_CASE(INTERP_P1)
2682 NODE_NAME_CASE(INTERP_P2)
26802683 NODE_NAME_CASE(STORE_MSKOR)
26812684 NODE_NAME_CASE(TBUFFER_STORE_FORMAT)
26822685 case AMDGPUISD::LAST_AMDGPU_ISD_NUMBER: break;
288288 /// Pointer to the start of the shader's constant data.
289289 CONST_DATA_PTR,
290290 SENDMSG,
291 INTERP_MOV,
292 INTERP_P1,
293 INTERP_P2,
291294 FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,
292295 STORE_MSKOR,
293296 LOAD_CONSTANT,
222222 SDTypeProfile<0, 1, [SDTCisInt<0>]>,
223223 [SDNPHasChain, SDNPInGlue]>;
224224
225 def AMDGPUinterp_mov : SDNode<"AMDGPUISD::INTERP_MOV",
226 SDTypeProfile<1, 3, [SDTCisFP<0>]>,
227 [SDNPInGlue]>;
228
229 def AMDGPUinterp_p1 : SDNode<"AMDGPUISD::INTERP_P1",
230 SDTypeProfile<1, 3, [SDTCisFP<0>]>,
231 [SDNPInGlue, SDNPOutGlue]>;
232
233 def AMDGPUinterp_p2 : SDNode<"AMDGPUISD::INTERP_P2",
234 SDTypeProfile<1, 4, [SDTCisFP<0>]>,
235 [SDNPInGlue]>;
236
225237 //===----------------------------------------------------------------------===//
226238 // Flow Control Profile Types
227239 //===----------------------------------------------------------------------===//
947947 case AMDGPUIntrinsic::AMDIL_fraction: // Legacy name.
948948 return DAG.getNode(ISD::FSUB, DL, VT, Op.getOperand(1),
949949 DAG.getNode(ISD::FFLOOR, DL, VT, Op.getOperand(1)));
950
950 case AMDGPUIntrinsic::SI_fs_constant: {
951 SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(3));
952 SDValue Glue = M0.getValue(1);
953 return DAG.getNode(AMDGPUISD::INTERP_MOV, DL, MVT::f32,
954 DAG.getConstant(2, DL, MVT::i32), // P0
955 Op.getOperand(1), Op.getOperand(2), Glue);
956 }
957 case AMDGPUIntrinsic::SI_fs_interp: {
958 SDValue IJ = Op.getOperand(4);
959 SDValue I = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, IJ,
960 DAG.getConstant(0, DL, MVT::i32));
961 SDValue J = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, IJ,
962 DAG.getConstant(1, DL, MVT::i32));
963 SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(3));
964 SDValue Glue = M0.getValue(1);
965 SDValue P1 = DAG.getNode(AMDGPUISD::INTERP_P1, DL,
966 DAG.getVTList(MVT::f32, MVT::Glue),
967 I, Op.getOperand(1), Op.getOperand(2), Glue);
968 Glue = SDValue(P1.getNode(), 1);
969 return DAG.getNode(AMDGPUISD::INTERP_P2, DL, MVT::f32, P1, J,
970 Op.getOperand(1), Op.getOperand(2), Glue);
971 }
951972 default:
952973 return AMDGPUTargetLowering::LowerOperation(Op, DAG);
953974 }
16691669 SIMCInstr;
16701670
16711671 multiclass VINTRP_m op, string opName, dag outs, dag ins, string asm,
1672 string disableEncoding = "", string constraints = "",
1673 list pattern = []> {
1672 list pattern = [],
1673 string disableEncoding = "", string constraints = ""> {
16741674 let DisableEncoding = disableEncoding,
16751675 Constraints = constraints in {
16761676 def "" : VINTRP_Pseudo ;
14321432 // VINTRP Instructions
14331433 //===----------------------------------------------------------------------===//
14341434
1435 let Uses = [M0] in {
1436
14351437 // FIXME: Specify SchedRW for VINTRP instructions.
14361438 defm V_INTERP_P1_F32 : VINTRP_m <
14371439 0x00000000, "v_interp_p1_f32",
14381440 (outs VGPR_32:$dst),
1439 (ins VGPR_32:$i, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
1440 "v_interp_p1_f32 $dst, $i, $attr_chan, $attr, [$m0]",
1441 "$m0">;
1441 (ins VGPR_32:$i, i32imm:$attr_chan, i32imm:$attr),
1442 "v_interp_p1_f32 $dst, $i, $attr_chan, $attr, [m0]",
1443 [(set f32:$dst, (AMDGPUinterp_p1 i32:$i, (i32 imm:$attr_chan),
1444 (i32 imm:$attr)))]>;
14421445
14431446 defm V_INTERP_P2_F32 : VINTRP_m <
14441447 0x00000001, "v_interp_p2_f32",
14451448 (outs VGPR_32:$dst),
1446 (ins VGPR_32:$src0, VGPR_32:$j, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
1447 "v_interp_p2_f32 $dst, [$src0], $j, $attr_chan, $attr, [$m0]",
1448 "$src0,$m0",
1449 (ins VGPR_32:$src0, VGPR_32:$j, i32imm:$attr_chan, i32imm:$attr),
1450 "v_interp_p2_f32 $dst, [$src0], $j, $attr_chan, $attr, [m0]",
1451 [(set f32:$dst, (AMDGPUinterp_p2 f32:$src0, i32:$j, (i32 imm:$attr_chan),
1452 (i32 imm:$attr)))],
1453 "$src0",
14491454 "$src0 = $dst">;
14501455
14511456 defm V_INTERP_MOV_F32 : VINTRP_m <
14521457 0x00000002, "v_interp_mov_f32",
14531458 (outs VGPR_32:$dst),
1454 (ins InterpSlot:$src0, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
1455 "v_interp_mov_f32 $dst, $src0, $attr_chan, $attr, [$m0]",
1456 "$m0">;
1459 (ins InterpSlot:$src0, i32imm:$attr_chan, i32imm:$attr),
1460 "v_interp_mov_f32 $dst, $src0, $attr_chan, $attr, [m0]",
1461 [(set f32:$dst, (AMDGPUinterp_mov (i32 imm:$src0), (i32 imm:$attr_chan),
1462 (i32 imm:$attr)))]>;
1463
1464 } // End Uses = [M0]
14571465
14581466 //===----------------------------------------------------------------------===//
14591467 // VOP2 Instructions
27232731 (S_MOV_B64 (f64 (bitcast_fpimm_to_i64 InlineFPImm:$imm)))
27242732 >;
27252733
2726 /********** ====================== **********/
2727 /********** Interpolation Patterns **********/
2728 /********** ====================== **********/
2729
2730 // The value of $params is constant throughout the entire kernel.
2731 // We need to use S_MOV_B32 $params, because CSE ignores copies, so
2732 // without it we end up with a lot of redundant moves.
2733
2734 def : Pat <
2735 (int_SI_fs_constant imm:$attr_chan, imm:$attr, i32:$params),
2736 (V_INTERP_MOV_F32 INTERP.P0, imm:$attr_chan, imm:$attr, (S_MOV_B32 $params))
2737 >;
2738
2739 def : Pat <
2740 (int_SI_fs_interp imm:$attr_chan, imm:$attr, i32:$params, v2i32:$ij),
2741 (V_INTERP_P2_F32 (V_INTERP_P1_F32 (EXTRACT_SUBREG v2i32:$ij, sub0),
2742 imm:$attr_chan, imm:$attr, (S_MOV_B32 $params)),
2743 (EXTRACT_SUBREG $ij, sub1),
2744 imm:$attr_chan, imm:$attr, (S_MOV_B32 $params))
2745 >;
2746
27472734 /********** ================== **********/
27482735 /********** Intrinsic Patterns **********/
27492736 /********** ================== **********/