llvm.org GIT mirror llvm / e672b94
AMDGPU: Select basic interp directly from intrinsics
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@375457 91177308-0d34-0410-b5e6-96231b3b80d8
Author: Matt Arsenault (1 year, 1 month ago)
5 changed file(s) with 29 addition(s) and 57 deletion(s). Raw diff Collapse all Expand all
562562 case ISD::FREM:
563563 case ISD::INLINEASM:
564564 case ISD::INLINEASM_BR:
565 case AMDGPUISD::INTERP_P1:
566 case AMDGPUISD::INTERP_P2:
567565 case AMDGPUISD::DIV_SCALE:
566 case ISD::INTRINSIC_W_CHAIN:
568567
569568 // TODO: Should really be looking at the users of the bitcast. These are
570569 // problematic because bitcasts are used to legalize all stores to integer
571570 // types.
572571 case ISD::BITCAST:
573572 return false;
573 case ISD::INTRINSIC_WO_CHAIN: {
574 switch (cast<ConstantSDNode>(N->getOperand(0))->getZExtValue()) {
575 case Intrinsic::amdgcn_interp_p1:
576 case Intrinsic::amdgcn_interp_p2:
577 case Intrinsic::amdgcn_interp_mov:
578 case Intrinsic::amdgcn_interp_p1_f16:
579 case Intrinsic::amdgcn_interp_p2_f16:
580 return false;
581 default:
582 return true;
583 }
584 }
574585 default:
575586 return true;
576587 }
42824293 NODE_NAME_CASE(KILL)
42834294 NODE_NAME_CASE(DUMMY_CHAIN)
42844295 case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break;
4285 NODE_NAME_CASE(INTERP_MOV)
4286 NODE_NAME_CASE(INTERP_P1)
4287 NODE_NAME_CASE(INTERP_P2)
42884296 NODE_NAME_CASE(INTERP_P1LL_F16)
42894297 NODE_NAME_CASE(INTERP_P1LV_F16)
42904298 NODE_NAME_CASE(INTERP_P2_F16)
475475 BUILD_VERTICAL_VECTOR,
476476 /// Pointer to the start of the shader's constant data.
477477 CONST_DATA_PTR,
478 INTERP_MOV,
479 INTERP_P1,
480 INTERP_P2,
481478 INTERP_P1LL_F16,
482479 INTERP_P1LV_F16,
483480 INTERP_P2_F16,
327327 []>;
328328
329329 def AMDGPUperm : SDNode<"AMDGPUISD::PERM", AMDGPUDTIntTernaryOp, []>;
330
331 def AMDGPUinterp_mov : SDNode<"AMDGPUISD::INTERP_MOV",
332 SDTypeProfile<1, 3, [SDTCisFP<0>]>,
333 [SDNPInGlue]>;
334
335 def AMDGPUinterp_p1 : SDNode<"AMDGPUISD::INTERP_P1",
336 SDTypeProfile<1, 3, [SDTCisFP<0>]>,
337 [SDNPInGlue, SDNPOutGlue]>;
338
339 def AMDGPUinterp_p2 : SDNode<"AMDGPUISD::INTERP_P2",
340 SDTypeProfile<1, 4, [SDTCisFP<0>]>,
341 [SDNPInGlue]>;
342330
343331 def AMDGPUinterp_p1ll_f16 : SDNode<"AMDGPUISD::INTERP_P1LL_F16",
344332 SDTypeProfile<1, 7, [SDTCisFP<0>]>,
58755875 }
58765876 case Intrinsic::amdgcn_fdiv_fast:
58775877 return lowerFDIV_FAST(Op, DAG);
5878 case Intrinsic::amdgcn_interp_mov: {
5879 SDValue ToM0 = DAG.getCopyToReg(DAG.getEntryNode(), DL, AMDGPU::M0,
5880 Op.getOperand(4), SDValue());
5881 return DAG.getNode(AMDGPUISD::INTERP_MOV, DL, MVT::f32, Op.getOperand(1),
5882 Op.getOperand(2), Op.getOperand(3), ToM0.getValue(1));
5883 }
5884 case Intrinsic::amdgcn_interp_p1: {
5885 SDValue ToM0 = DAG.getCopyToReg(DAG.getEntryNode(), DL, AMDGPU::M0,
5886 Op.getOperand(4), SDValue());
5887 return DAG.getNode(AMDGPUISD::INTERP_P1, DL, MVT::f32, Op.getOperand(1),
5888 Op.getOperand(2), Op.getOperand(3), ToM0.getValue(1));
5889 }
5890 case Intrinsic::amdgcn_interp_p2: {
5891 SDValue ToM0 = DAG.getCopyToReg(DAG.getEntryNode(), DL, AMDGPU::M0,
5892 Op.getOperand(5), SDValue());
5893 return DAG.getNode(AMDGPUISD::INTERP_P2, DL, MVT::f32, Op.getOperand(1),
5894 Op.getOperand(2), Op.getOperand(3), Op.getOperand(4),
5895 ToM0.getValue(1));
5896 }
58975878 case Intrinsic::amdgcn_interp_p1_f16: {
58985879 SDValue ToM0 = DAG.getCopyToReg(DAG.getEntryNode(), DL, AMDGPU::M0,
58995880 Op.getOperand(5), SDValue());
5900
59015881 if (getSubtarget()->getLDSBankCount() == 16) {
59025882 // 16 bank LDS
5903 SDValue S = DAG.getNode(AMDGPUISD::INTERP_MOV, DL, MVT::f32,
5904 DAG.getConstant(2, DL, MVT::i32), // P0
5905 Op.getOperand(2), // Attrchan
5906 Op.getOperand(3), // Attr
5907 ToM0.getValue(1));
5883
5884 // FIXME: This implicitly will insert a second CopyToReg to M0.
5885 SDValue S = DAG.getNode(
5886 ISD::INTRINSIC_WO_CHAIN, DL, MVT::f32,
5887 DAG.getTargetConstant(Intrinsic::amdgcn_interp_mov, DL, MVT::i32),
5888 DAG.getConstant(2, DL, MVT::i32), // P0
5889 Op.getOperand(2), // Attrchan
5890 Op.getOperand(3), // Attr
5891 Op.getOperand(5)); // m0
5892
59085893 SDValue Ops[] = {
59095894 Op.getOperand(1), // Src0
59105895 Op.getOperand(2), // Attrchan
1089410879 case ISD::INTRINSIC_W_CHAIN:
1089510880 return AMDGPU::isIntrinsicSourceOfDivergence(
1089610881 cast<ConstantSDNode>(N->getOperand(1))->getZExtValue());
10897 // In some cases intrinsics that are a source of divergence have been
10898 // lowered to AMDGPUISD so we also need to check those too.
10899 case AMDGPUISD::INTERP_MOV:
10900 case AMDGPUISD::INTERP_P1:
10901 case AMDGPUISD::INTERP_P2:
10902 return true;
1090310882 }
1090410883 return false;
1090510884 }
4242 (outs VINTRPDst:$vdst),
4343 (ins VGPR_32:$vsrc, Attr:$attr, AttrChan:$attrchan),
4444 "v_interp_p1_f32$vdst, $vsrc, $attr$attrchan",
45 [(set f32:$vdst, (AMDGPUinterp_p1 f32:$vsrc, (i32 timm:$attrchan),
46 (i32 timm:$attr)))]
45 [(set f32:$vdst, (int_amdgcn_interp_p1 f32:$vsrc,
46 (i32 timm:$attrchan), (i32 timm:$attr), M0))]
4747 >;
4848
4949 let OtherPredicates = [has32BankLDS] in {
6565 (outs VINTRPDst:$vdst),
6666 (ins VGPR_32:$src0, VGPR_32:$vsrc, Attr:$attr, AttrChan:$attrchan),
6767 "v_interp_p2_f32$vdst, $vsrc, $attr$attrchan",
68 [(set f32:$vdst, (AMDGPUinterp_p2 f32:$src0, f32:$vsrc, (i32 timm:$attrchan),
69 (i32 timm:$attr)))]>;
68 [(set f32:$vdst, (int_amdgcn_interp_p2 f32:$src0, f32:$vsrc,
69 (i32 timm:$attrchan), (i32 timm:$attr), M0))]>;
7070
7171 } // End DisableEncoding = "$src0", Constraints = "$src0 = $vdst"
7272
7575 (outs VINTRPDst:$vdst),
7676 (ins InterpSlot:$vsrc, Attr:$attr, AttrChan:$attrchan),
7777 "v_interp_mov_f32$vdst, $vsrc, $attr$attrchan",
78 [(set f32:$vdst, (AMDGPUinterp_mov (i32 imm:$vsrc), (i32 timm:$attrchan),
79 (i32 timm:$attr)))]>;
78 [(set f32:$vdst, (int_amdgcn_interp_mov (i32 imm:$vsrc),
79 (i32 timm:$attrchan), (i32 timm:$attr), M0))]>;
8080
8181 } // End Uses = [M0, EXEC]
8282