llvm.org GIT mirror llvm / 208bbb1
AMDGPU: Use CopyToReg for interp intrinsic lowering. This doesn't use the default value, so it doesn't benefit from the hack to help optimize it. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@375450 91177308-0d34-0410-b5e6-96231b3b80d8 Matt Arsenault 1 year, 1 month ago
2 changed file(s) with 21 addition(s) and 20 deletion(s). Raw diff Collapse all Expand all
58765876 case Intrinsic::amdgcn_fdiv_fast:
58775877 return lowerFDIV_FAST(Op, DAG);
58785878 case Intrinsic::amdgcn_interp_mov: {
5879 SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(4));
5880 SDValue Glue = M0.getValue(1);
5879 SDValue ToM0 = DAG.getCopyToReg(DAG.getEntryNode(), DL, AMDGPU::M0,
5880 Op.getOperand(4), SDValue());
58815881 return DAG.getNode(AMDGPUISD::INTERP_MOV, DL, MVT::f32, Op.getOperand(1),
5882 Op.getOperand(2), Op.getOperand(3), Glue);
5882 Op.getOperand(2), Op.getOperand(3), ToM0.getValue(1));
58835883 }
58845884 case Intrinsic::amdgcn_interp_p1: {
5885 SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(4));
5886 SDValue Glue = M0.getValue(1);
5885 SDValue ToM0 = DAG.getCopyToReg(DAG.getEntryNode(), DL, AMDGPU::M0,
5886 Op.getOperand(4), SDValue());
58875887 return DAG.getNode(AMDGPUISD::INTERP_P1, DL, MVT::f32, Op.getOperand(1),
5888 Op.getOperand(2), Op.getOperand(3), Glue);
5888 Op.getOperand(2), Op.getOperand(3), ToM0.getValue(1));
58895889 }
58905890 case Intrinsic::amdgcn_interp_p2: {
5891 SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(5));
5892 SDValue Glue = SDValue(M0.getNode(), 1);
5891 SDValue ToM0 = DAG.getCopyToReg(DAG.getEntryNode(), DL, AMDGPU::M0,
5892 Op.getOperand(5), SDValue());
58935893 return DAG.getNode(AMDGPUISD::INTERP_P2, DL, MVT::f32, Op.getOperand(1),
58945894 Op.getOperand(2), Op.getOperand(3), Op.getOperand(4),
5895 Glue);
5895 ToM0.getValue(1));
58965896 }
58975897 case Intrinsic::amdgcn_interp_p1_f16: {
5898 SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(5));
5899 SDValue Glue = M0.getValue(1);
5898 SDValue ToM0 = DAG.getCopyToReg(DAG.getEntryNode(), DL, AMDGPU::M0,
5899 Op.getOperand(5), SDValue());
5900
59005901 if (getSubtarget()->getLDSBankCount() == 16) {
59015902 // 16 bank LDS
59025903 SDValue S = DAG.getNode(AMDGPUISD::INTERP_MOV, DL, MVT::f32,
59035904 DAG.getConstant(2, DL, MVT::i32), // P0
59045905 Op.getOperand(2), // Attrchan
59055906 Op.getOperand(3), // Attr
5906 Glue);
5907 ToM0.getValue(1));
59075908 SDValue Ops[] = {
59085909 Op.getOperand(1), // Src0
59095910 Op.getOperand(2), // Attrchan
59265927 Op.getOperand(4), // high
59275928 DAG.getTargetConstant(0, DL, MVT::i1), // $clamp
59285929 DAG.getTargetConstant(0, DL, MVT::i32), // $omod
5929 Glue
5930 ToM0.getValue(1)
59305931 };
59315932 return DAG.getNode(AMDGPUISD::INTERP_P1LL_F16, DL, MVT::f32, Ops);
59325933 }
59335934 }
59345935 case Intrinsic::amdgcn_interp_p2_f16: {
5935 SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(6));
5936 SDValue Glue = SDValue(M0.getNode(), 1);
5936 SDValue ToM0 = DAG.getCopyToReg(DAG.getEntryNode(), DL, AMDGPU::M0,
5937 Op.getOperand(6), SDValue());
59375938 SDValue Ops[] = {
59385939 Op.getOperand(2), // Src0
59395940 Op.getOperand(3), // Attrchan
59435944 DAG.getTargetConstant(0, DL, MVT::i32), // $src2_modifiers
59445945 Op.getOperand(5), // high
59455946 DAG.getTargetConstant(0, DL, MVT::i1), // $clamp
5946 Glue
5947 ToM0.getValue(1)
59475948 };
59485949 return DAG.getNode(AMDGPUISD::INTERP_P2_F16, DL, MVT::f16, Ops);
59495950 }
55 define amdgpu_ps half @interp_f16(float inreg %i, float inreg %j, i32 inreg %m0) #0 {
66 ; GFX9-32BANK-LABEL: interp_f16:
77 ; GFX9-32BANK: ; %bb.0: ; %main_body
8 ; GFX9-32BANK-NEXT: s_mov_b32 m0, s2
89 ; GFX9-32BANK-NEXT: v_mov_b32_e32 v0, s0
9 ; GFX9-32BANK-NEXT: s_mov_b32 m0, s2
1010 ; GFX9-32BANK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
1111 ; GFX9-32BANK-NEXT: v_interp_p1ll_f16 v1, v0, attr2.y
1212 ; GFX9-32BANK-NEXT: v_mov_b32_e32 v2, s1
1919 ;
2020 ; GFX8-32BANK-LABEL: interp_f16:
2121 ; GFX8-32BANK: ; %bb.0: ; %main_body
22 ; GFX8-32BANK-NEXT: s_mov_b32 m0, s2
2223 ; GFX8-32BANK-NEXT: v_mov_b32_e32 v0, s0
23 ; GFX8-32BANK-NEXT: s_mov_b32 m0, s2
2424 ; GFX8-32BANK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
2525 ; GFX8-32BANK-NEXT: v_interp_p1ll_f16 v1, v0, attr2.y
2626 ; GFX8-32BANK-NEXT: v_mov_b32_e32 v2, s1
118118 define amdgpu_ps half @interp_p2_m0_setup(float inreg %i, float inreg %j, i32 inreg %m0) #0 {
119119 ; GFX9-32BANK-LABEL: interp_p2_m0_setup:
120120 ; GFX9-32BANK: ; %bb.0: ; %main_body
121 ; GFX9-32BANK-NEXT: s_mov_b32 m0, s2
121122 ; GFX9-32BANK-NEXT: v_mov_b32_e32 v0, s0
122 ; GFX9-32BANK-NEXT: s_mov_b32 m0, s2
123123 ; GFX9-32BANK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
124124 ; GFX9-32BANK-NEXT: v_interp_p1ll_f16 v0, v0, attr2.y
125125 ; GFX9-32BANK-NEXT: ;;#ASMSTART
135135 ;
136136 ; GFX8-32BANK-LABEL: interp_p2_m0_setup:
137137 ; GFX8-32BANK: ; %bb.0: ; %main_body
138 ; GFX8-32BANK-NEXT: s_mov_b32 m0, s2
138139 ; GFX8-32BANK-NEXT: v_mov_b32_e32 v0, s0
139 ; GFX8-32BANK-NEXT: s_mov_b32 m0, s2
140140 ; GFX8-32BANK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
141141 ; GFX8-32BANK-NEXT: v_interp_p1ll_f16 v0, v0, attr2.y
142142 ; GFX8-32BANK-NEXT: ;;#ASMSTART