llvm.org GIT mirror llvm / b4a4ed4
AMDGPU: Treat undef as an inline immediate. This should only matter in vectors with an undef component, since a full undef vector would have been folded out. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@363941 91177308-0d34-0410-b5e6-96231b3b80d8 Matt Arsenault 28 days ago
3 changed file(s) with 39 addition(s) and 27 deletion(s). Raw diff Collapse all Expand all
6666
6767 namespace {
6868
69 static bool isNullConstantOrUndef(SDValue V) {
70 if (V.isUndef())
71 return true;
72
73 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
74 return Const != nullptr && Const->isNullValue();
75 }
76
6977 static bool getConstantValue(SDValue N, uint32_t &Out) {
78 // This is only used for packed vectors, where using 0 for undef should
79 // always be good.
80 if (N.isUndef()) {
81 Out = 0;
82 return true;
83 }
84
7085 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
7186 Out = C->getAPIntValue().getSExtValue();
7287 return true;
478493
479494 bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N,
480495 bool Negated) const {
481 // TODO: Handle undef
496 if (N->isUndef())
497 return true;
482498
483499 const SIInstrInfo *TII = Subtarget->getInstrInfo();
484500 if (Negated) {
609609 return SDValue(packNegConstantV2I16(N, *CurDAG), 0);
610610 }]>;
611611
612
613 // TODO: Handle undef as 0
614612 def NegSubInlineConstV216 : PatLeaf<(build_vector), [{
615613 assert(N->getNumOperands() == 2);
616614 assert(N->getOperand(0).getValueType().getSizeInBits() == 16);
619617 if (Src0 == Src1)
620618 return isNegInlineImmediate(Src0.getNode());
621619
622 return (isNullConstant(Src0) && isNegInlineImmediate(Src1.getNode())) ||
623 (isNullConstant(Src1) && isNegInlineImmediate(Src0.getNode()));
620 return (isNullConstantOrUndef(Src0) && isNegInlineImmediate(Src1.getNode())) ||
621 (isNullConstantOrUndef(Src1) && isNegInlineImmediate(Src0.getNode()));
624622 }], getNegV2I16Imm>;
625623
626624 //===----------------------------------------------------------------------===//
18841884 ; GFX9: ; %bb.0:
18851885 ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
18861886 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 2, v0
1887 ; GFX9-NEXT: s_mov_b32 s4, 0xffe00000
1888 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1889 ; GFX9-NEXT: v_mov_b32_e32 v1, s3
1890 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s2, v2
1891 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
1892 ; GFX9-NEXT: global_load_dword v3, v[0:1], off
1893 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s0, v2
1894 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
1895 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
1896 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1897 ; GFX9-NEXT: v_pk_add_u16 v2, v3, s4
1887 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1888 ; GFX9-NEXT: v_mov_b32_e32 v1, s3
1889 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s2, v2
1890 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
1891 ; GFX9-NEXT: global_load_dword v3, v[0:1], off
1892 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s0, v2
1893 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
1894 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
1895 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1896 ; GFX9-NEXT: v_pk_sub_u16 v2, v3, 32 op_sel:[0,1] op_sel_hi:[1,0]
18981897 ; GFX9-NEXT: global_store_dword v[0:1], v2, off
18991898 ; GFX9-NEXT: s_endpgm
19001899 %tid = call i32 @llvm.amdgcn.workitem.id.x()
19461945 ; GFX9: ; %bb.0:
19471946 ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
19481947 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 2, v0
1949 ; GFX9-NEXT: s_movk_i32 s4, 0xffe0
1950 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1951 ; GFX9-NEXT: v_mov_b32_e32 v1, s3
1952 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s2, v2
1953 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
1954 ; GFX9-NEXT: global_load_dword v3, v[0:1], off
1955 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s0, v2
1956 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
1957 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
1958 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1959 ; GFX9-NEXT: v_pk_add_u16 v2, v3, s4
1948 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1949 ; GFX9-NEXT: v_mov_b32_e32 v1, s3
1950 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s2, v2
1951 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
1952 ; GFX9-NEXT: global_load_dword v3, v[0:1], off
1953 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s0, v2
1954 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
1955 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
1956 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1957 ; GFX9-NEXT: v_pk_sub_u16 v2, v3, 32
19601958 ; GFX9-NEXT: global_store_dword v[0:1], v2, off
19611959 ; GFX9-NEXT: s_endpgm
19621960 %tid = call i32 @llvm.amdgcn.workitem.id.x()