llvm.org GIT mirror llvm / 9a6e4f0
R600/SI: Remove SIISelLowering::legalizeOperands(). Its functionality has been replaced by calling SIInstrInfo::legalizeOperands() from SIISelLowering::AdjustInstrPostInstrSelection() and running the SIFoldOperands and SIShrinkInstructions passes. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@225445 91177308-0d34-0410-b5e6-96231b3b80d8 Tom Stellard 5 years ago
12 changed file(s) with 34 addition(s) and 188 deletion(s). Raw diff Collapse all Expand all
16891689 }
16901690 }
16911691
1692 /// \brief Test if RegClass is one of the SSrc classes
1693 static bool isSSrc(unsigned RegClass) {
1694 return AMDGPU::SSrc_32RegClassID == RegClass ||
1695 AMDGPU::SSrc_64RegClassID == RegClass;
1696 }
1697
16981692 /// \brief Analyze the possible immediate value Op
16991693 ///
17001694 /// Returns -1 if it isn't an immediate, 0 if it's an inline immediate
17251719 }
17261720
17271721 return -1;
1728 }
1729
1730 /// \brief Try to fold an immediate directly into an instruction
1731 bool SITargetLowering::foldImm(SDValue &Operand, int32_t &Immediate,
1732 bool &ScalarSlotUsed) const {
1733
1734 MachineSDNode *Mov = dyn_cast<MachineSDNode>(Operand);
1735 const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
1736 getTargetMachine().getSubtargetImpl()->getInstrInfo());
1737 if (!Mov || !TII->isMov(Mov->getMachineOpcode()))
1738 return false;
1739
1740 const SDValue &Op = Mov->getOperand(0);
1741 int32_t Value = analyzeImmediate(Op.getNode());
1742 if (Value == -1) {
1743 // Not an immediate at all
1744 return false;
1745
1746 } else if (Value == 0) {
1747 // Inline immediates can always be folded
1748 Operand = Op;
1749 return true;
1750
1751 } else if (Value == Immediate) {
1752 // Already folded literal immediate
1753 Operand = Op;
1754 return true;
1755
1756 } else if (!ScalarSlotUsed && !Immediate) {
1757 // Fold this literal immediate
1758 ScalarSlotUsed = true;
1759 Immediate = Value;
1760 Operand = Op;
1761 return true;
1762
1763 }
1764
1765 return false;
17661722 }
17671723
17681724 const TargetRegisterClass *SITargetLowering::getRegClassForNode(
18281784 return TRI->getRegClass(RegClass)->hasSubClassEq(RC);
18291785 }
18301786
1831 /// \returns true if \p Node's operands are different from the SDValue list
1832 /// \p Ops
1833 static bool isNodeChanged(const SDNode *Node, const std::vector<SDValue> &Ops) {
1834 for (unsigned i = 0, e = Node->getNumOperands(); i < e; ++i) {
1835 if (Ops[i].getNode() != Node->getOperand(i).getNode()) {
1836 return true;
1837 }
1838 }
1839 return false;
1840 }
1841
1842 /// TODO: This needs to be removed. Its current primary purpose is to fold
1843 /// immediates into operands when legal. The legalization parts are redundant
1844 /// with SIInstrInfo::legalizeOperands which is called in a post-isel hook.
1845 SDNode *SITargetLowering::legalizeOperands(MachineSDNode *Node,
1846 SelectionDAG &DAG) const {
1847 // Original encoding (either e32 or e64)
1848 int Opcode = Node->getMachineOpcode();
1849 const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
1850 getTargetMachine().getSubtargetImpl()->getInstrInfo());
1851 const MCInstrDesc *Desc = &TII->get(Opcode);
1852
1853 unsigned NumDefs = Desc->getNumDefs();
1854 unsigned NumOps = Desc->getNumOperands();
1855
1856 // Commuted opcode if available
1857 int OpcodeRev = Desc->isCommutable() ? TII->commuteOpcode(Opcode) : -1;
1858 const MCInstrDesc *DescRev = OpcodeRev == -1 ? nullptr : &TII->get(OpcodeRev);
1859
1860 assert(!DescRev || DescRev->getNumDefs() == NumDefs);
1861 assert(!DescRev || DescRev->getNumOperands() == NumOps);
1862
1863 int32_t Immediate = Desc->getSize() == 4 ? 0 : -1;
1864 bool HaveVSrc = false, HaveSSrc = false;
1865
1866 // First figure out what we already have in this instruction.
1867 for (unsigned i = 0, e = Node->getNumOperands(), Op = NumDefs;
1868 i != e && Op < NumOps; ++i, ++Op) {
1869
1870 unsigned RegClass = Desc->OpInfo[Op].RegClass;
1871 if (isVSrc(RegClass))
1872 HaveVSrc = true;
1873 else if (isSSrc(RegClass))
1874 HaveSSrc = true;
1875 else
1876 continue;
1877
1878 int32_t Imm = analyzeImmediate(Node->getOperand(i).getNode());
1879 if (Imm != -1 && Imm != 0) {
1880 // Literal immediate
1881 Immediate = Imm;
1882 }
1883 }
1884
1885 // If we neither have VSrc nor SSrc, it makes no sense to continue.
1886 if (!HaveVSrc && !HaveSSrc)
1887 return Node;
1888
1889 // No scalar allowed when we have both VSrc and SSrc
1890 bool ScalarSlotUsed = HaveVSrc && HaveSSrc;
1891
1892 // If this instruction has an implicit use of VCC, then it can't use the
1893 // constant bus.
1894 for (unsigned i = 0, e = Desc->getNumImplicitUses(); i != e; ++i) {
1895 if (Desc->ImplicitUses[i] == AMDGPU::VCC) {
1896 ScalarSlotUsed = true;
1897 break;
1898 }
1899 }
1900
1901 // Second go over the operands and try to fold them
1902 std::vector<SDValue> Ops;
1903 for (unsigned i = 0, e = Node->getNumOperands(), Op = NumDefs;
1904 i != e && Op < NumOps; ++i, ++Op) {
1905
1906 const SDValue &Operand = Node->getOperand(i);
1907 Ops.push_back(Operand);
1908
1909 // Already folded immediate?
1910 if (isa<ConstantSDNode>(Operand.getNode()) ||
1911 isa<ConstantFPSDNode>(Operand.getNode()))
1912 continue;
1913
1914 // Is this a VSrc or SSrc operand?
1915 unsigned RegClass = Desc->OpInfo[Op].RegClass;
1916 if (isVSrc(RegClass) || isSSrc(RegClass)) {
1917 // Try to fold the immediates. If this ends up with multiple constant bus
1918 // uses, it will be legalized later.
1919 foldImm(Ops[i], Immediate, ScalarSlotUsed);
1920 continue;
1921 }
1922
1923 if (i == 1 && DescRev && fitsRegClass(DAG, Ops[0], RegClass)) {
1924
1925 unsigned OtherRegClass = Desc->OpInfo[NumDefs].RegClass;
1926 assert(isVSrc(OtherRegClass) || isSSrc(OtherRegClass));
1927
1928 // Test if it makes sense to swap operands
1929 if (foldImm(Ops[1], Immediate, ScalarSlotUsed) ||
1930 (!fitsRegClass(DAG, Ops[1], RegClass) &&
1931 fitsRegClass(DAG, Ops[1], OtherRegClass))) {
1932
1933 // Swap commutable operands
1934 std::swap(Ops[0], Ops[1]);
1935
1936 Desc = DescRev;
1937 DescRev = nullptr;
1938 continue;
1939 }
1940 }
1941 }
1942
1943 // Add optional chain and glue
1944 for (unsigned i = NumOps - NumDefs, e = Node->getNumOperands(); i < e; ++i)
1945 Ops.push_back(Node->getOperand(i));
1946
1947 // Nodes that have a glue result are not CSE'd by getMachineNode(), so in
1948 // this case a brand new node is always created, even if the operands
1949 // are the same as before. So, manually check if anything has been changed.
1950 if (Desc->Opcode == Opcode && !isNodeChanged(Node, Ops)) {
1951 return Node;
1952 }
1953
1954 // Create a complete new instruction
1955 return DAG.getMachineNode(Desc->Opcode, SDLoc(Node), Node->getVTList(), Ops);
1956 }
1957
19581787 /// \brief Helper function for adjustWritemask
19591788 static unsigned SubIdx2Lane(unsigned Idx) {
19601789 switch (Idx) {
20831912 legalizeTargetIndependentNode(Node, DAG);
20841913 return Node;
20851914 }
2086
2087 return legalizeOperands(Node, DAG);
1915 return Node;
20881916 }
20891917
20901918 /// \brief Assign the register class depending on the number of
4141 SDValue LowerTrig(SDValue Op, SelectionDAG &DAG) const;
4242 SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
4343
44 bool foldImm(SDValue &Operand, int32_t &Immediate,
45 bool &ScalarSlotUsed) const;
4644 const TargetRegisterClass *getRegClassForNode(SelectionDAG &DAG,
4745 const SDValue &Op) const;
4846 bool fitsRegClass(SelectionDAG &DAG, const SDValue &Op,
4947 unsigned RegClass) const;
5048
51 SDNode *legalizeOperands(MachineSDNode *N, SelectionDAG &DAG) const;
5249 void adjustWritemask(MachineSDNode *&N, SelectionDAG &DAG) const;
5350 MachineSDNode *AdjustRegClass(MachineSDNode *N, SelectionDAG &DAG) const;
5451
4747 ; R600: -KC0[2].Z
4848
4949 ; XXX: We could use v_add_f32_e64 with the negate bit here instead.
50 ; SI: v_sub_f32_e64 v{{[0-9]}}, 0.0, s{{[0-9]+$}}
50 ; SI: v_sub_f32_e64 v{{[0-9]}}, 0, s{{[0-9]+$}}
5151 define void @fneg_free_f32(float addrspace(1)* %out, i32 %in) {
5252 %bc = bitcast i32 %in to float
5353 %fsub = fsub float 0.0, %bc
111111
112112 ; CHECK-LABEL: {{^}}add_inline_imm_0.0_f32
113113 ; CHECK: s_load_dword [[VAL:s[0-9]+]]
114 ; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 0.0, [[VAL]]{{$}}
114 ; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 0, [[VAL]]{{$}}
115115 ; CHECK-NEXT: buffer_store_dword [[REG]]
116116 define void @add_inline_imm_0.0_f32(float addrspace(1)* %out, float %x) {
117117 %y = fadd float %x, 0.0
303303
304304 ; CHECK-LABEL: {{^}}add_inline_imm_0.0_f64
305305 ; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
306 ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 0.0, [[VAL]]
306 ; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 0, [[VAL]]
307307 ; CHECK-NEXT: buffer_store_dwordx2 [[REG]]
308308 define void @add_inline_imm_0.0_f64(double addrspace(1)* %out, double %x) {
309309 %y = fadd double %x, 0.0
8686 store i32 %tmp1, i32 addrspace(1)* %out
8787 ret void
8888 }
89 ; CHECK-LABEL: {{^}}vector_imm:
90 ; CHECK: s_movk_i32 [[IMM:s[0-9]+]], 0x64
91 ; CHECK: v_xor_b32_e32 v{{[0-9]}}, [[IMM]], v{{[0-9]}}
92 ; CHECK: v_xor_b32_e32 v{{[0-9]}}, [[IMM]], v{{[0-9]}}
93 ; CHECK: v_xor_b32_e32 v{{[0-9]}}, [[IMM]], v{{[0-9]}}
94 ; CHECK: v_xor_b32_e32 v{{[0-9]}}, [[IMM]], v{{[0-9]}}
95
96 define void @vector_imm(<4 x i32> addrspace(1)* %out) {
97 entry:
98 %tmp0 = call i32 @llvm.r600.read.tidig.x()
99 %tmp1 = add i32 %tmp0, 1
100 %tmp2 = add i32 %tmp0, 2
101 %tmp3 = add i32 %tmp0, 3
102 %vec0 = insertelement <4 x i32> undef, i32 %tmp0, i32 0
103 %vec1 = insertelement <4 x i32> %vec0, i32 %tmp1, i32 1
104 %vec2 = insertelement <4 x i32> %vec1, i32 %tmp2, i32 2
105 %vec3 = insertelement <4 x i32> %vec2, i32 %tmp3, i32 3
106 %tmp4 = xor <4 x i32> <i32 100, i32 100, i32 100, i32 100>, %vec3
107 store <4 x i32> %tmp4, <4 x i32> addrspace(1)* %out
108 ret void
109 }
89110
90111 declare i32 @llvm.r600.read.tidig.x() #0
91112 attributes #0 = { readnone }
11
22 ; CHECK-LABEL: {{^}}main:
33 ; CHECK: v_cmp_o_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[SREG:s[0-9]+]], [[SREG]]
4 ; CHECK-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0.0, 1.0, [[CMP]]
4 ; CHECK-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, 1.0, [[CMP]]
55 define void @main(float %p) {
66 main_body:
77 %c = fcmp oeq float %p, %p
11
22 ; CHECK-LABEL: {{^}}main:
33 ; CHECK: v_cmp_u_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[SREG:s[0-9]+]], [[SREG]]
4 ; CHECK-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0.0, 1.0, [[CMP]]
4 ; CHECK-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, 1.0, [[CMP]]
55 define void @main(float %p) {
66 main_body:
77 %c = fcmp une float %p, %p
4444
4545 ; SI-LABEL: @v_sint_to_fp_i64_to_f64
4646 ; SI: buffer_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
47 ; SI-DAG: v_cvt_f64_u32_e32 [[LO_CONV:v\[[0-9]+:[0-9]+\]]], v[[LO]]
48 ; SI-DAG: v_cvt_f64_i32_e32 [[HI_CONV:v\[[0-9]+:[0-9]+\]]], v[[HI]]
47 ; SI: v_cvt_f64_i32_e32 [[HI_CONV:v\[[0-9]+:[0-9]+\]]], v[[HI]]
4948 ; SI: v_ldexp_f64 [[LDEXP:v\[[0-9]+:[0-9]+\]]], [[HI_CONV]], 32
49 ; SI: v_cvt_f64_u32_e32 [[LO_CONV:v\[[0-9]+:[0-9]+\]]], v[[LO]]
5050 ; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[LDEXP]], [[LO_CONV]]
5151 ; SI: buffer_store_dwordx2 [[RESULT]]
5252 define void @v_sint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 addrspace(1)* %in) {
4141
4242 ; FUNC-LABEL: {{^}}sint_to_fp_i1_f32:
4343 ; SI: v_cmp_eq_i32_e64 [[CMP:s\[[0-9]+:[0-9]\]]],
44 ; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0.0, 1.0, [[CMP]]
44 ; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1.0, [[CMP]]
4545 ; SI: buffer_store_dword [[RESULT]],
4646 ; SI: s_endpgm
4747 define void @sint_to_fp_i1_f32(float addrspace(1)* %out, i32 %in) {
33
44 ; SI-LABEL: {{^}}v_uint_to_fp_i64_to_f64
55 ; SI: buffer_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
6 ; SI-DAG: v_cvt_f64_u32_e32 [[LO_CONV:v\[[0-9]+:[0-9]+\]]], v[[LO]]
7 ; SI-DAG: v_cvt_f64_u32_e32 [[HI_CONV:v\[[0-9]+:[0-9]+\]]], v[[HI]]
6 ; SI: v_cvt_f64_u32_e32 [[HI_CONV:v\[[0-9]+:[0-9]+\]]], v[[HI]]
87 ; SI: v_ldexp_f64 [[LDEXP:v\[[0-9]+:[0-9]+\]]], [[HI_CONV]], 32
8 ; SI: v_cvt_f64_u32_e32 [[LO_CONV:v\[[0-9]+:[0-9]+\]]], v[[LO]]
99 ; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[LDEXP]], [[LO_CONV]]
1010 ; SI: buffer_store_dwordx2 [[RESULT]]
1111 define void @v_uint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 addrspace(1)* %in) {
5959
6060 ; FUNC-LABEL: {{^}}uint_to_fp_i1_to_f32:
6161 ; SI: v_cmp_eq_i32_e64 [[CMP:s\[[0-9]+:[0-9]\]]],
62 ; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0.0, 1.0, [[CMP]]
62 ; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1.0, [[CMP]]
6363 ; SI: buffer_store_dword [[RESULT]],
6464 ; SI: s_endpgm
6565 define void @uint_to_fp_i1_to_f32(float addrspace(1)* %out, i32 %in) {
3838 ; FUNC-LABEL: {{^}}xor_i1:
3939 ; EG: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], PS}}
4040
41 ; SI-DAG: v_cmp_ge_f32_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, 0.0
41 ; SI-DAG: v_cmp_ge_f32_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, 0
4242 ; SI-DAG: v_cmp_ge_f32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, 1.0
4343 ; SI: s_xor_b64 [[XOR:s\[[0-9]+:[0-9]+\]]], [[CMP0]], [[CMP1]]
4444 ; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, [[XOR]]