llvm.org GIT mirror llvm / 3cd6681
Revert "[GlobalISel][AArch64] Add selection support for G_EXTRACT_VECTOR_ELT" This broke test-suite::aarch64_neon_intrinsics.test Reverting while I look into it. Example failure: http://lab.llvm.org:8011/builders/clang-cmake-aarch64-quick/builds/17740 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@355408 91177308-0d34-0410-b5e6-96231b3b80d8 Jessica Paquette 7 months ago
5 changed file(s) with 18 addition(s) and 332 deletion(s). Raw diff Collapse all Expand all
7777 void collectShuffleMaskIndices(MachineInstr &I, MachineRegisterInfo &MRI,
7878 SmallVectorImpl &Idxs) const;
7979 bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
80 bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
8180
8281 unsigned emitConstantPoolEntry(Constant *CPVal, MachineFunction &MF) const;
8382 MachineInstr *emitLoadFromConstantPool(Constant *CPVal,
17091708 return selectUnmergeValues(I, MRI);
17101709 case TargetOpcode::G_SHUFFLE_VECTOR:
17111710 return selectShuffleVector(I, MRI);
1712 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1713 return selectExtractElt(I, MRI);
17141711 }
17151712
17161713 return false;
17891786 return true;
17901787 }
17911788
1792 static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
1793 const unsigned EltSize) {
1794 // Choose a lane copy opcode and subregister based off of the size of the
1795 // vector's elements.
1796 switch (EltSize) {
1797 case 16:
1798 CopyOpc = AArch64::CPYi16;
1799 ExtractSubReg = AArch64::hsub;
1800 break;
1801 case 32:
1802 CopyOpc = AArch64::CPYi32;
1803 ExtractSubReg = AArch64::ssub;
1804 break;
1805 case 64:
1806 CopyOpc = AArch64::CPYi64;
1807 ExtractSubReg = AArch64::dsub;
1808 break;
1809 default:
1810 // Unknown size, bail out.
1811 LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
1812 return false;
1813 }
1814 return true;
1815 }
1816
1817 bool AArch64InstructionSelector::selectExtractElt(
1818 MachineInstr &I, MachineRegisterInfo &MRI) const {
1819 assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
1820 "unexpected opcode!");
1821 unsigned DstReg = I.getOperand(0).getReg();
1822 const LLT NarrowTy = MRI.getType(DstReg);
1823 const unsigned SrcReg = I.getOperand(1).getReg();
1824 const LLT WideTy = MRI.getType(SrcReg);
1825
1826 assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
1827 "source register size too small!");
1828 assert(NarrowTy.isScalar() && "cannot extract vector into vector!");
1829
1830 // Need the lane index to determine the correct copy opcode.
1831 MachineOperand &LaneIdxOp = I.getOperand(2);
1832 assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
1833
1834 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
1835 LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
1836 return false;
1837 }
1838
1839 // Find the instruction that defines the constant to extract from. There could
1840 // be any number of copies between the instruction and the definition of the
1841 // index. Skip them.
1842 MachineInstr *LaneDefInst = nullptr;
1843 for (LaneDefInst = MRI.getVRegDef(LaneIdxOp.getReg());
1844 LaneDefInst && LaneDefInst->isCopy();
1845 LaneDefInst = MRI.getVRegDef(LaneDefInst->getOperand(1).getReg())) {
1846 }
1847
1848 // Did we find a def in the first place? If not, bail.
1849 if (!LaneDefInst) {
1850 LLVM_DEBUG(dbgs() << "Did not find VReg definition for " << LaneIdxOp
1851 << "\n");
1852 return false;
1853 }
1854
1855 // TODO: Handle extracts that don't use G_CONSTANT.
1856 if (LaneDefInst->getOpcode() != TargetOpcode::G_CONSTANT) {
1857 LLVM_DEBUG(dbgs() << "VRegs defined by anything other than G_CONSTANT "
1858 "currently unsupported.\n");
1859 return false;
1860 }
1861
1862 unsigned LaneIdx = LaneDefInst->getOperand(1).getCImm()->getLimitedValue();
1863 unsigned CopyOpc = 0;
1864 unsigned ExtractSubReg = 0;
1865 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits())) {
1866 LLVM_DEBUG(
1867 dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
1868 return false;
1869 }
1870
1871 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
1872 const TargetRegisterClass *DstRC =
1873 getRegClassForTypeOnBank(NarrowTy, DstRB, RBI, true);
1874 if (!DstRC) {
1875 LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
1876 return false;
1877 }
1878
1879 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
1880 const TargetRegisterClass *SrcRC =
1881 getRegClassForTypeOnBank(WideTy, SrcRB, RBI, true);
1882 if (!SrcRC) {
1883 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
1884 return false;
1885 }
1886
1887 // The register that we're going to copy into.
1888 unsigned InsertReg = SrcReg;
1889 MachineIRBuilder MIRBuilder(I);
1890
1891 // Lane copies require 128-bit wide registers. If we're dealing with an
1892 // unpacked vector, then we need to move up to that width. Insert an implicit
1893 // def and a subregister insert to get us there.
1894 if (WideTy.getSizeInBits() != 128) {
1895 MachineInstr *ScalarToVector = emitScalarToVector(
1896 WideTy.getSizeInBits(), &AArch64::FPR128RegClass, SrcReg, MIRBuilder);
1897 if (!ScalarToVector)
1898 return false;
1899 InsertReg = ScalarToVector->getOperand(0).getReg();
1900 }
1901
1902 MachineInstr *LaneCopyMI =
1903 MIRBuilder.buildInstr(CopyOpc, {DstReg}, {InsertReg}).addImm(LaneIdx);
1904 constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
1905
1906 // Make sure that we actually constrain the initial copy.
1907 RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
1908
1909 I.eraseFromParent();
1910 return true;
1911 }
1912
19131789 bool AArch64InstructionSelector::selectUnmergeValues(
19141790 MachineInstr &I, MachineRegisterInfo &MRI) const {
19151791 assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
19461822 // vector's elements.
19471823 unsigned CopyOpc = 0;
19481824 unsigned ExtractSubReg = 0;
1949 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
1825 switch (NarrowTy.getSizeInBits()) {
1826 case 16:
1827 CopyOpc = AArch64::CPYi16;
1828 ExtractSubReg = AArch64::hsub;
1829 break;
1830 case 32:
1831 CopyOpc = AArch64::CPYi32;
1832 ExtractSubReg = AArch64::ssub;
1833 break;
1834 case 64:
1835 CopyOpc = AArch64::CPYi64;
1836 ExtractSubReg = AArch64::dsub;
1837 break;
1838 default:
1839 // Unknown size, bail out.
1840 LLVM_DEBUG(dbgs() << "NarrowTy had unsupported size.\n");
19501841 return false;
1842 }
19511843
19521844 // Set up for the lane copies.
19531845 MachineBasicBlock &MBB = *I.getParent();
440440 .minScalar(2, s64)
441441 .legalIf([=](const LegalityQuery &Query) {
442442 const LLT &VecTy = Query.Types[1];
443 return VecTy == v2s16 || VecTy == v4s16 || VecTy == v4s32 ||
444 VecTy == v2s64 || VecTy == v2s32;
443 return VecTy == v4s32 || VecTy == v2s64;
445444 });
446445
447446 getActionDefinitionsBuilder(G_BUILD_VECTOR)
688688 }
689689 break;
690690 }
691 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
692 // Destination and source need to be FPRs.
693 OpRegBankIdx[0] = PMI_FirstFPR;
694 OpRegBankIdx[1] = PMI_FirstFPR;
695
696 // Index needs to be a GPR.
697 OpRegBankIdx[2] = PMI_FirstGPR;
698 break;
699691
700692 case TargetOpcode::G_BUILD_VECTOR:
701693 // If the first source operand belongs to a FPR register bank, then make
+0
-103
test/CodeGen/AArch64/GlobalISel/regbank-extract-vector-elt.mir less more
None # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
1 # RUN: llc -mtriple=aarch64-unknown-unknown -verify-machineinstrs -O0 -run-pass=regbankselect %s -o - | FileCheck %s
2
3 name: v2s32_fpr
4 alignment: 2
5 legalized: true
6 tracksRegLiveness: true
7 registers:
8 - { id: 0, class: _ }
9 - { id: 1, class: _ }
10 - { id: 2, class: _ }
11 body: |
12 bb.1.entry:
13 liveins: $d0
14
15 %0:_(<2 x s32>) = COPY $d0
16 %2:_(s64) = G_CONSTANT i64 1
17 %1:_(s32) = G_EXTRACT_VECTOR_ELT %0(<2 x s32>), %2(s64)
18 $s0 = COPY %1(s32)
19 RET_ReallyLR implicit $s0
20
21 ...
22 ---
23 name: v4s32_gpr
24 alignment: 2
25 legalized: true
26 tracksRegLiveness: true
27 registers:
28 - { id: 0, class: _ }
29 - { id: 1, class: _ }
30 - { id: 2, class: _ }
31 body: |
32 bb.1.entry:
33 liveins: $q0
34
35 ; CHECK-LABEL: name: v4s32_gpr
36 ; CHECK: liveins: $q0
37 ; CHECK: [[COPY:%[0-9]+]]:fpr(<4 x s32>) = COPY $q0
38 ; CHECK: [[C:%[0-9]+]]:gpr(s64) = G_CONSTANT i64 0
39 ; CHECK: [[EVEC:%[0-9]+]]:fpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C]](s64)
40 ; CHECK: $s0 = COPY [[EVEC]](s32)
41 ; CHECK: RET_ReallyLR implicit $s0
42 %0:_(<4 x s32>) = COPY $q0
43 %2:_(s64) = G_CONSTANT i64 0
44 %1:_(s32) = G_EXTRACT_VECTOR_ELT %0(<4 x s32>), %2(s64)
45 $s0 = COPY %1(s32)
46 RET_ReallyLR implicit $s0
47
48 ...
49 ---
50 name: v2s64_fpr
51 alignment: 2
52 legalized: true
53 tracksRegLiveness: true
54 registers:
55 - { id: 0, class: _ }
56 - { id: 1, class: _ }
57 - { id: 2, class: _ }
58 body: |
59 bb.1.entry:
60 liveins: $q0
61
62 ; CHECK-LABEL: name: v2s64_fpr
63 ; CHECK: liveins: $q0
64 ; CHECK: [[COPY:%[0-9]+]]:fpr(<2 x s64>) = COPY $q0
65 ; CHECK: [[C:%[0-9]+]]:gpr(s64) = G_CONSTANT i64 2
66 ; CHECK: [[EVEC:%[0-9]+]]:fpr(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s64>), [[C]](s64)
67 ; CHECK: $d0 = COPY [[EVEC]](s64)
68 ; CHECK: RET_ReallyLR implicit $d0
69 %0:_(<2 x s64>) = COPY $q0
70 %2:_(s64) = G_CONSTANT i64 2
71 %1:_(s64) = G_EXTRACT_VECTOR_ELT %0(<2 x s64>), %2(s64)
72 $d0 = COPY %1(s64)
73 RET_ReallyLR implicit $d0
74
75 ...
76 ---
77 name: v4s16_fpr
78 alignment: 2
79 legalized: true
80 tracksRegLiveness: true
81 registers:
82 - { id: 0, class: _ }
83 - { id: 1, class: _ }
84 - { id: 2, class: _ }
85 body: |
86 bb.1.entry:
87 liveins: $d0
88
89 ; CHECK-LABEL: name: v4s16_fpr
90 ; CHECK: liveins: $d0
91 ; CHECK: [[COPY:%[0-9]+]]:fpr(<4 x s16>) = COPY $d0
92 ; CHECK: [[C:%[0-9]+]]:gpr(s64) = G_CONSTANT i64 1
93 ; CHECK: [[EVEC:%[0-9]+]]:fpr(s16) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s16>), [[C]](s64)
94 ; CHECK: $h0 = COPY [[EVEC]](s16)
95 ; CHECK: RET_ReallyLR implicit $h0
96 %0:_(<4 x s16>) = COPY $d0
97 %2:_(s64) = G_CONSTANT i64 1
98 %1:_(s16) = G_EXTRACT_VECTOR_ELT %0(<4 x s16>), %2(s64)
99 $h0 = COPY %1(s16)
100 RET_ReallyLR implicit $h0
101
102 ...
+0
-94
test/CodeGen/AArch64/GlobalISel/select-extract-vector-elt.mir less more
None # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
1 # RUN: llc -mtriple=aarch64-unknown-unknown -verify-machineinstrs -O0 -run-pass=instruction-select %s -o - | FileCheck %s
2 ...
3 ---
4 name: v2s32_fpr
5 alignment: 2
6 legalized: true
7 regBankSelected: true
8 tracksRegLiveness: true
9 registers:
10 - { id: 0, class: fpr }
11 - { id: 1, class: fpr }
12 - { id: 2, class: gpr }
13 - { id: 3, class: fpr }
14 body: |
15 bb.0:
16 liveins: $d0
17
18 ; CHECK-LABEL: name: v2s32_fpr
19 ; CHECK: liveins: $d0
20 ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
21 ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
22 ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.dsub
23 ; CHECK: [[CPYi32_:%[0-9]+]]:fpr32 = CPYi32 [[INSERT_SUBREG]], 1
24 ; CHECK: $s0 = COPY [[CPYi32_]]
25 ; CHECK: RET_ReallyLR implicit $s0
26 %0:fpr(<2 x s32>) = COPY $d0
27 %2:gpr(s64) = G_CONSTANT i64 1
28 %3:fpr(s64) = COPY %2(s64)
29 %1:fpr(s32) = G_EXTRACT_VECTOR_ELT %0(<2 x s32>), %3(s64)
30 $s0 = COPY %1(s32)
31 RET_ReallyLR implicit $s0
32
33 ...
34 ---
35 name: v2s64_fpr
36 alignment: 2
37 legalized: true
38 regBankSelected: true
39 tracksRegLiveness: true
40 registers:
41 - { id: 0, class: fpr }
42 - { id: 1, class: fpr }
43 - { id: 2, class: gpr }
44 - { id: 3, class: fpr }
45 body: |
46 bb.0:
47 liveins: $q0
48
49 ; CHECK-LABEL: name: v2s64_fpr
50 ; CHECK: liveins: $q0
51 ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
52 ; CHECK: [[CPYi64_:%[0-9]+]]:fpr64 = CPYi64 [[COPY]], 2
53 ; CHECK: $d0 = COPY [[CPYi64_]]
54 ; CHECK: RET_ReallyLR implicit $d0
55 %0:fpr(<2 x s64>) = COPY $q0
56 %2:gpr(s64) = G_CONSTANT i64 2
57 %3:fpr(s64) = COPY %2(s64)
58 %1:fpr(s64) = G_EXTRACT_VECTOR_ELT %0(<2 x s64>), %3(s64)
59 $d0 = COPY %1(s64)
60 RET_ReallyLR implicit $d0
61
62 ...
63 ---
64 name: v4s16_fpr
65 alignment: 2
66 legalized: true
67 regBankSelected: true
68 tracksRegLiveness: true
69 registers:
70 - { id: 0, class: fpr }
71 - { id: 1, class: fpr }
72 - { id: 2, class: gpr }
73 - { id: 3, class: fpr }
74 body: |
75 bb.0:
76 liveins: $d0
77
78 ; CHECK-LABEL: name: v4s16_fpr
79 ; CHECK: liveins: $d0
80 ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
81 ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
82 ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.dsub
83 ; CHECK: [[CPYi16_:%[0-9]+]]:fpr16 = CPYi16 [[INSERT_SUBREG]], 1
84 ; CHECK: $h0 = COPY [[CPYi16_]]
85 ; CHECK: RET_ReallyLR implicit $h0
86 %0:fpr(<4 x s16>) = COPY $d0
87 %2:gpr(s64) = G_CONSTANT i64 1
88 %3:fpr(s64) = COPY %2(s64)
89 %1:fpr(s16) = G_EXTRACT_VECTOR_ELT %0(<4 x s16>), %3(s64)
90 $h0 = COPY %1(s16)
91 RET_ReallyLR implicit $h0
92
93 ...