llvm.org GIT mirror llvm / 820b69b
[AArch64][GlobalISel] Import XRO load/store patterns instead of custom selection

Instead of using custom C++ in `earlySelect` for loads and stores, just import the patterns. Remove `earlySelectLoad`, since we can just import the work it's doing.

This includes some minor changes to how `ComplexRendererFns` are returned for the XRO addressing modes. If you add immediates in two steps, sometimes they are not imported properly and you only end up with one immediate. I'm not sure if this is intentional.

- Update load-addressing-modes.mir to include the instructions we can now import.
- Add a similar test, store-addressing-modes.mir, to show which store opcodes we currently import, and show that we can pull in shifts etc.
- Update arm64-fastisel-gep-promote-before-add.ll to use FastISel instead of GISel. This test failed with GISel because GISel folds the gep into the load. The test checks that FastISel doesn't fold non-pointer-width adds into loads. GISel, on the other hand, produces a G_CONSTANT of -128 for the add, and then a G_GEP, which must be pointer-width.

Note that we don't get STRBroX right now. It seems like the importer can't handle `FPR8Op:{ *:[Untyped] }:$Rt` source operands, so those are not currently supported.

Differential Revision: https://reviews.llvm.org/D66679

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@369806 91177308-0d34-0410-b5e6-96231b3b80d8

Jessica Paquette, 29 days ago
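For readers unfamiliar with the renderer change mentioned above, here is a minimal sketch (illustrative only, not code from this patch; the helper name `renderXROAddr` and the flag values are made up) of returning `ComplexRendererFns` where the two trailing immediates are emitted from a single renderer function, which is the shape the imported patterns handle reliably:

```cpp
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/Register.h"

using namespace llvm;

// Hypothetical helper: build the renderer functions for an XRO-style
// addressing mode (base register, offset register, sign-extend flag,
// do-shift flag).
static InstructionSelector::ComplexRendererFns
renderXROAddr(Register Base, Register Offset, bool DoShift) {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addUse(Base); },
      [=](MachineInstrBuilder &MIB) { MIB.addUse(Offset); },
      // Emit both immediates from one renderer. Splitting them across two
      // separate renderers is what the commit message describes as sometimes
      // leaving the imported pattern with only one immediate.
      [=](MachineInstrBuilder &MIB) {
        MIB.addImm(0);               // no sign extension
        MIB.addImm(DoShift ? 1 : 0); // shift the offset register?
      },
  }};
}
```

The diff hunks for `selectAddrModeShiftedExtendXReg` and `selectAddrModeRegisterOffset` below follow this same pattern: the register operands keep their own renderers, while the two immediates are combined into one lambda.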
5 changed file(s) with 336 addition(s) and 68 deletion(s).
30023002 def ro_Xindexed32 : ComplexPattern<i64, 4, "SelectAddrModeXRO<32>", []>;
30033003 def ro_Xindexed64 : ComplexPattern<i64, 4, "SelectAddrModeXRO<64>", []>;
30043004 def ro_Xindexed128 : ComplexPattern<i64, 4, "SelectAddrModeXRO<128>", []>;
3005
3006 def gi_ro_Xindexed8 :
3007     GIComplexOperandMatcher<s64, "selectAddrModeXRO<8>">,
3008     GIComplexPatternEquiv<ro_Xindexed8>;
3009 def gi_ro_Xindexed16 :
3010     GIComplexOperandMatcher<s64, "selectAddrModeXRO<16>">,
3011     GIComplexPatternEquiv<ro_Xindexed16>;
3012 def gi_ro_Xindexed32 :
3013     GIComplexOperandMatcher<s64, "selectAddrModeXRO<32>">,
3014     GIComplexPatternEquiv<ro_Xindexed32>;
3015 def gi_ro_Xindexed64 :
3016     GIComplexOperandMatcher<s64, "selectAddrModeXRO<64>">,
3017     GIComplexPatternEquiv<ro_Xindexed64>;
3018 def gi_ro_Xindexed128 :
3019     GIComplexOperandMatcher<s64, "selectAddrModeXRO<128>">,
3020     GIComplexPatternEquiv<ro_Xindexed128>;
30053021
30063022 def ro_Windexed8 : ComplexPattern<i64, 4, "SelectAddrModeWRO<8>", []>;
30073023 def ro_Windexed16 : ComplexPattern<i64, 4, "SelectAddrModeWRO<16>", []>;
7575 bool earlySelect(MachineInstr &I) const;
7676
7777 bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
78 bool earlySelectLoad(MachineInstr &I, MachineRegisterInfo &MRI) const;
7978
8079 /// Eliminate same-sized cross-bank copies into stores before selectImpl().
8180 void contractCrossBankCopyIntoStore(MachineInstr &I,
207206 ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
208207 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
209208 unsigned SizeInBytes) const;
209 template <int Width>
210 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
211 return selectAddrModeXRO(Root, Width / 8);
212 }
210213
211214 ComplexRendererFns selectShiftedRegister(MachineOperand &Root) const;
212215
12451248 I.getOperand(0).setReg(DefDstReg);
12461249 }
12471250
1248 bool AArch64InstructionSelector::earlySelectLoad(
1249 MachineInstr &I, MachineRegisterInfo &MRI) const {
1250 // Try to fold in shifts, etc into the addressing mode of a load.
1251 assert(I.getOpcode() == TargetOpcode::G_LOAD && "unexpected op");
1252
1253 // Don't handle atomic loads/stores yet.
1254 auto &MemOp = **I.memoperands_begin();
1255 if (MemOp.isAtomic()) {
1256 LLVM_DEBUG(dbgs() << "Atomic load/store not supported yet\n");
1257 return false;
1258 }
1259
1260 unsigned MemBytes = MemOp.getSize();
1261
1262 // Only support 64-bit loads for now.
1263 if (MemBytes != 8)
1264 return false;
1265
1266 Register DstReg = I.getOperand(0).getReg();
1267 const LLT DstTy = MRI.getType(DstReg);
1268 // Don't handle vectors.
1269 if (DstTy.isVector())
1270 return false;
1271
1272 unsigned DstSize = DstTy.getSizeInBits();
1273 // TODO: 32-bit destinations.
1274 if (DstSize != 64)
1275 return false;
1276
1277 // Check if we can do any folding from GEPs/shifts etc. into the load.
1278 auto ImmFn = selectAddrModeXRO(I.getOperand(1), MemBytes);
1279 if (!ImmFn)
1280 return false;
1281
1282 // We can fold something. Emit the load here.
1283 MachineIRBuilder MIB(I);
1284
1285 // Choose the instruction based off the size of the element being loaded, and
1286 // whether or not we're loading into a FPR.
1287 const RegisterBank &RB = *RBI.getRegBank(DstReg, MRI, TRI);
1288 unsigned Opc =
1289 RB.getID() == AArch64::GPRRegBankID ? AArch64::LDRXroX : AArch64::LDRDroX;
1290 // Construct the load.
1291 auto LoadMI = MIB.buildInstr(Opc, {DstReg}, {});
1292 for (auto &RenderFn : *ImmFn)
1293 RenderFn(LoadMI);
1294 LoadMI.addMemOperand(*I.memoperands_begin());
1295 I.eraseFromParent();
1296 return constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
1297 }
1298
12991251 bool AArch64InstructionSelector::earlySelect(MachineInstr &I) const {
13001252 assert(I.getParent() && "Instruction should be in a basic block!");
13011253 assert(I.getParent()->getParent() && "Instruction should be in a function!");
13071259 switch (I.getOpcode()) {
13081260 case TargetOpcode::G_SHL:
13091261 return earlySelectSHL(I, MRI);
1310 case TargetOpcode::G_LOAD:
1311 return earlySelectLoad(I, MRI);
13121262 case TargetOpcode::G_CONSTANT: {
13131263 bool IsZero = false;
13141264 if (I.getOperand(1).isCImm())
43414291
43424292 // We can use the LHS of the GEP as the base, and the LHS of the shift as an
43434293 // offset. Signify that we are shifting by setting the shift flag to 1.
4344 return {{
4345 [=](MachineInstrBuilder &MIB) { MIB.add(Gep->getOperand(1)); },
4346 [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
4347 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
4348 [=](MachineInstrBuilder &MIB) { MIB.addImm(1); },
4349 }};
4294 return {{[=](MachineInstrBuilder &MIB) {
4295 MIB.addUse(Gep->getOperand(1).getReg());
4296 },
4297 [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
4298 [=](MachineInstrBuilder &MIB) {
4299 // Need to add both immediates here to make sure that they are both
4300 // added to the instruction.
4301 MIB.addImm(0);
4302 MIB.addImm(1);
4303 }}};
43504304 }
43514305
43524306 /// This is used for computing addresses like this:
43744328 return None;
43754329
43764330 // Base is the GEP's LHS, offset is its RHS.
4377 return {{
4378 [=](MachineInstrBuilder &MIB) { MIB.add(Gep->getOperand(1)); },
4379 [=](MachineInstrBuilder &MIB) { MIB.add(Gep->getOperand(2)); },
4380 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
4381 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
4382 }};
4331 return {{[=](MachineInstrBuilder &MIB) {
4332 MIB.addUse(Gep->getOperand(1).getReg());
4333 },
4334 [=](MachineInstrBuilder &MIB) {
4335 MIB.addUse(Gep->getOperand(2).getReg());
4336 },
4337 [=](MachineInstrBuilder &MIB) {
4338 // Need to add both immediates here to make sure that they are both
4339 // added to the instruction.
4340 MIB.addImm(0);
4341 MIB.addImm(0);
4342 }}};
43834343 }
43844344
43854345 /// This is intended to be equivalent to selectAddrModeXRO in
1717 define void @more_than_one_use_shl_lsl_fast(i64* %addr) #1 { ret void }
1818 define void @more_than_one_use_shl_lsl_slow(i64* %addr) { ret void }
1919 define void @more_than_one_use_shl_minsize(i64* %addr) #0 { ret void }
20 define void @ldrwrox(i64* %addr) { ret void }
21 define void @ldrsrox(i64* %addr) { ret void }
22 define void @ldrhrox(i64* %addr) { ret void }
23 define void @ldbbrox(i64* %addr) { ret void }
24 define void @ldrqrox(i64* %addr) { ret void }
2025 attributes #0 = { optsize minsize }
2126 attributes #1 = { "target-features"="+lsl-fast" }
2227 ...
7176 $d0 = COPY %4(s64)
7277 RET_ReallyLR implicit $d0
7378 ...
74
7579 ---
7680 name: more_than_one_use
7781 alignment: 2
505509 %9:gpr(s64) = G_ADD %8, %7
506510 $x2 = COPY %9(s64)
507511 RET_ReallyLR implicit $x2
512 ...
513 ---
514 name: ldrwrox
515 alignment: 2
516 legalized: true
517 regBankSelected: true
518 tracksRegLiveness: true
519 machineFunctionInfo: {}
520 body: |
521 bb.0:
522 liveins: $x0, $x1
523 ; CHECK-LABEL: name: ldrwrox
524 ; CHECK: liveins: $x0, $x1
525 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
526 ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
527 ; CHECK: [[LDRWroX:%[0-9]+]]:gpr32 = LDRWroX [[COPY]], [[COPY1]], 0, 0 :: (load 4 from %ir.addr)
528 ; CHECK: $w2 = COPY [[LDRWroX]]
529 ; CHECK: RET_ReallyLR implicit $w2
530 %0:gpr(p0) = COPY $x0
531 %1:gpr(s64) = COPY $x1
532 %2:gpr(p0) = G_GEP %0, %1
533 %4:gpr(s32) = G_LOAD %2(p0) :: (load 4 from %ir.addr)
534 $w2 = COPY %4(s32)
535 RET_ReallyLR implicit $w2
536 ...
537 ---
538 name: ldrsrox
539 alignment: 2
540 legalized: true
541 regBankSelected: true
542 tracksRegLiveness: true
543 machineFunctionInfo: {}
544 body: |
545 bb.0:
546 liveins: $d0, $x1
547 ; CHECK-LABEL: name: ldrsrox
548 ; CHECK: liveins: $d0, $x1
549 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $d0
550 ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
551 ; CHECK: [[LDRSroX:%[0-9]+]]:fpr32 = LDRSroX [[COPY]], [[COPY1]], 0, 0 :: (load 4 from %ir.addr)
552 ; CHECK: $s2 = COPY [[LDRSroX]]
553 ; CHECK: RET_ReallyLR implicit $h2
554 %0:gpr(p0) = COPY $d0
555 %1:gpr(s64) = COPY $x1
556 %2:gpr(p0) = G_GEP %0, %1
557 %4:fpr(s32) = G_LOAD %2(p0) :: (load 4 from %ir.addr)
558 $s2 = COPY %4(s32)
559 RET_ReallyLR implicit $h2
560 ...
561 ---
562 name: ldrhrox
563 alignment: 2
564 legalized: true
565 regBankSelected: true
566 tracksRegLiveness: true
567 machineFunctionInfo: {}
568 body: |
569 bb.0:
570 liveins: $x0, $x1
571 ; CHECK-LABEL: name: ldrhrox
572 ; CHECK: liveins: $x0, $x1
573 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
574 ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
575 ; CHECK: [[LDRHroX:%[0-9]+]]:fpr16 = LDRHroX [[COPY]], [[COPY1]], 0, 0 :: (load 2 from %ir.addr)
576 ; CHECK: $h2 = COPY [[LDRHroX]]
577 ; CHECK: RET_ReallyLR implicit $h2
578 %0:gpr(p0) = COPY $x0
579 %1:gpr(s64) = COPY $x1
580 %2:gpr(p0) = G_GEP %0, %1
581 %4:fpr(s16) = G_LOAD %2(p0) :: (load 2 from %ir.addr)
582 $h2 = COPY %4(s16)
583 RET_ReallyLR implicit $h2
584 ...
585 ---
586 name: ldbbrox
587 alignment: 2
588 legalized: true
589 regBankSelected: true
590 tracksRegLiveness: true
591 machineFunctionInfo: {}
592 body: |
593 bb.0:
594 liveins: $x0, $x1
595 ; CHECK-LABEL: name: ldbbrox
596 ; CHECK: liveins: $x0, $x1
597 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
598 ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
599 ; CHECK: [[LDRBBroX:%[0-9]+]]:gpr32 = LDRBBroX [[COPY]], [[COPY1]], 0, 0 :: (load 1 from %ir.addr)
600 ; CHECK: $w2 = COPY [[LDRBBroX]]
601 ; CHECK: RET_ReallyLR implicit $w2
602 %0:gpr(p0) = COPY $x0
603 %1:gpr(s64) = COPY $x1
604 %2:gpr(p0) = G_GEP %0, %1
605 %4:gpr(s32) = G_LOAD %2(p0) :: (load 1 from %ir.addr)
606 $w2 = COPY %4(s32)
607 RET_ReallyLR implicit $w2
608 ...
609 ---
610 name: ldrqrox
611 alignment: 2
612 legalized: true
613 regBankSelected: true
614 tracksRegLiveness: true
615 machineFunctionInfo: {}
616 body: |
617 bb.0:
618 liveins: $d0, $x1
619 ; CHECK-LABEL: name: ldrqrox
620 ; CHECK: liveins: $d0, $x1
621 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $d0
622 ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
623 ; CHECK: [[LDRQroX:%[0-9]+]]:fpr128 = LDRQroX [[COPY]], [[COPY1]], 0, 0 :: (load 16 from %ir.addr)
624 ; CHECK: $q0 = COPY [[LDRQroX]]
625 ; CHECK: RET_ReallyLR implicit $q0
626 %0:gpr(p0) = COPY $d0
627 %1:gpr(s64) = COPY $x1
628 %2:gpr(p0) = G_GEP %0, %1
629 %4:fpr(<2 x s64>) = G_LOAD %2(p0) :: (load 16 from %ir.addr)
630 $q0 = COPY %4(<2 x s64>)
631 RET_ReallyLR implicit $q0
0 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
1 # RUN: llc -mtriple=aarch64-unknown-unknown -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
2
3 --- |
4 define void @strxrox(i64* %addr) { ret void }
5 define void @strdrox(i64* %addr) { ret void }
6 define void @strwrox(i64* %addr) { ret void }
7 define void @strsrox(i64* %addr) { ret void }
8 define void @strhrox(i64* %addr) { ret void }
9 define void @strqrox(i64* %addr) { ret void }
10 define void @shl(i64* %addr) { ret void }
11 ...
12
13 ---
14 name: strxrox
15 alignment: 2
16 legalized: true
17 regBankSelected: true
18 tracksRegLiveness: true
19 machineFunctionInfo: {}
20 body: |
21 bb.0:
22 liveins: $x0, $x1, $x2
23 ; CHECK-LABEL: name: strxrox
24 ; CHECK: liveins: $x0, $x1, $x2
25 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
26 ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
27 ; CHECK: [[COPY2:%[0-9]+]]:gpr64 = COPY $x2
28 ; CHECK: STRXroX [[COPY2]], [[COPY]], [[COPY1]], 0, 0 :: (store 8 into %ir.addr)
29 %0:gpr(p0) = COPY $x0
30 %1:gpr(s64) = COPY $x1
31 %ptr:gpr(p0) = G_GEP %0, %1
32 %3:gpr(s64) = COPY $x2
33 G_STORE %3, %ptr :: (store 8 into %ir.addr)
34 ...
35 ---
36 name: strdrox
37 alignment: 2
38 legalized: true
39 regBankSelected: true
40 tracksRegLiveness: true
41 machineFunctionInfo: {}
42 body: |
43 bb.0:
44 liveins: $x0, $x1, $d2
45 ; CHECK-LABEL: name: strdrox
46 ; CHECK: liveins: $x0, $x1, $d2
47 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
48 ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
49 ; CHECK: [[COPY2:%[0-9]+]]:fpr64 = COPY $d2
50 ; CHECK: STRDroX [[COPY2]], [[COPY]], [[COPY1]], 0, 0 :: (store 8 into %ir.addr)
51 %0:gpr(p0) = COPY $x0
52 %1:gpr(s64) = COPY $x1
53 %ptr:gpr(p0) = G_GEP %0, %1
54 %3:fpr(s64) = COPY $d2
55 G_STORE %3, %ptr :: (store 8 into %ir.addr)
56 ...
57 ---
58 name: strwrox
59 alignment: 2
60 legalized: true
61 regBankSelected: true
62 tracksRegLiveness: true
63 machineFunctionInfo: {}
64 body: |
65 bb.0:
66 liveins: $x0, $x1, $w2
67 ; CHECK-LABEL: name: strwrox
68 ; CHECK: liveins: $x0, $x1, $w2
69 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
70 ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
71 ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY $w2
72 ; CHECK: STRWroX [[COPY2]], [[COPY]], [[COPY1]], 0, 0 :: (store 4 into %ir.addr)
73 %0:gpr(p0) = COPY $x0
74 %1:gpr(s64) = COPY $x1
75 %ptr:gpr(p0) = G_GEP %0, %1
76 %3:gpr(s32) = COPY $w2
77 G_STORE %3, %ptr :: (store 4 into %ir.addr)
78 ...
79 ---
80 name: strsrox
81 alignment: 2
82 legalized: true
83 regBankSelected: true
84 tracksRegLiveness: true
85 machineFunctionInfo: {}
86 body: |
87 bb.0:
88 liveins: $x0, $x1, $s2
89 ; CHECK-LABEL: name: strsrox
90 ; CHECK: liveins: $x0, $x1, $s2
91 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
92 ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
93 ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY $s2
94 ; CHECK: STRSroX [[COPY2]], [[COPY]], [[COPY1]], 0, 0 :: (store 4 into %ir.addr)
95 %0:gpr(p0) = COPY $x0
96 %1:gpr(s64) = COPY $x1
97 %ptr:gpr(p0) = G_GEP %0, %1
98 %3:fpr(s32) = COPY $s2
99 G_STORE %3, %ptr :: (store 4 into %ir.addr)
100 ...
101 ---
102 name: strhrox
103 alignment: 2
104 legalized: true
105 regBankSelected: true
106 tracksRegLiveness: true
107 machineFunctionInfo: {}
108 body: |
109 bb.0:
110 liveins: $x0, $x1, $h0
111 ; CHECK-LABEL: name: strhrox
112 ; CHECK: liveins: $x0, $x1, $h0
113 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
114 ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
115 ; CHECK: [[COPY2:%[0-9]+]]:fpr16 = COPY $h0
116 ; CHECK: STRHroX [[COPY2]], [[COPY]], [[COPY1]], 0, 0 :: (store 2 into %ir.addr)
117 %0:gpr(p0) = COPY $x0
118 %1:gpr(s64) = COPY $x1
119 %ptr:gpr(p0) = G_GEP %0, %1
120 %3:fpr(s16) = COPY $h0
121 G_STORE %3, %ptr :: (store 2 into %ir.addr)
122 ...
123 ---
124 name: strqrox
125 alignment: 2
126 legalized: true
127 regBankSelected: true
128 tracksRegLiveness: true
129 machineFunctionInfo: {}
130 body: |
131 bb.0:
132 liveins: $x0, $x1, $q2
133 ; CHECK-LABEL: name: strqrox
134 ; CHECK: liveins: $x0, $x1, $q2
135 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
136 ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
137 ; CHECK: [[COPY2:%[0-9]+]]:fpr128 = COPY $q2
138 ; CHECK: STRQroX [[COPY2]], [[COPY]], [[COPY1]], 0, 0 :: (store 16 into %ir.addr)
139 %0:gpr(p0) = COPY $x0
140 %1:gpr(s64) = COPY $x1
141 %ptr:gpr(p0) = G_GEP %0, %1
142 %2:fpr(<2 x s64>) = COPY $q2
143 G_STORE %2, %ptr :: (store 16 into %ir.addr)
144 ...
145 ---
146 name: shl
147 alignment: 2
148 legalized: true
149 regBankSelected: true
150 tracksRegLiveness: true
151 machineFunctionInfo: {}
152 body: |
153 bb.0:
154 liveins: $x0, $x1, $x2
155 ; CHECK-LABEL: name: shl
156 ; CHECK: liveins: $x0, $x1, $x2
157 ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
158 ; CHECK: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1
159 ; CHECK: [[COPY2:%[0-9]+]]:gpr64 = COPY $x2
160 ; CHECK: STRXroX [[COPY2]], [[COPY1]], [[COPY]], 0, 1 :: (store 8 into %ir.addr)
161 %0:gpr(s64) = COPY $x0
162 %1:gpr(s64) = G_CONSTANT i64 3
163 %2:gpr(s64) = G_SHL %0, %1(s64)
164 %3:gpr(p0) = COPY $x1
165 %ptr:gpr(p0) = G_GEP %3, %2
166 %4:gpr(s64) = COPY $x2
167 G_STORE %4, %ptr :: (store 8 into %ir.addr)
0 ; fastisel should not fold add with non-pointer bitwidth
11 ; sext(a) + sext(b) != sext(a + b)
2 ; RUN: llc -mtriple=arm64-apple-darwin %s -O0 -o - | FileCheck %s
2 ; RUN: llc -fast-isel -mtriple=arm64-apple-darwin %s -O0 -o - | FileCheck %s
33
44 define zeroext i8 @gep_promotion(i8* %ptr) nounwind uwtable ssp {
55 entry: