llvm.org GIT mirror, llvm / commit f5fe785

Revert r329956, "AArch64: Introduce a DAG combine for folding offsets into addresses."

Caused a hang and eventually an assertion failure in LTO builds of
7zip-benchmark on aarch64 iOS targets.
http://green.lab.llvm.org/green/job/lnt-ctmark-aarch64-O3-flto/2024/

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@330063 91177308-0d34-0410-b5e6-96231b3b80d8

Peter Collingbourne, 1 year, 7 months ago

8 changed files with 105 additions and 262 deletions.
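For context, the combine being reverted let the backend fold a small constant offset into the global-address relocation itself instead of emitting a separate add. The snippet below is an illustrative sketch only, adapted from the fold-global-offsets.ll test that this commit deletes (the function name is mine):

@x1 = external hidden global [2 x i64]

define i64 @load_past_x1() {
  ; With r329956 applied, the +16 was folded into the relocation:
  ;   adrp x8, x1+16
  ;   ldr  x0, [x8, :lo12:x1+16]
  ; After this revert the offset is instead left to normal
  ; addressing-mode selection on top of the plain x1 relocation.
  %l = load i64, i64* getelementptr ([2 x i64], [2 x i64]* @x1, i64 0, i64 2)
  ret i64 %l
}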
@@ -742 +742 @@
     if (!GAN)
       return true;
 
-    if (GAN->getOffset() % Size == 0) {
-      const GlobalValue *GV = GAN->getGlobal();
-      unsigned Alignment = GV->getAlignment();
-      Type *Ty = GV->getValueType();
-      if (Alignment == 0 && Ty->isSized())
-        Alignment = DL.getABITypeAlignment(Ty);
-
-      if (Alignment >= Size)
-        return true;
-    }
+    const GlobalValue *GV = GAN->getGlobal();
+    unsigned Alignment = GV->getAlignment();
+    Type *Ty = GV->getValueType();
+    if (Alignment == 0 && Ty->isSized())
+      Alignment = DL.getABITypeAlignment(Ty);
+
+    if (Alignment >= Size)
+      return true;
   }
 
   if (CurDAG->isBaseWithConstantOffset(N)) {
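The check restored above decides whether a global may be used directly as the base of a scaled, immediate-offset load or store: the symbol's low bits land in a 12-bit immediate field that is scaled by the access size, so this is only safe when the global's alignment is at least that size. A minimal sketch of the case that passes the check, assuming an 8-byte-aligned global (the names and the expected codegen comment are mine, for illustration):

@g8 = global i64 0, align 8

define i64 @direct_lo12() {
  ; roughly expected: adrp x8, g8 ; ldr x0, [x8, :lo12:g8]
  %v = load i64, i64* @g8, align 8
  ret i64 %v
}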
@@ -575 +575 @@
   setTargetDAGCombine(ISD::INTRINSIC_VOID);
   setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
   setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
-
-  setTargetDAGCombine(ISD::GlobalAddress);
 
   MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 8;
   MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 4;
@@ -3678 +3676 @@
 SDValue AArch64TargetLowering::getTargetNode(GlobalAddressSDNode *N, EVT Ty,
                                              SelectionDAG &DAG,
                                              unsigned Flag) const {
-  return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty,
-                                    N->getOffset(), Flag);
+  return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty, 0, Flag);
 }
 
 SDValue AArch64TargetLowering::getTargetNode(JumpTableSDNode *N, EVT Ty,
@@ -3754 +3751 @@
   unsigned char OpFlags =
       Subtarget->ClassifyGlobalReference(GV, getTargetMachine());
 
-  if (OpFlags != AArch64II::MO_NO_FLAG)
-    assert(cast<GlobalAddressSDNode>(Op)->getOffset() == 0 &&
-           "unexpected offset in global node");
+  assert(cast<GlobalAddressSDNode>(Op)->getOffset() == 0 &&
+         "unexpected offset in global node");
 
   // This also catches the large code model case for Darwin.
   if ((OpFlags & AArch64II::MO_GOT) != 0) {
@@ -4994 +4990 @@
 
 bool AArch64TargetLowering::isOffsetFoldingLegal(
     const GlobalAddressSDNode *GA) const {
-  // Offsets are folded in the DAG combine rather than here so that we can
-  // intelligently choose an offset based on the uses.
+  DEBUG(dbgs() << "Skipping offset folding global address: ");
+  DEBUG(GA->dump());
+  DEBUG(dbgs() << "AArch64 doesn't support folding offsets into global "
+                  "addresses\n");
   return false;
 }
 
@@ -10616 +10614 @@
     return N->getOperand(0);
 
   return SDValue();
-}
-
-// If all users of the globaladdr are of the form (globaladdr + constant), find
-// the smallest constant, fold it into the globaladdr's offset and rewrite the
-// globaladdr as (globaladdr + constant) - constant.
-static SDValue performGlobalAddressCombine(SDNode *N, SelectionDAG &DAG,
-                                           const AArch64Subtarget *Subtarget,
-                                           const TargetMachine &TM) {
-  auto *GN = dyn_cast<GlobalAddressSDNode>(N);
-  if (!GN || Subtarget->ClassifyGlobalReference(GN->getGlobal(), TM) !=
-                 AArch64II::MO_NO_FLAG)
-    return SDValue();
-
-  uint64_t MinOffset = -1ull;
-  for (SDNode *N : GN->uses()) {
-    if (N->getOpcode() != ISD::ADD)
-      return SDValue();
-    auto *C = dyn_cast<ConstantSDNode>(N->getOperand(0));
-    if (!C)
-      C = dyn_cast<ConstantSDNode>(N->getOperand(1));
-    if (!C)
-      return SDValue();
-    MinOffset = std::min(MinOffset, C->getZExtValue());
-  }
-  uint64_t Offset = MinOffset + GN->getOffset();
-
-  // Check whether folding this offset is legal. It must not go out of bounds of
-  // the referenced object to avoid violating the code model, and must be
-  // smaller than 2^21 because this is the largest offset expressible in all
-  // object formats.
-  //
-  // This check also prevents us from folding negative offsets, which will end
-  // up being treated in the same way as large positive ones. They could also
-  // cause code model violations, and aren't really common enough to matter.
-  if (Offset >= (1 << 21))
-    return SDValue();
-
-  const GlobalValue *GV = GN->getGlobal();
-  Type *T = GV->getValueType();
-  if (!T->isSized() ||
-      Offset > GV->getParent()->getDataLayout().getTypeAllocSize(T))
-    return SDValue();
-
-  SDLoc DL(GN);
-  SDValue Result = DAG.getGlobalAddress(GV, DL, MVT::i64, Offset);
-  return DAG.getNode(ISD::SUB, DL, MVT::i64, Result,
-                     DAG.getConstant(MinOffset, DL, MVT::i64));
 }
 
 SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
@@ -10752 +10703 @@
     default:
       break;
     }
-  case ISD::GlobalAddress:
-    return performGlobalAddressCombine(N, DAG, Subtarget, getTargetMachine());
   }
   return SDValue();
 }
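For reference, the combine removed above rewrote a global address in terms of the smallest constant added to it across all of its uses. A hedged, IR-flavoured sketch of the effect (the real transform ran on SelectionDAG nodes; the names and offsets here are made up):

; Before: every user adds a constant to @g, and the smallest constant is 8.
;   %a = @g + 8
;   %b = @g + 24
; After: offset 8 is folded into the global-address node and compensated
; with a subtraction, so the common @g+8 address is formed once:
;   %base = (@g + 8) - 8
;   %a    = %base + 8        ; simplifies back to (@g + 8)
;   %b    = %base + 24       ; becomes (@g + 8) + 16
; Per the removed comments, the folded offset also had to stay inside the
; referenced object and below 2^21, the largest offset expressible in all
; object formats.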
@@ -4 +4 @@
 
 ; base + offset (imm9)
 ; CHECK: @t1
-; CHECK: ldr xzr, [x0, #8]
+; CHECK: ldr xzr, [x{{[0-9]+}}, #8]
 ; CHECK: ret
-define void @t1(i64* %object) {
-  %incdec.ptr = getelementptr inbounds i64, i64* %object, i64 1
+define void @t1() {
+  %incdec.ptr = getelementptr inbounds i64, i64* @object, i64 1
   %tmp = load volatile i64, i64* %incdec.ptr, align 8
   ret void
 }
 
 ; base + offset (> imm9)
 ; CHECK: @t2
-; CHECK: sub [[ADDREG:x[0-9]+]], x0, #264
+; CHECK: sub [[ADDREG:x[0-9]+]], x{{[0-9]+}}, #264
 ; CHECK: ldr xzr, [
+; CHECK: [[ADDREG]]]
 ; CHECK: ret
-define void @t2(i64* %object) {
-  %incdec.ptr = getelementptr inbounds i64, i64* %object, i64 -33
+define void @t2() {
+  %incdec.ptr = getelementptr inbounds i64, i64* @object, i64 -33
   %tmp = load volatile i64, i64* %incdec.ptr, align 8
   ret void
 }
 
 ; base + unsigned offset (> imm9 and <= imm12 * size of type in bytes)
 ; CHECK: @t3
-; CHECK: ldr xzr, [x0, #32760]
+; CHECK: ldr xzr, [x{{[0-9]+}}, #32760]
 ; CHECK: ret
-define void @t3(i64* %object) {
-  %incdec.ptr = getelementptr inbounds i64, i64* %object, i64 4095
+define void @t3() {
+  %incdec.ptr = getelementptr inbounds i64, i64* @object, i64 4095
   %tmp = load volatile i64, i64* %incdec.ptr, align 8
   ret void
 }
@@ -36 +37 @@
 ; base + unsigned offset (> imm12 * size of type in bytes)
 ; CHECK: @t4
 ; CHECK: orr w[[NUM:[0-9]+]], wzr, #0x8000
-; CHECK: ldr xzr, [x0, x[[NUM]]]
+; CHECK: ldr xzr, [x{{[0-9]+}}, x[[NUM]]]
 ; CHECK: ret
-define void @t4(i64* %object) {
-  %incdec.ptr = getelementptr inbounds i64, i64* %object, i64 4096
+define void @t4() {
+  %incdec.ptr = getelementptr inbounds i64, i64* @object, i64 4096
   %tmp = load volatile i64, i64* %incdec.ptr, align 8
   ret void
 }
@@ -56 +57 @@
 
 ; base + reg + imm
 ; CHECK: @t6
-; CHECK: add [[ADDREG:x[0-9]+]], x1, x0, lsl #3
+; CHECK: add [[ADDREG:x[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}}, lsl #3
 ; CHECK-NEXT: orr w[[NUM:[0-9]+]], wzr, #0x8000
 ; CHECK: ldr xzr, [x{{[0-9]+}}, x[[NUM]]]
 ; CHECK: ret
-define void @t6(i64 %a, i64* %object) {
-  %tmp1 = getelementptr inbounds i64, i64* %object, i64 %a
+define void @t6(i64 %a) {
+  %tmp1 = getelementptr inbounds i64, i64* @object, i64 %a
   %incdec.ptr = getelementptr inbounds i64, i64* %tmp1, i64 4096
   %tmp = load volatile i64, i64* %incdec.ptr, align 8
   ret void
@@ -263 +263 @@
 
 ; Add a bunch of tests for rdar://13258794: Match LDUR/STUR for D and Q
 ; registers for unscaled vector accesses
-
-define <1 x i64> @fct0(i8* %str) nounwind readonly ssp {
+@str = global [63 x i8] c"Test case for rdar://13258794: LDUR/STUR for D and Q registers\00", align 1
+
+define <1 x i64> @fct0() nounwind readonly ssp {
 entry:
 ; CHECK-LABEL: fct0:
 ; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
-  %p = getelementptr inbounds i8, i8* %str, i64 3
-  %q = bitcast i8* %p to <1 x i64>*
-  %0 = load <1 x i64>, <1 x i64>* %q, align 8
+  %0 = load <1 x i64>, <1 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <1 x i64>*), align 8
   ret <1 x i64> %0
 }
 
-define <2 x i32> @fct1(i8* %str) nounwind readonly ssp {
+define <2 x i32> @fct1() nounwind readonly ssp {
 entry:
 ; CHECK-LABEL: fct1:
 ; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
-  %p = getelementptr inbounds i8, i8* %str, i64 3
-  %q = bitcast i8* %p to <2 x i32>*
-  %0 = load <2 x i32>, <2 x i32>* %q, align 8
+  %0 = load <2 x i32>, <2 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <2 x i32>*), align 8
   ret <2 x i32> %0
 }
 
-define <4 x i16> @fct2(i8* %str) nounwind readonly ssp {
+define <4 x i16> @fct2() nounwind readonly ssp {
 entry:
 ; CHECK-LABEL: fct2:
 ; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
-  %p = getelementptr inbounds i8, i8* %str, i64 3
-  %q = bitcast i8* %p to <4 x i16>*
-  %0 = load <4 x i16>, <4 x i16>* %q, align 8
+  %0 = load <4 x i16>, <4 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <4 x i16>*), align 8
   ret <4 x i16> %0
 }
 
-define <8 x i8> @fct3(i8* %str) nounwind readonly ssp {
+define <8 x i8> @fct3() nounwind readonly ssp {
 entry:
 ; CHECK-LABEL: fct3:
 ; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
-  %p = getelementptr inbounds i8, i8* %str, i64 3
-  %q = bitcast i8* %p to <8 x i8>*
-  %0 = load <8 x i8>, <8 x i8>* %q, align 8
+  %0 = load <8 x i8>, <8 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <8 x i8>*), align 8
   ret <8 x i8> %0
 }
 
-define <2 x i64> @fct4(i8* %str) nounwind readonly ssp {
+define <2 x i64> @fct4() nounwind readonly ssp {
 entry:
 ; CHECK-LABEL: fct4:
 ; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
-  %p = getelementptr inbounds i8, i8* %str, i64 3
-  %q = bitcast i8* %p to <2 x i64>*
-  %0 = load <2 x i64>, <2 x i64>* %q, align 16
+  %0 = load <2 x i64>, <2 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <2 x i64>*), align 16
   ret <2 x i64> %0
 }
 
-define <4 x i32> @fct5(i8* %str) nounwind readonly ssp {
+define <4 x i32> @fct5() nounwind readonly ssp {
 entry:
 ; CHECK-LABEL: fct5:
 ; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
-  %p = getelementptr inbounds i8, i8* %str, i64 3
-  %q = bitcast i8* %p to <4 x i32>*
-  %0 = load <4 x i32>, <4 x i32>* %q, align 16
+  %0 = load <4 x i32>, <4 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <4 x i32>*), align 16
   ret <4 x i32> %0
 }
 
-define <8 x i16> @fct6(i8* %str) nounwind readonly ssp {
+define <8 x i16> @fct6() nounwind readonly ssp {
 entry:
 ; CHECK-LABEL: fct6:
 ; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
-  %p = getelementptr inbounds i8, i8* %str, i64 3
-  %q = bitcast i8* %p to <8 x i16>*
-  %0 = load <8 x i16>, <8 x i16>* %q, align 16
+  %0 = load <8 x i16>, <8 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <8 x i16>*), align 16
   ret <8 x i16> %0
 }
 
-define <16 x i8> @fct7(i8* %str) nounwind readonly ssp {
+define <16 x i8> @fct7() nounwind readonly ssp {
 entry:
 ; CHECK-LABEL: fct7:
 ; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
-  %p = getelementptr inbounds i8, i8* %str, i64 3
-  %q = bitcast i8* %p to <16 x i8>*
-  %0 = load <16 x i8>, <16 x i8>* %q, align 16
+  %0 = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <16 x i8>*), align 16
   ret <16 x i8> %0
 }
 
-define void @fct8(i8* %str) nounwind ssp {
+define void @fct8() nounwind ssp {
 entry:
 ; CHECK-LABEL: fct8:
 ; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
-  %p = getelementptr inbounds i8, i8* %str, i64 3
-  %q = bitcast i8* %p to <1 x i64>*
-  %0 = load <1 x i64>, <1 x i64>* %q, align 8
-  %p2 = getelementptr inbounds i8, i8* %str, i64 4
-  %q2 = bitcast i8* %p2 to <1 x i64>*
-  store <1 x i64> %0, <1 x i64>* %q2, align 8
-  ret void
-}
-
-define void @fct9(i8* %str) nounwind ssp {
+  %0 = load <1 x i64>, <1 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <1 x i64>*), align 8
+  store <1 x i64> %0, <1 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <1 x i64>*), align 8
+  ret void
+}
+
+define void @fct9() nounwind ssp {
 entry:
 ; CHECK-LABEL: fct9:
 ; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
-  %p = getelementptr inbounds i8, i8* %str, i64 3
-  %q = bitcast i8* %p to <2 x i32>*
-  %0 = load <2 x i32>, <2 x i32>* %q, align 8
-  %p2 = getelementptr inbounds i8, i8* %str, i64 4
-  %q2 = bitcast i8* %p2 to <2 x i32>*
-  store <2 x i32> %0, <2 x i32>* %q2, align 8
-  ret void
-}
-
-define void @fct10(i8* %str) nounwind ssp {
+  %0 = load <2 x i32>, <2 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <2 x i32>*), align 8
+  store <2 x i32> %0, <2 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <2 x i32>*), align 8
+  ret void
+}
+
+define void @fct10() nounwind ssp {
 entry:
 ; CHECK-LABEL: fct10:
 ; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
-  %p = getelementptr inbounds i8, i8* %str, i64 3
-  %q = bitcast i8* %p to <4 x i16>*
-  %0 = load <4 x i16>, <4 x i16>* %q, align 8
-  %p2 = getelementptr inbounds i8, i8* %str, i64 4
-  %q2 = bitcast i8* %p2 to <4 x i16>*
-  store <4 x i16> %0, <4 x i16>* %q2, align 8
-  ret void
-}
-
-define void @fct11(i8* %str) nounwind ssp {
+  %0 = load <4 x i16>, <4 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <4 x i16>*), align 8
+  store <4 x i16> %0, <4 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <4 x i16>*), align 8
+  ret void
+}
+
+define void @fct11() nounwind ssp {
 entry:
 ; CHECK-LABEL: fct11:
 ; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
-  %p = getelementptr inbounds i8, i8* %str, i64 3
-  %q = bitcast i8* %p to <8 x i8>*
-  %0 = load <8 x i8>, <8 x i8>* %q, align 8
-  %p2 = getelementptr inbounds i8, i8* %str, i64 4
-  %q2 = bitcast i8* %p2 to <8 x i8>*
-  store <8 x i8> %0, <8 x i8>* %q2, align 8
-  ret void
-}
-
-define void @fct12(i8* %str) nounwind ssp {
+  %0 = load <8 x i8>, <8 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <8 x i8>*), align 8
+  store <8 x i8> %0, <8 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <8 x i8>*), align 8
+  ret void
+}
+
+define void @fct12() nounwind ssp {
 entry:
 ; CHECK-LABEL: fct12:
 ; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
-  %p = getelementptr inbounds i8, i8* %str, i64 3
-  %q = bitcast i8* %p to <2 x i64>*
-  %0 = load <2 x i64>, <2 x i64>* %q, align 16
-  %p2 = getelementptr inbounds i8, i8* %str, i64 4
-  %q2 = bitcast i8* %p2 to <2 x i64>*
-  store <2 x i64> %0, <2 x i64>* %q2, align 16
-  ret void
-}
-
-define void @fct13(i8* %str) nounwind ssp {
+  %0 = load <2 x i64>, <2 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <2 x i64>*), align 16
+  store <2 x i64> %0, <2 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <2 x i64>*), align 16
+  ret void
+}
+
+define void @fct13() nounwind ssp {
 entry:
 ; CHECK-LABEL: fct13:
 ; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
-  %p = getelementptr inbounds i8, i8* %str, i64 3
-  %q = bitcast i8* %p to <4 x i32>*
-  %0 = load <4 x i32>, <4 x i32>* %q, align 16
-  %p2 = getelementptr inbounds i8, i8* %str, i64 4
-  %q2 = bitcast i8* %p2 to <4 x i32>*
-  store <4 x i32> %0, <4 x i32>* %q2, align 16
-  ret void
-}
-
-define void @fct14(i8* %str) nounwind ssp {
+  %0 = load <4 x i32>, <4 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <4 x i32>*), align 16
+  store <4 x i32> %0, <4 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <4 x i32>*), align 16
+  ret void
+}
+
+define void @fct14() nounwind ssp {
 entry:
 ; CHECK-LABEL: fct14:
 ; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
-  %p = getelementptr inbounds i8, i8* %str, i64 3
-  %q = bitcast i8* %p to <8 x i16>*
-  %0 = load <8 x i16>, <8 x i16>* %q, align 16
-  %p2 = getelementptr inbounds i8, i8* %str, i64 4
-  %q2 = bitcast i8* %p2 to <8 x i16>*
-  store <8 x i16> %0, <8 x i16>* %q2, align 16
-  ret void
-}
-
-define void @fct15(i8* %str) nounwind ssp {
+  %0 = load <8 x i16>, <8 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <8 x i16>*), align 16
+  store <8 x i16> %0, <8 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <8 x i16>*), align 16
+  ret void
+}
+
+define void @fct15() nounwind ssp {
 entry:
 ; CHECK-LABEL: fct15:
 ; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
-  %p = getelementptr inbounds i8, i8* %str, i64 3
-  %q = bitcast i8* %p to <16 x i8>*
-  %0 = load <16 x i8>, <16 x i8>* %q, align 16
-  %p2 = getelementptr inbounds i8, i8* %str, i64 4
-  %q2 = bitcast i8* %p2 to <16 x i8>*
-  store <16 x i8> %0, <16 x i8>* %q2, align 16
+  %0 = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <16 x i8>*), align 16
+  store <16 x i8> %0, <16 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <16 x i8>*), align 16
   ret void
 }
 
test/CodeGen/AArch64/fold-global-offsets.ll (deleted: 0 additions, 59 deletions)

-; RUN: llc < %s -mtriple=arm64-linux-gnu | FileCheck %s
-
-@x1 = external hidden global [2 x i64]
-@x2 = external hidden global [16777216 x i64]
-
-define i64 @f1() {
-  ; CHECK: f1:
-  ; CHECK: adrp x8, x1+16
-  ; CHECK: ldr x0, [x8, :lo12:x1+16]
-  %l = load i64, i64* getelementptr ([2 x i64], [2 x i64]* @x1, i64 0, i64 2)
-  ret i64 %l
-}
-
-define i64 @f2() {
-  ; CHECK: f2:
-  ; CHECK: adrp x8, x1
-  ; CHECK: add x8, x8, :lo12:x1
-  ; CHECK: ldr x0, [x8, #24]
-  %l = load i64, i64* getelementptr ([2 x i64], [2 x i64]* @x1, i64 0, i64 3)
-  ret i64 %l
-}
-
-define i64 @f3() {
-  ; CHECK: f3:
-  ; CHECK: adrp x8, x1+1
-  ; CHECK: add x8, x8, :lo12:x1+1
-  ; CHECK: ldr x0, [x8]
-  %l = load i64, i64* bitcast (i8* getelementptr (i8, i8* bitcast ([2 x i64]* @x1 to i8*), i64 1) to i64*)
-  ret i64 %l
-}
-
-define [2 x i64] @f4() {
-  ; CHECK: f4:
-  ; CHECK: adrp x8, x2+8
-  ; CHECK: add x8, x8, :lo12:x2+8
-  ; CHECK: ldp x0, x1, [x8]
-  %l = load [2 x i64], [2 x i64]* bitcast (i8* getelementptr (i8, i8* bitcast ([16777216 x i64]* @x2 to i8*), i64 8) to [2 x i64]*)
-  ret [2 x i64] %l
-}
-
-define i64 @f5() {
-  ; CHECK: f5:
-  ; CHECK: adrp x8, x2+2097144
-  ; CHECK: ldr x0, [x8, :lo12:x2+2097144]
-  ; CHECK: ret
-  %l = load i64, i64* getelementptr ([16777216 x i64], [16777216 x i64]* @x2, i64 0, i64 262143)
-  ret i64 %l
-}
-
-define i64 @f6() {
-  ; CHECK: f6:
-  ; CHECK: adrp x8, x2
-  ; CHECK: add x8, x8, :lo12:x2
-  ; CHECK: orr w9, wzr, #0x200000
-  ; CHECK: ldr x0, [x8, x9]
-  ; CHECK: ret
-  %l = load i64, i64* getelementptr ([16777216 x i64], [16777216 x i64]* @x2, i64 0, i64 262144)
-  ret i64 %l
-}
@@ -9 +9 @@
 ;CHECK-APPLE-IOS: adrp x8, __MergedGlobals_x@PAGE
 ;CHECK-APPLE-IOS-NOT: adrp
 ;CHECK-APPLE-IOS: add x8, x8, __MergedGlobals_x@PAGEOFF
-;CHECK-APPLE-IOS: adrp x9, __MergedGlobals_y@PAGE+12
-;CHECK-APPLE-IOS: str w1, [x9, __MergedGlobals_y@PAGEOFF+12]
+;CHECK-APPLE-IOS: adrp x9, __MergedGlobals_y@PAGE
+;CHECK-APPLE-IOS: add x9, x9, __MergedGlobals_y@PAGEOFF
   %x3 = getelementptr inbounds [1000 x i32], [1000 x i32]* @x, i32 0, i64 3
   %y3 = getelementptr inbounds [1000 x i32], [1000 x i32]* @y, i32 0, i64 3
   store i32 %a1, i32* %x3, align 4
@@ -43 +43 @@
 
 ; CHECK-LABEL: f3:
 define void @f3(i32 %a1, i32 %a2) minsize nounwind {
-; CHECK-NEXT: adrp x8, [[SET]]@PAGE+8
-; CHECK-NEXT: add x8, x8, [[SET]]@PAGEOFF+8
-; CHECK-NEXT: stp w0, w1, [x8]
+; CHECK-NEXT: adrp x8, [[SET]]@PAGE
+; CHECK-NEXT: add x8, x8, [[SET]]@PAGEOFF
+; CHECK-NEXT: stp w0, w1, [x8, #8]
 ; CHECK-NEXT: ret
   store i32 %a1, i32* @m3, align 4
   store i32 %a2, i32* @n3, align 4
@@ -56 +56 @@
 
 ; CHECK-LABEL: f4:
 define void @f4(i32 %a1, i32 %a2) nounwind {
-; CHECK-NEXT: adrp x8, [[SET]]@PAGE+8
+; CHECK-NEXT: adrp x8, [[SET]]@PAGE
+; CHECK-NEXT: add x8, x8, [[SET]]@PAGEOFF
 ; CHECK-NEXT: adrp x9, _n4@PAGE
-; CHECK-NEXT: str w0, [x8, [[SET]]@PAGEOFF+8]
+; CHECK-NEXT: str w0, [x8, #8]
 ; CHECK-NEXT: str w1, [x9, _n4@PAGEOFF]
 ; CHECK-NEXT: ret
   store i32 %a1, i32* @m3, align 4
@@ -37 +37 @@
 
 ; CHECK-LABEL: f3:
 define void @f3(i32 %a1, i32 %a2) #0 {
-; CHECK-NEXT: adrp x8, [[SET]]@PAGE+12
-; CHECK-NEXT: add x8, x8, [[SET]]@PAGEOFF+12
-; CHECK-NEXT: stp w0, w1, [x8]
+; CHECK-NEXT: adrp x8, [[SET]]@PAGE
+; CHECK-NEXT: add x8, x8, [[SET]]@PAGEOFF
+; CHECK-NEXT: stp w0, w1, [x8, #12]
 ; CHECK-NEXT: ret
   store i32 %a1, i32* @m2, align 4
   store i32 %a2, i32* @n2, align 4