llvm.org GIT mirror llvm / 24517d0
Add the ability to use GEPs for address sinking in CGP The current memory-instruction optimization logic in CGP, which sinks parts of the address computation that can be absorbed by the addressing mode, does this by explicitly converting the relevant part of the address computation into IR-level integer operations (making use of ptrtoint and inttoptr). For most targets this is currently not a problem, but for targets wishing to make use of IR-level aliasing analysis during CodeGen, the use of ptrtoint/inttoptr is a problem for two reasons: 1. BasicAA becomes less powerful in the face of the ptrtoint/inttoptr 2. In cases where type-punning was used, and BasicAA was used to override TBAA, BasicAA may no longer do so. (this had forced us to disable all use of TBAA in CodeGen; something which we can now enable again) This (use of GEPs instead of ptrtoint/inttoptr) is not currently enabled by default (except for those targets that use AA during CodeGen), and so aside from some PowerPC subtargets and SystemZ, there should be no change in behavior. We may be able to switch completely away from the ptrtoint/inttoptr sinking on all targets, but further testing is required. I've doubled-up on a number of existing tests that are sensitive to the address sinking behavior (including some store-merging tests that are sensitive to the order of the resulting ADD operations at the SDAG level). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@206092 91177308-0d34-0410-b5e6-96231b3b80d8 Hal Finkel 5 years ago
12 changed file(s) with 164 addition(s) and 2 deletion(s). Raw diff Collapse all Expand all
3838 #include "llvm/Support/raw_ostream.h"
3939 #include "llvm/Target/TargetLibraryInfo.h"
4040 #include "llvm/Target/TargetLowering.h"
41 #include "llvm/Target/TargetSubtargetInfo.h"
4142 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
4243 #include "llvm/Transforms/Utils/BuildLibCalls.h"
4344 #include "llvm/Transforms/Utils/BypassSlowDivision.h"
6869 static cl::opt DisableSelectToBranch(
6970 "disable-cgp-select2branch", cl::Hidden, cl::init(false),
7071 cl::desc("Disable select to branch conversion."));
72
73 static cl::opt AddrSinkUsingGEPs(
74 "addr-sink-using-gep", cl::Hidden, cl::init(false),
75 cl::desc("Address sinking in CGP using GEPs."));
7176
7277 static cl::opt EnableAndCmpSinking(
7378 "enable-andcmp-sinking", cl::Hidden, cl::init(true),
24222427 << *MemoryInst);
24232428 if (SunkAddr->getType() != Addr->getType())
24242429 SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType());
2430 } else if (AddrSinkUsingGEPs || (!AddrSinkUsingGEPs.getNumOccurrences() &&
2431 TM && TM->getSubtarget().useAA())) {
2432 // By default, we use the GEP-based method when AA is used later. This
2433 // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities.
2434 DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
2435 << *MemoryInst);
2436 Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(Addr->getType());
2437 Value *ResultPtr = 0, *ResultIndex = 0;
2438
2439 // First, find the pointer.
2440 if (AddrMode.BaseReg && AddrMode.BaseReg->getType()->isPointerTy()) {
2441 ResultPtr = AddrMode.BaseReg;
2442 AddrMode.BaseReg = 0;
2443 }
2444
2445 if (AddrMode.Scale && AddrMode.ScaledReg->getType()->isPointerTy()) {
2446 // We can't add more than one pointer together, nor can we scale a
2447 // pointer (both of which seem meaningless).
2448 if (ResultPtr || AddrMode.Scale != 1)
2449 return false;
2450
2451 ResultPtr = AddrMode.ScaledReg;
2452 AddrMode.Scale = 0;
2453 }
2454
2455 if (AddrMode.BaseGV) {
2456 if (ResultPtr)
2457 return false;
2458
2459 ResultPtr = AddrMode.BaseGV;
2460 }
2461
2462 // If the real base value actually came from an inttoptr, then the matcher
2463 // will look through it and provide only the integer value. In that case,
2464 // use it here.
2465 if (!ResultPtr && AddrMode.BaseReg) {
2466 ResultPtr =
2467 Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(), "sunkaddr");
2468 AddrMode.BaseReg = 0;
2469 } else if (!ResultPtr && AddrMode.Scale == 1) {
2470 ResultPtr =
2471 Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(), "sunkaddr");
2472 AddrMode.Scale = 0;
2473 }
2474
2475 if (!ResultPtr &&
2476 !AddrMode.BaseReg && !AddrMode.Scale && !AddrMode.BaseOffs) {
2477 SunkAddr = Constant::getNullValue(Addr->getType());
2478 } else if (!ResultPtr) {
2479 return false;
2480 } else {
2481 Type *I8PtrTy =
2482 Builder.getInt8PtrTy(Addr->getType()->getPointerAddressSpace());
2483
2484 // Start with the base register. Do this first so that subsequent address
2485 // matching finds it last, which will prevent it from trying to match it
2486 // as the scaled value in case it happens to be a mul. That would be
2487 // problematic if we've sunk a different mul for the scale, because then
2488 // we'd end up sinking both muls.
2489 if (AddrMode.BaseReg) {
2490 Value *V = AddrMode.BaseReg;
2491 if (V->getType() != IntPtrTy)
2492 V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
2493
2494 ResultIndex = V;
2495 }
2496
2497 // Add the scale value.
2498 if (AddrMode.Scale) {
2499 Value *V = AddrMode.ScaledReg;
2500 if (V->getType() == IntPtrTy) {
2501 // done.
2502 } else if (cast(IntPtrTy)->getBitWidth() <
2503 cast(V->getType())->getBitWidth()) {
2504 V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
2505 } else {
2506 // It is only safe to sign extend the BaseReg if we know that the math
2507 // required to create it did not overflow before we extend it. Since
2508 // the original IR value was tossed in favor of a constant back when
2509 // the AddrMode was created we need to bail out gracefully if widths
2510 // do not match instead of extending it.
2511 Instruction *I = dyn_cast_or_null(ResultIndex);
2512 if (I && (ResultIndex != AddrMode.BaseReg))
2513 I->eraseFromParent();
2514 return false;
2515 }
2516
2517 if (AddrMode.Scale != 1)
2518 V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
2519 "sunkaddr");
2520 if (ResultIndex)
2521 ResultIndex = Builder.CreateAdd(ResultIndex, V, "sunkaddr");
2522 else
2523 ResultIndex = V;
2524 }
2525
2526 // Add in the Base Offset if present.
2527 if (AddrMode.BaseOffs) {
2528 Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
2529 if (ResultIndex) {
2530 // We need to add this separately from the scale above to help with
2531 // SDAG consecutive load/store merging.
2532 if (ResultPtr->getType() != I8PtrTy)
2533 ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy);
2534 ResultPtr = Builder.CreateGEP(ResultPtr, ResultIndex, "sunkaddr");
2535 }
2536
2537 ResultIndex = V;
2538 }
2539
2540 if (!ResultIndex) {
2541 SunkAddr = ResultPtr;
2542 } else {
2543 if (ResultPtr->getType() != I8PtrTy)
2544 ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy);
2545 SunkAddr = Builder.CreateGEP(ResultPtr, ResultIndex, "sunkaddr");
2546 }
2547
2548 if (SunkAddr->getType() != Addr->getType())
2549 SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType());
2550 }
24252551 } else {
24262552 DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
24272553 << *MemoryInst);
0 ; RUN: llc -mtriple=arm-eabi -mattr=+v4t %s -o - | FileCheck %s
1 ; RUN: llc -mtriple=arm-eabi -mattr=+v4t -addr-sink-using-gep=1 %s -o - | FileCheck %s
12
23 ;
34
0 ; RUN: llc -O3 < %s | FileCheck %s
1 ; RUN: llc -O3 -addr-sink-using-gep=1 < %s | FileCheck %s
12 ; Test case for a DAG combiner bug where we combined an indexed load
23 ; with an extension (sext, zext, or any) into a regular extended load,
34 ; i.e., dropping the indexed value.
0 ; RUN: llc -mcpu=g5 < %s | FileCheck %s
1 ; RUN: llc -mcpu=g5 -addr-sink-using-gep=1 < %s | FileCheck %s
12 ;; Formerly crashed, see PR 1508
23 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
34 target triple = "powerpc64-apple-darwin8"
None ; RUN: llc < %s -march=x86 -mtriple=i686-darwin | \
1 ; RUN: grep push | count 3
0 ; RUN: llc < %s -march=x86 -mtriple=i686-darwin | FileCheck %s
1 ; RUN: llc < %s -march=x86 -mtriple=i686-darwin -addr-sink-using-gep=1 | FileCheck %s
22
33 define void @foo(i8** %buf, i32 %size, i32 %col, i8* %p) nounwind {
44 entry:
5 ; CHECK-LABEL: @foo
6 ; CHECK: push
7 ; CHECK: push
8 ; CHECK: push
9 ; CHECK-NOT: push
10
511 icmp sgt i32 %size, 0 ; :0 [#uses=1]
612 br i1 %0, label %bb.preheader, label %return
713
0 ; RUN: llc -march=x86-64 -mcpu=corei7 -mattr=+avx < %s | FileCheck %s
1 ; RUN: llc -march=x86-64 -mcpu=corei7 -mattr=+avx -addr-sink-using-gep=1 < %s | FileCheck %s
12
23 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
34 target triple = "x86_64-apple-macosx10.8.0"
0 ; RUN: opt -S -codegenprepare %s -o - | FileCheck %s
1 ; RUN: opt -S -codegenprepare -addr-sink-using-gep=1 %s -o - | FileCheck -check-prefix=CHECK-GEP %s
12 ; This file tests the different cases what are involved when codegen prepare
23 ; tries to get sign extension out of the way of addressing mode.
34 ; This tests require an actual target as addressing mode decisions depends
280281 ; CHECK: [[ADDR2:%[a-zA-Z_0-9-]+]] = inttoptr i64 [[BASE2]] to i32*
281282 ; CHECK: load i32* [[ADDR2]]
282283 ; CHECK: ret
284 ; CHECK-GEP-LABEL: @checkProfitability
285 ; CHECK-GEP-NOT: {{%[a-zA-Z_0-9-]+}} = sext i32 %arg1 to i64
286 ; CHECK-GEP-NOT: {{%[a-zA-Z_0-9-]+}} = sext i32 %arg2 to i64
287 ; CHECK-GEP: [[SHL:%[a-zA-Z_0-9-]+]] = shl nsw i32 %arg1, 1
288 ; CHECK-GEP: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i32 [[SHL]], %arg2
289 ; CHECK-GEP: [[SEXTADD:%[a-zA-Z_0-9-]+]] = sext i32 [[ADD]] to i64
290 ; BB then
291 ; CHECK-GEP: [[BASE1:%[a-zA-Z_0-9-]+]] = inttoptr i64 [[SEXTADD]] to i32*
292 ; CHECK-GEP: [[BCC1:%[a-zA-Z_0-9-]+]] = bitcast i32* [[BASE1]] to i8*
293 ; CHECK-GEP: [[FULL1:%[a-zA-Z_0-9-]+]] = getelementptr i8* [[BCC1]], i64 48
294 ; CHECK-GEP: [[ADDR1:%[a-zA-Z_0-9-]+]] = bitcast i8* [[FULL1]] to i32*
295 ; CHECK-GEP: load i32* [[ADDR1]]
296 ; BB else
297 ; CHECK-GEP: [[BASE2:%[a-zA-Z_0-9-]+]] = inttoptr i64 [[SEXTADD]] to i32*
298 ; CHECK-GEP: [[BCC2:%[a-zA-Z_0-9-]+]] = bitcast i32* [[BASE2]] to i8*
299 ; CHECK-GEP: [[FULL2:%[a-zA-Z_0-9-]+]] = getelementptr i8* [[BCC2]], i64 48
300 ; CHECK-GEP: [[ADDR2:%[a-zA-Z_0-9-]+]] = bitcast i8* [[FULL2]] to i32*
301 ; CHECK-GEP: load i32* [[ADDR2]]
302 ; CHECK-GEP: ret
283303 define i32 @checkProfitability(i32 %arg1, i32 %arg2, i1 %test) {
284304 %shl = shl nsw i32 %arg1, 1
285305 %add1 = add nsw i32 %shl, %arg2
0 ; RUN: llc < %s -mtriple=x86_64-pc-linux | FileCheck %s
1 ; RUN: llc < %s -mtriple=x86_64-pc-linux -addr-sink-using-gep=1 | FileCheck %s
12
23 ; Check that the CodeGenPrepare Pass
34 ; does not wrongly rewrite the address computed by Instruction %4
0 ; RUN: llc < %s -march=x86 | FileCheck %s
1 ; RUN: llc < %s -march=x86 -addr-sink-using-gep=1 | FileCheck %s
12
23 define i32 @test(i32* %X, i32 %B) {
34 ; CHECK-LABEL: test:
0 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s
1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -addr-sink-using-gep=1 | FileCheck %s
12
23 define void @merge_store(i32* nocapture %a) {
34 ; CHECK-LABEL: merge_store:
0 ; RUN: llc -O3 -mtriple=thumb-eabi -mcpu=cortex-a9 %s -o - | FileCheck %s -check-prefix=A9
1 ; RUN: llc -O3 -mtriple=thumb-eabi -mcpu=cortex-a9 -addr-sink-using-gep=1 %s -o - | FileCheck %s -check-prefix=A9
12
23 ; @simple is the most basic chain of address induction variables. Chaining
34 ; saves at least one register and avoids complex addressing and setup
0 ; RUN: llc < %s -O3 -march=x86-64 -mcpu=core2 | FileCheck %s -check-prefix=X64
11 ; RUN: llc < %s -O3 -march=x86 -mcpu=core2 | FileCheck %s -check-prefix=X32
2 ; RUN: llc < %s -O3 -march=x86-64 -mcpu=core2 -addr-sink-using-gep=1 | FileCheck %s -check-prefix=X64
3 ; RUN: llc < %s -O3 -march=x86 -mcpu=core2 -addr-sink-using-gep=1 | FileCheck %s -check-prefix=X32
24
35 ; @simple is the most basic chain of address induction variables. Chaining
46 ; saves at least one register and avoids complex addressing and setup