llvm.org GIT mirror llvm / fa9e4b5
CodeGenPrep: sink extends of illegal types into use block. This helps the instruction selector to lower an i64 * i64 -> i128 multiplication into a single instruction on targets which support it. Patch by Manuel Jacob. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@203230 91177308-0d34-0410-b5e6-96231b3b80d8 Tim Northover 5 years ago
4 changed file(s) with 149 addition(s) and 95 deletion(s). Raw diff Collapse all Expand all
128128 bool OptimizeMemoryInst(Instruction *I, Value *Addr, Type *AccessTy);
129129 bool OptimizeInlineAsmInst(CallInst *CS);
130130 bool OptimizeCallInst(CallInst *CI);
131 bool SinkExtExpand(CastInst *I);
131132 bool MoveExtToFormExtLoad(Instruction *I);
132133 bool OptimizeExtUses(Instruction *I);
133134 bool OptimizeSelectInst(SelectInst *SI);
464465 DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
465466 }
466467
468 /// SinkCast - Sink the specified cast instruction into its user blocks
469 static bool SinkCast(CastInst *CI) {
470 BasicBlock *DefBB = CI->getParent();
471
472 /// InsertedCasts - Only insert a cast in each block once.
473 DenseMap InsertedCasts;
474
475 bool MadeChange = false;
476 for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end();
477 UI != E; ) {
478 Use &TheUse = UI.getUse();
479 Instruction *User = cast(*UI);
480
481 // Figure out which BB this cast is used in. For PHI's this is the
482 // appropriate predecessor block.
483 BasicBlock *UserBB = User->getParent();
484 if (PHINode *PN = dyn_cast(User)) {
485 UserBB = PN->getIncomingBlock(UI);
486 }
487
488 // Preincrement use iterator so we don't invalidate it.
489 ++UI;
490
491 // If this user is in the same block as the cast, don't change the cast.
492 if (UserBB == DefBB) continue;
493
494 // If we have already inserted a cast into this block, use it.
495 CastInst *&InsertedCast = InsertedCasts[UserBB];
496
497 if (!InsertedCast) {
498 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
499 InsertedCast =
500 CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "",
501 InsertPt);
502 MadeChange = true;
503 }
504
505 // Replace a use of the cast with a use of the new cast.
506 TheUse = InsertedCast;
507 ++NumCastUses;
508 }
509
510 // If we removed all uses, nuke the cast.
511 if (CI->use_empty()) {
512 CI->eraseFromParent();
513 MadeChange = true;
514 }
515
516 return MadeChange;
517 }
518
467519 /// OptimizeNoopCopyExpression - If the specified cast instruction is a noop
468520 /// copy (e.g. it's casting from one pointer type to another, i32->i8 on PPC),
469521 /// sink it into user blocks to reduce the number of virtual
498550 if (SrcVT != DstVT)
499551 return false;
500552
501 BasicBlock *DefBB = CI->getParent();
502
503 /// InsertedCasts - Only insert a cast in each block once.
504 DenseMap<BasicBlock*, CastInst*> InsertedCasts;
505
506 bool MadeChange = false;
507 for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end();
508 UI != E; ) {
509 Use &TheUse = UI.getUse();
510 Instruction *User = cast<Instruction>(*UI);
511
512 // Figure out which BB this cast is used in. For PHI's this is the
513 // appropriate predecessor block.
514 BasicBlock *UserBB = User->getParent();
515 if (PHINode *PN = dyn_cast<PHINode>(User)) {
516 UserBB = PN->getIncomingBlock(UI);
517 }
518
519 // Preincrement use iterator so we don't invalidate it.
520 ++UI;
521
522 // If this user is in the same block as the cast, don't change the cast.
523 if (UserBB == DefBB) continue;
524
525 // If we have already inserted a cast into this block, use it.
526 CastInst *&InsertedCast = InsertedCasts[UserBB];
527
528 if (!InsertedCast) {
529 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
530 InsertedCast =
531 CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "",
532 InsertPt);
533 MadeChange = true;
534 }
535
536 // Replace a use of the cast with a use of the new cast.
537 TheUse = InsertedCast;
538 ++NumCastUses;
539 }
540
541 // If we removed all uses, nuke the cast.
542 if (CI->use_empty()) {
543 CI->eraseFromParent();
544 MadeChange = true;
545 }
546
547 return MadeChange;
553 return SinkCast(CI);
548554 }
549555
550556 /// OptimizeCmpExpression - sink the given CmpInst into user blocks to reduce
25212527 return MadeChange;
25222528 }
25232529
2530 /// SinkExtExpand - Sink a zext or sext into its user blocks if the target type
2531 /// doesn't fit in one register
2532 bool CodeGenPrepare::SinkExtExpand(CastInst *CI) {
2533 if (TLI &&
2534 TLI->getTypeAction(CI->getContext(), TLI->getValueType(CI->getType())) ==
2535 TargetLowering::TypeExpandInteger)
2536 return SinkCast(CI);
2537 return false;
2538 }
2539
25242540 /// MoveExtToFormExtLoad - Move a zext or sext fed by a load into the same
25252541 /// basic block as the load, unless conditions are unfavorable. This allows
25262542 /// SelectionDAG to fold the extend into the load.
25322548
25332549 // If they're already in the same block, there's nothing to do.
25342550 if (LI->getParent() == I->getParent())
2551 return false;
2552
2553 // Do not undo the optimization in SinkExtExpand
2554 if (TLI &&
2555 TLI->getTypeAction(I->getContext(), TLI->getValueType(I->getType())) ==
2556 TargetLowering::TypeExpandInteger)
25352557 return false;
25362558
25372559 // If the load has other users and the truncate is not free, this probably
28202842 return true;
28212843
28222844 if (isa<ZExtInst>(I) || isa<SExtInst>(I)) {
2845 if (SinkExtExpand(CI))
2846 return true;
28232847 bool MadeChange = MoveExtToFormExtLoad(I);
28242848 return MadeChange | OptimizeExtUses(I);
28252849 }
14431443
14441444 //===---------------------------------------------------------------------===//
14451445
1446 This code:
1447
1448 void vec_mpys1(int y[], const int x[], int scaler) {
1449 int i;
1450 for (i = 0; i < 150; i++)
1451 y[i] += (((long long)scaler * (long long)x[i]) >> 31);
1452 }
1453
1454 Compiles to this loop with GCC 3.x:
1455
1456 .L5:
1457 movl %ebx, %eax
1458 imull (%edi,%ecx,4)
1459 shrdl $31, %edx, %eax
1460 addl %eax, (%esi,%ecx,4)
1461 incl %ecx
1462 cmpl $149, %ecx
1463 jle .L5
1464
1465 llvm-gcc compiles it to the much uglier:
1466
1467 LBB1_1: ## bb1
1468 movl 24(%esp), %eax
1469 movl (%eax,%edi,4), %ebx
1470 movl %ebx, %ebp
1471 imull %esi, %ebp
1472 movl %ebx, %eax
1473 mull %ecx
1474 addl %ebp, %edx
1475 sarl $31, %ebx
1476 imull %ecx, %ebx
1477 addl %edx, %ebx
1478 shldl $1, %eax, %ebx
1479 movl 20(%esp), %eax
1480 addl %ebx, (%eax,%edi,4)
1481 incl %edi
1482 cmpl $150, %edi
1483 jne LBB1_1 ## bb1
1484
1485 The issue is that we hoist the cast of "scaler" to long long outside of the
1486 loop, the value comes into the loop as two values, and
1487 RegsForValue::getCopyFromRegs doesn't know how to put an AssertSext on the
1488 constructed BUILD_PAIR which represents the cast value.
1489
1490 This can be handled by making CodeGenPrepare sink the cast.
1491
1492 //===---------------------------------------------------------------------===//
1493
14941446 Test instructions can be eliminated by using EFLAGS values from arithmetic
14951447 instructions. This is currently not done for mul, and, or, xor, neg, shl,
14961448 sra, srl, shld, shrd, atomic ops, and others. It is also currently not done
0 ; RUN: llc < %s -march=x86-64 | FileCheck %s
1
; Each loop iteration multiplies a loaded i64 (sext'd to i128) by the loop-
; invariant sext of %factor; the CHECK lines below require this to select as
; a single widening imulq rather than an expanded multiply sequence.
2 define void @test(i64* nocapture %arr, i64 %arrsize, i64 %factor) nounwind uwtable {
3 %1 = icmp sgt i64 %arrsize, 0
4 br i1 %1, label %.lr.ph, label %._crit_edge
5
6 .lr.ph: ; preds = %0
7 %2 = sext i64 %factor to i128
8 br label %3
9
10 ; <label>:3                                       ; preds = %3, %.lr.ph
11 ; CHECK-NOT: mul
12 ; CHECK: imulq
13 ; CHECK-NOT: mul
14 %carry.02 = phi i128 [ 0, %.lr.ph ], [ %10, %3 ]
15 %i.01 = phi i64 [ 0, %.lr.ph ], [ %11, %3 ]
16 %4 = getelementptr inbounds i64* %arr, i64 %i.01
17 %5 = load i64* %4, align 8
18 %6 = sext i64 %5 to i128
19 %7 = mul nsw i128 %6, %2
20 %8 = add nsw i128 %7, %carry.02
21 %.tr = trunc i128 %8 to i64
22 %9 = and i64 %.tr, 9223372036854775807
23 store i64 %9, i64* %4, align 8
24 %10 = ashr i128 %8, 63
25 %11 = add nsw i64 %i.01, 1
26 %exitcond = icmp eq i64 %11, %arrsize
27 br i1 %exitcond, label %._crit_edge, label %3
28
29 ._crit_edge: ; preds = %3, %0
30 ret void
31 }
0 ; RUN: opt -codegenprepare -disable-cgp-branch-opts -S < %s | FileCheck %s
1 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
2 target triple = "x86_64-unknown-linux-gnu"
3
4 ; The first cast should be sunk into block2, in order that the
5 ; instruction selector can form an efficient
6 ; i64 * i64 -> i128 multiplication.
7 define i128 @sink(i64* %mem1, i64* %mem2) {
8 ; CHECK-LABEL: block1:
9 ; CHECK-NEXT: load
; %s1's only use is the i128 mul in block2, so CGP is expected to sink the
; sext there (leaving only the load in block1, per the CHECKs above).
10 block1:
11 %l1 = load i64* %mem1
12 %s1 = sext i64 %l1 to i128
13 br label %block2
14
15 ; CHECK-LABEL: block2:
16 ; CHECK-NEXT: sext
17 ; CHECK-NEXT: load
18 ; CHECK-NEXT: sext
19 block2:
20 %l2 = load i64* %mem2
21 %s2 = sext i64 %l2 to i128
22 %res = mul i128 %s1, %s2
23 ret i128 %res
24 }
25
26 ; The first cast should be hoisted into block1, in order that the
27 ; instruction selector can form an extend-load.
28 define i64 @hoist(i32* %mem1, i32* %mem2) {
29 ; CHECK-LABEL: block1:
30 ; CHECK-NEXT: load
31 ; CHECK-NEXT: sext
; %s1 extends the load %l1 from block1; since i64 is register-legal on this
; target, the sext should be moved up next to the load (extend-load), not sunk.
32 block1:
33 %l1 = load i32* %mem1
34 br label %block2
35
36 ; CHECK-LABEL: block2:
37 ; CHECK-NEXT: load
38 ; CHECK-NEXT: sext
39 block2:
40 %s1 = sext i32 %l1 to i64
41 %l2 = load i32* %mem2
42 %s2 = sext i32 %l2 to i64
43 %res = mul i64 %s1, %s2
44 ret i64 %res
45 }