llvm.org GIT mirror: llvm, commit 30557dd

[CodeGenPrepare] Move Extension Instructions Through Logical And Shift Instructions

The CodeGenPrepare pass moves extension instructions close to the load instructions that feed them, even when the two live in different basic blocks, so the extension can later be combined with the load. In the current implementation, however, an extension cannot be moved through logical and shift instructions. This patch adds that capability, so more extension instructions can be eliminated.

Differential Revision: https://reviews.llvm.org/D45537

This is a re-commit of r331783, which was reverted by r333305. The performance regression there was caused by unlucky alignment, not by a code generation problem.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@334049 91177308-0d34-0410-b5e6-96231b3b80d8

Author: Guozhi Wei
5 changed files with 172 additions and 20 deletions.
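In IR terms, the patch lets CodeGenPrepare's type-promotion logic look through bitwise (and/or/xor) and shift (shl/lshr) instructions when it sinks a zext/sext toward the load that feeds it. Below is a schematic before/after sketch of the ext(and(ld, cst)) case, modeled on @test1 in the new test file added at the end of this commit; the pointer %p and the constants are illustrative, not part of the patch itself.

; Before codegenprepare: the zext lives in a later block and cannot be
; combined with the load, because the 'and' sits between them.
entry:
  %ld  = load i8, i8* %p, align 1
  %and = and i8 %ld, 60
  ...
if.then:
  %ext = zext i8 %and to i32
  ...

; After codegenprepare: ext(and(ld, cst)) has been rewritten to
; and(ext(ld), ext(cst)), so the zext sits directly on the loaded value
; and instruction selection can later fold it into the load.
entry:
  %ld  = load i8, i8* %p, align 1
  %ext = zext i8 %ld to i32
  %and = and i32 %ext, 60
  ...

The C++ hunk that follows is the corresponding check in CodeGenPrepare's type-promotion helper; the later hunks update the affected X86 codegen tests and add a new CodeGenPrepare test covering each of the new cases.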
        (IsSExt && BinOp->hasNoSignedWrap())))
     return true;
 
+  // ext(and(opnd, cst)) --> and(ext(opnd), ext(cst))
+  if ((Inst->getOpcode() == Instruction::And ||
+       Inst->getOpcode() == Instruction::Or))
+    return true;
+
+  // ext(xor(opnd, cst)) --> xor(ext(opnd), ext(cst))
+  if (Inst->getOpcode() == Instruction::Xor) {
+    const ConstantInt *Cst = dyn_cast<ConstantInt>(Inst->getOperand(1));
+    // Make sure it is not a NOT.
+    if (Cst && !Cst->getValue().isAllOnesValue())
+      return true;
+  }
+
+  // zext(shrl(opnd, cst)) --> shrl(zext(opnd), zext(cst))
+  // It may change a poisoned value into a regular value, like
+  //     zext i32 (shrl i8 %val, 12)  -->  shrl i32 (zext i8 %val), 12
+  //          poisoned value                    regular value
+  // It should be OK since undef covers valid value.
+  if (Inst->getOpcode() == Instruction::LShr && !IsSExt)
+    return true;
+
+  // and(ext(shl(opnd, cst)), cst) --> and(shl(ext(opnd), ext(cst)), cst)
+  // It may change a poisoned value into a regular value, like
+  //     zext i32 (shl i8 %val, 12)  -->  shl i32 (zext i8 %val), 12
+  //          poisoned value                    regular value
+  // It should be OK since undef covers valid value.
+  if (Inst->getOpcode() == Instruction::Shl && Inst->hasOneUse()) {
+    const Instruction *ExtInst =
+        dyn_cast<const Instruction>(*Inst->user_begin());
+    if (ExtInst->hasOneUse()) {
+      const Instruction *AndInst =
+          dyn_cast<const Instruction>(*ExtInst->user_begin());
+      if (AndInst && AndInst->getOpcode() == Instruction::And) {
+        const ConstantInt *Cst = dyn_cast<ConstantInt>(AndInst->getOperand(1));
+        if (Cst &&
+            Cst->getValue().isIntN(Inst->getType()->getIntegerBitWidth()))
+          return true;
+      }
+    }
+  }
+
   // Check if we can do the following simplification.
   // ext(trunc(opnd)) --> ext(opnd)
   if (!isa<TruncInst>(Inst))
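The final check above only lets an extension be moved through a shl when the extended value is immediately masked by an and whose constant fits in the original bit width. The reasoning: bits that the narrow shl would have shifted out of the i8 result can survive a widened i32 shl, but a mask that fits in 8 bits clears everything above bit 7 again, so the final value is unchanged; the only remaining difference is that an over-shift which produced poison in the narrow type now produces a defined value, which the in-code comment treats as acceptable. Schematically, following @test3 in the new test file (names and constants are illustrative):

; Before codegenprepare: shift and mask in i8, extension in a later block.
  %ld  = load i8, i8* %p, align 1
  %shl = shl i8 %ld, 2
  %and = and i8 %shl, 60
  ...
  %ext = zext i8 %and to i32

; After codegenprepare (matching test3's CHECK lines: load, zext, shl, and):
  %ld  = load i8, i8* %p, align 1
  %ext = zext i8 %ld to i32
  %shl = shl i32 %ext, 2
  %and = and i32 %shl, 60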
 ; CHECK-LABEL: test4:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: movsbl {{.*}}(%rip), %edx
-; CHECK-NEXT: movl %edx, %eax
-; CHECK-NEXT: shrb $7, %al
-; CHECK-NEXT: movzbl %al, %ecx
+; CHECK-NEXT: movzbl %dl, %ecx
+; CHECK-NEXT: shrl $7, %ecx
 ; CHECK-NEXT: xorl $1, %ecx
 ; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
 ; CHECK-NEXT: sarl %cl, %edx
 define fastcc i32 @t() nounwind {
 ; CHECK-LABEL: t:
 ; CHECK: # %bb.0: # %walkExprTree.exit
-; CHECK-NEXT: movl 0, %eax
+; CHECK-NEXT: movzwl 0, %eax
 ; CHECK-NEXT: orl $2, %eax
 ; CHECK-NEXT: movw %ax, 0
 ; CHECK-NEXT: shrl $3, %eax
   ret i64 %1
 }
 
-; Don't do the folding if the other operand isn't a constant.
-define i64 @test7(i8* %data, i8 %logop) {
-; CHECK-LABEL: test7:
-; CHECK: movb
-; CHECK-NEXT: shrb
-; CHECK-NEXT: orb
-; CHECK-NEXT: movzbl
-; CHECK-NEXT: retq
-entry:
-  %bf.load = load i8, i8* %data, align 4
-  %bf.clear = lshr i8 %bf.load, 2
-  %0 = or i8 %bf.clear, %logop
-  %1 = zext i8 %0 to i64
-  ret i64 %1
-}
-
 ; Load is folded with sext.
 define i64 @test8(i8* %data) {
 ; CHECK-LABEL: test8:
; RUN: opt < %s -codegenprepare -S -mtriple=x86_64-unknown-unknown | FileCheck %s


@a = global [10 x i8] zeroinitializer, align 1
declare void @foo()

; ext(and(ld, cst)) -> and(ext(ld), ext(cst))
define void @test1(i32* %p, i32 %ll) {
; CHECK-LABEL: @test1
; CHECK-NEXT: entry:
; CHECK-NEXT: load
; CHECK-NEXT: zext
; CHECK-NEXT: and
entry:
  %tmp = load i8, i8* getelementptr inbounds ([10 x i8], [10 x i8]* @a, i64 0, i64 0), align 1
  %and = and i8 %tmp, 60
  %cmp = icmp ugt i8 %and, 20
  br i1 %cmp, label %if.then, label %if.end

if.then:                                          ; preds = %entry
  %conv2 = zext i8 %and to i32
  %add = add nsw i32 %conv2, %ll
  store i32 %add, i32* %p, align 4
  br label %if.end

if.end:                                           ; preds = %if.then, %entry
  tail call void @foo()
  ret void
}

; ext(or(ld, cst)) -> or(ext(ld), ext(cst))
define void @test2(i32* %p, i32 %ll) {
; CHECK-LABEL: @test2
; CHECK-NEXT: entry:
; CHECK-NEXT: load
; CHECK-NEXT: zext
; CHECK-NEXT: or
entry:
  %tmp = load i8, i8* getelementptr inbounds ([10 x i8], [10 x i8]* @a, i64 0, i64 0), align 1
  %or = or i8 %tmp, 60
  %cmp = icmp ugt i8 %or, 20
  br i1 %cmp, label %if.then, label %if.end

if.then:                                          ; preds = %entry
  %conv2 = zext i8 %or to i32
  %add = add nsw i32 %conv2, %ll
  store i32 %add, i32* %p, align 4
  br label %if.end

if.end:                                           ; preds = %if.then, %entry
  tail call void @foo()
  ret void
}

; ext(and(shl(ld, cst), cst)) -> and(shl(ext(ld), ext(cst)), cst)
define void @test3(i32* %p, i32 %ll) {
; CHECK-LABEL: @test3
; CHECK-NEXT: entry:
; CHECK-NEXT: load
; CHECK-NEXT: zext
; CHECK-NEXT: shl
; CHECK-NEXT: and
entry:
  %tmp = load i8, i8* getelementptr inbounds ([10 x i8], [10 x i8]* @a, i64 0, i64 0), align 1
  %shl = shl i8 %tmp, 2
  %and = and i8 %shl, 60
  %cmp = icmp ugt i8 %and, 20
  br i1 %cmp, label %if.then, label %if.end

if.then:                                          ; preds = %entry
  %conv2 = zext i8 %and to i32
  %add = add nsw i32 %conv2, %ll
  store i32 %add, i32* %p, align 4
  br label %if.end

if.end:                                           ; preds = %if.then, %entry
  tail call void @foo()
  ret void
}

; zext(shrl(ld, cst)) -> shrl(zext(ld), zext(cst))
define void @test4(i32* %p, i32 %ll) {
; CHECK-LABEL: @test4
; CHECK-NEXT: entry:
; CHECK-NEXT: load
; CHECK-NEXT: zext
; CHECK-NEXT: lshr
entry:
  %tmp = load i8, i8* getelementptr inbounds ([10 x i8], [10 x i8]* @a, i64 0, i64 0), align 1
  %lshr = lshr i8 %tmp, 2
  %cmp = icmp ugt i8 %lshr, 20
  br i1 %cmp, label %if.then, label %if.end

if.then:                                          ; preds = %entry
  %conv2 = zext i8 %lshr to i32
  %add = add nsw i32 %conv2, %ll
  store i32 %add, i32* %p, align 4
  br label %if.end

if.end:                                           ; preds = %if.then, %entry
  tail call void @foo()
  ret void
}

; ext(xor(ld, cst)) -> xor(ext(ld), ext(cst))
define void @test5(i32* %p, i32 %ll) {
; CHECK-LABEL: @test5
; CHECK-NEXT: entry:
; CHECK-NEXT: load
; CHECK-NEXT: zext
; CHECK-NEXT: xor
entry:
  %tmp = load i8, i8* getelementptr inbounds ([10 x i8], [10 x i8]* @a, i64 0, i64 0), align 1
  %xor = xor i8 %tmp, 60
  %cmp = icmp ugt i8 %xor, 20
  br i1 %cmp, label %if.then, label %if.end

if.then:                                          ; preds = %entry
  %conv2 = zext i8 %xor to i32
  %add = add nsw i32 %conv2, %ll
  store i32 %add, i32* %p, align 4
  br label %if.end

if.end:                                           ; preds = %if.then, %entry
  tail call void @foo()
  ret void
}