llvm.org GIT mirror llvm / d51f641
Revert "[MergeICmps] Disable mergeicmps if the target does not want to handle memcmp expansion." Still a few stability issues on windows. This reverts commit 67e3db9bc121ba244e20337aabc7cf341a62b545. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@315058 91177308-0d34-0410-b5e6-96231b3b80d8 Clement Courbet 2 years ago
8 changed file(s) with 154 addition(s) and 316 deletion(s). Raw diff Collapse all Expand all
2727 #include
2828 #include "llvm/ADT/APSInt.h"
2929 #include "llvm/Analysis/Loads.h"
30 #include "llvm/Analysis/TargetLibraryInfo.h"
31 #include "llvm/Analysis/TargetTransformInfo.h"
3230 #include "llvm/IR/Function.h"
3331 #include "llvm/IR/IRBuilder.h"
3432 #include "llvm/IR/IntrinsicInst.h"
4240
4341 #define DEBUG_TYPE "mergeicmps"
4442
43 #define MERGEICMPS_DOT_ON
44
4545 // A BCE atom.
4646 struct BCEAtom {
4747 BCEAtom() : GEP(nullptr), LoadI(nullptr), Offset() {}
4949 const Value *Base() const { return GEP ? GEP->getPointerOperand() : nullptr; }
5050
5151 bool operator<(const BCEAtom &O) const {
52 assert(Base() && "invalid atom");
53 assert(O.Base() && "invalid atom");
54 // Just ordering by (Base(), Offset) is sufficient. However, this
55 // means that the ordering will depend on the addresses of the base
56 // values, which are not reproducible from run to run. To guarantee
57 // stability, we use the names of the values if they exist; we sort by:
58 // (Base()->getName(), Base(), Offset).
59 const int NameCmp = Base()->getName().compare(O.Base()->getName());
60 if (NameCmp == 0) {
61 if (Base() == O.Base()) {
62 return Offset.slt(O.Offset);
63 }
64 return Base() < O.Base();
65 }
66 return NameCmp < 0;
52 return Base() == O.Base() ? Offset.slt(O.Offset) : Base() < O.Base();
6753 }
6854
6955 GetElementPtrInst *GEP;
11298
11399 // A basic block with a comparison between two BCE atoms.
114100 // Note: the terminology is misleading: the comparison is symmetric, so there
115 // is no real {l/r}hs. What we want though is to have the same base on the
116 // left (resp. right), so that we can detect consecutive loads. To ensure this
117 // we put the smallest atom on the left.
101 // is no real {l/r}hs. To break the symmetry, we use the smallest atom as Lhs.
118102 class BCECmpBlock {
119103 public:
120104 BCECmpBlock() {}
445429
446430 IRBuilder<> Builder(BB);
447431 const auto &DL = Phi.getModule()->getDataLayout();
448 Value *const MemCmpCall = emitMemCmp(
449 FirstComparison.Lhs().GEP, FirstComparison.Rhs().GEP, ConstantInt::get(DL.getIntPtrType(Context), TotalSize),
450 Builder, DL, TLI);
432 Value *const MemCmpCall =
433 emitMemCmp(FirstComparison.Lhs().GEP, FirstComparison.Rhs().GEP,
434 ConstantInt::get(DL.getIntPtrType(Context), TotalSize),
435 Builder, DL, TLI);
451436 Value *const MemCmpIsZero = Builder.CreateICmpEQ(
452437 MemCmpCall, ConstantInt::get(Type::getInt32Ty(Context), 0));
453438
603588 bool runOnFunction(Function &F) override {
604589 if (skipFunction(F)) return false;
605590 const auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
606 const auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
607 auto PA = runImpl(F, &TLI, &TTI);
591 auto PA = runImpl(F, &TLI);
608592 return !PA.areAllPreserved();
609593 }
610594
611595 private:
612596 void getAnalysisUsage(AnalysisUsage &AU) const override {
613597 AU.addRequired<TargetLibraryInfoWrapperPass>();
614 AU.addRequired<TargetTransformInfoWrapperPass>();
615 }
616
617 PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI,
618 const TargetTransformInfo *TTI);
598 }
599
600 PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI);
619601 };
620602
621 PreservedAnalyses MergeICmps::runImpl(Function &F, const TargetLibraryInfo *TLI,
622 const TargetTransformInfo *TTI) {
603 PreservedAnalyses MergeICmps::runImpl(Function &F,
604 const TargetLibraryInfo *TLI) {
623605 DEBUG(dbgs() << "MergeICmpsPass: " << F.getName() << "\n");
624
625 // We only try merging comparisons if the target wants to expand memcmp later.
626 // The rationale is to avoid turning small chains into memcmp calls.
627 unsigned MaxLoadSize;
628 if (!TTI->enableMemCmpExpansion(MaxLoadSize)) return PreservedAnalyses::all();
629606
630607 bool MadeChange = false;
631608
645622 INITIALIZE_PASS_BEGIN(MergeICmps, "mergeicmps",
646623 "Merge contiguous icmps into a memcmp", false, false)
647624 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
648 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
649625 INITIALIZE_PASS_END(MergeICmps, "mergeicmps",
650626 "Merge contiguous icmps into a memcmp", false, false)
651627
+0
-3
test/Transforms/MergeICmps/X86/lit.local.cfg less more
None if not 'X86' in config.root.targets:
1 config.unsupported = True
2
+0
-91
test/Transforms/MergeICmps/X86/pair-int32-int32.ll less more
None ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
1 ; RUN: opt < %s -mergeicmps -mtriple=x86_64-unknown-unknown -S | FileCheck %s --check-prefix=X86
2
3 %"struct.std::pair" = type { i32, i32 }
4
5 define zeroext i1 @opeq1(
6 ; X86-LABEL: @opeq1(
7 ; X86-NEXT: entry:
8 ; X86-NEXT: [[FIRST_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[A:%.*]], i64 0, i32 0
9 ; X86-NEXT: [[FIRST1_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[B:%.*]], i64 0, i32 0
10 ; X86-NEXT: [[CSTR:%.*]] = bitcast i32* [[FIRST_I]] to i8*
11 ; X86-NEXT: [[CSTR1:%.*]] = bitcast i32* [[FIRST1_I]] to i8*
12 ; X86-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[CSTR]], i8* [[CSTR1]], i64 8)
13 ; X86-NEXT: [[TMP0:%.*]] = icmp eq i32 [[MEMCMP]], 0
14 ; X86-NEXT: br label [[OPEQ1_EXIT:%.*]]
15 ; X86: opeq1.exit:
16 ; X86-NEXT: [[TMP1:%.*]] = phi i1 [ [[TMP0]], [[ENTRY:%.*]] ]
17 ; X86-NEXT: ret i1 [[TMP1]]
18 ;
19 %"struct.std::pair"* nocapture readonly dereferenceable(8) %a,
20 %"struct.std::pair"* nocapture readonly dereferenceable(8) %b) local_unnamed_addr #0 {
21 entry:
22 %first.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 0
23 %0 = load i32, i32* %first.i, align 4
24 %first1.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 0
25 %1 = load i32, i32* %first1.i, align 4
26 %cmp.i = icmp eq i32 %0, %1
27 br i1 %cmp.i, label %land.rhs.i, label %opeq1.exit
28
29 land.rhs.i:
30 %second.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 1
31 %2 = load i32, i32* %second.i, align 4
32 %second2.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 1
33 %3 = load i32, i32* %second2.i, align 4
34 %cmp3.i = icmp eq i32 %2, %3
35 br label %opeq1.exit
36
37 opeq1.exit:
38 %4 = phi i1 [ false, %entry ], [ %cmp3.i, %land.rhs.i ]
39 ret i1 %4
40 ; The entry block with zero-offset GEPs is kept, loads are removed.
41 ; The two 4 byte loads and compares are replaced with a single 8-byte memcmp.
42 ; The branch is now a direct branch; the other block has been removed.
43 ; The phi is updated.
44 }
45
46 ; Same as above, but the two blocks are in inverse order.
47 define zeroext i1 @opeq1_inverse(
48 ; X86-LABEL: @opeq1_inverse(
49 ; X86-NEXT: land.rhs.i:
50 ; X86-NEXT: [[SECOND_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[A:%.*]], i64 0, i32 0
51 ; X86-NEXT: [[SECOND2_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[B:%.*]], i64 0, i32 0
52 ; X86-NEXT: [[CSTR:%.*]] = bitcast i32* [[SECOND_I]] to i8*
53 ; X86-NEXT: [[CSTR1:%.*]] = bitcast i32* [[SECOND2_I]] to i8*
54 ; X86-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[CSTR]], i8* [[CSTR1]], i64 8)
55 ; X86-NEXT: [[TMP0:%.*]] = icmp eq i32 [[MEMCMP]], 0
56 ; X86-NEXT: br label [[OPEQ1_EXIT:%.*]]
57 ; X86: opeq1.exit:
58 ; X86-NEXT: [[TMP1:%.*]] = phi i1 [ [[TMP0]], [[LAND_RHS_I:%.*]] ]
59 ; X86-NEXT: ret i1 [[TMP1]]
60 ;
61 %"struct.std::pair"* nocapture readonly dereferenceable(8) %a,
62 %"struct.std::pair"* nocapture readonly dereferenceable(8) %b) local_unnamed_addr #0 {
63 entry:
64 %first.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 1
65 %0 = load i32, i32* %first.i, align 4
66 %first1.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 1
67 %1 = load i32, i32* %first1.i, align 4
68 %cmp.i = icmp eq i32 %0, %1
69 br i1 %cmp.i, label %land.rhs.i, label %opeq1.exit
70
71 land.rhs.i:
72 %second.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 0
73 %2 = load i32, i32* %second.i, align 4
74 %second2.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 0
75 %3 = load i32, i32* %second2.i, align 4
76 %cmp3.i = icmp eq i32 %2, %3
77 br label %opeq1.exit
78
79 opeq1.exit:
80 %4 = phi i1 [ false, %entry ], [ %cmp3.i, %land.rhs.i ]
81 ret i1 %4
82 ; The second block with zero-offset GEPs is kept, loads are removed.
83 ; CHECK: land.rhs.i
84 ; The two 4 byte loads and compares are replaced with a single 8-byte memcmp.
85 ; The branch is now a direct branch; the other block has been removed.
86 ; The phi is updated.
87 }
88
89
90
+0
-92
test/Transforms/MergeICmps/X86/tuple-four-int8.ll less more
None ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
1 ; RUN: opt < %s -mergeicmps -mtriple=x86_64-unknown-unknown -S | FileCheck %s
2
3 ; This is a more involved test: clang generates this weird pattern for
4 ; tuple<uint8_t, uint8_t, uint8_t, uint8_t>. Right now we skip the entry block
5 ; (which defines the base pointer for other blocks) and the last one (which
6 ; does not have the expected structure). Only middle blocks (bytes [1,2]) are
7 ; merged.
8
9 %"class.std::tuple" = type { %"struct.std::_Tuple_impl" }
10 %"struct.std::_Tuple_impl" = type { %"struct.std::_Tuple_impl.0", %"struct.std::_Head_base.6" }
11 %"struct.std::_Tuple_impl.0" = type { %"struct.std::_Tuple_impl.1", %"struct.std::_Head_base.5" }
12 %"struct.std::_Tuple_impl.1" = type { %"struct.std::_Tuple_impl.2", %"struct.std::_Head_base.4" }
13 %"struct.std::_Tuple_impl.2" = type { %"struct.std::_Head_base" }
14 %"struct.std::_Head_base" = type { i8 }
15 %"struct.std::_Head_base.4" = type { i8 }
16 %"struct.std::_Head_base.5" = type { i8 }
17 %"struct.std::_Head_base.6" = type { i8 }
18
19 define zeroext i1 @opeq(
20 ; CHECK-LABEL: @opeq(
21 ; CHECK-NEXT: entry:
22 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds %"class.std::tuple", %"class.std::tuple"* [[A:%.*]], i64 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0
23 ; CHECK-NEXT: [[ADD_PTR_I_I_I_I_I:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i64 3
24 ; CHECK-NEXT: [[TMP1:%.*]] = load i8, i8* [[ADD_PTR_I_I_I_I_I]], align 1
25 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds %"class.std::tuple", %"class.std::tuple"* [[B:%.*]], i64 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0
26 ; CHECK-NEXT: [[ADD_PTR_I_I_I6_I_I:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i64 3
27 ; CHECK-NEXT: [[TMP3:%.*]] = load i8, i8* [[ADD_PTR_I_I_I6_I_I]], align 1
28 ; CHECK-NEXT: [[CMP_I_I:%.*]] = icmp eq i8 [[TMP1]], [[TMP3]]
29 ; CHECK-NEXT: br i1 [[CMP_I_I]], label [[LAND_RHS_I_I_I:%.*]], label [[OPEQ_EXIT:%.*]]
30 ; CHECK: land.rhs.i.i.i:
31 ; CHECK-NEXT: [[ADD_PTR_I_I_I_I_I_I_I:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i64 1
32 ; CHECK-NEXT: [[ADD_PTR_I_I_I6_I_I_I_I:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i64 1
33 ; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[ADD_PTR_I_I_I6_I_I_I_I]], i8* [[ADD_PTR_I_I_I_I_I_I_I]], i64 2)
34 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[MEMCMP]], 0
35 ; CHECK-NEXT: br i1 [[TMP4]], label [[LAND_RHS_I_I_I_I:%.*]], label [[OPEQ_EXIT]]
36 ; CHECK: land.rhs.i.i.i.i:
37 ; CHECK-NEXT: [[TMP5:%.*]] = load i8, i8* [[TMP0]], align 1
38 ; CHECK-NEXT: [[TMP6:%.*]] = load i8, i8* [[TMP2]], align 1
39 ; CHECK-NEXT: [[CMP_I_I_I_I_I:%.*]] = icmp eq i8 [[TMP5]], [[TMP6]]
40 ; CHECK-NEXT: br label [[OPEQ_EXIT]]
41 ; CHECK: opeq.exit:
42 ; CHECK-NEXT: [[TMP7:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ false, [[LAND_RHS_I_I_I]] ], [ [[CMP_I_I_I_I_I]], [[LAND_RHS_I_I_I_I]] ]
43 ; CHECK-NEXT: ret i1 [[TMP7]]
44 ;
45 %"class.std::tuple"* nocapture readonly dereferenceable(4) %a,
46 %"class.std::tuple"* nocapture readonly dereferenceable(4) %b) local_unnamed_addr #1 {
47 entry:
48 %0 = getelementptr inbounds %"class.std::tuple", %"class.std::tuple"* %a, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0
49 %add.ptr.i.i.i.i.i = getelementptr inbounds i8, i8* %0, i64 3
50 %1 = load i8, i8* %add.ptr.i.i.i.i.i, align 1
51 %2 = getelementptr inbounds %"class.std::tuple", %"class.std::tuple"* %b, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0
52 %add.ptr.i.i.i6.i.i = getelementptr inbounds i8, i8* %2, i64 3
53 %3 = load i8, i8* %add.ptr.i.i.i6.i.i, align 1
54 %cmp.i.i = icmp eq i8 %1, %3
55 br i1 %cmp.i.i, label %land.rhs.i.i, label %opeq.exit
56
57 land.rhs.i.i:
58 %add.ptr.i.i.i.i.i.i = getelementptr inbounds i8, i8* %0, i64 2
59 %4 = load i8, i8* %add.ptr.i.i.i.i.i.i, align 1
60 %add.ptr.i.i.i6.i.i.i = getelementptr inbounds i8, i8* %2, i64 2
61 %5 = load i8, i8* %add.ptr.i.i.i6.i.i.i, align 1
62 %cmp.i.i.i = icmp eq i8 %4, %5
63 br i1 %cmp.i.i.i, label %land.rhs.i.i.i, label %opeq.exit
64
65 land.rhs.i.i.i:
66 %add.ptr.i.i.i.i.i.i.i = getelementptr inbounds i8, i8* %0, i64 1
67 %6 = load i8, i8* %add.ptr.i.i.i.i.i.i.i, align 1
68 %add.ptr.i.i.i6.i.i.i.i = getelementptr inbounds i8, i8* %2, i64 1
69 %7 = load i8, i8* %add.ptr.i.i.i6.i.i.i.i, align 1
70 %cmp.i.i.i.i = icmp eq i8 %6, %7
71 br i1 %cmp.i.i.i.i, label %land.rhs.i.i.i.i, label %opeq.exit
72
73 land.rhs.i.i.i.i:
74 %8 = load i8, i8* %0, align 1
75 %9 = load i8, i8* %2, align 1
76 %cmp.i.i.i.i.i = icmp eq i8 %8, %9
77 br label %opeq.exit
78
79 opeq.exit:
80 %10 = phi i1 [ false, %entry ], [ false, %land.rhs.i.i ], [ false, %land.rhs.i.i.i ], [ %cmp.i.i.i.i.i, %land.rhs.i.i.i.i ]
81 ret i1 %10
82 ; The entry block is kept as is, but the next block is now the merged comparison
83 ; block for bytes [1,2] or the block for the head.
84 ; The two 1 byte loads and compares at offset 1 are replaced with a single
85 ; 2-byte memcmp.
86 ; In the end we have three blocks.
87 ; CHECK-SAME %entry
88 ; CHECK-SAME %land.rhs.i.i.i.i
89 ; CHECK-SAME %land.rhs.i.i.i
90 }
91
+0
-48
test/Transforms/MergeICmps/X86/volatile.ll less more
None ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
1 ; RUN: opt < %s -mergeicmps -mtriple=x86_64-unknown-unknown -S | FileCheck %s
2
3 %"struct.std::pair" = type { i32, i32 }
4
5 define zeroext i1 @opeq(
6 ; CHECK-LABEL: @opeq(
7 ; CHECK-NEXT: entry:
8 ; CHECK-NEXT: [[FIRST_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[A:%.*]], i64 0, i32 0
9 ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[FIRST_I]], align 4
10 ; CHECK-NEXT: [[FIRST1_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[B:%.*]], i64 0, i32 0
11 ; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[FIRST1_I]], align 4
12 ; CHECK-NEXT: [[CMP_I:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
13 ; CHECK-NEXT: br i1 [[CMP_I]], label [[LAND_RHS_I:%.*]], label [[OPEQ1_EXIT:%.*]]
14 ; CHECK: land.rhs.i:
15 ; CHECK-NEXT: [[SECOND_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[A]], i64 0, i32 1
16 ; CHECK-NEXT: [[TMP2:%.*]] = load volatile i32, i32* [[SECOND_I]], align 4
17 ; CHECK-NEXT: [[SECOND2_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[B]], i64 0, i32 1
18 ; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[SECOND2_I]], align 4
19 ; CHECK-NEXT: [[CMP3_I:%.*]] = icmp eq i32 [[TMP2]], [[TMP3]]
20 ; CHECK-NEXT: br label [[OPEQ1_EXIT]]
21 ; CHECK: opeq1.exit:
22 ; CHECK-NEXT: [[TMP4:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[CMP3_I]], [[LAND_RHS_I]] ]
23 ; CHECK-NEXT: ret i1 [[TMP4]]
24 ;
25 %"struct.std::pair"* nocapture readonly dereferenceable(8) %a,
26 %"struct.std::pair"* nocapture readonly dereferenceable(8) %b) local_unnamed_addr #0 {
27 entry:
28 %first.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 0
29 %0 = load i32, i32* %first.i, align 4
30 %first1.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 0
31 %1 = load i32, i32* %first1.i, align 4
32 %cmp.i = icmp eq i32 %0, %1
33 br i1 %cmp.i, label %land.rhs.i, label %opeq1.exit
34
35 land.rhs.i:
36 %second.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 1
37 %2 = load volatile i32, i32* %second.i, align 4
38 %second2.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 1
39 %3 = load i32, i32* %second2.i, align 4
40 %cmp3.i = icmp eq i32 %2, %3
41 br label %opeq1.exit
42
43 opeq1.exit:
44 %4 = phi i1 [ false, %entry ], [ %cmp3.i, %land.rhs.i ]
45 ret i1 %4
46 }
47
None ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
1 ; RUN: opt < %s -mergeicmps -S | FileCheck %s --check-prefix=NOEXPANSION
0 ; RUN: opt -mergeicmps -S -o - %s | FileCheck %s
21
32 %"struct.std::pair" = type { i32, i32 }
43
54 define zeroext i1 @opeq1(
6 ; NOEXPANSION-LABEL: @opeq1(
7 ; NOEXPANSION-NEXT: entry:
8 ; NOEXPANSION-NEXT: [[FIRST_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[A:%.*]], i64 0, i32 0
9 ; NOEXPANSION-NEXT: [[TMP0:%.*]] = load i32, i32* [[FIRST_I]], align 4
10 ; NOEXPANSION-NEXT: [[FIRST1_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[B:%.*]], i64 0, i32 0
11 ; NOEXPANSION-NEXT: [[TMP1:%.*]] = load i32, i32* [[FIRST1_I]], align 4
12 ; NOEXPANSION-NEXT: [[CMP_I:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
13 ; NOEXPANSION-NEXT: br i1 [[CMP_I]], label [[LAND_RHS_I:%.*]], label [[OPEQ1_EXIT:%.*]]
14 ; NOEXPANSION: land.rhs.i:
15 ; NOEXPANSION-NEXT: [[SECOND_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[A]], i64 0, i32 1
16 ; NOEXPANSION-NEXT: [[TMP2:%.*]] = load i32, i32* [[SECOND_I]], align 4
17 ; NOEXPANSION-NEXT: [[SECOND2_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[B]], i64 0, i32 1
18 ; NOEXPANSION-NEXT: [[TMP3:%.*]] = load i32, i32* [[SECOND2_I]], align 4
19 ; NOEXPANSION-NEXT: [[CMP3_I:%.*]] = icmp eq i32 [[TMP2]], [[TMP3]]
20 ; NOEXPANSION-NEXT: br label [[OPEQ1_EXIT]]
21 ; NOEXPANSION: opeq1.exit:
22 ; NOEXPANSION-NEXT: [[TMP4:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[CMP3_I]], [[LAND_RHS_I]] ]
23 ; NOEXPANSION-NEXT: ret i1 [[TMP4]]
24 ;
25 %"struct.std::pair"* nocapture readonly dereferenceable(8) %a,
26 %"struct.std::pair"* nocapture readonly dereferenceable(8) %b) local_unnamed_addr #0 {
5 %"struct.std::pair"* nocapture readonly dereferenceable(8) %a,
6 %"struct.std::pair"* nocapture readonly dereferenceable(8) %b) local_unnamed_addr #0 {
277 entry:
288 %first.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 0
299 %0 = load i32, i32* %first.i, align 4
4323 opeq1.exit:
4424 %4 = phi i1 [ false, %entry ], [ %cmp3.i, %land.rhs.i ]
4525 ret i1 %4
26 ; CHECK-LABEL: @opeq1(
27 ; The entry block with zero-offset GEPs is kept, loads are removed.
28 ; CHECK: entry
29 ; CHECK: getelementptr {{.*}} i32 0
30 ; CHECK-NOT: load
31 ; CHECK: getelementptr {{.*}} i32 0
32 ; CHECK-NOT: load
33 ; The two 4 byte loads and compares are replaced with a single 8-byte memcmp.
34 ; CHECK: @memcmp({{.*}}8)
35 ; CHECK: icmp eq {{.*}} 0
36 ; The branch is now a direct branch; the other block has been removed.
37 ; CHECK: br label %opeq1.exit
38 ; CHECK-NOT: br
39 ; The phi is updated.
40 ; CHECK: phi i1 [ %{{[^,]*}}, %entry ]
41 ; CHECK-NEXT: ret
4642 }
4743
4844 ; Same as above, but the two blocks are in inverse order.
4945 define zeroext i1 @opeq1_inverse(
50 ; NOEXPANSION-LABEL: @opeq1_inverse(
51 ; NOEXPANSION-NEXT: entry:
52 ; NOEXPANSION-NEXT: [[FIRST_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[A:%.*]], i64 0, i32 1
53 ; NOEXPANSION-NEXT: [[TMP0:%.*]] = load i32, i32* [[FIRST_I]], align 4
54 ; NOEXPANSION-NEXT: [[FIRST1_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[B:%.*]], i64 0, i32 1
55 ; NOEXPANSION-NEXT: [[TMP1:%.*]] = load i32, i32* [[FIRST1_I]], align 4
56 ; NOEXPANSION-NEXT: [[CMP_I:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
57 ; NOEXPANSION-NEXT: br i1 [[CMP_I]], label [[LAND_RHS_I:%.*]], label [[OPEQ1_EXIT:%.*]]
58 ; NOEXPANSION: land.rhs.i:
59 ; NOEXPANSION-NEXT: [[SECOND_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[A]], i64 0, i32 0
60 ; NOEXPANSION-NEXT: [[TMP2:%.*]] = load i32, i32* [[SECOND_I]], align 4
61 ; NOEXPANSION-NEXT: [[SECOND2_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[B]], i64 0, i32 0
62 ; NOEXPANSION-NEXT: [[TMP3:%.*]] = load i32, i32* [[SECOND2_I]], align 4
63 ; NOEXPANSION-NEXT: [[CMP3_I:%.*]] = icmp eq i32 [[TMP2]], [[TMP3]]
64 ; NOEXPANSION-NEXT: br label [[OPEQ1_EXIT]]
65 ; NOEXPANSION: opeq1.exit:
66 ; NOEXPANSION-NEXT: [[TMP4:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[CMP3_I]], [[LAND_RHS_I]] ]
67 ; NOEXPANSION-NEXT: ret i1 [[TMP4]]
68 ;
69 %"struct.std::pair"* nocapture readonly dereferenceable(8) %a,
70 %"struct.std::pair"* nocapture readonly dereferenceable(8) %b) local_unnamed_addr #0 {
46 %"struct.std::pair"* nocapture readonly dereferenceable(8) %a,
47 %"struct.std::pair"* nocapture readonly dereferenceable(8) %b) local_unnamed_addr #0 {
7148 entry:
7249 %first.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 1
7350 %0 = load i32, i32* %first.i, align 4
8764 opeq1.exit:
8865 %4 = phi i1 [ false, %entry ], [ %cmp3.i, %land.rhs.i ]
8966 ret i1 %4
67 ; CHECK-LABEL: @opeq1_inverse(
68 ; The second block with zero-offset GEPs is kept, loads are removed.
69 ; CHECK: land.rhs.i
70 ; CHECK: getelementptr {{.*}} i32 0
71 ; CHECK-NOT: load
72 ; CHECK: getelementptr {{.*}} i32 0
73 ; CHECK-NOT: load
74 ; The two 4 byte loads and compares are replaced with a single 8-byte memcmp.
75 ; CHECK: @memcmp({{.*}}8)
76 ; CHECK: icmp eq {{.*}} 0
77 ; The branch is now a direct branch; the other block has been removed.
78 ; CHECK: br label %opeq1.exit
79 ; CHECK-NOT: br
80 ; The phi is updated.
81 ; CHECK: phi i1 [ %{{[^,]*}}, %land.rhs.i ]
82 ; CHECK-NEXT: ret
9083 }
9184
9285
0 ; RUN: opt -mergeicmps -S -o - %s | FileCheck %s
1
2 ; This is a more involved test: clang generates this weird pattern for
3 ; tuple<uint8_t, uint8_t, uint8_t, uint8_t>. Right now we skip the entry block
4 ; (which defines the base pointer for other blocks) and the last one (which
5 ; does not have the expected structure). Only middle blocks (bytes [1,2]) are
6 ; merged.
7
8 %"class.std::tuple" = type { %"struct.std::_Tuple_impl" }
9 %"struct.std::_Tuple_impl" = type { %"struct.std::_Tuple_impl.0", %"struct.std::_Head_base.6" }
10 %"struct.std::_Tuple_impl.0" = type { %"struct.std::_Tuple_impl.1", %"struct.std::_Head_base.5" }
11 %"struct.std::_Tuple_impl.1" = type { %"struct.std::_Tuple_impl.2", %"struct.std::_Head_base.4" }
12 %"struct.std::_Tuple_impl.2" = type { %"struct.std::_Head_base" }
13 %"struct.std::_Head_base" = type { i8 }
14 %"struct.std::_Head_base.4" = type { i8 }
15 %"struct.std::_Head_base.5" = type { i8 }
16 %"struct.std::_Head_base.6" = type { i8 }
17
18 define zeroext i1 @opeq(
19 %"class.std::tuple"* nocapture readonly dereferenceable(4) %a,
20 %"class.std::tuple"* nocapture readonly dereferenceable(4) %b) local_unnamed_addr #1 {
21 entry:
22 %0 = getelementptr inbounds %"class.std::tuple", %"class.std::tuple"* %a, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0
23 %add.ptr.i.i.i.i.i = getelementptr inbounds i8, i8* %0, i64 3
24 %1 = load i8, i8* %add.ptr.i.i.i.i.i, align 1
25 %2 = getelementptr inbounds %"class.std::tuple", %"class.std::tuple"* %b, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0
26 %add.ptr.i.i.i6.i.i = getelementptr inbounds i8, i8* %2, i64 3
27 %3 = load i8, i8* %add.ptr.i.i.i6.i.i, align 1
28 %cmp.i.i = icmp eq i8 %1, %3
29 br i1 %cmp.i.i, label %land.rhs.i.i, label %opeq.exit
30
31 land.rhs.i.i:
32 %add.ptr.i.i.i.i.i.i = getelementptr inbounds i8, i8* %0, i64 2
33 %4 = load i8, i8* %add.ptr.i.i.i.i.i.i, align 1
34 %add.ptr.i.i.i6.i.i.i = getelementptr inbounds i8, i8* %2, i64 2
35 %5 = load i8, i8* %add.ptr.i.i.i6.i.i.i, align 1
36 %cmp.i.i.i = icmp eq i8 %4, %5
37 br i1 %cmp.i.i.i, label %land.rhs.i.i.i, label %opeq.exit
38
39 land.rhs.i.i.i:
40 %add.ptr.i.i.i.i.i.i.i = getelementptr inbounds i8, i8* %0, i64 1
41 %6 = load i8, i8* %add.ptr.i.i.i.i.i.i.i, align 1
42 %add.ptr.i.i.i6.i.i.i.i = getelementptr inbounds i8, i8* %2, i64 1
43 %7 = load i8, i8* %add.ptr.i.i.i6.i.i.i.i, align 1
44 %cmp.i.i.i.i = icmp eq i8 %6, %7
45 br i1 %cmp.i.i.i.i, label %land.rhs.i.i.i.i, label %opeq.exit
46
47 land.rhs.i.i.i.i:
48 %8 = load i8, i8* %0, align 1
49 %9 = load i8, i8* %2, align 1
50 %cmp.i.i.i.i.i = icmp eq i8 %8, %9
51 br label %opeq.exit
52
53 opeq.exit:
54 %10 = phi i1 [ false, %entry ], [ false, %land.rhs.i.i ], [ false, %land.rhs.i.i.i ], [ %cmp.i.i.i.i.i, %land.rhs.i.i.i.i ]
55 ret i1 %10
56 ; CHECK-LABEL: @opeq(
57 ; The entry block is kept as is, but the next block is now the merged comparison
58 ; block for bytes [1,2] or the block for the head.
59 ; CHECK: entry
60 ; CHECK: br i1 %cmp.i.i, label %land.rhs.i.i.i{{(.i)?}}, label %opeq.exit
61 ; The two 1 byte loads and compares at offset 1 are replaced with a single
62 ; 2-byte memcmp.
63 ; CHECK: land.rhs.i.i.i
64 ; CHECK: @memcmp({{.*}}2)
65 ; CHECK: icmp eq {{.*}} 0
66 ; In the end we have three blocks.
67 ; CHECK: phi i1
68 ; CHECK-SAME %entry
69 ; CHECK-SAME %land.rhs.i.i.i.i
70 ; CHECK-SAME %land.rhs.i.i.i
71 }
72
0 ; RUN: opt -mergeicmps -S -o - %s | FileCheck %s
1
2 %"struct.std::pair" = type { i32, i32 }
3
4 define zeroext i1 @opeq(
5 %"struct.std::pair"* nocapture readonly dereferenceable(8) %a,
6 %"struct.std::pair"* nocapture readonly dereferenceable(8) %b) local_unnamed_addr #0 {
7 entry:
8 %first.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 0
9 %0 = load i32, i32* %first.i, align 4
10 %first1.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 0
11 %1 = load i32, i32* %first1.i, align 4
12 %cmp.i = icmp eq i32 %0, %1
13 br i1 %cmp.i, label %land.rhs.i, label %opeq1.exit
14
15 land.rhs.i:
16 %second.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 1
17 %2 = load volatile i32, i32* %second.i, align 4
18 %second2.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 1
19 %3 = load i32, i32* %second2.i, align 4
20 %cmp3.i = icmp eq i32 %2, %3
21 br label %opeq1.exit
22
23 opeq1.exit:
24 %4 = phi i1 [ false, %entry ], [ %cmp3.i, %land.rhs.i ]
25 ret i1 %4
26 ; CHECK-LABEL: @opeq(
27 ; CHECK-NOT: memcmp
28 }
29