llvm.org GIT mirror llvm / 17f5bd1
[MergeICmps] Disable mergeicmps if the target does not want to handle memcmp expansion. Summary: This avoids, e.g., merging two cheap icmps into a memcmp call when the target is not going to expand that call into something efficient later. Reviewers: dberlin, spatel Subscribers: davide, nemanjai Differential Revision: https://reviews.llvm.org/D38232 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@314970 91177308-0d34-0410-b5e6-96231b3b80d8 Clement Courbet 2 years ago
8 changed file(s) with 295 addition(s) and 148 deletion(s). Raw diff Collapse all Expand all
2727 #include
2828 #include "llvm/ADT/APSInt.h"
2929 #include "llvm/Analysis/Loads.h"
30 #include "llvm/Analysis/TargetLibraryInfo.h"
31 #include "llvm/Analysis/TargetTransformInfo.h"
3032 #include "llvm/IR/Function.h"
3133 #include "llvm/IR/IRBuilder.h"
3234 #include "llvm/IR/IntrinsicInst.h"
3941 namespace {
4042
4143 #define DEBUG_TYPE "mergeicmps"
42
43 #define MERGEICMPS_DOT_ON
4444
4545 // A BCE atom.
4646 struct BCEAtom {
588588 bool runOnFunction(Function &F) override {
589589 if (skipFunction(F)) return false;
590590 const auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
591 auto PA = runImpl(F, &TLI);
591 const auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
592 auto PA = runImpl(F, &TLI, &TTI);
592593 return !PA.areAllPreserved();
593594 }
594595
595596 private:
596597 void getAnalysisUsage(AnalysisUsage &AU) const override {
597598 AU.addRequired<TargetLibraryInfoWrapperPass>();
598 }
599
600 PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI);
599 AU.addRequired<TargetTransformInfoWrapperPass>();
600 }
601
602 PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI,
603 const TargetTransformInfo *TTI);
601604 };
602605
603 PreservedAnalyses MergeICmps::runImpl(Function &F,
604 const TargetLibraryInfo *TLI) {
606 PreservedAnalyses MergeICmps::runImpl(Function &F, const TargetLibraryInfo *TLI,
607 const TargetTransformInfo *TTI) {
605608 DEBUG(dbgs() << "MergeICmpsPass: " << F.getName() << "\n");
609
610 // We only try merging comparisons if the target wants to expand memcmp later.
611 // The rationale is to avoid turning small chains into memcmp calls.
612 unsigned MaxLoadSize;
613 if (!TTI->enableMemCmpExpansion(MaxLoadSize)) return PreservedAnalyses::all();
606614
607615 bool MadeChange = false;
608616
622630 INITIALIZE_PASS_BEGIN(MergeICmps, "mergeicmps",
623631 "Merge contiguous icmps into a memcmp", false, false)
624632 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
633 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
625634 INITIALIZE_PASS_END(MergeICmps, "mergeicmps",
626635 "Merge contiguous icmps into a memcmp", false, false)
627636
0 if not 'X86' in config.root.targets:
1 config.unsupported = True
2
0 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
1 ; RUN: opt < %s -mergeicmps -mtriple=x86_64-unknown-unknown -S | FileCheck %s --check-prefix=X86
2
3 %"struct.std::pair" = type { i32, i32 }
4
5 define zeroext i1 @opeq1(
6 ; X86-LABEL: @opeq1(
7 ; X86-NEXT: entry:
8 ; X86-NEXT: [[FIRST_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[A:%.*]], i64 0, i32 0
9 ; X86-NEXT: [[FIRST1_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[B:%.*]], i64 0, i32 0
10 ; X86-NEXT: [[CSTR:%.*]] = bitcast i32* [[FIRST_I]] to i8*
11 ; X86-NEXT: [[CSTR1:%.*]] = bitcast i32* [[FIRST1_I]] to i8*
12 ; X86-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[CSTR]], i8* [[CSTR1]], i64 8)
13 ; X86-NEXT: [[TMP0:%.*]] = icmp eq i32 [[MEMCMP]], 0
14 ; X86-NEXT: br label [[OPEQ1_EXIT:%.*]]
15 ; X86: opeq1.exit:
16 ; X86-NEXT: [[TMP1:%.*]] = phi i1 [ [[TMP0]], [[ENTRY:%.*]] ]
17 ; X86-NEXT: ret i1 [[TMP1]]
18 ;
19 %"struct.std::pair"* nocapture readonly dereferenceable(8) %a,
20 %"struct.std::pair"* nocapture readonly dereferenceable(8) %b) local_unnamed_addr #0 {
21 entry:
22 %first.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 0
23 %0 = load i32, i32* %first.i, align 4
24 %first1.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 0
25 %1 = load i32, i32* %first1.i, align 4
26 %cmp.i = icmp eq i32 %0, %1
27 br i1 %cmp.i, label %land.rhs.i, label %opeq1.exit
28
29 land.rhs.i:
30 %second.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 1
31 %2 = load i32, i32* %second.i, align 4
32 %second2.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 1
33 %3 = load i32, i32* %second2.i, align 4
34 %cmp3.i = icmp eq i32 %2, %3
35 br label %opeq1.exit
36
37 opeq1.exit:
38 %4 = phi i1 [ false, %entry ], [ %cmp3.i, %land.rhs.i ]
39 ret i1 %4
40 ; The entry block with zero-offset GEPs is kept, loads are removed.
41 ; The two 4 byte loads and compares are replaced with a single 8-byte memcmp.
42 ; The branch is now a direct branch; the other block has been removed.
43 ; The phi is updated.
44 }
45
46 ; Same as above, but the two blocks are in inverse order.
47 define zeroext i1 @opeq1_inverse(
48 ; X86-LABEL: @opeq1_inverse(
49 ; X86-NEXT: land.rhs.i:
50 ; X86-NEXT: [[SECOND_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[A:%.*]], i64 0, i32 0
51 ; X86-NEXT: [[SECOND2_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[B:%.*]], i64 0, i32 0
52 ; X86-NEXT: [[CSTR:%.*]] = bitcast i32* [[SECOND_I]] to i8*
53 ; X86-NEXT: [[CSTR1:%.*]] = bitcast i32* [[SECOND2_I]] to i8*
54 ; X86-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[CSTR]], i8* [[CSTR1]], i64 8)
55 ; X86-NEXT: [[TMP0:%.*]] = icmp eq i32 [[MEMCMP]], 0
56 ; X86-NEXT: br label [[OPEQ1_EXIT:%.*]]
57 ; X86: opeq1.exit:
58 ; X86-NEXT: [[TMP1:%.*]] = phi i1 [ [[TMP0]], [[LAND_RHS_I:%.*]] ]
59 ; X86-NEXT: ret i1 [[TMP1]]
60 ;
61 %"struct.std::pair"* nocapture readonly dereferenceable(8) %a,
62 %"struct.std::pair"* nocapture readonly dereferenceable(8) %b) local_unnamed_addr #0 {
63 entry:
64 %first.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 1
65 %0 = load i32, i32* %first.i, align 4
66 %first1.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 1
67 %1 = load i32, i32* %first1.i, align 4
68 %cmp.i = icmp eq i32 %0, %1
69 br i1 %cmp.i, label %land.rhs.i, label %opeq1.exit
70
71 land.rhs.i:
72 %second.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 0
73 %2 = load i32, i32* %second.i, align 4
74 %second2.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 0
75 %3 = load i32, i32* %second2.i, align 4
76 %cmp3.i = icmp eq i32 %2, %3
77 br label %opeq1.exit
78
79 opeq1.exit:
80 %4 = phi i1 [ false, %entry ], [ %cmp3.i, %land.rhs.i ]
81 ret i1 %4
82 ; The second block with zero-offset GEPs is kept, loads are removed.
83 ; CHECK: land.rhs.i
84 ; The two 4 byte loads and compares are replaced with a single 8-byte memcmp.
85 ; The branch is now a direct branch; the other block has been removed.
86 ; The phi is updated.
87 }
88
89
90
0 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
1 ; RUN: opt < %s -mergeicmps -mtriple=x86_64-unknown-unknown -S | FileCheck %s
2
3 ; This is a more involved test: clang generates this weird pattern for a
4 ; tuple of four i8 elements. Right now we skip the entry block
5 ; (which defines the base pointer for other blocks) and the last one (which
6 ; does not have the expected structure). Only the middle blocks (bytes [1,2])
7 ; are merged.
8
9 %"class.std::tuple" = type { %"struct.std::_Tuple_impl" }
10 %"struct.std::_Tuple_impl" = type { %"struct.std::_Tuple_impl.0", %"struct.std::_Head_base.6" }
11 %"struct.std::_Tuple_impl.0" = type { %"struct.std::_Tuple_impl.1", %"struct.std::_Head_base.5" }
12 %"struct.std::_Tuple_impl.1" = type { %"struct.std::_Tuple_impl.2", %"struct.std::_Head_base.4" }
13 %"struct.std::_Tuple_impl.2" = type { %"struct.std::_Head_base" }
14 %"struct.std::_Head_base" = type { i8 }
15 %"struct.std::_Head_base.4" = type { i8 }
16 %"struct.std::_Head_base.5" = type { i8 }
17 %"struct.std::_Head_base.6" = type { i8 }
18
19 define zeroext i1 @opeq(
20 ; CHECK-LABEL: @opeq(
21 ; CHECK-NEXT: entry:
22 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds %"class.std::tuple", %"class.std::tuple"* [[A:%.*]], i64 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0
23 ; CHECK-NEXT: [[ADD_PTR_I_I_I_I_I:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i64 3
24 ; CHECK-NEXT: [[TMP1:%.*]] = load i8, i8* [[ADD_PTR_I_I_I_I_I]], align 1
25 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds %"class.std::tuple", %"class.std::tuple"* [[B:%.*]], i64 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0
26 ; CHECK-NEXT: [[ADD_PTR_I_I_I6_I_I:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i64 3
27 ; CHECK-NEXT: [[TMP3:%.*]] = load i8, i8* [[ADD_PTR_I_I_I6_I_I]], align 1
28 ; CHECK-NEXT: [[CMP_I_I:%.*]] = icmp eq i8 [[TMP1]], [[TMP3]]
29 ; CHECK-NEXT: br i1 [[CMP_I_I]], label [[LAND_RHS_I_I_I_I:%.*]], label [[OPEQ_EXIT:%.*]]
30 ; CHECK: land.rhs.i.i.i:
31 ; CHECK-NEXT: [[ADD_PTR_I_I_I_I_I_I_I:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i64 1
32 ; CHECK-NEXT: [[ADD_PTR_I_I_I6_I_I_I_I:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i64 1
33 ; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[ADD_PTR_I_I_I6_I_I_I_I]], i8* [[ADD_PTR_I_I_I_I_I_I_I]], i64 2)
34 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[MEMCMP]], 0
35 ; CHECK-NEXT: br label [[OPEQ_EXIT]]
36 ; CHECK: land.rhs.i.i.i.i:
37 ; CHECK-NEXT: [[TMP5:%.*]] = load i8, i8* [[TMP0]], align 1
38 ; CHECK-NEXT: [[TMP6:%.*]] = load i8, i8* [[TMP2]], align 1
39 ; CHECK-NEXT: [[CMP_I_I_I_I_I:%.*]] = icmp eq i8 [[TMP5]], [[TMP6]]
40 ; CHECK-NEXT: br i1 [[CMP_I_I_I_I_I]], label [[LAND_RHS_I_I_I:%.*]], label [[OPEQ_EXIT]]
41 ; CHECK: opeq.exit:
42 ; CHECK-NEXT: [[TMP7:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[CMP_I_I_I_I_I]], [[LAND_RHS_I_I_I_I]] ], [ [[TMP4]], [[LAND_RHS_I_I_I]] ]
43 ; CHECK-NEXT: ret i1 [[TMP7]]
44 ;
45 %"class.std::tuple"* nocapture readonly dereferenceable(4) %a,
46 %"class.std::tuple"* nocapture readonly dereferenceable(4) %b) local_unnamed_addr #1 {
47 entry:
48 %0 = getelementptr inbounds %"class.std::tuple", %"class.std::tuple"* %a, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0
49 %add.ptr.i.i.i.i.i = getelementptr inbounds i8, i8* %0, i64 3
50 %1 = load i8, i8* %add.ptr.i.i.i.i.i, align 1
51 %2 = getelementptr inbounds %"class.std::tuple", %"class.std::tuple"* %b, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0
52 %add.ptr.i.i.i6.i.i = getelementptr inbounds i8, i8* %2, i64 3
53 %3 = load i8, i8* %add.ptr.i.i.i6.i.i, align 1
54 %cmp.i.i = icmp eq i8 %1, %3
55 br i1 %cmp.i.i, label %land.rhs.i.i, label %opeq.exit
56
57 land.rhs.i.i:
58 %add.ptr.i.i.i.i.i.i = getelementptr inbounds i8, i8* %0, i64 2
59 %4 = load i8, i8* %add.ptr.i.i.i.i.i.i, align 1
60 %add.ptr.i.i.i6.i.i.i = getelementptr inbounds i8, i8* %2, i64 2
61 %5 = load i8, i8* %add.ptr.i.i.i6.i.i.i, align 1
62 %cmp.i.i.i = icmp eq i8 %4, %5
63 br i1 %cmp.i.i.i, label %land.rhs.i.i.i, label %opeq.exit
64
65 land.rhs.i.i.i:
66 %add.ptr.i.i.i.i.i.i.i = getelementptr inbounds i8, i8* %0, i64 1
67 %6 = load i8, i8* %add.ptr.i.i.i.i.i.i.i, align 1
68 %add.ptr.i.i.i6.i.i.i.i = getelementptr inbounds i8, i8* %2, i64 1
69 %7 = load i8, i8* %add.ptr.i.i.i6.i.i.i.i, align 1
70 %cmp.i.i.i.i = icmp eq i8 %6, %7
71 br i1 %cmp.i.i.i.i, label %land.rhs.i.i.i.i, label %opeq.exit
72
73 land.rhs.i.i.i.i:
74 %8 = load i8, i8* %0, align 1
75 %9 = load i8, i8* %2, align 1
76 %cmp.i.i.i.i.i = icmp eq i8 %8, %9
77 br label %opeq.exit
78
79 opeq.exit:
80 %10 = phi i1 [ false, %entry ], [ false, %land.rhs.i.i ], [ false, %land.rhs.i.i.i ], [ %cmp.i.i.i.i.i, %land.rhs.i.i.i.i ]
81 ret i1 %10
82 ; The entry block is kept as is, but the next block is now the merged comparison
83 ; block for bytes [1,2] or the block for the head.
84 ; The two 1 byte loads and compares at offset 1 are replaced with a single
85 ; 2-byte memcmp.
86 ; In the end we have three blocks.
87 ; CHECK-SAME %entry
88 ; CHECK-SAME %land.rhs.i.i.i.i
89 ; CHECK-SAME %land.rhs.i.i.i
90 }
91
0 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
1 ; RUN: opt < %s -mergeicmps -mtriple=x86_64-unknown-unknown -S | FileCheck %s
2
3 %"struct.std::pair" = type { i32, i32 }
4
5 define zeroext i1 @opeq(
6 ; CHECK-LABEL: @opeq(
7 ; CHECK-NEXT: entry:
8 ; CHECK-NEXT: [[FIRST_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[A:%.*]], i64 0, i32 0
9 ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[FIRST_I]], align 4
10 ; CHECK-NEXT: [[FIRST1_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[B:%.*]], i64 0, i32 0
11 ; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[FIRST1_I]], align 4
12 ; CHECK-NEXT: [[CMP_I:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
13 ; CHECK-NEXT: br i1 [[CMP_I]], label [[LAND_RHS_I:%.*]], label [[OPEQ1_EXIT:%.*]]
14 ; CHECK: land.rhs.i:
15 ; CHECK-NEXT: [[SECOND_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[A]], i64 0, i32 1
16 ; CHECK-NEXT: [[TMP2:%.*]] = load volatile i32, i32* [[SECOND_I]], align 4
17 ; CHECK-NEXT: [[SECOND2_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[B]], i64 0, i32 1
18 ; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[SECOND2_I]], align 4
19 ; CHECK-NEXT: [[CMP3_I:%.*]] = icmp eq i32 [[TMP2]], [[TMP3]]
20 ; CHECK-NEXT: br label [[OPEQ1_EXIT]]
21 ; CHECK: opeq1.exit:
22 ; CHECK-NEXT: [[TMP4:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[CMP3_I]], [[LAND_RHS_I]] ]
23 ; CHECK-NEXT: ret i1 [[TMP4]]
24 ;
25 %"struct.std::pair"* nocapture readonly dereferenceable(8) %a,
26 %"struct.std::pair"* nocapture readonly dereferenceable(8) %b) local_unnamed_addr #0 {
27 entry:
28 %first.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 0
29 %0 = load i32, i32* %first.i, align 4
30 %first1.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 0
31 %1 = load i32, i32* %first1.i, align 4
32 %cmp.i = icmp eq i32 %0, %1
33 br i1 %cmp.i, label %land.rhs.i, label %opeq1.exit
34
35 land.rhs.i:
36 %second.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 1
37 %2 = load volatile i32, i32* %second.i, align 4
38 %second2.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 1
39 %3 = load i32, i32* %second2.i, align 4
40 %cmp3.i = icmp eq i32 %2, %3
41 br label %opeq1.exit
42
43 opeq1.exit:
44 %4 = phi i1 [ false, %entry ], [ %cmp3.i, %land.rhs.i ]
45 ret i1 %4
46 }
47
None ; RUN: opt -mergeicmps -S -o - %s | FileCheck %s
0 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
1 ; RUN: opt < %s -mergeicmps -S | FileCheck %s --check-prefix=NOEXPANSION
12
23 %"struct.std::pair" = type { i32, i32 }
34
45 define zeroext i1 @opeq1(
5 %"struct.std::pair"* nocapture readonly dereferenceable(8) %a,
6 %"struct.std::pair"* nocapture readonly dereferenceable(8) %b) local_unnamed_addr #0 {
6 ; NOEXPANSION-LABEL: @opeq1(
7 ; NOEXPANSION-NEXT: entry:
8 ; NOEXPANSION-NEXT: [[FIRST_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[A:%.*]], i64 0, i32 0
9 ; NOEXPANSION-NEXT: [[TMP0:%.*]] = load i32, i32* [[FIRST_I]], align 4
10 ; NOEXPANSION-NEXT: [[FIRST1_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[B:%.*]], i64 0, i32 0
11 ; NOEXPANSION-NEXT: [[TMP1:%.*]] = load i32, i32* [[FIRST1_I]], align 4
12 ; NOEXPANSION-NEXT: [[CMP_I:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
13 ; NOEXPANSION-NEXT: br i1 [[CMP_I]], label [[LAND_RHS_I:%.*]], label [[OPEQ1_EXIT:%.*]]
14 ; NOEXPANSION: land.rhs.i:
15 ; NOEXPANSION-NEXT: [[SECOND_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[A]], i64 0, i32 1
16 ; NOEXPANSION-NEXT: [[TMP2:%.*]] = load i32, i32* [[SECOND_I]], align 4
17 ; NOEXPANSION-NEXT: [[SECOND2_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[B]], i64 0, i32 1
18 ; NOEXPANSION-NEXT: [[TMP3:%.*]] = load i32, i32* [[SECOND2_I]], align 4
19 ; NOEXPANSION-NEXT: [[CMP3_I:%.*]] = icmp eq i32 [[TMP2]], [[TMP3]]
20 ; NOEXPANSION-NEXT: br label [[OPEQ1_EXIT]]
21 ; NOEXPANSION: opeq1.exit:
22 ; NOEXPANSION-NEXT: [[TMP4:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[CMP3_I]], [[LAND_RHS_I]] ]
23 ; NOEXPANSION-NEXT: ret i1 [[TMP4]]
24 ;
25 %"struct.std::pair"* nocapture readonly dereferenceable(8) %a,
26 %"struct.std::pair"* nocapture readonly dereferenceable(8) %b) local_unnamed_addr #0 {
727 entry:
828 %first.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 0
929 %0 = load i32, i32* %first.i, align 4
2343 opeq1.exit:
2444 %4 = phi i1 [ false, %entry ], [ %cmp3.i, %land.rhs.i ]
2545 ret i1 %4
26 ; CHECK-LABEL: @opeq1(
27 ; The entry block with zero-offset GEPs is kept, loads are removed.
28 ; CHECK: entry
29 ; CHECK: getelementptr {{.*}} i32 0
30 ; CHECK-NOT: load
31 ; CHECK: getelementptr {{.*}} i32 0
32 ; CHECK-NOT: load
33 ; The two 4 byte loads and compares are replaced with a single 8-byte memcmp.
34 ; CHECK: @memcmp({{.*}}8)
35 ; CHECK: icmp eq {{.*}} 0
36 ; The branch is now a direct branch; the other block has been removed.
37 ; CHECK: br label %opeq1.exit
38 ; CHECK-NOT: br
39 ; The phi is updated.
40 ; CHECK: phi i1 [ %{{[^,]*}}, %entry ]
41 ; CHECK-NEXT: ret
4246 }
4347
4448 ; Same as above, but the two blocks are in inverse order.
4549 define zeroext i1 @opeq1_inverse(
46 %"struct.std::pair"* nocapture readonly dereferenceable(8) %a,
47 %"struct.std::pair"* nocapture readonly dereferenceable(8) %b) local_unnamed_addr #0 {
50 ; NOEXPANSION-LABEL: @opeq1_inverse(
51 ; NOEXPANSION-NEXT: entry:
52 ; NOEXPANSION-NEXT: [[FIRST_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[A:%.*]], i64 0, i32 1
53 ; NOEXPANSION-NEXT: [[TMP0:%.*]] = load i32, i32* [[FIRST_I]], align 4
54 ; NOEXPANSION-NEXT: [[FIRST1_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[B:%.*]], i64 0, i32 1
55 ; NOEXPANSION-NEXT: [[TMP1:%.*]] = load i32, i32* [[FIRST1_I]], align 4
56 ; NOEXPANSION-NEXT: [[CMP_I:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
57 ; NOEXPANSION-NEXT: br i1 [[CMP_I]], label [[LAND_RHS_I:%.*]], label [[OPEQ1_EXIT:%.*]]
58 ; NOEXPANSION: land.rhs.i:
59 ; NOEXPANSION-NEXT: [[SECOND_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[A]], i64 0, i32 0
60 ; NOEXPANSION-NEXT: [[TMP2:%.*]] = load i32, i32* [[SECOND_I]], align 4
61 ; NOEXPANSION-NEXT: [[SECOND2_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[B]], i64 0, i32 0
62 ; NOEXPANSION-NEXT: [[TMP3:%.*]] = load i32, i32* [[SECOND2_I]], align 4
63 ; NOEXPANSION-NEXT: [[CMP3_I:%.*]] = icmp eq i32 [[TMP2]], [[TMP3]]
64 ; NOEXPANSION-NEXT: br label [[OPEQ1_EXIT]]
65 ; NOEXPANSION: opeq1.exit:
66 ; NOEXPANSION-NEXT: [[TMP4:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[CMP3_I]], [[LAND_RHS_I]] ]
67 ; NOEXPANSION-NEXT: ret i1 [[TMP4]]
68 ;
69 %"struct.std::pair"* nocapture readonly dereferenceable(8) %a,
70 %"struct.std::pair"* nocapture readonly dereferenceable(8) %b) local_unnamed_addr #0 {
4871 entry:
4972 %first.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 1
5073 %0 = load i32, i32* %first.i, align 4
6487 opeq1.exit:
6588 %4 = phi i1 [ false, %entry ], [ %cmp3.i, %land.rhs.i ]
6689 ret i1 %4
67 ; CHECK-LABEL: @opeq1_inverse(
68 ; The second block with zero-offset GEPs is kept, loads are removed.
69 ; CHECK: land.rhs.i
70 ; CHECK: getelementptr {{.*}} i32 0
71 ; CHECK-NOT: load
72 ; CHECK: getelementptr {{.*}} i32 0
73 ; CHECK-NOT: load
74 ; The two 4 byte loads and compares are replaced with a single 8-byte memcmp.
75 ; CHECK: @memcmp({{.*}}8)
76 ; CHECK: icmp eq {{.*}} 0
77 ; The branch is now a direct branch; the other block has been removed.
78 ; CHECK: br label %opeq1.exit
79 ; CHECK-NOT: br
80 ; The phi is updated.
81 ; CHECK: phi i1 [ %{{[^,]*}}, %land.rhs.i ]
82 ; CHECK-NEXT: ret
8390 }
8491
8592
+0
-73
test/Transforms/MergeICmps/tuple-four-int8.ll less more
None ; RUN: opt -mergeicmps -S -o - %s | FileCheck %s
1
2 ; This is a more involved test: clang generates this weird pattern for a
3 ; tuple of four i8 elements. Right now we skip the entry block
4 ; (which defines the base pointer for other blocks) and the last one (which
5 ; does not have the expected structure). Only the middle blocks (bytes [1,2])
6 ; are merged.
7
8 %"class.std::tuple" = type { %"struct.std::_Tuple_impl" }
9 %"struct.std::_Tuple_impl" = type { %"struct.std::_Tuple_impl.0", %"struct.std::_Head_base.6" }
10 %"struct.std::_Tuple_impl.0" = type { %"struct.std::_Tuple_impl.1", %"struct.std::_Head_base.5" }
11 %"struct.std::_Tuple_impl.1" = type { %"struct.std::_Tuple_impl.2", %"struct.std::_Head_base.4" }
12 %"struct.std::_Tuple_impl.2" = type { %"struct.std::_Head_base" }
13 %"struct.std::_Head_base" = type { i8 }
14 %"struct.std::_Head_base.4" = type { i8 }
15 %"struct.std::_Head_base.5" = type { i8 }
16 %"struct.std::_Head_base.6" = type { i8 }
17
18 define zeroext i1 @opeq(
19 %"class.std::tuple"* nocapture readonly dereferenceable(4) %a,
20 %"class.std::tuple"* nocapture readonly dereferenceable(4) %b) local_unnamed_addr #1 {
21 entry:
22 %0 = getelementptr inbounds %"class.std::tuple", %"class.std::tuple"* %a, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0
23 %add.ptr.i.i.i.i.i = getelementptr inbounds i8, i8* %0, i64 3
24 %1 = load i8, i8* %add.ptr.i.i.i.i.i, align 1
25 %2 = getelementptr inbounds %"class.std::tuple", %"class.std::tuple"* %b, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0
26 %add.ptr.i.i.i6.i.i = getelementptr inbounds i8, i8* %2, i64 3
27 %3 = load i8, i8* %add.ptr.i.i.i6.i.i, align 1
28 %cmp.i.i = icmp eq i8 %1, %3
29 br i1 %cmp.i.i, label %land.rhs.i.i, label %opeq.exit
30
31 land.rhs.i.i:
32 %add.ptr.i.i.i.i.i.i = getelementptr inbounds i8, i8* %0, i64 2
33 %4 = load i8, i8* %add.ptr.i.i.i.i.i.i, align 1
34 %add.ptr.i.i.i6.i.i.i = getelementptr inbounds i8, i8* %2, i64 2
35 %5 = load i8, i8* %add.ptr.i.i.i6.i.i.i, align 1
36 %cmp.i.i.i = icmp eq i8 %4, %5
37 br i1 %cmp.i.i.i, label %land.rhs.i.i.i, label %opeq.exit
38
39 land.rhs.i.i.i:
40 %add.ptr.i.i.i.i.i.i.i = getelementptr inbounds i8, i8* %0, i64 1
41 %6 = load i8, i8* %add.ptr.i.i.i.i.i.i.i, align 1
42 %add.ptr.i.i.i6.i.i.i.i = getelementptr inbounds i8, i8* %2, i64 1
43 %7 = load i8, i8* %add.ptr.i.i.i6.i.i.i.i, align 1
44 %cmp.i.i.i.i = icmp eq i8 %6, %7
45 br i1 %cmp.i.i.i.i, label %land.rhs.i.i.i.i, label %opeq.exit
46
47 land.rhs.i.i.i.i:
48 %8 = load i8, i8* %0, align 1
49 %9 = load i8, i8* %2, align 1
50 %cmp.i.i.i.i.i = icmp eq i8 %8, %9
51 br label %opeq.exit
52
53 opeq.exit:
54 %10 = phi i1 [ false, %entry ], [ false, %land.rhs.i.i ], [ false, %land.rhs.i.i.i ], [ %cmp.i.i.i.i.i, %land.rhs.i.i.i.i ]
55 ret i1 %10
56 ; CHECK-LABEL: @opeq(
57 ; The entry block is kept as is, but the next block is now the merged comparison
58 ; block for bytes [1,2] or the block for the head.
59 ; CHECK: entry
60 ; CHECK: br i1 %cmp.i.i, label %land.rhs.i.i.i{{(.i)?}}, label %opeq.exit
61 ; The two 1 byte loads and compares at offset 1 are replaced with a single
62 ; 2-byte memcmp.
63 ; CHECK: land.rhs.i.i.i
64 ; CHECK: @memcmp({{.*}}2)
65 ; CHECK: icmp eq {{.*}} 0
66 ; In the end we have three blocks.
67 ; CHECK: phi i1
68 ; CHECK-SAME %entry
69 ; CHECK-SAME %land.rhs.i.i.i.i
70 ; CHECK-SAME %land.rhs.i.i.i
71 }
72
+0
-30
test/Transforms/MergeICmps/volatile.ll less more
None ; RUN: opt -mergeicmps -S -o - %s | FileCheck %s
1
2 %"struct.std::pair" = type { i32, i32 }
3
4 define zeroext i1 @opeq(
5 %"struct.std::pair"* nocapture readonly dereferenceable(8) %a,
6 %"struct.std::pair"* nocapture readonly dereferenceable(8) %b) local_unnamed_addr #0 {
7 entry:
8 %first.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 0
9 %0 = load i32, i32* %first.i, align 4
10 %first1.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 0
11 %1 = load i32, i32* %first1.i, align 4
12 %cmp.i = icmp eq i32 %0, %1
13 br i1 %cmp.i, label %land.rhs.i, label %opeq1.exit
14
15 land.rhs.i:
16 %second.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 1
17 %2 = load volatile i32, i32* %second.i, align 4
18 %second2.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 1
19 %3 = load i32, i32* %second2.i, align 4
20 %cmp3.i = icmp eq i32 %2, %3
21 br label %opeq1.exit
22
23 opeq1.exit:
24 %4 = phi i1 [ false, %entry ], [ %cmp3.i, %land.rhs.i ]
25 ret i1 %4
26 ; CHECK-LABEL: @opeq(
27 ; CHECK-NOT: memcmp
28 }
29