llvm.org GIT mirror: llvm / 9dfccd2

[esan|wset] Optionally assume intra-cache-line accesses

Summary: Adds an option -esan-assume-intra-cache-line which causes esan to
assume that a single memory access touches just one cache line, even if it
is not aligned, for better performance at a potential accuracy cost.
Experiments show that the performance difference can be 2x or more, and
accuracy loss is typically negligible, so we turn this on by default.

This currently applies just to the working set tool.

Reviewers: aizatsky

Subscribers: vitalybuka, zhaoqin, kcc, eugenis, llvm-commits

Differential Revision: http://reviews.llvm.org/D20978

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@271743 91177308-0d34-0410-b5e6-96231b3b80d8

Author: Derek Bruening
3 changed files with 185 additions and 12 deletions.
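The accuracy this option trades away comes from accesses that straddle a 64-byte cache line (the line size the pass's shadow setup assumes, per the hunks below). A minimal C++ sketch of that condition, not part of the patch and with illustrative names:

#include <cstdint>

// An access of Size bytes starting at Addr stays within one cache line only
// if it does not cross a 64-byte boundary.
static bool crossesCacheLine(uint64_t Addr, unsigned Size) {
  const uint64_t CacheLineSize = 64;
  return (Addr % CacheLineSize) + Size > CacheLineSize;
}

With -esan-assume-intra-cache-line (now on by default), such an access is still handled by the single-line inline fastpath keyed off its start address instead of falling back to the slower unaligned runtime calls.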
@@ -56,6 +56,14 @@
     "esan-instrument-memintrinsics", cl::init(true),
     cl::desc("Instrument memintrinsics (memset/memcpy/memmove)"), cl::Hidden);
 
+// Experiments show that the performance difference can be 2x or more,
+// and accuracy loss is typically negligible, so we turn this on by default.
+static cl::opt<bool> ClAssumeIntraCacheLine(
+    "esan-assume-intra-cache-line", cl::init(true),
+    cl::desc("Assume each memory access touches just one cache line, for "
+             "better performance but with a potential loss of accuracy."),
+    cl::Hidden);
+
 STATISTIC(NumInstrumentedLoads, "Number of instrumented loads");
 STATISTIC(NumInstrumentedStores, "Number of instrumented stores");
 STATISTIC(NumFastpaths, "Number of instrumented fastpaths");
@@ -64,6 +72,8 @@
 STATISTIC(NumIgnoredStructs, "Number of ignored structs");
 STATISTIC(NumIgnoredGEPs, "Number of ignored GEP instructions");
 STATISTIC(NumInstrumentedGEPs, "Number of instrumented GEP instructions");
+STATISTIC(NumAssumedIntraCacheLine,
+          "Number of accesses assumed to be intra-cache-line");
 
 static const uint64_t EsanCtorAndDtorPriority = 0;
 static const char *const EsanModuleCtorName = "esan.module_ctor";
@@ -714,8 +724,12 @@
   // (and our shadow memory setup assumes 64-byte cache lines).
   assert(TypeSize <= 64);
   if (!(TypeSize == 8 ||
-        (Alignment % (TypeSize / 8)) == 0))
-    return false;
+        (Alignment % (TypeSize / 8)) == 0)) {
+    if (ClAssumeIntraCacheLine)
+      ++NumAssumedIntraCacheLine;
+    else
+      return false;
+  }
 
   // We inline instrumentation to set the corresponding shadow bits for
   // each cache line touched by the application. Here we handle a single
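The fastpath that the modified check above now falls through to is what the updated CHECK lines in the tests below expect: compute the shadow byte for the cache line containing the access and set its bits only if they are not already set. A rough C++ mirror of that inlined sequence, with the constants taken from the CHECK lines; the shadow mapping and the meaning of the 0x81 bits belong to the working set runtime and are only assumed here, and the function names are illustrative:

#include <cstdint>

// Mirrors %0..%3 of the emitted IR: mask to the low 44 address bits, shift
// into the shadow region, one shadow byte per 64-byte cache line. Assumes
// the esan runtime has mapped this shadow range.
static inline uint8_t *appToShadow(uint64_t App) {
  return reinterpret_cast<uint8_t *>(
      ((App & 17592186044415ULL) + 1337006139375616ULL) >> 6);
}

static inline void markLineAccessed(uint64_t App) {
  uint8_t *Shadow = appToShadow(App);
  // Check-before-write, as in the branchy IR below: only dirty the shadow
  // byte if the 0x81 bits are not already set.
  if ((*Shadow & 0x81) != 0x81)
    *Shadow |= 0x81;
}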
@@ -90,15 +90,27 @@
 }
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; Not guaranteed to be intra-cache-line
+; Not guaranteed to be intra-cache-line, but our defaults are to
+; assume they are:
 
 define i16 @unaligned2(i16* %a) {
 entry:
   %tmp1 = load i16, i16* %a, align 1
   ret i16 %tmp1
-; CHECK: %0 = bitcast i16* %a to i8*
-; CHECK-NEXT: call void @__esan_unaligned_load2(i8* %0)
-; CHECK-NEXT: %tmp1 = load i16, i16* %a, align 1
+; CHECK: %0 = ptrtoint i16* %a to i64
+; CHECK-NEXT: %1 = and i64 %0, 17592186044415
+; CHECK-NEXT: %2 = add i64 %1, 1337006139375616
+; CHECK-NEXT: %3 = lshr i64 %2, 6
+; CHECK-NEXT: %4 = inttoptr i64 %3 to i8*
+; CHECK-NEXT: %5 = load i8, i8* %4
+; CHECK-NEXT: %6 = and i8 %5, -127
+; CHECK-NEXT: %7 = icmp ne i8 %6, -127
+; CHECK-NEXT: br i1 %7, label %8, label %11
+; CHECK: %9 = or i8 %5, -127
+; CHECK-NEXT: %10 = inttoptr i64 %3 to i8*
+; CHECK-NEXT: store i8 %9, i8* %10
+; CHECK-NEXT: br label %11
+; CHECK: %tmp1 = load i16, i16* %a, align 1
 ; CHECK-NEXT: ret i16 %tmp1
 }
 
@@ -106,9 +118,20 @@
 entry:
   %tmp1 = load i32, i32* %a, align 2
   ret i32 %tmp1
-; CHECK: %0 = bitcast i32* %a to i8*
-; CHECK-NEXT: call void @__esan_unaligned_load4(i8* %0)
-; CHECK-NEXT: %tmp1 = load i32, i32* %a, align 2
+; CHECK: %0 = ptrtoint i32* %a to i64
+; CHECK-NEXT: %1 = and i64 %0, 17592186044415
+; CHECK-NEXT: %2 = add i64 %1, 1337006139375616
+; CHECK-NEXT: %3 = lshr i64 %2, 6
+; CHECK-NEXT: %4 = inttoptr i64 %3 to i8*
+; CHECK-NEXT: %5 = load i8, i8* %4
+; CHECK-NEXT: %6 = and i8 %5, -127
+; CHECK-NEXT: %7 = icmp ne i8 %6, -127
+; CHECK-NEXT: br i1 %7, label %8, label %11
+; CHECK: %9 = or i8 %5, -127
+; CHECK-NEXT: %10 = inttoptr i64 %3 to i8*
+; CHECK-NEXT: store i8 %9, i8* %10
+; CHECK-NEXT: br label %11
+; CHECK: %tmp1 = load i32, i32* %a, align 2
 ; CHECK-NEXT: ret i32 %tmp1
 }
 
@@ -116,9 +139,20 @@
 entry:
   %tmp1 = load i64, i64* %a, align 4
   ret i64 %tmp1
-; CHECK: %0 = bitcast i64* %a to i8*
-; CHECK-NEXT: call void @__esan_unaligned_load8(i8* %0)
-; CHECK-NEXT: %tmp1 = load i64, i64* %a, align 4
+; CHECK: %0 = ptrtoint i64* %a to i64
+; CHECK-NEXT: %1 = and i64 %0, 17592186044415
+; CHECK-NEXT: %2 = add i64 %1, 1337006139375616
+; CHECK-NEXT: %3 = lshr i64 %2, 6
+; CHECK-NEXT: %4 = inttoptr i64 %3 to i8*
+; CHECK-NEXT: %5 = load i8, i8* %4
+; CHECK-NEXT: %6 = and i8 %5, -127
+; CHECK-NEXT: %7 = icmp ne i8 %6, -127
+; CHECK-NEXT: br i1 %7, label %8, label %11
+; CHECK: %9 = or i8 %5, -127
+; CHECK-NEXT: %10 = inttoptr i64 %3 to i8*
+; CHECK-NEXT: store i8 %9, i8* %10
+; CHECK-NEXT: br label %11
+; CHECK: %tmp1 = load i64, i64* %a, align 4
 ; CHECK-NEXT: ret i64 %tmp1
 }
 
@@ -0,0 +1,125 @@
+; Test EfficiencySanitizer working set instrumentation without aggressive
+; optimization flags.
+;
+; RUN: opt < %s -esan -esan-working-set -esan-assume-intra-cache-line=0 -S | FileCheck %s
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Intra-cache-line
+
+define i8 @aligned1(i8* %a) {
+entry:
+  %tmp1 = load i8, i8* %a, align 1
+  ret i8 %tmp1
+; CHECK: @llvm.global_ctors = {{.*}}@esan.module_ctor
+; CHECK: %0 = ptrtoint i8* %a to i64
+; CHECK-NEXT: %1 = and i64 %0, 17592186044415
+; CHECK-NEXT: %2 = add i64 %1, 1337006139375616
+; CHECK-NEXT: %3 = lshr i64 %2, 6
+; CHECK-NEXT: %4 = inttoptr i64 %3 to i8*
+; CHECK-NEXT: %5 = load i8, i8* %4
+; CHECK-NEXT: %6 = and i8 %5, -127
+; CHECK-NEXT: %7 = icmp ne i8 %6, -127
+; CHECK-NEXT: br i1 %7, label %8, label %11
+; CHECK: %9 = or i8 %5, -127
+; CHECK-NEXT: %10 = inttoptr i64 %3 to i8*
+; CHECK-NEXT: store i8 %9, i8* %10
+; CHECK-NEXT: br label %11
+; CHECK: %tmp1 = load i8, i8* %a, align 1
+; CHECK-NEXT: ret i8 %tmp1
+}
+
+define i16 @aligned2(i16* %a) {
+entry:
+  %tmp1 = load i16, i16* %a, align 2
+  ret i16 %tmp1
+; CHECK: %0 = ptrtoint i16* %a to i64
+; CHECK-NEXT: %1 = and i64 %0, 17592186044415
+; CHECK-NEXT: %2 = add i64 %1, 1337006139375616
+; CHECK-NEXT: %3 = lshr i64 %2, 6
+; CHECK-NEXT: %4 = inttoptr i64 %3 to i8*
+; CHECK-NEXT: %5 = load i8, i8* %4
+; CHECK-NEXT: %6 = and i8 %5, -127
+; CHECK-NEXT: %7 = icmp ne i8 %6, -127
+; CHECK-NEXT: br i1 %7, label %8, label %11
+; CHECK: %9 = or i8 %5, -127
+; CHECK-NEXT: %10 = inttoptr i64 %3 to i8*
+; CHECK-NEXT: store i8 %9, i8* %10
+; CHECK-NEXT: br label %11
+; CHECK: %tmp1 = load i16, i16* %a, align 2
+; CHECK-NEXT: ret i16 %tmp1
+}
+
+define i32 @aligned4(i32* %a) {
+entry:
+  %tmp1 = load i32, i32* %a, align 4
+  ret i32 %tmp1
+; CHECK: %0 = ptrtoint i32* %a to i64
+; CHECK-NEXT: %1 = and i64 %0, 17592186044415
+; CHECK-NEXT: %2 = add i64 %1, 1337006139375616
+; CHECK-NEXT: %3 = lshr i64 %2, 6
+; CHECK-NEXT: %4 = inttoptr i64 %3 to i8*
+; CHECK-NEXT: %5 = load i8, i8* %4
+; CHECK-NEXT: %6 = and i8 %5, -127
+; CHECK-NEXT: %7 = icmp ne i8 %6, -127
+; CHECK-NEXT: br i1 %7, label %8, label %11
+; CHECK: %9 = or i8 %5, -127
+; CHECK-NEXT: %10 = inttoptr i64 %3 to i8*
+; CHECK-NEXT: store i8 %9, i8* %10
+; CHECK-NEXT: br label %11
+; CHECK: %tmp1 = load i32, i32* %a, align 4
+; CHECK-NEXT: ret i32 %tmp1
+}
+
+define i64 @aligned8(i64* %a) {
+entry:
+  %tmp1 = load i64, i64* %a, align 8
+  ret i64 %tmp1
+; CHECK: %0 = ptrtoint i64* %a to i64
+; CHECK-NEXT: %1 = and i64 %0, 17592186044415
+; CHECK-NEXT: %2 = add i64 %1, 1337006139375616
+; CHECK-NEXT: %3 = lshr i64 %2, 6
+; CHECK-NEXT: %4 = inttoptr i64 %3 to i8*
+; CHECK-NEXT: %5 = load i8, i8* %4
+; CHECK-NEXT: %6 = and i8 %5, -127
+; CHECK-NEXT: %7 = icmp ne i8 %6, -127
+; CHECK-NEXT: br i1 %7, label %8, label %11
+; CHECK: %9 = or i8 %5, -127
+; CHECK-NEXT: %10 = inttoptr i64 %3 to i8*
+; CHECK-NEXT: store i8 %9, i8* %10
+; CHECK-NEXT: br label %11
+; CHECK: %tmp1 = load i64, i64* %a, align 8
+; CHECK-NEXT: ret i64 %tmp1
+}
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Not guaranteed to be intra-cache-line
+
+define i16 @unaligned2(i16* %a) {
+entry:
+  %tmp1 = load i16, i16* %a, align 1
+  ret i16 %tmp1
+; CHECK: %0 = bitcast i16* %a to i8*
+; CHECK-NEXT: call void @__esan_unaligned_load2(i8* %0)
+; CHECK-NEXT: %tmp1 = load i16, i16* %a, align 1
+; CHECK-NEXT: ret i16 %tmp1
+}
+
+define i32 @unaligned4(i32* %a) {
+entry:
+  %tmp1 = load i32, i32* %a, align 2
+  ret i32 %tmp1
+; CHECK: %0 = bitcast i32* %a to i8*
+; CHECK-NEXT: call void @__esan_unaligned_load4(i8* %0)
+; CHECK-NEXT: %tmp1 = load i32, i32* %a, align 2
+; CHECK-NEXT: ret i32 %tmp1
+}
+
+define i64 @unaligned8(i64* %a) {
+entry:
+  %tmp1 = load i64, i64* %a, align 4
+  ret i64 %tmp1
+; CHECK: %0 = bitcast i64* %a to i8*
+; CHECK-NEXT: call void @__esan_unaligned_load8(i8* %0)
+; CHECK-NEXT: %tmp1 = load i64, i64* %a, align 4
+; CHECK-NEXT: ret i64 %tmp1
+}