[esan|wset] EfficiencySanitizer working set tool fastpath

Summary:
Adds fastpath instrumentation for esan's working set tool.  The
instrumentation for an intra-cache-line load or store consists of an
inlined write to shadow memory bits for the corresponding cache line.

Adds a basic test for this instrumentation.

Reviewers: aizatsky

Subscribers: vitalybuka, zhaoqin, kcc, eugenis, llvm-commits

Differential Revision: http://reviews.llvm.org/D20483

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@270640 91177308-0d34-0410-b5e6-96231b3b80d8

Author: Derek Bruening
3 changed files with 269 additions and 0 deletions.
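The generated fastpath is described in instrumentFastpathWorkingSet below; as a rough C++ sketch of its run-time effect on a single intra-cache-line access (not part of the patch; the function name is illustrative, and the shadow byte address is assumed to have already been computed by the appToShadow mapping added in this change):

// Sketch only: mirrors the generated-code comment in instrumentFastpathWorkingSet.
// ShadowAddr points at the shadow byte for the 64-byte cache line being accessed.
static void markCacheLineAccessed(unsigned char *ShadowAddr) {
  const unsigned char BitMask = 0x81;     // bottom bit: current sampling period's
                                          // working set; top bit: total working set
  if ((*ShadowAddr & BitMask) != BitMask) // skip the store if both bits are already set
    *ShadowAddr |= BitMask;               // OR preserves the middle 6 bits, which
                                          // are owned by the esan runtime
}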
  enum Type {
    ESAN_None = 0,
    ESAN_CacheFrag,
    ESAN_WorkingSet,
  } ToolType;
};

static cl::opt<bool>
    ClToolCacheFrag("esan-cache-frag", cl::init(false),
                    cl::desc("Detect data cache fragmentation"), cl::Hidden);
static cl::opt<bool>
    ClToolWorkingSet("esan-working-set", cl::init(false),
                     cl::desc("Measure the working set size"), cl::Hidden);
// Each new tool will get its own opt flag here.
// These are converted to EfficiencySanitizerOptions for use
// in the code.
static const char *const EsanInitName = "__esan_init";
static const char *const EsanExitName = "__esan_exit";

// We must keep these Shadow* constants consistent with the esan runtime.
// FIXME: Try to place these shadow constants, the names of the __esan_*
// interface functions, and the ToolType enum into a header shared between
// llvm and compiler-rt.
static const uint64_t ShadowMask = 0x00000fffffffffffull;
static const uint64_t ShadowOffs[3] = { // Indexed by scale
  0x0000130000000000ull,
  0x0000220000000000ull,
  0x0000440000000000ull,
};
// This array is indexed by the ToolType enum.
static const int ShadowScale[] = {
  0, // ESAN_None.
  2, // ESAN_CacheFrag: 4B:1B, so 4 to 1 == >>2.
  6, // ESAN_WorkingSet: 64B:1B, so 64 to 1 == >>6.
};

namespace {

static EfficiencySanitizerOptions
OverrideOptionsFromCL(EfficiencySanitizerOptions Options) {
  if (ClToolCacheFrag)
    Options.ToolType = EfficiencySanitizerOptions::ESAN_CacheFrag;
  else if (ClToolWorkingSet)
    Options.ToolType = EfficiencySanitizerOptions::ESAN_WorkingSet;

  // Direct opt invocation with no params will have the default ESAN_None.
  // We run the default tool in that case.
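For reference, the working set tool is selected by passing both -esan and -esan-working-set to opt, as the RUN line of the new test below does; with no tool flag at all, the pass falls back to the default tool as the comment above notes.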
  bool instrumentMemIntrinsic(MemIntrinsic *MI);
  bool shouldIgnoreMemoryAccess(Instruction *I);
  int getMemoryAccessFuncIndex(Value *Addr, const DataLayout &DL);
  Value *appToShadow(Value *Shadow, IRBuilder<> &IRB);
  bool instrumentFastpath(Instruction *I, const DataLayout &DL, bool IsStore,
                          Value *Addr, unsigned Alignment);
  // Each tool has its own fastpath routine:
  bool instrumentFastpathCacheFrag(Instruction *I, const DataLayout &DL,
                                   Value *Addr, unsigned Alignment);
  bool instrumentFastpathWorkingSet(Instruction *I, const DataLayout &DL,
                                    Value *Addr, unsigned Alignment);

  EfficiencySanitizerOptions Options;
  LLVMContext *Ctx;
  return true;
}

Value *EfficiencySanitizer::appToShadow(Value *Shadow, IRBuilder<> &IRB) {
  // Shadow = ((App & Mask) + Offs) >> Scale
  Shadow = IRB.CreateAnd(Shadow, ConstantInt::get(IntptrTy, ShadowMask));
  uint64_t Offs;
  int Scale = ShadowScale[Options.ToolType];
  if (Scale <= 2)
    Offs = ShadowOffs[Scale];
  else
    Offs = ShadowOffs[0] << Scale;
  Shadow = IRB.CreateAdd(Shadow, ConstantInt::get(IntptrTy, Offs));
  if (Scale > 0)
    Shadow = IRB.CreateLShr(Shadow, Scale);
  return Shadow;
}

bool EfficiencySanitizer::shouldIgnoreMemoryAccess(Instruction *I) {
  if (Options.ToolType == EfficiencySanitizerOptions::ESAN_CacheFrag) {
    // We'd like to know about cache fragmentation in vtable accesses and
    // constant data references, so we do not currently ignore anything.
    return false;
  } else if (Options.ToolType == EfficiencySanitizerOptions::ESAN_WorkingSet) {
    // TODO: the instrumentation disturbs the data layout on the stack, so we
    // may want to add an option to ignore stack references (if we can
    // distinguish them) to reduce overhead.
  }
  // TODO(bruening): future tools will be returning true for some cases.
  return false;
  Type *OrigTy = cast<PointerType>(Addr->getType())->getElementType();
  const uint32_t TypeSizeBytes = DL.getTypeStoreSizeInBits(OrigTy) / 8;
  Value *OnAccessFunc = nullptr;

  // Convert 0 to the default alignment.
  if (Alignment == 0)
    Alignment = DL.getPrefTypeAlignment(OrigTy);

  if (IsStore)
    NumInstrumentedStores++;
  else
                                             Value *Addr, unsigned Alignment) {
  if (Options.ToolType == EfficiencySanitizerOptions::ESAN_CacheFrag) {
    return instrumentFastpathCacheFrag(I, DL, Addr, Alignment);
  } else if (Options.ToolType == EfficiencySanitizerOptions::ESAN_WorkingSet) {
    return instrumentFastpathWorkingSet(I, DL, Addr, Alignment);
  }
  return false;
}

  // TODO(bruening): implement a fastpath for aligned accesses
  return false;
}

bool EfficiencySanitizer::instrumentFastpathWorkingSet(
    Instruction *I, const DataLayout &DL, Value *Addr, unsigned Alignment) {
  assert(ShadowScale[Options.ToolType] == 6); // The code below assumes this.
  IRBuilder<> IRB(I);
  Type *OrigTy = cast<PointerType>(Addr->getType())->getElementType();
  const uint32_t TypeSize = DL.getTypeStoreSizeInBits(OrigTy);
  // Bail to the slowpath if the access might touch multiple cache lines.
  // An access aligned to its size is guaranteed to be intra-cache-line.
  // getMemoryAccessFuncIndex has already ruled out a size larger than 16
  // and thus larger than a cache line for platforms this tool targets
  // (and our shadow memory setup assumes 64-byte cache lines).
  assert(TypeSize <= 64);
  if (!(TypeSize == 8 ||
        (Alignment % (TypeSize / 8)) == 0))
    return false;

  // We inline instrumentation to set the corresponding shadow bits for
  // each cache line touched by the application.  Here we handle a single
  // load or store where we've already ruled out the possibility that it
  // might touch more than one cache line and thus we simply update the
  // shadow memory for a single cache line.
  // Our shadow memory model is fine with races when manipulating shadow values.
  // We generate the following code:
  //
  //   const char BitMask = 0x81;
  //   char *ShadowAddr = appToShadow(AppAddr);
  //   if ((*ShadowAddr & BitMask) != BitMask)
  //     *ShadowAddr |= BitMask;
  //
  Value *AddrPtr = IRB.CreatePointerCast(Addr, IntptrTy);
  Value *ShadowPtr = appToShadow(AddrPtr, IRB);
  Type *ShadowTy = IntegerType::get(*Ctx, 8U);
  Type *ShadowPtrTy = PointerType::get(ShadowTy, 0);
  // The bottom bit is used for the current sampling period's working set.
  // The top bit is used for the total working set.  We set both on each
  // memory access, if they are not already set.
  Value *ValueMask = ConstantInt::get(ShadowTy, 0x81); // 10000001B

  Value *OldValue = IRB.CreateLoad(IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy));
  // The AND and CMP will be turned into a TEST instruction by the compiler.
  Value *Cmp = IRB.CreateICmpNE(IRB.CreateAnd(OldValue, ValueMask), ValueMask);
  TerminatorInst *CmpTerm = SplitBlockAndInsertIfThen(Cmp, I, false);
  // FIXME: do I need to call SetCurrentDebugLocation?
  IRB.SetInsertPoint(CmpTerm);
  // We use OR to set the shadow bits to avoid corrupting the middle 6 bits,
  // which are used by the runtime library.
  Value *NewVal = IRB.CreateOr(OldValue, ValueMask);
  IRB.CreateStore(NewVal, IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy));
  IRB.SetInsertPoint(I);

  return true;
}
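The decimal constants in the CHECK lines of the new test follow directly from the Shadow* constants above: for ESAN_WorkingSet the scale is 6, so Offs = ShadowOffs[0] << 6.  A minimal standalone sketch of the same address arithmetic (not part of the patch; the example application address is made up):

#include <cstdint>
#include <cstdio>

int main() {
  // Constants copied from EfficiencySanitizer.cpp above.
  const uint64_t ShadowMask = 0x00000fffffffffffull; // 17592186044415 in the test
  const uint64_t Offs = 0x0000130000000000ull << 6;  // 1337006139375616 in the test
  const int Scale = 6;                               // one shadow byte per 64-byte line

  // appToShadow for the working set tool: Shadow = ((App & Mask) + Offs) >> Scale.
  uint64_t App = 0x00007fffcafe1234ull;              // arbitrary example app address
  uint64_t Shadow = ((App & ShadowMask) + Offs) >> Scale;
  std::printf("shadow byte for %#llx is at %#llx\n",
              (unsigned long long)App, (unsigned long long)Shadow);
  return 0;
}

Likewise, the -127 in the i8 CHECK lines is simply the 0x81 bit mask printed as a signed 8-bit value.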
; Test basic EfficiencySanitizer working set instrumentation.
;
; RUN: opt < %s -esan -esan-working-set -S | FileCheck %s

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Intra-cache-line

define i8 @aligned1(i8* %a) {
entry:
  %tmp1 = load i8, i8* %a, align 1
  ret i8 %tmp1
; CHECK: @llvm.global_ctors = {{.*}}@esan.module_ctor
; CHECK: %0 = ptrtoint i8* %a to i64
; CHECK-NEXT: %1 = and i64 %0, 17592186044415
; CHECK-NEXT: %2 = add i64 %1, 1337006139375616
; CHECK-NEXT: %3 = lshr i64 %2, 6
; CHECK-NEXT: %4 = inttoptr i64 %3 to i8*
; CHECK-NEXT: %5 = load i8, i8* %4
; CHECK-NEXT: %6 = and i8 %5, -127
; CHECK-NEXT: %7 = icmp ne i8 %6, -127
; CHECK-NEXT: br i1 %7, label %8, label %11
; CHECK: %9 = or i8 %5, -127
; CHECK-NEXT: %10 = inttoptr i64 %3 to i8*
; CHECK-NEXT: store i8 %9, i8* %10
; CHECK-NEXT: br label %11
; CHECK: %tmp1 = load i8, i8* %a, align 1
; CHECK-NEXT: ret i8 %tmp1
}

define i16 @aligned2(i16* %a) {
entry:
  %tmp1 = load i16, i16* %a, align 2
  ret i16 %tmp1
; CHECK: %0 = ptrtoint i16* %a to i64
; CHECK-NEXT: %1 = and i64 %0, 17592186044415
; CHECK-NEXT: %2 = add i64 %1, 1337006139375616
; CHECK-NEXT: %3 = lshr i64 %2, 6
; CHECK-NEXT: %4 = inttoptr i64 %3 to i8*
; CHECK-NEXT: %5 = load i8, i8* %4
; CHECK-NEXT: %6 = and i8 %5, -127
; CHECK-NEXT: %7 = icmp ne i8 %6, -127
; CHECK-NEXT: br i1 %7, label %8, label %11
; CHECK: %9 = or i8 %5, -127
; CHECK-NEXT: %10 = inttoptr i64 %3 to i8*
; CHECK-NEXT: store i8 %9, i8* %10
; CHECK-NEXT: br label %11
; CHECK: %tmp1 = load i16, i16* %a, align 2
; CHECK-NEXT: ret i16 %tmp1
}

define i32 @aligned4(i32* %a) {
entry:
  %tmp1 = load i32, i32* %a, align 4
  ret i32 %tmp1
; CHECK: %0 = ptrtoint i32* %a to i64
; CHECK-NEXT: %1 = and i64 %0, 17592186044415
; CHECK-NEXT: %2 = add i64 %1, 1337006139375616
; CHECK-NEXT: %3 = lshr i64 %2, 6
; CHECK-NEXT: %4 = inttoptr i64 %3 to i8*
; CHECK-NEXT: %5 = load i8, i8* %4
; CHECK-NEXT: %6 = and i8 %5, -127
; CHECK-NEXT: %7 = icmp ne i8 %6, -127
; CHECK-NEXT: br i1 %7, label %8, label %11
; CHECK: %9 = or i8 %5, -127
; CHECK-NEXT: %10 = inttoptr i64 %3 to i8*
; CHECK-NEXT: store i8 %9, i8* %10
; CHECK-NEXT: br label %11
; CHECK: %tmp1 = load i32, i32* %a, align 4
; CHECK-NEXT: ret i32 %tmp1
}

define i64 @aligned8(i64* %a) {
entry:
  %tmp1 = load i64, i64* %a, align 8
  ret i64 %tmp1
; CHECK: %0 = ptrtoint i64* %a to i64
; CHECK-NEXT: %1 = and i64 %0, 17592186044415
; CHECK-NEXT: %2 = add i64 %1, 1337006139375616
; CHECK-NEXT: %3 = lshr i64 %2, 6
; CHECK-NEXT: %4 = inttoptr i64 %3 to i8*
; CHECK-NEXT: %5 = load i8, i8* %4
; CHECK-NEXT: %6 = and i8 %5, -127
; CHECK-NEXT: %7 = icmp ne i8 %6, -127
; CHECK-NEXT: br i1 %7, label %8, label %11
; CHECK: %9 = or i8 %5, -127
; CHECK-NEXT: %10 = inttoptr i64 %3 to i8*
; CHECK-NEXT: store i8 %9, i8* %10
; CHECK-NEXT: br label %11
; CHECK: %tmp1 = load i64, i64* %a, align 8
; CHECK-NEXT: ret i64 %tmp1
}

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Not guaranteed to be intra-cache-line

define i16 @unaligned2(i16* %a) {
entry:
  %tmp1 = load i16, i16* %a, align 1
  ret i16 %tmp1
; CHECK: %0 = bitcast i16* %a to i8*
; CHECK-NEXT: call void @__esan_unaligned_load2(i8* %0)
; CHECK-NEXT: %tmp1 = load i16, i16* %a, align 1
; CHECK-NEXT: ret i16 %tmp1
}

define i32 @unaligned4(i32* %a) {
entry:
  %tmp1 = load i32, i32* %a, align 2
  ret i32 %tmp1
; CHECK: %0 = bitcast i32* %a to i8*
; CHECK-NEXT: call void @__esan_unaligned_load4(i8* %0)
; CHECK-NEXT: %tmp1 = load i32, i32* %a, align 2
; CHECK-NEXT: ret i32 %tmp1
}

define i64 @unaligned8(i64* %a) {
entry:
  %tmp1 = load i64, i64* %a, align 4
  ret i64 %tmp1
; CHECK: %0 = bitcast i64* %a to i8*
; CHECK-NEXT: call void @__esan_unaligned_load8(i8* %0)
; CHECK-NEXT: %tmp1 = load i64, i64* %a, align 4
; CHECK-NEXT: ret i64 %tmp1
}

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Ensure that esan converts intrinsics to calls:

declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1)
declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1)
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)

define void @memCpyTest(i8* nocapture %x, i8* nocapture %y) {
entry:
  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %x, i8* %y, i64 16, i32 4, i1 false)
  ret void
; CHECK: define void @memCpyTest
; CHECK: call i8* @memcpy
; CHECK: ret void
}

define void @memMoveTest(i8* nocapture %x, i8* nocapture %y) {
entry:
  tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %x, i8* %y, i64 16, i32 4, i1 false)
  ret void
; CHECK: define void @memMoveTest
; CHECK: call i8* @memmove
; CHECK: ret void
}

define void @memSetTest(i8* nocapture %x) {
entry:
  tail call void @llvm.memset.p0i8.i64(i8* %x, i8 77, i64 16, i32 4, i1 false)
  ret void
; CHECK: define void @memSetTest
; CHECK: call i8* @memset
; CHECK: ret void
}

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Top-level:

; CHECK: define internal void @esan.module_ctor()
; CHECK: call void @__esan_init(i32 2, i64 ptrtoint (i64* @0 to i64))
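The i32 2 passed to __esan_init in the final CHECK line matches the position of ESAN_WorkingSet in the ToolType enum added above (ESAN_None = 0, ESAN_CacheFrag = 1, ESAN_WorkingSet = 2).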