llvm.org GIT mirror llvm / cf82dc3
Rewrite the main DSE loop to be written in terms of reasoning about pairs of AA::Location's instead of looking for MemDep's "Def" predicate. This is more powerful and general, handling memset/memcpy/store all uniformly, and implementing PR8701 and probably obsoleting parts of memcpyoptimizer. This also fixes an obscure bug with init.trampoline and i8 stores, but I'm not surprised it hasn't been hit yet. Enhancing init.trampoline to carry the size that it stores would allow DSE to be much more aggressive about optimizing them. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@120406 91177308-0d34-0410-b5e6-96231b3b80d8 Chris Lattner 8 years ago
3 changed file(s) with 147 addition(s) and 82 deletion(s). Raw diff Collapse all Expand all
4646 /// pair holds the instruction that clobbers the memory. For example,
4747 /// this occurs when we see a may-aliased store to the memory location we
4848 /// care about.
49 ///
50 /// A dependence query on the first instruction of the entry block will
51 /// return a clobber(self) result.
4952 Clobber,
5053
5154 /// Def - This is a dependence on the specified instruction which
110110 return false;
111111 }
112112
113 /// getLocForWrite - Return a Location stored to by the specified instruction.
114 static AliasAnalysis::Location
115 getLocForWrite(Instruction *Inst, AliasAnalysis &AA) {
116 if (StoreInst *SI = dyn_cast(Inst))
117 return AA.getLocation(SI);
118
119 if (MemIntrinsic *MI = dyn_cast(Inst)) {
120 // memcpy/memmove/memset.
121 AliasAnalysis::Location Loc = AA.getLocationForDest(MI);
122 // If we don't have target data around, an unknown size in Location means
123 // that we should use the size of the pointee type. This isn't valid for
124 // memset/memcpy, which writes more than an i8.
125 if (Loc.Size == AliasAnalysis::UnknownSize && AA.getTargetData() == 0)
126 return AliasAnalysis::Location();
127 return Loc;
128 }
129
130 IntrinsicInst *II = dyn_cast(Inst);
131 if (II == 0) return AliasAnalysis::Location();
132
133 switch (II->getIntrinsicID()) {
134 default: return AliasAnalysis::Location(); // Unhandled intrinsic.
135 case Intrinsic::init_trampoline:
136 // If we don't have target data around, an unknown size in Location means
137 // that we should use the size of the pointee type. This isn't valid for
138 // init.trampoline, which writes more than an i8.
139 if (AA.getTargetData() == 0) return AliasAnalysis::Location();
140
141 // FIXME: We don't know the size of the trampoline, so we can't really
142 // handle it here.
143 return AliasAnalysis::Location(II->getArgOperand(0));
144 case Intrinsic::lifetime_end: {
145 uint64_t Len = cast(II->getArgOperand(0))->getZExtValue();
146 return AliasAnalysis::Location(II->getArgOperand(1), Len);
147 }
148 }
149 }
150
113151 /// isRemovable - If the value of this instruction and the memory it writes to
114152 /// is unused, may we delete this instruction?
115153 static bool isRemovable(Instruction *I) {
139177 }
140178 }
141179
142 /// getStoreSize - Return the length in bytes of the write by the clobbering
143 /// instruction. If variable or unknown, returns AliasAnalysis::UnknownSize.
144 static uint64_t getStoreSize(Instruction *I, const TargetData *TD) {
145 assert(hasMemoryWrite(I));
146 if (StoreInst *SI = dyn_cast(I)) {
147 if (!TD) return AliasAnalysis::UnknownSize;
148 return TD->getTypeStoreSize(SI->getOperand(0)->getType());
149 }
150
151 Value *Len;
152 if (MemIntrinsic *MI = dyn_cast(I)) {
153 Len = MI->getLength();
154 } else {
155 IntrinsicInst *II = cast(I);
156 switch (II->getIntrinsicID()) {
157 default: assert(false && "Unexpected intrinsic!");
158 case Intrinsic::init_trampoline:
159 return AliasAnalysis::UnknownSize;
160 case Intrinsic::lifetime_end:
161 Len = II->getArgOperand(0);
162 break;
163 }
164 }
165 if (ConstantInt *LenCI = dyn_cast(Len))
166 if (!LenCI->isAllOnesValue())
167 return LenCI->getZExtValue();
168 return AliasAnalysis::UnknownSize;
169 }
170
171 /// isStoreAtLeastAsWideAs - Return true if the size of the store in I1 is
172 /// greater than or equal to the store in I2. This returns false if we don't
173 /// know.
174 ///
175 static bool isStoreAtLeastAsWideAs(Instruction *I1, Instruction *I2,
176 const TargetData *TD) {
177 const Type *I1Ty = getPointerOperand(I1)->getType();
178 const Type *I2Ty = getPointerOperand(I2)->getType();
179
180 // Exactly the same type, must have exactly the same size.
181 if (I1Ty == I2Ty) return true;
182
183 uint64_t I1Size = getStoreSize(I1, TD);
184 uint64_t I2Size = getStoreSize(I2, TD);
185
186 return I1Size != AliasAnalysis::UnknownSize &&
187 I2Size != AliasAnalysis::UnknownSize &&
188 I1Size >= I2Size;
189 }
190
180 /// isCompleteOverwrite - Return true if a store to the 'Later' location
181 /// completely overwrites a store to the 'Earlier' location.
182 static bool isCompleteOverwrite(const AliasAnalysis::Location &Later,
183 const AliasAnalysis::Location &Earlier,
184 AliasAnalysis &AA, const TargetData *TD) {
185 const Value *P1 = Later.Ptr->stripPointerCasts();
186 const Value *P2 = Earlier.Ptr->stripPointerCasts();
187
188 // Make sure that the start pointers are the same.
189 if (P1 != P2)
190 return false;
191
192 // If we have no TargetData information around, then the size of the store is
193 // inferrable from the pointee type. If they are the same type, then we know
194 // that the store is safe.
195 if (TD == 0)
196 return Later.Ptr->getType() == Earlier.Ptr->getType();
197
198
199 // Make sure that the Later size is >= the Earlier size.
200 if (Later.Size < Earlier.Size)
201 return false;
202
203 return true;
204 }
191205
192206 bool DSE::runOnBasicBlock(BasicBlock &BB) {
193207 MemoryDependenceAnalysis &MD = getAnalysis();
214228
215229 // Ignore non-local store liveness.
216230 // FIXME: cross-block DSE would be fun. :)
217 if (InstDep.isNonLocal()) continue;
231 if (InstDep.isNonLocal() ||
232 // Ignore self dependence, which happens in the entry block of the
233 // function.
234 InstDep.getInst() == Inst)
235 continue;
218236
219237 // If we're storing the same value back to a pointer that we just
220238 // loaded from, then the store can be removed.
239257 }
240258 }
241259
242 if (!InstDep.isDef()) {
260 // Figure out what location is being stored to.
261 AliasAnalysis::Location Loc = getLocForWrite(Inst, AA);
262
263 // If we didn't get a useful location, fail.
264 if (Loc.Ptr == 0)
265 continue;
266
267 while (!InstDep.isNonLocal()) {
268 // Get the memory clobbered by the instruction we depend on. MemDep will
269 // skip any instructions that 'Loc' clearly doesn't interact with. If we
270 // end up depending on a may- or must-aliased load, then we can't optimize
271 // away the store and we bail out. However, if we depend on on something
272 // that overwrites the memory location we *can* potentially optimize it.
273 //
274 // Find out what memory location the dependant instruction stores.
275 Instruction *DepWrite = InstDep.getInst();
276 AliasAnalysis::Location DepLoc = getLocForWrite(DepWrite, AA);
277 // If we didn't get a useful location, or if it isn't a size, bail out.
278 if (DepLoc.Ptr == 0)
279 break;
280
281 // If we find a removable write that is completely obliterated by the
282 // store to 'Loc' then we can remove it.
283 if (isRemovable(DepWrite) && isCompleteOverwrite(Loc, DepLoc, AA, TD)) {
284 // Delete the store and now-dead instructions that feed it.
285 DeleteDeadInstruction(DepWrite);
286 ++NumFastStores;
287 MadeChange = true;
288
289 // DeleteDeadInstruction can delete the current instruction in loop
290 // cases, reset BBI.
291 BBI = Inst;
292 if (BBI != BB.begin())
293 --BBI;
294 break;
295 }
296
243297 // If this is a may-aliased store that is clobbering the store value, we
244298 // can keep searching past it for another must-aliased pointer that stores
245299 // to the same location. For example, in:
248302 // store -> P
249303 // we can remove the first store to P even though we don't know if P and Q
250304 // alias.
251 if (StoreInst *SI = dyn_cast(Inst)) {
252 AliasAnalysis::Location Loc = AA.getLocation(SI);
253 while (InstDep.isClobber() && InstDep.getInst() != &BB.front()) {
254 // Can't look past this instruction if it might read 'Loc'.
255 if (AA.getModRefInfo(InstDep.getInst(), Loc) & AliasAnalysis::Ref)
256 break;
257
258 InstDep = MD.getPointerDependencyFrom(Loc, false,
259 InstDep.getInst(), &BB);
260 }
261 }
262 }
263
264 // If this is a store-store dependence, then the previous store is dead so
265 // long as this store is at least as big as it.
266 if (InstDep.isDef() && hasMemoryWrite(InstDep.getInst())) {
267 Instruction *DepStore = InstDep.getInst();
268 if (!isRemovable(DepStore) ||
269 !isStoreAtLeastAsWideAs(Inst, DepStore, TD))
270 continue;
305 if (DepWrite == &BB.front()) break;
306
307 // Can't look past this instruction if it might read 'Loc'.
308 if (AA.getModRefInfo(DepWrite, Loc) & AliasAnalysis::Ref)
309 break;
271310
272 // Delete the store and now-dead instructions that feed it.
273 DeleteDeadInstruction(DepStore);
274 ++NumFastStores;
275 MadeChange = true;
276
277 // DeleteDeadInstruction can delete the current instruction in loop
278 // cases, reset BBI.
279 BBI = Inst;
280 if (BBI != BB.begin())
281 --BBI;
282 continue;
311 InstDep = MD.getPointerDependencyFrom(Loc, false, DepWrite, &BB);
283312 }
284313 }
285314
176176 ; CHECK-NEXT: ret void
177177 }
178178
179
180 ; PR8701
181
;; PR8701: a fully dead overwrite of a memcpy.  The first memcpy writes the
;; exact same 12 bytes at %P as the second, so DSE must delete the first call
;; and keep exactly one memcpy.
define void @test15(i8* %P, i8* %Q) nounwind ssp {
tail call void @llvm.memcpy.i64(i8* %P, i8* %Q, i64 12, i32 1)
tail call void @llvm.memcpy.i64(i8* %P, i8* %Q, i64 12, i32 1)
ret void
; CHECK: @test15
; CHECK-NEXT: call void @llvm.memcpy
; CHECK-NEXT: ret
}
191
;; Full overwrite of a smaller memcpy: the later 12-byte copy to %P entirely
;; covers the earlier 8-byte copy, so DSE must remove the 8-byte memcpy and
;; keep only the wider one.
define void @test16(i8* %P, i8* %Q) nounwind ssp {
tail call void @llvm.memcpy.i64(i8* %P, i8* %Q, i64 8, i32 1)
tail call void @llvm.memcpy.i64(i8* %P, i8* %Q, i64 12, i32 1)
ret void
; CHECK: @test16
; CHECK-NEXT: call void @llvm.memcpy
; CHECK-NEXT: ret
}
201
;; Overwrite of a memset by a memcpy: the later 12-byte memcpy completely
;; covers the earlier 8-byte memset at %P, so DSE must delete the memset —
;; exercising the intrinsic-vs-intrinsic (not just store-vs-store) path.
define void @test17(i8* %P, i8* %Q) nounwind ssp {
tail call void @llvm.memset.i64(i8* %P, i8 42, i64 8, i32 1)
tail call void @llvm.memcpy.i64(i8* %P, i8* %Q, i64 12, i32 1)
ret void
; CHECK: @test17
; CHECK-NEXT: call void @llvm.memcpy
; CHECK-NEXT: ret
}
211