llvm.org GIT mirror llvm / 3691522
Merging r168361, r168346 and r168227 into 3.2 branch release
Pawel Wodnicki

Merging r168361:
Fix PR14132 and handle OOB loads speculated through PHI nodes. The issue is that we may end up with newly OOB loads when speculating a load into the predecessors of a PHI node, and this confuses the new integer splitting logic in some cases, triggering an assertion failure. In fact, the branch in question must be dead code as it loads from a too-narrow alloca. Add code to handle this gracefully and leave the requisite FIXMEs for both optimizing more aggressively and doing more to aid sanitizing invalid code which triggers these patterns.

Merging r168346:
Rework the rewriting of loads and stores for vector and integer allocas to properly handle the combinations of these with split integer loads and stores. This essentially replaces Evan's r168227 by refactoring the code in a different way, and trying to mirror that refactoring in both the load and store sides of the rewriting. Generally speaking there was some really problematic duplicated code here that led to poorly founded assumptions and then subtle bugs. Now much of the code actually flows through and follows a more consistent style and logical path. There is still a tiny bit of duplication on the store side of things, but it is much less bad. This also changes the logic to never re-use a load or store instruction, as that was simply too error-prone in practice. I've added a few tests (one a reduction of the one in Evan's original patch, which happened to be the same as the report in PR14349). I'm going to look at adding a few more tests for things I found and fixed in passing (such as the volatile tests in the vectorizable predicate). This patch has survived bootstrap, and modulo one bugfix survived Duncan's test suite, but let me know if anything else explodes.

Merging r168227:
Teach SROA rewriteVectorizedStoreInst to handle cases when the loaded value is narrower than the stored value. rdar://12713675

git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_32@168443 91177308-0d34-0410-b5e6-96231b3b80d8
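To make the r168361 failure mode concrete, here is a rough LLVM IR sketch of the intermediate state described above; value and block names are illustrative, and the actual reduction is the @PR14132 test added below. Promoting the pointer alloca first turns the loaded pointer into a PHI node, and speculating the dependent load into the PHI's predecessors materializes an out-of-bounds load on the necessarily dead path:

; After the pointer alloca %ptr is promoted (with %a an i64 alloca and %b an
; i8 alloca), the loaded pointer has become a PHI node:
if.end:
  %phi = phi i64* [ %a, %entry ], [ %b.cast, %if.then ]
  %result = load i64* %phi
; Speculating %result into the predecessors places a copy in %if.then:
  %result.then = load i64* %b.cast    ; an 8-byte load from a 1-byte alloca
; That speculated load is statically out of bounds, so the rewriter now
; replaces it with undef instead of asserting in the split-integer logic.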
4 changed file(s) with 225 addition(s) and 165 deletion(s).
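The r168346/r168227 half of the change is easiest to see in the @PR14349.1 test added below: an alloca-wide integer store overlaps a partition that is given a vector type, so the split store must be rewritten as a narrower, vector-typed store instead of reusing the original store instruction. A condensed LLVM IR sketch of the input pattern and of the roughly expected output (names are illustrative and mirror the test's CHECK lines):

  %a = alloca i32
  store i32 %x, i32* %a              ; alloca-wide integer store
  %cast = bitcast i32* %a to <2 x i8>*
  %vec = load <2 x i8>* %cast        ; only 16 bits of the stored value are read
  ret <2 x i8> %vec

; After SROA the alloca is gone and the result is recomposed directly from %x:
  %trunc = trunc i32 %x to i16
  %cast.vec = bitcast i16 %trunc to <2 x i8>
  ret <2 x i8> %cast.vec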
567567
568568 // Clamp the end offset to the end of the allocation. Note that this is
569569 // formulated to handle even the case where "BeginOffset + Size" overflows.
570 // NOTE! This may appear superficially to be something we could ignore
571 // entirely, but that is not so! There may be PHI-node uses where some
572 // instructions are dead but not others. We can't completely ignore the
573 // PHI node, and so have to record at least the information here.
570574 assert(AllocSize >= BeginOffset); // Established above.
571575 if (Size > AllocSize - BeginOffset) {
572576 DEBUG(dbgs() << "WARNING: Clamping a " << Size << " byte use @" << Offset
13811385 /// \brief A collection of instructions to delete.
13821386 /// We try to batch deletions to simplify code and make things a bit more
13831387 /// efficient.
1384 SmallVector<Instruction *, 8> DeadInsts;
1385
1386 /// \brief A set to prevent repeatedly marking an instruction split into many
1387 /// uses as dead. Only used to guard insertion into DeadInsts.
1388 SmallPtrSet<Instruction *, 4> DeadSplitInsts;
1388 SetVector<Instruction *, SmallVector<Instruction *, 8> > DeadInsts;
13891389
13901390 /// \brief Post-promotion worklist.
13911391 ///
15721572 do {
15731573 LoadInst *LI = Loads.pop_back_val();
15741574 LI->replaceAllUsesWith(NewPN);
1575 Pass.DeadInsts.push_back(LI);
1575 Pass.DeadInsts.insert(LI);
15761576 } while (!Loads.empty());
15771577
15781578 // Inject loads into all of the pred blocks.
17161716
17171717 DEBUG(dbgs() << " speculated to: " << *V << "\n");
17181718 LI->replaceAllUsesWith(V);
1719 Pass.DeadInsts.push_back(LI);
1719 Pass.DeadInsts.insert(LI);
17201720 }
17211721 }
17221722 };
21332133 } else if (I->U->get()->getType()->getPointerElementType()->isStructTy()) {
21342134 // Disable vector promotion when there are loads or stores of an FCA.
21352135 return false;
2136 } else if (!isa<LoadInst>(I->U->getUser()) &&
2137 !isa<StoreInst>(I->U->getUser())) {
2136 } else if (LoadInst *LI = dyn_cast<LoadInst>(I->U->getUser())) {
2137 if (LI->isVolatile())
2138 return false;
2139 } else if (StoreInst *SI = dyn_cast<StoreInst>(I->U->getUser())) {
2140 if (SI->isVolatile())
2141 return false;
2142 } else {
21382143 return false;
21392144 }
21402145 }
22402245 static Value *extractInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *V,
22412246 IntegerType *Ty, uint64_t Offset,
22422247 const Twine &Name) {
2248 DEBUG(dbgs() << " start: " << *V << "\n");
22432249 IntegerType *IntTy = cast<IntegerType>(V->getType());
22442250 assert(DL.getTypeStoreSize(Ty) + Offset <= DL.getTypeStoreSize(IntTy) &&
22452251 "Element extends past full value");
22462252 uint64_t ShAmt = 8*Offset;
22472253 if (DL.isBigEndian())
22482254 ShAmt = 8*(DL.getTypeStoreSize(IntTy) - DL.getTypeStoreSize(Ty) - Offset);
2249 if (ShAmt)
2255 if (ShAmt) {
22502256 V = IRB.CreateLShr(V, ShAmt, Name + ".shift");
2257 DEBUG(dbgs() << " shifted: " << *V << "\n");
2258 }
22512259 assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
22522260 "Cannot extract to a larger integer!");
2253 if (Ty != IntTy)
2261 if (Ty != IntTy) {
22542262 V = IRB.CreateTrunc(V, Ty, Name + ".trunc");
2263 DEBUG(dbgs() << " trunced: " << *V << "\n");
2264 }
22552265 return V;
22562266 }
22572267
22612271 IntegerType *Ty = cast<IntegerType>(V->getType());
22622272 assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
22632273 "Cannot insert a larger integer!");
2264 if (Ty != IntTy)
2274 DEBUG(dbgs() << " start: " << *V << "\n");
2275 if (Ty != IntTy) {
22652276 V = IRB.CreateZExt(V, IntTy, Name + ".ext");
2277 DEBUG(dbgs() << " extended: " << *V << "\n");
2278 }
22662279 assert(DL.getTypeStoreSize(Ty) + Offset <= DL.getTypeStoreSize(IntTy) &&
22672280 "Element store outside of alloca store");
22682281 uint64_t ShAmt = 8*Offset;
22692282 if (DL.isBigEndian())
22702283 ShAmt = 8*(DL.getTypeStoreSize(IntTy) - DL.getTypeStoreSize(Ty) - Offset);
2271 if (ShAmt)
2284 if (ShAmt) {
22722285 V = IRB.CreateShl(V, ShAmt, Name + ".shift");
2286 DEBUG(dbgs() << " shifted: " << *V << "\n");
2287 }
22732288
22742289 if (ShAmt || Ty->getBitWidth() < IntTy->getBitWidth()) {
22752290 APInt Mask = ~Ty->getMask().zext(IntTy->getBitWidth()).shl(ShAmt);
22762291 Old = IRB.CreateAnd(Old, Mask, Name + ".mask");
2292 DEBUG(dbgs() << " masked: " << *Old << "\n");
22772293 V = IRB.CreateOr(Old, V, Name + ".insert");
2294 DEBUG(dbgs() << " inserted: " << *V << "\n");
22782295 }
22792296 return V;
22802297 }
24412458 void deleteIfTriviallyDead(Value *V) {
24422459 Instruction *I = cast(V);
24432460 if (isInstructionTriviallyDead(I))
2444 Pass.DeadInsts.push_back(I);
2445 }
2446
2447 bool rewriteVectorizedLoadInst(IRBuilder<> &IRB, LoadInst &LI, Value *OldOp) {
2448 Value *Result;
2461 Pass.DeadInsts.insert(I);
2462 }
2463
2464 Value *rewriteVectorizedLoadInst(IRBuilder<> &IRB, LoadInst &LI, Value *OldOp) {
2465 Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
2466 getName(".load"));
24492467 if (LI.getType() == VecTy->getElementType() ||
24502468 BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset) {
2451 Result = IRB.CreateExtractElement(
2452 IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), getName(".load")),
2453 getIndex(IRB, BeginOffset), getName(".extract"));
2454 } else {
2455 Result = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
2456 getName(".load"));
2457 }
2458 if (Result->getType() != LI.getType())
2459 Result = convertValue(TD, IRB, Result, LI.getType());
2460 LI.replaceAllUsesWith(Result);
2461 Pass.DeadInsts.push_back(&LI);
2462
2463 DEBUG(dbgs() << " to: " << *Result << "\n");
2464 return true;
2465 }
2466
2467 bool rewriteIntegerLoad(IRBuilder<> &IRB, LoadInst &LI) {
2469 V = IRB.CreateExtractElement(V, getIndex(IRB, BeginOffset),
2470 getName(".extract"));
2471 }
2472 return V;
2473 }
2474
2475 Value *rewriteIntegerLoad(IRBuilder<> &IRB, LoadInst &LI) {
24682476 assert(IntTy && "We cannot insert an integer to the alloca");
24692477 assert(!LI.isVolatile());
24702478 Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
24722480 V = convertValue(TD, IRB, V, IntTy);
24732481 assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
24742482 uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
2475 V = extractInteger(TD, IRB, V, cast<IntegerType>(LI.getType()), Offset,
2476 getName(".extract"));
2477 LI.replaceAllUsesWith(V);
2478 Pass.DeadInsts.push_back(&LI);
2479 DEBUG(dbgs() << " to: " << *V << "\n");
2480 return true;
2483 if (Offset > 0 || EndOffset < NewAllocaEndOffset)
2484 V = extractInteger(TD, IRB, V, cast<IntegerType>(LI.getType()), Offset,
2485 getName(".extract"));
2486 return V;
24812487 }
24822488
24832489 bool visitLoadInst(LoadInst &LI) {
24872493 IRBuilder<> IRB(&LI);
24882494
24892495 uint64_t Size = EndOffset - BeginOffset;
2490 if (Size < TD.getTypeStoreSize(LI.getType())) {
2496 bool IsSplitIntLoad = Size < TD.getTypeStoreSize(LI.getType());
2497
2498 // If this memory access can be shown to *statically* extend outside the
2499 // bounds of the original allocation, its behavior is undefined. Rather
2500 // than trying to transform it, just replace it with undef.
2501 // FIXME: We should do something more clever for functions being
2502 // instrumented by asan.
2503 // FIXME: Eventually, once ASan and friends can flush out bugs here, this
2504 // should be transformed to a load of null making it unreachable.
2505 uint64_t OldAllocSize = TD.getTypeAllocSize(OldAI.getAllocatedType());
2506 if (TD.getTypeStoreSize(LI.getType()) > OldAllocSize) {
2507 LI.replaceAllUsesWith(UndefValue::get(LI.getType()));
2508 Pass.DeadInsts.insert(&LI);
2509 deleteIfTriviallyDead(OldOp);
2510 DEBUG(dbgs() << " to: undef!!\n");
2511 return true;
2512 }
2513
2514 Type *TargetTy = IsSplitIntLoad ? Type::getIntNTy(LI.getContext(), Size * 8)
2515 : LI.getType();
2516 bool IsPtrAdjusted = false;
2517 Value *V;
2518 if (VecTy) {
2519 V = rewriteVectorizedLoadInst(IRB, LI, OldOp);
2520 } else if (IntTy && LI.getType()->isIntegerTy()) {
2521 V = rewriteIntegerLoad(IRB, LI);
2522 } else if (BeginOffset == NewAllocaBeginOffset &&
2523 canConvertValue(TD, NewAllocaTy, LI.getType())) {
2524 V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
2525 LI.isVolatile(), getName(".load"));
2526 } else {
2527 Type *LTy = TargetTy->getPointerTo();
2528 V = IRB.CreateAlignedLoad(getAdjustedAllocaPtr(IRB, LTy),
2529 getPartitionTypeAlign(TargetTy),
2530 LI.isVolatile(), getName(".load"));
2531 IsPtrAdjusted = true;
2532 }
2533 V = convertValue(TD, IRB, V, TargetTy);
2534
2535 if (IsSplitIntLoad) {
24912536 assert(!LI.isVolatile());
24922537 assert(LI.getType()->isIntegerTy() &&
24932538 "Only integer type loads and stores are split");
24972542 assert(LI.getType()->getIntegerBitWidth() ==
24982543 TD.getTypeAllocSizeInBits(OldAI.getAllocatedType()) &&
24992544 "Only alloca-wide loads can be split and recomposed");
2500 IntegerType *NarrowTy = Type::getIntNTy(LI.getContext(), Size * 8);
2501 bool IsConvertable = (BeginOffset - NewAllocaBeginOffset == 0) &&
2502 canConvertValue(TD, NewAllocaTy, NarrowTy);
2503 Value *V;
25042545 // Move the insertion point just past the load so that we can refer to it.
25052546 IRB.SetInsertPoint(llvm::next(BasicBlock::iterator(&LI)));
2506 if (IsConvertable)
2507 V = convertValue(TD, IRB,
2508 IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
2509 getName(".load")),
2510 NarrowTy);
2511 else
2512 V = IRB.CreateAlignedLoad(
2513 getAdjustedAllocaPtr(IRB, NarrowTy->getPointerTo()),
2514 getPartitionTypeAlign(NarrowTy), getName(".load"));
25152547 // Create a placeholder value with the same type as LI to use as the
25162548 // basis for the new value. This allows us to replace the uses of LI with
25172549 // the computed value, and then replace the placeholder with LI, leaving
25232555 LI.replaceAllUsesWith(V);
25242556 Placeholder->replaceAllUsesWith(&LI);
25252557 delete Placeholder;
2526 if (Pass.DeadSplitInsts.insert(&LI))
2527 Pass.DeadInsts.push_back(&LI);
2528 DEBUG(dbgs() << " to: " << *V << "\n");
2529 return IsConvertable;
2530 }
2531
2532 if (VecTy)
2533 return rewriteVectorizedLoadInst(IRB, LI, OldOp);
2534 if (IntTy && LI.getType()->isIntegerTy())
2535 return rewriteIntegerLoad(IRB, LI);
2536
2537 if (BeginOffset == NewAllocaBeginOffset &&
2538 canConvertValue(TD, NewAllocaTy, LI.getType())) {
2539 Value *NewLI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
2540 LI.isVolatile(), getName(".load"));
2541 Value *NewV = convertValue(TD, IRB, NewLI, LI.getType());
2542 LI.replaceAllUsesWith(NewV);
2543 Pass.DeadInsts.push_back(&LI);
2544
2545 DEBUG(dbgs() << " to: " << *NewLI << "\n");
2546 return !LI.isVolatile();
2547 }
2548
2549 assert(!IntTy && "Invalid load found with int-op widening enabled");
2550
2551 Value *NewPtr = getAdjustedAllocaPtr(IRB,
2552 LI.getPointerOperand()->getType());
2553 LI.setOperand(0, NewPtr);
2554 LI.setAlignment(getPartitionTypeAlign(LI.getType()));
2555 DEBUG(dbgs() << " to: " << LI << "\n");
2556
2558 } else {
2559 LI.replaceAllUsesWith(V);
2560 }
2561
2562 Pass.DeadInsts.insert(&LI);
25572563 deleteIfTriviallyDead(OldOp);
2558 return NewPtr == &NewAI && !LI.isVolatile();
2559 }
2560
2561 bool rewriteVectorizedStoreInst(IRBuilder<> &IRB, StoreInst &SI,
2562 Value *OldOp) {
2563 Value *V = SI.getValueOperand();
2564 DEBUG(dbgs() << " to: " << *V << "\n");
2565 return !LI.isVolatile() && !IsPtrAdjusted;
2566 }
2567
2568 bool rewriteVectorizedStoreInst(IRBuilder<> &IRB, Value *V,
2569 StoreInst &SI, Value *OldOp) {
25642570 if (V->getType() == ElementTy ||
25652571 BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset) {
25662572 if (V->getType() != ElementTy)
25732579 V = convertValue(TD, IRB, V, VecTy);
25742580 }
25752581 StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
2576 Pass.DeadInsts.push_back(&SI);
2582 Pass.DeadInsts.insert(&SI);
25772583
25782584 (void)Store;
25792585 DEBUG(dbgs() << " to: " << *Store << "\n");
25802586 return true;
25812587 }
25822588
2583 bool rewriteIntegerStore(IRBuilder<> &IRB, StoreInst &SI) {
2589 bool rewriteIntegerStore(IRBuilder<> &IRB, Value *V, StoreInst &SI) {
25842590 assert(IntTy && "We cannot extract an integer from the alloca");
25852591 assert(!SI.isVolatile());
2586 Value *V = SI.getValueOperand();
25872592 if (TD.getTypeSizeInBits(V->getType()) != IntTy->getBitWidth()) {
25882593 Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
25892594 getName(".oldload"));
25952600 }
25962601 V = convertValue(TD, IRB, V, NewAllocaTy);
25972602 StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
2598 Pass.DeadInsts.push_back(&SI);
2603 Pass.DeadInsts.insert(&SI);
25992604 (void)Store;
26002605 DEBUG(dbgs() << " to: " << *Store << "\n");
26012606 return true;
26072612 assert(OldOp == OldPtr);
26082613 IRBuilder<> IRB(&SI);
26092614
2610 if (VecTy)
2611 return rewriteVectorizedStoreInst(IRB, SI, OldOp);
2612 Type *ValueTy = SI.getValueOperand()->getType();
2615 Value *V = SI.getValueOperand();
2616
2617 // Strip all inbounds GEPs and pointer casts to try to dig out any root
2618 // alloca that should be re-examined after promoting this alloca.
2619 if (V->getType()->isPointerTy())
2620 if (AllocaInst *AI = dyn_cast<AllocaInst>(V->stripInBoundsOffsets()))
2621 Pass.PostPromotionWorklist.insert(AI);
26132622
26142623 uint64_t Size = EndOffset - BeginOffset;
2615 if (Size < TD.getTypeStoreSize(ValueTy)) {
2624 if (Size < TD.getTypeStoreSize(V->getType())) {
26162625 assert(!SI.isVolatile());
2617 assert(ValueTy->isIntegerTy() &&
2626 assert(V->getType()->isIntegerTy() &&
26182627 "Only integer type loads and stores are split");
2619 assert(ValueTy->getIntegerBitWidth() ==
2620 TD.getTypeStoreSizeInBits(ValueTy) &&
2628 assert(V->getType()->getIntegerBitWidth() ==
2629 TD.getTypeStoreSizeInBits(V->getType()) &&
26212630 "Non-byte-multiple bit width");
2622 assert(ValueTy->getIntegerBitWidth() ==
2631 assert(V->getType()->getIntegerBitWidth() ==
26232632 TD.getTypeSizeInBits(OldAI.getAllocatedType()) &&
26242633 "Only alloca-wide stores can be split and recomposed");
26252634 IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), Size * 8);
2626 Value *V = extractInteger(TD, IRB, SI.getValueOperand(), NarrowTy,
2627 BeginOffset, getName(".extract"));
2628 StoreInst *NewSI;
2629 bool IsConvertable = (BeginOffset - NewAllocaBeginOffset == 0) &&
2630 canConvertValue(TD, NarrowTy, NewAllocaTy);
2631 if (IsConvertable)
2632 NewSI = IRB.CreateAlignedStore(convertValue(TD, IRB, V, NewAllocaTy),
2633 &NewAI, NewAI.getAlignment());
2634 else
2635 NewSI = IRB.CreateAlignedStore(
2636 V, getAdjustedAllocaPtr(IRB, NarrowTy->getPointerTo()),
2637 getPartitionTypeAlign(NarrowTy));
2638 (void)NewSI;
2639 if (Pass.DeadSplitInsts.insert(&SI))
2640 Pass.DeadInsts.push_back(&SI);
2641
2642 DEBUG(dbgs() << " to: " << *NewSI << "\n");
2643 return IsConvertable;
2644 }
2645
2646 if (IntTy && ValueTy->isIntegerTy())
2647 return rewriteIntegerStore(IRB, SI);
2648
2649 // Strip all inbounds GEPs and pointer casts to try to dig out any root
2650 // alloca that should be re-examined after promoting this alloca.
2651 if (ValueTy->isPointerTy())
2652 if (AllocaInst *AI = dyn_cast<AllocaInst>(SI.getValueOperand()
2653 ->stripInBoundsOffsets()))
2654 Pass.PostPromotionWorklist.insert(AI);
2655
2635 V = extractInteger(TD, IRB, V, NarrowTy, BeginOffset,
2636 getName(".extract"));
2637 }
2638
2639 if (VecTy)
2640 return rewriteVectorizedStoreInst(IRB, V, SI, OldOp);
2641 if (IntTy && V->getType()->isIntegerTy())
2642 return rewriteIntegerStore(IRB, V, SI);
2643
2644 StoreInst *NewSI;
26562645 if (BeginOffset == NewAllocaBeginOffset &&
2657 canConvertValue(TD, ValueTy, NewAllocaTy)) {
2658 Value *NewV = convertValue(TD, IRB, SI.getValueOperand(), NewAllocaTy);
2659 StoreInst *NewSI = IRB.CreateAlignedStore(NewV, &NewAI, NewAI.getAlignment(),
2660 SI.isVolatile());
2661 (void)NewSI;
2662 Pass.DeadInsts.push_back(&SI);
2663
2664 DEBUG(dbgs() << " to: " << *NewSI << "\n");
2665 return !SI.isVolatile();
2666 }
2667
2668 assert(!IntTy && "Invalid store found with int-op widening enabled");
2669
2670 Value *NewPtr = getAdjustedAllocaPtr(IRB,
2671 SI.getPointerOperand()->getType());
2672 SI.setOperand(1, NewPtr);
2673 SI.setAlignment(getPartitionTypeAlign(SI.getValueOperand()->getType()));
2674 DEBUG(dbgs() << " to: " << SI << "\n");
2675
2646 canConvertValue(TD, V->getType(), NewAllocaTy)) {
2647 V = convertValue(TD, IRB, V, NewAllocaTy);
2648 NewSI = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(),
2649 SI.isVolatile());
2650 } else {
2651 Value *NewPtr = getAdjustedAllocaPtr(IRB, V->getType()->getPointerTo());
2652 NewSI = IRB.CreateAlignedStore(V, NewPtr,
2653 getPartitionTypeAlign(V->getType()),
2654 SI.isVolatile());
2655 }
2656 (void)NewSI;
2657 Pass.DeadInsts.insert(&SI);
26762658 deleteIfTriviallyDead(OldOp);
2677 return NewPtr == &NewAI && !SI.isVolatile();
2659
2660 DEBUG(dbgs() << " to: " << *NewSI << "\n");
2661 return NewSI->getPointerOperand() == &NewAI && !SI.isVolatile();
26782662 }
26792663
26802664 bool visitMemSetInst(MemSetInst &II) {
26942678 }
26952679
26962680 // Record this instruction for deletion.
2697 if (Pass.DeadSplitInsts.insert(&II))
2698 Pass.DeadInsts.push_back(&II);
2681 Pass.DeadInsts.insert(&II);
26992682
27002683 Type *AllocaTy = NewAI.getAllocatedType();
27012684 Type *ScalarTy = AllocaTy->getScalarType();
28512834 return false;
28522835 }
28532836 // Record this instruction for deletion.
2854 if (Pass.DeadSplitInsts.insert(&II))
2855 Pass.DeadInsts.push_back(&II);
2837 Pass.DeadInsts.insert(&II);
28562838
28572839 bool IsWholeAlloca = BeginOffset == NewAllocaBeginOffset &&
28582840 EndOffset == NewAllocaEndOffset;
29622944 assert(II.getArgOperand(1) == OldPtr);
29632945
29642946 // Record this instruction for deletion.
2965 if (Pass.DeadSplitInsts.insert(&II))
2966 Pass.DeadInsts.push_back(&II);
2947 Pass.DeadInsts.insert(&II);
29672948
29682949 ConstantInt *Size
29692950 = ConstantInt::get(cast<IntegerType>(II.getArgOperand(0)->getType()),
35323513 DI != DE; ++DI) {
35333514 Changed = true;
35343515 (*DI)->replaceAllUsesWith(UndefValue::get((*DI)->getType()));
3535 DeadInsts.push_back(*DI);
3516 DeadInsts.insert(*DI);
35363517 }
35373518 for (AllocaPartitioning::dead_op_iterator DO = P.dead_op_begin(),
35383519 DE = P.dead_op_end();
35433524 if (Instruction *OldI = dyn_cast<Instruction>(OldV))
35443525 if (isInstructionTriviallyDead(OldI)) {
35453526 Changed = true;
3546 DeadInsts.push_back(OldI);
3527 DeadInsts.insert(OldI);
35473528 }
35483529 }
35493530
35643545 /// We also record the alloca instructions deleted here so that they aren't
35653546 /// subsequently handed to mem2reg to promote.
35663547 void SROA::deleteDeadInstructions(SmallPtrSet<AllocaInst *, 4> &DeletedAllocas) {
3567 DeadSplitInsts.clear();
35683548 while (!DeadInsts.empty()) {
35693549 Instruction *I = DeadInsts.pop_back_val();
35703550 DEBUG(dbgs() << "Deleting dead instruction: " << *I << "\n");
35763556 // Zero out the operand and see if it becomes trivially dead.
35773557 *OI = 0;
35783558 if (isInstructionTriviallyDead(U))
3579 DeadInsts.push_back(U);
3559 DeadInsts.insert(U);
35803560 }
35813561
35823562 if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
10991099 %imag = getelementptr inbounds { float, float }* %retval, i32 0, i32 1
11001100 store float %phi.real, float* %real
11011101 store float %phi.imag, float* %imag
1102 ; CHECK-NEXT: %[[real_convert:.*]] = bitcast float %[[real]] to i32
11021103 ; CHECK-NEXT: %[[imag_convert:.*]] = bitcast float %[[imag]] to i32
11031104 ; CHECK-NEXT: %[[imag_ext:.*]] = zext i32 %[[imag_convert]] to i64
11041105 ; CHECK-NEXT: %[[imag_shift:.*]] = shl i64 %[[imag_ext]], 32
11051106 ; CHECK-NEXT: %[[imag_mask:.*]] = and i64 undef, 4294967295
11061107 ; CHECK-NEXT: %[[imag_insert:.*]] = or i64 %[[imag_mask]], %[[imag_shift]]
1107 ; CHECK-NEXT: %[[real_convert:.*]] = bitcast float %[[real]] to i32
11081108 ; CHECK-NEXT: %[[real_ext:.*]] = zext i32 %[[real_convert]] to i64
11091109 ; CHECK-NEXT: %[[real_mask:.*]] = and i64 %[[imag_insert]], -4294967296
11101110 ; CHECK-NEXT: %[[real_insert:.*]] = or i64 %[[real_mask]], %[[real_ext]]
389389 %tmpcast.d.0 = select i1 undef, i32* %c, i32* %d.0
390390 br label %for.cond
391391 }
392
393 define i64 @PR14132(i1 %flag) {
394 ; CHECK: @PR14132
395 ; Here we form a PHI-node by promoting the pointer alloca first, and then in
396 ; order to promote the other two allocas, we speculate the load of the
397 ; now-phi-node-pointer. In doing so we end up loading a 64-bit value from an i8
398 ; alloca, which is completely bogus. However, we were asserting on trying to
399 ; rewrite it. Now it is replaced with undef. Eventually we may replace it with
400 ; unreachable and even the CFG will go away here.
401 entry:
402 %a = alloca i64
403 %b = alloca i8
404 %ptr = alloca i64*
405 ; CHECK-NOT: alloca
406
407 %ptr.cast = bitcast i64** %ptr to i8**
408 store i64 0, i64* %a
409 store i8 1, i8* %b
410 store i64* %a, i64** %ptr
411 br i1 %flag, label %if.then, label %if.end
412
413 if.then:
414 store i8* %b, i8** %ptr.cast
415 br label %if.end
416
417 if.end:
418 %tmp = load i64** %ptr
419 %result = load i64* %tmp
420 ; CHECK-NOT: store
421 ; CHECK-NOT: load
422 ; CHECK: %[[result:.*]] = phi i64 [ undef, %if.then ], [ 0, %entry ]
423
424 ret i64 %result
425 ; CHECK-NEXT: ret i64 %[[result]]
426 }
219219 ret i32 %load
220220 ; CHECK: ret i32
221221 }
222
223 define <2 x i8> @PR14349.1(i32 %x) {
224 ; CHECK: @PR14349.1
225 ; The first testcase for broken SROA rewriting of split integer loads and
226 ; stores due to smaller vector loads and stores. This particular test ensures
227 ; that we can rewrite a split store of an integer to a store of a vector.
228 entry:
229 %a = alloca i32
230 ; CHECK-NOT: alloca
231
232 store i32 %x, i32* %a
233 ; CHECK-NOT: store
234
235 %cast = bitcast i32* %a to <2 x i8>*
236 %vec = load <2 x i8>* %cast
237 ; CHECK-NOT: load
238
239 ret <2 x i8> %vec
240 ; CHECK: %[[trunc:.*]] = trunc i32 %x to i16
241 ; CHECK: %[[cast:.*]] = bitcast i16 %[[trunc]] to <2 x i8>
242 ; CHECK: ret <2 x i8> %[[cast]]
243 }
244
245 define i32 @PR14349.2(<2 x i8> %x) {
246 ; CHECK: @PR14349.2
247 ; The second testcase for broken SROA rewriting of split integer loads and
248 ; stores due to smaller vector loads and stores. This particular test ensures
249 ; that we can rewrite a split load of an integer to a load of a vector.
250 entry:
251 %a = alloca i32
252 ; CHECK-NOT: alloca
253
254 %cast = bitcast i32* %a to <2 x i8>*
255 store <2 x i8> %x, <2 x i8>* %cast
256 ; CHECK-NOT: store
257
258 %int = load i32* %a
259 ; CHECK-NOT: load
260
261 ret i32 %int
262 ; CHECK: %[[cast:.*]] = bitcast <2 x i8> %x to i16
263 ; CHECK: %[[trunc:.*]] = zext i16 %[[cast]] to i32
264 ; CHECK: %[[insert:.*]] = or i32 %{{.*}}, %[[trunc]]
265 ; CHECK: ret i32 %[[insert]]
266 }