llvm.org GIT mirror llvm / 335a8e8 (author: Geoff Berry)

[EarlyCSE] Optimize MemoryPhis and reduce memory clobber queries w/ MemorySSA

Summary:
When using MemorySSA, re-optimize MemoryPhis when removing a store since this
may create MemoryPhis with all identical arguments.

Also, when using MemorySSA to check if two MemoryUses are reading from the
same version of the heap, use the defining access instead of calling
getClobberingAccess, since the latter can currently result in many more AA
calls. Once the MemorySSA use optimization tracking changes are done, we can
remove this limitation, which should result in more loads being CSE'd.

Reviewers: dberlin

Subscribers: mcrosier, llvm-commits

Differential Revision: https://reviews.llvm.org/D25881

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@284984 91177308-0d34-0410-b5e6-96231b3b80d8
2 changed files with 79 additions and 10 deletions.
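To make the first change concrete, here is a minimal sketch of the situation it targets, before the diff itself. The function and global names are hypothetical, and the MemorySSA annotations in the comments are informal rather than tool output. EarlyCSE deletes the store in %then because it only writes back the value it just loaded, and once that store is gone the MemoryPhi at %end carries liveOnEntry on both incoming edges; unless the phi is cleaned up, it still looks like a clobber for the load in %end and blocks CSE with the earlier load of @g.

; Sketch only: hypothetical names, informal MemorySSA annotations.
@g = external global i32

define i32 @memphi_sketch(i1 %c, i32* %p) {
entry:
  %v1 = load i32, i32* @g        ; MemoryUse(liveOnEntry)
  br i1 %c, label %then, label %end

then:
  %pv = load i32, i32* %p        ; MemoryUse(liveOnEntry)
  store i32 %pv, i32* %p         ; 1 = MemoryDef(liveOnEntry), removed by EarlyCSE
  br label %end

end:                             ; 2 = MemoryPhi({entry,liveOnEntry},{then,1})
  %v2 = load i32, i32* @g        ; MemoryUse(2)
  %sum = add i32 %v1, %v2
  ret i32 %sum
}

With this patch, removing the store also removes the now-redundant MemoryPhi, so the defining access of %v2 becomes liveOnEntry and the load can be CSE'd with %v1. The test added at the bottom of this diff exercises the same pattern.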
@@ -497,15 +497,43 @@
       return;
     // FIXME: Removing a store here can leave MemorySSA in an unoptimized state
     // by creating MemoryPhis that have identical arguments and by creating
-    // MemoryUses whose defining access is not an actual clobber.
-    if (MemoryAccess *MA = MSSA->getMemoryAccess(Inst))
-      MSSA->removeMemoryAccess(MA);
+    // MemoryUses whose defining access is not an actual clobber. We handle the
+    // phi case here, but the non-optimized MemoryUse case is not handled. Once
+    // MemorySSA tracks whether uses are optimized this will be taken care of on
+    // the MemorySSA side.
+    if (MemoryAccess *MA = MSSA->getMemoryAccess(Inst)) {
+      // Optimize MemoryPhi nodes that may become redundant by having all the
+      // same input values once MA is removed.
+      SmallVector<MemoryPhi *, 4> PhisToCheck;
+      SmallVector<MemoryAccess *, 8> WorkQueue;
+      WorkQueue.push_back(MA);
+      // Process MemoryPhi nodes in FIFO order using an ever-growing vector since
+      // we shouldn't be processing that many phis and this will avoid an
+      // allocation in almost all cases.
+      for (unsigned I = 0; I < WorkQueue.size(); ++I) {
+        MemoryAccess *WI = WorkQueue[I];
+
+        for (auto *U : WI->users())
+          if (MemoryPhi *MP = dyn_cast<MemoryPhi>(U))
+            PhisToCheck.push_back(MP);
+
+        MSSA->removeMemoryAccess(WI);
+
+        for (MemoryPhi *MP : PhisToCheck) {
+          MemoryAccess *FirstIn = MP->getIncomingValue(0);
+          if (all_of(MP->incoming_values(),
+                     [=](Use &In) { return In == FirstIn; }))
+            WorkQueue.push_back(MP);
+        }
+        PhisToCheck.clear();
+      }
+    }
   }
 };
 }
 
-/// Determine if the memory referenced by LaterInst is from the same heap version
-/// as EarlierInst.
+/// Determine if the memory referenced by LaterInst is from the same heap
+/// version as EarlierInst.
 /// This is currently called in two scenarios:
 ///
 ///   load p
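The work queue in the removeMSSA change above exists because one removal can cascade: deleting a MemoryDef can make a MemoryPhi redundant, and deleting that phi can make an enclosing MemoryPhi redundant in turn. A minimal sketch of such a cascade, again with hypothetical names and informal MemorySSA annotations:

; Sketch only: hypothetical names, informal MemorySSA annotations.
@g = external global i32

define i32 @cascade_sketch(i1 %c1, i1 %c2, i32* %p) {
entry:
  %v1 = load i32, i32* @g      ; MemoryUse(liveOnEntry)
  br i1 %c1, label %outer.then, label %outer.end

outer.then:
  br i1 %c2, label %inner.then, label %inner.end

inner.then:
  %pv = load i32, i32* %p      ; MemoryUse(liveOnEntry)
  store i32 %pv, i32* %p       ; 1 = MemoryDef(liveOnEntry), removed by EarlyCSE
  br label %inner.end

inner.end:                     ; 2 = MemoryPhi({inner.then,1},{outer.then,liveOnEntry})
  br label %outer.end

outer.end:                     ; 3 = MemoryPhi({inner.end,2},{entry,liveOnEntry})
  %v2 = load i32, i32* @g      ; MemoryUse(3)
  %sum = add i32 %v1, %v2
  ret i32 %sum
}

Removing MemoryDef 1 leaves MemoryPhi 2 with liveOnEntry on both edges, so it is queued and removed; that in turn leaves MemoryPhi 3 with identical incoming values, so it is queued and removed as well, and %v2 ends up defined by liveOnEntry. Scanning the growing WorkQueue by index gives the FIFO behavior described in the comment without allocating a separate queue.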
@@ -535,11 +563,17 @@
   // LaterInst, if LaterDef dominates EarlierInst then it can't occur between
   // EarlierInst and LaterInst and neither can any other write that potentially
   // clobbers LaterInst.
-  // FIXME: This is currently fairly expensive since it does an AA check even
-  // for MemoryUses that were already optimized by MemorySSA construction.
-  // Re-visit once MemorySSA optimized use tracking change has been committed.
-  MemoryAccess *LaterDef =
-      MSSA->getWalker()->getClobberingMemoryAccess(LaterInst);
+  // FIXME: Use getClobberingMemoryAccess only for stores since it is currently
+  // fairly expensive to call on MemoryUses, as it does an AA check even for
+  // MemoryUses that were already optimized by MemorySSA construction. Once the
+  // MemorySSA optimized-use tracking change has been committed, we can use
+  // getClobberingMemoryAccess for MemoryUses as well.
+  MemoryAccess *LaterMA = MSSA->getMemoryAccess(LaterInst);
+  MemoryAccess *LaterDef;
+  if (auto *LaterUse = dyn_cast<MemoryUse>(LaterMA))
+    LaterDef = LaterUse->getDefiningAccess();
+  else
+    LaterDef = MSSA->getWalker()->getClobberingMemoryAccess(LaterInst);
   return MSSA->dominates(LaterDef, MSSA->getMemoryAccess(EarlierInst));
 }
 
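To illustrate the MemoryUse path of the check above: in the sketch below (hypothetical names, informal MemorySSA annotations, and noalias used only to make the aliasing obvious), MemorySSA construction already optimizes the second load past the non-aliasing store, so reading its defining access is enough. That access is liveOnEntry, which dominates the first load's access, so isSameMemGeneration returns true and the second load can be CSE'd without EarlyCSE repeating the alias query through getClobberingMemoryAccess.

; Sketch only: hypothetical names, informal MemorySSA annotations.
define i32 @samegen_sketch(i32* noalias %p, i32* noalias %q) {
entry:
  %v1 = load i32, i32* %p      ; MemoryUse(liveOnEntry)   <- EarlierInst
  store i32 7, i32* %q         ; 1 = MemoryDef(liveOnEntry), does not alias %p
  %v2 = load i32, i32* %p      ; MemoryUse(liveOnEntry)   <- LaterInst
  %sum = add i32 %v1, %v2
  ret i32 %sum
}

A MemoryUse that is created or left un-optimized after construction may instead report the nearest dominating MemoryDef (here, 1) as its defining access; the dominance check then fails and the load is conservatively not CSE'd, which is the imprecision the FIXME above accepts until MemorySSA tracks whether each use is optimized.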
@@ -31,3 +31,38 @@
   store i32 %V1, i32* @G1
   ret void
 }
+
+;; Check that memoryphi optimization happens during EarlyCSE, enabling
+;; more load CSE opportunities.
+; CHECK-LABEL: @test_memphiopt(
+; CHECK-NOMEMSSA-LABEL: @test_memphiopt(
+define void @test_memphiopt(i1 %c, i32* %p) {
+; CHECK-LABEL: entry:
+; CHECK-NOMEMSSA-LABEL: entry:
+entry:
+; CHECK: load
+; CHECK-NOMEMSSA: load
+  %v1 = load i32, i32* @G1
+  br i1 %c, label %then, label %end
+
+; CHECK-LABEL: then:
+; CHECK-NOMEMSSA-LABEL: then:
+then:
+; CHECK: load
+; CHECK-NOMEMSSA: load
+  %pv = load i32, i32* %p
+; CHECK-NOT: store
+; CHECK-NOMEMSSA-NOT: store
+  store i32 %pv, i32* %p
+  br label %end
+
+; CHECK-LABEL: end:
+; CHECK-NOMEMSSA-LABEL: end:
+end:
+; CHECK-NOT: load
+; CHECK-NOMEMSSA: load
+  %v2 = load i32, i32* @G1
+  %sum = add i32 %v1, %v2
+  store i32 %sum, i32* @G2
+  ret void
+}
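For reference, a test like this is driven by RUN lines at the top of the file, which are not part of this hunk. The two check prefixes suggest one run of plain EarlyCSE checked with CHECK-NOMEMSSA and one run of the MemorySSA-backed pass checked with the default CHECK prefix; the flags below are an assumption based on the opt options available at the time, not a quote from the file.

;; Assumed RUN lines, not taken from the actual test file.
; RUN: opt < %s -S -early-cse | FileCheck %s --check-prefix=CHECK-NOMEMSSA
; RUN: opt < %s -S -basicaa -early-cse-memssa | FileCheck %s

The interesting difference is in the end block: with MemorySSA the second load of @G1 is expected to be CSE'd away (CHECK-NOT: load), while plain EarlyCSE's generation counting is too coarse to prove the heap version is unchanged across the join, so the load remains (CHECK-NOMEMSSA: load).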