llvm.org GIT mirror llvm / 28a123a
[LoopRerolling] Be more forgiving with instruction order. We can't solve the full subgraph isomorphism problem. But we can allow obvious cases, where for example two instructions of different types are out of order. Due to them having different types/opcodes, there is no ambiguity. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@228931 91177308-0d34-0410-b5e6-96231b3b80d8 James Molloy 4 years ago
2 changed file(s) with 139 addition(s) and 17 deletion(s). Raw diff Collapse all Expand all
4343 static cl::opt<unsigned>
4444 MaxInc("max-reroll-increment", cl::init(2048), cl::Hidden,
4545 cl::desc("The maximum increment for loop rerolling"));
46
47 static cl::opt<unsigned>
48 NumToleratedFailedMatches("reroll-num-tolerated-failed-matches", cl::init(400),
49 cl::Hidden,
50 cl::desc("The maximum number of failures to tolerate"
51 " during fuzzy matching. (default: 400)"));
4652
4753 // This loop re-rolling transformation aims to transform loops like this:
4854 //
393399 const SmallInstructionSet &Final,
394400 DenseSet &Users);
395401
396 UsesTy::iterator nextInstr(int Val, UsesTy &In, UsesTy::iterator I);
402 UsesTy::iterator nextInstr(int Val, UsesTy &In,
403 const SmallInstructionSet &Exclude,
404 UsesTy::iterator *StartI=nullptr);
397405 bool isBaseInst(Instruction *I);
398406 bool isRootInst(Instruction *I);
407 bool instrDependsOn(Instruction *I,
408 UsesTy::iterator Start,
409 UsesTy::iterator End);
399410
400411 LoopReroll *Parent;
401412
940951
941952 }
942953
954 /// Get the next instruction in "In" that is a member of set Val.
955 /// Start searching from StartI, and do not return anything in Exclude.
956 /// If StartI is not given, start from In.begin().
943957 LoopReroll::DAGRootTracker::UsesTy::iterator
944958 LoopReroll::DAGRootTracker::nextInstr(int Val, UsesTy &In,
945 UsesTy::iterator I) {
946 while (I != In.end() && I->second.test(Val) == 0)
959 const SmallInstructionSet &Exclude,
960 UsesTy::iterator *StartI) {
961 UsesTy::iterator I = StartI ? *StartI : In.begin();
962 while (I != In.end() && (I->second.test(Val) == 0 ||
963 Exclude.count(I->first) != 0))
947964 ++I;
948965 return I;
949966 }
960977 for (auto &DRS : RootSets) {
961978 if (std::find(DRS.Roots.begin(), DRS.Roots.end(), I) != DRS.Roots.end())
962979 return true;
980 }
981 return false;
982 }
983
984 /// Return true if any user of instruction I is one of the instructions in
985 /// [Start, End). NOTE(review): the name suggests checking I's operands; confirm.
986 bool LoopReroll::DAGRootTracker::instrDependsOn(Instruction *I,
987 UsesTy::iterator Start,
988 UsesTy::iterator End) {
989 for (auto *U : I->users()) {
990 for (auto It = Start; It != End; ++It)
991 if (U == It->first)
992 return true;
963993 }
964994 return false;
965995 }
10211051 DenseMap BaseMap;
10221052
10231053 // Compare iteration Iter to the base.
1024 auto BaseIt = nextInstr(0, Uses, Uses.begin());
1025 auto RootIt = nextInstr(Iter, Uses, Uses.begin());
1054 SmallInstructionSet Visited;
1055 auto BaseIt = nextInstr(0, Uses, Visited);
1056 auto RootIt = nextInstr(Iter, Uses, Visited);
10261057 auto LastRootIt = Uses.begin();
10271058
10281059 while (BaseIt != Uses.end() && RootIt != Uses.end()) {
10321063 // Skip over the IV or root instructions; only match their users.
10331064 bool Continue = false;
10341065 if (isBaseInst(BaseInst)) {
1035 BaseIt = nextInstr(0, Uses, ++BaseIt);
1066 Visited.insert(BaseInst);
1067 BaseIt = nextInstr(0, Uses, Visited);
10361068 Continue = true;
10371069 }
10381070 if (isRootInst(RootInst)) {
10391071 LastRootIt = RootIt;
1040 RootIt = nextInstr(Iter, Uses, ++RootIt);
1072 Visited.insert(RootInst);
1073 RootIt = nextInstr(Iter, Uses, Visited);
10411074 Continue = true;
10421075 }
10431076 if (Continue) continue;
10441077
1078 if (!BaseInst->isSameOperationAs(RootInst)) {
1079 // Last chance saloon. We don't try to solve the full isomorphism
1080 // problem, but we do try to catch the case where two instructions
1081 // *of different types* are in the wrong order. We won't be able to
1082 // efficiently tell, given two ADD instructions, which way around we
1083 // should match them, but given an ADD and a SUB, we can at least infer
1084 // which one is which.
1085 //
1086 // This should allow us to deal with a greater subset of the isomorphism
1087 // problem. It does however change a linear algorithm into a quadratic
1088 // one, so limit the number of probes we do.
1089 auto TryIt = RootIt;
1090 unsigned N = NumToleratedFailedMatches;
1091 while (TryIt != Uses.end() &&
1092 !BaseInst->isSameOperationAs(TryIt->first) &&
1093 N--) {
1094 ++TryIt;
1095 TryIt = nextInstr(Iter, Uses, Visited, &TryIt);
1096 }
1097
1098 if (TryIt == Uses.end() || TryIt == RootIt ||
1099 instrDependsOn(TryIt->first, RootIt, TryIt)) {
1100 DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst <<
1101 " vs. " << *RootInst << "\n");
1102 return false;
1103 }
1104
1105 RootIt = TryIt;
1106 RootInst = TryIt->first;
1107 }
1108
10451109 // All instructions between the last root and this root
1046 // belong to some other iteration. If they belong to a
1110 // may belong to some other iteration. If they belong to a
10471111 // future iteration, then they're dangerous to alias with.
1048 for (; LastRootIt != RootIt; ++LastRootIt) {
1112 //
1113 // Note that because we allow a limited amount of flexibility in the order
1114 // that we visit nodes, LastRootIt might be *after* RootIt, in which
1115 // case we've already checked this set of instructions so we shouldn't
1116 // do anything.
1117 for (; LastRootIt < RootIt; ++LastRootIt) {
10491118 Instruction *I = LastRootIt->first;
10501119 if (LastRootIt->second.find_first() < (int)Iter)
10511120 continue;
10591128 !isSafeToSpeculativelyExecute(I, DL))
10601129 // Intervening instructions cause side effects.
10611130 FutureSideEffects = true;
1062 }
1063
1064 if (!BaseInst->isSameOperationAs(RootInst)) {
1065 DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst <<
1066 " vs. " << *RootInst << "\n");
1067 return false;
10681131 }
10691132
10701133 // Make sure that this instruction, which is in the use set of this
11731236 BaseMap.insert(std::make_pair(RootInst, BaseInst));
11741237
11751238 LastRootIt = RootIt;
1176 BaseIt = nextInstr(0, Uses, ++BaseIt);
1177 RootIt = nextInstr(Iter, Uses, ++RootIt);
1239 Visited.insert(BaseInst);
1240 Visited.insert(RootInst);
1241 BaseIt = nextInstr(0, Uses, Visited);
1242 RootIt = nextInstr(Iter, Uses, Visited);
11781243 }
11791244 assert (BaseIt == Uses.end() && RootIt == Uses.end() &&
11801245 "Mismatched set sizes!");
487487 ret void
488488 }
489489
490 ; int foo(int a);
491 ; void bar2(int *x, int y, int z) {
492 ; for (int i = 0; i < 500; i += 3) {
493 ; foo(i+y+i*z); // Slightly reordered instruction order
494 ; foo(i+1+y+(i+1)*z);
495 ; foo(i+2+y+(i+2)*z);
496 ; }
497 ; }
498
499 ; Function Attrs: nounwind uwtable
500 define void @bar2(i32* nocapture readnone %x, i32 %y, i32 %z) #0 {
501 entry:
502 br label %for.body
503
504 for.body: ; preds = %for.body, %entry
505 %i.08 = phi i32 [ 0, %entry ], [ %add3, %for.body ]
506
507 %tmp1 = add i32 %i.08, %y
508 %tmp2 = mul i32 %i.08, %z
509 %tmp3 = add i32 %tmp2, %tmp1
510 %call = tail call i32 @foo(i32 %tmp3) #1
511
512 %add = add nsw i32 %i.08, 1
513 %tmp2a = mul i32 %add, %z
514 %tmp1a = add i32 %add, %y
515 %tmp3a = add i32 %tmp2a, %tmp1a
516 %calla = tail call i32 @foo(i32 %tmp3a) #1
517
518 %add2 = add nsw i32 %i.08, 2
519 %tmp2b = mul i32 %add2, %z
520 %tmp1b = add i32 %add2, %y
521 %tmp3b = add i32 %tmp2b, %tmp1b
522 %callb = tail call i32 @foo(i32 %tmp3b) #1
523
524 %add3 = add nsw i32 %i.08, 3
525
526 %exitcond = icmp eq i32 %add3, 500
527 br i1 %exitcond, label %for.end, label %for.body
528
529 ; CHECK-LABEL: @bar2
530
531 ; CHECK: for.body:
532 ; CHECK: %indvar = phi i32 [ %indvar.next, %for.body ], [ 0, %entry ]
533 ; CHECK: %tmp1 = add i32 %indvar, %y
534 ; CHECK: %tmp2 = mul i32 %indvar, %z
535 ; CHECK: %tmp3 = add i32 %tmp2, %tmp1
536 ; CHECK: %call = tail call i32 @foo(i32 %tmp3) #1
537 ; CHECK: %indvar.next = add i32 %indvar, 1
538 ; CHECK: %exitcond1 = icmp eq i32 %indvar, 497
539 ; CHECK: br i1 %exitcond1, label %for.end, label %for.body
540
541 ; CHECK: ret
542
543 for.end: ; preds = %for.body
544 ret void
545 }
546
490547
491548 attributes #0 = { nounwind uwtable }
492549 attributes #1 = { nounwind }