llvm.org GIT mirror llvm / c29a56d
Fix PR3241: Currently EmitCopyFromReg emits a copy from the physical register to a virtual register unless it requires an expensive cross class copy. That means we are only treating "expensive to copy" register dependency as physical register dependency. Also future proof the scheduler to handle "normal" physical register dependencies. The code is not exercised yet. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@62074 91177308-0d34-0410-b5e6-96231b3b80d8 Evan Cheng 11 years ago
7 changed file(s) with 114 addition(s) and 66 deletion(s). Raw diff Collapse all Expand all
484484 protected:
485485 void AddMemOperand(MachineInstr *MI, const MachineMemOperand &MO);
486486
487 void EmitCrossRCCopy(SUnit *SU, DenseMap &VRBaseMap);
487 void EmitPhysRegCopy(SUnit *SU, DenseMap &VRBaseMap);
488488
489489 /// ForceUnitLatencies - Return true if all scheduling edges should be given a
490490 /// latency value of one. The default is to return false; schedulers may
3535 TII->insertNoop(*BB, BB->end());
3636 }
3737
38 void ScheduleDAG::EmitCrossRCCopy(SUnit *SU,
38 void ScheduleDAG::EmitPhysRegCopy(SUnit *SU,
3939 DenseMap &VRBaseMap) {
4040 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
4141 I != E; ++I) {
4848 unsigned Reg = 0;
4949 for (SUnit::const_succ_iterator II = SU->Succs.begin(),
5050 EE = SU->Succs.end(); II != EE; ++II) {
51 if (I->getReg()) {
52 Reg = I->getReg();
51 if (II->getReg()) {
52 Reg = II->getReg();
5353 break;
5454 }
5555 }
56 assert(I->getReg() && "Unknown physical register!");
5756 TII->copyRegToReg(*BB, BB->end(), Reg, VRI->second,
5857 SU->CopyDstRC, SU->CopySrcRC);
5958 } else {
2727
2828 STATISTIC(NumUnfolds, "Number of nodes unfolded");
2929 STATISTIC(NumDups, "Number of duplicated nodes");
30 STATISTIC(NumCCCopies, "Number of cross class copies");
30 STATISTIC(NumPRCopies, "Number of physical copies");
3131
3232 static RegisterScheduler
3333 fastDAGScheduler("fast", "Fast suboptimal list scheduling",
9292 void ReleasePred(SUnit *SU, SDep *PredEdge);
9393 void ScheduleNodeBottomUp(SUnit*, unsigned);
9494 SUnit *CopyAndMoveSuccessors(SUnit*);
95 void InsertCCCopiesAndMoveSuccs(SUnit*, unsigned,
96 const TargetRegisterClass*,
97 const TargetRegisterClass*,
98 SmallVector&);
95 void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
96 const TargetRegisterClass*,
97 const TargetRegisterClass*,
98 SmallVector&);
9999 bool DelayForLiveRegsBottomUp(SUnit*, SmallVector&);
100100 void ListScheduleBottomUp();
101101
360360 DelDeps.push_back(std::make_pair(SuccSU, D));
361361 }
362362 }
363 for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) {
363 for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
364364 RemovePred(DelDeps[i].first, DelDeps[i].second);
365 }
366365
367366 ++NumDups;
368367 return NewSU;
369368 }
370369
371 /// InsertCCCopiesAndMoveSuccs - Insert expensive cross register class copies
372 /// and move all scheduled successors of the given SUnit to the last copy.
373 void ScheduleDAGFast::InsertCCCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
370 /// InsertCopiesAndMoveSuccs - Insert register copies and move all
371 /// scheduled successors of the given SUnit to the last copy.
372 void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
374373 const TargetRegisterClass *DestRC,
375374 const TargetRegisterClass *SrcRC,
376375 SmallVector &Copies) {
407406 Copies.push_back(CopyFromSU);
408407 Copies.push_back(CopyToSU);
409408
410 ++NumCCCopies;
409 ++NumPRCopies;
411410 }
412411
413412 /// getPhysicalRegisterVT - Returns the ValueType of the physical register
523522 assert(LRegs.size() == 1 && "Can't handle this yet!");
524523 unsigned Reg = LRegs[0];
525524 SUnit *LRDef = LiveRegDefs[Reg];
526 SUnit *NewDef = CopyAndMoveSuccessors(LRDef);
525 MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
526 const TargetRegisterClass *RC =
527 TRI->getPhysicalRegisterRegClass(Reg, VT);
528 const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
529
530 // If cross copy register class is null, then it must be possible to copy
531 // the value directly. Do not try to duplicate the def.
532 SUnit *NewDef = 0;
533 if (DestRC)
534 NewDef = CopyAndMoveSuccessors(LRDef);
535 else
536 DestRC = RC;
527537 if (!NewDef) {
528 // Issue expensive cross register class copies.
529 MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
530 const TargetRegisterClass *RC =
531 TRI->getPhysicalRegisterRegClass(Reg, VT);
532 const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
533 if (!DestRC) {
534 assert(false && "Don't know how to copy this physical register!");
535 abort();
536 }
538 // Issue copies, these can be expensive cross register class copies.
537539 SmallVector Copies;
538 InsertCCCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
540 InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
539541 DOUT << "Adding an edge from SU # " << TrySU->NodeNum
540542 << " to SU #" << Copies.front()->NodeNum << "\n";
541543 AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
3434 STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
3535 STATISTIC(NumUnfolds, "Number of nodes unfolded");
3636 STATISTIC(NumDups, "Number of duplicated nodes");
37 STATISTIC(NumCCCopies, "Number of cross class copies");
37 STATISTIC(NumPRCopies, "Number of physical register copies");
3838
3939 static RegisterScheduler
4040 burrListDAGScheduler("list-burr",
120120 void UnscheduleNodeBottomUp(SUnit*);
121121 void BacktrackBottomUp(SUnit*, unsigned, unsigned&);
122122 SUnit *CopyAndMoveSuccessors(SUnit*);
123 void InsertCCCopiesAndMoveSuccs(SUnit*, unsigned,
124 const TargetRegisterClass*,
125 const TargetRegisterClass*,
126 SmallVector&);
123 void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
124 const TargetRegisterClass*,
125 const TargetRegisterClass*,
126 SmallVector&);
127127 bool DelayForLiveRegsBottomUp(SUnit*, SmallVector&);
128128 void ListScheduleTopDown();
129129 void ListScheduleBottomUp();
516516 return NewSU;
517517 }
518518
519 /// InsertCCCopiesAndMoveSuccs - Insert expensive cross register class copies
520 /// and move all scheduled successors of the given SUnit to the last copy.
521 void ScheduleDAGRRList::InsertCCCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
522 const TargetRegisterClass *DestRC,
523 const TargetRegisterClass *SrcRC,
519 /// InsertCopiesAndMoveSuccs - Insert register copies and move all
520 /// scheduled successors of the given SUnit to the last copy.
521 void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
522 const TargetRegisterClass *DestRC,
523 const TargetRegisterClass *SrcRC,
524524 SmallVector &Copies) {
525525 SUnit *CopyFromSU = CreateNewSUnit(NULL);
526526 CopyFromSU->CopySrcRC = SrcRC;
545545 DelDeps.push_back(std::make_pair(SuccSU, *I));
546546 }
547547 }
548 for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) {
548 for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
549549 RemovePred(DelDeps[i].first, DelDeps[i].second);
550 }
551550
552551 AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
553552 AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));
558557 Copies.push_back(CopyFromSU);
559558 Copies.push_back(CopyToSU);
560559
561 ++NumCCCopies;
560 ++NumPRCopies;
562561 }
563562
564563 /// getPhysicalRegisterVT - Returns the ValueType of the physical register
704703 }
705704
706705 if (!CurSU) {
707 // Can't backtrack. Try duplicating the nodes that produces these
708 // "expensive to copy" values to break the dependency. In case even
709 // that doesn't work, insert cross class copies.
706 // Can't backtrack. If it's too expensive to copy the value, then try
707 // duplicating the nodes that produce these "too expensive to copy"
708 // values to break the dependency. In case even that doesn't work,
709 // insert cross class copies.
710 // If it's not too expensive, i.e. cost != -1, issue copies.
710711 SUnit *TrySU = NotReady[0];
711712 SmallVector &LRegs = LRegsMap[TrySU];
712713 assert(LRegs.size() == 1 && "Can't handle this yet!");
713714 unsigned Reg = LRegs[0];
714715 SUnit *LRDef = LiveRegDefs[Reg];
715 SUnit *NewDef = CopyAndMoveSuccessors(LRDef);
716 MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
717 const TargetRegisterClass *RC =
718 TRI->getPhysicalRegisterRegClass(Reg, VT);
719 const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
720
721 // If cross copy register class is null, then it must be possible to copy
722 // the value directly. Do not try to duplicate the def.
723 SUnit *NewDef = 0;
724 if (DestRC)
725 NewDef = CopyAndMoveSuccessors(LRDef);
726 else
727 DestRC = RC;
716728 if (!NewDef) {
717 // Issue expensive cross register class copies.
718 MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
719 const TargetRegisterClass *RC =
720 TRI->getPhysicalRegisterRegClass(Reg, VT);
721 const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
722 if (!DestRC) {
723 assert(false && "Don't know how to copy this physical register!");
724 abort();
725 }
729 // Issue copies, these can be expensive cross register class copies.
726730 SmallVector Copies;
727 InsertCCCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
731 InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
728732 DOUT << "Adding an edge from SU #" << TrySU->NodeNum
729733 << " to SU #" << Copies.front()->NodeNum << "\n";
730734 AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
3838
3939 /// CheckForPhysRegDependency - Check if the dependency between def and use of
4040 /// a specified operand is a physical register dependency. If so, returns the
41 /// register.
41 /// register and the cost of copying the register.
4242 static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
4343 const TargetRegisterInfo *TRI,
4444 const TargetInstrInfo *TII,
45 unsigned &PhysReg) {
45 unsigned &PhysReg, int &Cost) {
4646 if (Op != 2 || User->getOpcode() != ISD::CopyToReg)
4747 return;
4848
5454 if (Def->isMachineOpcode()) {
5555 const TargetInstrDesc &II = TII->get(Def->getMachineOpcode());
5656 if (ResNo >= II.getNumDefs() &&
57 II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg)
57 II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg) {
5858 PhysReg = Reg;
59 const TargetRegisterClass *RC =
60 TRI->getPhysicalRegisterRegClass(Reg, Def->getValueType(ResNo));
61 Cost = RC->getCopyCost();
62 }
5963 }
6064 }
6165
178182 bool isChain = OpVT == MVT::Other;
179183
180184 unsigned PhysReg = 0;
185 int Cost = 1;
181186 // Determine if this is a physical register dependency.
182 CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg);
187 CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg, Cost);
183188 assert((PhysReg == 0 || !isChain) &&
184189 "Chain dependence via physreg data?");
190 // FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. For now, scheduler
191 // emits a copy from the physical register to a virtual register unless
192 // it requires a cross class copy (cost < 0). That means we are only
193 // treating "expensive to copy" register dependency as physical register
194 // dependency. This may change in the future though.
195 if (Cost >= 0)
196 PhysReg = 0;
185197 SU->addPred(SDep(OpSU, isChain ? SDep::Order : SDep::Data,
186198 OpSU->Latency, PhysReg));
187199 }
251263
252264
253265 void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {
254 if (SU->getNode())
255 SU->getNode()->dump(DAG);
256 else
257 cerr << "CROSS RC COPY ";
266 if (!SU->getNode()) {
267 cerr << "PHYS REG COPY\n";
268 return;
269 }
270
271 SU->getNode()->dump(DAG);
258272 cerr << "\n";
259273 SmallVector FlaggedNodes;
260274 for (SDNode *N = SU->getNode()->getFlaggedNode(); N; N = N->getFlaggedNode())
628628
629629 // For pre-regalloc scheduling, create instructions corresponding to the
630630 // SDNode and any flagged SDNodes and append them to the block.
631 if (!SU->getNode()) {
632 // Emit a copy.
633 EmitPhysRegCopy(SU, CopyVRBaseMap);
634 continue;
635 }
636
631637 SmallVector FlaggedNodes;
632638 for (SDNode *N = SU->getNode()->getFlaggedNode(); N; N = N->getFlaggedNode())
633639 FlaggedNodes.push_back(N);
635641 EmitNode(FlaggedNodes.back(), SU->OrigNode != SU, VRBaseMap);
636642 FlaggedNodes.pop_back();
637643 }
638 if (!SU->getNode())
639 EmitCrossRCCopy(SU, CopyVRBaseMap);
640 else
641 EmitNode(SU->getNode(), SU->OrigNode != SU, VRBaseMap);
644 EmitNode(SU->getNode(), SU->OrigNode != SU, VRBaseMap);
642645 }
643646
644647 return BB;
0 ; RUN: llvm-as < %s | llc -march=x86
1 ; PR3244
2
3 @g_62 = external global i16 ; [#uses=1]
4 @g_487 = external global i32 ; [#uses=1]
5
6 define i32 @func_42(i32 %p_43, i32 %p_44, i32 %p_45, i32 %p_46) nounwind {
7 entry:
8 %0 = load i16* @g_62, align 2 ; [#uses=1]
9 %1 = load i32* @g_487, align 4 ; [#uses=1]
10 %2 = trunc i16 %0 to i8 ; [#uses=1]
11 %3 = trunc i32 %1 to i8 ; [#uses=1]
12 %4 = tail call i32 (...)* @func_7(i64 -4455561449541442965, i32 1)
13 nounwind ; [#uses=1]
14 %5 = trunc i32 %4 to i8 ; [#uses=1]
15 %6 = mul i8 %3, %2 ; [#uses=1]
16 %7 = mul i8 %6, %5 ; [#uses=1]
17 %8 = sext i8 %7 to i16 ; [#uses=1]
18 %9 = tail call i32 @func_85(i16 signext %8, i32 1, i32 1) nounwind
19 ; [#uses=0]
20 ret i32 undef
21 }
22
23 declare i32 @func_7(...)
24
25 declare i32 @func_85(i16 signext, i32, i32)