llvm.org GIT mirror: llvm / commit d95ea2d

Fix PR7421: bug in kill transferring logic. It was ignoring loads / stores which have already been processed.

Author: Evan Cheng
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@106481 91177308-0d34-0410-b5e6-96231b3b80d8

2 changed files with 214 additions and 39 deletions.
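The first changed file is the ARM load/store optimizer, lib/Target/ARM/ARMLoadStoreOptimizer.cpp; the second is a new regression test reduced from PR7421. Before the patch, MergeOpsUpdate only looked at the queued loads/stores that come after the group being merged when deciding which kill flags to transfer, so a kill carried by an op that sits before the group, possibly already folded into an earlier LDM/STM, was missed. As a rough illustration, here is a minimal standalone sketch of the bookkeeping the patch introduces. It is not LLVM code: MemOp and planKillTransfer are made-up names, plain std containers stand in for MachineOperand kill flags, and the two phases of the real code (planning before MergeOps, clearing flags after the merged instruction is emitted) are compressed into one function.

// Standalone sketch of the kill-flag transfer logic (hypothetical names,
// plain containers instead of MachineOperands; the real code defers the
// setIsKill(false) updates until after the merged instruction is emitted).
#include <cstdio>
#include <map>
#include <set>
#include <vector>

struct MemOp {
  unsigned Reg;       // register loaded or stored by this op
  bool IsKill;        // this op currently carries the kill flag for Reg
  unsigned Position;  // position of the op in the basic block
  bool Merged;        // already folded into an earlier LDM/STM
};

// Plan a merge of ops[begin, end) that will be inserted at insertPos.
// Returns false if the merge must be abandoned because a register is killed
// by an op that was already merged (its kill marker can no longer be fixed).
bool planKillTransfer(std::vector<MemOp> &ops, unsigned begin, unsigned end,
                      unsigned insertPos) {
  std::set<unsigned> killedBefore;      // killed by an unmerged op above insertPos
  std::map<unsigned, unsigned> killer;  // reg -> index of that op
  std::set<unsigned> unavailable;       // killed by an already-merged op

  // Ops before the group being merged: this is the scan the old code skipped.
  for (unsigned i = 0; i < begin; ++i) {
    if (ops[i].Position < insertPos && ops[i].IsKill) {
      if (ops[i].Merged)
        unavailable.insert(ops[i].Reg);
      else {
        killedBefore.insert(ops[i].Reg);
        killer[ops[i].Reg] = i;
      }
    }
  }
  // Ops after the group that still sit above the insertion point.
  for (unsigned i = end, e = ops.size(); i != e; ++i) {
    if (ops[i].Position < insertPos && ops[i].IsKill) {
      killedBefore.insert(ops[i].Reg);
      killer[ops[i].Reg] = i;
    }
  }

  for (unsigned i = begin; i != end; ++i) {
    unsigned Reg = ops[i].Reg;
    if (unavailable.count(Reg))
      return false;                     // mirror the patch: abort the merge
    if (killedBefore.count(Reg))
      ops[killer[Reg]].IsKill = false;  // clear the earlier kill; the merged
                                        // instruction will carry it instead
  }
  return true;
}

int main() {
  // r1 is killed at position 0 by an op outside the group; merging ops 1..3
  // after it must move that kill flag onto the merged instruction.
  std::vector<MemOp> ops = {{1, true, 0, false},
                            {1, false, 1, false},
                            {2, true, 2, false},
                            {3, true, 3, false}};
  bool ok = planKillTransfer(ops, 1, 4, 4);
  std::printf("merge ok: %d, op0 still kills r1: %d\n", (int)ok, (int)ops[0].IsKill);
  return 0;
}

The key change mirrored here is the pair of scans at the top: they cover every queued op that ends up above the insertion point, including ones processed by a previous merge, and when the killer has already been merged (so its kill marker can no longer be cleared) the merge is abandoned instead of producing a stale kill flag. The actual diff follows.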
   private:
     struct MemOpQueueEntry {
       int Offset;
+      unsigned Reg;
+      bool isKill;
       unsigned Position;
       MachineBasicBlock::iterator MBBI;
       bool Merged;
-      MemOpQueueEntry(int o, int p, MachineBasicBlock::iterator i)
-        : Offset(o), Position(p), MBBI(i), Merged(false) {}
+      MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
+                      MachineBasicBlock::iterator i)
+        : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
     };
     typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
     typedef MemOpQueue::iterator MemOpQueueIter;
...

 // MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
 // success.
-void ARMLoadStoreOpt::
-MergeOpsUpdate(MachineBasicBlock &MBB,
-               MemOpQueue &memOps,
-               unsigned memOpsBegin,
-               unsigned memOpsEnd,
-               unsigned insertAfter,
-               int Offset,
-               unsigned Base,
-               bool BaseKill,
-               int Opcode,
-               ARMCC::CondCodes Pred,
-               unsigned PredReg,
-               unsigned Scratch,
-               DebugLoc dl,
-               SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
+void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
+                                     MemOpQueue &memOps,
+                                     unsigned memOpsBegin, unsigned memOpsEnd,
+                                     unsigned insertAfter, int Offset,
+                                     unsigned Base, bool BaseKill,
+                                     int Opcode,
+                                     ARMCC::CondCodes Pred, unsigned PredReg,
+                                     unsigned Scratch,
+                                     DebugLoc dl,
+                                     SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
   // First calculate which of the registers should be killed by the merged
   // instruction.
+  const unsigned insertPos = memOps[insertAfter].Position;
+
+  SmallSet<unsigned, 4> UnavailRegs;
+  SmallSet<unsigned, 4> KilledRegs;
+  DenseMap<unsigned, unsigned> Killer;
+  for (unsigned i = 0; i < memOpsBegin; ++i) {
+    if (memOps[i].Position < insertPos && memOps[i].isKill) {
+      unsigned Reg = memOps[i].Reg;
+      if (memOps[i].Merged)
+        UnavailRegs.insert(Reg);
+      else {
+        KilledRegs.insert(Reg);
+        Killer[Reg] = i;
+      }
+    }
+  }
+  for (unsigned i = memOpsEnd, e = memOps.size(); i != e; ++i) {
+    if (memOps[i].Position < insertPos && memOps[i].isKill) {
+      unsigned Reg = memOps[i].Reg;
+      KilledRegs.insert(Reg);
+      Killer[Reg] = i;
+    }
+  }
+
   SmallVector<std::pair<unsigned, bool>, 8> Regs;
-  const unsigned insertPos = memOps[insertAfter].Position;
   for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
-    const MachineOperand &MO = memOps[i].MBBI->getOperand(0);
-    unsigned Reg = MO.getReg();
-    bool isKill = MO.isKill();
+    unsigned Reg = memOps[i].Reg;
+    if (UnavailRegs.count(Reg))
+      // Register is killed before and it's not easy / possible to update the
+      // kill marker on already merged instructions. Abort.
+      return;

     // If we are inserting the merged operation after an unmerged operation that
     // uses the same register, make sure to transfer any kill flag.
-    for (unsigned j = memOpsEnd, e = memOps.size(); !isKill && j != e; ++j)
-      if (memOps[j].Position < insertPos) {
-        const MachineOperand &MOJ = memOps[j].MBBI->getOperand(0);
-        if (MOJ.getReg() == Reg && MOJ.isKill())
-          isKill = true;
-      }
-
+    bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
     Regs.push_back(std::make_pair(Reg, isKill));
   }

...

   Merges.push_back(prior(Loc));
   for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
     // Remove kill flags from any unmerged memops that come before insertPos.
-    if (Regs[i-memOpsBegin].second)
-      for (unsigned j = memOpsEnd, e = memOps.size(); j != e; ++j)
-        if (memOps[j].Position < insertPos) {
-          MachineOperand &MOJ = memOps[j].MBBI->getOperand(0);
-          if (MOJ.getReg() == Regs[i-memOpsBegin].first && MOJ.isKill())
-            MOJ.setIsKill(false);
-        }
+    if (Regs[i-memOpsBegin].second) {
+      unsigned Reg = Regs[i-memOpsBegin].first;
+      if (KilledRegs.count(Reg)) {
+        unsigned j = Killer[Reg];
+        memOps[j].MBBI->getOperand(0).setIsKill(false);
+      }
+    }
     MBB.erase(memOps[i].MBBI);
     memOps[i].Merged = true;
   }
...

     if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
       return false;

+    MachineBasicBlock::iterator NewBBI = MBBI;
     bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
     bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
     bool EvenDeadKill = isLd ?
...
                         getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
         ++NumSTRD2STM;
       }
+      NewBBI = llvm::prior(MBBI);
     } else {
       // Split into two instructions.
       assert((!isT2 || !OffReg) &&
...
                       OddReg, OddDeadKill, false,
                       BaseReg, false, BaseUndef, OffReg, false, OffUndef,
                       Pred, PredReg, TII, isT2);
+        NewBBI = llvm::prior(MBBI);
         InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                       EvenReg, EvenDeadKill, false,
                       BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
...
                       EvenReg, EvenDeadKill, EvenUndef,
                       BaseReg, false, BaseUndef, OffReg, false, OffUndef,
                       Pred, PredReg, TII, isT2);
+        NewBBI = llvm::prior(MBBI);
         InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                       OddReg, OddDeadKill, OddUndef,
                       BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
...
         ++NumSTRD2STR;
     }

-    MBBI = prior(MBBI);
     MBB.erase(MI);
+    MBBI = NewBBI;
+    return true;
   }
   return false;
 }
...

     if (isMemOp) {
       int Opcode = MBBI->getOpcode();
       unsigned Size = getLSMultipleTransferSize(MBBI);
+      const MachineOperand &MO = MBBI->getOperand(0);
+      unsigned Reg = MO.getReg();
+      bool isKill = MO.isDef() ? false : MO.isKill();
       unsigned Base = MBBI->getOperand(1).getReg();
       unsigned PredReg = 0;
       ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
...
         CurrSize = Size;
         CurrPred = Pred;
         CurrPredReg = PredReg;
-        MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
+        MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
         NumMemOps++;
         Advance = true;
       } else {
...
           // No need to match PredReg.
           // Continue adding to the queue.
           if (Offset > MemOps.back().Offset) {
-            MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
+            MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
+                                             Position, MBBI));
             NumMemOps++;
             Advance = true;
           } else {
             for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                  I != E; ++I) {
               if (Offset < I->Offset) {
-                MemOps.insert(I, MemOpQueueEntry(Offset, Position, MBBI));
+                MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
+                                                 Position, MBBI));
                 NumMemOps++;
                 Advance = true;
                 break;
New file: regression test for PR7421 (the second changed file).

; RUN: llc < %s -mtriple=armv7-apple-darwin -O3 -mcpu=arm1136jf-s
; PR7421

%struct.CONTENTBOX = type { i32, i32, i32, i32, i32 }
%struct.FILE = type { i8* }
%struct.tilebox = type { %struct.tilebox*, double, double, double, double, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
%struct.UNCOMBOX = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
%struct.cellbox = type { i8*, i32, i32, i32, [9 x i32], i32, i32, i32, i32, i32, i32, i32, double, double, double, double, double, i32, i32, %struct.CONTENTBOX*, %struct.UNCOMBOX*, [8 x %struct.tilebox*] }
%struct.termbox = type { %struct.termbox*, i32, i32, i32, i32, i32 }

@.str2708 = external constant [14 x i8], align 4 ; <[14 x i8]*> [#uses=1]

define void @TW_oldinput(%struct.FILE* nocapture %fp) nounwind {
entry:
  %xcenter = alloca i32, align 4 ; [#uses=2]
  %0 = call i32 (%struct.FILE*, i8*, ...)* @fscanf(%struct.FILE* %fp, i8* getelementptr inbounds ([14 x i8]* @.str2708, i32 0, i32 0), i32* undef, i32* undef, i32* %xcenter, i32* null) nounwind ; [#uses=1]
  %1 = icmp eq i32 %0, 4 ; [#uses=1]
  br i1 %1, label %bb, label %return

bb: ; preds = %bb445, %entry
  %2 = load %struct.cellbox** undef, align 4 ; <%struct.cellbox*> [#uses=2]
  %3 = getelementptr inbounds %struct.cellbox* %2, i32 0, i32 3 ; [#uses=1]
  store i32 undef, i32* %3, align 4
  %4 = load i32* undef, align 4 ; [#uses=3]
  %5 = icmp eq i32 undef, 1 ; [#uses=1]
  br i1 %5, label %bb10, label %bb445

bb10: ; preds = %bb
  br i1 undef, label %bb11, label %bb445

bb11: ; preds = %bb10
  %6 = load %struct.tilebox** undef, align 4 ; <%struct.tilebox*> [#uses=3]
  %7 = load %struct.termbox** null, align 4 ; <%struct.termbox*> [#uses=1]
  %8 = getelementptr inbounds %struct.tilebox* %6, i32 0, i32 13 ; [#uses=1]
  %9 = load i32* %8, align 4 ; [#uses=3]
  %10 = getelementptr inbounds %struct.tilebox* %6, i32 0, i32 15 ; [#uses=1]
  %11 = load i32* %10, align 4 ; [#uses=1]
  br i1 false, label %bb12, label %bb13

bb12: ; preds = %bb11
  unreachable

bb13: ; preds = %bb11
  %iftmp.40.0.neg = sdiv i32 0, -2 ; [#uses=2]
  %12 = sub nsw i32 0, %9 ; [#uses=1]
  %13 = sitofp i32 %12 to double ; [#uses=1]
  %14 = fdiv double %13, 0.000000e+00 ; [#uses=1]
  %15 = fptosi double %14 to i32 ; [#uses=1]
  %iftmp.41.0.in = add i32 0, %15 ; [#uses=1]
  %iftmp.41.0.neg = sdiv i32 %iftmp.41.0.in, -2 ; [#uses=3]
  br i1 undef, label %bb43.loopexit, label %bb21

bb21: ; preds = %bb13
  %16 = fptosi double undef to i32 ; [#uses=1]
  %17 = fsub double undef, 0.000000e+00 ; [#uses=1]
  %not.460 = fcmp oge double %17, 5.000000e-01 ; [#uses=1]
  %18 = zext i1 %not.460 to i32 ; [#uses=1]
  %iftmp.42.0 = add i32 %16, %iftmp.41.0.neg ; [#uses=1]
  %19 = add i32 %iftmp.42.0, %18 ; [#uses=1]
  store i32 %19, i32* undef, align 4
  %20 = sub nsw i32 0, %9 ; [#uses=1]
  %21 = sitofp i32 %20 to double ; [#uses=1]
  %22 = fdiv double %21, 0.000000e+00 ; [#uses=2]
  %23 = fptosi double %22 to i32 ; [#uses=1]
  %24 = fsub double %22, undef ; [#uses=1]
  %not.461 = fcmp oge double %24, 5.000000e-01 ; [#uses=1]
  %25 = zext i1 %not.461 to i32 ; [#uses=1]
  %iftmp.43.0 = add i32 %23, %iftmp.41.0.neg ; [#uses=1]
  %26 = add i32 %iftmp.43.0, %25 ; [#uses=1]
  %27 = getelementptr inbounds %struct.tilebox* %6, i32 0, i32 10 ; [#uses=1]
  store i32 %26, i32* %27, align 4
  %28 = fptosi double undef to i32 ; [#uses=1]
  %iftmp.45.0 = add i32 %28, %iftmp.40.0.neg ; [#uses=1]
  %29 = add i32 %iftmp.45.0, 0 ; [#uses=1]
  store i32 %29, i32* undef, align 4
  br label %bb43.loopexit

bb36: ; preds = %bb43.loopexit, %bb36
  %termptr.0478 = phi %struct.termbox* [ %42, %bb36 ], [ %7, %bb43.loopexit ] ; <%struct.termbox*> [#uses=1]
  %30 = load i32* undef, align 4 ; [#uses=1]
  %31 = sub nsw i32 %30, %9 ; [#uses=1]
  %32 = sitofp i32 %31 to double ; [#uses=1]
  %33 = fdiv double %32, 0.000000e+00 ; [#uses=1]
  %34 = fptosi double %33 to i32 ; [#uses=1]
  %iftmp.46.0 = add i32 %34, %iftmp.41.0.neg ; [#uses=1]
  %35 = add i32 %iftmp.46.0, 0 ; [#uses=1]
  store i32 %35, i32* undef, align 4
  %36 = sub nsw i32 0, %11 ; [#uses=1]
  %37 = sitofp i32 %36 to double ; [#uses=1]
  %38 = fmul double %37, 0.000000e+00 ; [#uses=1]
  %39 = fptosi double %38 to i32 ; [#uses=1]
  %iftmp.47.0 = add i32 %39, %iftmp.40.0.neg ; [#uses=1]
  %40 = add i32 %iftmp.47.0, 0 ; [#uses=1]
  store i32 %40, i32* undef, align 4
  %41 = getelementptr inbounds %struct.termbox* %termptr.0478, i32 0, i32 0 ; <%struct.termbox**> [#uses=1]
  %42 = load %struct.termbox** %41, align 4 ; <%struct.termbox*> [#uses=2]
  %43 = icmp eq %struct.termbox* %42, null ; [#uses=1]
  br i1 %43, label %bb52.loopexit, label %bb36

bb43.loopexit: ; preds = %bb21, %bb13
  br i1 undef, label %bb52.loopexit, label %bb36

bb52.loopexit: ; preds = %bb43.loopexit, %bb36
  %44 = icmp eq i32 %4, 0 ; [#uses=1]
  br i1 %44, label %bb.nph485, label %bb54

bb54: ; preds = %bb52.loopexit
  switch i32 %4, label %bb62 [
    i32 2, label %bb56
    i32 3, label %bb57
  ]

bb56: ; preds = %bb54
  br label %bb62

bb57: ; preds = %bb54
  br label %bb62

bb62: ; preds = %bb57, %bb56, %bb54
  unreachable

bb.nph485: ; preds = %bb52.loopexit
  br label %bb248

bb248: ; preds = %bb322, %bb.nph485
  %45 = icmp eq i32 undef, %4 ; [#uses=1]
  br i1 %45, label %bb322, label %bb249

bb249: ; preds = %bb248
  %46 = getelementptr inbounds %struct.cellbox* %2, i32 0, i32 21, i32 undef ; <%struct.tilebox**> [#uses=1]
  %47 = load %struct.tilebox** %46, align 4 ; <%struct.tilebox*> [#uses=1]
  %48 = getelementptr inbounds %struct.tilebox* %47, i32 0, i32 11 ; [#uses=1]
  store i32 undef, i32* %48, align 4
  unreachable

bb322: ; preds = %bb248
  br i1 undef, label %bb248, label %bb445

bb445: ; preds = %bb322, %bb10, %bb
  %49 = call i32 (%struct.FILE*, i8*, ...)* @fscanf(%struct.FILE* %fp, i8* getelementptr inbounds ([14 x i8]* @.str2708, i32 0, i32 0), i32* undef, i32* undef, i32* %xcenter, i32* null) nounwind ; [#uses=1]
  %50 = icmp eq i32 %49, 4 ; [#uses=1]
  br i1 %50, label %bb, label %return

return: ; preds = %bb445, %entry
  ret void
}

declare i32 @fscanf(%struct.FILE* nocapture, i8* nocapture, ...) nounwind