llvm.org GIT mirror llvm / 239a5fb

Merge r330264 for the fix to PR37100, a regression introduced with the new EFLAGS lowering.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_60@332938 91177308-0d34-0410-b5e6-96231b3b80d8
Chandler Carruth

3 changed files with 237 additions and 85 deletions.
3535 #include "llvm/ADT/Statistic.h"
3636 #include "llvm/CodeGen/MachineBasicBlock.h"
3737 #include "llvm/CodeGen/MachineConstantPool.h"
38 #include "llvm/CodeGen/MachineDominators.h"
3839 #include "llvm/CodeGen/MachineFunction.h"
3940 #include "llvm/CodeGen/MachineFunctionPass.h"
4041 #include "llvm/CodeGen/MachineInstr.h"
9798 const X86InstrInfo *TII;
9899 const TargetRegisterInfo *TRI;
99100 const TargetRegisterClass *PromoteRC;
101 MachineDominatorTree *MDT;
100102
101103 CondRegArray collectCondsInRegs(MachineBasicBlock &MBB,
102104 MachineInstr &CopyDefI);
144146 char X86FlagsCopyLoweringPass::ID = 0;
145147
146148 void X86FlagsCopyLoweringPass::getAnalysisUsage(AnalysisUsage &AU) const {
149 AU.addRequired<MachineDominatorTree>();
147150 MachineFunctionPass::getAnalysisUsage(AU);
148151 }
149152
341344 MRI = &MF.getRegInfo();
342345 TII = Subtarget.getInstrInfo();
343346 TRI = Subtarget.getRegisterInfo();
347 MDT = &getAnalysis<MachineDominatorTree>();
344348 PromoteRC = &X86::GR8RegClass;
345349
346350 if (MF.begin() == MF.end())
415419 // of these up front instead.
416420 CondRegArray CondRegs = collectCondsInRegs(TestMBB, CopyDefI);
417421
418 for (auto MII = std::next(CopyI->getIterator()), MIE = MBB.instr_end();
419 MII != MIE;) {
420 MachineInstr &MI = *MII++;
421 MachineOperand *FlagUse = MI.findRegisterUseOperand(X86::EFLAGS);
422 if (!FlagUse) {
423 if (MI.findRegisterDefOperand(X86::EFLAGS)) {
424 // If EFLAGS are defined, it's as-if they were killed. We can stop
425 // scanning here.
426 //
427 // NB!!! Many instructions only modify some flags. LLVM currently
428 // models this as clobbering all flags, but if that ever changes this
429 // will need to be carefully updated to handle that more complex
422 // Collect the basic blocks we need to scan. Typically this will just be
423 // a single basic block but we may have to scan multiple blocks if the
424 // EFLAGS copy lives into successors.
425 SmallVector<MachineBasicBlock *, 2> Blocks;
426 SmallPtrSet<MachineBasicBlock *, 2> VisitedBlocks;
427 Blocks.push_back(&MBB);
428 VisitedBlocks.insert(&MBB);
429
430 do {
431 MachineBasicBlock &UseMBB = *Blocks.pop_back_val();
432
433 // We currently don't do any PHI insertion and so we require that the
434 // test basic block dominates all of the use basic blocks.
435 //
436 // We could in theory do PHI insertion here if it becomes useful by just
437 // taking undef values in along every edge that we don't trace this
438 // EFLAGS copy along. This isn't as bad as fully general PHI insertion,
439 // but still seems like a great deal of complexity.
440 //
441 // Because it is theoretically possible that some earlier MI pass or
442 // other lowering transformation could induce this to happen, we do
443 // a hard check even in non-debug builds here.
444 if (&TestMBB != &UseMBB && !MDT->dominates(&TestMBB, &UseMBB)) {
445 DEBUG({
446 dbgs() << "ERROR: Encountered use that is not dominated by our test "
447 "basic block! Rewriting this would require inserting PHI "
448 "nodes to track the flag state across the CFG.\n\nTest "
449 "block:\n";
450 TestMBB.dump();
451 dbgs() << "Use block:\n";
452 UseMBB.dump();
453 });
454 report_fatal_error("Cannot lower EFLAGS copy when original copy def "
455 "does not dominate all uses.");
456 }
457
458 for (auto MII = &UseMBB == &MBB ? std::next(CopyI->getIterator())
459 : UseMBB.instr_begin(),
460 MIE = UseMBB.instr_end();
461 MII != MIE;) {
462 MachineInstr &MI = *MII++;
463 MachineOperand *FlagUse = MI.findRegisterUseOperand(X86::EFLAGS);
464 if (!FlagUse) {
465 if (MI.findRegisterDefOperand(X86::EFLAGS)) {
466 // If EFLAGS are defined, it's as-if they were killed. We can stop
467 // scanning here.
468 //
469 // NB!!! Many instructions only modify some flags. LLVM currently
470 // models this as clobbering all flags, but if that ever changes
471 // this will need to be carefully updated to handle that more
472 // complex logic.
473 FlagsKilled = true;
474 break;
475 }
476 continue;
477 }
478
479 DEBUG(dbgs() << " Rewriting use: "; MI.dump());
480
481 // Check the kill flag before we rewrite as that may change it.
482 if (FlagUse->isKill())
483 FlagsKilled = true;
484
485 // Once we encounter a branch, the rest of the instructions must also be
486 // branches. We can't rewrite in place here, so we handle them below.
487 //
488 // Note that we don't have to handle tail calls here, even conditional
489 // tail calls, as those are not introduced into the X86 MI until post-RA
490 // branch folding or block placement. As a consequence, we get to deal
491 // with the simpler formulation of conditional branches followed by tail
492 // calls.
493 if (X86::getCondFromBranchOpc(MI.getOpcode()) != X86::COND_INVALID) {
494 auto JmpIt = MI.getIterator();
495 do {
496 JmpIs.push_back(&*JmpIt);
497 ++JmpIt;
498 } while (JmpIt != UseMBB.instr_end() &&
499 X86::getCondFromBranchOpc(JmpIt->getOpcode()) !=
500 X86::COND_INVALID);
501 break;
502 }
503
504 // Otherwise we can just rewrite in-place.
505 if (X86::getCondFromCMovOpc(MI.getOpcode()) != X86::COND_INVALID) {
506 rewriteCMov(TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
507 } else if (X86::getCondFromSETOpc(MI.getOpcode()) !=
508 X86::COND_INVALID) {
509 rewriteSetCC(TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
510 } else if (MI.getOpcode() == TargetOpcode::COPY) {
511 rewriteCopy(MI, *FlagUse, CopyDefI);
512 } else {
513 // We assume that arithmetic instructions that use flags also def
514 // them.
515 assert(MI.findRegisterDefOperand(X86::EFLAGS) &&
516 "Expected a def of EFLAGS for this instruction!");
517
518 // NB!!! Several arithmetic instructions only *partially* update
519 // flags. Theoretically, we could generate MI code sequences that
520 // would rely on this fact and observe different flags independently.
521 // But currently LLVM models all of these instructions as clobbering
522 // all the flags in an undef way. We rely on that to simplify the
430523 // logic.
431524 FlagsKilled = true;
525
526 rewriteArithmetic(TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
432527 break;
433528 }
434 continue;
529
530 // If this was the last use of the flags, we're done.
531 if (FlagsKilled)
532 break;
435533 }
436534
437 DEBUG(dbgs() << " Rewriting use: "; MI.dump());
438
439 // Check the kill flag before we rewrite as that may change it.
440 if (FlagUse->isKill())
441 FlagsKilled = true;
442
443 // Once we encounter a branch, the rest of the instructions must also be
444 // branches. We can't rewrite in place here, so we handle them below.
445 //
446 // Note that we don't have to handle tail calls here, even conditional
447 // tail calls, as those are not introduced into the X86 MI until post-RA
448 // branch folding or block placement. As a consequence, we get to deal
449 // with the simpler formulation of conditional branches followed by tail
450 // calls.
451 if (X86::getCondFromBranchOpc(MI.getOpcode()) != X86::COND_INVALID) {
452 auto JmpIt = MI.getIterator();
453 do {
454 JmpIs.push_back(&*JmpIt);
455 ++JmpIt;
456 } while (JmpIt != MBB.instr_end() &&
457 X86::getCondFromBranchOpc(JmpIt->getOpcode()) !=
458 X86::COND_INVALID);
459 break;
460 }
461
462 // Otherwise we can just rewrite in-place.
463 if (X86::getCondFromCMovOpc(MI.getOpcode()) != X86::COND_INVALID) {
464 rewriteCMov(TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
465 } else if (X86::getCondFromSETOpc(MI.getOpcode()) != X86::COND_INVALID) {
466 rewriteSetCC(TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
467 } else if (MI.getOpcode() == TargetOpcode::COPY) {
468 rewriteCopy(MI, *FlagUse, CopyDefI);
469 } else {
470 // We assume that arithmetic instructions that use flags also def them.
471 assert(MI.findRegisterDefOperand(X86::EFLAGS) &&
472 "Expected a def of EFLAGS for this instruction!");
473
474 // NB!!! Several arithmetic instructions only *partially* update
475 // flags. Theoretically, we could generate MI code sequences that
476 // would rely on this fact and observe different flags independently.
477 // But currently LLVM models all of these instructions as clobbering
478 // all the flags in an undef way. We rely on that to simplify the
479 // logic.
480 FlagsKilled = true;
481
482 rewriteArithmetic(TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
483 break;
484 }
485
486 // If this was the last use of the flags, we're done.
535 // If the flags were killed, we're done with this block.
487536 if (FlagsKilled)
488537 break;
489 }
490
491 // If we didn't find a kill (or equivalent) check that the flags don't
492 // live-out of the basic block. Currently we don't support lowering copies
493 // of flags that live out in this fashion.
494 if (!FlagsKilled &&
495 llvm::any_of(MBB.successors(), [](MachineBasicBlock *SuccMBB) {
496 return SuccMBB->isLiveIn(X86::EFLAGS);
497 })) {
498 DEBUG({
499 dbgs() << "ERROR: Found a copied EFLAGS live-out from basic block:\n"
500 << "----\n";
501 MBB.dump();
502 dbgs() << "----\n"
503 << "ERROR: Cannot lower this EFLAGS copy!\n";
504 });
505 report_fatal_error(
506 "Cannot lower EFLAGS copy that lives out of a basic block!");
507 }
538
539 // Otherwise we need to scan successors for ones where the flags live-in
540 // and queue those up for processing.
541 for (MachineBasicBlock *SuccMBB : UseMBB.successors())
542 if (SuccMBB->isLiveIn(X86::EFLAGS) &&
543 VisitedBlocks.insert(SuccMBB).second)
544 Blocks.push_back(SuccMBB);
545 } while (!Blocks.empty());
508546
509547 // Now rewrite the jumps that use the flags. These we handle specially
510 // because if there are multiple jumps we'll have to do surgery on the CFG.
548 // because if there are multiple jumps in a single basic block we'll have
549 // to do surgery on the CFG.
550 MachineBasicBlock *LastJmpMBB = nullptr;
511551 for (MachineInstr *JmpI : JmpIs) {
512 // Past the first jump we need to split the blocks apart.
513 if (JmpI != JmpIs.front())
552 // Past the first jump within a basic block we need to split the blocks
553 // apart.
554 if (JmpI->getParent() == LastJmpMBB)
514555 splitBlock(*JmpI->getParent(), *JmpI, *TII);
556 else
557 LastJmpMBB = JmpI->getParent();
515558
516559 rewriteCondJmp(TestMBB, TestPos, TestLoc, *JmpI, CondRegs);
517560 }
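
The core of the change above is easiest to see in isolation: starting from the block that contains the EFLAGS copy, the pass rewrites uses block by block and only follows successors that have the flags live-in, and because it inserts no PHIs it insists that the test block dominates every block it visits. The following is a minimal, self-contained sketch of that traversal outside of LLVM; Block, Dominates, and ProcessBlock are hypothetical stand-ins for MachineBasicBlock, MachineDominatorTree::dominates, and the per-instruction rewrite loop, not the actual APIs.

#include <functional>
#include <stdexcept>
#include <unordered_set>
#include <vector>

// Hypothetical stand-in for MachineBasicBlock: just successor edges plus a
// flag recording whether EFLAGS is live-in to the block.
struct Block {
  std::vector<Block *> Successors;
  bool FlagsLiveIn = false;
};

// Sketch of the traversal the patch adds: scan the block holding the copy
// first, then queue any successor with the flags live-in, visiting each
// block at most once. Dominates stands in for the dominator-tree query and
// ProcessBlock for the per-block rewrite loop; ProcessBlock returns true
// once the flags are killed inside the block it scanned.
void scanFlagUses(
    Block &TestBlock, Block &CopyBlock,
    const std::function<bool(const Block *, const Block *)> &Dominates,
    const std::function<bool(Block &)> &ProcessBlock) {
  std::vector<Block *> Worklist{&CopyBlock};
  std::unordered_set<Block *> Visited{&CopyBlock};

  do {
    Block &UseBlock = *Worklist.back();
    Worklist.pop_back();

    // No PHI insertion is done, so every use block must be dominated by the
    // block where the condition values are materialized. The real pass makes
    // this a hard error even in release builds (report_fatal_error).
    if (&TestBlock != &UseBlock && !Dominates(&TestBlock, &UseBlock))
      throw std::runtime_error("use block not dominated by the test block");

    // If the flags were killed in this block, the whole scan is done.
    if (ProcessBlock(UseBlock))
      break;

    // Otherwise follow every successor that still reads the flags.
    for (Block *Succ : UseBlock.Successors)
      if (Succ->FlagsLiveIn && Visited.insert(Succ).second)
        Worklist.push_back(Succ);
  } while (!Worklist.empty());
}

As in the patch, jump rewriting is not shown here: conditional branches are collected during the scan and handled afterwards, splitting a block only when it ends in more than one conditional jump.
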
3636 ; CHECK-NEXT: X86 PIC Global Base Reg Initialization
3737 ; CHECK-NEXT: Expand ISel Pseudo-instructions
3838 ; CHECK-NEXT: Local Stack Slot Allocation
39 ; CHECK-NEXT: MachineDominator Tree Construction
3940 ; CHECK-NEXT: X86 EFLAGS copy lowering
4041 ; CHECK-NEXT: X86 WinAlloca Expander
4142 ; CHECK-NEXT: Eliminate PHI nodes for register allocation
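
The only change to this pipeline test is the new "MachineDominator Tree Construction" entry ahead of "X86 EFLAGS copy lowering"; it appears because the pass now declares the dominator tree as a required analysis. Below is a condensed, illustrative sketch of that dependency pattern; the class is a stand-in, not the full pass.

#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"

using namespace llvm;

namespace {
// Illustrative MachineFunctionPass showing how the dominator tree is pulled
// in; the real X86FlagsCopyLoweringPass follows the same pattern.
class FlagsLoweringSketch : public MachineFunctionPass {
public:
  static char ID;
  FlagsLoweringSketch() : MachineFunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // This requirement is what schedules "MachineDominator Tree
    // Construction" immediately before the pass in the pipeline.
    AU.addRequired<MachineDominatorTree>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  bool runOnMachineFunction(MachineFunction &MF) override {
    // The constructed tree is then fetched at the start of each run.
    MachineDominatorTree *MDT = &getAnalysis<MachineDominatorTree>();
    (void)MDT; // the real pass uses it to check dominance of flag uses
    return false;
  }
};
} // namespace

char FlagsLoweringSketch::ID = 0;
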
195195 tail call void @external_b()
196196 ret void
197197 }
198
199 ; Test a function that gets special select lowering into CFG with copied EFLAGS
200 ; threaded across the CFG. This requires our EFLAGS copy rewriting to handle
201 ; cross-block rewrites in at least some narrow cases.
202 define void @PR37100(i8 %arg1, i16 %arg2, i64 %arg3, i8 %arg4, i8* %ptr1, i32* %ptr2) {
203 ; X32-LABEL: PR37100:
204 ; X32: # %bb.0: # %bb
205 ; X32-NEXT: pushl %ebp
206 ; X32-NEXT: .cfi_def_cfa_offset 8
207 ; X32-NEXT: pushl %ebx
208 ; X32-NEXT: .cfi_def_cfa_offset 12
209 ; X32-NEXT: pushl %edi
210 ; X32-NEXT: .cfi_def_cfa_offset 16
211 ; X32-NEXT: pushl %esi
212 ; X32-NEXT: .cfi_def_cfa_offset 20
213 ; X32-NEXT: .cfi_offset %esi, -20
214 ; X32-NEXT: .cfi_offset %edi, -16
215 ; X32-NEXT: .cfi_offset %ebx, -12
216 ; X32-NEXT: .cfi_offset %ebp, -8
217 ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
218 ; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
219 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx
220 ; X32-NEXT: movb {{[0-9]+}}(%esp), %ch
221 ; X32-NEXT: movb {{[0-9]+}}(%esp), %cl
222 ; X32-NEXT: jmp .LBB3_1
223 ; X32-NEXT: .p2align 4, 0x90
224 ; X32-NEXT: .LBB3_5: # %bb1
225 ; X32-NEXT: # in Loop: Header=BB3_1 Depth=1
226 ; X32-NEXT: xorl %eax, %eax
227 ; X32-NEXT: xorl %edx, %edx
228 ; X32-NEXT: idivl %ebp
229 ; X32-NEXT: .LBB3_1: # %bb1
230 ; X32-NEXT: # =>This Inner Loop Header: Depth=1
231 ; X32-NEXT: movsbl %cl, %eax
232 ; X32-NEXT: movl %eax, %edx
233 ; X32-NEXT: sarl $31, %edx
234 ; X32-NEXT: cmpl %eax, %esi
235 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
236 ; X32-NEXT: sbbl %edx, %eax
237 ; X32-NEXT: setl %al
238 ; X32-NEXT: setl %dl
239 ; X32-NEXT: movzbl %dl, %ebp
240 ; X32-NEXT: negl %ebp
241 ; X32-NEXT: testb $-1, %al
242 ; X32-NEXT: jne .LBB3_3
243 ; X32-NEXT: # %bb.2: # %bb1
244 ; X32-NEXT: # in Loop: Header=BB3_1 Depth=1
245 ; X32-NEXT: movb %ch, %cl
246 ; X32-NEXT: .LBB3_3: # %bb1
247 ; X32-NEXT: # in Loop: Header=BB3_1 Depth=1
248 ; X32-NEXT: movb %cl, (%ebx)
249 ; X32-NEXT: movl (%edi), %edx
250 ; X32-NEXT: testb $-1, %al
251 ; X32-NEXT: jne .LBB3_5
252 ; X32-NEXT: # %bb.4: # %bb1
253 ; X32-NEXT: # in Loop: Header=BB3_1 Depth=1
254 ; X32-NEXT: movl %edx, %ebp
255 ; X32-NEXT: jmp .LBB3_5
256 ;
257 ; X64-LABEL: PR37100:
258 ; X64: # %bb.0: # %bb
259 ; X64-NEXT: movq %rdx, %r10
260 ; X64-NEXT: jmp .LBB3_1
261 ; X64-NEXT: .p2align 4, 0x90
262 ; X64-NEXT: .LBB3_5: # %bb1
263 ; X64-NEXT: # in Loop: Header=BB3_1 Depth=1
264 ; X64-NEXT: xorl %eax, %eax
265 ; X64-NEXT: xorl %edx, %edx
266 ; X64-NEXT: idivl %esi
267 ; X64-NEXT: .LBB3_1: # %bb1
268 ; X64-NEXT: # =>This Inner Loop Header: Depth=1
269 ; X64-NEXT: movsbq %dil, %rax
270 ; X64-NEXT: xorl %esi, %esi
271 ; X64-NEXT: cmpq %rax, %r10
272 ; X64-NEXT: setl %sil
273 ; X64-NEXT: negl %esi
274 ; X64-NEXT: cmpq %rax, %r10
275 ; X64-NEXT: jl .LBB3_3
276 ; X64-NEXT: # %bb.2: # %bb1
277 ; X64-NEXT: # in Loop: Header=BB3_1 Depth=1
278 ; X64-NEXT: movl %ecx, %edi
279 ; X64-NEXT: .LBB3_3: # %bb1
280 ; X64-NEXT: # in Loop: Header=BB3_1 Depth=1
281 ; X64-NEXT: movb %dil, (%r8)
282 ; X64-NEXT: jl .LBB3_5
283 ; X64-NEXT: # %bb.4: # %bb1
284 ; X64-NEXT: # in Loop: Header=BB3_1 Depth=1
285 ; X64-NEXT: movl (%r9), %esi
286 ; X64-NEXT: jmp .LBB3_5
287 bb:
288 br label %bb1
289
290 bb1:
291 %tmp = phi i8 [ %tmp8, %bb1 ], [ %arg1, %bb ]
292 %tmp2 = phi i16 [ %tmp12, %bb1 ], [ %arg2, %bb ]
293 %tmp3 = icmp sgt i16 %tmp2, 7
294 %tmp4 = select i1 %tmp3, i16 %tmp2, i16 7
295 %tmp5 = sext i8 %tmp to i64
296 %tmp6 = icmp slt i64 %arg3, %tmp5
297 %tmp7 = sext i1 %tmp6 to i32
298 %tmp8 = select i1 %tmp6, i8 %tmp, i8 %arg4
299 store volatile i8 %tmp8, i8* %ptr1
300 %tmp9 = load volatile i32, i32* %ptr2
301 %tmp10 = select i1 %tmp6, i32 %tmp7, i32 %tmp9
302 %tmp11 = srem i32 0, %tmp10
303 %tmp12 = trunc i32 %tmp11 to i16
304 br label %bb1
305 }