llvm.org GIT mirror: llvm @ 7da9ecf
Add a quick pass to optimize sign / zero extension instructions. For targets where the pre-extension values are available in the subreg of the result of the extension, replace the uses of the pre-extension value with the result + extract_subreg. For now, this pass is fairly conservative. It only performs the replacement when both the pre- and post-extension values are used in the block. It will miss cases where the post-extension values are live, but not used.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@93278 91177308-0d34-0410-b5e6-96231b3b80d8

Evan Cheng, 10 years ago
9 changed file(s) with 205 addition(s) and 30 deletion(s).
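At its core, the rewrite the pass performs for each qualifying use looks like the hypothetical helper below. It is distilled from OptimizeExts.cpp further down in this commit; the helper name and parameter list are mine, not part of the patch, and the usual LLVM CodeGen headers plus "using namespace llvm" are assumed.

  // Hypothetical helper, for illustration only: the per-use rewrite that
  // OptimizeExts performs once it knows the pre-extension value (SrcReg) is
  // available as sub-register SubIdx of the extension result (DstReg).
  static void rewriteUseToSubReg(MachineRegisterInfo *MRI,
                                 const TargetInstrInfo *TII,
                                 MachineOperand *UseMO,  // use of pre-ext value
                                 unsigned SrcReg,        // pre-extension vreg
                                 unsigned DstReg,        // extension result vreg
                                 unsigned SubIdx) {      // sub-register index
    MachineInstr *UseMI = UseMO->getParent();
    // New vreg with the same register class as the pre-extension value.
    unsigned NewVR = MRI->createVirtualRegister(MRI->getRegClass(SrcReg));
    // NewVR = EXTRACT_SUBREG DstReg, SubIdx, inserted right before the use.
    BuildMI(*UseMI->getParent(), UseMI, UseMI->getDebugLoc(),
            TII->get(TargetInstrInfo::EXTRACT_SUBREG), NewVR)
      .addReg(DstReg).addImm(SubIdx);
    // The use now reads the subreg of the extension result instead of SrcReg.
    UseMO->setReg(NewVR);
  }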
   /// instructions.
   FunctionPass *createMachineSinkingPass();
 
+  /// createOptimizeExtsPass - This pass performs sign / zero extension
+  /// optimization by increasing uses of extended values.
+  FunctionPass *createOptimizeExtsPass();
+
   /// createStackSlotColoringPass - This pass performs stack slot coloring.
   FunctionPass *createStackSlotColoringPass(bool);
 
     return false;
   }
 
-  /// isCoalescableInstr - Return true if the instruction is "coalescable". That
-  /// is, it's like a copy where it's legal for the source to overlap the
-  /// destination. e.g. X86::MOVSX64rr32.
-  virtual bool isCoalescableInstr(const MachineInstr &MI, bool &isCopy,
-                                  unsigned &SrcReg, unsigned &DstReg,
-                                  unsigned &SrcSubIdx, unsigned &DstSubIdx) const {
-    if (isMoveInstr(MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) {
-      isCopy = true;
-      return true;
-    }
+  /// isCoalescableExtInstr - Return true if the instruction is a "coalescable"
+  /// extension instruction. That is, it's like a copy where it's legal for the
+  /// source to overlap the destination. e.g. X86::MOVSX64rr32. If this returns
+  /// true, then it's expected the pre-extension value is available as a subreg
+  /// of the result register. This also returns the sub-register index in
+  /// SubIdx.
+  virtual bool isCoalescableExtInstr(const MachineInstr &MI,
+                                     unsigned &SrcReg, unsigned &DstReg,
+                                     unsigned &SubIdx) const {
     return false;
   }
 
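A sketch of how a client is expected to use the new hook (this mirrors the query made by the OptimizeExts pass below; MI and TII stand for the instruction being inspected and the target's TargetInstrInfo, and are assumptions of the sketch):

  unsigned SrcReg, DstReg, SubIdx;
  if (TII->isCoalescableExtInstr(MI, SrcReg, DstReg, SubIdx)) {
    // The pre-extension value in SrcReg is also available as sub-register
    // SubIdx of the extension result DstReg, so later readers of SrcReg can
    // be redirected to an EXTRACT_SUBREG of DstReg instead of keeping SrcReg
    // alive on its own.
  }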
     cl::desc("Verify generated machine code"),
     cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=NULL));
 
+#if 1
+static cl::opt<bool> XX("xx", cl::Hidden);
+#endif
+
 // Enable or disable FastISel. Both options are needed, because
 // FastISel is enabled by default with -fast, and we wish to be
 // able to enable or disable fast-isel independently from -O0.
...
                  /* allowDoubleDefs= */ true);
 
   if (OptLevel != CodeGenOpt::None) {
+    PM.add(createOptimizeExtsPass());
     if (!DisableMachineLICM)
       PM.add(createMachineLICMPass());
     if (!DisableMachineSink)
//===-- OptimizeExts.cpp - Optimize sign / zero extension instrs ---------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "ext-opt"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;

static cl::opt<bool> Aggressive("aggressive-ext-opt", cl::Hidden,
                                cl::desc("Aggressive extension optimization"));

STATISTIC(NumReuse, "Number of extension results reused");

namespace {
  class OptimizeExts : public MachineFunctionPass {
    const TargetMachine *TM;
    const TargetInstrInfo *TII;
    MachineRegisterInfo *MRI;
    MachineDominatorTree *DT;   // Machine dominator tree

  public:
    static char ID; // Pass identification
    OptimizeExts() : MachineFunctionPass(&ID) {}

    virtual bool runOnMachineFunction(MachineFunction &MF);

    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.setPreservesCFG();
      MachineFunctionPass::getAnalysisUsage(AU);
      AU.addRequired<MachineDominatorTree>();
      AU.addPreserved<MachineDominatorTree>();
    }
  };
}

char OptimizeExts::ID = 0;
static RegisterPass<OptimizeExts>
X("opt-exts", "Optimize sign / zero extensions");

FunctionPass *llvm::createOptimizeExtsPass() { return new OptimizeExts(); }

bool OptimizeExts::runOnMachineFunction(MachineFunction &MF) {
  TM = &MF.getTarget();
  TII = TM->getInstrInfo();
  MRI = &MF.getRegInfo();
  DT = &getAnalysis<MachineDominatorTree>();

  bool Changed = false;

  SmallPtrSet<MachineInstr*, 8> LocalMIs;
  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
    MachineBasicBlock *MBB = &*I;
    for (MachineBasicBlock::iterator MII = I->begin(), ME = I->end(); MII != ME;
         ++MII) {
      MachineInstr *MI = &*MII;
      LocalMIs.insert(MI);

      unsigned SrcReg, DstReg, SubIdx;
      if (TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx)) {
        if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
            TargetRegisterInfo::isPhysicalRegister(SrcReg))
          continue;

        MachineRegisterInfo::use_iterator UI = MRI->use_begin(SrcReg);
        if (++UI == MRI->use_end())
          // No other uses.
          continue;

        // Ok, the source has other uses. See if we can replace the other uses
        // with use of the result of the extension.

        SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs;
        UI = MRI->use_begin(DstReg);
        for (MachineRegisterInfo::use_iterator UE = MRI->use_end(); UI != UE;
             ++UI)
          ReachedBBs.insert(UI->getParent());

        bool ExtendLife = true;
        SmallVector<MachineOperand*, 8> Uses;
        SmallVector<MachineOperand*, 8> ExtendedUses;

        UI = MRI->use_begin(SrcReg);
        for (MachineRegisterInfo::use_iterator UE = MRI->use_end(); UI != UE;
             ++UI) {
          MachineOperand &UseMO = UI.getOperand();
          MachineInstr *UseMI = &*UI;
          if (UseMI == MI)
            continue;
          MachineBasicBlock *UseMBB = UseMI->getParent();
          if (UseMBB == MBB) {
            // Local uses that come after the extension.
            if (!LocalMIs.count(UseMI))
              Uses.push_back(&UseMO);
          } else if (ReachedBBs.count(UseMBB))
            // Non-local uses where the result of extension is used. Always
            // replace these.
            Uses.push_back(&UseMO);
          else if (Aggressive && DT->dominates(MBB, UseMBB))
            // We may want to extend live range of the extension result in order
            // to replace these uses.
            ExtendedUses.push_back(&UseMO);
          else {
            // Both will be live out of the def MBB anyway. Don't extend live
            // range of the extension result.
            ExtendLife = false;
            break;
          }
        }

        if (ExtendLife && !ExtendedUses.empty())
          // Ok, we'll extend the liveness of the extension result.
          std::copy(ExtendedUses.begin(), ExtendedUses.end(),
                    std::back_inserter(Uses));

        // Now replace all uses.
        if (!Uses.empty()) {
          const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
          for (unsigned i = 0, e = Uses.size(); i != e; ++i) {
            MachineOperand *UseMO = Uses[i];
            MachineInstr *UseMI = UseMO->getParent();
            MachineBasicBlock *UseMBB = UseMI->getParent();
            unsigned NewVR = MRI->createVirtualRegister(RC);
            BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
                    TII->get(TargetInstrInfo::EXTRACT_SUBREG), NewVR)
              .addReg(DstReg).addImm(SubIdx);
            UseMO->setReg(NewVR);
            ++NumReuse;
            Changed = true;
          }
        }
      }
    }
  }

  return Changed;
}
 }
 
 bool
-X86InstrInfo::isCoalescableInstr(const MachineInstr &MI, bool &isCopy,
-                                 unsigned &SrcReg, unsigned &DstReg,
-                                 unsigned &SrcSubIdx, unsigned &DstSubIdx) const {
+X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
+                                    unsigned &SrcReg, unsigned &DstReg,
+                                    unsigned &SubIdx) const {
   switch (MI.getOpcode()) {
   default: break;
   case X86::MOVSX16rr8:
...
     if (MI.getOperand(0).getSubReg() || MI.getOperand(1).getSubReg())
       // Be conservative.
       return false;
-    isCopy = false;
     SrcReg = MI.getOperand(1).getReg();
     DstReg = MI.getOperand(0).getReg();
-    DstSubIdx = 0;
     switch (MI.getOpcode()) {
     default:
       llvm_unreachable(0);
...
     case X86::MOVZX32rr8:
     case X86::MOVSX64rr8:
     case X86::MOVZX64rr8:
-      SrcSubIdx = 1;
+      SubIdx = 1;
       break;
     case X86::MOVSX32rr16:
     case X86::MOVZX32rr16:
     case X86::MOVSX64rr16:
     case X86::MOVZX64rr16:
-      SrcSubIdx = 3;
+      SubIdx = 3;
       break;
     case X86::MOVSX64rr32:
     case X86::MOVZX64rr32:
-      SrcSubIdx = 4;
+      SubIdx = 4;
       break;
     }
-  }
-  }
-  return isMoveInstr(MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx);
+    return true;
+  }
+  }
+  return false;
 }
 
 /// isFrameOperand - Return true and the FrameIndex if the specified
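The SubIdx values returned above (1, 3, 4) are x86 sub-register indices; assuming they name the low 8-, 16- and 32-bit sub-registers in this revision's register descriptions, a purely illustrative helper (hypothetical name, not part of the patch) would read them as the width of the pre-extension value:

  // Illustration only: width of the pre-extension value implied by the SubIdx
  // values set in isCoalescableExtInstr above, assuming SubIdx 1/3/4 are the
  // x86 low 8-/16-/32-bit sub-register indices in this revision.
  static unsigned preExtensionWidthInBits(unsigned SubIdx) {
    switch (SubIdx) {
    case 1: return 8;   // MOVSX16rr8, MOVZX32rr8, MOVSX64rr8, ...
    case 3: return 16;  // MOVSX32rr16, MOVZX64rr16, ...
    case 4: return 32;  // MOVSX64rr32, MOVZX64rr32
    default: return 0;  // not one of the extensions handled above
    }
  }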
                             unsigned &SrcReg, unsigned &DstReg,
                             unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
 
-  /// isCoalescableInstr - Return true if the instruction is "coalescable". That
-  /// is, it's like a copy where it's legal for the source to overlap the
-  /// destination. e.g. X86::MOVSX64rr32.
-  virtual bool isCoalescableInstr(const MachineInstr &MI, bool &isCopy,
-                                  unsigned &SrcReg, unsigned &DstReg,
-                                  unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
-
+  /// isCoalescableExtInstr - Return true if the instruction is a "coalescable"
+  /// extension instruction. That is, it's like a copy where it's legal for the
+  /// source to overlap the destination. e.g. X86::MOVSX64rr32. If this returns
+  /// true, then it's expected the pre-extension value is available as a subreg
+  /// of the result register. This also returns the sub-register index in
+  /// SubIdx.
+  virtual bool isCoalescableExtInstr(const MachineInstr &MI,
+                                     unsigned &SrcReg, unsigned &DstReg,
+                                     unsigned &SubIdx) const;
 
   unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
   /// isLoadFromStackSlotPostFE - Check for post-frame ptr elimination
-; RUN: llc < %s -mtriple=i386-apple-darwin -disable-fp-elim -stats |& grep asm-printer | grep 58
+; RUN: llc < %s -mtriple=i386-apple-darwin -disable-fp-elim -stats |& grep asm-printer | grep 57
 ; PR2568
 
 @g_3 = external global i16		; <i16*> [#uses=1]
; RUN: llc < %s -march=x86-64 | FileCheck %s
; rdar://7529457

define i64 @t(i64 %A, i64 %B, i32* %P, i64 *%P2) nounwind {
; CHECK: t:
; CHECK: movslq %e{{.*}}, %rax
; CHECK: movq %rax
; CHECK: movl %eax
  %C = add i64 %A, %B
  %D = trunc i64 %C to i32
  volatile store i32 %D, i32* %P
  %E = shl i64 %C, 32
  %F = ashr i64 %E, 32
  volatile store i64 %F, i64 *%P2
  volatile store i32 %D, i32* %P
  ret i64 undef
}
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -relocation-model=pic -disable-fp-elim -color-ss-with-regs -stats -info-output-file - > %t
-; RUN: grep stackcoloring %t | grep "stack slot refs replaced with reg refs" | grep 6
+; RUN: grep stackcoloring %t | grep "stack slot refs replaced with reg refs" | grep 9
 
 	type { [62 x %struct.Bitvec*] }		; type %0
 	type { i8* }		; type %1