llvm.org GIT mirror llvm / b9803a8
- Add pseudo instructions tLDRpci_pic and t2LDRpci_pic, which do a pc-relative load of a GV from the constant pool and then add pc. This allows the code sequence to be rematerializable, so it can be hoisted by machine LICM. - Add a late pass to break these pseudo instructions into a number of real instructions. Also move the code in the Thumb2 IT pass that breaks up t2MOVi32imm to this pass. This is done before post-regalloc scheduling to allow the scheduler to properly schedule these instructions. It also allows them to be if-converted and shrunk by later passes. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@86304 91177308-0d34-0410-b5e6-96231b3b80d8 Evan Cheng 10 years ago
15 changed file(s) with 258 addition(s) and 59 deletion(s). Raw diff Collapse all Expand all
102102 ObjectCodeEmitter &OCE);
103103
104104 FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false);
105 FunctionPass *createARMExpandPseudoPass();
105106 FunctionPass *createARMConstantIslandPass();
106107 FunctionPass *createNEONPreAllocPass();
107108 FunctionPass *createNEONMoveFixPass();
260260
261261 virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
262262 MachineInstr* MI,
263 const SmallVectorImpl &Ops,
263 const SmallVectorImpl &Ops,
264264 MachineInstr* LoadMI) const;
265
266265 };
267266
268267 static inline
0 //===-- ARMExpandPseudoInsts.cpp - Expand pseudo instructions -----*- C++ -*-=//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains a pass that expands pseudo instructions into target
10 // instructions to allow proper scheduling, if-conversion, and other late
11 // optimizations. This pass should be run after register allocation but before
12 // the post-regalloc scheduling pass.
13 //
14 //===----------------------------------------------------------------------===//
15
16 #define DEBUG_TYPE "arm-pseudo"
17 #include "ARM.h"
18 #include "ARMBaseInstrInfo.h"
19 #include "llvm/CodeGen/MachineFunctionPass.h"
20 #include "llvm/CodeGen/MachineInstrBuilder.h"
21
22 using namespace llvm;
23
24 namespace {
25 class ARMExpandPseudo : public MachineFunctionPass {
26 public:
27 static char ID;
28 ARMExpandPseudo() : MachineFunctionPass(&ID) {}
29
30 const TargetInstrInfo *TII;
31
32 virtual bool runOnMachineFunction(MachineFunction &Fn);
33
34 virtual const char *getPassName() const {
35 return "ARM pseudo instruction expansion pass";
36 }
37
38 private:
39 bool ExpandMBB(MachineBasicBlock &MBB);
40 };
41 char ARMExpandPseudo::ID = 0;
42 }
43
44 bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
45 bool Modified = false;
46
47 MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
48 while (MBBI != E) {
49 MachineInstr &MI = *MBBI;
50 MachineBasicBlock::iterator NMBBI = next(MBBI);
51
52 unsigned Opcode = MI.getOpcode();
53 switch (Opcode) {
54 default: break;
55 case ARM::tLDRpci_pic:
56 case ARM::t2LDRpci_pic: {
57 unsigned NewLdOpc = (Opcode == ARM::tLDRpci_pic)
58 ? ARM::tLDRpci : ARM::t2LDRpci;
59 unsigned DstReg = MI.getOperand(0).getReg();
60 if (!MI.getOperand(0).isDead()) {
61 MachineInstr *NewMI =
62 AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
63 TII->get(NewLdOpc), DstReg)
64 .addOperand(MI.getOperand(1)));
65 NewMI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
66 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tPICADD))
67 .addReg(DstReg, getDefRegState(true))
68 .addReg(DstReg)
69 .addOperand(MI.getOperand(2));
70 }
71 MI.eraseFromParent();
72 Modified = true;
73 break;
74 }
75 case ARM::t2MOVi32imm: {
76 unsigned DstReg = MI.getOperand(0).getReg();
77 unsigned Imm = MI.getOperand(1).getImm();
78 unsigned Lo16 = Imm & 0xffff;
79 unsigned Hi16 = (Imm >> 16) & 0xffff;
80 if (!MI.getOperand(0).isDead()) {
81 AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
82 TII->get(ARM::t2MOVi16), DstReg)
83 .addImm(Lo16));
84 AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
85 TII->get(ARM::t2MOVTi16))
86 .addReg(DstReg, getDefRegState(true))
87 .addReg(DstReg).addImm(Hi16));
88 }
89 MI.eraseFromParent();
90 Modified = true;
91 }
92 // FIXME: expand t2MOVi32imm
93 }
94 MBBI = NMBBI;
95 }
96
97 return Modified;
98 }
99
100 bool ARMExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
101 TII = MF.getTarget().getInstrInfo();
102
103 bool Modified = false;
104 for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); MFI != E;
105 ++MFI)
106 Modified |= ExpandMBB(*MFI);
107 return Modified;
108 }
109
110 /// createARMExpandPseudoPass - returns an instance of the pseudo instruction
111 /// expansion pass.
112 FunctionPass *llvm::createARMExpandPseudoPass() {
113 return new ARMExpandPseudo();
114 }
8484 unsigned DestReg, unsigned SubIdx,
8585 const MachineInstr *Orig) const {
8686 DebugLoc dl = Orig->getDebugLoc();
87 if (Orig->getOpcode() == ARM::MOVi2pieces) {
87 unsigned Opcode = Orig->getOpcode();
88 switch (Opcode) {
89 default: {
90 MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
91 MI->getOperand(0).setReg(DestReg);
92 MBB.insert(I, MI);
93 break;
94 }
95 case ARM::MOVi2pieces:
8896 RI.emitLoadConstPool(MBB, I, dl,
8997 DestReg, SubIdx,
9098 Orig->getOperand(1).getImm(),
9199 (ARMCC::CondCodes)Orig->getOperand(2).getImm(),
92100 Orig->getOperand(3).getReg());
93 return;
101 break;
94102 }
95103
96 MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
97 MI->getOperand(0).setReg(DestReg);
98 MBB.insert(I, MI);
104 MachineInstr *NewMI = prior(I);
105 NewMI->getOperand(0).setSubReg(SubIdx);
99106 }
100107
3434 // Return true if the block does not fall through.
3535 bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
3636
37 void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
38 unsigned DestReg, unsigned SubIdx,
39 const MachineInstr *Orig) const;
40
3741 /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
3842 /// such, whenever a client has an instance of instruction info, it should
3943 /// always be able to get register info as well (through this method).
4044 ///
4145 const ARMRegisterInfo &getRegisterInfo() const { return RI; }
42
43 void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
44 unsigned DestReg, unsigned SubIdx,
45 const MachineInstr *Orig) const;
4646 };
4747
4848 }
739739
740740 def : T1Pat<(i32 imm0_255_comp:$src),
741741 (tMVN (tMOVi8 (imm_comp_XFORM imm:$src)))>;
742
743 // Pseudo instruction that combines ldr from constpool and add pc. This should
744 // be expanded into two instructions late to allow if-conversion and
745 // scheduling.
746 let isReMaterializable = 1 in
747 def tLDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp),
748 NoItinerary, "@ ldr.n\t$dst, $addr\n$cp:\n\tadd\t$dst, pc",
749 [(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)),
750 imm:$cp))]>,
751 Requires<[IsThumb1Only]>;
11781178 def t2MOVi32imm : T2Ix2<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVi,
11791179 "movw", "\t$dst, ${src:lo16}\n\tmovt${p}\t$dst, ${src:hi16}",
11801180 [(set GPR:$dst, (i32 imm:$src))]>;
1181
1182 // Pseudo instruction that combines ldr from constpool and add pc. This should
1183 // be expanded into two instructions late to allow if-conversion and
1184 // scheduling.
1185 let isReMaterializable = 1 in
1186 def t2LDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp),
1187 NoItinerary, "@ ldr.w\t$dst, $addr\n$cp:\n\tadd\t$dst, pc",
1188 [(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)),
1189 imm:$cp))]>,
1190 Requires<[IsThumb2]>;
104104 if (OptLevel != CodeGenOpt::None && !Subtarget.isThumb1Only())
105105 PM.add(createARMLoadStoreOptimizationPass());
106106
107 // Expand some pseudo instructions into multiple instructions to allow
108 // proper scheduling.
109 PM.add(createARMExpandPseudoPass());
110
107111 return true;
108112 }
109113
1010 //
1111 //===----------------------------------------------------------------------===//
1212
13 #include "ARMInstrInfo.h"
13 #include "Thumb1InstrInfo.h"
1414 #include "ARM.h"
15 #include "ARMConstantPoolValue.h"
1516 #include "ARMGenInstrInfo.inc"
1617 #include "ARMMachineFunctionInfo.h"
18 #include "llvm/GlobalValue.h"
19 #include "llvm/CodeGen/MachineConstantPool.h"
1720 #include "llvm/CodeGen/MachineFrameInfo.h"
1821 #include "llvm/CodeGen/MachineInstrBuilder.h"
1922 #include "llvm/CodeGen/MachineMemOperand.h"
262265
263266 return NewMI;
264267 }
268
269 void Thumb1InstrInfo::reMaterialize(MachineBasicBlock &MBB,
270 MachineBasicBlock::iterator I,
271 unsigned DestReg, unsigned SubIdx,
272 const MachineInstr *Orig) const {
273 DebugLoc dl = Orig->getDebugLoc();
274 unsigned Opcode = Orig->getOpcode();
275 switch (Opcode) {
276 default: {
277 MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
278 MI->getOperand(0).setReg(DestReg);
279 MBB.insert(I, MI);
280 break;
281 }
282 case ARM::tLDRpci_pic: {
283 MachineFunction &MF = *MBB.getParent();
284 ARMFunctionInfo *AFI = MF.getInfo();
285 MachineConstantPool *MCP = MF.getConstantPool();
286 unsigned CPI = Orig->getOperand(1).getIndex();
287 const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI];
288 assert(MCPE.isMachineConstantPoolEntry() &&
289 "Expecting a machine constantpool entry!");
290 ARMConstantPoolValue *ACPV =
291 static_cast(MCPE.Val.MachineCPVal);
292 assert(ACPV->isGlobalValue() && "Expecting a GV!");
293 unsigned PCLabelId = AFI->createConstPoolEntryUId();
294 ARMConstantPoolValue *NewCPV =
295 new ARMConstantPoolValue(ACPV->getGV(), PCLabelId, ARMCP::CPValue, 4);
296 CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlignment());
297 MachineInstrBuilder MIB = BuildMI(MBB, I, Orig->getDebugLoc(), get(Opcode),
298 DestReg)
299 .addConstantPoolIndex(CPI).addImm(PCLabelId);
300 (*MIB).setMemRefs(Orig->memoperands_begin(), Orig->memoperands_end());
301 break;
302 }
303 }
304
305 MachineInstr *NewMI = prior(I);
306 NewMI->getOperand(0).setSubReg(SubIdx);
307 }
308
7575 MachineInstr* LoadMI) const {
7676 return 0;
7777 }
78
79 void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
80 unsigned DestReg, unsigned SubIdx,
81 const MachineInstr *Orig) const;
7882 };
7983 }
8084
3333 }
3434
3535 private:
36 MachineBasicBlock::iterator
37 SplitT2MOV32imm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
38 MachineInstr *MI, DebugLoc dl,
39 unsigned PredReg, ARMCC::CondCodes CC);
4036 bool InsertITBlocks(MachineBasicBlock &MBB);
4137 };
4238 char Thumb2ITBlockPass::ID = 0;
4945 return llvm::getInstrPredicate(MI, PredReg);
5046 }
5147
52 MachineBasicBlock::iterator
53 Thumb2ITBlockPass::SplitT2MOV32imm(MachineBasicBlock &MBB,
54 MachineBasicBlock::iterator MBBI,
55 MachineInstr *MI,
56 DebugLoc dl, unsigned PredReg,
57 ARMCC::CondCodes CC) {
58 // Splitting t2MOVi32imm into a pair of t2MOVi16 + t2MOVTi16 here.
59 // The only reason it was a single instruction was so it could be
60 // re-materialized. We want to split it before this and the thumb2
61 // size reduction pass to make sure the IT mask is correct and expose
62 // width reduction opportunities. It doesn't make sense to do this in a
63 // separate pass so here it is.
64 unsigned DstReg = MI->getOperand(0).getReg();
65 bool DstDead = MI->getOperand(0).isDead(); // Is this possible?
66 unsigned Imm = MI->getOperand(1).getImm();
67 unsigned Lo16 = Imm & 0xffff;
68 unsigned Hi16 = (Imm >> 16) & 0xffff;
69 BuildMI(MBB, MBBI, dl, TII->get(ARM::t2MOVi16), DstReg)
70 .addImm(Lo16).addImm(CC).addReg(PredReg);
71 BuildMI(MBB, MBBI, dl, TII->get(ARM::t2MOVTi16))
72 .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstDead))
73 .addReg(DstReg).addImm(Hi16).addImm(CC).addReg(PredReg);
74 --MBBI;
75 --MBBI;
76 MI->eraseFromParent();
77 return MBBI;
78 }
79
8048 bool Thumb2ITBlockPass::InsertITBlocks(MachineBasicBlock &MBB) {
8149 bool Modified = false;
8250
8654 DebugLoc dl = MI->getDebugLoc();
8755 unsigned PredReg = 0;
8856 ARMCC::CondCodes CC = getPredicate(MI, PredReg);
89
90 if (MI->getOpcode() == ARM::t2MOVi32imm) {
91 MBBI = SplitT2MOV32imm(MBB, MBBI, MI, dl, PredReg, CC);
92 continue;
93 }
9457
9558 if (CC == ARMCC::AL) {
9659 ++MBBI;
11477 DebugLoc ndl = NMI->getDebugLoc();
11578 unsigned NPredReg = 0;
11679 ARMCC::CondCodes NCC = getPredicate(NMI, NPredReg);
117 if (NMI->getOpcode() == ARM::t2MOVi32imm) {
118 MBBI = SplitT2MOV32imm(MBB, MBBI, NMI, ndl, NPredReg, NCC);
119 continue;
120 }
121
12280 if (NCC == OCC) {
12381 Mask |= (1 << Pos);
12482 } else if (NCC != CC)
1010 //
1111 //===----------------------------------------------------------------------===//
1212
13 #include "ARMInstrInfo.h"
13 #include "Thumb2InstrInfo.h"
1414 #include "ARM.h"
15 #include "ARMConstantPoolValue.h"
1516 #include "ARMAddressingModes.h"
1617 #include "ARMGenInstrInfo.inc"
1718 #include "ARMMachineFunctionInfo.h"
19 #include "llvm/GlobalValue.h"
20 #include "llvm/CodeGen/MachineConstantPool.h"
1821 #include "llvm/CodeGen/MachineFrameInfo.h"
1922 #include "llvm/CodeGen/MachineInstrBuilder.h"
2023 #include "llvm/CodeGen/MachineMemOperand.h"
131134 ARMBaseInstrInfo::loadRegFromStackSlot(MBB, I, DestReg, FI, RC);
132135 }
133136
137 void Thumb2InstrInfo::reMaterialize(MachineBasicBlock &MBB,
138 MachineBasicBlock::iterator I,
139 unsigned DestReg, unsigned SubIdx,
140 const MachineInstr *Orig) const {
141 DebugLoc dl = Orig->getDebugLoc();
142 unsigned Opcode = Orig->getOpcode();
143 switch (Opcode) {
144 default: {
145 MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
146 MI->getOperand(0).setReg(DestReg);
147 MBB.insert(I, MI);
148 break;
149 }
150 case ARM::t2LDRpci_pic: {
151 MachineFunction &MF = *MBB.getParent();
152 ARMFunctionInfo *AFI = MF.getInfo();
153 MachineConstantPool *MCP = MF.getConstantPool();
154 unsigned CPI = Orig->getOperand(1).getIndex();
155 const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI];
156 assert(MCPE.isMachineConstantPoolEntry() &&
157 "Expecting a machine constantpool entry!");
158 ARMConstantPoolValue *ACPV =
159 static_cast(MCPE.Val.MachineCPVal);
160 assert(ACPV->isGlobalValue() && "Expecting a GV!");
161 unsigned PCLabelId = AFI->createConstPoolEntryUId();
162 ARMConstantPoolValue *NewCPV =
163 new ARMConstantPoolValue(ACPV->getGV(), PCLabelId, ARMCP::CPValue, 4);
164 CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlignment());
165 MachineInstrBuilder MIB = BuildMI(MBB, I, Orig->getDebugLoc(), get(Opcode),
166 DestReg)
167 .addConstantPoolIndex(CPI).addImm(PCLabelId);
168 (*MIB).setMemRefs(Orig->memoperands_begin(), Orig->memoperands_end());
169 break;
170 }
171 }
172
173 MachineInstr *NewMI = prior(I);
174 NewMI->getOperand(0).setSubReg(SubIdx);
175 }
134176
135177 void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
136178 MachineBasicBlock::iterator &MBBI, DebugLoc dl,
4949 unsigned DestReg, int FrameIndex,
5050 const TargetRegisterClass *RC) const;
5151
52 void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
53 unsigned DestReg, unsigned SubIdx,
54 const MachineInstr *Orig) const;
55
5256 /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
5357 /// such, whenever a client has an instance of instruction info, it should
5458 /// always be able to get register info as well (through this method).
55
66 define arm_apcscc void @t() nounwind {
77 ; CHECK: t:
8 ; CHECK: ittt eq
9 ; CHECK-NEXT: addeq
10 ; CHECK-NEXT: movweq
11 ; CHECK-NEXT: movteq
8 ; CHECK: it eq
9 ; CHECK-NEXT: cmpeq
1210 entry:
1311 %pix_a.i294 = alloca [4 x %struct.pix_pos], align 4 ; <[4 x %struct.pix_pos]*> [#uses=2]
1412 br i1 undef, label %land.rhs, label %lor.end
1616 ; CHECK: BB#1
1717 ; CHECK: ldr{{.*}} r{{[0-9]+}}, LCPI1_0
1818 ; CHECK: ldr{{.*}} r{{[0-9]+}}, LCPI1_1
19 ; CHECK: add r{{[0-9]+}}, pc
20 ; CHECK: add r{{[0-9]+}}, pc
21 ; CHECK: LBB1_2
1922 %.pre = load i32* @GV, align 4 ; <i32> [#uses=1]
2023 br label %bb
2124