llvm.org GIT mirror llvm / ffde080
Convert VLD1 and VLD2 instructions to use pseudo-instructions until after regalloc. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@112825 91177308-0d34-0410-b5e6-96231b3b80d8 Bob Wilson 10 years ago
5 changed file(s) with 213 addition(s) and 55 deletion(s). Raw diff Collapse all Expand all
4747 void TransferImpOps(MachineInstr &OldMI,
4848 MachineInstrBuilder &UseMI, MachineInstrBuilder &DefMI);
4949 bool ExpandMBB(MachineBasicBlock &MBB);
50 void ExpandVLD(MachineBasicBlock::iterator &MBBI, unsigned Opc,
51 bool hasWriteBack, NEONRegSpacing RegSpc, unsigned NumRegs);
5052 void ExpandVST(MachineBasicBlock::iterator &MBBI, unsigned Opc,
5153 bool hasWriteBack, NEONRegSpacing RegSpc, unsigned NumRegs);
5254 };
6971 DefMI.addReg(MO.getReg(),
7072 getDefRegState(true) | getDeadRegState(MO.isDead()));
7173 }
74 }
75
76 /// ExpandVLD -
77 ///
78 void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI,
79 unsigned Opc, bool hasWriteBack,
80 NEONRegSpacing RegSpc, unsigned NumRegs) {
81 MachineInstr &MI = *MBBI;
82 MachineBasicBlock &MBB = *MI.getParent();
83
84 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc));
85 unsigned OpIdx = 0;
86
87 bool DstIsDead = MI.getOperand(OpIdx).isDead();
88 unsigned DstReg = MI.getOperand(OpIdx++).getReg();
89 unsigned D0, D1, D2, D3;
90 if (RegSpc == SingleSpc) {
91 D0 = TRI->getSubReg(DstReg, ARM::dsub_0);
92 D1 = TRI->getSubReg(DstReg, ARM::dsub_1);
93 D2 = TRI->getSubReg(DstReg, ARM::dsub_2);
94 D3 = TRI->getSubReg(DstReg, ARM::dsub_3);
95 } else if (RegSpc == EvenDblSpc) {
96 D0 = TRI->getSubReg(DstReg, ARM::dsub_0);
97 D1 = TRI->getSubReg(DstReg, ARM::dsub_2);
98 D2 = TRI->getSubReg(DstReg, ARM::dsub_4);
99 D3 = TRI->getSubReg(DstReg, ARM::dsub_6);
100 } else {
101 assert(RegSpc == OddDblSpc && "unknown register spacing for VLD");
102 D0 = TRI->getSubReg(DstReg, ARM::dsub_1);
103 D1 = TRI->getSubReg(DstReg, ARM::dsub_3);
104 D2 = TRI->getSubReg(DstReg, ARM::dsub_5);
105 D3 = TRI->getSubReg(DstReg, ARM::dsub_7);
106 }
107 MIB.addReg(D0).addReg(D1);
108 if (NumRegs > 2)
109 MIB.addReg(D2);
110 if (NumRegs > 3)
111 MIB.addReg(D3);
112
113 if (hasWriteBack) {
114 bool WBIsDead = MI.getOperand(OpIdx).isDead();
115 unsigned WBReg = MI.getOperand(OpIdx++).getReg();
116 MIB.addReg(WBReg, getDefRegState(true) | getDeadRegState(WBIsDead));
117 }
118 // Copy the addrmode6 operands.
119 bool AddrIsKill = MI.getOperand(OpIdx).isKill();
120 MIB.addReg(MI.getOperand(OpIdx++).getReg(), getKillRegState(AddrIsKill));
121 MIB.addImm(MI.getOperand(OpIdx++).getImm());
122 if (hasWriteBack) {
123 // Copy the am6offset operand.
124 bool OffsetIsKill = MI.getOperand(OpIdx).isKill();
125 MIB.addReg(MI.getOperand(OpIdx++).getReg(), getKillRegState(OffsetIsKill));
126 }
127
128 MIB = AddDefaultPred(MIB);
129 TransferImpOps(MI, MIB, MIB);
130 // Add an implicit def for the super-reg.
131 MIB.addReg(DstReg, (getDefRegState(true) | getDeadRegState(DstIsDead) |
132 getImplRegState(true)));
133 MI.eraseFromParent();
72134 }
73135
74136 /// ExpandVST - Translate VST pseudo instructions with Q, QQ or QQQQ register
230292 TransferImpOps(MI, Even, Odd);
231293 MI.eraseFromParent();
232294 }
295
296 case ARM::VLD1q8Pseudo:
297 ExpandVLD(MBBI, ARM::VLD1q8, false, SingleSpc, 2); break;
298 case ARM::VLD1q16Pseudo:
299 ExpandVLD(MBBI, ARM::VLD1q16, false, SingleSpc, 2); break;
300 case ARM::VLD1q32Pseudo:
301 ExpandVLD(MBBI, ARM::VLD1q32, false, SingleSpc, 2); break;
302 case ARM::VLD1q64Pseudo:
303 ExpandVLD(MBBI, ARM::VLD1q64, false, SingleSpc, 2); break;
304 case ARM::VLD1q8Pseudo_UPD:
305 ExpandVLD(MBBI, ARM::VLD1q8, true, SingleSpc, 2); break;
306 case ARM::VLD1q16Pseudo_UPD:
307 ExpandVLD(MBBI, ARM::VLD1q16, true, SingleSpc, 2); break;
308 case ARM::VLD1q32Pseudo_UPD:
309 ExpandVLD(MBBI, ARM::VLD1q32, true, SingleSpc, 2); break;
310 case ARM::VLD1q64Pseudo_UPD:
311 ExpandVLD(MBBI, ARM::VLD1q64, true, SingleSpc, 2); break;
312
313 case ARM::VLD2d8Pseudo:
314 ExpandVLD(MBBI, ARM::VLD2d8, false, SingleSpc, 2); break;
315 case ARM::VLD2d16Pseudo:
316 ExpandVLD(MBBI, ARM::VLD2d16, false, SingleSpc, 2); break;
317 case ARM::VLD2d32Pseudo:
318 ExpandVLD(MBBI, ARM::VLD2d32, false, SingleSpc, 2); break;
319 case ARM::VLD2q8Pseudo:
320 ExpandVLD(MBBI, ARM::VLD2q8, false, SingleSpc, 4); break;
321 case ARM::VLD2q16Pseudo:
322 ExpandVLD(MBBI, ARM::VLD2q16, false, SingleSpc, 4); break;
323 case ARM::VLD2q32Pseudo:
324 ExpandVLD(MBBI, ARM::VLD2q32, false, SingleSpc, 4); break;
325 case ARM::VLD2d8Pseudo_UPD:
326 ExpandVLD(MBBI, ARM::VLD2d8, true, SingleSpc, 2); break;
327 case ARM::VLD2d16Pseudo_UPD:
328 ExpandVLD(MBBI, ARM::VLD2d16, true, SingleSpc, 2); break;
329 case ARM::VLD2d32Pseudo_UPD:
330 ExpandVLD(MBBI, ARM::VLD2d32, true, SingleSpc, 2); break;
331 case ARM::VLD2q8Pseudo_UPD:
332 ExpandVLD(MBBI, ARM::VLD2q8, true, SingleSpc, 4); break;
333 case ARM::VLD2q16Pseudo_UPD:
334 ExpandVLD(MBBI, ARM::VLD2q16, true, SingleSpc, 4); break;
335 case ARM::VLD2q32Pseudo_UPD:
336 ExpandVLD(MBBI, ARM::VLD2q32, true, SingleSpc, 4); break;
337
338 case ARM::VLD1d64TPseudo:
339 ExpandVLD(MBBI, ARM::VLD1d64T, false, SingleSpc, 3); break;
340 case ARM::VLD1d64TPseudo_UPD:
341 ExpandVLD(MBBI, ARM::VLD1d64T, true, SingleSpc, 3); break;
342
343 case ARM::VLD1d64QPseudo:
344 ExpandVLD(MBBI, ARM::VLD1d64Q, false, SingleSpc, 4); break;
345 case ARM::VLD1d64QPseudo_UPD:
346 ExpandVLD(MBBI, ARM::VLD1d64Q, true, SingleSpc, 4); break;
233347
234348 case ARM::VST1q8Pseudo:
235349 ExpandVST(MBBI, ARM::VST1q8, false, SingleSpc, 2); break;
11151115 if (is64BitVector) {
11161116 unsigned Opc = DOpcodes[OpcodeIndex];
11171117 const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain };
1118 std::vector<EVT> ResTys(NumVecs, VT);
1119 ResTys.push_back(MVT::Other);
1120 SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5);
1121 if (NumVecs < 2)
1118 SDNode *VLd;
1119 if (NumVecs <= 2) {
1120 EVT ResTy;
1121 if (NumVecs == 1)
1122 ResTy = VT;
1123 else
1124 ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, NumVecs);
1125 VLd = CurDAG->getMachineNode(Opc, dl, ResTy, MVT::Other, Ops, 5);
1126 } else {
1127 std::vector<EVT> ResTys(NumVecs, VT);
1128 ResTys.push_back(MVT::Other);
1129 VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5);
1130 }
1131 if (NumVecs == 1)
11221132 return VLd;
11231133
1124 SDValue RegSeq;
1125 SDValue V0 = SDValue(VLd, 0);
1126 SDValue V1 = SDValue(VLd, 1);
1127
1128 // Form a REG_SEQUENCE to force register allocation.
1129 if (NumVecs == 2)
1130 RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
1134 SDValue SuperReg;
1135 if (NumVecs <= 2)
1136 SuperReg = SDValue(VLd, 0);
11311137 else {
1138 SDValue V0 = SDValue(VLd, 0);
1139 SDValue V1 = SDValue(VLd, 1);
1140 // Form a REG_SEQUENCE to force register allocation.
11321141 SDValue V2 = SDValue(VLd, 2);
11331142 // If it's a vld3, form a quad D-register but discard the last part.
11341143 SDValue V3 = (NumVecs == 3)
11351144 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
11361145 : SDValue(VLd, 3);
1137 RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
1146 SuperReg = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
11381147 }
11391148
11401149 assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
11411150 for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
11421151 SDValue D = CurDAG->getTargetExtractSubreg(ARM::dsub_0+Vec,
1143 dl, VT, RegSeq);
1152 dl, VT, SuperReg);
11441153 ReplaceUses(SDValue(N, Vec), D);
11451154 }
1146 ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, NumVecs));
1155 ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, NumVecs <= 2 ? 1 : NumVecs));
11471156 return NULL;
11481157 }
11491158
11531162 // loading pairs of D regs.
11541163 unsigned Opc = QOpcodes0[OpcodeIndex];
11551164 const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain };
1156 std::vector<EVT> ResTys(2 * NumVecs, RegVT);
1157 ResTys.push_back(MVT::Other);
1158 SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5);
1159 Chain = SDValue(VLd, 2 * NumVecs);
1165
1166 EVT ResTy;
1167 if (NumVecs == 1)
1168 ResTy = VT;
1169 else
1170 ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, 2 * NumVecs);
1171 SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTy, MVT::Other, Ops, 5);
11601172
11611173 // Combine the even and odd subregs to produce the result.
1162 if (NumVecs == 1) {
1163 SDNode *Q = PairDRegs(VT, SDValue(VLd, 0), SDValue(VLd, 1));
1164 ReplaceUses(SDValue(N, 0), SDValue(Q, 0));
1165 } else {
1166 SDValue QQ = SDValue(QuadDRegs(MVT::v4i64,
1167 SDValue(VLd, 0), SDValue(VLd, 1),
1168 SDValue(VLd, 2), SDValue(VLd, 3)), 0);
1169 SDValue Q0 = CurDAG->getTargetExtractSubreg(ARM::qsub_0, dl, VT, QQ);
1170 SDValue Q1 = CurDAG->getTargetExtractSubreg(ARM::qsub_1, dl, VT, QQ);
1171 ReplaceUses(SDValue(N, 0), Q0);
1172 ReplaceUses(SDValue(N, 1), Q1);
1173 }
1174 if (NumVecs == 1)
1175 return VLd;
1176
1177 SDValue QQ = SDValue(VLd, 0);
1178 Chain = SDValue(VLd, 1);
1179
1180 SDValue Q0 = CurDAG->getTargetExtractSubreg(ARM::qsub_0, dl, VT, QQ);
1181 SDValue Q1 = CurDAG->getTargetExtractSubreg(ARM::qsub_1, dl, VT, QQ);
1182 ReplaceUses(SDValue(N, 0), Q0);
1183 ReplaceUses(SDValue(N, 1), Q1);
11741184 } else {
11751185 // Otherwise, quad registers are loaded with two separate instructions,
11761186 // where one loads the even registers and the other loads the odd registers.
21412151 case Intrinsic::arm_neon_vld1: {
21422152 unsigned DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
21432153 ARM::VLD1d32, ARM::VLD1d64 };
2144 unsigned QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
2145 ARM::VLD1q32, ARM::VLD1q64 };
2154 unsigned QOpcodes[] = { ARM::VLD1q8Pseudo, ARM::VLD1q16Pseudo,
2155 ARM::VLD1q32Pseudo, ARM::VLD1q64Pseudo };
21462156 return SelectVLD(N, 1, DOpcodes, QOpcodes, 0);
21472157 }
21482158
21492159 case Intrinsic::arm_neon_vld2: {
2150 unsigned DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
2151 ARM::VLD2d32, ARM::VLD1q64 };
2152 unsigned QOpcodes[] = { ARM::VLD2q8, ARM::VLD2q16, ARM::VLD2q32 };
2160 unsigned DOpcodes[] = { ARM::VLD2d8Pseudo, ARM::VLD2d16Pseudo,
2161 ARM::VLD2d32Pseudo, ARM::VLD1q64Pseudo };
2162 unsigned QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
2163 ARM::VLD2q32Pseudo };
21532164 return SelectVLD(N, 2, DOpcodes, QOpcodes, 0);
21542165 }
21552166
166166
167167 let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
168168
169 // Classes for VLD* pseudo-instructions with multi-register operands.
170 // These are expanded to real instructions after register allocation.
// NOTE(review): all four classes pass the IIC_VST itinerary although they
// model loads; this looks carried over from the VST pseudo classes -- confirm
// whether a VLD itinerary should be used instead.

// Load into a single QPR destination, no address writeback.
171 class VLDQPseudo
172 : PseudoNLdSt<(outs QPR:$dst), (ins addrmode6:$addr), IIC_VST, "">;
// Load into a QPR destination with an additional GPR writeback result and an
// am6offset input; the last argument relates $wb to the address operand.
173 class VLDQWBPseudo
174 : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
175 (ins addrmode6:$addr, am6offset:$offset), IIC_VST,
176 "$addr.addr = $wb">;
// Load into a single QQPR destination, no address writeback.
177 class VLDQQPseudo
178 : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), IIC_VST, "">;
// Load into a QQPR destination with an additional GPR writeback result and an
// am6offset input; the last argument relates $wb to the address operand.
179 class VLDQQWBPseudo
180 : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
181 (ins addrmode6:$addr, am6offset:$offset), IIC_VST,
182 "$addr.addr = $wb">;
183
169184 // VLD1 : Vector Load (multiple single elements)
170185 class VLD1D<bits<4> op7_4, string Dt>
171186 : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst),
185200 def VLD1q16 : VLD1Q<0b0100, "16">;
186201 def VLD1q32 : VLD1Q<0b1000, "32">;
187202 def VLD1q64 : VLD1Q<0b1100, "64">;
203
204 def VLD1q8Pseudo : VLDQPseudo;
205 def VLD1q16Pseudo : VLDQPseudo;
206 def VLD1q32Pseudo : VLDQPseudo;
207 def VLD1q64Pseudo : VLDQPseudo;
188208
189209 // ...with address register writeback:
190210 class VLD1DWB<bits<4> op7_4, string Dt>
208228 def VLD1q32_UPD : VLD1QWB<0b1000, "32">;
209229 def VLD1q64_UPD : VLD1QWB<0b1100, "64">;
210230
231 def VLD1q8Pseudo_UPD : VLDQWBPseudo;
232 def VLD1q16Pseudo_UPD : VLDQWBPseudo;
233 def VLD1q32Pseudo_UPD : VLDQWBPseudo;
234 def VLD1q64Pseudo_UPD : VLDQWBPseudo;
235
211236 // ...with 3 registers (some of these are only for the disassembler):
212237 class VLD1D3<bits<4> op7_4, string Dt>
213238 : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
227252 def VLD1d16T_UPD : VLD1D3WB<0b0100, "16">;
228253 def VLD1d32T_UPD : VLD1D3WB<0b1000, "32">;
229254 def VLD1d64T_UPD : VLD1D3WB<0b1100, "64">;
255
256 def VLD1d64TPseudo : VLDQQPseudo;
257 def VLD1d64TPseudo_UPD : VLDQQWBPseudo;
230258
231259 // ...with 4 registers (some of these are only for the disassembler):
232260 class VLD1D4<bits<4> op7_4, string Dt>
250278 def VLD1d32Q_UPD : VLD1D4WB<0b1000, "32">;
251279 def VLD1d64Q_UPD : VLD1D4WB<0b1100, "64">;
252280
281 def VLD1d64QPseudo : VLDQQPseudo;
282 def VLD1d64QPseudo_UPD : VLDQQWBPseudo;
283
253284 // VLD2 : Vector Load (multiple 2-element structures)
254285 class VLD2D<bits<4> op11_8, bits<4> op7_4, string Dt>
255286 : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2),
268299 def VLD2q8 : VLD2Q<0b0000, "8">;
269300 def VLD2q16 : VLD2Q<0b0100, "16">;
270301 def VLD2q32 : VLD2Q<0b1000, "32">;
302
303 def VLD2d8Pseudo : VLDQPseudo;
304 def VLD2d16Pseudo : VLDQPseudo;
305 def VLD2d32Pseudo : VLDQPseudo;
306
307 def VLD2q8Pseudo : VLDQQPseudo;
308 def VLD2q16Pseudo : VLDQQPseudo;
309 def VLD2q32Pseudo : VLDQQPseudo;
271310
272311 // ...with address register writeback:
273312 class VLD2DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
289328 def VLD2q8_UPD : VLD2QWB<0b0000, "8">;
290329 def VLD2q16_UPD : VLD2QWB<0b0100, "16">;
291330 def VLD2q32_UPD : VLD2QWB<0b1000, "32">;
331
332 def VLD2d8Pseudo_UPD : VLDQWBPseudo;
333 def VLD2d16Pseudo_UPD : VLDQWBPseudo;
334 def VLD2d32Pseudo_UPD : VLDQWBPseudo;
335
336 def VLD2q8Pseudo_UPD : VLDQQWBPseudo;
337 def VLD2q16Pseudo_UPD : VLDQQWBPseudo;
338 def VLD2q32Pseudo_UPD : VLDQQWBPseudo;
292339
293340 // ...with double-spaced registers (for disassembly only):
294341 def VLD2b8 : VLD2D<0b1001, 0b0000, "8">;
530577 def VST1q32 : VST1Q<0b1000, "32">;
531578 def VST1q64 : VST1Q<0b1100, "64">;
532579
533 def VST1q8Pseudo : VSTQPseudo;
534 def VST1q16Pseudo : VSTQPseudo;
535 def VST1q32Pseudo : VSTQPseudo;
536 def VST1q64Pseudo : VSTQPseudo;
580 def VST1q8Pseudo : VSTQPseudo;
581 def VST1q16Pseudo : VSTQPseudo;
582 def VST1q32Pseudo : VSTQPseudo;
583 def VST1q64Pseudo : VSTQPseudo;
537584
538585 // ...with address register writeback:
539586 class VST1DWB<bits<4> op7_4, string Dt>
5050 default:
5151 break;
5252
53 case ARM::VLD1q8:
54 case ARM::VLD1q16:
55 case ARM::VLD1q32:
56 case ARM::VLD1q64:
57 case ARM::VLD2d8:
58 case ARM::VLD2d16:
59 case ARM::VLD2d32:
6053 case ARM::VLD2LNd8:
6154 case ARM::VLD2LNd16:
6255 case ARM::VLD2LNd32:
6356 FirstOpnd = 0;
6457 NumRegs = 2;
65 return true;
66
67 case ARM::VLD2q8:
68 case ARM::VLD2q16:
69 case ARM::VLD2q32:
70 FirstOpnd = 0;
71 NumRegs = 4;
7258 return true;
7359
7460 case ARM::VLD2LNq16:
4444 entry:
4545 ; CHECK: t2:
4646 ; CHECK: vld1.16
47 ; CHECK-NOT: vmov
48 ; CHECK: vld1.16
4749 ; CHECK: vmul.i16
48 ; CHECK-NOT: vmov
49 ; CHECK: vld1.16
5050 ; CHECK: vmul.i16
5151 ; CHECK-NOT: vmov
5252 ; CHECK: vst1.16