llvm.org GIT mirror llvm / 17f42e0
Revert r163298 "Optimize codegen for VSETLNi{8,16,32} operating on Q registers." Keep the integer_insertelement test case; the new coalescer can handle this kind of lane insertion without help from pseudo-instructions.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@166835 91177308-0d34-0410-b5e6-96231b3b80d8
Jakob Stoklund Olesen, 7 years ago
4 changed file(s) with 20 addition(s) and 69 deletion(s). Raw diff Collapse all Expand all
12071207 ExpandLaneOp(MBBI);
12081208 return true;
12091209
1210 case ARM::VSETLNi8Q:
1211 case ARM::VSETLNi16Q: {
1212 // Expand VSETLNs acting on a Q register to equivalent VSETLNs acting
1213 // on the respective D register.
1214
1215 unsigned QReg = MI.getOperand(1).getReg();
1216 unsigned QLane = MI.getOperand(3).getImm();
1217
1218 unsigned NewOpcode, DLane, DSubReg;
1219 switch (Opcode) {
1220 default: llvm_unreachable("Invalid opcode!");
1221 case ARM::VSETLNi8Q:
1222 // 8 possible 8-bit lanes per DPR.
1223 NewOpcode = ARM::VSETLNi8;
1224 DLane = QLane % 8;
1225 DSubReg = (QLane / 8) ? ARM::dsub_1 : ARM::dsub_0;
1226 break;
1227 case ARM::VSETLNi16Q:
1228 // 4 possible 16-bit lanes per DPR.
1229 NewOpcode = ARM::VSETLNi16;
1230 DLane = QLane % 4;
1231 DSubReg = (QLane / 4) ? ARM::dsub_1 : ARM::dsub_0;
1232 break;
1233 }
1234
1235 MachineInstrBuilder MIB =
1236 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpcode));
1237
1238 unsigned DReg = TRI->getSubReg(QReg, DSubReg);
1239
1240 MIB.addReg(DReg, RegState::Define); // Output DPR
1241 MIB.addReg(DReg); // Input DPR
1242 MIB.addOperand(MI.getOperand(2)); // Input GPR
1243 MIB.addImm(DLane); // Lane
1244
1245 // Add the predicate operands.
1246 MIB.addOperand(MI.getOperand(4));
1247 MIB.addOperand(MI.getOperand(5));
1248
1249 if (MI.getOperand(1).isKill()) // Add an implicit kill for the Q register.
1250 MIB->addRegisterKilled(QReg, TRI, true);
1251 // And an implicit def of the output register (which should always be the
1252 // same as the input register).
1253 MIB->addRegisterDefined(QReg, TRI);
1254
1255 TransferImpOps(MI, MIB, MIB);
1256
1257 MI.eraseFromParent();
1258 return true;
1259 }
1260
12611210 case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false); return true;
12621211 case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false); return true;
12631212 case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true); return true;
51395139 GPR:$R, imm:$lane))]> {
51405140 let Inst{21} = lane{0};
51415141 }
5142
5143 def VSETLNi8Q : PseudoNeonI<(outs QPR:$V),
5144 (ins QPR:$src1, GPR:$R, VectorIndex8:$lane),
5145 IIC_VMOVISL, "",
5146 [(set QPR:$V, (vector_insert (v16i8 QPR:$src1),
5147 GPR:$R, imm:$lane))]>;
5148 def VSETLNi16Q : PseudoNeonI<(outs QPR:$V),
5149 (ins QPR:$src1, GPR:$R, VectorIndex16:$lane),
5150 IIC_VMOVISL, "",
5151 [(set QPR:$V, (vector_insert (v8i16 QPR:$src1),
5152 GPR:$R, imm:$lane))]>;
5153 }
5154
5142 }
5143 def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
5144 (v16i8 (INSERT_SUBREG QPR:$src1,
5145 (v8i8 (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1,
5146 (DSubReg_i8_reg imm:$lane))),
5147 GPR:$src2, (SubReg_i8_lane imm:$lane))),
5148 (DSubReg_i8_reg imm:$lane)))>;
5149 def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane),
5150 (v8i16 (INSERT_SUBREG QPR:$src1,
5151 (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
5152 (DSubReg_i16_reg imm:$lane))),
5153 GPR:$src2, (SubReg_i16_lane imm:$lane))),
5154 (DSubReg_i16_reg imm:$lane)))>;
51555155 def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane),
5156 (v4i32 (INSERT_SUBREG QPR:$src1,
5157 GPR:$src2,
5158 (SSubReg_f32_reg imm:$lane)))>;
5156 (v4i32 (INSERT_SUBREG QPR:$src1,
5157 (v2i32 (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1,
5158 (DSubReg_i32_reg imm:$lane))),
5159 GPR:$src2, (SubReg_i32_lane imm:$lane))),
5160 (DSubReg_i32_reg imm:$lane)))>;
51595161
51605162 def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)),
51615163 (INSERT_SUBREG (v2f32 (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2)),
55
66 ; CHECK: @f
77 ; CHECK-NOT: vorr d
8 ; CHECK: vmov s
8 ; CHECK: vmov.32 d
99 ; CHECK-NOT: vorr d
1010 ; CHECK: mov pc, lr
1111 define <4 x i32> @f(<4 x i32> %in) {
199199
200200 define <4 x i32> @vsetQ_lane32(<4 x i32>* %A, i32 %B) nounwind {
201201 ;CHECK: vsetQ_lane32:
202 ;CHECK: vmov s
202 ;CHECK: vmov.32 d{{.*}}[1], r1
203203 %tmp1 = load <4 x i32>* %A
204204 %tmp2 = insertelement <4 x i32> %tmp1, i32 %B, i32 1
205205 ret <4 x i32> %tmp2