llvm.org GIT mirror llvm / 3c8ad92
Implement NEON domain switching for scalar <-> S-register vmovs on ARM. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@162094 91177308-0d34-0410-b5e6-96231b3b80d8 (Tim Northover, 7 years ago)
2 changed file(s) with 132 addition(s) and 50 deletion(s). Raw diff Collapse all Expand all
33533353 //
33543354 std::pair<uint16_t, uint16_t>
33553355 ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const {
3356 // VMOVD is a VFP instruction, but can be changed to NEON if it isn't
3357 // predicated.
3356 // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON
3357 // if they are not predicated.
33583358 if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI))
3359 return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));
3360
3361 // Cortex-A9 is particularly picky about mixing the two and wants these
3362 // converted.
3363 if (Subtarget.isCortexA9() && !isPredicated(MI) &&
3364 (MI->getOpcode() == ARM::VMOVRS ||
3365 MI->getOpcode() == ARM::VMOVSR))
33593366 return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));
33603367
33613368 // No other instructions can be swizzled, so just determine their domain.
33773384
33783385 void
33793386 ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
3380 // We only know how to change VMOVD into VORR.
3381 assert(MI->getOpcode() == ARM::VMOVD && "Can only swizzle VMOVD");
3382 if (Domain != ExeNEON)
3383 return;
3384
3385 // Zap the predicate operands.
3386 assert(!isPredicated(MI) && "Cannot predicate a VORRd");
3387 MI->RemoveOperand(3);
3388 MI->RemoveOperand(2);
3389
3390 // Change to a VORRd which requires two identical use operands.
3391 MI->setDesc(get(ARM::VORRd));
3392
3393 // Add the extra source operand and new predicates.
3394 // This will go before any implicit ops.
3395 AddDefaultPred(MachineInstrBuilder(MI).addOperand(MI->getOperand(1)));
3387 unsigned DstReg, SrcReg, DReg;
3388 unsigned Lane;
3389 MachineInstrBuilder MIB(MI);
3390 const TargetRegisterInfo *TRI = &getRegisterInfo();
3391 bool isKill;
3392 switch (MI->getOpcode()) {
3393 default:
3394 llvm_unreachable("cannot handle opcode!");
3395 break;
3396 case ARM::VMOVD:
3397 if (Domain != ExeNEON)
3398 break;
3399
3400 // Zap the predicate operands.
3401 assert(!isPredicated(MI) && "Cannot predicate a VORRd");
3402 MI->RemoveOperand(3);
3403 MI->RemoveOperand(2);
3404
3405 // Change to a VORRd which requires two identical use operands.
3406 MI->setDesc(get(ARM::VORRd));
3407
3408 // Add the extra source operand and new predicates.
3409 // This will go before any implicit ops.
3410 AddDefaultPred(MachineInstrBuilder(MI).addOperand(MI->getOperand(1)));
3411 break;
3412 case ARM::VMOVRS:
3413 if (Domain != ExeNEON)
3414 break;
3415 assert(!isPredicated(MI) && "Cannot predicate a VGETLN");
3416
3417 DstReg = MI->getOperand(0).getReg();
3418 SrcReg = MI->getOperand(1).getReg();
3419
3420 DReg = TRI->getMatchingSuperReg(SrcReg, ARM::ssub_0, &ARM::DPRRegClass);
3421 Lane = 0;
3422 if (DReg == ARM::NoRegister) {
3423 DReg = TRI->getMatchingSuperReg(SrcReg, ARM::ssub_1, &ARM::DPRRegClass);
3424 Lane = 1;
3425 assert(DReg && "S-register with no D super-register?");
3426 }
3427
3428 MI->RemoveOperand(3);
3429 MI->RemoveOperand(2);
3430 MI->RemoveOperand(1);
3431
3432 MI->setDesc(get(ARM::VGETLNi32));
3433 MIB.addReg(DReg);
3434 MIB.addImm(Lane);
3435
3436 MIB->getOperand(1).setIsUndef();
3437 MIB.addReg(SrcReg, RegState::Implicit);
3438
3439 AddDefaultPred(MIB);
3440 break;
3441 case ARM::VMOVSR:
3442 if (Domain != ExeNEON)
3443 break;
3444 assert(!isPredicated(MI) && "Cannot predicate a VSETLN");
3445
3446 DstReg = MI->getOperand(0).getReg();
3447 SrcReg = MI->getOperand(1).getReg();
3448 DReg = TRI->getMatchingSuperReg(DstReg, ARM::ssub_0, &ARM::DPRRegClass);
3449 Lane = 0;
3450 if (DReg == ARM::NoRegister) {
3451 DReg = TRI->getMatchingSuperReg(DstReg, ARM::ssub_1, &ARM::DPRRegClass);
3452 Lane = 1;
3453 assert(DReg && "S-register with no D super-register?");
3454 }
3455 isKill = MI->getOperand(0).isKill();
3456
3457 MI->RemoveOperand(3);
3458 MI->RemoveOperand(2);
3459 MI->RemoveOperand(1);
3460 MI->RemoveOperand(0);
3461
3462 MI->setDesc(get(ARM::VSETLNi32));
3463 MIB.addReg(DReg);
3464 MIB.addReg(DReg);
3465 MIB.addReg(SrcReg);
3466 MIB.addImm(Lane);
3467
3468 MIB->getOperand(1).setIsUndef();
3469
3470 if (isKill)
3471 MIB->addRegisterKilled(DstReg, TRI, true);
3472 MIB->addRegisterDefined(DstReg, TRI);
3473
3474 AddDefaultPred(MIB);
3475 break;
3476 }
3477
33963478 }
33973479
33983480 bool ARMBaseInstrInfo::hasNOP() const {
3232 ; CHECK: movt [[reg0]], :upper16:{{.*}}
3333 ; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
3434
35 ; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
35 ; CHECK: {{[mov|vmov.32]}} r0,
3636 ; CHECK: bl {{.*}}cosf
3737
38 ; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
38 ; CHECK: {{[mov|vmov.32]}} r0,
3939 ; CHECK: bl {{.*}}cosf
4040
41 ; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
41 ; CHECK: {{[mov|vmov.32]}} r0,
4242 ; CHECK: bl {{.*}}cosf
4343
44 ; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
44 ; CHECK: {{[mov|vmov.32]}} r0,
4545 ; CHECK: bl {{.*}}cosf
4646
4747 ; CHECK: vstmia {{.*}}
6363 ; CHECK: movt [[reg0]], :upper16:{{.*}}
6464 ; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
6565
66 ; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
66 ; CHECK: {{[mov|vmov.32]}} r0,
6767 ; CHECK: bl {{.*}}expf
6868
69 ; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
69 ; CHECK: {{[mov|vmov.32]}} r0,
7070 ; CHECK: bl {{.*}}expf
7171
72 ; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
72 ; CHECK: {{[mov|vmov.32]}} r0,
7373 ; CHECK: bl {{.*}}expf
7474
75 ; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
75 ; CHECK: {{[mov|vmov.32]}} r0,
7676 ; CHECK: bl {{.*}}expf
7777
7878 ; CHECK: vstmia {{.*}}
9494 ; CHECK: movt [[reg0]], :upper16:{{.*}}
9595 ; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
9696
97 ; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
97 ; CHECK: {{[mov|vmov.32]}} r0,
9898 ; CHECK: bl {{.*}}exp2f
9999
100 ; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
100 ; CHECK: {{[mov|vmov.32]}} r0,
101101 ; CHECK: bl {{.*}}exp2f
102102
103 ; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
103 ; CHECK: {{[mov|vmov.32]}} r0,
104104 ; CHECK: bl {{.*}}exp2f
105105
106 ; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
106 ; CHECK: {{[mov|vmov.32]}} r0,
107107 ; CHECK: bl {{.*}}exp2f
108108
109109 ; CHECK: vstmia {{.*}}
125125 ; CHECK: movt [[reg0]], :upper16:{{.*}}
126126 ; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
127127
128 ; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
128 ; CHECK: {{[mov|vmov.32]}} r0,
129129 ; CHECK: bl {{.*}}log10f
130130
131 ; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
131 ; CHECK: {{[mov|vmov.32]}} r0,
132132 ; CHECK: bl {{.*}}log10f
133133
134 ; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
134 ; CHECK: {{[mov|vmov.32]}} r0,
135135 ; CHECK: bl {{.*}}log10f
136136
137 ; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
137 ; CHECK: {{[mov|vmov.32]}} r0,
138138 ; CHECK: bl {{.*}}log10f
139139
140140 ; CHECK: vstmia {{.*}}
156156 ; CHECK: movt [[reg0]], :upper16:{{.*}}
157157 ; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
158158
159 ; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
159 ; CHECK: {{[mov|vmov.32]}} r0,
160160 ; CHECK: bl {{.*}}logf
161161
162 ; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
162 ; CHECK: {{[mov|vmov.32]}} r0,
163163 ; CHECK: bl {{.*}}logf
164164
165 ; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
165 ; CHECK: {{[mov|vmov.32]}} r0,
166166 ; CHECK: bl {{.*}}logf
167167
168 ; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
168 ; CHECK: {{[mov|vmov.32]}} r0,
169169 ; CHECK: bl {{.*}}logf
170170
171171 ; CHECK: vstmia {{.*}}
187187 ; CHECK: movt [[reg0]], :upper16:{{.*}}
188188 ; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
189189
190 ; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
190 ; CHECK: {{[mov|vmov.32]}} r0,
191191 ; CHECK: bl {{.*}}log2f
192192
193 ; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
193 ; CHECK: {{[mov|vmov.32]}} r0,
194194 ; CHECK: bl {{.*}}log2f
195195
196 ; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
196 ; CHECK: {{[mov|vmov.32]}} r0,
197197 ; CHECK: bl {{.*}}log2f
198198
199 ; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
199 ; CHECK: {{[mov|vmov.32]}} r0,
200200 ; CHECK: bl {{.*}}log2f
201201
202202 ; CHECK: vstmia {{.*}}
219219 ; CHECK: movt [[reg0]], :upper16:{{.*}}
220220 ; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
221221
222 ; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
222 ; CHECK: {{[mov|vmov.32]}} r0,
223223 ; CHECK: bl {{.*}}powf
224224
225 ; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
225 ; CHECK: {{[mov|vmov.32]}} r0,
226226 ; CHECK: bl {{.*}}powf
227227
228 ; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
228 ; CHECK: {{[mov|vmov.32]}} r0,
229229 ; CHECK: bl {{.*}}powf
230230
231 ; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
231 ; CHECK: {{[mov|vmov.32]}} r0,
232232 ; CHECK: bl {{.*}}powf
233233
234234 ; CHECK: vstmia {{.*}}
276276 ; CHECK: movt [[reg0]], :upper16:{{.*}}
277277 ; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
278278
279 ; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
279 ; CHECK: {{[mov|vmov.32]}} r0,
280280 ; CHECK: bl {{.*}}sinf
281281
282 ; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
282 ; CHECK: {{[mov|vmov.32]}} r0,
283283 ; CHECK: bl {{.*}}sinf
284284
285 ; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
285 ; CHECK: {{[mov|vmov.32]}} r0,
286286 ; CHECK: bl {{.*}}sinf
287287
288 ; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
288 ; CHECK: {{[mov|vmov.32]}} r0,
289289 ; CHECK: bl {{.*}}sinf
290290
291291 ; CHECK: vstmia {{.*}}