llvm.org GIT mirror llvm / 32c2bfd
ARM: implement some simple f64 materializations. Previously we used a const-pool load for virtually all 64-bit floating values. Actually, we can get quite a few common values (including 0.0, 1.0) via "vmov" instructions of one stripe or another. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188773 91177308-0d34-0410-b5e6-96231b3b80d8 Tim Northover 6 years ago
3 changed file(s) with 110 addition(s) and 13 deletion(s). Raw diff Collapse all Expand all
451451 }
452452
453453 setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
454 setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
454455
455456 if (Subtarget->hasNEON()) {
456457 addDRTypeForNEON(MVT::v2f32);
42704271
42714272 SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
42724273 const ARMSubtarget *ST) const {
4273 if (!ST->useNEONForSinglePrecisionFP() || !ST->hasVFP3() || ST->hasD16())
4274 if (!ST->hasVFP3())
42744275 return SDValue();
42754276
4277 bool IsDouble = Op.getValueType() == MVT::f64;
42764278 ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
4277 assert(Op.getValueType() == MVT::f32 &&
4278 "ConstantFP custom lowering should only occur for f32.");
42794279
42804280 // Try splatting with a VMOV.f32...
42814281 APFloat FPVal = CFP->getValueAPF();
4282 int ImmVal = ARM_AM::getFP32Imm(FPVal);
4282 int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal);
4283
42834284 if (ImmVal != -1) {
4285 if (IsDouble || !ST->useNEONForSinglePrecisionFP()) {
4286 // We have code in place to select a valid ConstantFP already, no need to
4287 // do any mangling.
4288 return Op;
4289 }
4290
4291 // It's a float and we are trying to use NEON operations where
4292 // possible. Lower it to a splat followed by an extract.
42844293 SDLoc DL(Op);
42854294 SDValue NewVal = DAG.getTargetConstant(ImmVal, MVT::i32);
42864295 SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32,
42894298 DAG.getConstant(0, MVT::i32));
42904299 }
42914300
4292 // If that fails, try a VMOV.i32
4301 // The rest of our options are NEON only, make sure that's allowed before
4302 // proceeding.
4303 if (!ST->hasNEON() || (!IsDouble && !ST->useNEONForSinglePrecisionFP()))
4304 return SDValue();
4305
42934306 EVT VMovVT;
4294 unsigned iVal = FPVal.bitcastToAPInt().getZExtValue();
4295 SDValue NewVal = isNEONModifiedImm(iVal, 0, 32, DAG, VMovVT, false,
4296 VMOVModImm);
4307 uint64_t iVal = FPVal.bitcastToAPInt().getZExtValue();
4308
4309 // It wouldn't really be worth bothering for doubles except for one very
4310 // important value, which does happen to match: 0.0. So make sure we don't do
4311 // anything stupid.
4312 if (IsDouble && (iVal & 0xffffffff) != (iVal >> 32))
4313 return SDValue();
4314
4315 // Try a VMOV.i32 (FIXME: i8, i16, or i64 could work too).
4316 SDValue NewVal = isNEONModifiedImm(iVal & 0xffffffffU, 0, 32, DAG, VMovVT,
4317 false, VMOVModImm);
42974318 if (NewVal != SDValue()) {
42984319 SDLoc DL(Op);
42994320 SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT,
43004321 NewVal);
4322 if (IsDouble)
4323 return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
4324
4325 // It's a float: cast and extract a vector element.
43014326 SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
43024327 VecConstant);
43034328 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
43054330 }
43064331
43074332 // Finally, try a VMVN.i32
4308 NewVal = isNEONModifiedImm(~iVal & 0xffffffff, 0, 32, DAG, VMovVT, false,
4309 VMVNModImm);
4333 NewVal = isNEONModifiedImm(~iVal & 0xffffffffU, 0, 32, DAG, VMovVT,
4334 false, VMVNModImm);
43104335 if (NewVal != SDValue()) {
43114336 SDLoc DL(Op);
43124337 SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal);
4338
4339 if (IsDouble)
4340 return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
4341
4342 // It's a float: cast and extract a vector element.
43134343 SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
43144344 VecConstant);
43154345 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
0 ; RUN: llc -mtriple=armv7 -mattr=+neon -mcpu=swift %s -o - | FileCheck %s
1 ; RUN: llc -mtriple=armv7 -mattr=+neon -mcpu=cortex-a8 %s -o - | FileCheck --check-prefix=CHECK-NONEONFP %s
2 ; RUN: llc -mtriple=armv7 -mattr=-neon -mcpu=cortex-a8 %s -o - | FileCheck --check-prefix=CHECK-NONEON %s
3
4 define arm_aapcs_vfpcc float @test_vmov_f32() {
5 ; CHECK-LABEL: test_vmov_f32:
6 ; CHECK: vmov.f32 d0, #1.0
7
8 ; CHECK-NONEONFP: vmov.f32 s0, #1.0
9 ret float 1.0
10 }
11
12 define arm_aapcs_vfpcc float @test_vmov_imm() {
13 ; CHECK-LABEL: test_vmov_imm:
14 ; CHECK: vmov.i32 d0, #0
15
16 ; CHECK-NONEON-LABEL: test_vmov_imm:
17 ; CHECK-NONEON: vldr s0, {{.?LCPI[0-9]+_[0-9]+}}
18 ret float 0.0
19 }
20
21 define arm_aapcs_vfpcc float @test_vmvn_imm() {
22 ; CHECK-LABEL: test_vmvn_imm:
23 ; CHECK: vmvn.i32 d0, #0xb0000000
24
25 ; CHECK-NONEON-LABEL: test_vmvn_imm:
26 ; CHECK-NONEON: vldr s0, {{.?LCPI[0-9]+_[0-9]+}}
27 ret float 8589934080.0
28 }
29
30 define arm_aapcs_vfpcc double @test_vmov_f64() {
31 ; CHECK-LABEL: test_vmov_f64:
32 ; CHECK: vmov.f64 d0, #1.0
33
34 ; CHECK-NONEON-LABEL: test_vmov_f64:
35 ; CHECK-NONEON: vmov.f64 d0, #1.0
36
37 ret double 1.0
38 }
39
40 define arm_aapcs_vfpcc double @test_vmov_double_imm() {
41 ; CHECK-LABEL: test_vmov_double_imm:
42 ; CHECK: vmov.i32 d0, #0
43
44 ; CHECK-NONEON-LABEL: test_vmov_double_imm:
45 ; CHECK-NONEON: vldr d0, {{.?LCPI[0-9]+_[0-9]+}}
46 ret double 0.0
47 }
48
49 define arm_aapcs_vfpcc double @test_vmvn_double_imm() {
50 ; CHECK-LABEL: test_vmvn_double_imm:
51 ; CHECK: vmvn.i32 d0, #0xb0000000
52
53 ; CHECK-NONEON-LABEL: test_vmvn_double_imm:
54 ; CHECK-NONEON: vldr d0, {{.?LCPI[0-9]+_[0-9]+}}
55 ret double 0x4fffffff4fffffff
56 }
57
58 ; Make sure we don't ignore the high half of 64-bit values when deciding whether
59 ; a vmov/vmvn is possible.
60 define arm_aapcs_vfpcc double @test_notvmvn_double_imm() {
61 ; CHECK-LABEL: test_notvmvn_double_imm:
62 ; CHECK: vldr d0, {{.?LCPI[0-9]+_[0-9]+}}
63
64 ; CHECK-NONEON-LABEL: test_notvmvn_double_imm:
65 ; CHECK-NONEON: vldr d0, {{.?LCPI[0-9]+_[0-9]+}}
66 ret double 0x4fffffffffffffff
67 }
238238 ; PR7157
239239 define arm_aapcs_vfpcc float @t9(%0* nocapture, %3* nocapture) nounwind {
240240 ; CHECK-LABEL: t9:
241 ; CHECK: vldr
242 ; CHECK-NOT: vmov d{{.*}}, d16
243 ; CHECK: vmov.i32 d17
241 ; CHECK: vmov.i32 d16, #0x0
244242 ; CHECK-NEXT: vst1.64 {d16, d17}, [r0:128]
243 ; CHECK-NEXT: vorr d17, d16, d16
245244 ; CHECK-NEXT: vst1.64 {d16, d17}, [r0:128]
246245 %3 = bitcast double 0.000000e+00 to <2 x float> ; <<2 x float>> [#uses=2]
247246 %4 = shufflevector <2 x float> %3, <2 x float> undef, <4 x i32> ; <<4 x float>> [#uses=1]