llvm.org GIT mirror llvm / 573330f
[ARM] Do not fuse VADD and VMUL, continued (2/2)
This is patch 2/2, following up on D53314. It is the functional change that prevents mul + add sequences from being fused into VFMAs.
Differential revision: https://reviews.llvm.org/D53315
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@344683 91177308-0d34-0410-b5e6-96231b3b80d8
Sjoerd Meijer, 1 year, 11 months ago
2 changed file(s) with 13 addition(s) and 2 deletion(s). Raw diff Collapse all Expand all
364364 def UseMulOps : Predicate<"Subtarget->useMulOps()">;
365365
366366 // Prefer fused MAC for fp mul + add over fp VMLA / VMLS if they are available.
367 // But only select them if more precision in FP computation is allowed.
367 // But only select them if more precision in FP computation is allowed, and when
368 // they are not slower than a mul + add sequence.
368369 // Do not use them for Darwin platforms.
369370 def UseFusedMAC : Predicate<"(TM.Options.AllowFPOpFusion =="
370371 " FPOpFusion::Fast && "
371372 " Subtarget->hasVFP4()) && "
372 "!Subtarget->isTargetDarwin()">;
373 "!Subtarget->isTargetDarwin() &&"
374 "Subtarget->useFPVMLx()">;
373375
374376 def HasFastVGETLNi32 : Predicate<"!Subtarget->hasSlowVGETLNi32()">;
375377 def HasSlowVGETLNi32 : Predicate<"Subtarget->hasSlowVGETLNi32()">;
0 ; RUN: llc < %s -mtriple=armv7-eabi -mattr=+neon,+vfp4 -fp-contract=fast | FileCheck %s
1 ; RUN: llc < %s -mtriple=arm-arm-eabi -mcpu=cortex-m7 -fp-contract=fast | FileCheck %s
2 ; RUN: llc < %s -mtriple=arm-arm-eabi -mcpu=cortex-m4 -fp-contract=fast | FileCheck %s -check-prefix=DONT-FUSE
3 ; RUN: llc < %s -mtriple=arm-arm-eabi -mcpu=cortex-m33 -fp-contract=fast | FileCheck %s -check-prefix=DONT-FUSE
4
15 ; Check generated fused MAC and MLS.
26
37 define double @fusedMACTest1(double %d1, double %d2, double %d3) {
1115 define float @fusedMACTest2(float %f1, float %f2, float %f3) {
1216 ;CHECK-LABEL: fusedMACTest2:
1317 ;CHECK: vfma.f32
18
19 ;DONT-FUSE-LABEL: fusedMACTest2:
20 ;DONT-FUSE: vmul.f32
21 ;DONT-FUSE-NEXT: vadd.f32
22
1423 %1 = fmul float %f1, %f2
1524 %2 = fadd float %1, %f3
1625 ret float %2