llvm.org GIT mirror llvm / d0848a6
Fix PR11829. PostRA LICM was too aggressive.

This fixes a typo in r148589.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@148724 91177308-0d34-0410-b5e6-96231b3b80d8

Jakob Stoklund Olesen, 8 years ago
2 changed files with 109 additions and 4 deletions.
[Raw diff] [Collapse all] [Expand all]
461 461   // register, then this is not safe. Two defs is indicated by setting a
462 462   // PhysRegClobbers bit.
463 463   for (const unsigned *AS = TRI->getOverlaps(Reg); *AS; ++AS) {
464     - if (PhysRegDefs.test(Reg))
465     - PhysRegClobbers.set(Reg);
466     - if (PhysRegClobbers.test(Reg))
    464 + if (PhysRegDefs.test(*AS))
    465 + PhysRegClobbers.set(*AS);
    466 + if (PhysRegClobbers.test(*AS))
467 467   // MI defined register is seen defined by another instruction in
468 468   // the loop, it cannot be a LICM candidate.
469 469   RuledOut = true;
470     - PhysRegDefs.set(Reg);
    470 + PhysRegDefs.set(*AS);
471 471   }
472 472   }
473 473
0 ; RUN: llc < %s -mcpu=cortex-a8 -verify-machineinstrs
1 ; PR11829
2 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
3 target triple = "armv7-none-linux-gnueabi"
4
5 define arm_aapcs_vfpcc void @foo(i8* nocapture %arg) nounwind uwtable align 2 {
6 bb:
7 br i1 undef, label %bb1, label %bb2
8
9 bb1: ; preds = %bb
10 unreachable
11
12 bb2: ; preds = %bb
13 br label %bb3
14
15 bb3: ; preds = %bb4, %bb2
16 %tmp = icmp slt i32 undef, undef
17 br i1 %tmp, label %bb4, label %bb67
18
19 bb4: ; preds = %bb3
20 %tmp5 = load <4 x i32>* undef, align 16, !tbaa !0
21 %tmp6 = and <4 x i32> %tmp5,
22 %tmp7 = or <4 x i32> %tmp6,
23 %tmp8 = bitcast <4 x i32> %tmp7 to <4 x float>
24 %tmp9 = fsub <4 x float> %tmp8, bitcast (i128 or (i128 shl (i128 zext (i64 trunc (i128 lshr (i128 bitcast (<4 x float> to i128), i128 64) to i64) to i128), i128 64), i128 zext (i64 trunc (i128 bitcast (<4 x float> to i128) to i64) to i128)) to <4 x float>)
25 %tmp10 = fmul <4 x float> undef, %tmp9
26 %tmp11 = fadd <4 x float> undef, %tmp10
27 %tmp12 = bitcast <4 x float> zeroinitializer to i128
28 %tmp13 = lshr i128 %tmp12, 64
29 %tmp14 = trunc i128 %tmp13 to i64
30 %tmp15 = insertvalue [2 x i64] undef, i64 %tmp14, 1
31 %tmp16 = call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %tmp11) nounwind
32 %tmp17 = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %tmp16, <4 x float> %tmp11) nounwind
33 %tmp18 = fmul <4 x float> %tmp17, %tmp16
34 %tmp19 = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %tmp18, <4 x float> %tmp11) nounwind
35 %tmp20 = fmul <4 x float> %tmp19, %tmp18
36 %tmp21 = fmul <4 x float> %tmp20, zeroinitializer
37 %tmp22 = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %tmp21, <4 x float> undef) nounwind
38 call arm_aapcs_vfpcc void @bar(i8* null, i8* undef, <4 x i32>* undef, [2 x i64] zeroinitializer) nounwind
39 %tmp23 = bitcast <4 x float> %tmp22 to i128
40 %tmp24 = trunc i128 %tmp23 to i64
41 %tmp25 = insertvalue [2 x i64] undef, i64 %tmp24, 0
42 %tmp26 = insertvalue [2 x i64] %tmp25, i64 0, 1
43 %tmp27 = load float* undef, align 4, !tbaa !2
44 %tmp28 = insertelement <4 x float> undef, float %tmp27, i32 3
45 %tmp29 = load <4 x i32>* undef, align 16, !tbaa !0
46 %tmp30 = and <4 x i32> %tmp29,
47 %tmp31 = or <4 x i32> %tmp30,
48 %tmp32 = bitcast <4 x i32> %tmp31 to <4 x float>
49 %tmp33 = fsub <4 x float> %tmp32, bitcast (i128 or (i128 shl (i128 zext (i64 trunc (i128 lshr (i128 bitcast (<4 x float> to i128), i128 64) to i64) to i128), i128 64), i128 zext (i64 trunc (i128 bitcast (<4 x float> to i128) to i64) to i128)) to <4 x float>)
50 %tmp34 = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> undef, <4 x float> %tmp28) nounwind
51 %tmp35 = fmul <4 x float> %tmp34, undef
52 %tmp36 = fmul <4 x float> %tmp35, undef
53 %tmp37 = call arm_aapcs_vfpcc i8* undef(i8* undef) nounwind
54 %tmp38 = load float* undef, align 4, !tbaa !2
55 %tmp39 = insertelement <2 x float> undef, float %tmp38, i32 0
56 %tmp40 = call arm_aapcs_vfpcc i8* undef(i8* undef) nounwind
57 %tmp41 = load float* undef, align 4, !tbaa !2
58 %tmp42 = insertelement <4 x float> undef, float %tmp41, i32 3
59 %tmp43 = shufflevector <2 x float> %tmp39, <2 x float> undef, <4 x i32> zeroinitializer
60 %tmp44 = fmul <4 x float> %tmp33, %tmp43
61 %tmp45 = fadd <4 x float> %tmp42, %tmp44
62 %tmp46 = fsub <4 x float> %tmp45, undef
63 %tmp47 = fmul <4 x float> %tmp46, %tmp36
64 %tmp48 = fadd <4 x float> undef, %tmp47
65 %tmp49 = call arm_aapcs_vfpcc i8* undef(i8* undef) nounwind
66 %tmp50 = load float* undef, align 4, !tbaa !2
67 %tmp51 = insertelement <4 x float> undef, float %tmp50, i32 3
68 %tmp52 = call arm_aapcs_vfpcc float* null(i8* undef) nounwind
69 %tmp54 = load float* %tmp52, align 4, !tbaa !2
70 %tmp55 = insertelement <4 x float> undef, float %tmp54, i32 3
71 %tmp56 = fsub <4 x float> , %tmp22
72 %tmp57 = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %tmp56, <4 x float> %tmp55) nounwind
73 %tmp58 = fmul <4 x float> undef, %tmp57
74 %tmp59 = fsub <4 x float> %tmp51, %tmp48
75 %tmp60 = fsub <4 x float> , %tmp58
76 %tmp61 = fmul <4 x float> %tmp59, %tmp60
77 %tmp62 = fadd <4 x float> %tmp48, %tmp61
78 call arm_aapcs_vfpcc void @baz(i8* undef, i8* undef, [2 x i64] %tmp26, <4 x i32>* undef)
79 %tmp63 = bitcast <4 x float> %tmp62 to i128
80 %tmp64 = lshr i128 %tmp63, 64
81 %tmp65 = trunc i128 %tmp64 to i64
82 %tmp66 = insertvalue [2 x i64] zeroinitializer, i64 %tmp65, 1
83 call arm_aapcs_vfpcc void @quux(i8* undef, i8* undef, [2 x i64] undef, i8* undef, [2 x i64] %tmp66, i8* undef, i8* undef, [2 x i64] %tmp26, [2 x i64] %tmp15, <4 x i32>* undef)
84 br label %bb3
85
86 bb67: ; preds = %bb3
87 ret void
88 }
89
90 declare arm_aapcs_vfpcc void @bar(i8*, i8*, <4 x i32>*, [2 x i64])
91
92 declare arm_aapcs_vfpcc void @baz(i8*, i8* nocapture, [2 x i64], <4 x i32>* nocapture) nounwind uwtable inlinehint align 2
93
94 declare arm_aapcs_vfpcc void @quux(i8*, i8*, [2 x i64], i8* nocapture, [2 x i64], i8* nocapture, i8* nocapture, [2 x i64], [2 x i64], <4 x i32>* nocapture) nounwind uwtable inlinehint align 2
95
96 declare <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float>, <4 x float>) nounwind readnone
97
98 declare <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float>, <4 x float>) nounwind readnone
99
100 declare <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float>) nounwind readnone
101
102 !0 = metadata !{metadata !"omnipotent char", metadata !1}
103 !1 = metadata !{metadata !"Simple C/C++ TBAA", null}
104 !2 = metadata !{metadata !"float", metadata !0}