llvm.org GIT mirror llvm / 0eb257d
Merging r232142:
------------------------------------------------------------------------
r232142 | Hao.Liu | 2015-03-13 01:15:23 -0400 (Fri, 13 Mar 2015) | 9 lines

[MachineCopyPropagation] Fix a bug causing incorrect removal for the instruction sequences as follows:
    %Q5_Q6<def> = COPY %Q2_Q3
    %D5<def> =
    %D3<def> =
    %D3<def> = COPY %D6   // Incorrectly removed in MachineCopyPropagation
Using of %D3 results in incorrect result ...

Reviewed in http://reviews.llvm.org/D8242
------------------------------------------------------------------------

git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_36@236065 91177308-0d34-0410-b5e6-96231b3b80d8

Tom Stellard, 5 years ago
2 changed file(s) with 104 addition(s) and 4 deletion(s). Raw diff Collapse all Expand all
7474 I != E; ++I) {
7575 unsigned MappedDef = *I;
7676 // Source of copy is no longer available for propagation.
77 if (AvailCopyMap.erase(MappedDef)) {
78 for (MCSubRegIterator SR(MappedDef, TRI); SR.isValid(); ++SR)
79 AvailCopyMap.erase(*SR);
80 }
77 AvailCopyMap.erase(MappedDef);
78 for (MCSubRegIterator SR(MappedDef, TRI); SR.isValid(); ++SR)
79 AvailCopyMap.erase(*SR);
8180 }
8281 }
8382 }
; NOTE(review): this test was scraped from a web diff view. Every line carries a
; leading diff-gutter number ("0", "1", ...) that is not part of the original
; file, and several vector-constant operands were lost in extraction (see the
; function body below). Restore both from the upstream test
; llvm/test/CodeGen/AArch64/machine-copy-prop.ll (r232142) before running.
0 ; RUN: llc -mtriple=aarch64-linux-gnu -mcpu=cortex-a57 -verify-machineinstrs < %s | FileCheck %s
1
2 ; This file checks a bug in the MachineCopyPropagation pass. The last COPY will be
3 ; incorrectly removed if the machine instructions are as follows:
4 ; %Q5_Q6 = COPY %Q2_Q3
5 ; %D5 =
6 ; %D3 =
7 ; %D3 = COPY %D6
8 ; This is caused by a bug in function SourceNoLongerAvailable(), which fails to
9 ; remove the relationship of D6 and "%Q5_Q6 = COPY %Q2_Q3".
10
; Set to true inside @foo (blocks %if.then and %if.then2) whenever a computed
; value does not match its expected constant, i.e. a miscompile occurred.
11 @failed = internal unnamed_addr global i1 false
12
; The pattern under test: an ld2 must survive, and no bogus kill-flag copy
; comment may appear in the emitted assembly.
13 ; CHECK-LABEL: foo:
14 ; CHECK: ld2
15 ; CHECK-NOT: // kill: D{{[0-9]+}} D{{[0-9]+}}
; Regression test body for MachineCopyPropagation (D8242): the NEON intrinsic
; calls below are arranged to create the Q/D register overlap pattern described
; in the file header, so that a correct compiler keeps the final COPY.
; NOTE(review): several constant operands were lost when this was scraped from
; a web diff (e.g. "<2 x i32> ," with no literal); as written the IR will not
; parse. TODO: restore the literals from the upstream test file.
16 define void @foo(<2 x i32> %shuffle251, <8 x i8> %vtbl1.i, i8* %t2, <2 x i32> %vrsubhn_v2.i1364) {
17 entry:
; Stack slots used by the st2lane/ld2lane round-trips below.
18 %val0 = alloca [2 x i64], align 8
19 %val1 = alloca <2 x i64>, align 16
; First operand literal lost in scraping -- confirm against upstream.
20 %vmull = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> , <2 x i32> %shuffle251)
21 %vgetq_lane = extractelement <2 x i64> %vmull, i32 0
; Compare lane 0 against its expected value; mismatch records a failure.
22 %cmp = icmp eq i64 %vgetq_lane, 1
23 br i1 %cmp, label %if.end, label %if.then
24
25 if.then: ; preds = %entry
26 store i1 true, i1* @failed, align 1
27 br label %if.end
28
29 if.end: ; preds = %if.then, %entry
; Opaque call acts as a scheduling/register barrier between the two checks.
30 tail call void @f2()
; Both <4 x i16> literals lost in scraping -- confirm against upstream.
31 %sqdmull = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> , <4 x i16> )
32 %sqadd = tail call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> zeroinitializer, <4 x i32> %sqdmull)
33 %shuffle = shufflevector <4 x i32> %sqadd, <4 x i32> undef, <2 x i32> zeroinitializer
; Trailing constant operands of the next three lines lost in scraping.
34 %0 = mul <2 x i32> %shuffle,
35 %sub = add <2 x i32> %0,
36 %sext = sext <2 x i32> %sub to <2 x i64>
37 %vset_lane603 = shufflevector <2 x i64> %sext, <2 x i64> undef, <1 x i32> zeroinitializer
; Store/load lane pairs through %val0/%t2 -- these produce the st2/ld2
; instructions the CHECK lines key on.
38 %t1 = bitcast [2 x i64]* %val0 to i8*
39 call void @llvm.aarch64.neon.st2lane.v2i64.p0i8(<2 x i64> zeroinitializer, <2 x i64> zeroinitializer, i64 1, i8* %t1)
40 call void @llvm.aarch64.neon.st2lane.v1i64.p0i8(<1 x i64> , <1 x i64> , i64 0, i8* %t2)
41 %vld2_lane = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0i8(<1 x i64> , <1 x i64> , i64 0, i8* %t2)
42 %vld2_lane.0.extract = extractvalue { <1 x i64>, <1 x i64> } %vld2_lane, 0
43 %vld2_lane.1.extract = extractvalue { <1 x i64>, <1 x i64> } %vld2_lane, 1
44 %vld2_lane1 = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0i8(<1 x i64> %vld2_lane.0.extract, <1 x i64> %vld2_lane.1.extract, i64 0, i8* %t1)
45 %vld2_lane1.0.extract = extractvalue { <1 x i64>, <1 x i64> } %vld2_lane1, 0
46 %vld2_lane1.1.extract = extractvalue { <1 x i64>, <1 x i64> } %vld2_lane1, 1
47 %t3 = bitcast <2 x i64>* %val1 to i8*
48 call void @llvm.aarch64.neon.st2.v1i64.p0i8(<1 x i64> %vld2_lane1.0.extract, <1 x i64> %vld2_lane1.1.extract, i8* %t3)
; Note: old-style typed load syntax (pre-opaque-pointer LLVM, consistent with
; the 3.6 release branch this was merged to).
49 %t4 = load <2 x i64>* %val1, align 16
; Constant operands on the sub/lshr/xor chain lost in scraping.
50 %vsubhn = sub <2 x i64> , %t4
51 %vsubhn1 = lshr <2 x i64> %vsubhn,
52 %vsubhn2 = trunc <2 x i64> %vsubhn1 to <2 x i32>
53 %neg = xor <2 x i32> %vsubhn2,
; Chained saturating adds keep long-lived D/Q values overlapping.
54 %sqadd1 = call <1 x i64> @llvm.aarch64.neon.usqadd.v1i64(<1 x i64> , <1 x i64> )
55 %sqadd2 = call <1 x i64> @llvm.aarch64.neon.usqadd.v1i64(<1 x i64> %vset_lane603, <1 x i64> %sqadd1)
56 %sqadd3 = call <1 x i64> @llvm.aarch64.neon.usqadd.v1i64(<1 x i64> , <1 x i64> %sqadd2)
57 %shuffle.i = shufflevector <2 x i32> , <2 x i32> %vrsubhn_v2.i1364, <2 x i32>
58 %cmp.i = icmp uge <2 x i32> %shuffle.i, %neg
59 %sext.i = sext <2 x i1> %cmp.i to <2 x i32>
60 %vpadal = call <1 x i64> @llvm.aarch64.neon.uaddlp.v1i64.v2i32(<2 x i32> %sext.i)
61 %t5 = sub <1 x i64> %vpadal, %sqadd3
; Second self-check: wrong lane value means the COPY was wrongly removed.
62 %vget_lane1 = extractelement <1 x i64> %t5, i32 0
63 %cmp2 = icmp eq i64 %vget_lane1, 15
64 br i1 %cmp2, label %if.end2, label %if.then2
65
66 if.then2: ; preds = %if.end
67 store i1 true, i1* @failed, align 1
68 br label %if.end2
69
70 if.end2: ; preds = %if.then2, %if.end
71 call void @f2()
; First <8 x i8> literal and the <8 x i32> shuffle mask lost in scraping.
72 %vext = shufflevector <8 x i8> , <8 x i8> %vtbl1.i, <8 x i32>
73 %t6 = bitcast <8 x i8> %vext to <2 x i32>
; Pass the final value out so none of the chain above is dead-code-eliminated.
74 call void @f0(<2 x i32> %t6)
75 ret void
76 }
77
; External helpers (@f0/@f1/@f2 act as optimization barriers) and the AArch64
; NEON intrinsics used by @foo above.
78 declare void @f0(<2 x i32>)
79
80 declare <8 x i8> @f1()
81
82 declare void @f2()
83
84 declare <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16>, <4 x i16>)
85
86 declare void @llvm.aarch64.neon.st2lane.v2i64.p0i8(<2 x i64>, <2 x i64>, i64, i8* nocapture)
87
88 declare void @llvm.aarch64.neon.st2lane.v1i64.p0i8(<1 x i64>, <1 x i64>, i64, i8* nocapture)
89
90 declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0i8(<1 x i64>, <1 x i64>, i64, i8*)
91
92 declare void @llvm.aarch64.neon.st2.v1i64.p0i8(<1 x i64>, <1 x i64>, i8* nocapture)
93
94 declare <1 x i64> @llvm.aarch64.neon.usqadd.v1i64(<1 x i64>, <1 x i64>)
95
96 declare <1 x i64> @llvm.aarch64.neon.uaddlp.v1i64.v2i32(<2 x i32>)
97
98 declare <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>)
99
100 declare <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32>, <2 x i32>)