llvm.org GIT mirror llvm / c5d3eed
Merging r370720 and r370721: ------------------------------------------------------------------------ r370720 | bjope | 2019-09-03 11:33:40 +0200 (Tue, 03 Sep 2019) | 13 lines [LV] Precommit test case showing miscompile from PR43166. NFC Summary: Precommit test case showing miscompile from PR43166. Reviewers: fhahn, Ayal Reviewed By: fhahn Subscribers: rkruppe, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D67072 ------------------------------------------------------------------------ ------------------------------------------------------------------------ r370721 | bjope | 2019-09-03 11:33:55 +0200 (Tue, 03 Sep 2019) | 20 lines [LV] Fix miscompiles by adding non-header PHI nodes to AllowedExit Summary: Fold-tail currently supports reduction last-vector-value live-out's, but has yet to support last-scalar-value live-outs, including non-header phi's. As it relies on AllowedExit in order to detect them and bail out we need to add the non-header PHI nodes to AllowedExit, otherwise we end up with miscompiles. Solves https://bugs.llvm.org/show_bug.cgi?id=43166 Reviewers: fhahn, Ayal Reviewed By: fhahn, Ayal Subscribers: anna, hiraditya, rkruppe, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D67074 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_90@371044 91177308-0d34-0410-b5e6-96231b3b80d8 Hans Wennborg a month ago
3 changed file(s) with 168 addition(s) and 2 deletion(s). Raw diff Collapse all Expand all
451451 /// Holds the widest induction type encountered.
452452 Type *WidestIndTy = nullptr;
453453
454 /// Allowed outside users. This holds the induction and reduction
455 /// vars which can be accessed from outside the loop.
454 /// Allowed outside users. This holds the variables that can be accessed from
455 /// outside the loop.
456456 SmallPtrSet<Value *, 4> AllowedExit;
457457
458458 /// Can we assume the absence of NaNs.
630630 // Unsafe cyclic dependencies with header phis are identified during
631631 // legalization for reduction, induction and first order
632632 // recurrences.
633 AllowedExit.insert(&I);
633634 continue;
634635 }
635636
0 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
1 ; RUN: opt < %s -loop-vectorize -force-vector-width=4 -S | FileCheck %s
2
3
4 ; Test cases below are reduced (and slightly modified) reproducers based on a
5 ; problem seen when compiling a C program like this:
6 ;
7 ; #include <stdio.h>
8 ; #include <stdlib.h>
9 ;
10 ; int y = 0;
11 ; int b = 1;
12 ; int d = 1;
13 ;
14 ; int main() {
15 ; #pragma clang loop vectorize_width(4)
16 ; for (int i = 0; i < 3; ++i) {
17 ; b = (y == 0) ? d : (d / y);
18 ; }
19 ;
20 ; if (b == 1)
21 ; printf("GOOD!\n");
22 ; else
23 ; printf("BAD!\n");
24 ; }
25 ;
26 ; When compiled+executed using
27 ; build-all/bin/clang -O1 lv-bug.c && ./a.out
28 ; the result is "GOOD!"
29 ;
30 ; When compiled+executed using
31 ; build-all/bin/clang -O1 lv-bug.c -fvectorize && ./a.out
32 ; the result is "BAD!"
33
34
35 ; This test case miscompiled with clang 8.0.0 (see PR43166), now we get
36 ; loop not vectorized: Cannot fold tail by masking in the presence of live outs.
37 ; instead.
38 define i64 @test1(i64 %y) {
39 ; CHECK-LABEL: @test1(
40 ; CHECK-NEXT: entry:
41 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
42 ; CHECK: for.body:
43 ; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[COND_END:%.*]] ]
44 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[Y:%.*]], 0
45 ; CHECK-NEXT: br i1 [[CMP]], label [[COND_END]], label [[COND_FALSE:%.*]]
46 ; CHECK: cond.false:
47 ; CHECK-NEXT: [[DIV:%.*]] = xor i64 3, [[Y]]
48 ; CHECK-NEXT: br label [[COND_END]]
49 ; CHECK: cond.end:
50 ; CHECK-NEXT: [[COND:%.*]] = phi i64 [ [[DIV]], [[COND_FALSE]] ], [ 77, [[FOR_BODY]] ]
51 ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I]], 1
52 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 3
53 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
54 ; CHECK: for.cond.cleanup:
55 ; CHECK-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], [[COND_END]] ]
56 ; CHECK-NEXT: ret i64 [[COND_LCSSA]]
57 ;
58 entry:
59 br label %for.body
60
61 for.body:
62 %i = phi i32 [ 0, %entry ], [ %inc, %cond.end ]
63 %cmp = icmp eq i64 %y, 0
64 br i1 %cmp, label %cond.end, label %cond.false
65
66 cond.false:
67 %div = xor i64 3, %y
68 br label %cond.end
69
70 cond.end:
71 %cond = phi i64 [ %div, %cond.false ], [ 77, %for.body ]
72 %inc = add nuw nsw i32 %i, 1
73 %exitcond = icmp eq i32 %inc, 3
74 br i1 %exitcond, label %for.cond.cleanup, label %for.body
75
76 for.cond.cleanup:
77 ret i64 %cond
78 }
79
80 ; This test case miscompiled with clang 8.0.0 (see PR43166), now we get
81 ; loop not vectorized: Cannot fold tail by masking in the presence of live outs.
82 ; instead.
83 define i64 @test2(i64 %y) {
84 ; CHECK-LABEL: @test2(
85 ; CHECK-NEXT: entry:
86 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
87 ; CHECK: for.body:
88 ; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[COND_END:%.*]] ]
89 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[Y:%.*]], 0
90 ; CHECK-NEXT: br i1 [[CMP]], label [[COND_END]], label [[COND_FALSE:%.*]]
91 ; CHECK: cond.false:
92 ; CHECK-NEXT: br label [[COND_END]]
93 ; CHECK: cond.end:
94 ; CHECK-NEXT: [[COND:%.*]] = phi i64 [ 55, [[COND_FALSE]] ], [ 77, [[FOR_BODY]] ]
95 ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I]], 1
96 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 3
97 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
98 ; CHECK: for.cond.cleanup:
99 ; CHECK-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], [[COND_END]] ]
100 ; CHECK-NEXT: ret i64 [[COND_LCSSA]]
101 ;
102 entry:
103 br label %for.body
104
105 for.body:
106 %i = phi i32 [ 0, %entry ], [ %inc, %cond.end ]
107 %cmp = icmp eq i64 %y, 0
108 br i1 %cmp, label %cond.end, label %cond.false
109
110 cond.false:
111 br label %cond.end
112
113 cond.end:
114 %cond = phi i64 [ 55, %cond.false ], [ 77, %for.body ]
115 %inc = add nuw nsw i32 %i, 1
116 %exitcond = icmp eq i32 %inc, 3
117 br i1 %exitcond, label %for.cond.cleanup, label %for.body
118
119 for.cond.cleanup:
120 ret i64 %cond
121 }
122
123 ; This test case miscompiled with clang 8.0.0 (see PR43166), now we get
124 ; loop not vectorized: Cannot fold tail by masking in the presence of live outs.
125 ; instead.
126 define i32 @test3(i64 %y) {
127 ; CHECK-LABEL: @test3(
128 ; CHECK-NEXT: entry:
129 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
130 ; CHECK: for.body:
131 ; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[COND_END:%.*]] ]
132 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[Y:%.*]], 0
133 ; CHECK-NEXT: br i1 [[CMP]], label [[COND_END]], label [[COND_FALSE:%.*]]
134 ; CHECK: cond.false:
135 ; CHECK-NEXT: br label [[COND_END]]
136 ; CHECK: cond.end:
137 ; CHECK-NEXT: [[COND:%.*]] = phi i32 [ 55, [[COND_FALSE]] ], [ [[I]], [[FOR_BODY]] ]
138 ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I]], 1
139 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 3
140 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
141 ; CHECK: for.cond.cleanup:
142 ; CHECK-NEXT: [[COND_LCSSA:%.*]] = phi i32 [ [[COND]], [[COND_END]] ]
143 ; CHECK-NEXT: ret i32 [[COND_LCSSA]]
144 ;
145 entry:
146 br label %for.body
147
148 for.body:
149 %i = phi i32 [ 0, %entry ], [ %inc, %cond.end ]
150 %cmp = icmp eq i64 %y, 0
151 br i1 %cmp, label %cond.end, label %cond.false
152
153 cond.false:
154 br label %cond.end
155
156 cond.end:
157 %cond = phi i32 [ 55, %cond.false ], [ %i, %for.body ]
158 %inc = add nuw nsw i32 %i, 1
159 %exitcond = icmp eq i32 %inc, 3
160 br i1 %exitcond, label %for.cond.cleanup, label %for.body
161
162 for.cond.cleanup:
163 ret i32 %cond
164 }