llvm.org GIT mirror: llvm, commit add3956
[SLP][X86] Add lookahead reordering tests from D60897
Author: Simon Pilgrim
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@363925 91177308-0d34-0410-b5e6-96231b3b80d8
1 changed file with 235 additions and 3 deletions.
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -slp-vectorizer -S < %s -mtriple=x86_64-unknown-linux -mcpu=corei7-avx | FileCheck %s
;
; This file tests the look-ahead operand reordering heuristic.
;
;
; This checks that operand reordering will reorder the operands of the adds
; by taking into consideration the instructions beyond the immediate
; predecessors.
;
; A[0] B[0] C[0] D[0] C[1] D[1] A[1] B[1]
;   \  /      \  /      \  /      \  /
;    +         +         +         +
;     \       /           \       /
;       \   /               \   /
;         +                   +
;         |                   |
;        S[0]                S[1]
;
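;
; As a reading aid (not part of the checked output): in scalar form the
; function below computes
;   S[0] = (A[0] + B[0]) + (C[0] + D[0])
;   S[1] = (C[1] + D[1]) + (A[1] + B[1])
; so the operands of the two root adds arrive in opposite order. A minimal
; sketch of the shape the reordering enables, assuming <2 x double> bundles
; and with hypothetical names %vA..%vD for the loaded pairs:
;   %vAB = fadd fast <2 x double> %vA, %vB    ; lanes: A[0]+B[0], A[1]+B[1]
;   %vCD = fadd fast <2 x double> %vC, %vD    ; lanes: C[0]+D[0], C[1]+D[1]
;   %vS  = fadd fast <2 x double> %vAB, %vCD  ; lanes: S[0], S[1]
;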
define void @lookahead_basic(double* %array) {
; CHECK-LABEL: @lookahead_basic(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[IDX0:%.*]] = getelementptr inbounds double, double* [[ARRAY:%.*]], i64 0
; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 1
; <remaining CHECK lines and the start of the function body are collapsed in the diff view>
  store double %addCDAB_1, double *%idx1, align 8
  ret void
}


; Check whether the look-ahead operand reordering heuristic will avoid
; bundling the alt opcodes. The vectorized code should have no shuffles.
;
; A[0] B[0] A[0] B[0] A[1] B[1] A[1] B[1]
;   \  /      \  /      \  /      \  /
;    +         -         -         +
;     \       /           \       /
;       \   /               \   /
;         +                   +
;         |                   |
;        S[0]                S[1]
;
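;
; As a reading aid (not part of the checked output): in scalar form this
; computes
;   S[0] = (A[0] + B[0]) + (A[0] - B[0])
;   S[1] = (A[1] - B[1]) + (A[1] + B[1])
; Swapping the operands of the lane-1 root add pairs the two fadds together
; and the two fsubs together, so each bundle has a single opcode and no
; alt-opcode shuffle is needed: see TMP5 (the vector fadd), TMP4 (the vector
; fsub), and TMP6 = TMP5 + TMP4 in the CHECK lines below.
;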
define void @lookahead_alt1(double* %array) {
; CHECK-LABEL: @lookahead_alt1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[IDX0:%.*]] = getelementptr inbounds double, double* [[ARRAY:%.*]], i64 0
; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 1
; CHECK-NEXT:    [[IDX2:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 2
; CHECK-NEXT:    [[IDX3:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 3
; CHECK-NEXT:    [[IDX4:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 4
; CHECK-NEXT:    [[IDX5:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 5
; CHECK-NEXT:    [[IDX6:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 6
; CHECK-NEXT:    [[IDX7:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 7
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[IDX0]] to <2 x double>*
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast double* [[IDX2]] to <2 x double>*
; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = fsub fast <2 x double> [[TMP1]], [[TMP3]]
; CHECK-NEXT:    [[TMP5:%.*]] = fadd fast <2 x double> [[TMP1]], [[TMP3]]
; CHECK-NEXT:    [[TMP6:%.*]] = fadd fast <2 x double> [[TMP5]], [[TMP4]]
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast double* [[IDX0]] to <2 x double>*
; CHECK-NEXT:    store <2 x double> [[TMP6]], <2 x double>* [[TMP7]], align 8
; CHECK-NEXT:    ret void
;
entry:
  %idx0 = getelementptr inbounds double, double* %array, i64 0
  %idx1 = getelementptr inbounds double, double* %array, i64 1
  %idx2 = getelementptr inbounds double, double* %array, i64 2
  %idx3 = getelementptr inbounds double, double* %array, i64 3
  %idx4 = getelementptr inbounds double, double* %array, i64 4
  %idx5 = getelementptr inbounds double, double* %array, i64 5
  %idx6 = getelementptr inbounds double, double* %array, i64 6
  %idx7 = getelementptr inbounds double, double* %array, i64 7

  %A_0 = load double, double *%idx0, align 8
  %A_1 = load double, double *%idx1, align 8
  %B_0 = load double, double *%idx2, align 8
  %B_1 = load double, double *%idx3, align 8

  %addAB_0_L = fadd fast double %A_0, %B_0
  %subAB_0_R = fsub fast double %A_0, %B_0

  %subAB_1_L = fsub fast double %A_1, %B_1
  %addAB_1_R = fadd fast double %A_1, %B_1

  %addABCD_0 = fadd fast double %addAB_0_L, %subAB_0_R
  %addCDAB_1 = fadd fast double %subAB_1_L, %addAB_1_R

  store double %addABCD_0, double *%idx0, align 8
  store double %addCDAB_1, double *%idx1, align 8
  ret void
}


; This code should get vectorized all the way to the loads with shuffles for
; the alt opcodes.
;
; A[0] B[0] C[0] D[0] C[1] D[1] A[1] B[1]
;   \  /      \  /      \  /      \  /
;    +         -         +         -
;     \       /           \       /
;       \   /               \   /
;         +                   +
;         |                   |
;        S[0]                S[1]
;
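;
; As a reading aid (not part of the checked output): in scalar form this
; computes
;   S[0] = (A[0] + B[0]) + (C[0] - D[0])
;   S[1] = (C[1] + D[1]) + (A[1] - B[1])
; One plausible vector form bundles the adds {A[0]+B[0], C[1]+D[1]} and the
; subs {C[0]-D[0], A[1]-B[1]}, with shuffles gathering the operands from the
; loaded vectors. The CHECK lines below still show fully scalar code: these
; tests were imported from D60897 ahead of the heuristic change, so they
; document the current behaviour rather than the desired one.
;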
define void @lookahead_alt2(double* %array) {
; CHECK-LABEL: @lookahead_alt2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[IDX0:%.*]] = getelementptr inbounds double, double* [[ARRAY:%.*]], i64 0
; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 1
; CHECK-NEXT:    [[IDX2:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 2
; CHECK-NEXT:    [[IDX3:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 3
; CHECK-NEXT:    [[IDX4:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 4
; CHECK-NEXT:    [[IDX5:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 5
; CHECK-NEXT:    [[IDX6:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 6
; CHECK-NEXT:    [[IDX7:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 7
; CHECK-NEXT:    [[A_0:%.*]] = load double, double* [[IDX0]], align 8
; CHECK-NEXT:    [[A_1:%.*]] = load double, double* [[IDX1]], align 8
; CHECK-NEXT:    [[B_0:%.*]] = load double, double* [[IDX2]], align 8
; CHECK-NEXT:    [[B_1:%.*]] = load double, double* [[IDX3]], align 8
; CHECK-NEXT:    [[C_0:%.*]] = load double, double* [[IDX4]], align 8
; CHECK-NEXT:    [[C_1:%.*]] = load double, double* [[IDX5]], align 8
; CHECK-NEXT:    [[D_0:%.*]] = load double, double* [[IDX6]], align 8
; CHECK-NEXT:    [[D_1:%.*]] = load double, double* [[IDX7]], align 8
; CHECK-NEXT:    [[ADDAB_0:%.*]] = fadd fast double [[A_0]], [[B_0]]
; CHECK-NEXT:    [[SUBCD_0:%.*]] = fsub fast double [[C_0]], [[D_0]]
; CHECK-NEXT:    [[ADDCD_1:%.*]] = fadd fast double [[C_1]], [[D_1]]
; CHECK-NEXT:    [[SUBAB_1:%.*]] = fsub fast double [[A_1]], [[B_1]]
; CHECK-NEXT:    [[ADDABCD_0:%.*]] = fadd fast double [[ADDAB_0]], [[SUBCD_0]]
; CHECK-NEXT:    [[ADDCDAB_1:%.*]] = fadd fast double [[ADDCD_1]], [[SUBAB_1]]
; CHECK-NEXT:    store double [[ADDABCD_0]], double* [[IDX0]], align 8
; CHECK-NEXT:    store double [[ADDCDAB_1]], double* [[IDX1]], align 8
; CHECK-NEXT:    ret void
;
entry:
  %idx0 = getelementptr inbounds double, double* %array, i64 0
  %idx1 = getelementptr inbounds double, double* %array, i64 1
  %idx2 = getelementptr inbounds double, double* %array, i64 2
  %idx3 = getelementptr inbounds double, double* %array, i64 3
  %idx4 = getelementptr inbounds double, double* %array, i64 4
  %idx5 = getelementptr inbounds double, double* %array, i64 5
  %idx6 = getelementptr inbounds double, double* %array, i64 6
  %idx7 = getelementptr inbounds double, double* %array, i64 7

  %A_0 = load double, double *%idx0, align 8
  %A_1 = load double, double *%idx1, align 8
  %B_0 = load double, double *%idx2, align 8
  %B_1 = load double, double *%idx3, align 8
  %C_0 = load double, double *%idx4, align 8
  %C_1 = load double, double *%idx5, align 8
  %D_0 = load double, double *%idx6, align 8
  %D_1 = load double, double *%idx7, align 8

  %addAB_0 = fadd fast double %A_0, %B_0
  %subCD_0 = fsub fast double %C_0, %D_0

  %addCD_1 = fadd fast double %C_1, %D_1
  %subAB_1 = fsub fast double %A_1, %B_1

  %addABCD_0 = fadd fast double %addAB_0, %subCD_0
  %addCDAB_1 = fadd fast double %addCD_1, %subAB_1

  store double %addABCD_0, double *%idx0, align 8
  store double %addCDAB_1, double *%idx1, align 8
  ret void
}


;
; A[0] B[0] C[0] D[0] A[1] B[2] A[2] B[1]
;   \  /      \  /    /  \  /      \  /
;    -         -     U    -         -
;     \       /            \       /
;       \   /                \   /
;         +                    +
;         |                    |
;        S[0]                 S[1]
;
; SLP should reorder the operands of the RHS add, taking into consideration
; the cost of external uses. Reordering is more profitable here because A[1]
; has an external use (marked U above).

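;
; As a reading aid (not part of the checked output): in scalar form this
; computes
;   S[0] = (A[0] - B[0]) + (C[0] - D[0])
;   S[1] = (A[1] - B[2]) + (A[2] - B[1])
; Bundling A[1] with A[0] lets both lanes come from one <2 x double> load of
; A (TMP1 in the CHECK lines), and the external store of A[1] is then served
; by an extractelement from that load (TMP12) rather than by a separate
; scalar load.
;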
define void @lookahead_external_uses(double* %A, double *%B, double *%C, double *%D, double *%S, double *%Ext1, double *%Ext2) {
; CHECK-LABEL: @lookahead_external_uses(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[IDXA0:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 0
; CHECK-NEXT:    [[IDXB0:%.*]] = getelementptr inbounds double, double* [[B:%.*]], i64 0
; CHECK-NEXT:    [[IDXC0:%.*]] = getelementptr inbounds double, double* [[C:%.*]], i64 0
; CHECK-NEXT:    [[IDXD0:%.*]] = getelementptr inbounds double, double* [[D:%.*]], i64 0
; CHECK-NEXT:    [[IDXA1:%.*]] = getelementptr inbounds double, double* [[A]], i64 1
; CHECK-NEXT:    [[IDXB2:%.*]] = getelementptr inbounds double, double* [[B]], i64 2
; CHECK-NEXT:    [[IDXA2:%.*]] = getelementptr inbounds double, double* [[A]], i64 2
; CHECK-NEXT:    [[IDXB1:%.*]] = getelementptr inbounds double, double* [[B]], i64 1
; CHECK-NEXT:    [[B0:%.*]] = load double, double* [[IDXB0]], align 8
; CHECK-NEXT:    [[C0:%.*]] = load double, double* [[IDXC0]], align 8
; CHECK-NEXT:    [[D0:%.*]] = load double, double* [[IDXD0]], align 8
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[IDXA0]] to <2 x double>*
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
; CHECK-NEXT:    [[B2:%.*]] = load double, double* [[IDXB2]], align 8
; CHECK-NEXT:    [[A2:%.*]] = load double, double* [[IDXA2]], align 8
; CHECK-NEXT:    [[B1:%.*]] = load double, double* [[IDXB1]], align 8
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double [[B0]], i32 0
; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[B2]], i32 1
; CHECK-NEXT:    [[TMP4:%.*]] = fsub fast <2 x double> [[TMP1]], [[TMP3]]
; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x double> undef, double [[C0]], i32 0
; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x double> [[TMP5]], double [[A2]], i32 1
; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x double> undef, double [[D0]], i32 0
; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x double> [[TMP7]], double [[B1]], i32 1
; CHECK-NEXT:    [[TMP9:%.*]] = fsub fast <2 x double> [[TMP6]], [[TMP8]]
; CHECK-NEXT:    [[TMP10:%.*]] = fadd fast <2 x double> [[TMP4]], [[TMP9]]
; CHECK-NEXT:    [[IDXS0:%.*]] = getelementptr inbounds double, double* [[S:%.*]], i64 0
; CHECK-NEXT:    [[IDXS1:%.*]] = getelementptr inbounds double, double* [[S]], i64 1
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast double* [[IDXS0]] to <2 x double>*
; CHECK-NEXT:    store <2 x double> [[TMP10]], <2 x double>* [[TMP11]], align 8
; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
; CHECK-NEXT:    store double [[TMP12]], double* [[EXT1:%.*]], align 8
; CHECK-NEXT:    ret void
;
entry:
  %IdxA0 = getelementptr inbounds double, double* %A, i64 0
  %IdxB0 = getelementptr inbounds double, double* %B, i64 0
  %IdxC0 = getelementptr inbounds double, double* %C, i64 0
  %IdxD0 = getelementptr inbounds double, double* %D, i64 0

  %IdxA1 = getelementptr inbounds double, double* %A, i64 1
  %IdxB2 = getelementptr inbounds double, double* %B, i64 2
  %IdxA2 = getelementptr inbounds double, double* %A, i64 2
  %IdxB1 = getelementptr inbounds double, double* %B, i64 1

  %A0 = load double, double *%IdxA0, align 8
  %B0 = load double, double *%IdxB0, align 8
  %C0 = load double, double *%IdxC0, align 8
  %D0 = load double, double *%IdxD0, align 8

  %A1 = load double, double *%IdxA1, align 8
  %B2 = load double, double *%IdxB2, align 8
  %A2 = load double, double *%IdxA2, align 8
  %B1 = load double, double *%IdxB1, align 8

  %subA0B0 = fsub fast double %A0, %B0
  %subC0D0 = fsub fast double %C0, %D0

  %subA1B2 = fsub fast double %A1, %B2
  %subA2B1 = fsub fast double %A2, %B1

  %add0 = fadd fast double %subA0B0, %subC0D0
  %add1 = fadd fast double %subA1B2, %subA2B1

  %IdxS0 = getelementptr inbounds double, double* %S, i64 0
  %IdxS1 = getelementptr inbounds double, double* %S, i64 1

  store double %add0, double *%IdxS0, align 8
  store double %add1, double *%IdxS1, align 8

  ; External use
  store double %A1, double *%Ext1, align 8
  ret void
}