llvm.org GIT mirror llvm / 50ee711
Merging r314251: ------------------------------------------------------------------------ r314251 | gberry | 2017-09-26 14:40:41 -0700 (Tue, 26 Sep 2017) | 16 lines [AArch64][Falkor] Fix correctness bug in falkor prefetcher fix pass and correct some opcode tag computations. Summary: This addresses a correctness bug for LD[1234]*_POST opcodes that have the prefetcher fix applied to them: the base register was not being written back from the temp after being incremented, so it would appear to never be incremented. Also, fix some opcode tag computations based on some updated HW details to get better tag avoidance and thus better prefetcher performance. Reviewers: mcrosier Subscribers: aemerson, rengolin, javed.absar, kristof.beyls Differential Revision: https://reviews.llvm.org/D38256 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_50@314554 91177308-0d34-0410-b5e6-96231b3b80d8 Tom Stellard 3 years ago
2 changed file(s) with 330 addition(s) and 72 deletion(s). Raw diff Collapse all Expand all
219219 default:
220220 return None;
221221
222 case AArch64::LD1i8:
223 case AArch64::LD1i16:
224 case AArch64::LD1i32:
225222 case AArch64::LD1i64:
226 case AArch64::LD2i8:
227 case AArch64::LD2i16:
228 case AArch64::LD2i32:
229223 case AArch64::LD2i64:
230 case AArch64::LD3i8:
231 case AArch64::LD3i16:
232 case AArch64::LD3i32:
233 case AArch64::LD4i8:
234 case AArch64::LD4i16:
235 case AArch64::LD4i32:
236224 DestRegIdx = 0;
237225 BaseRegIdx = 3;
238226 OffsetIdx = -1;
239227 IsPrePost = false;
240228 break;
241229
230 case AArch64::LD1i8:
231 case AArch64::LD1i16:
232 case AArch64::LD1i32:
233 case AArch64::LD2i8:
234 case AArch64::LD2i16:
235 case AArch64::LD2i32:
236 case AArch64::LD3i8:
237 case AArch64::LD3i16:
238 case AArch64::LD3i32:
242239 case AArch64::LD3i64:
240 case AArch64::LD4i8:
241 case AArch64::LD4i16:
242 case AArch64::LD4i32:
243243 case AArch64::LD4i64:
244244 DestRegIdx = -1;
245245 BaseRegIdx = 3;
263263 case AArch64::LD1Rv4s:
264264 case AArch64::LD1Rv8h:
265265 case AArch64::LD1Rv16b:
266 DestRegIdx = 0;
267 BaseRegIdx = 1;
268 OffsetIdx = -1;
269 IsPrePost = false;
270 break;
271
266272 case AArch64::LD1Twov1d:
267273 case AArch64::LD1Twov2s:
268274 case AArch64::LD1Twov4h:
269275 case AArch64::LD1Twov8b:
270 case AArch64::LD2Twov2s:
271 case AArch64::LD2Twov4s:
272 case AArch64::LD2Twov8b:
273 case AArch64::LD2Rv1d:
274 case AArch64::LD2Rv2s:
275 case AArch64::LD2Rv4s:
276 case AArch64::LD2Rv8b:
277 DestRegIdx = 0;
278 BaseRegIdx = 1;
279 OffsetIdx = -1;
280 IsPrePost = false;
281 break;
282
283276 case AArch64::LD1Twov2d:
284277 case AArch64::LD1Twov4s:
285278 case AArch64::LD1Twov8h:
300293 case AArch64::LD1Fourv4s:
301294 case AArch64::LD1Fourv8h:
302295 case AArch64::LD1Fourv16b:
296 case AArch64::LD2Twov2s:
297 case AArch64::LD2Twov4s:
298 case AArch64::LD2Twov8b:
303299 case AArch64::LD2Twov2d:
304300 case AArch64::LD2Twov4h:
305301 case AArch64::LD2Twov8h:
306302 case AArch64::LD2Twov16b:
303 case AArch64::LD2Rv1d:
304 case AArch64::LD2Rv2s:
305 case AArch64::LD2Rv4s:
306 case AArch64::LD2Rv8b:
307307 case AArch64::LD2Rv2d:
308308 case AArch64::LD2Rv4h:
309309 case AArch64::LD2Rv8h:
344344 IsPrePost = false;
345345 break;
346346
347 case AArch64::LD1i64_POST:
348 case AArch64::LD2i64_POST:
349 DestRegIdx = 1;
350 BaseRegIdx = 4;
351 OffsetIdx = 5;
352 IsPrePost = true;
353 break;
354
347355 case AArch64::LD1i8_POST:
348356 case AArch64::LD1i16_POST:
349357 case AArch64::LD1i32_POST:
350 case AArch64::LD1i64_POST:
351358 case AArch64::LD2i8_POST:
352359 case AArch64::LD2i16_POST:
353360 case AArch64::LD2i32_POST:
354 case AArch64::LD2i64_POST:
355361 case AArch64::LD3i8_POST:
356362 case AArch64::LD3i16_POST:
357363 case AArch64::LD3i32_POST:
364 case AArch64::LD3i64_POST:
358365 case AArch64::LD4i8_POST:
359366 case AArch64::LD4i16_POST:
360367 case AArch64::LD4i32_POST:
361 DestRegIdx = 1;
362 BaseRegIdx = 4;
363 OffsetIdx = 5;
364 IsPrePost = false;
365 break;
366
367 case AArch64::LD3i64_POST:
368368 case AArch64::LD4i64_POST:
369369 DestRegIdx = -1;
370370 BaseRegIdx = 4;
371371 OffsetIdx = 5;
372 IsPrePost = false;
372 IsPrePost = true;
373373 break;
374374
375375 case AArch64::LD1Onev1d_POST:
388388 case AArch64::LD1Rv4s_POST:
389389 case AArch64::LD1Rv8h_POST:
390390 case AArch64::LD1Rv16b_POST:
391 DestRegIdx = 1;
392 BaseRegIdx = 2;
393 OffsetIdx = 3;
394 IsPrePost = true;
395 break;
396
391397 case AArch64::LD1Twov1d_POST:
392398 case AArch64::LD1Twov2s_POST:
393399 case AArch64::LD1Twov4h_POST:
394400 case AArch64::LD1Twov8b_POST:
395 case AArch64::LD2Twov2s_POST:
396 case AArch64::LD2Twov4s_POST:
397 case AArch64::LD2Twov8b_POST:
398 case AArch64::LD2Rv1d_POST:
399 case AArch64::LD2Rv2s_POST:
400 case AArch64::LD2Rv4s_POST:
401 case AArch64::LD2Rv8b_POST:
402 DestRegIdx = 1;
403 BaseRegIdx = 2;
404 OffsetIdx = 3;
405 IsPrePost = false;
406 break;
407
408401 case AArch64::LD1Twov2d_POST:
409402 case AArch64::LD1Twov4s_POST:
410403 case AArch64::LD1Twov8h_POST:
425418 case AArch64::LD1Fourv4s_POST:
426419 case AArch64::LD1Fourv8h_POST:
427420 case AArch64::LD1Fourv16b_POST:
421 case AArch64::LD2Twov2s_POST:
422 case AArch64::LD2Twov4s_POST:
423 case AArch64::LD2Twov8b_POST:
428424 case AArch64::LD2Twov2d_POST:
429425 case AArch64::LD2Twov4h_POST:
430426 case AArch64::LD2Twov8h_POST:
431427 case AArch64::LD2Twov16b_POST:
428 case AArch64::LD2Rv1d_POST:
429 case AArch64::LD2Rv2s_POST:
430 case AArch64::LD2Rv4s_POST:
431 case AArch64::LD2Rv8b_POST:
432432 case AArch64::LD2Rv2d_POST:
433433 case AArch64::LD2Rv4h_POST:
434434 case AArch64::LD2Rv8h_POST:
466466 DestRegIdx = -1;
467467 BaseRegIdx = 2;
468468 OffsetIdx = 3;
469 IsPrePost = false;
469 IsPrePost = true;
470470 break;
471471
472472 case AArch64::LDRBBroW:
571571 IsPrePost = true;
572572 break;
573573
574 case AArch64::LDNPDi:
575 case AArch64::LDNPQi:
576 case AArch64::LDNPSi:
574577 case AArch64::LDPQi:
578 case AArch64::LDPDi:
579 case AArch64::LDPSi:
575580 DestRegIdx = -1;
576581 BaseRegIdx = 2;
577582 OffsetIdx = 3;
578583 IsPrePost = false;
579584 break;
580585
581 case AArch64::LDPDi:
582586 case AArch64::LDPSWi:
583 case AArch64::LDPSi:
584587 case AArch64::LDPWi:
585588 case AArch64::LDPXi:
586589 DestRegIdx = 0;
591594
592595 case AArch64::LDPQpost:
593596 case AArch64::LDPQpre:
597 case AArch64::LDPDpost:
598 case AArch64::LDPDpre:
599 case AArch64::LDPSpost:
600 case AArch64::LDPSpre:
594601 DestRegIdx = -1;
595602 BaseRegIdx = 3;
596603 OffsetIdx = 4;
597604 IsPrePost = true;
598605 break;
599606
600 case AArch64::LDPDpost:
601 case AArch64::LDPDpre:
602607 case AArch64::LDPSWpost:
603608 case AArch64::LDPSWpre:
604 case AArch64::LDPSpost:
605 case AArch64::LDPSpre:
606609 case AArch64::LDPWpost:
607610 case AArch64::LDPWpre:
608611 case AArch64::LDPXpost:
0 # RUN: llc -mtriple=aarch64-linux-gnu -mcpu=falkor -run-pass falkor-hwpf-fix-late -o - %s | FileCheck %s
1 --- |
2 @g = external global i32
3
4 define void @hwpf1() { ret void }
5 define void @hwpf2() { ret void }
6 ...
7 ---
8 # Verify that the tag collision between the loads is resolved.
1 ---
2 # Verify that the tag collision between the loads is resolved for various load opcodes.
3
94 # CHECK-LABEL: name: hwpf1
105 # CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
116 # CHECK: LDRWui %[[BASE]], 0
1611 bb.0:
1712 liveins: %w0, %x1
1813
19 %w2 = LDRWui %x1, 0 :: ("aarch64-strided-access" load 4 from @g)
14 %w2 = LDRWui %x1, 0 :: ("aarch64-strided-access" load 4)
2015 %w2 = LDRWui %x1, 1
2116
2217 %w0 = SUBWri %w0, 1, 0
2722 RET_ReallyLR
2823 ...
2924 ---
30 # Verify that the tag collision between the loads is resolved and written back for post increment addressing.
3125 # CHECK-LABEL: name: hwpf2
3226 # CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
27 # CHECK: LD1i64 %q2, 0, %[[BASE]]
28 # CHECK: LDRWui %x1, 0
29 name: hwpf2
30 tracksRegLiveness: true
31 body: |
32 bb.0:
33 liveins: %w0, %x1, %q2
34
35 %q2 = LD1i64 %q2, 0, %x1 :: ("aarch64-strided-access" load 4)
36 %w2 = LDRWui %x1, 0
37
38 %w0 = SUBWri %w0, 1, 0
39 %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv
40 Bcc 9, %bb.0, implicit %nzcv
41
42 bb.1:
43 RET_ReallyLR
44 ...
45 ---
46 # CHECK-LABEL: name: hwpf3
47 # CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
48 # CHECK: LD1i8 %q2, 0, %[[BASE]]
49 # CHECK: LDRWui %x1, 0
50 name: hwpf3
51 tracksRegLiveness: true
52 body: |
53 bb.0:
54 liveins: %w0, %x1, %q2
55
56 %q2 = LD1i8 %q2, 0, %x1 :: ("aarch64-strided-access" load 4)
57 %w0 = LDRWui %x1, 0
58
59 %w0 = SUBWri %w0, 1, 0
60 %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv
61 Bcc 9, %bb.0, implicit %nzcv
62
63 bb.1:
64 RET_ReallyLR
65 ...
66 ---
67 # CHECK-LABEL: name: hwpf4
68 # CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
69 # CHECK: LD1Onev1d %[[BASE]]
70 # CHECK: LDRWui %x1, 0
71 name: hwpf4
72 tracksRegLiveness: true
73 body: |
74 bb.0:
75 liveins: %w0, %x1
76
77 %d2 = LD1Onev1d %x1 :: ("aarch64-strided-access" load 4)
78 %w2 = LDRWui %x1, 0
79
80 %w0 = SUBWri %w0, 1, 0
81 %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv
82 Bcc 9, %bb.0, implicit %nzcv
83
84 bb.1:
85 RET_ReallyLR
86 ...
87 ---
88 # CHECK-LABEL: name: hwpf5
89 # CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
90 # CHECK: LD1Twov1d %[[BASE]]
91 # CHECK: LDRWui %x1, 0
92 name: hwpf5
93 tracksRegLiveness: true
94 body: |
95 bb.0:
96 liveins: %w0, %x1
97
98 %d2_d3 = LD1Twov1d %x1 :: ("aarch64-strided-access" load 4)
99 %w0 = LDRWui %x1, 0
100
101 %w0 = SUBWri %w0, 1, 0
102 %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv
103 Bcc 9, %bb.0, implicit %nzcv
104
105 bb.1:
106 RET_ReallyLR
107 ...
108 ---
109 # CHECK-LABEL: name: hwpf6
110 # CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
111 # CHECK: LDPQi %[[BASE]]
112 # CHECK: LDRWui %x1, 3
113 name: hwpf6
114 tracksRegLiveness: true
115 body: |
116 bb.0:
117 liveins: %w0, %x1
118
119 %q2, %q3 = LDPQi %x1, 3 :: ("aarch64-strided-access" load 4)
120 %w0 = LDRWui %x1, 3
121
122 %w0 = SUBWri %w0, 1, 0
123 %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv
124 Bcc 9, %bb.0, implicit %nzcv
125
126 bb.1:
127 RET_ReallyLR
128 ...
129 ---
130 # CHECK-LABEL: name: hwpf7
131 # CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
132 # CHECK: LDPXi %[[BASE]]
133 # CHECK: LDRWui %x1, 2
134 name: hwpf7
135 tracksRegLiveness: true
136 body: |
137 bb.0:
138 liveins: %w0, %x1
139
140 %x2, %x3 = LDPXi %x1, 3 :: ("aarch64-strided-access" load 4)
141 %w2 = LDRWui %x1, 2
142
143 %w0 = SUBWri %w0, 1, 0
144 %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv
145 Bcc 9, %bb.0, implicit %nzcv
146
147 bb.1:
148 RET_ReallyLR
149 ...
150 ---
151 # Verify that the tag collision between the loads is resolved and that the base
152 # register is written back for post-increment addressing, for various load opcodes.
153
154 # CHECK-LABEL: name: hwpfinc1
155 # CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
33156 # CHECK: LDRWpost %[[BASE]], 0
34157 # CHECK: %x1 = ORRXrs %xzr, %[[BASE]], 0
35158 # CHECK: LDRWui %x1, 1
36 name: hwpf2
37 tracksRegLiveness: true
38 body: |
39 bb.0:
40 liveins: %w0, %x1
41
42 %x1, %w2 = LDRWpost %x1, 0 :: ("aarch64-strided-access" load 4 from @g)
159 name: hwpfinc1
160 tracksRegLiveness: true
161 body: |
162 bb.0:
163 liveins: %w0, %x1
164
165 %x1, %w2 = LDRWpost %x1, 0 :: ("aarch64-strided-access" load 4)
43166 %w2 = LDRWui %x1, 1
44167
45168 %w0 = SUBWri %w0, 1, 0
49172 bb.1:
50173 RET_ReallyLR
51174 ...
175 ---
176 # CHECK-LABEL: name: hwpfinc2
177 # CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
178 # CHECK: LD1i64_POST %q2, 0, %[[BASE]]
179 # CHECK: %x1 = ORRXrs %xzr, %[[BASE]], 0
180 # CHECK: LDRWui %x1, 132
181 name: hwpfinc2
182 tracksRegLiveness: true
183 body: |
184 bb.0:
185 liveins: %w0, %x1, %q2
186
187 %x1, %q2 = LD1i64_POST %q2, 0, %x1, %x1 :: ("aarch64-strided-access" load 4)
188 %w2 = LDRWui %x1, 132
189
190 %w0 = SUBWri %w0, 1, 0
191 %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv
192 Bcc 9, %bb.0, implicit %nzcv
193
194 bb.1:
195 RET_ReallyLR
196 ...
197 ---
198 # CHECK-LABEL: name: hwpfinc3
199 # CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
200 # CHECK: LD1i8_POST %q2, 0, %[[BASE]]
201 # CHECK: %x1 = ORRXrs %xzr, %[[BASE]], 0
202 # CHECK: LDRWui %x1, 132
203 name: hwpfinc3
204 tracksRegLiveness: true
205 body: |
206 bb.0:
207 liveins: %w0, %x1, %q2
208
209 %x1, %q2 = LD1i8_POST %q2, 0, %x1, %x1 :: ("aarch64-strided-access" load 4)
210 %w0 = LDRWui %x1, 132
211
212 %w0 = SUBWri %w0, 1, 0
213 %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv
214 Bcc 9, %bb.0, implicit %nzcv
215
216 bb.1:
217 RET_ReallyLR
218 ...
219 ---
220 # CHECK-LABEL: name: hwpfinc4
221 # CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
222 # CHECK: LD1Rv1d_POST %[[BASE]]
223 # CHECK: %x1 = ORRXrs %xzr, %[[BASE]], 0
224 # CHECK: LDRWui %x1, 252
225 name: hwpfinc4
226 tracksRegLiveness: true
227 body: |
228 bb.0:
229 liveins: %w0, %x1, %q2
230
231 %x1, %d2 = LD1Rv1d_POST %x1, %xzr :: ("aarch64-strided-access" load 4)
232 %w2 = LDRWui %x1, 252
233
234 %w0 = SUBWri %w0, 1, 0
235 %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv
236 Bcc 9, %bb.0, implicit %nzcv
237
238 bb.1:
239 RET_ReallyLR
240 ...
241 ---
242 # CHECK-LABEL: name: hwpfinc5
243 # CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
244 # CHECK: LD3Threev2s_POST %[[BASE]]
245 # CHECK: %x1 = ORRXrs %xzr, %[[BASE]], 0
246 # CHECK: LDRWroX %x17, %x0
247 name: hwpfinc5
248 tracksRegLiveness: true
249 body: |
250 bb.0:
251 liveins: %w0, %x1, %x17, %q2
252
253 %x1, %d2_d3_d4 = LD3Threev2s_POST %x1, %x0 :: ("aarch64-strided-access" load 4)
254 %w0 = LDRWroX %x17, %x0, 0, 0
255
256 %w0 = SUBWri %w0, 1, 0
257 %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv
258 Bcc 9, %bb.0, implicit %nzcv
259
260 bb.1:
261 RET_ReallyLR
262 ...
263 ---
264 # CHECK-LABEL: name: hwpfinc6
265 # CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
266 # CHECK: LDPDpost %[[BASE]]
267 # CHECK: %x1 = ORRXrs %xzr, %[[BASE]], 0
268 # CHECK: LDRWui %x17, 2
269 name: hwpfinc6
270 tracksRegLiveness: true
271 body: |
272 bb.0:
273 liveins: %w0, %x1, %x17, %q2
274
275 %x1, %d2, %d3 = LDPDpost %x1, 3 :: ("aarch64-strided-access" load 4)
276 %w16 = LDRWui %x17, 2
277
278 %w0 = SUBWri %w0, 1, 0
279 %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv
280 Bcc 9, %bb.0, implicit %nzcv
281
282 bb.1:
283 RET_ReallyLR
284 ...
285 ---
286 # CHECK-LABEL: name: hwpfinc7
287 # CHECK: %[[BASE:[a-z0-9]+]] = ORRXrs %xzr, %x1, 0
288 # CHECK: LDPXpost %[[BASE]]
289 # CHECK: %x1 = ORRXrs %xzr, %[[BASE]], 0
290 # CHECK: LDRWui %x17, 2
291 name: hwpfinc7
292 tracksRegLiveness: true
293 body: |
294 bb.0:
295 liveins: %w0, %x1, %x17, %q2
296
297 %x1, %x2, %x3 = LDPXpost %x1, 3 :: ("aarch64-strided-access" load 4)
298 %w18 = LDRWui %x17, 2
299
300 %w0 = SUBWri %w0, 1, 0
301 %wzr = SUBSWri %w0, 0, 0, implicit-def %nzcv
302 Bcc 9, %bb.0, implicit %nzcv
303
304 bb.1:
305 RET_ReallyLR
306 ...