llvm.org GIT mirror llvm / c05af7a
[X86] Regenerate merge store tests. NFCI. Gives us a much better idea of what is going on than just relying on a few checks. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@310780 91177308-0d34-0410-b5e6-96231b3b80d8 Simon Pilgrim 2 years ago
1 changed file(s) with 333 addition(s) and 106 deletion(s). Raw diff Collapse all Expand all
0 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
1 ; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx -fixup-byte-word-insts=1 < %s | FileCheck -check-prefix=CHECK -check-prefix=BWON %s
12 ; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx -fixup-byte-word-insts=0 < %s | FileCheck -check-prefix=CHECK -check-prefix=BWOFF %s
23
34 %struct.A = type { i8, i8, i8, i8, i8, i8, i8, i8 }
45 %struct.B = type { i32, i32, i32, i32, i32, i32, i32, i32 }
56
7 ; save 1,2,3 ... as one big integer.
8 define void @merge_const_store(i32 %count, %struct.A* nocapture %p) nounwind uwtable noinline ssp {
69 ; CHECK-LABEL: merge_const_store:
7 ; save 1,2,3 ... as one big integer.
8 ; CHECK: movabsq $578437695752307201
9 ; CHECK: ret
10 define void @merge_const_store(i32 %count, %struct.A* nocapture %p) nounwind uwtable noinline ssp {
10 ; CHECK: # BB#0:
11 ; CHECK-NEXT: testl %edi, %edi
12 ; CHECK-NEXT: jle .LBB0_3
13 ; CHECK-NEXT: # BB#1: # %.lr.ph.preheader
14 ; CHECK-NEXT: movabsq $578437695752307201, %rax # imm = 0x807060504030201
15 ; CHECK-NEXT: .p2align 4, 0x90
16 ; CHECK-NEXT: .LBB0_2: # %.lr.ph
17 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
18 ; CHECK-NEXT: movq %rax, (%rsi)
19 ; CHECK-NEXT: addq $8, %rsi
20 ; CHECK-NEXT: decl %edi
21 ; CHECK-NEXT: jne .LBB0_2
22 ; CHECK-NEXT: .LBB0_3: # %._crit_edge
23 ; CHECK-NEXT: retq
1124 %1 = icmp sgt i32 %count, 0
1225 br i1 %1, label %.lr.ph, label %._crit_edge
1326 .lr.ph:
3851 }
3952
4053 ; No vectors because we use noimplicitfloat
54 define void @merge_const_store_no_vec(i32 %count, %struct.B* nocapture %p) noimplicitfloat{
4155 ; CHECK-LABEL: merge_const_store_no_vec:
42 ; CHECK-NOT: vmovups
43 ; CHECK: ret
44 define void @merge_const_store_no_vec(i32 %count, %struct.B* nocapture %p) noimplicitfloat{
56 ; CHECK: # BB#0:
57 ; CHECK-NEXT: testl %edi, %edi
58 ; CHECK-NEXT: jle .LBB1_2
59 ; CHECK-NEXT: .p2align 4, 0x90
60 ; CHECK-NEXT: .LBB1_1: # %.lr.ph
61 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
62 ; CHECK-NEXT: movq $0, (%rsi)
63 ; CHECK-NEXT: movq $0, 8(%rsi)
64 ; CHECK-NEXT: movq $0, 16(%rsi)
65 ; CHECK-NEXT: movq $0, 24(%rsi)
66 ; CHECK-NEXT: addq $32, %rsi
67 ; CHECK-NEXT: decl %edi
68 ; CHECK-NEXT: jne .LBB1_1
69 ; CHECK-NEXT: .LBB1_2: # %._crit_edge
70 ; CHECK-NEXT: retq
4571 %1 = icmp sgt i32 %count, 0
4672 br i1 %1, label %.lr.ph, label %._crit_edge
4773 .lr.ph:
7298 }
7399
74100 ; Move the constants using a single vector store.
101 define void @merge_const_store_vec(i32 %count, %struct.B* nocapture %p) nounwind uwtable noinline ssp {
75102 ; CHECK-LABEL: merge_const_store_vec:
76 ; CHECK: vmovups
77 ; CHECK: ret
78 define void @merge_const_store_vec(i32 %count, %struct.B* nocapture %p) nounwind uwtable noinline ssp {
103 ; CHECK: # BB#0:
104 ; CHECK-NEXT: testl %edi, %edi
105 ; CHECK-NEXT: jle .LBB2_3
106 ; CHECK-NEXT: # BB#1: # %.lr.ph.preheader
107 ; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
108 ; CHECK-NEXT: .p2align 4, 0x90
109 ; CHECK-NEXT: .LBB2_2: # %.lr.ph
110 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
111 ; CHECK-NEXT: vmovups %ymm0, (%rsi)
112 ; CHECK-NEXT: addq $32, %rsi
113 ; CHECK-NEXT: decl %edi
114 ; CHECK-NEXT: jne .LBB2_2
115 ; CHECK-NEXT: .LBB2_3: # %._crit_edge
116 ; CHECK-NEXT: vzeroupper
117 ; CHECK-NEXT: retq
79118 %1 = icmp sgt i32 %count, 0
80119 br i1 %1, label %.lr.ph, label %._crit_edge
81120 .lr.ph:
106145 }
107146
108147 ; Move the first 4 constants as a single vector. Move the rest as scalars.
148 define void @merge_nonconst_store(i32 %count, i8 %zz, %struct.A* nocapture %p) nounwind uwtable noinline ssp {
109149 ; CHECK-LABEL: merge_nonconst_store:
110 ; CHECK: movl $67305985
111 ; CHECK: movb
112 ; CHECK: movw
113 ; CHECK: movb
114 ; CHECK: ret
115 define void @merge_nonconst_store(i32 %count, i8 %zz, %struct.A* nocapture %p) nounwind uwtable noinline ssp {
150 ; CHECK: # BB#0:
151 ; CHECK-NEXT: testl %edi, %edi
152 ; CHECK-NEXT: jle .LBB3_2
153 ; CHECK-NEXT: .p2align 4, 0x90
154 ; CHECK-NEXT: .LBB3_1: # %.lr.ph
155 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
156 ; CHECK-NEXT: movl $67305985, (%rdx) # imm = 0x4030201
157 ; CHECK-NEXT: movb %sil, 4(%rdx)
158 ; CHECK-NEXT: movw $1798, 5(%rdx) # imm = 0x706
159 ; CHECK-NEXT: movb $8, 7(%rdx)
160 ; CHECK-NEXT: addq $8, %rdx
161 ; CHECK-NEXT: decl %edi
162 ; CHECK-NEXT: jne .LBB3_1
163 ; CHECK-NEXT: .LBB3_2: # %._crit_edge
164 ; CHECK-NEXT: retq
116165 %1 = icmp sgt i32 %count, 0
117166 br i1 %1, label %.lr.ph, label %._crit_edge
118167 .lr.ph:
142191 ret void
143192 }
144193
145
146 ; CHECK-LABEL: merge_loads_i16:
147 ; load:
148 ; BWON: movzwl
149 ; BWOFF: movw
150 ; store:
151 ; CHECK: movw
152 ; CHECK: ret
153194 define void @merge_loads_i16(i32 %count, %struct.A* noalias nocapture %q, %struct.A* noalias nocapture %p) nounwind uwtable noinline ssp {
195 ; BWON-LABEL: merge_loads_i16:
196 ; BWON: # BB#0:
197 ; BWON-NEXT: testl %edi, %edi
198 ; BWON-NEXT: jle .LBB4_2
199 ; BWON-NEXT: .p2align 4, 0x90
200 ; BWON-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1
201 ; BWON-NEXT: movzwl (%rsi), %eax
202 ; BWON-NEXT: movw %ax, (%rdx)
203 ; BWON-NEXT: addq $8, %rdx
204 ; BWON-NEXT: decl %edi
205 ; BWON-NEXT: jne .LBB4_1
206 ; BWON-NEXT: .LBB4_2: # %._crit_edge
207 ; BWON-NEXT: retq
208 ;
209 ; BWOFF-LABEL: merge_loads_i16:
210 ; BWOFF: # BB#0:
211 ; BWOFF-NEXT: testl %edi, %edi
212 ; BWOFF-NEXT: jle .LBB4_2
213 ; BWOFF-NEXT: .p2align 4, 0x90
214 ; BWOFF-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1
215 ; BWOFF-NEXT: movw (%rsi), %ax
216 ; BWOFF-NEXT: movw %ax, (%rdx)
217 ; BWOFF-NEXT: addq $8, %rdx
218 ; BWOFF-NEXT: decl %edi
219 ; BWOFF-NEXT: jne .LBB4_1
220 ; BWOFF-NEXT: .LBB4_2: # %._crit_edge
221 ; BWOFF-NEXT: retq
154222 %1 = icmp sgt i32 %count, 0
155223 br i1 %1, label %.lr.ph, label %._crit_edge
156224
178246 }
179247
180248 ; The loads and the stores are interleaved. Can't merge them.
181 ; CHECK-LABEL: no_merge_loads:
182 ; BWON: movzbl
183 ; BWOFF: movb
184 ; CHECK: movb
185 ; BWON: movzbl
186 ; BWOFF: movb
187 ; CHECK: movb
188 ; CHECK: ret
189249 define void @no_merge_loads(i32 %count, %struct.A* noalias nocapture %q, %struct.A* noalias nocapture %p) nounwind uwtable noinline ssp {
250 ; BWON-LABEL: no_merge_loads:
251 ; BWON: # BB#0:
252 ; BWON-NEXT: testl %edi, %edi
253 ; BWON-NEXT: jle .LBB5_2
254 ; BWON-NEXT: .p2align 4, 0x90
255 ; BWON-NEXT: .LBB5_1: # %a4
256 ; BWON-NEXT: # =>This Inner Loop Header: Depth=1
257 ; BWON-NEXT: movzbl (%rsi), %eax
258 ; BWON-NEXT: movb %al, (%rdx)
259 ; BWON-NEXT: movzbl 1(%rsi), %eax
260 ; BWON-NEXT: movb %al, 1(%rdx)
261 ; BWON-NEXT: addq $8, %rdx
262 ; BWON-NEXT: decl %edi
263 ; BWON-NEXT: jne .LBB5_1
264 ; BWON-NEXT: .LBB5_2: # %._crit_edge
265 ; BWON-NEXT: retq
266 ;
267 ; BWOFF-LABEL: no_merge_loads:
268 ; BWOFF: # BB#0:
269 ; BWOFF-NEXT: testl %edi, %edi
270 ; BWOFF-NEXT: jle .LBB5_2
271 ; BWOFF-NEXT: .p2align 4, 0x90
272 ; BWOFF-NEXT: .LBB5_1: # %a4
273 ; BWOFF-NEXT: # =>This Inner Loop Header: Depth=1
274 ; BWOFF-NEXT: movb (%rsi), %al
275 ; BWOFF-NEXT: movb %al, (%rdx)
276 ; BWOFF-NEXT: movb 1(%rsi), %al
277 ; BWOFF-NEXT: movb %al, 1(%rdx)
278 ; BWOFF-NEXT: addq $8, %rdx
279 ; BWOFF-NEXT: decl %edi
280 ; BWOFF-NEXT: jne .LBB5_1
281 ; BWOFF-NEXT: .LBB5_2: # %._crit_edge
282 ; BWOFF-NEXT: retq
190283 %1 = icmp sgt i32 %count, 0
191284 br i1 %1, label %.lr.ph, label %._crit_edge
192285
213306 ret void
214307 }
215308
216
309 define void @merge_loads_integer(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp {
217310 ; CHECK-LABEL: merge_loads_integer:
218 ; load:
219 ; CHECK: movq
220 ; store:
221 ; CHECK: movq
222 ; CHECK: ret
223 define void @merge_loads_integer(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp {
311 ; CHECK: # BB#0:
312 ; CHECK-NEXT: testl %edi, %edi
313 ; CHECK-NEXT: jle .LBB6_2
314 ; CHECK-NEXT: .p2align 4, 0x90
315 ; CHECK-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1
316 ; CHECK-NEXT: movq (%rsi), %rax
317 ; CHECK-NEXT: movq %rax, (%rdx)
318 ; CHECK-NEXT: addq $32, %rdx
319 ; CHECK-NEXT: decl %edi
320 ; CHECK-NEXT: jne .LBB6_1
321 ; CHECK-NEXT: .LBB6_2: # %._crit_edge
322 ; CHECK-NEXT: retq
224323 %1 = icmp sgt i32 %count, 0
225324 br i1 %1, label %.lr.ph, label %._crit_edge
226325
247346 ret void
248347 }
249348
250
349 define void @merge_loads_vector(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp {
251350 ; CHECK-LABEL: merge_loads_vector:
252 ; load:
253 ; CHECK: movups
254 ; store:
255 ; CHECK: movups
256 ; CHECK: ret
257 define void @merge_loads_vector(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp {
351 ; CHECK: # BB#0:
352 ; CHECK-NEXT: testl %edi, %edi
353 ; CHECK-NEXT: jle .LBB7_2
354 ; CHECK-NEXT: .p2align 4, 0x90
355 ; CHECK-NEXT: .LBB7_1: # %block4
356 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
357 ; CHECK-NEXT: vmovups (%rsi), %xmm0
358 ; CHECK-NEXT: vmovups %xmm0, (%rdx)
359 ; CHECK-NEXT: addq $32, %rdx
360 ; CHECK-NEXT: decl %edi
361 ; CHECK-NEXT: jne .LBB7_1
362 ; CHECK-NEXT: .LBB7_2: # %._crit_edge
363 ; CHECK-NEXT: retq
258364 %a1 = icmp sgt i32 %count, 0
259365 br i1 %a1, label %.lr.ph, label %._crit_edge
260366
289395 ret void
290396 }
291397
292 ;; On x86, even unaligned copies can be merged to vector ops.
398 ; On x86, even unaligned copies can be merged to vector ops.
399 define void @merge_loads_no_align(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp {
293400 ; CHECK-LABEL: merge_loads_no_align:
294 ; load:
295 ; CHECK: vmovups
296 ; store:
297 ; CHECK: vmovups
298 ; CHECK: ret
299 define void @merge_loads_no_align(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp {
401 ; CHECK: # BB#0:
402 ; CHECK-NEXT: testl %edi, %edi
403 ; CHECK-NEXT: jle .LBB8_2
404 ; CHECK-NEXT: .p2align 4, 0x90
405 ; CHECK-NEXT: .LBB8_1: # %block4
406 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
407 ; CHECK-NEXT: vmovups (%rsi), %xmm0
408 ; CHECK-NEXT: vmovups %xmm0, (%rdx)
409 ; CHECK-NEXT: addq $32, %rdx
410 ; CHECK-NEXT: decl %edi
411 ; CHECK-NEXT: jne .LBB8_1
412 ; CHECK-NEXT: .LBB8_2: # %._crit_edge
413 ; CHECK-NEXT: retq
300414 %a1 = icmp sgt i32 %count, 0
301415 br i1 %a1, label %.lr.ph, label %._crit_edge
302416
333447
334448 ; Make sure that we merge the consecutive load/store sequence below and use a
335449 ; word (16 bit) instead of a byte copy.
336 ; CHECK-LABEL: MergeLoadStoreBaseIndexOffset:
337 ; BWON: movzwl (%{{.*}},%{{.*}}), %e[[REG:[a-z]+]]
338 ; BWOFF: movw (%{{.*}},%{{.*}}), %[[REG:[a-z]+]]
339 ; CHECK: movw %[[REG]], (%{{.*}})
340450 define void @MergeLoadStoreBaseIndexOffset(i64* %a, i8* %b, i8* %c, i32 %n) {
451 ; BWON-LABEL: MergeLoadStoreBaseIndexOffset:
452 ; BWON: # BB#0:
453 ; BWON-NEXT: movl %ecx, %r8d
454 ; BWON-NEXT: xorl %ecx, %ecx
455 ; BWON-NEXT: .p2align 4, 0x90
456 ; BWON-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1
457 ; BWON-NEXT: movq (%rdi,%rcx,8), %rax
458 ; BWON-NEXT: movzwl (%rdx,%rax), %eax
459 ; BWON-NEXT: movw %ax, (%rsi,%rcx,2)
460 ; BWON-NEXT: incq %rcx
461 ; BWON-NEXT: cmpl %ecx, %r8d
462 ; BWON-NEXT: jne .LBB9_1
463 ; BWON-NEXT: # BB#2:
464 ; BWON-NEXT: retq
465 ;
466 ; BWOFF-LABEL: MergeLoadStoreBaseIndexOffset:
467 ; BWOFF: # BB#0:
468 ; BWOFF-NEXT: movl %ecx, %r8d
469 ; BWOFF-NEXT: xorl %ecx, %ecx
470 ; BWOFF-NEXT: .p2align 4, 0x90
471 ; BWOFF-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1
472 ; BWOFF-NEXT: movq (%rdi,%rcx,8), %rax
473 ; BWOFF-NEXT: movw (%rdx,%rax), %ax
474 ; BWOFF-NEXT: movw %ax, (%rsi,%rcx,2)
475 ; BWOFF-NEXT: incq %rcx
476 ; BWOFF-NEXT: cmpl %ecx, %r8d
477 ; BWOFF-NEXT: jne .LBB9_1
478 ; BWOFF-NEXT: # BB#2:
479 ; BWOFF-NEXT: retq
341480 br label %1
342481
343482 ;
365504
366505 ; Make sure that we merge the consecutive load/store sequence below and use a
367506 ; word (16 bit) instead of a byte copy for complicated address calculation.
368 ; .
369 ; CHECK-LABEL: MergeLoadStoreBaseIndexOffsetComplicated:
370 ; BWON: movzwl (%{{.*}},%{{.*}}), %e[[REG:[a-z]+]]
371 ; BWOFF: movw (%{{.*}},%{{.*}}), %[[REG:[a-z]+]]
372 ; CHECK: movw %[[REG]], (%{{.*}})
373507 define void @MergeLoadStoreBaseIndexOffsetComplicated(i8* %a, i8* %b, i8* %c, i64 %n) {
508 ; BWON-LABEL: MergeLoadStoreBaseIndexOffsetComplicated:
509 ; BWON: # BB#0:
510 ; BWON-NEXT: xorl %r8d, %r8d
511 ; BWON-NEXT: .p2align 4, 0x90
512 ; BWON-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1
513 ; BWON-NEXT: movsbq (%rsi), %rax
514 ; BWON-NEXT: movzwl (%rdx,%rax), %eax
515 ; BWON-NEXT: movw %ax, (%rdi,%r8)
516 ; BWON-NEXT: incq %rsi
517 ; BWON-NEXT: addq $2, %r8
518 ; BWON-NEXT: cmpq %rcx, %r8
519 ; BWON-NEXT: jl .LBB10_1
520 ; BWON-NEXT: # BB#2:
521 ; BWON-NEXT: retq
522 ;
523 ; BWOFF-LABEL: MergeLoadStoreBaseIndexOffsetComplicated:
524 ; BWOFF: # BB#0:
525 ; BWOFF-NEXT: xorl %r8d, %r8d
526 ; BWOFF-NEXT: .p2align 4, 0x90
527 ; BWOFF-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1
528 ; BWOFF-NEXT: movsbq (%rsi), %rax
529 ; BWOFF-NEXT: movw (%rdx,%rax), %ax
530 ; BWOFF-NEXT: movw %ax, (%rdi,%r8)
531 ; BWOFF-NEXT: incq %rsi
532 ; BWOFF-NEXT: addq $2, %r8
533 ; BWOFF-NEXT: cmpq %rcx, %r8
534 ; BWOFF-NEXT: jl .LBB10_1
535 ; BWOFF-NEXT: # BB#2:
536 ; BWOFF-NEXT: retq
374537 br label %1
375538
376539 ;
400563 ; Make sure that we merge the consecutive load/store sequence below and use a
401564 ; word (16 bit) instead of a byte copy even if there are intermediate sign
402565 ; extensions.
403 ; CHECK-LABEL: MergeLoadStoreBaseIndexOffsetSext:
404 ; BWON: movzwl (%{{.*}},%{{.*}}), %e[[REG:[a-z]+]]
405 ; BWOFF: movw (%{{.*}},%{{.*}}), %[[REG:[a-z]+]]
406 ; CHECK: movw %[[REG]], (%{{.*}})
407566 define void @MergeLoadStoreBaseIndexOffsetSext(i8* %a, i8* %b, i8* %c, i32 %n) {
567 ; BWON-LABEL: MergeLoadStoreBaseIndexOffsetSext:
568 ; BWON: # BB#0:
569 ; BWON-NEXT: movl %ecx, %r8d
570 ; BWON-NEXT: xorl %ecx, %ecx
571 ; BWON-NEXT: .p2align 4, 0x90
572 ; BWON-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1
573 ; BWON-NEXT: movsbq (%rdi,%rcx), %rax
574 ; BWON-NEXT: movzwl (%rdx,%rax), %eax
575 ; BWON-NEXT: movw %ax, (%rsi,%rcx,2)
576 ; BWON-NEXT: incq %rcx
577 ; BWON-NEXT: cmpl %ecx, %r8d
578 ; BWON-NEXT: jne .LBB11_1
579 ; BWON-NEXT: # BB#2:
580 ; BWON-NEXT: retq
581 ;
582 ; BWOFF-LABEL: MergeLoadStoreBaseIndexOffsetSext:
583 ; BWOFF: # BB#0:
584 ; BWOFF-NEXT: movl %ecx, %r8d
585 ; BWOFF-NEXT: xorl %ecx, %ecx
586 ; BWOFF-NEXT: .p2align 4, 0x90
587 ; BWOFF-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1
588 ; BWOFF-NEXT: movsbq (%rdi,%rcx), %rax
589 ; BWOFF-NEXT: movw (%rdx,%rax), %ax
590 ; BWOFF-NEXT: movw %ax, (%rsi,%rcx,2)
591 ; BWOFF-NEXT: incq %rcx
592 ; BWOFF-NEXT: cmpl %ecx, %r8d
593 ; BWOFF-NEXT: jne .LBB11_1
594 ; BWOFF-NEXT: # BB#2:
595 ; BWOFF-NEXT: retq
408596 br label %1
409597
410598 ;
433621
434622 ; However, we can only ignore sign extensions when merging if they are applied
435623 ; to all of the memory computations.
436 ; CHECK-LABEL: loadStoreBaseIndexOffsetSextNoSex:
437 ; CHECK-NOT: movw (%{{.*}},%{{.*}}), [[REG:%[a-z]+]]
438 ; CHECK-NOT: movw [[REG]], (%{{.*}})
439624 define void @loadStoreBaseIndexOffsetSextNoSex(i8* %a, i8* %b, i8* %c, i32 %n) {
625 ; BWON-LABEL: loadStoreBaseIndexOffsetSextNoSex:
626 ; BWON: # BB#0:
627 ; BWON-NEXT: movl %ecx, %r8d
628 ; BWON-NEXT: xorl %ecx, %ecx
629 ; BWON-NEXT: .p2align 4, 0x90
630 ; BWON-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1
631 ; BWON-NEXT: movsbq (%rdi,%rcx), %rax
632 ; BWON-NEXT: movzbl (%rdx,%rax), %r9d
633 ; BWON-NEXT: incb %al
634 ; BWON-NEXT: movsbq %al, %rax
635 ; BWON-NEXT: movzbl (%rdx,%rax), %eax
636 ; BWON-NEXT: movb %r9b, (%rsi,%rcx,2)
637 ; BWON-NEXT: movb %al, 1(%rsi,%rcx,2)
638 ; BWON-NEXT: incq %rcx
639 ; BWON-NEXT: cmpl %ecx, %r8d
640 ; BWON-NEXT: jne .LBB12_1
641 ; BWON-NEXT: # BB#2:
642 ; BWON-NEXT: retq
643 ;
644 ; BWOFF-LABEL: loadStoreBaseIndexOffsetSextNoSex:
645 ; BWOFF: # BB#0:
646 ; BWOFF-NEXT: movl %ecx, %r8d
647 ; BWOFF-NEXT: xorl %ecx, %ecx
648 ; BWOFF-NEXT: .p2align 4, 0x90
649 ; BWOFF-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1
650 ; BWOFF-NEXT: movsbq (%rdi,%rcx), %rax
651 ; BWOFF-NEXT: movb (%rdx,%rax), %r9b
652 ; BWOFF-NEXT: incb %al
653 ; BWOFF-NEXT: movsbq %al, %rax
654 ; BWOFF-NEXT: movb (%rdx,%rax), %al
655 ; BWOFF-NEXT: movb %r9b, (%rsi,%rcx,2)
656 ; BWOFF-NEXT: movb %al, 1(%rsi,%rcx,2)
657 ; BWOFF-NEXT: incq %rcx
658 ; BWOFF-NEXT: cmpl %ecx, %r8d
659 ; BWOFF-NEXT: jne .LBB12_1
660 ; BWOFF-NEXT: # BB#2:
661 ; BWOFF-NEXT: retq
440662 br label %1
441663
442664 ;
466688
467689 ; PR21711 ( http://llvm.org/bugs/show_bug.cgi?id=21711 )
468690 define void @merge_vec_element_store(<8 x float> %v, float* %ptr) {
691 ; CHECK-LABEL: merge_vec_element_store:
692 ; CHECK: # BB#0:
693 ; CHECK-NEXT: vmovups %ymm0, (%rdi)
694 ; CHECK-NEXT: vzeroupper
695 ; CHECK-NEXT: retq
469696 %vecext0 = extractelement <8 x float> %v, i32 0
470697 %vecext1 = extractelement <8 x float> %v, i32 1
471698 %vecext2 = extractelement <8 x float> %v, i32 2
491718 store float %vecext7, float* %arrayidx7, align 4
492719 ret void
493720
494 ; CHECK-LABEL: merge_vec_element_store
495 ; CHECK: vmovups %ymm0, (%rdi)
496 ; CHECK: vzeroupper
497 ; CHECK: retq
498721 }
499722
500723 ; PR21711 - Merge vector stores into wider vector stores.
501724 ; These should be merged into 32-byte stores.
502725 define void @merge_vec_extract_stores(<8 x float> %v1, <8 x float> %v2, <4 x float>* %ptr) {
726 ; CHECK-LABEL: merge_vec_extract_stores:
727 ; CHECK: # BB#0:
728 ; CHECK-NEXT: vmovups %ymm0, 48(%rdi)
729 ; CHECK-NEXT: vmovups %ymm1, 80(%rdi)
730 ; CHECK-NEXT: vzeroupper
731 ; CHECK-NEXT: retq
503732 %idx0 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 3
504733 %idx1 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 4
505734 %idx2 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 5
514743 store <4 x float> %shuffle3, <4 x float>* %idx3, align 16
515744 ret void
516745
517 ; CHECK-LABEL: merge_vec_extract_stores
518 ; CHECK: vmovups %ymm0, 48(%rdi)
519 ; CHECK-NEXT: vmovups %ymm1, 80(%rdi)
520 ; CHECK-NEXT: vzeroupper
521 ; CHECK-NEXT: retq
522746 }
523747
524748 ; Merging vector stores when sourced from vector loads.
525749 define void @merge_vec_stores_from_loads(<4 x float>* %v, <4 x float>* %ptr) {
750 ; CHECK-LABEL: merge_vec_stores_from_loads:
751 ; CHECK: # BB#0:
752 ; CHECK-NEXT: vmovups (%rdi), %ymm0
753 ; CHECK-NEXT: vmovups %ymm0, (%rsi)
754 ; CHECK-NEXT: vzeroupper
755 ; CHECK-NEXT: retq
526756 %load_idx0 = getelementptr inbounds <4 x float>, <4 x float>* %v, i64 0
527757 %load_idx1 = getelementptr inbounds <4 x float>, <4 x float>* %v, i64 1
528758 %v0 = load <4 x float>, <4 x float>* %load_idx0
533763 store <4 x float> %v1, <4 x float>* %store_idx1, align 16
534764 ret void
535765
536 ; CHECK-LABEL: merge_vec_stores_from_loads
537 ; CHECK: vmovups (%rdi), %ymm0
538 ; CHECK-NEXT: vmovups %ymm0, (%rsi)
539 ; CHECK-NEXT: vzeroupper
540 ; CHECK-NEXT: retq
541 }
542
543 ; Merging vector stores when sourced from a constant vector is not currently handled.
766 }
767
768 ; Merging vector stores when sourced from a constant vector is not currently handled.
544769 define void @merge_vec_stores_of_constants(<4 x i32>* %ptr) {
770 ; CHECK-LABEL: merge_vec_stores_of_constants:
771 ; CHECK: # BB#0:
772 ; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
773 ; CHECK-NEXT: vmovaps %xmm0, 48(%rdi)
774 ; CHECK-NEXT: vmovaps %xmm0, 64(%rdi)
775 ; CHECK-NEXT: retq
545776 %idx0 = getelementptr inbounds <4 x i32>, <4 x i32>* %ptr, i64 3
546777 %idx1 = getelementptr inbounds <4 x i32>, <4 x i32>* %ptr, i64 4
547778 store <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32>* %idx0, align 16
548779 store <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32>* %idx1, align 16
549780 ret void
550781
551 ; CHECK-LABEL: merge_vec_stores_of_constants
552 ; CHECK: vxorps
553 ; CHECK-NEXT: vmovaps
554 ; CHECK-NEXT: vmovaps
555 ; CHECK-NEXT: retq
556782 }
557783
558784 ; This is a minimized test based on real code that was failing.
559785 ; This should now be merged.
560786 define void @merge_vec_element_and_scalar_load([6 x i64]* %array) {
787 ; CHECK-LABEL: merge_vec_element_and_scalar_load:
788 ; CHECK: # BB#0:
789 ; CHECK-NEXT: vmovups (%rdi), %xmm0
790 ; CHECK-NEXT: vmovups %xmm0, 32(%rdi)
791 ; CHECK-NEXT: retq
561792 %idx0 = getelementptr inbounds [6 x i64], [6 x i64]* %array, i64 0, i64 0
562793 %idx1 = getelementptr inbounds [6 x i64], [6 x i64]* %array, i64 0, i64 1
563794 %idx4 = getelementptr inbounds [6 x i64], [6 x i64]* %array, i64 0, i64 4
572803 store i64 %a1, i64* %idx5, align 8
573804 ret void
574805
575 ; CHECK-LABEL: merge_vec_element_and_scalar_load
576 ; CHECK: vmovups (%rdi), %xmm0
577 ; CHECK-NEXT: vmovups %xmm0, 32(%rdi)
578 ; CHECK-NEXT: retq
579806 }
580807
581808 ; Don't let a non-consecutive store thwart merging of the last two.
582809 define void @almost_consecutive_stores(i8* %p) {
810 ; CHECK-LABEL: almost_consecutive_stores:
811 ; CHECK: # BB#0:
812 ; CHECK-NEXT: movb $0, (%rdi)
813 ; CHECK-NEXT: movb $1, 42(%rdi)
814 ; CHECK-NEXT: movw $770, 2(%rdi) # imm = 0x302
815 ; CHECK-NEXT: retq
583816 store i8 0, i8* %p
584817 %p1 = getelementptr i8, i8* %p, i64 42
585818 store i8 1, i8* %p1
588821 %p3 = getelementptr i8, i8* %p, i64 3
589822 store i8 3, i8* %p3
590823 ret void
591 ; CHECK-LABEL: almost_consecutive_stores
592 ; CHECK-DAG: movb $0, (%rdi)
593 ; CHECK-DAG: movb $1, 42(%rdi)
594 ; CHECK-DAG: movw $770, 2(%rdi)
595 ; CHECK: retq
596824 }
597825
598826 ; We should be able to merge these.
599827 define void @merge_bitcast(<4 x i32> %v, float* %ptr) {
828 ; CHECK-LABEL: merge_bitcast:
829 ; CHECK: # BB#0:
830 ; CHECK-NEXT: vmovups %xmm0, (%rdi)
831 ; CHECK-NEXT: retq
600832 %fv = bitcast <4 x i32> %v to <4 x float>
601
602833 %vecext1 = extractelement <4 x i32> %v, i32 1
603834 %vecext2 = extractelement <4 x i32> %v, i32 2
604835 %vecext3 = extractelement <4 x i32> %v, i32 3
615846 store float %f2, float* %idx2, align 4
616847 store float %f3, float* %idx3, align 4
617848 ret void
618
619 ; CHECK-LABEL: merge_bitcast
620 ; CHECK: vmovups %xmm0, (%rdi)
621 ; CHECK-NEXT: retq
622 }
849 }