llvm.org GIT mirror llvm / 644a7d6
[X86][AVX512] Fixed 512-bit vector nontemporal load alignment. (git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@271673 91177308-0d34-0410-b5e6-96231b3b80d8) Author: Simon Pilgrim, 4 years ago.
1 changed file(s) with 20 addition(s) and 20 deletion(s). Raw diff Collapse all Expand all
363363 ;
364364 ; AVX512-LABEL: test_v16f32:
365365 ; AVX512: # BB#0:
366 ; AVX512-NEXT: vmovups (%rdi), %zmm0
367 ; AVX512-NEXT: retq
368 %1 = load <16 x float>, <16 x float>* %src, align 32, !nontemporal !1
366 ; AVX512-NEXT: vmovaps (%rdi), %zmm0
367 ; AVX512-NEXT: retq
368 %1 = load <16 x float>, <16 x float>* %src, align 64, !nontemporal !1
369369 ret <16 x float> %1
370370 }
371371
386386 ;
387387 ; AVX512-LABEL: test_v16i32:
388388 ; AVX512: # BB#0:
389 ; AVX512-NEXT: vmovdqu32 (%rdi), %zmm0
390 ; AVX512-NEXT: retq
391 %1 = load <16 x i32>, <16 x i32>* %src, align 32, !nontemporal !1
389 ; AVX512-NEXT: vmovdqa32 (%rdi), %zmm0
390 ; AVX512-NEXT: retq
391 %1 = load <16 x i32>, <16 x i32>* %src, align 64, !nontemporal !1
392392 ret <16 x i32> %1
393393 }
394394
409409 ;
410410 ; AVX512-LABEL: test_v8f64:
411411 ; AVX512: # BB#0:
412 ; AVX512-NEXT: vmovupd (%rdi), %zmm0
413 ; AVX512-NEXT: retq
414 %1 = load <8 x double>, <8 x double>* %src, align 32, !nontemporal !1
412 ; AVX512-NEXT: vmovapd (%rdi), %zmm0
413 ; AVX512-NEXT: retq
414 %1 = load <8 x double>, <8 x double>* %src, align 64, !nontemporal !1
415415 ret <8 x double> %1
416416 }
417417
432432 ;
433433 ; AVX512-LABEL: test_v8i64:
434434 ; AVX512: # BB#0:
435 ; AVX512-NEXT: vmovdqu64 (%rdi), %zmm0
436 ; AVX512-NEXT: retq
437 %1 = load <8 x i64>, <8 x i64>* %src, align 32, !nontemporal !1
435 ; AVX512-NEXT: vmovdqa64 (%rdi), %zmm0
436 ; AVX512-NEXT: retq
437 %1 = load <8 x i64>, <8 x i64>* %src, align 64, !nontemporal !1
438438 ret <8 x i64> %1
439439 }
440440
469469 ; AVX512VL-NEXT: vmovdqa64 (%rdi), %ymm0
470470 ; AVX512VL-NEXT: vmovdqa64 32(%rdi), %ymm1
471471 ; AVX512VL-NEXT: retq
472 %1 = load <32 x i16>, <32 x i16>* %src, align 32, !nontemporal !1
472 %1 = load <32 x i16>, <32 x i16>* %src, align 64, !nontemporal !1
473473 ret <32 x i16> %1
474474 }
475475
504504 ; AVX512VL-NEXT: vmovdqa64 (%rdi), %ymm0
505505 ; AVX512VL-NEXT: vmovdqa64 32(%rdi), %ymm1
506506 ; AVX512VL-NEXT: retq
507 %1 = load <64 x i8>, <64 x i8>* %src, align 32, !nontemporal !1
507 %1 = load <64 x i8>, <64 x i8>* %src, align 64, !nontemporal !1
508508 ret <64 x i8> %1
509509 }
510510
820820 ; AVX512: # BB#0:
821821 ; AVX512-NEXT: vaddps (%rdi), %zmm0, %zmm0
822822 ; AVX512-NEXT: retq
823 %1 = load <16 x float>, <16 x float>* %src, align 32, !nontemporal !1
823 %1 = load <16 x float>, <16 x float>* %src, align 64, !nontemporal !1
824824 %2 = fadd <16 x float> %arg, %1
825825 ret <16 x float> %2
826826 }
860860 ; AVX512: # BB#0:
861861 ; AVX512-NEXT: vpaddd (%rdi), %zmm0, %zmm0
862862 ; AVX512-NEXT: retq
863 %1 = load <16 x i32>, <16 x i32>* %src, align 32, !nontemporal !1
863 %1 = load <16 x i32>, <16 x i32>* %src, align 64, !nontemporal !1
864864 %2 = add <16 x i32> %arg, %1
865865 ret <16 x i32> %2
866866 }
884884 ; AVX512: # BB#0:
885885 ; AVX512-NEXT: vaddpd (%rdi), %zmm0, %zmm0
886886 ; AVX512-NEXT: retq
887 %1 = load <8 x double>, <8 x double>* %src, align 32, !nontemporal !1
887 %1 = load <8 x double>, <8 x double>* %src, align 64, !nontemporal !1
888888 %2 = fadd <8 x double> %arg, %1
889889 ret <8 x double> %2
890890 }
924924 ; AVX512: # BB#0:
925925 ; AVX512-NEXT: vpaddq (%rdi), %zmm0, %zmm0
926926 ; AVX512-NEXT: retq
927 %1 = load <8 x i64>, <8 x i64>* %src, align 32, !nontemporal !1
927 %1 = load <8 x i64>, <8 x i64>* %src, align 64, !nontemporal !1
928928 %2 = add <8 x i64> %arg, %1
929929 ret <8 x i64> %2
930930 }
976976 ; AVX512VL-NEXT: vpaddw (%rdi), %ymm0, %ymm0
977977 ; AVX512VL-NEXT: vpaddw 32(%rdi), %ymm1, %ymm1
978978 ; AVX512VL-NEXT: retq
979 %1 = load <32 x i16>, <32 x i16>* %src, align 32, !nontemporal !1
979 %1 = load <32 x i16>, <32 x i16>* %src, align 64, !nontemporal !1
980980 %2 = add <32 x i16> %arg, %1
981981 ret <32 x i16> %2
982982 }
10281028 ; AVX512VL-NEXT: vpaddb (%rdi), %ymm0, %ymm0
10291029 ; AVX512VL-NEXT: vpaddb 32(%rdi), %ymm1, %ymm1
10301030 ; AVX512VL-NEXT: retq
1031 %1 = load <64 x i8>, <64 x i8>* %src, align 32, !nontemporal !1
1031 %1 = load <64 x i8>, <64 x i8>* %src, align 64, !nontemporal !1
10321032 %2 = add <64 x i8> %arg, %1
10331033 ret <64 x i8> %2
10341034 }