llvm.org GIT mirror llvm / c964a30
[X86][AVX512] Autoupgrade the MOVDDUP/MOVSLDUP/MOVSHDUP intrinsics. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@274439 91177308-0d34-0410-b5e6-96231b3b80d8. Author: Simon Pilgrim, 4 years ago.
5 changed file(s) with 227 addition(s) and 204 deletion(s). Raw diff Collapse all Expand all
218218 Name.startswith("x86.avx2.pbroadcast") ||
219219 Name.startswith("x86.avx.vpermil.") ||
220220 Name.startswith("x86.sse2.pshuf") ||
221 Name.startswith("x86.avx512.mask.movddup") ||
222 Name.startswith("x86.avx512.mask.movshdup") ||
223 Name.startswith("x86.avx512.mask.movsldup") ||
221224 Name.startswith("x86.avx512.mask.pshuf.d.") ||
222225 Name.startswith("x86.avx512.mask.pshufl.w.") ||
223226 Name.startswith("x86.avx512.mask.pshufh.w.") ||
10621065 if (CI->getNumArgOperands() == 4)
10631066 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
10641067 CI->getArgOperand(2));
1068 } else if (Name.startswith("llvm.x86.avx512.mask.movddup") ||
1069 Name.startswith("llvm.x86.avx512.mask.movshdup") ||
1070 Name.startswith("llvm.x86.avx512.mask.movsldup")) {
1071 Value *Op0 = CI->getArgOperand(0);
1072 unsigned NumElts = CI->getType()->getVectorNumElements();
1073 unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
1074
1075 unsigned Offset = 0;
1076 if (Name.startswith("llvm.x86.avx512.mask.movshdup."))
1077 Offset = 1;
1078
1079 SmallVector<uint32_t, 16> Idxs(NumElts);
1080 for (unsigned l = 0; l != NumElts; l += NumLaneElts)
1081 for (unsigned i = 0; i != NumLaneElts; i += 2) {
1082 Idxs[i + l + 0] = i + l + Offset;
1083 Idxs[i + l + 1] = i + l + Offset;
1084 }
1085
1086 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1087
1088 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1089 CI->getArgOperand(1));
10651090 } else if (Name.startswith("llvm.x86.avx512.mask.punpckl") ||
10661091 Name.startswith("llvm.x86.avx512.mask.unpckl.")) {
10671092 Value *Op0 = CI->getArgOperand(0);
0 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
11 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
2
3 declare <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float>, <16 x float>, i16)
4
5 define <16 x float>@test_int_x86_avx512_mask_movsldup_512(<16 x float> %x0, <16 x float> %x1, i16 %x2) {
6 ; CHECK-LABEL: test_int_x86_avx512_mask_movsldup_512:
7 ; CHECK: ## BB#0:
8 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm2 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
9 ; CHECK-NEXT: kmovw %edi, %k1
10 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm1 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
11 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
12 ; CHECK-NEXT: vaddps %zmm2, %zmm1, %zmm1
13 ; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0
14 ; CHECK-NEXT: retq
15 %res = call <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float> %x0, <16 x float> %x1, i16 %x2)
16 %res1 = call <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float> %x0, <16 x float> %x1, i16 -1)
17 %res2 = call <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float> %x0, <16 x float> zeroinitializer, i16 %x2)
18 %res3 = fadd <16 x float> %res, %res1
19 %res4 = fadd <16 x float> %res2, %res3
20 ret <16 x float> %res4
21 }
22
23 declare <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float>, <16 x float>, i16)
24
25 define <16 x float>@test_int_x86_avx512_mask_movshdup_512(<16 x float> %x0, <16 x float> %x1, i16 %x2) {
26 ; CHECK-LABEL: test_int_x86_avx512_mask_movshdup_512:
27 ; CHECK: ## BB#0:
28 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm2 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
29 ; CHECK-NEXT: kmovw %edi, %k1
30 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm1 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
31 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
32 ; CHECK-NEXT: vaddps %zmm2, %zmm1, %zmm1
33 ; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0
34 ; CHECK-NEXT: retq
35 %res = call <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float> %x0, <16 x float> %x1, i16 %x2)
36 %res1 = call <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float> %x0, <16 x float> %x1, i16 -1)
37 %res2 = call <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float> %x0, <16 x float> zeroinitializer, i16 %x2)
38 %res3 = fadd <16 x float> %res, %res1
39 %res4 = fadd <16 x float> %res2, %res3
40 ret <16 x float> %res4
41 }
42
43 declare <8 x double> @llvm.x86.avx512.mask.movddup.512(<8 x double>, <8 x double>, i8)
44
45 define <8 x double>@test_int_x86_avx512_mask_movddup_512(<8 x double> %x0, <8 x double> %x1, i8 %x2) {
46 ; CHECK-LABEL: test_int_x86_avx512_mask_movddup_512:
47 ; CHECK: ## BB#0:
48 ; CHECK-NEXT: vmovddup {{.*#+}} zmm2 = zmm0[0,0,2,2,4,4,6,6]
49 ; CHECK-NEXT: kmovw %edi, %k1
50 ; CHECK-NEXT: vmovddup {{.*#+}} zmm1 = zmm0[0,0,2,2,4,4,6,6]
51 ; CHECK-NEXT: vmovddup {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6]
52 ; CHECK-NEXT: vaddpd %zmm2, %zmm1, %zmm1
53 ; CHECK-NEXT: vaddpd %zmm1, %zmm0, %zmm0
54 ; CHECK-NEXT: retq
55 %res = call <8 x double> @llvm.x86.avx512.mask.movddup.512(<8 x double> %x0, <8 x double> %x1, i8 %x2)
56 %res1 = call <8 x double> @llvm.x86.avx512.mask.movddup.512(<8 x double> %x0, <8 x double> %x1, i8 -1)
57 %res2 = call <8 x double> @llvm.x86.avx512.mask.movddup.512(<8 x double> %x0, <8 x double> zeroinitializer, i8 %x2)
58 %res3 = fadd <8 x double> %res, %res1
59 %res4 = fadd <8 x double> %res2, %res3
60 ret <8 x double> %res4
61 }
262
363 define void @test_store1(<16 x float> %data, i8* %ptr, i8* %ptr2, i16 %mask) {
464 ; CHECK-LABEL: test_store1:
None ; NOTE: Assertions have been autogenerated by update_llc_test_checks.py
1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
21 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
32
57005699 ret <8 x i64> %res2
57015700 }
57025701
5703 declare <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float>, <16 x float>, i16)
5704
5705 define <16 x float>@test_int_x86_avx512_mask_movsldup_512(<16 x float> %x0, <16 x float> %x1, i16 %x2) {
5706 ; CHECK-LABEL: test_int_x86_avx512_mask_movsldup_512:
5707 ; CHECK: ## BB#0:
5708 ; CHECK-NEXT: kmovw %edi, %k1
5709 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm1 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
5710 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm2 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
5711 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
5712 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
5713 ; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
5714 ; CHECK-NEXT: retq
5715 %res = call <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float> %x0, <16 x float> %x1, i16 %x2)
5716 %res1 = call <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float> %x0, <16 x float> %x1, i16 -1)
5717 %res2 = call <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float> %x0, <16 x float> zeroinitializer, i16 %x2)
5718 %res3 = fadd <16 x float> %res, %res1
5719 %res4 = fadd <16 x float> %res2, %res3
5720 ret <16 x float> %res4
5721 }
5722
5723 declare <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float>, <16 x float>, i16)
5724
5725 define <16 x float>@test_int_x86_avx512_mask_movshdup_512(<16 x float> %x0, <16 x float> %x1, i16 %x2) {
5726 ; CHECK-LABEL: test_int_x86_avx512_mask_movshdup_512:
5727 ; CHECK: ## BB#0:
5728 ; CHECK-NEXT: kmovw %edi, %k1
5729 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm1 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
5730 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm2 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
5731 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
5732 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
5733 ; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
5734 ; CHECK-NEXT: retq
5735 %res = call <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float> %x0, <16 x float> %x1, i16 %x2)
5736 %res1 = call <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float> %x0, <16 x float> %x1, i16 -1)
5737 %res2 = call <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float> %x0, <16 x float> zeroinitializer, i16 %x2)
5738 %res3 = fadd <16 x float> %res, %res1
5739 %res4 = fadd <16 x float> %res2, %res3
5740 ret <16 x float> %res4
5741 }
5742
5743 declare <8 x double> @llvm.x86.avx512.mask.movddup.512(<8 x double>, <8 x double>, i8)
5744
5745 define <8 x double>@test_int_x86_avx512_mask_movddup_512(<8 x double> %x0, <8 x double> %x1, i8 %x2) {
5746 ; CHECK-LABEL: test_int_x86_avx512_mask_movddup_512:
5747 ; CHECK: ## BB#0:
5748 ; CHECK-NEXT: kmovw %edi, %k1
5749 ; CHECK-NEXT: vmovddup {{.*#+}} zmm1 = zmm0[0,0,2,2,4,4,6,6]
5750 ; CHECK-NEXT: vmovddup {{.*#+}} zmm2 = zmm0[0,0,2,2,4,4,6,6]
5751 ; CHECK-NEXT: vmovddup {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6]
5752 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
5753 ; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
5754 ; CHECK-NEXT: retq
5755 %res = call <8 x double> @llvm.x86.avx512.mask.movddup.512(<8 x double> %x0, <8 x double> %x1, i8 %x2)
5756 %res1 = call <8 x double> @llvm.x86.avx512.mask.movddup.512(<8 x double> %x0, <8 x double> %x1, i8 -1)
5757 %res2 = call <8 x double> @llvm.x86.avx512.mask.movddup.512(<8 x double> %x0, <8 x double> zeroinitializer, i8 %x2)
5758 %res3 = fadd <8 x double> %res, %res1
5759 %res4 = fadd <8 x double> %res2, %res3
5760 ret <8 x double> %res4
5761 }
5762
57635702 define i32 @test_x86_avx512_comi_sd_eq_sae(<2 x double> %a0, <2 x double> %a1) {
57645703 ; CHECK-LABEL: test_x86_avx512_comi_sd_eq_sae:
57655704 ; CHECK: ## BB#0:
None ; NOTE: Assertions have been autogenerated by update_llc_test_checks.py
0 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
11 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl --show-mc-encoding| FileCheck %s
2
3 declare <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float>, <4 x float>, i8)
4
5 define <4 x float>@test_int_x86_avx512_mask_movsldup_128(<4 x float> %x0, <4 x float> %x1, i8 %x2) {
6 ; CHECK-LABEL: test_int_x86_avx512_mask_movsldup_128:
7 ; CHECK: ## BB#0:
8 ; CHECK-NEXT: vmovsldup %xmm0, %xmm2 ## encoding: [0x62,0xf1,0x7e,0x08,0x12,0xd0]
9 ; CHECK-NEXT: ## xmm2 = xmm0[0,0,2,2]
10 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
11 ; CHECK-NEXT: vmovsldup %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x12,0xc8]
12 ; CHECK-NEXT: ## xmm1 = xmm0[0,0,2,2]
13 ; CHECK-NEXT: vmovsldup %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x12,0xc0]
14 ; CHECK-NEXT: ## xmm0 = xmm0[0,0,2,2]
15 ; CHECK-NEXT: vaddps %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xca]
16 ; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x58,0xc1]
17 ; CHECK-NEXT: retq ## encoding: [0xc3]
18 %res = call <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float> %x0, <4 x float> %x1, i8 %x2)
19 %res1 = call <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float> %x0, <4 x float> %x1, i8 -1)
20 %res2 = call <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float> %x0, <4 x float> zeroinitializer, i8 %x2)
21 %res3 = fadd <4 x float> %res, %res1
22 %res4 = fadd <4 x float> %res2, %res3
23 ret <4 x float> %res4
24 }
25
26 declare <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float>, <8 x float>, i8)
27
28 define <8 x float>@test_int_x86_avx512_mask_movsldup_256(<8 x float> %x0, <8 x float> %x1, i8 %x2) {
29 ; CHECK-LABEL: test_int_x86_avx512_mask_movsldup_256:
30 ; CHECK: ## BB#0:
31 ; CHECK-NEXT: vmovsldup %ymm0, %ymm2 ## encoding: [0x62,0xf1,0x7e,0x28,0x12,0xd0]
32 ; CHECK-NEXT: ## ymm2 = ymm0[0,0,2,2,4,4,6,6]
33 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
34 ; CHECK-NEXT: vmovsldup %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x29,0x12,0xc8]
35 ; CHECK-NEXT: ## ymm1 = ymm0[0,0,2,2,4,4,6,6]
36 ; CHECK-NEXT: vmovsldup %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xa9,0x12,0xc0]
37 ; CHECK-NEXT: ## ymm0 = ymm0[0,0,2,2,4,4,6,6]
38 ; CHECK-NEXT: vaddps %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xca]
39 ; CHECK-NEXT: vaddps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x58,0xc1]
40 ; CHECK-NEXT: retq ## encoding: [0xc3]
41 %res = call <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float> %x0, <8 x float> %x1, i8 %x2)
42 %res1 = call <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float> %x0, <8 x float> %x1, i8 -1)
43 %res2 = call <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float> %x0, <8 x float> zeroinitializer, i8 %x2)
44 %res3 = fadd <8 x float> %res, %res1
45 %res4 = fadd <8 x float> %res2, %res3
46 ret <8 x float> %res4
47 }
48
49 declare <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float>, <4 x float>, i8)
50
51 define <4 x float>@test_int_x86_avx512_mask_movshdup_128(<4 x float> %x0, <4 x float> %x1, i8 %x2) {
52 ; CHECK-LABEL: test_int_x86_avx512_mask_movshdup_128:
53 ; CHECK: ## BB#0:
54 ; CHECK-NEXT: vmovshdup %xmm0, %xmm2 ## encoding: [0x62,0xf1,0x7e,0x08,0x16,0xd0]
55 ; CHECK-NEXT: ## xmm2 = xmm0[1,1,3,3]
56 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
57 ; CHECK-NEXT: vmovshdup %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x16,0xc8]
58 ; CHECK-NEXT: ## xmm1 = xmm0[1,1,3,3]
59 ; CHECK-NEXT: vmovshdup %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x16,0xc0]
60 ; CHECK-NEXT: ## xmm0 = xmm0[1,1,3,3]
61 ; CHECK-NEXT: vaddps %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xca]
62 ; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x58,0xc1]
63 ; CHECK-NEXT: retq ## encoding: [0xc3]
64 %res = call <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float> %x0, <4 x float> %x1, i8 %x2)
65 %res1 = call <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float> %x0, <4 x float> %x1, i8 -1)
66 %res2 = call <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float> %x0, <4 x float> zeroinitializer, i8 %x2)
67 %res3 = fadd <4 x float> %res, %res1
68 %res4 = fadd <4 x float> %res2, %res3
69 ret <4 x float> %res4
70 }
71
72 declare <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float>, <8 x float>, i8)
73
74 define <8 x float>@test_int_x86_avx512_mask_movshdup_256(<8 x float> %x0, <8 x float> %x1, i8 %x2) {
75 ; CHECK-LABEL: test_int_x86_avx512_mask_movshdup_256:
76 ; CHECK: ## BB#0:
77 ; CHECK-NEXT: vmovshdup %ymm0, %ymm2 ## encoding: [0x62,0xf1,0x7e,0x28,0x16,0xd0]
78 ; CHECK-NEXT: ## ymm2 = ymm0[1,1,3,3,5,5,7,7]
79 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
80 ; CHECK-NEXT: vmovshdup %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x29,0x16,0xc8]
81 ; CHECK-NEXT: ## ymm1 = ymm0[1,1,3,3,5,5,7,7]
82 ; CHECK-NEXT: vmovshdup %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xa9,0x16,0xc0]
83 ; CHECK-NEXT: ## ymm0 = ymm0[1,1,3,3,5,5,7,7]
84 ; CHECK-NEXT: vaddps %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xca]
85 ; CHECK-NEXT: vaddps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x58,0xc1]
86 ; CHECK-NEXT: retq ## encoding: [0xc3]
87 %res = call <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float> %x0, <8 x float> %x1, i8 %x2)
88 %res1 = call <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float> %x0, <8 x float> %x1, i8 -1)
89 %res2 = call <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float> %x0, <8 x float> zeroinitializer, i8 %x2)
90 %res3 = fadd <8 x float> %res, %res1
91 %res4 = fadd <8 x float> %res2, %res3
92 ret <8 x float> %res4
93 }
94 declare <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double>, <2 x double>, i8)
95
96 define <2 x double>@test_int_x86_avx512_mask_movddup_128(<2 x double> %x0, <2 x double> %x1, i8 %x2) {
97 ; CHECK-LABEL: test_int_x86_avx512_mask_movddup_128:
98 ; CHECK: ## BB#0:
99 ; CHECK-NEXT: vmovddup %xmm0, %xmm2 ## encoding: [0x62,0xf1,0xff,0x08,0x12,0xd0]
100 ; CHECK-NEXT: ## xmm2 = xmm0[0,0]
101 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
102 ; CHECK-NEXT: vmovddup %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xff,0x09,0x12,0xc8]
103 ; CHECK-NEXT: ## xmm1 = xmm0[0,0]
104 ; CHECK-NEXT: vmovddup %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x12,0xc0]
105 ; CHECK-NEXT: ## xmm0 = xmm0[0,0]
106 ; CHECK-NEXT: vaddpd %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xca]
107 ; CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x58,0xc1]
108 ; CHECK-NEXT: retq ## encoding: [0xc3]
109 %res = call <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double> %x0, <2 x double> %x1, i8 %x2)
110 %res1 = call <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double> %x0, <2 x double> %x1, i8 -1)
111 %res2 = call <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double> %x0, <2 x double> zeroinitializer, i8 %x2)
112 %res3 = fadd <2 x double> %res, %res1
113 %res4 = fadd <2 x double> %res2, %res3
114 ret <2 x double> %res4
115 }
116
117 declare <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double>, <4 x double>, i8)
118
119 define <4 x double>@test_int_x86_avx512_mask_movddup_256(<4 x double> %x0, <4 x double> %x1, i8 %x2) {
120 ; CHECK-LABEL: test_int_x86_avx512_mask_movddup_256:
121 ; CHECK: ## BB#0:
122 ; CHECK-NEXT: vmovddup %ymm0, %ymm2 ## encoding: [0x62,0xf1,0xff,0x28,0x12,0xd0]
123 ; CHECK-NEXT: ## ymm2 = ymm0[0,0,2,2]
124 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
125 ; CHECK-NEXT: vmovddup %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xff,0x29,0x12,0xc8]
126 ; CHECK-NEXT: ## ymm1 = ymm0[0,0,2,2]
127 ; CHECK-NEXT: vmovddup %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0xa9,0x12,0xc0]
128 ; CHECK-NEXT: ## ymm0 = ymm0[0,0,2,2]
129 ; CHECK-NEXT: vaddpd %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xca]
130 ; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x58,0xc1]
131 ; CHECK-NEXT: retq ## encoding: [0xc3]
132 %res = call <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double> %x0, <4 x double> %x1, i8 %x2)
133 %res1 = call <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double> %x0, <4 x double> %x1, i8 -1)
134 %res2 = call <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double> %x0, <4 x double> zeroinitializer, i8 %x2)
135 %res3 = fadd <4 x double> %res, %res1
136 %res4 = fadd <4 x double> %res2, %res3
137 ret <4 x double> %res4
138 }
2139
3140 declare void @llvm.x86.avx512.mask.store.pd.128(i8*, <2 x double>, i8)
4141
0 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
1 ; NOTE: Assertions have been autogenerated by update_llc_test_checks.py
21 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl --show-mc-encoding| FileCheck %s
32
43 ; 256-bit
62586257
62596258 declare <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.256(<8 x float>, i32, <8 x i16>, i8) nounwind readonly
62606259
6261 declare <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float>, <4 x float>, i8)
6262
6263 define <4 x float>@test_int_x86_avx512_mask_movsldup_128(<4 x float> %x0, <4 x float> %x1, i8 %x2) {
6264 ; CHECK-LABEL: test_int_x86_avx512_mask_movsldup_128:
6265 ; CHECK: ## BB#0:
6266 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
6267 ; CHECK-NEXT: vmovsldup %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x12,0xc8]
6268 ; CHECK-NEXT: ## xmm1 = xmm0[0,0,2,2]
6269 ; CHECK-NEXT: vmovsldup %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x12,0xd0]
6270 ; CHECK-NEXT: ## xmm2 = xmm0[0,0,2,2]
6271 ; CHECK-NEXT: vmovsldup %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7e,0x08,0x12,0xc0]
6272 ; CHECK-NEXT: ## xmm0 = xmm0[0,0,2,2]
6273 ; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xc0]
6274 ; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6c,0x08,0x58,0xc0]
6275 ; CHECK-NEXT: retq ## encoding: [0xc3]
6276 %res = call <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float> %x0, <4 x float> %x1, i8 %x2)
6277 %res1 = call <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float> %x0, <4 x float> %x1, i8 -1)
6278 %res2 = call <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float> %x0, <4 x float> zeroinitializer, i8 %x2)
6279 %res3 = fadd <4 x float> %res, %res1
6280 %res4 = fadd <4 x float> %res2, %res3
6281 ret <4 x float> %res4
6282 }
6283
6284 declare <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float>, <8 x float>, i8)
6285
6286 define <8 x float>@test_int_x86_avx512_mask_movsldup_256(<8 x float> %x0, <8 x float> %x1, i8 %x2) {
6287 ; CHECK-LABEL: test_int_x86_avx512_mask_movsldup_256:
6288 ; CHECK: ## BB#0:
6289 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
6290 ; CHECK-NEXT: vmovsldup %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x29,0x12,0xc8]
6291 ; CHECK-NEXT: ## ymm1 = ymm0[0,0,2,2,4,4,6,6]
6292 ; CHECK-NEXT: vmovsldup %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xa9,0x12,0xd0]
6293 ; CHECK-NEXT: ## ymm2 = ymm0[0,0,2,2,4,4,6,6]
6294 ; CHECK-NEXT: vmovsldup %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7e,0x28,0x12,0xc0]
6295 ; CHECK-NEXT: ## ymm0 = ymm0[0,0,2,2,4,4,6,6]
6296 ; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xc0]
6297 ; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6c,0x28,0x58,0xc0]
6298 ; CHECK-NEXT: retq ## encoding: [0xc3]
6299 %res = call <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float> %x0, <8 x float> %x1, i8 %x2)
6300 %res1 = call <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float> %x0, <8 x float> %x1, i8 -1)
6301 %res2 = call <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float> %x0, <8 x float> zeroinitializer, i8 %x2)
6302 %res3 = fadd <8 x float> %res, %res1
6303 %res4 = fadd <8 x float> %res2, %res3
6304 ret <8 x float> %res4
6305 }
6306
6307 declare <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float>, <4 x float>, i8)
6308
6309 define <4 x float>@test_int_x86_avx512_mask_movshdup_128(<4 x float> %x0, <4 x float> %x1, i8 %x2) {
6310 ; CHECK-LABEL: test_int_x86_avx512_mask_movshdup_128:
6311 ; CHECK: ## BB#0:
6312 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
6313 ; CHECK-NEXT: vmovshdup %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x16,0xc8]
6314 ; CHECK-NEXT: ## xmm1 = xmm0[1,1,3,3]
6315 ; CHECK-NEXT: vmovshdup %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x16,0xd0]
6316 ; CHECK-NEXT: ## xmm2 = xmm0[1,1,3,3]
6317 ; CHECK-NEXT: vmovshdup %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7e,0x08,0x16,0xc0]
6318 ; CHECK-NEXT: ## xmm0 = xmm0[1,1,3,3]
6319 ; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xc0]
6320 ; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6c,0x08,0x58,0xc0]
6321 ; CHECK-NEXT: retq ## encoding: [0xc3]
6322 %res = call <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float> %x0, <4 x float> %x1, i8 %x2)
6323 %res1 = call <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float> %x0, <4 x float> %x1, i8 -1)
6324 %res2 = call <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float> %x0, <4 x float> zeroinitializer, i8 %x2)
6325 %res3 = fadd <4 x float> %res, %res1
6326 %res4 = fadd <4 x float> %res2, %res3
6327 ret <4 x float> %res4
6328 }
6329
6330 declare <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float>, <8 x float>, i8)
6331
6332 define <8 x float>@test_int_x86_avx512_mask_movshdup_256(<8 x float> %x0, <8 x float> %x1, i8 %x2) {
6333 ; CHECK-LABEL: test_int_x86_avx512_mask_movshdup_256:
6334 ; CHECK: ## BB#0:
6335 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
6336 ; CHECK-NEXT: vmovshdup %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x29,0x16,0xc8]
6337 ; CHECK-NEXT: ## ymm1 = ymm0[1,1,3,3,5,5,7,7]
6338 ; CHECK-NEXT: vmovshdup %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xa9,0x16,0xd0]
6339 ; CHECK-NEXT: ## ymm2 = ymm0[1,1,3,3,5,5,7,7]
6340 ; CHECK-NEXT: vmovshdup %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7e,0x28,0x16,0xc0]
6341 ; CHECK-NEXT: ## ymm0 = ymm0[1,1,3,3,5,5,7,7]
6342 ; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xc0]
6343 ; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6c,0x28,0x58,0xc0]
6344 ; CHECK-NEXT: retq ## encoding: [0xc3]
6345 %res = call <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float> %x0, <8 x float> %x1, i8 %x2)
6346 %res1 = call <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float> %x0, <8 x float> %x1, i8 -1)
6347 %res2 = call <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float> %x0, <8 x float> zeroinitializer, i8 %x2)
6348 %res3 = fadd <8 x float> %res, %res1
6349 %res4 = fadd <8 x float> %res2, %res3
6350 ret <8 x float> %res4
6351 }
6352 declare <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double>, <2 x double>, i8)
6353
6354 define <2 x double>@test_int_x86_avx512_mask_movddup_128(<2 x double> %x0, <2 x double> %x1, i8 %x2) {
6355 ; CHECK-LABEL: test_int_x86_avx512_mask_movddup_128:
6356 ; CHECK: ## BB#0:
6357 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
6358 ; CHECK-NEXT: vmovddup %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xff,0x09,0x12,0xc8]
6359 ; CHECK-NEXT: ## xmm1 = xmm0[0,0]
6360 ; CHECK-NEXT: vmovddup %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x12,0xd0]
6361 ; CHECK-NEXT: ## xmm2 = xmm0[0,0]
6362 ; CHECK-NEXT: vmovddup %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xff,0x08,0x12,0xc0]
6363 ; CHECK-NEXT: ## xmm0 = xmm0[0,0]
6364 ; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xc0]
6365 ; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xed,0x08,0x58,0xc0]
6366 ; CHECK-NEXT: retq ## encoding: [0xc3]
6367 %res = call <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double> %x0, <2 x double> %x1, i8 %x2)
6368 %res1 = call <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double> %x0, <2 x double> %x1, i8 -1)
6369 %res2 = call <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double> %x0, <2 x double> zeroinitializer, i8 %x2)
6370 %res3 = fadd <2 x double> %res, %res1
6371 %res4 = fadd <2 x double> %res2, %res3
6372 ret <2 x double> %res4
6373 }
6374
6375 declare <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double>, <4 x double>, i8)
6376
6377 define <4 x double>@test_int_x86_avx512_mask_movddup_256(<4 x double> %x0, <4 x double> %x1, i8 %x2) {
6378 ; CHECK-LABEL: test_int_x86_avx512_mask_movddup_256:
6379 ; CHECK: ## BB#0:
6380 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
6381 ; CHECK-NEXT: vmovddup %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xff,0x29,0x12,0xc8]
6382 ; CHECK-NEXT: ## ymm1 = ymm0[0,0,2,2]
6383 ; CHECK-NEXT: vmovddup %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0xa9,0x12,0xd0]
6384 ; CHECK-NEXT: ## ymm2 = ymm0[0,0,2,2]
6385 ; CHECK-NEXT: vmovddup %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xff,0x28,0x12,0xc0]
6386 ; CHECK-NEXT: ## ymm0 = ymm0[0,0,2,2]
6387 ; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc0]
6388 ; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0x58,0xc0]
6389 ; CHECK-NEXT: retq ## encoding: [0xc3]
6390 %res = call <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double> %x0, <4 x double> %x1, i8 %x2)
6391 %res1 = call <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double> %x0, <4 x double> %x1, i8 -1)
6392 %res2 = call <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double> %x0, <4 x double> zeroinitializer, i8 %x2)
6393 %res3 = fadd <4 x double> %res, %res1
6394 %res4 = fadd <4 x double> %res2, %res3
6395 ret <4 x double> %res4
6396 }
6397
63986260 define <8 x float> @test_rsqrt_ps_256_rr(<8 x float> %a0) {
63996261 ; CHECK-LABEL: test_rsqrt_ps_256_rr:
64006262 ; CHECK: ## BB#0:
73557217 ; CHECK: ## BB#0:
73567218 ; CHECK-NEXT: vmovdqa32 {{.*#+}} ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51]
73577219 ; CHECK-NEXT: ## encoding: [0x62,0xf1,0x7d,0x28,0x6f,0x05,A,A,A,A]
7358 ; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI478_0-4, kind: reloc_riprel_4byte
7220 ; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI472_0-4, kind: reloc_riprel_4byte
73597221 ; CHECK-NEXT: vpsravd {{.*}}(%rip), %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x46,0x05,A,A,A,A]
7360 ; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI478_1-4, kind: reloc_riprel_4byte
7222 ; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI472_1-4, kind: reloc_riprel_4byte
73617223 ; CHECK-NEXT: retq ## encoding: [0xc3]
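73627224 %res = call <8 x i32> @llvm.x86.avx512.mask.psrav8.si(<8 x i32> <i32 2, i32 9, i32 -12, i32 23, i32 -26, i32 37, i32 -40, i32 51>, <8 x i32> <i32 1, i32 18, i32 35, i32 52, i32 69, i32 15, i32 32, i32 49>, <8 x i32> zeroinitializer, i8 -1)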
73637225 ret <8 x i32> %res
73887250 ; CHECK: ## BB#0:
73897251 ; CHECK-NEXT: vmovdqa64 {{.*#+}} xmm0 = [2,18446744073709551607]
73907252 ; CHECK-NEXT: ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0x05,A,A,A,A]
7391 ; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI480_0-4, kind: reloc_riprel_4byte
7253 ; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI474_0-4, kind: reloc_riprel_4byte
73927254 ; CHECK-NEXT: vpsravq {{.*}}(%rip), %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x46,0x05,A,A,A,A]
7393 ; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI480_1-4, kind: reloc_riprel_4byte
7255 ; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI474_1-4, kind: reloc_riprel_4byte
73947256 ; CHECK-NEXT: retq ## encoding: [0xc3]
73957257 %res = call <2 x i64> @llvm.x86.avx512.mask.psrav.q.128(<2 x i64> <i64 2, i64 -9>, <2 x i64> <i64 1, i64 90>, <2 x i64> zeroinitializer, i8 -1)
73967258 ret <2 x i64> %res