llvm.org GIT mirror llvm / f371ead
[X86][AVX512] Autoupgrade the VPERMPD/VPERMQ intrinsics git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@274506 91177308-0d34-0410-b5e6-96231b3b80d8 Simon Pilgrim 4 years ago
5 changed file(s) with 104 addition(s) and 85 deletion(s). Raw diff Collapse all Expand all
225225 Name.startswith("x86.avx512.mask.pshufl.w.") ||
226226 Name.startswith("x86.avx512.mask.pshufh.w.") ||
227227 Name.startswith("x86.avx512.mask.vpermil.p") ||
228 Name.startswith("x86.avx512.mask.perm.df.") ||
229 Name.startswith("x86.avx512.mask.perm.di.") ||
228230 Name.startswith("x86.avx512.mask.punpckl") ||
229231 Name.startswith("x86.avx512.mask.punpckh") ||
230232 Name.startswith("x86.avx512.mask.unpckl.") ||
10051007 Rep = Builder.CreateShuffleVector(Op0, UndefV, Idxs);
10061008 } else if (Name == "llvm.stackprotectorcheck") {
10071009 Rep = nullptr;
1010 } else if (Name.startswith("llvm.x86.avx512.mask.perm.df.") ||
1011 Name.startswith("llvm.x86.avx512.mask.perm.di.")) {
1012 Value *Op0 = CI->getArgOperand(0);
1013 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
1014 VectorType *VecTy = cast<VectorType>(CI->getType());
1015 unsigned NumElts = VecTy->getNumElements();
1016
1017 SmallVector<uint32_t, 8> Idxs(NumElts);
1018 for (unsigned i = 0; i != NumElts; ++i)
1019 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
1020
1021 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
1022
1023 if (CI->getNumArgOperands() == 4)
1024 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1025 CI->getArgOperand(2));
10081026 } else if (Name.startswith("llvm.x86.avx.vpermil.") ||
10091027 Name == "llvm.x86.sse2.pshuf.d" ||
10101028 Name.startswith("llvm.x86.avx512.mask.vpermil.p") ||
6060 ret <8 x double> %res4
6161 }
6262
63 declare <8 x double> @llvm.x86.avx512.mask.perm.df.512(<8 x double>, i32, <8 x double>, i8)
64
65 define <8 x double>@test_int_x86_avx512_mask_perm_df_512(<8 x double> %x0, i32 %x1, <8 x double> %x2, i8 %x3) {
66 ; CHECK-LABEL: test_int_x86_avx512_mask_perm_df_512:
67 ; CHECK: ## BB#0:
68 ; CHECK-NEXT: vpermpd {{.*#+}} zmm2 = zmm0[3,0,0,0,7,4,4,4]
69 ; CHECK-NEXT: kmovw %esi, %k1
70 ; CHECK-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,0,0,0,7,4,4,4]
71 ; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,0,0,0,7,4,4,4]
72 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
73 ; CHECK-NEXT: vaddpd %zmm2, %zmm0, %zmm0
74 ; CHECK-NEXT: retq
75 %res = call <8 x double> @llvm.x86.avx512.mask.perm.df.512(<8 x double> %x0, i32 3, <8 x double> %x2, i8 %x3)
76 %res1 = call <8 x double> @llvm.x86.avx512.mask.perm.df.512(<8 x double> %x0, i32 3, <8 x double> zeroinitializer, i8 %x3)
77 %res2 = call <8 x double> @llvm.x86.avx512.mask.perm.df.512(<8 x double> %x0, i32 3, <8 x double> %x2, i8 -1)
78 %res3 = fadd <8 x double> %res, %res1
79 %res4 = fadd <8 x double> %res3, %res2
80 ret <8 x double> %res4
81 }
82
83 declare <8 x i64> @llvm.x86.avx512.mask.perm.di.512(<8 x i64>, i32, <8 x i64>, i8)
84
85 define <8 x i64>@test_int_x86_avx512_mask_perm_di_512(<8 x i64> %x0, i32 %x1, <8 x i64> %x2, i8 %x3) {
86 ; CHECK-LABEL: test_int_x86_avx512_mask_perm_di_512:
87 ; CHECK: ## BB#0:
88 ; CHECK-NEXT: vpermq {{.*#+}} zmm2 = zmm0[3,0,0,0,7,4,4,4]
89 ; CHECK-NEXT: kmovw %esi, %k1
90 ; CHECK-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[3,0,0,0,7,4,4,4]
91 ; CHECK-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[3,0,0,0,7,4,4,4]
92 ; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
93 ; CHECK-NEXT: vpaddq %zmm2, %zmm0, %zmm0
94 ; CHECK-NEXT: retq
95 %res = call <8 x i64> @llvm.x86.avx512.mask.perm.di.512(<8 x i64> %x0, i32 3, <8 x i64> %x2, i8 %x3)
96 %res1 = call <8 x i64> @llvm.x86.avx512.mask.perm.di.512(<8 x i64> %x0, i32 3, <8 x i64> zeroinitializer, i8 %x3)
97 %res2 = call <8 x i64> @llvm.x86.avx512.mask.perm.di.512(<8 x i64> %x0, i32 3, <8 x i64> %x2, i8 -1)
98 %res3 = add <8 x i64> %res, %res1
99 %res4 = add <8 x i64> %res3, %res2
100 ret <8 x i64> %res4
101 }
102
63103 define void @test_store1(<16 x float> %data, i8* %ptr, i8* %ptr2, i16 %mask) {
64104 ; CHECK-LABEL: test_store1:
65105 ; CHECK: ## BB#0:
63056305 ret <8 x i64> %res4
63066306 }
63076307
6308 declare <8 x double> @llvm.x86.avx512.mask.perm.df.512(<8 x double>, i32, <8 x double>, i8)
6309
6310 define <8 x double>@test_int_x86_avx512_mask_perm_df_512(<8 x double> %x0, i32 %x1, <8 x double> %x2, i8 %x3) {
6311 ; CHECK-LABEL: test_int_x86_avx512_mask_perm_df_512:
6312 ; CHECK: ## BB#0:
6313 ; CHECK-NEXT: kmovw %esi, %k1
6314 ; CHECK-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,0,0,0,7,4,4,4]
6315 ; CHECK-NEXT: vpermpd {{.*#+}} zmm2 {%k1} {z} = zmm0[3,0,0,0,7,4,4,4]
6316 ; CHECK-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[3,0,0,0,7,4,4,4]
6317 ; CHECK-NEXT: vaddpd %zmm2, %zmm1, %zmm1
6318 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
6319 ; CHECK-NEXT: retq
6320 %res = call <8 x double> @llvm.x86.avx512.mask.perm.df.512(<8 x double> %x0, i32 3, <8 x double> %x2, i8 %x3)
6321 %res1 = call <8 x double> @llvm.x86.avx512.mask.perm.df.512(<8 x double> %x0, i32 3, <8 x double> zeroinitializer, i8 %x3)
6322 %res2 = call <8 x double> @llvm.x86.avx512.mask.perm.df.512(<8 x double> %x0, i32 3, <8 x double> %x2, i8 -1)
6323 %res3 = fadd <8 x double> %res, %res1
6324 %res4 = fadd <8 x double> %res3, %res2
6325 ret <8 x double> %res4
6326 }
6327
6328 declare <8 x i64> @llvm.x86.avx512.mask.perm.di.512(<8 x i64>, i32, <8 x i64>, i8)
6329
6330 define <8 x i64>@test_int_x86_avx512_mask_perm_di_512(<8 x i64> %x0, i32 %x1, <8 x i64> %x2, i8 %x3) {
6331 ; CHECK-LABEL: test_int_x86_avx512_mask_perm_di_512:
6332 ; CHECK: ## BB#0:
6333 ; CHECK-NEXT: kmovw %esi, %k1
6334 ; CHECK-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[3,0,0,0,7,4,4,4]
6335 ; CHECK-NEXT: vpermq {{.*#+}} zmm2 {%k1} {z} = zmm0[3,0,0,0,7,4,4,4]
6336 ; CHECK-NEXT: vpermq {{.*#+}} zmm0 = zmm0[3,0,0,0,7,4,4,4]
6337 ; CHECK-NEXT: vpaddq %zmm2, %zmm1, %zmm1
6338 ; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
6339 ; CHECK-NEXT: retq
6340 %res = call <8 x i64> @llvm.x86.avx512.mask.perm.di.512(<8 x i64> %x0, i32 3, <8 x i64> %x2, i8 %x3)
6341 %res1 = call <8 x i64> @llvm.x86.avx512.mask.perm.di.512(<8 x i64> %x0, i32 3, <8 x i64> zeroinitializer, i8 %x3)
6342 %res2 = call <8 x i64> @llvm.x86.avx512.mask.perm.di.512(<8 x i64> %x0, i32 3, <8 x i64> %x2, i8 -1)
6343 %res3 = add <8 x i64> %res, %res1
6344 %res4 = add <8 x i64> %res3, %res2
6345 ret <8 x i64> %res4
6346 }
6347
63486308 declare <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double>, <8 x i64>, <8 x double>, i8)
63496309
63506310 define <8 x double>@test_int_x86_avx512_mask_permvar_df_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) {
229229 ret <4 x float> %res4
230230 }
231231
232 declare <4 x double> @llvm.x86.avx512.mask.perm.df.256(<4 x double>, i32, <4 x double>, i8)
233
234 define <4 x double>@test_int_x86_avx512_mask_perm_df_256(<4 x double> %x0, i32 %x1, <4 x double> %x2, i8 %x3) {
235 ; CHECK-LABEL: test_int_x86_avx512_mask_perm_df_256:
236 ; CHECK: ## BB#0:
237 ; CHECK-NEXT: vpermpd $3, %ymm0, %ymm2 ## encoding: [0x62,0xf3,0xfd,0x28,0x01,0xd0,0x03]
238 ; CHECK-NEXT: ## ymm2 = ymm0[3,0,0,0]
239 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
240 ; CHECK-NEXT: vpermpd $3, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x01,0xc8,0x03]
241 ; CHECK-NEXT: ## ymm1 {%k1} = ymm0[3,0,0,0]
242 ; CHECK-NEXT: vpermpd $3, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x01,0xc0,0x03]
243 ; CHECK-NEXT: ## ymm0 {%k1} {z} = ymm0[3,0,0,0]
244 ; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc0]
245 ; CHECK-NEXT: vaddpd %ymm2, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x58,0xc2]
246 ; CHECK-NEXT: retq ## encoding: [0xc3]
247 %res = call <4 x double> @llvm.x86.avx512.mask.perm.df.256(<4 x double> %x0, i32 3, <4 x double> %x2, i8 %x3)
248 %res1 = call <4 x double> @llvm.x86.avx512.mask.perm.df.256(<4 x double> %x0, i32 3, <4 x double> zeroinitializer, i8 %x3)
249 %res2 = call <4 x double> @llvm.x86.avx512.mask.perm.df.256(<4 x double> %x0, i32 3, <4 x double> %x2, i8 -1)
250 %res3 = fadd <4 x double> %res, %res1
251 %res4 = fadd <4 x double> %res3, %res2
252 ret <4 x double> %res4
253 }
254
255 declare <4 x i64> @llvm.x86.avx512.mask.perm.di.256(<4 x i64>, i32, <4 x i64>, i8)
256
257 define <4 x i64>@test_int_x86_avx512_mask_perm_di_256(<4 x i64> %x0, i32 %x1, <4 x i64> %x2, i8 %x3) {
258 ; CHECK-LABEL: test_int_x86_avx512_mask_perm_di_256:
259 ; CHECK: ## BB#0:
260 ; CHECK-NEXT: vpermq $3, %ymm0, %ymm2 ## encoding: [0x62,0xf3,0xfd,0x28,0x00,0xd0,0x03]
261 ; CHECK-NEXT: ## ymm2 = ymm0[3,0,0,0]
262 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
263 ; CHECK-NEXT: vpermq $3, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x00,0xc8,0x03]
264 ; CHECK-NEXT: ## ymm1 {%k1} = ymm0[3,0,0,0]
265 ; CHECK-NEXT: vpermq $3, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x00,0xc0,0x03]
266 ; CHECK-NEXT: ## ymm0 {%k1} {z} = ymm0[3,0,0,0]
267 ; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0]
268 ; CHECK-NEXT: vpaddq %ymm2, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xd4,0xc2]
269 ; CHECK-NEXT: retq ## encoding: [0xc3]
270 %res = call <4 x i64> @llvm.x86.avx512.mask.perm.di.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 %x3)
271 %res1 = call <4 x i64> @llvm.x86.avx512.mask.perm.di.256(<4 x i64> %x0, i32 3, <4 x i64> zeroinitializer, i8 %x3)
272 %res2 = call <4 x i64> @llvm.x86.avx512.mask.perm.di.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 -1)
273 %res3 = add <4 x i64> %res, %res1
274 %res4 = add <4 x i64> %res3, %res2
275 ret <4 x i64> %res4
276 }
277
232278 declare void @llvm.x86.avx512.mask.store.pd.128(i8*, <2 x double>, i8)
233279
234280 define void@test_int_x86_avx512_mask_store_pd_128(i8* %ptr1, i8* %ptr2, <2 x double> %x1, i8 %x2) {
79767976 ret <4 x i64> %res4
79777977 }
79787978
7979 declare <4 x double> @llvm.x86.avx512.mask.perm.df.256(<4 x double>, i32, <4 x double>, i8)
7980
7981 define <4 x double>@test_int_x86_avx512_mask_perm_df_256(<4 x double> %x0, i32 %x1, <4 x double> %x2, i8 %x3) {
7982 ; CHECK-LABEL: test_int_x86_avx512_mask_perm_df_256:
7983 ; CHECK: ## BB#0:
7984 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
7985 ; CHECK-NEXT: vpermpd $3, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x01,0xc8,0x03]
7986 ; CHECK-NEXT: ## ymm1 {%k1} = ymm0[3,0,0,0]
7987 ; CHECK-NEXT: vpermpd $3, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x01,0xd0,0x03]
7988 ; CHECK-NEXT: ## ymm2 {%k1} {z} = ymm0[3,0,0,0]
7989 ; CHECK-NEXT: vpermpd $3, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x01,0xc0,0x03]
7990 ; CHECK-NEXT: ## ymm0 = ymm0[3,0,0,0]
7991 ; CHECK-NEXT: vaddpd %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xca]
7992 ; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc0]
7993 ; CHECK-NEXT: retq ## encoding: [0xc3]
7994 %res = call <4 x double> @llvm.x86.avx512.mask.perm.df.256(<4 x double> %x0, i32 3, <4 x double> %x2, i8 %x3)
7995 %res1 = call <4 x double> @llvm.x86.avx512.mask.perm.df.256(<4 x double> %x0, i32 3, <4 x double> zeroinitializer, i8 %x3)
7996 %res2 = call <4 x double> @llvm.x86.avx512.mask.perm.df.256(<4 x double> %x0, i32 3, <4 x double> %x2, i8 -1)
7997 %res3 = fadd <4 x double> %res, %res1
7998 %res4 = fadd <4 x double> %res3, %res2
7999 ret <4 x double> %res4
8000 }
8001
8002 declare <4 x i64> @llvm.x86.avx512.mask.perm.di.256(<4 x i64>, i32, <4 x i64>, i8)
8003
8004 define <4 x i64>@test_int_x86_avx512_mask_perm_di_256(<4 x i64> %x0, i32 %x1, <4 x i64> %x2, i8 %x3) {
8005 ; CHECK-LABEL: test_int_x86_avx512_mask_perm_di_256:
8006 ; CHECK: ## BB#0:
8007 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
8008 ; CHECK-NEXT: vpermq $3, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x00,0xc8,0x03]
8009 ; CHECK-NEXT: ## ymm1 {%k1} = ymm0[3,0,0,0]
8010 ; CHECK-NEXT: vpermq $3, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x00,0xd0,0x03]
8011 ; CHECK-NEXT: ## ymm2 {%k1} {z} = ymm0[3,0,0,0]
8012 ; CHECK-NEXT: vpermq $3, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x00,0xc0,0x03]
8013 ; CHECK-NEXT: ## ymm0 = ymm0[3,0,0,0]
8014 ; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xca]
8015 ; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0]
8016 ; CHECK-NEXT: retq ## encoding: [0xc3]
8017 %res = call <4 x i64> @llvm.x86.avx512.mask.perm.di.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 %x3)
8018 %res1 = call <4 x i64> @llvm.x86.avx512.mask.perm.di.256(<4 x i64> %x0, i32 3, <4 x i64> zeroinitializer, i8 %x3)
8019 %res2 = call <4 x i64> @llvm.x86.avx512.mask.perm.di.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 -1)
8020 %res3 = add <4 x i64> %res, %res1
8021 %res4 = add <4 x i64> %res3, %res2
8022 ret <4 x i64> %res4
8023 }
80247979 declare <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double>, <4 x i64>, <4 x double>, i8)
80257980
80267981 define <4 x double>@test_int_x86_avx512_mask_permvar_df_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) {