llvm.org GIT mirror: llvm / 4a4c1fc
[AVX-512] Improve lowering of zero_extend of v4i1 to v4i32 and v2i1 to v2i64 with VLX, but no DQ or BW support.
Craig Topper, 3 years ago
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@291747 91177308-0d34-0410-b5e6-96231b3b80d8
2 changed files with 49 additions and 160 deletions.
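The change targets zero extensions of narrow i1 mask vectors on targets that have AVX512VL but lack AVX512DQ and AVX512BW. As a minimal, hypothetical illustration (this function is not taken from the commit's test files), the v4i1 to v4i32 case named in the commit message corresponds to IR of roughly this shape:

; Hypothetical example, not part of the commit: a <4 x i1> mask produced by a
; compare and zero-extended to <4 x i32>.
define <4 x i32> @zext_v4i1_to_v4i32(<4 x i32> %a) {
  %mask = icmp slt <4 x i32> %a, zeroinitializer
  %ext = zext <4 x i1> %mask to <4 x i32>
  ret <4 x i32> %ext
}

With AVX-512 the i1 mask lives in a k register, and the restructured condition in the hunk below lets this i1 case continue to the mask-extension path instead of returning early when the element count is not 8 or 16 and BWI is unavailable.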
@@ -15294,13 +15294,13 @@
   MVT InVT = In.getSimpleValueType();
   SDLoc DL(Op);
   unsigned NumElts = VT.getVectorNumElements();
-  if (NumElts != 8 && NumElts != 16 && !Subtarget.hasBWI())
+
+  if (VT.is512BitVector() && InVT.getVectorElementType() != MVT::i1 &&
+      (NumElts == 8 || NumElts == 16 || Subtarget.hasBWI()))
+    return DAG.getNode(X86ISD::VZEXT, DL, VT, In);
+
+  if (InVT.getVectorElementType() != MVT::i1)
     return SDValue();
-
-  if (VT.is512BitVector() && InVT.getVectorElementType() != MVT::i1)
-    return DAG.getNode(X86ISD::VZEXT, DL, VT, In);
-
-  assert(InVT.getVectorElementType() == MVT::i1);
 
   // Extend VT if the target is 256 or 128bit vector and VLX is not supported.
   MVT ExtVT = VT;
@@ -1253,40 +1253,13 @@
 ; NOVL-NEXT: vpand %xmm1, %xmm0, %xmm0
 ; NOVL-NEXT: retq
 ;
-; VLBW-LABEL: uitofp_4i1_float:
-; VLBW: ## BB#0:
-; VLBW-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; VLBW-NEXT: vpcmpgtd %xmm0, %xmm1, %k1
-; VLBW-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
-; VLBW-NEXT: vcvtudq2ps %xmm0, %xmm0
-; VLBW-NEXT: retq
-;
-; VLNOBW-LABEL: uitofp_4i1_float:
-; VLNOBW: ## BB#0:
-; VLNOBW-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; VLNOBW-NEXT: vpcmpgtd %xmm0, %xmm1, %k1
-; VLNOBW-NEXT: kshiftlw $12, %k1, %k0
-; VLNOBW-NEXT: kshiftrw $15, %k0, %k0
-; VLNOBW-NEXT: kshiftlw $13, %k1, %k2
-; VLNOBW-NEXT: kshiftrw $15, %k2, %k2
-; VLNOBW-NEXT: kshiftlw $15, %k1, %k3
-; VLNOBW-NEXT: kshiftrw $15, %k3, %k3
-; VLNOBW-NEXT: kshiftlw $14, %k1, %k1
-; VLNOBW-NEXT: kshiftrw $15, %k1, %k1
-; VLNOBW-NEXT: kmovw %k1, %eax
-; VLNOBW-NEXT: andl $1, %eax
-; VLNOBW-NEXT: kmovw %k3, %ecx
-; VLNOBW-NEXT: andl $1, %ecx
-; VLNOBW-NEXT: vmovd %ecx, %xmm0
-; VLNOBW-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
-; VLNOBW-NEXT: kmovw %k2, %eax
-; VLNOBW-NEXT: andl $1, %eax
-; VLNOBW-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0
-; VLNOBW-NEXT: kmovw %k0, %eax
-; VLNOBW-NEXT: andl $1, %eax
-; VLNOBW-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
-; VLNOBW-NEXT: vcvtdq2ps %xmm0, %xmm0
-; VLNOBW-NEXT: retq
+; VL-LABEL: uitofp_4i1_float:
+; VL: ## BB#0:
+; VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; VL-NEXT: vpcmpgtd %xmm0, %xmm1, %k1
+; VL-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
+; VL-NEXT: vcvtudq2ps %xmm0, %xmm0
+; VL-NEXT: retq
   %mask = icmp slt <4 x i32> %a, zeroinitializer
   %1 = uitofp <4 x i1> %mask to <4 x float>
   ret <4 x float> %1
@@ -1301,40 +1274,13 @@
 ; NOVL-NEXT: vcvtdq2pd %xmm0, %ymm0
 ; NOVL-NEXT: retq
 ;
-; VLBW-LABEL: uitofp_4i1_double:
-; VLBW: ## BB#0:
-; VLBW-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; VLBW-NEXT: vpcmpgtd %xmm0, %xmm1, %k1
-; VLBW-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
-; VLBW-NEXT: vcvtudq2pd %xmm0, %ymm0
-; VLBW-NEXT: retq
-;
-; VLNOBW-LABEL: uitofp_4i1_double:
-; VLNOBW: ## BB#0:
-; VLNOBW-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; VLNOBW-NEXT: vpcmpgtd %xmm0, %xmm1, %k1
-; VLNOBW-NEXT: kshiftlw $12, %k1, %k0
-; VLNOBW-NEXT: kshiftrw $15, %k0, %k0
-; VLNOBW-NEXT: kshiftlw $13, %k1, %k2
-; VLNOBW-NEXT: kshiftrw $15, %k2, %k2
-; VLNOBW-NEXT: kshiftlw $15, %k1, %k3
-; VLNOBW-NEXT: kshiftrw $15, %k3, %k3
-; VLNOBW-NEXT: kshiftlw $14, %k1, %k1
-; VLNOBW-NEXT: kshiftrw $15, %k1, %k1
-; VLNOBW-NEXT: kmovw %k1, %eax
-; VLNOBW-NEXT: andl $1, %eax
-; VLNOBW-NEXT: kmovw %k3, %ecx
-; VLNOBW-NEXT: andl $1, %ecx
-; VLNOBW-NEXT: vmovd %ecx, %xmm0
-; VLNOBW-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
-; VLNOBW-NEXT: kmovw %k2, %eax
-; VLNOBW-NEXT: andl $1, %eax
-; VLNOBW-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0
-; VLNOBW-NEXT: kmovw %k0, %eax
-; VLNOBW-NEXT: andl $1, %eax
-; VLNOBW-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
-; VLNOBW-NEXT: vcvtdq2pd %xmm0, %ymm0
-; VLNOBW-NEXT: retq
+; VL-LABEL: uitofp_4i1_double:
+; VL: ## BB#0:
+; VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; VL-NEXT: vpcmpgtd %xmm0, %xmm1, %k1
+; VL-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
+; VL-NEXT: vcvtudq2pd %xmm0, %ymm0
+; VL-NEXT: retq
   %mask = icmp slt <4 x i32> %a, zeroinitializer
   %1 = uitofp <4 x i1> %mask to <4 x double>
   ret <4 x double> %1
@@ -1357,34 +1303,14 @@
 ; NOVL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
 ; NOVL-NEXT: retq
 ;
-; VLBW-LABEL: uitofp_2i1_float:
-; VLBW: ## BB#0:
-; VLBW-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; VLBW-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
-; VLBW-NEXT: vpcmpltuq %xmm1, %xmm0, %k1
-; VLBW-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
-; VLBW-NEXT: vcvtudq2ps %xmm0, %xmm0
-; VLBW-NEXT: retq
-;
-; VLNOBW-LABEL: uitofp_2i1_float:
-; VLNOBW: ## BB#0:
-; VLNOBW-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; VLNOBW-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
-; VLNOBW-NEXT: vpcmpltuq %xmm1, %xmm0, %k0
-; VLNOBW-NEXT: kshiftlw $15, %k0, %k1
-; VLNOBW-NEXT: kshiftrw $15, %k1, %k1
-; VLNOBW-NEXT: kshiftlw $14, %k0, %k0
-; VLNOBW-NEXT: kshiftrw $15, %k0, %k0
-; VLNOBW-NEXT: kmovw %k0, %eax
-; VLNOBW-NEXT: andl $1, %eax
-; VLNOBW-NEXT: vmovd %eax, %xmm0
-; VLNOBW-NEXT: kmovw %k1, %eax
-; VLNOBW-NEXT: andl $1, %eax
-; VLNOBW-NEXT: vmovd %eax, %xmm1
-; VLNOBW-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; VLNOBW-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
-; VLNOBW-NEXT: vcvtdq2ps %xmm0, %xmm0
-; VLNOBW-NEXT: retq
+; VL-LABEL: uitofp_2i1_float:
+; VL: ## BB#0:
+; VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
+; VL-NEXT: vpcmpltuq %xmm1, %xmm0, %k1
+; VL-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
+; VL-NEXT: vcvtudq2ps %xmm0, %xmm0
+; VL-NEXT: retq
   %mask = icmp ult <2 x i32> %a, zeroinitializer
   %1 = uitofp <2 x i1> %mask to <2 x float>
   ret <2 x float> %1
@@ -1401,64 +1327,27 @@
 ; NOVL-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
 ; NOVL-NEXT: retq
 ;
-; SKX-LABEL: uitofp_2i1_double:
-; SKX: ## BB#0:
-; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
-; SKX-NEXT: vpcmpltuq %xmm1, %xmm0, %k1
-; SKX-NEXT: vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z}
-; SKX-NEXT: vcvtuqq2pd %xmm0, %xmm0
-; SKX-NEXT: retq
-;
-; AVX512VL-LABEL: uitofp_2i1_double:
-; AVX512VL: ## BB#0:
-; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
-; AVX512VL-NEXT: vpcmpltuq %xmm1, %xmm0, %k0
-; AVX512VL-NEXT: kshiftlw $15, %k0, %k1
-; AVX512VL-NEXT: kshiftrw $15, %k1, %k1
-; AVX512VL-NEXT: kshiftlw $14, %k0, %k0
-; AVX512VL-NEXT: kshiftrw $15, %k0, %k0
-; AVX512VL-NEXT: kmovw %k0, %eax
-; AVX512VL-NEXT: andl $1, %eax
-; AVX512VL-NEXT: vcvtsi2sdl %eax, %xmm2, %xmm0
-; AVX512VL-NEXT: kmovw %k1, %eax
-; AVX512VL-NEXT: andl $1, %eax
-; AVX512VL-NEXT: vcvtsi2sdl %eax, %xmm2, %xmm1
-; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX512VL-NEXT: retq
-;
-; AVX512VLDQ-LABEL: uitofp_2i1_double:
-; AVX512VLDQ: ## BB#0:
-; AVX512VLDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512VLDQ-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
-; AVX512VLDQ-NEXT: vpcmpltuq %xmm1, %xmm0, %k0
-; AVX512VLDQ-NEXT: kshiftlw $15, %k0, %k1
-; AVX512VLDQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512VLDQ-NEXT: kshiftlw $14, %k0, %k0
-; AVX512VLDQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512VLDQ-NEXT: kmovw %k0, %eax
-; AVX512VLDQ-NEXT: andq $1, %rax
-; AVX512VLDQ-NEXT: vmovq %rax, %xmm0
-; AVX512VLDQ-NEXT: kmovw %k1, %eax
-; AVX512VLDQ-NEXT: andq $1, %rax
-; AVX512VLDQ-NEXT: vmovq %rax, %xmm1
-; AVX512VLDQ-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX512VLDQ-NEXT: vcvtqq2pd %xmm0, %xmm0
-; AVX512VLDQ-NEXT: retq
-;
-; AVX512VLBW-LABEL: uitofp_2i1_double:
-; AVX512VLBW: ## BB#0:
-; AVX512VLBW-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512VLBW-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
-; AVX512VLBW-NEXT: vpcmpltuq %xmm1, %xmm0, %k1
-; AVX512VLBW-NEXT: vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z}
-; AVX512VLBW-NEXT: vpextrq $1, %xmm0, %rax
-; AVX512VLBW-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm1
-; AVX512VLBW-NEXT: vmovq %xmm0, %rax
-; AVX512VLBW-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm0
-; AVX512VLBW-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; AVX512VLBW-NEXT: retq
+; VLDQ-LABEL: uitofp_2i1_double:
+; VLDQ: ## BB#0:
+; VLDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; VLDQ-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
+; VLDQ-NEXT: vpcmpltuq %xmm1, %xmm0, %k1
+; VLDQ-NEXT: vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z}
+; VLDQ-NEXT: vcvtuqq2pd %xmm0, %xmm0
+; VLDQ-NEXT: retq
+;
+; VLNODQ-LABEL: uitofp_2i1_double:
+; VLNODQ: ## BB#0:
+; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; VLNODQ-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
+; VLNODQ-NEXT: vpcmpltuq %xmm1, %xmm0, %k1
+; VLNODQ-NEXT: vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z}
+; VLNODQ-NEXT: vpextrq $1, %xmm0, %rax
+; VLNODQ-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm1
+; VLNODQ-NEXT: vmovq %xmm0, %rax
+; VLNODQ-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm0
+; VLNODQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; VLNODQ-NEXT: retq
   %mask = icmp ult <2 x i32> %a, zeroinitializer
   %1 = uitofp <2 x i1> %mask to <2 x double>
   ret <2 x double> %1
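The check lines in the last hunk belong to a small IR test whose body is visible in the context lines above. A sketch of the whole function, with the define line inferred from the uitofp_2i1_double label and the types in the body (the actual signature and RUN lines are outside this diff), would be:

; Sketch only: the define line is inferred, not copied from the test file.
define <2 x double> @uitofp_2i1_double(<2 x i32> %a) {
  %mask = icmp ult <2 x i32> %a, zeroinitializer
  %1 = uitofp <2 x i1> %mask to <2 x double>
  ret <2 x double> %1
}

With VLX plus DQ, the new VLDQ checks expect a masked vmovdqa64 followed by vcvtuqq2pd; with VLX but no DQ, the VLNODQ checks extract each element and convert it with vcvtusi2sdq, which is still much shorter than the kshift/kmov sequence the removed AVX512VL checks expected.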