llvm.org GIT mirror llvm / 6b1c5fc
Begin adding AVX2 instructions. No selection support yet other than intrinsics. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143331 91177308-0d34-0410-b5e6-96231b3b80d8 Craig Topper 8 years ago
3 changed file(s) with 920 addition(s) and 146 deletion(s). Raw diff Collapse all Expand all
13601360 }
13611361
13621362 //===----------------------------------------------------------------------===//
1363 // AVX2
1364
1365 // Integer arithmetic ops.
1366 let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
1367 def int_x86_avx2_padds_b : GCCBuiltin<"__builtin_ia32_paddsb256">,
1368 Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
1369 llvm_v32i8_ty], [IntrNoMem, Commutative]>;
1370 def int_x86_avx2_padds_w : GCCBuiltin<"__builtin_ia32_paddsw256">,
1371 Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
1372 llvm_v16i16_ty], [IntrNoMem, Commutative]>;
1373 def int_x86_avx2_paddus_b : GCCBuiltin<"__builtin_ia32_paddusb256">,
1374 Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
1375 llvm_v32i8_ty], [IntrNoMem, Commutative]>;
1376 def int_x86_avx2_paddus_w : GCCBuiltin<"__builtin_ia32_paddusw256">,
1377 Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
1378 llvm_v16i16_ty], [IntrNoMem, Commutative]>;
1379 def int_x86_avx2_psubs_b : GCCBuiltin<"__builtin_ia32_psubsb256">,
1380 Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
1381 llvm_v32i8_ty], [IntrNoMem]>;
1382 def int_x86_avx2_psubs_w : GCCBuiltin<"__builtin_ia32_psubsw256">,
1383 Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
1384 llvm_v16i16_ty], [IntrNoMem]>;
1385 def int_x86_avx2_psubus_b : GCCBuiltin<"__builtin_ia32_psubusb256">,
1386 Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
1387 llvm_v32i8_ty], [IntrNoMem]>;
1388 def int_x86_avx2_psubus_w : GCCBuiltin<"__builtin_ia32_psubusw256">,
1389 Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
1390 llvm_v16i16_ty], [IntrNoMem]>;
1391 def int_x86_avx2_pmulhu_w : GCCBuiltin<"__builtin_ia32_pmulhuw256">,
1392 Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
1393 llvm_v16i16_ty], [IntrNoMem, Commutative]>;
1394 def int_x86_avx2_pmulh_w : GCCBuiltin<"__builtin_ia32_pmulhw256">,
1395 Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
1396 llvm_v16i16_ty], [IntrNoMem, Commutative]>;
1397 def int_x86_avx2_pmulu_dq : GCCBuiltin<"__builtin_ia32_pmuludq256">,
1398 Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty,
1399 llvm_v8i32_ty], [IntrNoMem, Commutative]>;
1400 def int_x86_avx2_pmadd_wd : GCCBuiltin<"__builtin_ia32_pmaddwd256">,
1401 Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty,
1402 llvm_v16i16_ty], [IntrNoMem, Commutative]>;
1403 def int_x86_avx2_pavg_b : GCCBuiltin<"__builtin_ia32_pavgb256">,
1404 Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
1405 llvm_v32i8_ty], [IntrNoMem, Commutative]>;
1406 def int_x86_avx2_pavg_w : GCCBuiltin<"__builtin_ia32_pavgw256">,
1407 Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
1408 llvm_v16i16_ty], [IntrNoMem, Commutative]>;
1409 def int_x86_avx2_pmaxu_b : GCCBuiltin<"__builtin_ia32_pmaxub256">,
1410 Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
1411 llvm_v32i8_ty], [IntrNoMem, Commutative]>;
1412 def int_x86_avx2_pmaxs_w : GCCBuiltin<"__builtin_ia32_pmaxsw256">,
1413 Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
1414 llvm_v16i16_ty], [IntrNoMem, Commutative]>;
1415 def int_x86_avx2_pminu_b : GCCBuiltin<"__builtin_ia32_pminub256">,
1416 Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
1417 llvm_v32i8_ty], [IntrNoMem, Commutative]>;
1418 def int_x86_avx2_pmins_w : GCCBuiltin<"__builtin_ia32_pminsw256">,
1419 Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
1420 llvm_v16i16_ty], [IntrNoMem, Commutative]>;
1421 def int_x86_avx2_psad_bw : GCCBuiltin<"__builtin_ia32_psadbw256">,
1422 Intrinsic<[llvm_v4i64_ty], [llvm_v32i8_ty,
1423 llvm_v32i8_ty], [IntrNoMem, Commutative]>;
1424 }
1425
1426 // Integer shift ops.
1427 let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
1428 def int_x86_avx2_psll_w : GCCBuiltin<"__builtin_ia32_psllw256">,
1429 Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
1430 llvm_v8i16_ty], [IntrNoMem]>;
1431 def int_x86_avx2_psll_d : GCCBuiltin<"__builtin_ia32_pslld256">,
1432 Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
1433 llvm_v4i32_ty], [IntrNoMem]>;
1434 def int_x86_avx2_psll_q : GCCBuiltin<"__builtin_ia32_psllq256">,
1435 Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
1436 llvm_v2i64_ty], [IntrNoMem]>;
1437 def int_x86_avx2_psrl_w : GCCBuiltin<"__builtin_ia32_psrlw256">,
1438 Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
1439 llvm_v8i16_ty], [IntrNoMem]>;
1440 def int_x86_avx2_psrl_d : GCCBuiltin<"__builtin_ia32_psrld256">,
1441 Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
1442 llvm_v4i32_ty], [IntrNoMem]>;
1443 def int_x86_avx2_psrl_q : GCCBuiltin<"__builtin_ia32_psrlq256">,
1444 Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
1445 llvm_v2i64_ty], [IntrNoMem]>;
1446 def int_x86_avx2_psra_w : GCCBuiltin<"__builtin_ia32_psraw256">,
1447 Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
1448 llvm_v8i16_ty], [IntrNoMem]>;
1449 def int_x86_avx2_psra_d : GCCBuiltin<"__builtin_ia32_psrad256">,
1450 Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
1451 llvm_v4i32_ty], [IntrNoMem]>;
1452
1453 def int_x86_avx2_pslli_w : GCCBuiltin<"__builtin_ia32_psllwi256">,
1454 Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
1455 llvm_i32_ty], [IntrNoMem]>;
1456 def int_x86_avx2_pslli_d : GCCBuiltin<"__builtin_ia32_pslldi256">,
1457 Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
1458 llvm_i32_ty], [IntrNoMem]>;
1459 def int_x86_avx2_pslli_q : GCCBuiltin<"__builtin_ia32_psllqi256">,
1460 Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
1461 llvm_i32_ty], [IntrNoMem]>;
1462 def int_x86_avx2_psrli_w : GCCBuiltin<"__builtin_ia32_psrlwi256">,
1463 Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
1464 llvm_i32_ty], [IntrNoMem]>;
1465 def int_x86_avx2_psrli_d : GCCBuiltin<"__builtin_ia32_psrldi256">,
1466 Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
1467 llvm_i32_ty], [IntrNoMem]>;
1468 def int_x86_avx2_psrli_q : GCCBuiltin<"__builtin_ia32_psrlqi256">,
1469 Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
1470 llvm_i32_ty], [IntrNoMem]>;
1471 def int_x86_avx2_psrai_w : GCCBuiltin<"__builtin_ia32_psrawi256">,
1472 Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
1473 llvm_i32_ty], [IntrNoMem]>;
1474 def int_x86_avx2_psrai_d : GCCBuiltin<"__builtin_ia32_psradi256">,
1475 Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
1476 llvm_i32_ty], [IntrNoMem]>;
1477
1478 def int_x86_avx2_psll_dq : GCCBuiltin<"__builtin_ia32_pslldqi256">,
1479 Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
1480 llvm_i32_ty], [IntrNoMem]>;
1481 def int_x86_avx2_psrl_dq : GCCBuiltin<"__builtin_ia32_psrldqi256">,
1482 Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
1483 llvm_i32_ty], [IntrNoMem]>;
1484 def int_x86_avx2_psll_dq_bs : GCCBuiltin<"__builtin_ia32_pslldqi256_byteshift">,
1485 Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
1486 llvm_i32_ty], [IntrNoMem]>;
1487 def int_x86_avx2_psrl_dq_bs : GCCBuiltin<"__builtin_ia32_psrldqi256_byteshift">,
1488 Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
1489 llvm_i32_ty], [IntrNoMem]>;
1490 }
1491
1492 // Integer comparison ops
1493 let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
1494 def int_x86_avx2_pcmpeq_b : GCCBuiltin<"__builtin_ia32_pcmpeqb256">,
1495 Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty],
1496 [IntrNoMem, Commutative]>;
1497 def int_x86_avx2_pcmpeq_w : GCCBuiltin<"__builtin_ia32_pcmpeqw256">,
1498 Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty],
1499 [IntrNoMem, Commutative]>;
1500 def int_x86_avx2_pcmpeq_d : GCCBuiltin<"__builtin_ia32_pcmpeqd256">,
1501 Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty],
1502 [IntrNoMem, Commutative]>;
1503 def int_x86_avx2_pcmpgt_b : GCCBuiltin<"__builtin_ia32_pcmpgtb256">,
1504 Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
1505 llvm_v32i8_ty], [IntrNoMem]>;
1506 def int_x86_avx2_pcmpgt_w : GCCBuiltin<"__builtin_ia32_pcmpgtw256">,
1507 Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
1508 llvm_v16i16_ty], [IntrNoMem]>;
1509 def int_x86_avx2_pcmpgt_d : GCCBuiltin<"__builtin_ia32_pcmpgtd256">,
1510 Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
1511 llvm_v8i32_ty], [IntrNoMem]>;
1512 }
1513
1514 // Pack ops.
1515 let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
1516 def int_x86_avx2_packsswb : GCCBuiltin<"__builtin_ia32_packsswb256">,
1517 Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty,
1518 llvm_v16i16_ty], [IntrNoMem]>;
1519 def int_x86_avx2_packssdw : GCCBuiltin<"__builtin_ia32_packssdw256">,
1520 Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty,
1521 llvm_v8i32_ty], [IntrNoMem]>;
1522 def int_x86_avx2_packuswb : GCCBuiltin<"__builtin_ia32_packuswb256">,
1523 Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty,
1524 llvm_v16i16_ty], [IntrNoMem]>;
1525 }
1526
1527 //===----------------------------------------------------------------------===//
13631528 // MMX
13641529
13651530 // Empty MMX state op.
33423342 let ExeDomain = SSEPackedInt in { // SSE integer instructions
33433343
33443344 multiclass PDI_binop_rm_int opc, string OpcodeStr, Intrinsic IntId,
3345 bit IsCommutable = 0, bit Is2Addr = 1> {
3345 RegisterClass RC, PatFrag memop_frag,
3346 X86MemOperand x86memop, bit IsCommutable = 0,
3347 bit Is2Addr = 1> {
33463348 let isCommutable = IsCommutable in
3347 def rr : PDI
3348 (ins VR128:$src1, VR128:$src2),
3349 def rr : PDI),
3350 (ins RC:$src1, RC:$src2),
33493351 !if(Is2Addr,
33503352 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
33513353 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
3352 [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>;
3353 def rm : PDI
3354 (ins VR128:$src1, i128mem:$src2),
3354 [(set RC:$dst, (IntId RC:$src1, RC:$src2))]>;
3355 def rm : PDI
3356 (ins RC:$src1, x86memop:$src2),
33553357 !if(Is2Addr,
33563358 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
33573359 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
3358 [(set VR128:$dst, (IntId VR128:$src1,
3359 (bitconvert (memopv2i64 addr:$src2))))]>;
3360 [(set RC:$dst, (IntId RC:$src1, (bitconvert (memop_frag addr:$src2))))]>;
33603361 }
33613362
33623363 multiclass PDI_binop_rmi_int opc, bits<8> opc2, Format ImmForm,
33633364 string OpcodeStr, Intrinsic IntId,
3364 Intrinsic IntId2, bit Is2Addr = 1> {
3365 def rr : PDI
3366 (ins VR128:$src1, VR128:$src2),
3365 Intrinsic IntId2, RegisterClass RC,
3366 bit Is2Addr = 1> {
3367 // src2 is always 128-bit
3368 def rr : PDI
3369 (ins RC:$src1, VR128:$src2),
33673370 !if(Is2Addr,
33683371 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
33693372 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
3370 [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>;
3371 def rm : PDI
3372 (ins VR128:$src1, i128mem:$src2),
3373 [(set RC:$dst, (IntId RC:$src1, VR128:$src2))]>;
3374 def rm : PDI
3375 (ins RC:$src1, i128mem:$src2),
33733376 !if(Is2Addr,
33743377 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
33753378 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
3376 [(set VR128:$dst, (IntId VR128:$src1,
3377 (bitconvert (memopv2i64 addr:$src2))))]>;
3378 def ri : PDIi8
3379 (ins VR128:$src1, i32i8imm:$src2),
3379 [(set RC:$dst, (IntId RC:$src1, (bitconvert (memopv2i64 addr:$src2))))]>;
3380 def ri : PDIi8
3381 (ins RC:$src1, i32i8imm:$src2),
33803382 !if(Is2Addr,
33813383 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
33823384 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
3383 [(set VR128:$dst, (IntId2 VR128:$src1, (i32 imm:$src2)))]>;
3385 [(set RC:$dst, (IntId2 RC:$src1, (i32 imm:$src2)))]>;
33843386 }
33853387
33863388 /// PDI_binop_rm - Simple SSE2 binary operator.
33873389 multiclass PDI_binop_rm opc, string OpcodeStr, SDNode OpNode,
3388 ValueType OpVT, bit IsCommutable = 0, bit Is2Addr = 1> {
3390 ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
3391 X86MemOperand x86memop, bit IsCommutable = 0,
3392 bit Is2Addr = 1> {
33893393 let isCommutable = IsCommutable in
3390 def rr : PDI
3391 (ins VR128:$src1, VR128:$src2),
3394 def rr : PDI),
3395 (ins RC:$src1, RC:$src2),
33923396 !if(Is2Addr,
33933397 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
33943398 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
3395 [(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]>;
3396 def rm : PDI
3397 (ins VR128:$src1, i128mem:$src2),
3399 [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>;
3400 def rm : PDI
3401 (ins RC:$src1, x86memop:$src2),
33983402 !if(Is2Addr,
33993403 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
34003404 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
3401 [(set VR128:$dst, (OpVT (OpNode VR128:$src1,
3402 (bitconvert (memopv2i64 addr:$src2)))))]>;
3405 [(set RC:$dst, (OpVT (OpNode RC:$src1,
3406 (bitconvert (memop_frag addr:$src2)))))]>;
34033407 }
34043408
34053409 /// PDI_binop_rm_v2i64 - Simple SSE2 binary operator whose type is v2i64.
34243428 [(set VR128:$dst, (OpNode VR128:$src1, (memopv2i64 addr:$src2)))]>;
34253429 }
34263430
3431 /// PDI_binop_rm_v4i64 - Simple AVX2 binary operator whose type is v4i64.
3432 ///
3433 /// FIXME: we could eliminate this and use PDI_binop_rm instead if tblgen knew
3434 /// to collapse (bitconvert VT to VT) into its operand.
3435 ///
3436 multiclass PDI_binop_rm_v4i64 opc, string OpcodeStr, SDNode OpNode,
3437 bit IsCommutable = 0> {
3438 let isCommutable = IsCommutable in
3439 def rr : PDI
3440 (ins VR256:$src1, VR256:$src2),
3441 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3442 [(set VR256:$dst, (v4i64 (OpNode VR256:$src1, VR256:$src2)))]>;
3443 def rm : PDI
3444 (ins VR256:$src1, i256mem:$src2),
3445 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3446 [(set VR256:$dst, (OpNode VR256:$src1, (memopv4i64 addr:$src2)))]>;
3447 }
3448
34273449 } // ExeDomain = SSEPackedInt
34283450
34293451 // 128-bit Integer Arithmetic
34303452
34313453 let Predicates = [HasAVX] in {
3432 defm VPADDB : PDI_binop_rm<0xFC, "vpaddb", add, v16i8, 1, 0 /*3addr*/>, VEX_4V;
3433 defm VPADDW : PDI_binop_rm<0xFD, "vpaddw", add, v8i16, 1, 0>, VEX_4V;
3434 defm VPADDD : PDI_binop_rm<0xFE, "vpaddd", add, v4i32, 1, 0>, VEX_4V;
3454 defm VPADDB : PDI_binop_rm<0xFC, "vpaddb", add, v16i8, VR128, memopv2i64,
3455 i128mem, 1, 0 /*3addr*/>, VEX_4V;
3456 defm VPADDW : PDI_binop_rm<0xFD, "vpaddw", add, v8i16, VR128, memopv2i64,
3457 i128mem, 1, 0>, VEX_4V;
3458 defm VPADDD : PDI_binop_rm<0xFE, "vpaddd", add, v4i32, VR128, memopv2i64,
3459 i128mem, 1, 0>, VEX_4V;
34353460 defm VPADDQ : PDI_binop_rm_v2i64<0xD4, "vpaddq", add, 1, 0>, VEX_4V;
3436 defm VPMULLW : PDI_binop_rm<0xD5, "vpmullw", mul, v8i16, 1, 0>, VEX_4V;
3437 defm VPSUBB : PDI_binop_rm<0xF8, "vpsubb", sub, v16i8, 0, 0>, VEX_4V;
3438 defm VPSUBW : PDI_binop_rm<0xF9, "vpsubw", sub, v8i16, 0, 0>, VEX_4V;
3439 defm VPSUBD : PDI_binop_rm<0xFA, "vpsubd", sub, v4i32, 0, 0>, VEX_4V;
3461 defm VPMULLW : PDI_binop_rm<0xD5, "vpmullw", mul, v8i16, VR128, memopv2i64,
3462 i128mem, 1, 0>, VEX_4V;
3463 defm VPSUBB : PDI_binop_rm<0xF8, "vpsubb", sub, v16i8, VR128, memopv2i64,
3464 i128mem, 0, 0>, VEX_4V;
3465 defm VPSUBW : PDI_binop_rm<0xF9, "vpsubw", sub, v8i16, VR128, memopv2i64,
3466 i128mem, 0, 0>, VEX_4V;
3467 defm VPSUBD : PDI_binop_rm<0xFA, "vpsubd", sub, v4i32, VR128, memopv2i64,
3468 i128mem, 0, 0>, VEX_4V;
34403469 defm VPSUBQ : PDI_binop_rm_v2i64<0xFB, "vpsubq", sub, 0, 0>, VEX_4V;
34413470
34423471 // Intrinsic forms
3443 defm VPSUBSB : PDI_binop_rm_int<0xE8, "vpsubsb" , int_x86_sse2_psubs_b, 0, 0>,
3444 VEX_4V;
3445 defm VPSUBSW : PDI_binop_rm_int<0xE9, "vpsubsw" , int_x86_sse2_psubs_w, 0, 0>,
3446 VEX_4V;
3447 defm VPSUBUSB : PDI_binop_rm_int<0xD8, "vpsubusb", int_x86_sse2_psubus_b, 0, 0>,
3448 VEX_4V;
3449 defm VPSUBUSW : PDI_binop_rm_int<0xD9, "vpsubusw", int_x86_sse2_psubus_w, 0, 0>,
3450 VEX_4V;
3451 defm VPADDSB : PDI_binop_rm_int<0xEC, "vpaddsb" , int_x86_sse2_padds_b, 1, 0>,
3452 VEX_4V;
3453 defm VPADDSW : PDI_binop_rm_int<0xED, "vpaddsw" , int_x86_sse2_padds_w, 1, 0>,
3454 VEX_4V;
3455 defm VPADDUSB : PDI_binop_rm_int<0xDC, "vpaddusb", int_x86_sse2_paddus_b, 1, 0>,
3456 VEX_4V;
3457 defm VPADDUSW : PDI_binop_rm_int<0xDD, "vpaddusw", int_x86_sse2_paddus_w, 1, 0>,
3458 VEX_4V;
3459 defm VPMULHUW : PDI_binop_rm_int<0xE4, "vpmulhuw", int_x86_sse2_pmulhu_w, 1, 0>,
3460 VEX_4V;
3461 defm VPMULHW : PDI_binop_rm_int<0xE5, "vpmulhw" , int_x86_sse2_pmulh_w, 1, 0>,
3462 VEX_4V;
3463 defm VPMULUDQ : PDI_binop_rm_int<0xF4, "vpmuludq", int_x86_sse2_pmulu_dq, 1, 0>,
3464 VEX_4V;
3465 defm VPMADDWD : PDI_binop_rm_int<0xF5, "vpmaddwd", int_x86_sse2_pmadd_wd, 1, 0>,
3466 VEX_4V;
3467 defm VPAVGB : PDI_binop_rm_int<0xE0, "vpavgb", int_x86_sse2_pavg_b, 1, 0>,
3468 VEX_4V;
3469 defm VPAVGW : PDI_binop_rm_int<0xE3, "vpavgw", int_x86_sse2_pavg_w, 1, 0>,
3470 VEX_4V;
3471 defm VPMINUB : PDI_binop_rm_int<0xDA, "vpminub", int_x86_sse2_pminu_b, 1, 0>,
3472 VEX_4V;
3473 defm VPMINSW : PDI_binop_rm_int<0xEA, "vpminsw", int_x86_sse2_pmins_w, 1, 0>,
3474 VEX_4V;
3475 defm VPMAXUB : PDI_binop_rm_int<0xDE, "vpmaxub", int_x86_sse2_pmaxu_b, 1, 0>,
3476 VEX_4V;
3477 defm VPMAXSW : PDI_binop_rm_int<0xEE, "vpmaxsw", int_x86_sse2_pmaxs_w, 1, 0>,
3478 VEX_4V;
3479 defm VPSADBW : PDI_binop_rm_int<0xF6, "vpsadbw", int_x86_sse2_psad_bw, 1, 0>,
3480 VEX_4V;
3472 defm VPSUBSB : PDI_binop_rm_int<0xE8, "vpsubsb" , int_x86_sse2_psubs_b,
3473 VR128, memopv2i64, i128mem, 0, 0>, VEX_4V;
3474 defm VPSUBSW : PDI_binop_rm_int<0xE9, "vpsubsw" , int_x86_sse2_psubs_w,
3475 VR128, memopv2i64, i128mem, 0, 0>, VEX_4V;
3476 defm VPSUBUSB : PDI_binop_rm_int<0xD8, "vpsubusb", int_x86_sse2_psubus_b,
3477 VR128, memopv2i64, i128mem, 0, 0>, VEX_4V;
3478 defm VPSUBUSW : PDI_binop_rm_int<0xD9, "vpsubusw", int_x86_sse2_psubus_w,
3479 VR128, memopv2i64, i128mem, 0, 0>, VEX_4V;
3480 defm VPADDSB : PDI_binop_rm_int<0xEC, "vpaddsb" , int_x86_sse2_padds_b,
3481 VR128, memopv2i64, i128mem, 1, 0>, VEX_4V;
3482 defm VPADDSW : PDI_binop_rm_int<0xED, "vpaddsw" , int_x86_sse2_padds_w,
3483 VR128, memopv2i64, i128mem, 1, 0>, VEX_4V;
3484 defm VPADDUSB : PDI_binop_rm_int<0xDC, "vpaddusb", int_x86_sse2_paddus_b,
3485 VR128, memopv2i64, i128mem, 1, 0>, VEX_4V;
3486 defm VPADDUSW : PDI_binop_rm_int<0xDD, "vpaddusw", int_x86_sse2_paddus_w,
3487 VR128, memopv2i64, i128mem, 1, 0>, VEX_4V;
3488 defm VPMULHUW : PDI_binop_rm_int<0xE4, "vpmulhuw", int_x86_sse2_pmulhu_w,
3489 VR128, memopv2i64, i128mem, 1, 0>, VEX_4V;
3490 defm VPMULHW : PDI_binop_rm_int<0xE5, "vpmulhw" , int_x86_sse2_pmulh_w,
3491 VR128, memopv2i64, i128mem, 1, 0>, VEX_4V;
3492 defm VPMULUDQ : PDI_binop_rm_int<0xF4, "vpmuludq", int_x86_sse2_pmulu_dq,
3493 VR128, memopv2i64, i128mem, 1, 0>, VEX_4V;
3494 defm VPMADDWD : PDI_binop_rm_int<0xF5, "vpmaddwd", int_x86_sse2_pmadd_wd,
3495 VR128, memopv2i64, i128mem, 1, 0>, VEX_4V;
3496 defm VPAVGB : PDI_binop_rm_int<0xE0, "vpavgb", int_x86_sse2_pavg_b,
3497 VR128, memopv2i64, i128mem, 1, 0>, VEX_4V;
3498 defm VPAVGW : PDI_binop_rm_int<0xE3, "vpavgw", int_x86_sse2_pavg_w,
3499 VR128, memopv2i64, i128mem, 1, 0>, VEX_4V;
3500 defm VPMINUB : PDI_binop_rm_int<0xDA, "vpminub", int_x86_sse2_pminu_b,
3501 VR128, memopv2i64, i128mem, 1, 0>, VEX_4V;
3502 defm VPMINSW : PDI_binop_rm_int<0xEA, "vpminsw", int_x86_sse2_pmins_w,
3503 VR128, memopv2i64, i128mem, 1, 0>, VEX_4V;
3504 defm VPMAXUB : PDI_binop_rm_int<0xDE, "vpmaxub", int_x86_sse2_pmaxu_b,
3505 VR128, memopv2i64, i128mem, 1, 0>, VEX_4V;
3506 defm VPMAXSW : PDI_binop_rm_int<0xEE, "vpmaxsw", int_x86_sse2_pmaxs_w,
3507 VR128, memopv2i64, i128mem, 1, 0>, VEX_4V;
3508 defm VPSADBW : PDI_binop_rm_int<0xF6, "vpsadbw", int_x86_sse2_psad_bw,
3509 VR128, memopv2i64, i128mem, 1, 0>, VEX_4V;
3510 }
3511
3512 let Predicates = [HasAVX2] in {
3513 defm VPADDBY : PDI_binop_rm<0xFC, "vpaddb", add, v32i8, VR256, memopv4i64,
3514 i256mem, 1, 0>, VEX_4V;
3515 defm VPADDWY : PDI_binop_rm<0xFD, "vpaddw", add, v16i16, VR256, memopv4i64,
3516 i256mem, 1, 0>, VEX_4V;
3517 defm VPADDDY : PDI_binop_rm<0xFE, "vpaddd", add, v8i32, VR256, memopv4i64,
3518 i256mem, 1, 0>, VEX_4V;
3519 defm VPADDQY : PDI_binop_rm_v4i64<0xD4, "vpaddq", add, 1>, VEX_4V;
3520 defm VPMULLWY : PDI_binop_rm<0xD5, "vpmullw", mul, v16i16, VR256, memopv4i64,
3521 i256mem, 1, 0>, VEX_4V;
3522 defm VPSUBBY : PDI_binop_rm<0xF8, "vpsubb", sub, v32i8, VR256, memopv4i64,
3523 i256mem, 0, 0>, VEX_4V;
3524 defm VPSUBWY : PDI_binop_rm<0xF9, "vpsubw", sub, v16i16,VR256, memopv4i64,
3525 i256mem, 0, 0>, VEX_4V;
3526 defm VPSUBDY : PDI_binop_rm<0xFA, "vpsubd", sub, v8i32, VR256, memopv4i64,
3527 i256mem, 0, 0>, VEX_4V;
3528 defm VPSUBQY : PDI_binop_rm_v4i64<0xFB, "vpsubq", sub, 0>, VEX_4V;
3529
3530 // Intrinsic forms
3531 defm VPSUBSBY : PDI_binop_rm_int<0xE8, "vpsubsb" , int_x86_avx2_psubs_b,
3532 VR256, memopv4i64, i256mem, 0, 0>, VEX_4V;
3533 defm VPSUBSWY : PDI_binop_rm_int<0xE9, "vpsubsw" , int_x86_avx2_psubs_w,
3534 VR256, memopv4i64, i256mem, 0, 0>, VEX_4V;
3535 defm VPSUBUSBY : PDI_binop_rm_int<0xD8, "vpsubusb", int_x86_avx2_psubus_b,
3536 VR256, memopv4i64, i256mem, 0, 0>, VEX_4V;
3537 defm VPSUBUSWY : PDI_binop_rm_int<0xD9, "vpsubusw", int_x86_avx2_psubus_w,
3538 VR256, memopv4i64, i256mem, 0, 0>, VEX_4V;
3539 defm VPADDSBY : PDI_binop_rm_int<0xEC, "vpaddsb" , int_x86_avx2_padds_b,
3540 VR256, memopv4i64, i256mem, 1, 0>, VEX_4V;
3541 defm VPADDSWY : PDI_binop_rm_int<0xED, "vpaddsw" , int_x86_avx2_padds_w,
3542 VR256, memopv4i64, i256mem, 1, 0>, VEX_4V;
3543 defm VPADDUSBY : PDI_binop_rm_int<0xDC, "vpaddusb", int_x86_avx2_paddus_b,
3544 VR256, memopv4i64, i256mem, 1, 0>, VEX_4V;
3545 defm VPADDUSWY : PDI_binop_rm_int<0xDD, "vpaddusw", int_x86_avx2_paddus_w,
3546 VR256, memopv4i64, i256mem, 1, 0>, VEX_4V;
3547 defm VPMULHUWY : PDI_binop_rm_int<0xE4, "vpmulhuw", int_x86_avx2_pmulhu_w,
3548 VR256, memopv4i64, i256mem, 1, 0>, VEX_4V;
3549 defm VPMULHWY : PDI_binop_rm_int<0xE5, "vpmulhw" , int_x86_avx2_pmulh_w,
3550 VR256, memopv4i64, i256mem, 1, 0>, VEX_4V;
3551 defm VPMULUDQY : PDI_binop_rm_int<0xF4, "vpmuludq", int_x86_avx2_pmulu_dq,
3552 VR256, memopv4i64, i256mem, 1, 0>, VEX_4V;
3553 defm VPMADDWDY : PDI_binop_rm_int<0xF5, "vpmaddwd", int_x86_avx2_pmadd_wd,
3554 VR256, memopv4i64, i256mem, 1, 0>, VEX_4V;
3555 defm VPAVGBY : PDI_binop_rm_int<0xE0, "vpavgb", int_x86_avx2_pavg_b,
3556 VR256, memopv4i64, i256mem, 1, 0>, VEX_4V;
3557 defm VPAVGWY : PDI_binop_rm_int<0xE3, "vpavgw", int_x86_avx2_pavg_w,
3558 VR256, memopv4i64, i256mem, 1, 0>, VEX_4V;
3559 defm VPMINUBY : PDI_binop_rm_int<0xDA, "vpminub", int_x86_avx2_pminu_b,
3560 VR256, memopv4i64, i256mem, 1, 0>, VEX_4V;
3561 defm VPMINSWY : PDI_binop_rm_int<0xEA, "vpminsw", int_x86_avx2_pmins_w,
3562 VR256, memopv4i64, i256mem, 1, 0>, VEX_4V;
3563 defm VPMAXUBY : PDI_binop_rm_int<0xDE, "vpmaxub", int_x86_avx2_pmaxu_b,
3564 VR256, memopv4i64, i256mem, 1, 0>, VEX_4V;
3565 defm VPMAXSWY : PDI_binop_rm_int<0xEE, "vpmaxsw", int_x86_avx2_pmaxs_w,
3566 VR256, memopv4i64, i256mem, 1, 0>, VEX_4V;
3567 defm VPSADBWY : PDI_binop_rm_int<0xF6, "vpsadbw", int_x86_avx2_psad_bw,
3568 VR256, memopv4i64, i256mem, 1, 0>, VEX_4V;
34813569 }
34823570
34833571 let Constraints = "$src1 = $dst" in {
3484 defm PADDB : PDI_binop_rm<0xFC, "paddb", add, v16i8, 1>;
3485 defm PADDW : PDI_binop_rm<0xFD, "paddw", add, v8i16, 1>;
3486 defm PADDD : PDI_binop_rm<0xFE, "paddd", add, v4i32, 1>;
3572 defm PADDB : PDI_binop_rm<0xFC, "paddb", add, v16i8, VR128, memopv2i64,
3573 i128mem, 1>;
3574 defm PADDW : PDI_binop_rm<0xFD, "paddw", add, v8i16, VR128, memopv2i64,
3575 i128mem, 1>;
3576 defm PADDD : PDI_binop_rm<0xFE, "paddd", add, v4i32, VR128, memopv2i64,
3577 i128mem, 1>;
34873578 defm PADDQ : PDI_binop_rm_v2i64<0xD4, "paddq", add, 1>;
3488 defm PMULLW : PDI_binop_rm<0xD5, "pmullw", mul, v8i16, 1>;
3489 defm PSUBB : PDI_binop_rm<0xF8, "psubb", sub, v16i8>;
3490 defm PSUBW : PDI_binop_rm<0xF9, "psubw", sub, v8i16>;
3491 defm PSUBD : PDI_binop_rm<0xFA, "psubd", sub, v4i32>;
3579 defm PMULLW : PDI_binop_rm<0xD5, "pmullw", mul, v8i16, VR128, memopv2i64,
3580 i128mem, 1>;
3581 defm PSUBB : PDI_binop_rm<0xF8, "psubb", sub, v16i8, VR128, memopv2i64,
3582 i128mem>;
3583 defm PSUBW : PDI_binop_rm<0xF9, "psubw", sub, v8i16, VR128, memopv2i64,
3584 i128mem>;
3585 defm PSUBD : PDI_binop_rm<0xFA, "psubd", sub, v4i32, VR128, memopv2i64,
3586 i128mem>;
34923587 defm PSUBQ : PDI_binop_rm_v2i64<0xFB, "psubq", sub>;
34933588
34943589 // Intrinsic forms
3495 defm PSUBSB : PDI_binop_rm_int<0xE8, "psubsb" , int_x86_sse2_psubs_b>;
3496 defm PSUBSW : PDI_binop_rm_int<0xE9, "psubsw" , int_x86_sse2_psubs_w>;
3497 defm PSUBUSB : PDI_binop_rm_int<0xD8, "psubusb", int_x86_sse2_psubus_b>;
3498 defm PSUBUSW : PDI_binop_rm_int<0xD9, "psubusw", int_x86_sse2_psubus_w>;
3499 defm PADDSB : PDI_binop_rm_int<0xEC, "paddsb" , int_x86_sse2_padds_b, 1>;
3500 defm PADDSW : PDI_binop_rm_int<0xED, "paddsw" , int_x86_sse2_padds_w, 1>;
3501 defm PADDUSB : PDI_binop_rm_int<0xDC, "paddusb", int_x86_sse2_paddus_b, 1>;
3502 defm PADDUSW : PDI_binop_rm_int<0xDD, "paddusw", int_x86_sse2_paddus_w, 1>;
3503 defm PMULHUW : PDI_binop_rm_int<0xE4, "pmulhuw", int_x86_sse2_pmulhu_w, 1>;
3504 defm PMULHW : PDI_binop_rm_int<0xE5, "pmulhw" , int_x86_sse2_pmulh_w, 1>;
3505 defm PMULUDQ : PDI_binop_rm_int<0xF4, "pmuludq", int_x86_sse2_pmulu_dq, 1>;
3506 defm PMADDWD : PDI_binop_rm_int<0xF5, "pmaddwd", int_x86_sse2_pmadd_wd, 1>;
3507 defm PAVGB : PDI_binop_rm_int<0xE0, "pavgb", int_x86_sse2_pavg_b, 1>;
3508 defm PAVGW : PDI_binop_rm_int<0xE3, "pavgw", int_x86_sse2_pavg_w, 1>;
3509 defm PMINUB : PDI_binop_rm_int<0xDA, "pminub", int_x86_sse2_pminu_b, 1>;
3510 defm PMINSW : PDI_binop_rm_int<0xEA, "pminsw", int_x86_sse2_pmins_w, 1>;
3511 defm PMAXUB : PDI_binop_rm_int<0xDE, "pmaxub", int_x86_sse2_pmaxu_b, 1>;
3512 defm PMAXSW : PDI_binop_rm_int<0xEE, "pmaxsw", int_x86_sse2_pmaxs_w, 1>;
3513 defm PSADBW : PDI_binop_rm_int<0xF6, "psadbw", int_x86_sse2_psad_bw, 1>;
3590 defm PSUBSB : PDI_binop_rm_int<0xE8, "psubsb" , int_x86_sse2_psubs_b,
3591 VR128, memopv2i64, i128mem>;
3592 defm PSUBSW : PDI_binop_rm_int<0xE9, "psubsw" , int_x86_sse2_psubs_w,
3593 VR128, memopv2i64, i128mem>;
3594 defm PSUBUSB : PDI_binop_rm_int<0xD8, "psubusb", int_x86_sse2_psubus_b,
3595 VR128, memopv2i64, i128mem>;
3596 defm PSUBUSW : PDI_binop_rm_int<0xD9, "psubusw", int_x86_sse2_psubus_w,
3597 VR128, memopv2i64, i128mem>;
3598 defm PADDSB : PDI_binop_rm_int<0xEC, "paddsb" , int_x86_sse2_padds_b,
3599 VR128, memopv2i64, i128mem, 1>;
3600 defm PADDSW : PDI_binop_rm_int<0xED, "paddsw" , int_x86_sse2_padds_w,
3601 VR128, memopv2i64, i128mem, 1>;
3602 defm PADDUSB : PDI_binop_rm_int<0xDC, "paddusb", int_x86_sse2_paddus_b,
3603 VR128, memopv2i64, i128mem, 1>;
3604 defm PADDUSW : PDI_binop_rm_int<0xDD, "paddusw", int_x86_sse2_paddus_w,
3605 VR128, memopv2i64, i128mem, 1>;
3606 defm PMULHUW : PDI_binop_rm_int<0xE4, "pmulhuw", int_x86_sse2_pmulhu_w,
3607 VR128, memopv2i64, i128mem, 1>;
3608 defm PMULHW : PDI_binop_rm_int<0xE5, "pmulhw" , int_x86_sse2_pmulh_w,
3609 VR128, memopv2i64, i128mem, 1>;
3610 defm PMULUDQ : PDI_binop_rm_int<0xF4, "pmuludq", int_x86_sse2_pmulu_dq,
3611 VR128, memopv2i64, i128mem, 1>;
3612 defm PMADDWD : PDI_binop_rm_int<0xF5, "pmaddwd", int_x86_sse2_pmadd_wd,
3613 VR128, memopv2i64, i128mem, 1>;
3614 defm PAVGB : PDI_binop_rm_int<0xE0, "pavgb", int_x86_sse2_pavg_b,
3615 VR128, memopv2i64, i128mem, 1>;
3616 defm PAVGW : PDI_binop_rm_int<0xE3, "pavgw", int_x86_sse2_pavg_w,
3617 VR128, memopv2i64, i128mem, 1>;
3618 defm PMINUB : PDI_binop_rm_int<0xDA, "pminub", int_x86_sse2_pminu_b,
3619 VR128, memopv2i64, i128mem, 1>;
3620 defm PMINSW : PDI_binop_rm_int<0xEA, "pminsw", int_x86_sse2_pmins_w,
3621 VR128, memopv2i64, i128mem, 1>;
3622 defm PMAXUB : PDI_binop_rm_int<0xDE, "pmaxub", int_x86_sse2_pmaxu_b,
3623 VR128, memopv2i64, i128mem, 1>;
3624 defm PMAXSW : PDI_binop_rm_int<0xEE, "pmaxsw", int_x86_sse2_pmaxs_w,
3625 VR128, memopv2i64, i128mem, 1>;
3626 defm PSADBW : PDI_binop_rm_int<0xF6, "psadbw", int_x86_sse2_psad_bw,
3627 VR128, memopv2i64, i128mem, 1>;
35143628
35153629 } // Constraints = "$src1 = $dst"
35163630
35203634
35213635 let Predicates = [HasAVX] in {
35223636 defm VPSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "vpsllw",
3523 int_x86_sse2_psll_w, int_x86_sse2_pslli_w, 0>,
3524 VEX_4V;
3637 int_x86_sse2_psll_w, int_x86_sse2_pslli_w,
3638 VR128, 0>, VEX_4V;
35253639 defm VPSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "vpslld",
3526 int_x86_sse2_psll_d, int_x86_sse2_pslli_d, 0>,
3527 VEX_4V;
3640 int_x86_sse2_psll_d, int_x86_sse2_pslli_d,
3641 VR128, 0>, VEX_4V;
35283642 defm VPSLLQ : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "vpsllq",
3529 int_x86_sse2_psll_q, int_x86_sse2_pslli_q, 0>,
3530 VEX_4V;
3643 int_x86_sse2_psll_q, int_x86_sse2_pslli_q,
3644 VR128, 0>, VEX_4V;
35313645
35323646 defm VPSRLW : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "vpsrlw",
3533 int_x86_sse2_psrl_w, int_x86_sse2_psrli_w, 0>,
3534 VEX_4V;
3647 int_x86_sse2_psrl_w, int_x86_sse2_psrli_w,
3648 VR128, 0>, VEX_4V;
35353649 defm VPSRLD : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "vpsrld",
3536 int_x86_sse2_psrl_d, int_x86_sse2_psrli_d, 0>,
3537 VEX_4V;
3650 int_x86_sse2_psrl_d, int_x86_sse2_psrli_d,
3651 VR128, 0>, VEX_4V;
35383652 defm VPSRLQ : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "vpsrlq",
3539 int_x86_sse2_psrl_q, int_x86_sse2_psrli_q, 0>,
3540 VEX_4V;
3653 int_x86_sse2_psrl_q, int_x86_sse2_psrli_q,
3654 VR128, 0>, VEX_4V;
35413655
35423656 defm VPSRAW : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "vpsraw",
3543 int_x86_sse2_psra_w, int_x86_sse2_psrai_w, 0>,
3544 VEX_4V;
3657 int_x86_sse2_psra_w, int_x86_sse2_psrai_w,
3658 VR128, 0>, VEX_4V;
35453659 defm VPSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "vpsrad",
3546 int_x86_sse2_psra_d, int_x86_sse2_psrai_d, 0>,
3547 VEX_4V;
3660 int_x86_sse2_psra_d, int_x86_sse2_psrai_d,
3661 VR128, 0>, VEX_4V;
35483662
35493663 defm VPAND : PDI_binop_rm_v2i64<0xDB, "vpand", and, 1, 0>, VEX_4V;
35503664 defm VPOR : PDI_binop_rm_v2i64<0xEB, "vpor" , or, 1, 0>, VEX_4V;
35773691 }
35783692 }
35793693
3694 let Predicates = [HasAVX2] in {
3695 defm VPSLLWY : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "vpsllw",
3696 int_x86_avx2_psll_w, int_x86_avx2_pslli_w,
3697 VR256, 0>, VEX_4V;
3698 defm VPSLLDY : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "vpslld",
3699 int_x86_avx2_psll_d, int_x86_avx2_pslli_d,
3700 VR256, 0>, VEX_4V;
3701 defm VPSLLQY : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "vpsllq",
3702 int_x86_avx2_psll_q, int_x86_avx2_pslli_q,
3703 VR256, 0>, VEX_4V;
3704
3705 defm VPSRLWY : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "vpsrlw",
3706 int_x86_avx2_psrl_w, int_x86_avx2_psrli_w,
3707 VR256, 0>, VEX_4V;
3708 defm VPSRLDY : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "vpsrld",
3709 int_x86_avx2_psrl_d, int_x86_avx2_psrli_d,
3710 VR256, 0>, VEX_4V;
3711 defm VPSRLQY : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "vpsrlq",
3712 int_x86_avx2_psrl_q, int_x86_avx2_psrli_q,
3713 VR256, 0>, VEX_4V;
3714
3715 defm VPSRAWY : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "vpsraw",
3716 int_x86_avx2_psra_w, int_x86_avx2_psrai_w,
3717 VR256, 0>, VEX_4V;
3718 defm VPSRADY : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "vpsrad",
3719 int_x86_avx2_psra_d, int_x86_avx2_psrai_d,
3720 VR256, 0>, VEX_4V;
3721
3722 defm VPANDY : PDI_binop_rm_v4i64<0xDB, "vpand", and, 1>, VEX_4V;
3723 defm VPORY : PDI_binop_rm_v4i64<0xEB, "vpor" , or, 1>, VEX_4V;
3724 defm VPXORY : PDI_binop_rm_v4i64<0xEF, "vpxor", xor, 1>, VEX_4V;
3725
// VR256 forms of vpslldq / vpsrldq / vpandn, all placed in the SSEPackedInt
// execution domain.
3726 let ExeDomain = SSEPackedInt in {
3727 let neverHasSideEffects = 1 in {
3728 // 128-bit logical shifts.
// Both shifts share opcode 0x73 and differ only in the ModRM form (MRM7r vs
// MRM3r). Their pattern lists are empty ([]), so they are only reachable
// through explicit Pat<> entries; neverHasSideEffects is set by hand because
// there is no pattern to infer it from.
3729 def VPSLLDQYri : PDIi8<0x73, MRM7r,
3730 (outs VR256:$dst), (ins VR256:$src1, i32i8imm:$src2),
3731 "vpslldq\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
3732 VEX_4V;
3733 def VPSRLDQYri : PDIi8<0x73, MRM3r,
3734 (outs VR256:$dst), (ins VR256:$src1, i32i8imm:$src2),
3735 "vpsrldq\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
3736 VEX_4V;
3737 // NOTE(review): the original remark here ("PSRADQYri doesn't exist in SSE[1-3]") looks copied from the 128-bit SSE block; no arithmetic counterpart of these shifts is defined in this block.
3738 }
// vpandn, register-register form: selected from X86andnp on v4i64.
3739 def VPANDNYrr : PDI<0xDF, MRMSrcReg,
3740 (outs VR256:$dst), (ins VR256:$src1, VR256:$src2),
3741 "vpandn\t{$src2, $src1, $dst|$dst, $src1, $src2}",
3742 [(set VR256:$dst,
3743 (v4i64 (X86andnp VR256:$src1, VR256:$src2)))]>,VEX_4V;
3744
// vpandn, register-memory form: second operand is a memopv4i64 load from
// $src2. Unlike the rr form, the X86andnp node has no explicit v4i64 wrapper.
3745 def VPANDNYrm : PDI<0xDF, MRMSrcMem,
3746 (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2),
3747 "vpandn\t{$src2, $src1, $dst|$dst, $src1, $src2}",
3748 [(set VR256:$dst, (X86andnp VR256:$src1,
3749 (memopv4i64 addr:$src2)))]>, VEX_4V;
3750 }
3751 }
3752
35803753 let Constraints = "$src1 = $dst" in {
35813754 defm PSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw",
3582 int_x86_sse2_psll_w, int_x86_sse2_pslli_w>;
3755 int_x86_sse2_psll_w, int_x86_sse2_pslli_w,
3756 VR128>;
35833757 defm PSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "pslld",
3584 int_x86_sse2_psll_d, int_x86_sse2_pslli_d>;
3758 int_x86_sse2_psll_d, int_x86_sse2_pslli_d,
3759 VR128>;
35853760 defm PSLLQ : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "psllq",
3586 int_x86_sse2_psll_q, int_x86_sse2_pslli_q>;
3761 int_x86_sse2_psll_q, int_x86_sse2_pslli_q,
3762 VR128>;
35873763
35883764 defm PSRLW : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw",
3589 int_x86_sse2_psrl_w, int_x86_sse2_psrli_w>;
3765 int_x86_sse2_psrl_w, int_x86_sse2_psrli_w,
3766 VR128>;
35903767 defm PSRLD : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "psrld",
3591 int_x86_sse2_psrl_d, int_x86_sse2_psrli_d>;
3768 int_x86_sse2_psrl_d, int_x86_sse2_psrli_d,
3769 VR128>;
35923770 defm PSRLQ : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "psrlq",
3593 int_x86_sse2_psrl_q, int_x86_sse2_psrli_q>;
3771 int_x86_sse2_psrl_q, int_x86_sse2_psrli_q,
3772 VR128>;
35943773
35953774 defm PSRAW : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw",
3596 int_x86_sse2_psra_w, int_x86_sse2_psrai_w>;
3775 int_x86_sse2_psra_w, int_x86_sse2_psrai_w,
3776 VR128>;
35973777 defm PSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad",
3598 int_x86_sse2_psra_d, int_x86_sse2_psrai_d>;
3778 int_x86_sse2_psra_d, int_x86_sse2_psrai_d,
3779 VR128>;
35993780
36003781 defm PAND : PDI_binop_rm_v2i64<0xDB, "pand", and, 1>;
36013782 defm POR : PDI_binop_rm_v2i64<0xEB, "por" , or, 1>;
36413822 (v2i64 (VPSRLDQri VR128:$src, (BYTE_imm imm:$amt)))>;
36423823 }
36433824
// Selection patterns (AVX2 only) that map the four whole-register byte-shift
// intrinsics onto VPSLLDQYri / VPSRLDQYri.
// The psll_dq / psrl_dq forms pass their immediate through BYTE_imm, while the
// *_bs forms forward the immediate unchanged.
// NOTE(review): BYTE_imm is defined elsewhere; presumably it converts a bit
// count into a byte count -- confirm.
3825 let Predicates = [HasAVX2] in {
3826 def : Pat<(int_x86_avx2_psll_dq VR256:$src1, imm:$src2),
3827 (v4i64 (VPSLLDQYri VR256:$src1, (BYTE_imm imm:$src2)))>;
3828 def : Pat<(int_x86_avx2_psrl_dq VR256:$src1, imm:$src2),
3829 (v4i64 (VPSRLDQYri VR256:$src1, (BYTE_imm imm:$src2)))>;
3830 def : Pat<(int_x86_avx2_psll_dq_bs VR256:$src1, imm:$src2),
3831 (v4i64 (VPSLLDQYri VR256:$src1, imm:$src2))>;
3832 def : Pat<(int_x86_avx2_psrl_dq_bs VR256:$src1, imm:$src2),
3833 (v4i64 (VPSRLDQYri VR256:$src1, imm:$src2))>;
3834 }
3835
36443836 let Predicates = [HasSSE2] in {
36453837 def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2),
36463838 (v2i64 (PSLLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
36653857 //===---------------------------------------------------------------------===//
36663858
36673859 let Predicates = [HasAVX] in {
3668 defm VPCMPEQB : PDI_binop_rm_int<0x74, "vpcmpeqb", int_x86_sse2_pcmpeq_b, 1,
3669 0>, VEX_4V;
3670 defm VPCMPEQW : PDI_binop_rm_int<0x75, "vpcmpeqw", int_x86_sse2_pcmpeq_w, 1,
3671 0>, VEX_4V;
3672 defm VPCMPEQD : PDI_binop_rm_int<0x76, "vpcmpeqd", int_x86_sse2_pcmpeq_d, 1,
3673 0>, VEX_4V;
3674 defm VPCMPGTB : PDI_binop_rm_int<0x64, "vpcmpgtb", int_x86_sse2_pcmpgt_b, 0,
3675 0>, VEX_4V;
3676 defm VPCMPGTW : PDI_binop_rm_int<0x65, "vpcmpgtw", int_x86_sse2_pcmpgt_w, 0,
3677 0>, VEX_4V;
3678 defm VPCMPGTD : PDI_binop_rm_int<0x66, "vpcmpgtd", int_x86_sse2_pcmpgt_d, 0,
3679 0>, VEX_4V;
3860 defm VPCMPEQB : PDI_binop_rm_int<0x74, "vpcmpeqb", int_x86_sse2_pcmpeq_b,
3861 VR128, memopv2i64, i128mem, 1, 0>, VEX_4V;
3862 defm VPCMPEQW : PDI_binop_rm_int<0x75, "vpcmpeqw", int_x86_sse2_pcmpeq_w,
3863 VR128, memopv2i64, i128mem, 1, 0>, VEX_4V;
3864 defm VPCMPEQD : PDI_binop_rm_int<0x76, "vpcmpeqd", int_x86_sse2_pcmpeq_d,
3865 VR128, memopv2i64, i128mem, 1, 0>, VEX_4V;
3866 defm VPCMPGTB : PDI_binop_rm_int<0x64, "vpcmpgtb", int_x86_sse2_pcmpgt_b,
3867 VR128, memopv2i64, i128mem, 0, 0>, VEX_4V;
3868 defm VPCMPGTW : PDI_binop_rm_int<0x65, "vpcmpgtw", int_x86_sse2_pcmpgt_w,
3869 VR128, memopv2i64, i128mem, 0, 0>, VEX_4V;
3870 defm VPCMPGTD : PDI_binop_rm_int<0x66, "vpcmpgtd", int_x86_sse2_pcmpgt_d,
3871 VR128, memopv2i64, i128mem, 0, 0>, VEX_4V;
36803872
36813873 def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, VR128:$src2)),
36823874 (VPCMPEQBrr VR128:$src1, VR128:$src2)>;
37053897 (VPCMPGTDrm VR128:$src1, addr:$src2)>;
37063898 }
37073899
// AVX2 packed integer compares on VR256. These reuse the opcodes and mnemonics
// of the 128-bit VPCMP* definitions but bind the int_x86_avx2_pcmp* intrinsics
// and use memopv4i64 / i256mem for the memory operand.
// The pcmpeq entries pass "1, 0" and the pcmpgt entries "0, 0" as the last two
// arguments.
// NOTE(review): presumably these are the commutable and two-address flags of
// PDI_binop_rm_int -- confirm against the multiclass.
3900 let Predicates = [HasAVX2] in {
3901 defm VPCMPEQBY : PDI_binop_rm_int<0x74, "vpcmpeqb", int_x86_avx2_pcmpeq_b,
3902 VR256, memopv4i64, i256mem, 1, 0>, VEX_4V;
3903 defm VPCMPEQWY : PDI_binop_rm_int<0x75, "vpcmpeqw", int_x86_avx2_pcmpeq_w,
3904 VR256, memopv4i64, i256mem, 1, 0>, VEX_4V;
3905 defm VPCMPEQDY : PDI_binop_rm_int<0x76, "vpcmpeqd", int_x86_avx2_pcmpeq_d,
3906 VR256, memopv4i64, i256mem, 1, 0>, VEX_4V;
3907 defm VPCMPGTBY : PDI_binop_rm_int<0x64, "vpcmpgtb", int_x86_avx2_pcmpgt_b,
3908 VR256, memopv4i64, i256mem, 0, 0>, VEX_4V;
3909 defm VPCMPGTWY : PDI_binop_rm_int<0x65, "vpcmpgtw", int_x86_avx2_pcmpgt_w,
3910 VR256, memopv4i64, i256mem, 0, 0>, VEX_4V;
3911 defm VPCMPGTDY : PDI_binop_rm_int<0x66, "vpcmpgtd", int_x86_avx2_pcmpgt_d,
3912 VR256, memopv4i64, i256mem, 0, 0>, VEX_4V;
3913 }
3914
// Legacy SSE2 packed compares; the Constraints string ties $src1 to $dst.
// This listing shows both sides of the change: the earlier one-line defms
// (old numbering 3709-3714) and their replacements, which now pass VR128,
// memopv2i64 and i128mem explicitly to PDI_binop_rm_int.
37083915 let Constraints = "$src1 = $dst" in {
3709 defm PCMPEQB : PDI_binop_rm_int<0x74, "pcmpeqb", int_x86_sse2_pcmpeq_b, 1>;
3710 defm PCMPEQW : PDI_binop_rm_int<0x75, "pcmpeqw", int_x86_sse2_pcmpeq_w, 1>;
3711 defm PCMPEQD : PDI_binop_rm_int<0x76, "pcmpeqd", int_x86_sse2_pcmpeq_d, 1>;
3712 defm PCMPGTB : PDI_binop_rm_int<0x64, "pcmpgtb", int_x86_sse2_pcmpgt_b>;
3713 defm PCMPGTW : PDI_binop_rm_int<0x65, "pcmpgtw", int_x86_sse2_pcmpgt_w>;
3714 defm PCMPGTD : PDI_binop_rm_int<0x66, "pcmpgtd", int_x86_sse2_pcmpgt_d>;
3916 defm PCMPEQB : PDI_binop_rm_int<0x74, "pcmpeqb", int_x86_sse2_pcmpeq_b,
3917 VR128, memopv2i64, i128mem, 1>;
3918 defm PCMPEQW : PDI_binop_rm_int<0x75, "pcmpeqw", int_x86_sse2_pcmpeq_w,
3919 VR128, memopv2i64, i128mem, 1>;
3920 defm PCMPEQD : PDI_binop_rm_int<0x76, "pcmpeqd", int_x86_sse2_pcmpeq_d,
3921 VR128, memopv2i64, i128mem, 1>;
3922 defm PCMPGTB : PDI_binop_rm_int<0x64, "pcmpgtb", int_x86_sse2_pcmpgt_b,
3923 VR128, memopv2i64, i128mem>;
3924 defm PCMPGTW : PDI_binop_rm_int<0x65, "pcmpgtw", int_x86_sse2_pcmpgt_w,
3925 VR128, memopv2i64, i128mem>;
3926 defm PCMPGTD : PDI_binop_rm_int<0x66, "pcmpgtd", int_x86_sse2_pcmpgt_d,
3927 VR128, memopv2i64, i128mem>;
37153928 } // Constraints = "$src1 = $dst"
37163929
37173930 let Predicates = [HasSSE2] in {
37483961
// 128-bit VEX-encoded pack instructions (AVX). Old and new argument lines are
// interleaved in this listing: each old "0, 0" tail is replaced by one that
// also names VR128, memopv2i64 and i128mem.
37493962 let Predicates = [HasAVX] in {
37503963 defm VPACKSSWB : PDI_binop_rm_int<0x63, "vpacksswb", int_x86_sse2_packsswb_128,
3751 0, 0>, VEX_4V;
3964 VR128, memopv2i64, i128mem, 0, 0>, VEX_4V;
37523965 defm VPACKSSDW : PDI_binop_rm_int<0x6B, "vpackssdw", int_x86_sse2_packssdw_128,
3753 0, 0>, VEX_4V;
3966 VR128, memopv2i64, i128mem, 0, 0>, VEX_4V;
37543967 defm VPACKUSWB : PDI_binop_rm_int<0x67, "vpackuswb", int_x86_sse2_packuswb_128,
3755 0, 0>, VEX_4V;
3968 VR128, memopv2i64, i128mem, 0, 0>, VEX_4V;
3969 }
3970
// AVX2 pack instructions on VR256: same opcodes and mnemonics as the 128-bit
// VPACK* definitions, bound to the int_x86_avx2_pack* intrinsics with
// memopv4i64 / i256mem memory operands.
3971 let Predicates = [HasAVX2] in {
3972 defm VPACKSSWBY : PDI_binop_rm_int<0x63, "vpacksswb", int_x86_avx2_packsswb,
3973 VR256, memopv4i64, i256mem, 0, 0>, VEX_4V;
3974 defm VPACKSSDWY : PDI_binop_rm_int<0x6B, "vpackssdw", int_x86_avx2_packssdw,
3975 VR256, memopv4i64, i256mem, 0, 0>, VEX_4V;
3976 defm VPACKUSWBY : PDI_binop_rm_int<0x67, "vpackuswb", int_x86_avx2_packuswb,
3977 VR256, memopv4i64, i256mem, 0, 0>, VEX_4V;
37563978 }
37573979
// Legacy SSE2 pack instructions; the Constraints string ties $src1 to $dst.
// The listing contains both the earlier one-line defms (old numbering
// 3759-3761) and their replacements, which pass VR128, memopv2i64 and i128mem
// explicitly.
37583980 let Constraints = "$src1 = $dst" in {
3759 defm PACKSSWB : PDI_binop_rm_int<0x63, "packsswb", int_x86_sse2_packsswb_128>;
3760 defm PACKSSDW : PDI_binop_rm_int<0x6B, "packssdw", int_x86_sse2_packssdw_128>;
3761 defm PACKUSWB : PDI_binop_rm_int<0x67, "packuswb", int_x86_sse2_packuswb_128>;
3981 defm PACKSSWB : PDI_binop_rm_int<0x63, "packsswb", int_x86_sse2_packsswb_128,
3982 VR128, memopv2i64, i128mem>;
3983 defm PACKSSDW : PDI_binop_rm_int<0x6B, "packssdw", int_x86_sse2_packssdw_128,
3984 VR128, memopv2i64, i128mem>;
3985 defm PACKUSWB : PDI_binop_rm_int<0x67, "packuswb", int_x86_sse2_packuswb_128,
3986 VR128, memopv2i64, i128mem>;
37623987 } // Constraints = "$src1 = $dst"
37633988
37643989 //===---------------------------------------------------------------------===//
0 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=core-avx2 -mattr=avx2 | FileCheck %s
1
; Calls llvm.x86.avx2.packssdw on two <8 x i32> values (result <16 x i16>) and
; expects the mnemonic "vpackssdw" in the llc output.
2 define <16 x i16> @test_x86_avx2_packssdw(<8 x i32> %a0, <8 x i32> %a1) {
3 ; CHECK: vpackssdw
4 %res = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a0, <8 x i32> %a1) ; <<16 x i16>> [#uses=1]
5 ret <16 x i16> %res
6 }
7 declare <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32>, <8 x i32>) nounwind readnone
8
9
; Calls llvm.x86.avx2.packsswb on two <16 x i16> values (result <32 x i8>) and
; expects the mnemonic "vpacksswb" in the llc output.
10 define <32 x i8> @test_x86_avx2_packsswb(<16 x i16> %a0, <16 x i16> %a1) {
11 ; CHECK: vpacksswb
12 %res = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a0, <16 x i16> %a1) ; <<32 x i8>> [#uses=1]
13 ret <32 x i8> %res
14 }
15 declare <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16>, <16 x i16>) nounwind readnone
16
17
; Calls llvm.x86.avx2.packuswb on two <16 x i16> values (result <32 x i8>) and
; expects the mnemonic "vpackuswb" in the llc output.
18 define <32 x i8> @test_x86_avx2_packuswb(<16 x i16> %a0, <16 x i16> %a1) {
19 ; CHECK: vpackuswb
20 %res = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a0, <16 x i16> %a1) ; <<32 x i8>> [#uses=1]
21 ret <32 x i8> %res
22 }
23 declare <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16>, <16 x i16>) nounwind readnone
24
25
; Calls llvm.x86.avx2.padds.b on two <32 x i8> values and expects the mnemonic
; "vpaddsb" in the llc output.
26 define <32 x i8> @test_x86_avx2_padds_b(<32 x i8> %a0, <32 x i8> %a1) {
27 ; CHECK: vpaddsb
28 %res = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
29 ret <32 x i8> %res
30 }
31 declare <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8>, <32 x i8>) nounwind readnone
32
33
; Calls llvm.x86.avx2.padds.w on two <16 x i16> values and expects the mnemonic
; "vpaddsw" in the llc output.
34 define <16 x i16> @test_x86_avx2_padds_w(<16 x i16> %a0, <16 x i16> %a1) {
35 ; CHECK: vpaddsw
36 %res = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
37 ret <16 x i16> %res
38 }
39 declare <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16>, <16 x i16>) nounwind readnone
40
41
; Calls llvm.x86.avx2.paddus.b on two <32 x i8> values and expects the mnemonic
; "vpaddusb" in the llc output.
42 define <32 x i8> @test_x86_avx2_paddus_b(<32 x i8> %a0, <32 x i8> %a1) {
43 ; CHECK: vpaddusb
44 %res = call <32 x i8> @llvm.x86.avx2.paddus.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
45 ret <32 x i8> %res
46 }
47 declare <32 x i8> @llvm.x86.avx2.paddus.b(<32 x i8>, <32 x i8>) nounwind readnone
48
49
; Calls llvm.x86.avx2.paddus.w on two <16 x i16> values and expects the
; mnemonic "vpaddusw" in the llc output.
50 define <16 x i16> @test_x86_avx2_paddus_w(<16 x i16> %a0, <16 x i16> %a1) {
51 ; CHECK: vpaddusw
52 %res = call <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
53 ret <16 x i16> %res
54 }
55 declare <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16>, <16 x i16>) nounwind readnone
56
57
; Calls llvm.x86.avx2.pavg.b on two <32 x i8> values and expects the mnemonic
; "vpavgb" in the llc output.
58 define <32 x i8> @test_x86_avx2_pavg_b(<32 x i8> %a0, <32 x i8> %a1) {
59 ; CHECK: vpavgb
60 %res = call <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
61 ret <32 x i8> %res
62 }
63 declare <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8>, <32 x i8>) nounwind readnone
64
65
; Calls llvm.x86.avx2.pavg.w on two <16 x i16> values and expects the mnemonic
; "vpavgw" in the llc output.
66 define <16 x i16> @test_x86_avx2_pavg_w(<16 x i16> %a0, <16 x i16> %a1) {
67 ; CHECK: vpavgw
68 %res = call <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
69 ret <16 x i16> %res
70 }
71 declare <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16>, <16 x i16>) nounwind readnone
72
73
; Calls llvm.x86.avx2.pcmpeq.b on two <32 x i8> values and expects the mnemonic
; "vpcmpeqb" in the llc output.
74 define <32 x i8> @test_x86_avx2_pcmpeq_b(<32 x i8> %a0, <32 x i8> %a1) {
75 ; CHECK: vpcmpeqb
76 %res = call <32 x i8> @llvm.x86.avx2.pcmpeq.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
77 ret <32 x i8> %res
78 }
79 declare <32 x i8> @llvm.x86.avx2.pcmpeq.b(<32 x i8>, <32 x i8>) nounwind readnone
80
81
; Calls llvm.x86.avx2.pcmpeq.d on two <8 x i32> values and expects the mnemonic
; "vpcmpeqd" in the llc output.
82 define <8 x i32> @test_x86_avx2_pcmpeq_d(<8 x i32> %a0, <8 x i32> %a1) {
83 ; CHECK: vpcmpeqd
84 %res = call <8 x i32> @llvm.x86.avx2.pcmpeq.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
85 ret <8 x i32> %res
86 }
87 declare <8 x i32> @llvm.x86.avx2.pcmpeq.d(<8 x i32>, <8 x i32>) nounwind readnone
88
89
; Calls llvm.x86.avx2.pcmpeq.w on two <16 x i16> values and expects the
; mnemonic "vpcmpeqw" in the llc output.
90 define <16 x i16> @test_x86_avx2_pcmpeq_w(<16 x i16> %a0, <16 x i16> %a1) {
91 ; CHECK: vpcmpeqw
92 %res = call <16 x i16> @llvm.x86.avx2.pcmpeq.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
93 ret <16 x i16> %res
94 }
95 declare <16 x i16> @llvm.x86.avx2.pcmpeq.w(<16 x i16>, <16 x i16>) nounwind readnone
96
97
; Calls llvm.x86.avx2.pcmpgt.b on two <32 x i8> values and expects the mnemonic
; "vpcmpgtb" in the llc output.
98 define <32 x i8> @test_x86_avx2_pcmpgt_b(<32 x i8> %a0, <32 x i8> %a1) {
99 ; CHECK: vpcmpgtb
100 %res = call <32 x i8> @llvm.x86.avx2.pcmpgt.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
101 ret <32 x i8> %res
102 }
103 declare <32 x i8> @llvm.x86.avx2.pcmpgt.b(<32 x i8>, <32 x i8>) nounwind readnone
104
105
; Calls llvm.x86.avx2.pcmpgt.d on two <8 x i32> values and expects the mnemonic
; "vpcmpgtd" in the llc output.
106 define <8 x i32> @test_x86_avx2_pcmpgt_d(<8 x i32> %a0, <8 x i32> %a1) {
107 ; CHECK: vpcmpgtd
108 %res = call <8 x i32> @llvm.x86.avx2.pcmpgt.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
109 ret <8 x i32> %res
110 }
111 declare <8 x i32> @llvm.x86.avx2.pcmpgt.d(<8 x i32>, <8 x i32>) nounwind readnone
112
113
; Calls llvm.x86.avx2.pcmpgt.w on two <16 x i16> values and expects the
; mnemonic "vpcmpgtw" in the llc output.
114 define <16 x i16> @test_x86_avx2_pcmpgt_w(<16 x i16> %a0, <16 x i16> %a1) {
115 ; CHECK: vpcmpgtw
116 %res = call <16 x i16> @llvm.x86.avx2.pcmpgt.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
117 ret <16 x i16> %res
118 }
119 declare <16 x i16> @llvm.x86.avx2.pcmpgt.w(<16 x i16>, <16 x i16>) nounwind readnone
120
121
; Calls llvm.x86.avx2.pmadd.wd on two <16 x i16> values (result <8 x i32>) and
; expects the mnemonic "vpmaddwd" in the llc output.
122 define <8 x i32> @test_x86_avx2_pmadd_wd(<16 x i16> %a0, <16 x i16> %a1) {
123 ; CHECK: vpmaddwd
124 %res = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %a0, <16 x i16> %a1) ; <<8 x i32>> [#uses=1]
125 ret <8 x i32> %res
126 }
127 declare <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16>, <16 x i16>) nounwind readnone
128
129
; Calls llvm.x86.avx2.pmaxs.w on two <16 x i16> values and expects the mnemonic
; "vpmaxsw" in the llc output.
130 define <16 x i16> @test_x86_avx2_pmaxs_w(<16 x i16> %a0, <16 x i16> %a1) {
131 ; CHECK: vpmaxsw
132 %res = call <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
133 ret <16 x i16> %res
134 }
135 declare <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16>, <16 x i16>) nounwind readnone
136
137
; Calls llvm.x86.avx2.pmaxu.b on two <32 x i8> values and expects the mnemonic
; "vpmaxub" in the llc output.
138 define <32 x i8> @test_x86_avx2_pmaxu_b(<32 x i8> %a0, <32 x i8> %a1) {
139 ; CHECK: vpmaxub
140 %res = call <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
141 ret <32 x i8> %res
142 }
143 declare <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8>, <32 x i8>) nounwind readnone
144
145
; Calls llvm.x86.avx2.pmins.w on two <16 x i16> values and expects the mnemonic
; "vpminsw" in the llc output.
146 define <16 x i16> @test_x86_avx2_pmins_w(<16 x i16> %a0, <16 x i16> %a1) {
147 ; CHECK: vpminsw
148 %res = call <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
149 ret <16 x i16> %res
150 }
151 declare <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16>, <16 x i16>) nounwind readnone
152
153
; Calls llvm.x86.avx2.pminu.b on two <32 x i8> values and expects the mnemonic
; "vpminub" in the llc output.
154 define <32 x i8> @test_x86_avx2_pminu_b(<32 x i8> %a0, <32 x i8> %a1) {
155 ; CHECK: vpminub
156 %res = call <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
157 ret <32 x i8> %res
158 }
159 declare <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8>, <32 x i8>) nounwind readnone
160
161
; Calls llvm.x86.avx2.pmulh.w on two <16 x i16> values and expects the mnemonic
; "vpmulhw" in the llc output.
162 define <16 x i16> @test_x86_avx2_pmulh_w(<16 x i16> %a0, <16 x i16> %a1) {
163 ; CHECK: vpmulhw
164 %res = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
165 ret <16 x i16> %res
166 }
167 declare <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16>, <16 x i16>) nounwind readnone
168
169
; Calls llvm.x86.avx2.pmulhu.w on two <16 x i16> values and expects the
; mnemonic "vpmulhuw" in the llc output.
170 define <16 x i16> @test_x86_avx2_pmulhu_w(<16 x i16> %a0, <16 x i16> %a1) {
171 ; CHECK: vpmulhuw
172 %res = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
173 ret <16 x i16> %res
174 }
175 declare <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16>, <16 x i16>) nounwind readnone
176
177
; Calls llvm.x86.avx2.pmulu.dq on two <8 x i32> values (result <4 x i64>) and
; expects the mnemonic "vpmuludq" in the llc output.
178 define <4 x i64> @test_x86_avx2_pmulu_dq(<8 x i32> %a0, <8 x i32> %a1) {
179 ; CHECK: vpmuludq
180 %res = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> %a0, <8 x i32> %a1) ; <<4 x i64>> [#uses=1]
181 ret <4 x i64> %res
182 }
183 declare <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32>, <8 x i32>) nounwind readnone
184
185
; Calls llvm.x86.avx2.psad.bw on two <32 x i8> values (result <4 x i64>) and
; expects the mnemonic "vpsadbw" in the llc output.
186 define <4 x i64> @test_x86_avx2_psad_bw(<32 x i8> %a0, <32 x i8> %a1) {
187 ; CHECK: vpsadbw
188 %res = call <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8> %a0, <32 x i8> %a1) ; <<4 x i64>> [#uses=1]
189 ret <4 x i64> %res
190 }
191 declare <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8>, <32 x i8>) nounwind readnone
192
193
; Calls llvm.x86.avx2.psll.d with an <8 x i32> value and a <4 x i32> count
; operand, and expects "vpslld" in the llc output.
; NOTE(review): FileCheck matches substrings, so a "vpslldq" line would also
; satisfy this CHECK.
194 define <8 x i32> @test_x86_avx2_psll_d(<8 x i32> %a0, <4 x i32> %a1) {
195 ; CHECK: vpslld
196 %res = call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %a0, <4 x i32> %a1) ; <<8 x i32>> [#uses=1]
197 ret <8 x i32> %res
198 }
199 declare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) nounwind readnone
200
201
; Calls llvm.x86.avx2.psll.dq on a <4 x i64> value with the constant i32 7 and
; expects "vpslldq" in the llc output. Only the mnemonic is checked, not the
; encoded immediate.
202 define <4 x i64> @test_x86_avx2_psll_dq(<4 x i64> %a0) {
203 ; CHECK: vpslldq
204 %res = call <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
205 ret <4 x i64> %res
206 }
207 declare <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64>, i32) nounwind readnone
208
209
; Calls llvm.x86.avx2.psll.dq.bs on a <4 x i64> value with the constant i32 7
; and expects "vpslldq" in the llc output. Only the mnemonic is checked, not
; the encoded immediate.
210 define <4 x i64> @test_x86_avx2_psll_dq_bs(<4 x i64> %a0) {
211 ; CHECK: vpslldq
212 %res = call <4 x i64> @llvm.x86.avx2.psll.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
213 ret <4 x i64> %res
214 }
215 declare <4 x i64> @llvm.x86.avx2.psll.dq.bs(<4 x i64>, i32) nounwind readnone
216
217
; Calls llvm.x86.avx2.psll.q with a <4 x i64> value and a <2 x i64> count
; operand, and expects "vpsllq" in the llc output.
218 define <4 x i64> @test_x86_avx2_psll_q(<4 x i64> %a0, <2 x i64> %a1) {
219 ; CHECK: vpsllq
220 %res = call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %a0, <2 x i64> %a1) ; <<4 x i64>> [#uses=1]
221 ret <4 x i64> %res
222 }
223 declare <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64>, <2 x i64>) nounwind readnone
224
225
; Calls llvm.x86.avx2.psll.w with a <16 x i16> value and an <8 x i16> count
; operand, and expects "vpsllw" in the llc output.
226 define <16 x i16> @test_x86_avx2_psll_w(<16 x i16> %a0, <8 x i16> %a1) {
227 ; CHECK: vpsllw
228 %res = call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %a0, <8 x i16> %a1) ; <<16 x i16>> [#uses=1]
229 ret <16 x i16> %res
230 }
231 declare <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16>, <8 x i16>) nounwind readnone
232
233
; Calls llvm.x86.avx2.pslli.d on an <8 x i32> value with the constant i32 7
; and expects "vpslld" in the llc output.
; NOTE(review): FileCheck matches substrings, so a "vpslldq" line would also
; satisfy this CHECK.
234 define <8 x i32> @test_x86_avx2_pslli_d(<8 x i32> %a0) {
235 ; CHECK: vpslld
236 %res = call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %a0, i32 7) ; <<8 x i32>> [#uses=1]
237 ret <8 x i32> %res
238 }
239 declare <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32>, i32) nounwind readnone
240
241
; Calls llvm.x86.avx2.pslli.q on a <4 x i64> value with the constant i32 7 and
; expects "vpsllq" in the llc output.
242 define <4 x i64> @test_x86_avx2_pslli_q(<4 x i64> %a0) {
243 ; CHECK: vpsllq
244 %res = call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
245 ret <4 x i64> %res
246 }
247 declare <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64>, i32) nounwind readnone
248
249
; Calls llvm.x86.avx2.pslli.w on a <16 x i16> value with the constant i32 7
; and expects "vpsllw" in the llc output.
250 define <16 x i16> @test_x86_avx2_pslli_w(<16 x i16> %a0) {
251 ; CHECK: vpsllw
252 %res = call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %a0, i32 7) ; <<16 x i16>> [#uses=1]
253 ret <16 x i16> %res
254 }
255 declare <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16>, i32) nounwind readnone
256
257
; Calls llvm.x86.avx2.psra.d with an <8 x i32> value and a <4 x i32> count
; operand, and expects "vpsrad" in the llc output.
258 define <8 x i32> @test_x86_avx2_psra_d(<8 x i32> %a0, <4 x i32> %a1) {
259 ; CHECK: vpsrad
260 %res = call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %a0, <4 x i32> %a1) ; <<8 x i32>> [#uses=1]
261 ret <8 x i32> %res
262 }
263 declare <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32>, <4 x i32>) nounwind readnone
264
265
; Calls llvm.x86.avx2.psra.w with a <16 x i16> value and an <8 x i16> count
; operand, and expects "vpsraw" in the llc output.
266 define <16 x i16> @test_x86_avx2_psra_w(<16 x i16> %a0, <8 x i16> %a1) {
267 ; CHECK: vpsraw
268 %res = call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %a0, <8 x i16> %a1) ; <<16 x i16>> [#uses=1]
269 ret <16 x i16> %res
270 }
271 declare <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16>, <8 x i16>) nounwind readnone
272
273
; Calls llvm.x86.avx2.psrai.d on an <8 x i32> value with the constant i32 7
; and expects "vpsrad" in the llc output.
274 define <8 x i32> @test_x86_avx2_psrai_d(<8 x i32> %a0) {
275 ; CHECK: vpsrad
276 %res = call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %a0, i32 7) ; <<8 x i32>> [#uses=1]
277 ret <8 x i32> %res
278 }
279 declare <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32>, i32) nounwind readnone
280
281
; Calls llvm.x86.avx2.psrai.w on a <16 x i16> value with the constant i32 7
; and expects "vpsraw" in the llc output.
282 define <16 x i16> @test_x86_avx2_psrai_w(<16 x i16> %a0) {
283 ; CHECK: vpsraw
284 %res = call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %a0, i32 7) ; <<16 x i16>> [#uses=1]
285 ret <16 x i16> %res
286 }
287 declare <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16>, i32) nounwind readnone
288
289
; Calls llvm.x86.avx2.psrl.d with an <8 x i32> value and a <4 x i32> count
; operand, and expects "vpsrld" in the llc output.
; NOTE(review): FileCheck matches substrings, so a "vpsrldq" line would also
; satisfy this CHECK.
290 define <8 x i32> @test_x86_avx2_psrl_d(<8 x i32> %a0, <4 x i32> %a1) {
291 ; CHECK: vpsrld
292 %res = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %a0, <4 x i32> %a1) ; <<8 x i32>> [#uses=1]
293 ret <8 x i32> %res
294 }
295 declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) nounwind readnone
296
297
; Calls llvm.x86.avx2.psrl.dq on a <4 x i64> value with the constant i32 7 and
; expects "vpsrldq" in the llc output. Only the mnemonic is checked, not the
; encoded immediate.
298 define <4 x i64> @test_x86_avx2_psrl_dq(<4 x i64> %a0) {
299 ; CHECK: vpsrldq
300 %res = call <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
301 ret <4 x i64> %res
302 }
303 declare <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64>, i32) nounwind readnone
304
305
; Calls llvm.x86.avx2.psrl.dq.bs on a <4 x i64> value with the constant i32 7
; and expects "vpsrldq" in the llc output. Only the mnemonic is checked, not
; the encoded immediate.
306 define <4 x i64> @test_x86_avx2_psrl_dq_bs(<4 x i64> %a0) {
307 ; CHECK: vpsrldq
308 %res = call <4 x i64> @llvm.x86.avx2.psrl.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
309 ret <4 x i64> %res
310 }
311 declare <4 x i64> @llvm.x86.avx2.psrl.dq.bs(<4 x i64>, i32) nounwind readnone
312
313
; Calls llvm.x86.avx2.psrl.q with a <4 x i64> value and a <2 x i64> count
; operand, and expects "vpsrlq" in the llc output.
314 define <4 x i64> @test_x86_avx2_psrl_q(<4 x i64> %a0, <2 x i64> %a1) {
315 ; CHECK: vpsrlq
316 %res = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %a0, <2 x i64> %a1) ; <<4 x i64>> [#uses=1]
317 ret <4 x i64> %res
318 }
319 declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) nounwind readnone
320
321
; Calls llvm.x86.avx2.psrl.w with a <16 x i16> value and an <8 x i16> count
; operand, and expects "vpsrlw" in the llc output.
322 define <16 x i16> @test_x86_avx2_psrl_w(<16 x i16> %a0, <8 x i16> %a1) {
323 ; CHECK: vpsrlw
324 %res = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %a0, <8 x i16> %a1) ; <<16 x i16>> [#uses=1]
325 ret <16 x i16> %res
326 }
327 declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) nounwind readnone
328
329
; Calls llvm.x86.avx2.psrli.d on an <8 x i32> value with the constant i32 7
; and expects "vpsrld" in the llc output.
; NOTE(review): FileCheck matches substrings, so a "vpsrldq" line would also
; satisfy this CHECK.
330 define <8 x i32> @test_x86_avx2_psrli_d(<8 x i32> %a0) {
331 ; CHECK: vpsrld
332 %res = call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %a0, i32 7) ; <<8 x i32>> [#uses=1]
333 ret <8 x i32> %res
334 }
335 declare <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32>, i32) nounwind readnone
336
337
; Calls llvm.x86.avx2.psrli.q on a <4 x i64> value with the constant i32 7 and
; expects "vpsrlq" in the llc output.
338 define <4 x i64> @test_x86_avx2_psrli_q(<4 x i64> %a0) {
339 ; CHECK: vpsrlq
340 %res = call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
341 ret <4 x i64> %res
342 }
343 declare <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64>, i32) nounwind readnone
344
345
; Calls llvm.x86.avx2.psrli.w on a <16 x i16> value with the constant i32 7
; and expects "vpsrlw" in the llc output.
346 define <16 x i16> @test_x86_avx2_psrli_w(<16 x i16> %a0) {
347 ; CHECK: vpsrlw
348 %res = call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %a0, i32 7) ; <<16 x i16>> [#uses=1]
349 ret <16 x i16> %res
350 }
351 declare <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16>, i32) nounwind readnone
352
353
; Calls llvm.x86.avx2.psubs.b on two <32 x i8> values and expects the mnemonic
; "vpsubsb" in the llc output.
354 define <32 x i8> @test_x86_avx2_psubs_b(<32 x i8> %a0, <32 x i8> %a1) {
355 ; CHECK: vpsubsb
356 %res = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
357 ret <32 x i8> %res
358 }
359 declare <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8>, <32 x i8>) nounwind readnone
360
361
; Calls llvm.x86.avx2.psubs.w on two <16 x i16> values and expects the mnemonic
; "vpsubsw" in the llc output.
362 define <16 x i16> @test_x86_avx2_psubs_w(<16 x i16> %a0, <16 x i16> %a1) {
363 ; CHECK: vpsubsw
364 %res = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
365 ret <16 x i16> %res
366 }
367 declare <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16>, <16 x i16>) nounwind readnone
368
369
; Calls llvm.x86.avx2.psubus.b on two <32 x i8> values and expects the mnemonic
; "vpsubusb" in the llc output.
370 define <32 x i8> @test_x86_avx2_psubus_b(<32 x i8> %a0, <32 x i8> %a1) {
371 ; CHECK: vpsubusb
372 %res = call <32 x i8> @llvm.x86.avx2.psubus.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
373 ret <32 x i8> %res
374 }
375 declare <32 x i8> @llvm.x86.avx2.psubus.b(<32 x i8>, <32 x i8>) nounwind readnone
376
377
; Calls llvm.x86.avx2.psubus.w on two <16 x i16> values and expects the
; mnemonic "vpsubusw" in the llc output.
378 define <16 x i16> @test_x86_avx2_psubus_w(<16 x i16> %a0, <16 x i16> %a1) {
379 ; CHECK: vpsubusw
380 %res = call <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
381 ret <16 x i16> %res
382 }
383 declare <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16>, <16 x i16>) nounwind readnone