llvm.org GIT mirror llvm / ed42dce
[X86] Remove AVX2 and SSE2 pslldq and psrldq intrinsics. We can represent them in IR with vector shuffles now. All their uses have been removed from clang in favor of shuffles. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@229640 91177308-0d34-0410-b5e6-96231b3b80d8 Craig Topper 5 years ago
10 changed file(s) with 171 addition(s) and 160 deletion(s). Raw diff Collapse all Expand all
451451 llvm_i32_ty], [IntrNoMem]>;
452452 def int_x86_sse2_psrai_d : GCCBuiltin<"__builtin_ia32_psradi128">,
453453 Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
454 llvm_i32_ty], [IntrNoMem]>;
455
456 def int_x86_sse2_psll_dq :
457 Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
458 llvm_i32_ty], [IntrNoMem]>;
459 def int_x86_sse2_psrl_dq :
460 Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
461454 llvm_i32_ty], [IntrNoMem]>;
462455 }
463456
15791572 Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
15801573 llvm_i32_ty], [IntrNoMem]>;
15811574
1582 def int_x86_avx2_psll_dq :
1583 Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
1584 llvm_i32_ty], [IntrNoMem]>;
1585 def int_x86_avx2_psrl_dq :
1586 Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
1587 llvm_i32_ty], [IntrNoMem]>;
1588
15891575 def int_x86_avx512_mask_pslli_d : GCCBuiltin<"__builtin_ia32_pslldi512">,
15901576 Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
15911577 llvm_i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
162162 Name == "x86.avx.vbroadcast.ss" ||
163163 Name == "x86.avx.vbroadcast.ss.256" ||
164164 Name == "x86.avx.vbroadcast.sd.256" ||
165 Name == "x86.sse2.psll.dq" ||
166 Name == "x86.sse2.psrl.dq" ||
167 Name == "x86.avx2.psll.dq" ||
168 Name == "x86.avx2.psrl.dq" ||
165169 Name == "x86.sse2.psll.dq.bs" ||
166170 Name == "x86.sse2.psrl.dq.bs" ||
167171 Name == "x86.avx2.psll.dq.bs" ||
369373 Expr = DIB.createExpression();
370374 }
371375 return MetadataAsValue::get(F->getContext(), Expr);
376 }
377
378 // Upgrades the SSE2 (NumLanes == 1) and AVX2 (NumLanes == 2) PSLLDQ intrinsics by
379 // rewriting the left shift of Op by Shift bytes, within each 16-byte lane, as a byte shuffle.
380 static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, LLVMContext &C,
381 Value *Op, unsigned NumLanes,
382 unsigned Shift) {
383 // Each lane is 16 bytes.
384 unsigned NumElts = NumLanes * 16;
385
386 // Bitcast from a 64-bit element type to a byte element type.
387 Op = Builder.CreateBitCast(Op,
388 VectorType::get(Type::getInt8Ty(C), NumElts),
389 "cast");
390 // We'll be shuffling in zeroes.
391 Value *Res = ConstantVector::getSplat(NumElts, Builder.getInt8(0));
392
393 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
394 // every byte is shifted out of its lane and we just return the zero vector.
395 if (Shift < 16) {
396 SmallVector<Constant*, 32> Idxs;
397 // 256-bit version is split into two 16-byte lanes; l is the lane's byte offset.
398 for (unsigned l = 0; l != NumElts; l += 16)
399 for (unsigned i = 0; i != 16; ++i) {
400 unsigned Idx = NumElts + i - Shift; // Indices >= NumElts select from Op (second operand).
401 if (Idx < NumElts)
402 Idx -= NumElts - 16; // start of lane, switch to the zero operand.
403 Idxs.push_back(Builder.getInt32(Idx + l));
404 }
405
406 Res = Builder.CreateShuffleVector(Res, Op, ConstantVector::get(Idxs));
407 }
408
409 // Bitcast back to a 64-bit element type.
410 return Builder.CreateBitCast(Res,
411 VectorType::get(Type::getInt64Ty(C), 2*NumLanes),
412 "cast");
413 }
414
415 // Upgrades the SSE2 (NumLanes == 1) and AVX2 (NumLanes == 2) PSRLDQ intrinsics by
416 // rewriting the right shift of Op by Shift bytes, within each 16-byte lane, as a byte shuffle.
417 static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, LLVMContext &C,
418 Value *Op, unsigned NumLanes,
419 unsigned Shift) {
420 // Each lane is 16 bytes.
421 unsigned NumElts = NumLanes * 16;
422
423 // Bitcast from a 64-bit element type to a byte element type.
424 Op = Builder.CreateBitCast(Op,
425 VectorType::get(Type::getInt8Ty(C), NumElts),
426 "cast");
427 // We'll be shuffling in zeroes.
428 Value *Res = ConstantVector::getSplat(NumElts, Builder.getInt8(0));
429
430 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
431 // every byte is shifted out of its lane and we just return the zero vector.
432 if (Shift < 16) {
433 SmallVector<Constant*, 32> Idxs;
434 // 256-bit version is split into two 16-byte lanes; l is the lane's byte offset.
435 for (unsigned l = 0; l != NumElts; l += 16)
436 for (unsigned i = 0; i != 16; ++i) {
437 unsigned Idx = i + Shift; // Indices < NumElts select from Op (first operand).
438 if (Idx >= 16)
439 Idx += NumElts - 16; // end of lane, switch to the zero operand.
440 Idxs.push_back(Builder.getInt32(Idx + l));
441 }
442
443 Res = Builder.CreateShuffleVector(Op, Res, ConstantVector::get(Idxs));
444 }
445
446 // Bitcast back to a 64-bit element type.
447 return Builder.CreateBitCast(Res,
448 VectorType::get(Type::getInt64Ty(C), 2*NumLanes),
449 "cast");
372450 }
373451
374452 // UpgradeIntrinsicCall - Upgrade a call to an old intrinsic to be a call the
490568 for (unsigned I = 0; I < EltNum; ++I)
491569 Rep = Builder.CreateInsertElement(Rep, Load,
492570 ConstantInt::get(I32Ty, I));
571 } else if (Name == "llvm.x86.sse2.psll.dq") {
572 // 128-bit shift left specified in bits.
573 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
574 Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
575 Shift / 8); // Shift is in bits.
576 } else if (Name == "llvm.x86.sse2.psrl.dq") {
577 // 128-bit shift right specified in bits.
578 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
579 Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
580 Shift / 8); // Shift is in bits.
581 } else if (Name == "llvm.x86.avx2.psll.dq") {
582 // 256-bit shift left specified in bits.
583 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
584 Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
585 Shift / 8); // Shift is in bits.
586 } else if (Name == "llvm.x86.avx2.psrl.dq") {
587 // 256-bit shift right specified in bits.
588 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
589 Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
590 Shift / 8); // Shift is in bits.
493591 } else if (Name == "llvm.x86.sse2.psll.dq.bs") {
494 Value *Op0 = ConstantVector::getSplat(16, Builder.getInt8(0));
495 Value *Op1 = Builder.CreateBitCast(CI->getArgOperand(0),
496 VectorType::get(Type::getInt8Ty(C),16),
497 "cast");
498
592 // 128-bit shift left specified in bytes.
499593 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
500
501 if (Shift < 16) {
502 SmallVector Idxs;
503 for (unsigned i = 16; i != 32; ++i)
504 Idxs.push_back(Builder.getInt32(i - Shift));
505
506 Op0 = Builder.CreateShuffleVector(Op0, Op1, ConstantVector::get(Idxs));
507 }
508
509 Rep = Builder.CreateBitCast(Op0,
510 VectorType::get(Type::getInt64Ty(C), 2),
511 "cast");
594 Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
595 Shift);
512596 } else if (Name == "llvm.x86.sse2.psrl.dq.bs") {
513 Value *Op0 = Builder.CreateBitCast(CI->getArgOperand(0),
514 VectorType::get(Type::getInt8Ty(C),16),
515 "cast");
516 Value *Op1 = ConstantVector::getSplat(16, Builder.getInt8(0));
517
597 // 128-bit shift right specified in bytes.
518598 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
519
520 if (Shift < 16) {
521 SmallVector Idxs;
522 for (unsigned i = 0; i != 16; ++i)
523 Idxs.push_back(Builder.getInt32(i + Shift));
524
525 Op1 = Builder.CreateShuffleVector(Op0, Op1, ConstantVector::get(Idxs));
526 }
527 Rep = Builder.CreateBitCast(Op1,
528 VectorType::get(Type::getInt64Ty(C), 2),
529 "cast");
599 Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
600 Shift);
530601 } else if (Name == "llvm.x86.avx2.psll.dq.bs") {
531 Value *Op0 = ConstantVector::getSplat(32, Builder.getInt8(0));
532 Value *Op1 = Builder.CreateBitCast(CI->getArgOperand(0),
533 VectorType::get(Type::getInt8Ty(C),32),
534 "cast");
535
602 // 256-bit shift left specified in bytes.
536603 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
537
538 if (Shift < 16) {
539 SmallVector Idxs;
540 for (unsigned l = 0; l != 32; l += 16)
541 for (unsigned i = 0; i != 16; ++i) {
542 unsigned Idx = 32 + i - Shift;
543 if (Idx < 32) Idx -= 16; // end of lane, switch operand.
544 Idxs.push_back(Builder.getInt32(Idx + l));
545 }
546
547 Op1 = Builder.CreateShuffleVector(Op0, Op1, ConstantVector::get(Idxs));
548 }
549
550 Rep = Builder.CreateBitCast(Op1,
551 VectorType::get(Type::getInt64Ty(C), 4),
552 "cast");
604 Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
605 Shift);
553606 } else if (Name == "llvm.x86.avx2.psrl.dq.bs") {
554 Value *Op0 = Builder.CreateBitCast(CI->getArgOperand(0),
555 VectorType::get(Type::getInt8Ty(C),32),
556 "cast");
557 Value *Op1 = ConstantVector::getSplat(32, Builder.getInt8(0));
558
607 // 256-bit shift right specified in bytes.
559608 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
560
561 if (Shift < 16) {
562 SmallVector Idxs;
563 for (unsigned l = 0; l != 32; l += 16)
564 for (unsigned i = 0; i != 16; ++i) {
565 unsigned Idx = i + Shift;
566 if (Idx >= 16) Idx += 16; // end of lane, switch operand.
567 Idxs.push_back(Builder.getInt32(Idx + l));
568 }
569
570 Op0 = Builder.CreateShuffleVector(Op0, Op1, ConstantVector::get(Idxs));
571 }
572
573 Rep = Builder.CreateBitCast(Op0,
574 VectorType::get(Type::getInt64Ty(C), 4),
575 "cast");
609 Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
610 Shift);
576611 } else {
577612 bool PD128 = false, PD256 = false, PS128 = false, PS256 = false;
578613 if (Name == "llvm.x86.avx.vpermil.pd.256")
42824282 } // Constraints = "$src1 = $dst"
42834283
42844284 let Predicates = [HasAVX] in {
4285 def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2),
4286 (VPSLLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
4287 def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2),
4288 (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
42894285 def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)),
42904286 (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
42914287 }
42924288
4293 let Predicates = [HasAVX2] in {
4294 def : Pat<(int_x86_avx2_psll_dq VR256:$src1, imm:$src2),
4295 (VPSLLDQYri VR256:$src1, (BYTE_imm imm:$src2))>;
4296 def : Pat<(int_x86_avx2_psrl_dq VR256:$src1, imm:$src2),
4297 (VPSRLDQYri VR256:$src1, (BYTE_imm imm:$src2))>;
4298 }
4299
43004289 let Predicates = [UseSSE2] in {
4301 def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2),
4302 (PSLLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
4303 def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2),
4304 (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
43054290 def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)),
43064291 (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
43074292 }
22692269 case llvm::Intrinsic::x86_avx2_pslli_w:
22702270 case llvm::Intrinsic::x86_avx2_pslli_d:
22712271 case llvm::Intrinsic::x86_avx2_pslli_q:
2272 case llvm::Intrinsic::x86_avx2_psll_dq:
22732272 case llvm::Intrinsic::x86_avx2_psrl_w:
22742273 case llvm::Intrinsic::x86_avx2_psrl_d:
22752274 case llvm::Intrinsic::x86_avx2_psrl_q:
22802279 case llvm::Intrinsic::x86_avx2_psrli_q:
22812280 case llvm::Intrinsic::x86_avx2_psrai_w:
22822281 case llvm::Intrinsic::x86_avx2_psrai_d:
2283 case llvm::Intrinsic::x86_avx2_psrl_dq:
22842282 case llvm::Intrinsic::x86_sse2_psll_w:
22852283 case llvm::Intrinsic::x86_sse2_psll_d:
22862284 case llvm::Intrinsic::x86_sse2_psll_q:
22872285 case llvm::Intrinsic::x86_sse2_pslli_w:
22882286 case llvm::Intrinsic::x86_sse2_pslli_d:
22892287 case llvm::Intrinsic::x86_sse2_pslli_q:
2290 case llvm::Intrinsic::x86_sse2_psll_dq:
22912288 case llvm::Intrinsic::x86_sse2_psrl_w:
22922289 case llvm::Intrinsic::x86_sse2_psrl_d:
22932290 case llvm::Intrinsic::x86_sse2_psrl_q:
22982295 case llvm::Intrinsic::x86_sse2_psrli_q:
22992296 case llvm::Intrinsic::x86_sse2_psrai_w:
23002297 case llvm::Intrinsic::x86_sse2_psrai_d:
2301 case llvm::Intrinsic::x86_sse2_psrl_dq:
23022298 case llvm::Intrinsic::x86_mmx_psll_w:
23032299 case llvm::Intrinsic::x86_mmx_psll_d:
23042300 case llvm::Intrinsic::x86_mmx_psll_q:
23332329 // Byte shifts are not implemented.
23342330 // case llvm::Intrinsic::x86_avx512_psll_dq_bs:
23352331 // case llvm::Intrinsic::x86_avx512_psrl_dq_bs:
2336 // case llvm::Intrinsic::x86_avx2_psll_dq_bs:
2337 // case llvm::Intrinsic::x86_avx2_psrl_dq_bs:
2338 // case llvm::Intrinsic::x86_sse2_psll_dq_bs:
2339 // case llvm::Intrinsic::x86_sse2_psrl_dq_bs:
23402332
23412333 case llvm::Intrinsic::x86_sse2_packsswb_128:
23422334 case llvm::Intrinsic::x86_sse2_packssdw_128:
2323 declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i32) nounwind readnone
2424
2525
26 define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
27 ; CHECK: vpslldq {{.*#+}} xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
28 %res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 8) ; <<2 x i64>> [#uses=1]
29 ret <2 x i64> %res
30 }
31 declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone
32
33
34 define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
35 ; CHECK: vpsrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
36 %res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 8) ; <<2 x i64>> [#uses=1]
37 ret <2 x i64> %res
38 }
39 declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone
456456 declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone
457457
458458
459 define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
460 ; CHECK: vpslldq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
461 %res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
462 ret <2 x i64> %res
463 }
464 declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone
465
466
467459 define <2 x i64> @test_x86_sse2_psll_q(<2 x i64> %a0, <2 x i64> %a1) {
468460 ; CHECK: vpsllq
469461 %res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
542534 ret <4 x i32> %res
543535 }
544536 declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone
545
546
547 define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
548 ; CHECK: vpsrldq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
549 %res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
550 ret <2 x i64> %res
551 }
552 declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone
553537
554538
555539 define <2 x i64> @test_x86_sse2_psrl_q(<2 x i64> %a0, <2 x i64> %a1) {
4545 ret <4 x i64> %res
4646 }
4747 declare <4 x i64> @llvm.x86.avx2.psrl.dq.bs(<4 x i64>, i32) nounwind readnone
48
49
50 define <4 x i64> @test_x86_avx2_psll_dq(<4 x i64> %a0) {
51 ; CHECK: vpslldq {{.*#+}} ymm0 = zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
52 %res = call <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64> %a0, i32 8) ; <<4 x i64>> [#uses=1]
53 ret <4 x i64> %res
54 }
55 declare <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64>, i32) nounwind readnone
56
57
58 define <4 x i64> @test_x86_avx2_psrl_dq(<4 x i64> %a0) {
59 ; CHECK: vpsrldq {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,ymm0[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],zero
60 %res = call <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64> %a0, i32 8) ; <<4 x i64>> [#uses=1]
61 ret <4 x i64> %res
62 }
63 declare <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64>, i32) nounwind readnone
159159 declare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) nounwind readnone
160160
161161
162 define <4 x i64> @test_x86_avx2_psll_dq(<4 x i64> %a0) {
163 ; CHECK: vpslldq {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
164 %res = call <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
165 ret <4 x i64> %res
166 }
167 declare <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64>, i32) nounwind readnone
168
169
170162 define <4 x i64> @test_x86_avx2_psll_q(<4 x i64> %a0, <2 x i64> %a1) {
171163 ; CHECK: vpsllq
172164 %res = call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %a0, <2 x i64> %a1) ; <<4 x i64>> [#uses=1]
245237 ret <8 x i32> %res
246238 }
247239 declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) nounwind readnone
248
249
250 define <4 x i64> @test_x86_avx2_psrl_dq(<4 x i64> %a0) {
251 ; CHECK: vpsrldq {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
252 %res = call <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
253 ret <4 x i64> %res
254 }
255 declare <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64>, i32) nounwind readnone
256240
257241
258242 define <4 x i64> @test_x86_avx2_psrl_q(<4 x i64> %a0, <2 x i64> %a1) {
0 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=pentium4 -mattr=sse2 | FileCheck %s
1
2 define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {
3 ; CHECK: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8]
4 %res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
5 ret <2 x i64> %res
6 }
7 declare <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64>, i32) nounwind readnone
8
9
10 define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) {
11 ; CHECK: psrldq {{.*#+}} xmm0 = xmm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero
12 %res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
13 ret <2 x i64> %res
14 }
15 declare <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64>, i32) nounwind readnone
16
17 define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
18 ; CHECK: pslldq {{.*#+}} xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
19 %res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 8) ; <<2 x i64>> [#uses=1]
20 ret <2 x i64> %res
21 }
22 declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone
23
24
25 define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
26 ; CHECK: psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
27 %res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 8) ; <<2 x i64>> [#uses=1]
28 ret <2 x i64> %res
29 }
30 declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone
409409 declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone
410410
411411
412 define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
413 ; CHECK: pslldq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
414 %res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
415 ret <2 x i64> %res
416 }
417 declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone
418
419
420412 define <2 x i64> @test_x86_sse2_psll_q(<2 x i64> %a0, <2 x i64> %a1) {
421413 ; CHECK: psllq
422414 %res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
495487 ret <4 x i32> %res
496488 }
497489 declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone
498
499
500 define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
501 ; CHECK: psrldq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
502 %res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
503 ret <2 x i64> %res
504 }
505 declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone
506490
507491
508492 define <2 x i64> @test_x86_sse2_psrl_q(<2 x i64> %a0, <2 x i64> %a1) {