llvm.org GIT mirror llvm / dc54ec4
Merging r292444: ------------------------------------------------------------------------ r292444 | mkuper | 2017-01-18 15:05:58 -0800 (Wed, 18 Jan 2017) | 7 lines Revert r291670 because it introduces a crash. r291670 doesn't crash on the original testcase from PR31589, but it crashes on a slightly more complex one. PR31589 has the new reproducer. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_40@293070 91177308-0d34-0410-b5e6-96231b3b80d8 Hans Wennborg 3 years ago
3 changed file(s) with 0 addition(s) and 328 deletion(s). Raw diff Collapse all Expand all
3127131271 return DAG.getNode(X86ISD::PCMPGT, SDLoc(N), VT, Shift.getOperand(0), Ones);
3127231272 }
3127331273
31274 /// Check if truncation with saturation from type \p SrcVT to \p DstVT
31275 /// is valid for the given \p Subtarget, i.e. lowerable to a VPMOVUS* node.
31276 static bool isSATValidOnAVX512Subtarget(EVT SrcVT, EVT DstVT,
31277 const X86Subtarget &Subtarget) {
31278 if (!Subtarget.hasAVX512()) // Saturating truncate nodes need AVX-512.
31279 return false;
31280
31281 // FIXME: Scalar type may be supported if we move it to vector register.
31282 if (!SrcVT.isVector() || !SrcVT.isSimple() || SrcVT.getSizeInBits() > 512)
31283 return false;
31284
31285 EVT SrcElVT = SrcVT.getScalarType();
31286 EVT DstElVT = DstVT.getScalarType();
31287 if (SrcElVT.getSizeInBits() < 16 || SrcElVT.getSizeInBits() > 64) // Sources: i16..i64 elements.
31288 return false;
31289 if (DstElVT.getSizeInBits() < 8 || DstElVT.getSizeInBits() > 32) // Destinations: i8..i32 elements.
31290 return false;
31291 if (SrcVT.is512BitVector() || Subtarget.hasVLX()) // Sub-512-bit vectors also require VLX.
31292 return SrcElVT.getSizeInBits() >= 32 || Subtarget.hasBWI(); // i16 source elements also require BWI.
31293 return false;
31294 }
31295
31296 /// Return true if VPACK* instruction can be used for the given types
31297 /// and it is available on \p Subtarget.
31298 static bool
31299 isSATValidOnSSESubtarget(EVT SrcVT, EVT DstVT, const X86Subtarget &Subtarget) {
31300 if (Subtarget.hasSSE2())
31301 // v16i16 -> v16i8 via PACKUSWB (SSE2).
31302 if (SrcVT == MVT::v16i16 && DstVT == MVT::v16i8)
31303 return true;
31304 if (Subtarget.hasSSE41())
31305 // v8i32 -> v8i16 via PACKUSDW (SSE4.1).
31306 if (SrcVT == MVT::v8i32 && DstVT == MVT::v8i16)
31307 return true;
31308 return false;
31309 }
31310
31311 /// Detect a pattern of truncation with saturation:
31312 /// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type).
31313 /// Return the source value to be truncated or SDValue() if the pattern was not
31314 /// matched.
31315 static SDValue detectUSatPattern(SDValue In, EVT VT) {
31316 if (In.getOpcode() != ISD::UMIN)
31317 return SDValue();
31318
31319 // Saturation with truncation. We truncate from InVT to VT.
31320 assert(In.getScalarValueSizeInBits() > VT.getScalarSizeInBits() &&
31321 "Unexpected types for truncate operation")
31322
31323 APInt C;
31324 if (ISD::isConstantSplatVector(In.getOperand(1).getNode(), C)) {
31325 // C should be equal to UINT32_MAX / UINT16_MAX / UINT8_MAX according to
31326 // the element size of the destination type, i.e. a low-bit mask of that width.
31327 return APIntOps::isMask(VT.getScalarSizeInBits(), C) ? In.getOperand(0) :
31328 SDValue();
31329 }
31330 return SDValue();
31331 }
31332
31333 /// Detect a pattern of truncation with saturation:
31334 /// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type).
31335 /// The types should allow to use VPMOVUS* instruction on AVX512.
31336 /// Return the source value to be truncated or SDValue() if the pattern was not
31337 /// matched.
31338 static SDValue detectAVX512USatPattern(SDValue In, EVT VT,
31339 const X86Subtarget &Subtarget) {
31340 if (!isSATValidOnAVX512Subtarget(In.getValueType(), VT, Subtarget)) // Legality gate before pattern match.
31341 return SDValue();
31342 return detectUSatPattern(In, VT);
31343 }
31344
/// Try to combine a truncate of a saturating umin: lower to a single
/// VTRUNCUS node on AVX-512, or to a PACKUS of the two split halves on
/// SSE2/SSE4.1. Returns SDValue() when the pattern or types don't match.
31345 static SDValue
31346 combineTruncateWithUSat(SDValue In, EVT VT, SDLoc &DL, SelectionDAG &DAG,
31347 const X86Subtarget &Subtarget) {
31348 SDValue USatVal = detectUSatPattern(In, VT);
31349 if (USatVal) {
31350 if (isSATValidOnAVX512Subtarget(In.getValueType(), VT, Subtarget))
31351 return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, USatVal); // Single saturating-truncate instruction.
31352 if (isSATValidOnSSESubtarget(In.getValueType(), VT, Subtarget)) {
31353 SDValue Lo, Hi;
31354 std::tie(Lo, Hi) = DAG.SplitVector(USatVal, DL);
31355 return DAG.getNode(X86ISD::PACKUS, DL, VT, Lo, Hi); // Saturate-and-pack the halves.
31356 }
31357 }
31358 return SDValue();
31359 }
31360
3136131274 /// This function detects the AVG pattern between vectors of unsigned i8/i16,
3136231275 /// which is c = (a + b + 1) / 2, and replace this operation with the efficient
3136331276 /// X86ISD::AVG instruction.
3192331836 return DAG.getStore(St->getChain(), dl, Avg, St->getBasePtr(),
3192431837 St->getPointerInfo(), St->getAlignment(),
3192531838 St->getMemOperand()->getFlags());
31926
31927 if (SDValue Val =
31928 detectAVX512USatPattern(St->getValue(), St->getMemoryVT(), Subtarget))
31929 return EmitTruncSStore(false /* Unsigned saturation */, St->getChain(),
31930 dl, Val, St->getBasePtr(),
31931 St->getMemoryVT(), St->getMemOperand(), DAG);
3193231839
3193331840 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3193431841 unsigned NumElems = VT.getVectorNumElements();
3254932456 // Try to detect AVG pattern first.
3255032457 if (SDValue Avg = detectAVGPattern(Src, VT, DAG, Subtarget, DL))
3255132458 return Avg;
32552
32553 // Try to combine truncation with unsigned saturation.
32554 if (SDValue Val = combineTruncateWithUSat(Src, VT, DL, DAG, Subtarget))
32555 return Val;
3255632459
3255732460 // The bitcast source is a direct mmx result.
3255832461 // Detect bitcasts between i32 to x86mmx
3838 %B = trunc <16 x i16> %A to <16 x i8>
3939 ret <16 x i8> %B
4040 }
41
42 define <16 x i8> @usat_trunc_wb_256(<16 x i16> %i) {
43 ; CHECK-LABEL: usat_trunc_wb_256:
44 ; CHECK: # BB#0:
45 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
46 ; CHECK-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
47 ; CHECK-NEXT: vzeroupper
48 ; CHECK-NEXT: retq
49 %x3 = icmp ult <16 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
50 %x5 = select <16 x i1> %x3, <16 x i16> %i, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
51 %x6 = trunc <16 x i16> %x5 to <16 x i8>
52 ret <16 x i8> %x6
53 }
54
55 define <8 x i16> @usat_trunc_dw_256(<8 x i32> %i) {
56 ; CHECK-LABEL: usat_trunc_dw_256:
57 ; CHECK: # BB#0:
58 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
59 ; CHECK-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
60 ; CHECK-NEXT: vzeroupper
61 ; CHECK-NEXT: retq
62 %x3 = icmp ult <8 x i32> %i, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
63 %x5 = select <8 x i1> %x3, <8 x i32> %i, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
64 %x6 = trunc <8 x i32> %x5 to <8 x i16>
65 ret <8 x i16> %x6
66 }
499499 store <8 x i8> %x, <8 x i8>* %res
500500 ret void
501501 }
502
503
504 define void @usat_trunc_wb_256_mem(<16 x i16> %i, <16 x i8>* %res) {
505 ; KNL-LABEL: usat_trunc_wb_256_mem:
506 ; KNL: ## BB#0:
507 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
508 ; KNL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
509 ; KNL-NEXT: vmovdqu %xmm0, (%rdi)
510 ; KNL-NEXT: retq
511 ;
512 ; SKX-LABEL: usat_trunc_wb_256_mem:
513 ; SKX: ## BB#0:
514 ; SKX-NEXT: vpmovuswb %ymm0, (%rdi)
515 ; SKX-NEXT: retq
516 %x3 = icmp ult <16 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
517 %x5 = select <16 x i1> %x3, <16 x i16> %i, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
518 %x6 = trunc <16 x i16> %x5 to <16 x i8>
519 store <16 x i8> %x6, <16 x i8>* %res, align 1
520 ret void
521 }
522
523 define <16 x i8> @usat_trunc_wb_256(<16 x i16> %i) {
524 ; KNL-LABEL: usat_trunc_wb_256:
525 ; KNL: ## BB#0:
526 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
527 ; KNL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
528 ; KNL-NEXT: retq
529 ;
530 ; SKX-LABEL: usat_trunc_wb_256:
531 ; SKX: ## BB#0:
532 ; SKX-NEXT: vpmovuswb %ymm0, %xmm0
533 ; SKX-NEXT: retq
534 %x3 = icmp ult <16 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
535 %x5 = select <16 x i1> %x3, <16 x i16> %i, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
536 %x6 = trunc <16 x i16> %x5 to <16 x i8>
537 ret <16 x i8> %x6
538 }
539
540 define void @usat_trunc_wb_128_mem(<8 x i16> %i, <8 x i8>* %res) {
541 ; KNL-LABEL: usat_trunc_wb_128_mem:
542 ; KNL: ## BB#0:
543 ; KNL-NEXT: vpminuw {{.*}}(%rip), %xmm0, %xmm0
544 ; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
545 ; KNL-NEXT: vmovq %xmm0, (%rdi)
546 ; KNL-NEXT: retq
547 ;
548 ; SKX-LABEL: usat_trunc_wb_128_mem:
549 ; SKX: ## BB#0:
550 ; SKX-NEXT: vpmovuswb %xmm0, (%rdi)
551 ; SKX-NEXT: retq
552 %x3 = icmp ult <8 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
553 %x5 = select <8 x i1> %x3, <8 x i16> %i, <8 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
554 %x6 = trunc <8 x i16> %x5 to <8 x i8>
555 store <8 x i8> %x6, <8 x i8>* %res, align 1
556 ret void
557 }
558
559 define void @usat_trunc_db_512_mem(<16 x i32> %i, <16 x i8>* %res) {
560 ; ALL-LABEL: usat_trunc_db_512_mem:
561 ; ALL: ## BB#0:
562 ; ALL-NEXT: vpmovusdb %zmm0, (%rdi)
563 ; ALL-NEXT: retq
564 %x3 = icmp ult <16 x i32> %i, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
565 %x5 = select <16 x i1> %x3, <16 x i32> %i, <16 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
566 %x6 = trunc <16 x i32> %x5 to <16 x i8>
567 store <16 x i8> %x6, <16 x i8>* %res, align 1
568 ret void
569 }
570
571 define void @usat_trunc_qb_512_mem(<8 x i64> %i, <8 x i8>* %res) {
572 ; ALL-LABEL: usat_trunc_qb_512_mem:
573 ; ALL: ## BB#0:
574 ; ALL-NEXT: vpmovusqb %zmm0, (%rdi)
575 ; ALL-NEXT: retq
576 %x3 = icmp ult <8 x i64> %i, <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
577 %x5 = select <8 x i1> %x3, <8 x i64> %i, <8 x i64> <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
578 %x6 = trunc <8 x i64> %x5 to <8 x i8>
579 store <8 x i8> %x6, <8 x i8>* %res, align 1
580 ret void
581 }
582
583 define void @usat_trunc_qd_512_mem(<8 x i64> %i, <8 x i32>* %res) {
584 ; ALL-LABEL: usat_trunc_qd_512_mem:
585 ; ALL: ## BB#0:
586 ; ALL-NEXT: vpmovusqd %zmm0, (%rdi)
587 ; ALL-NEXT: retq
588 %x3 = icmp ult <8 x i64> %i, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
589 %x5 = select <8 x i1> %x3, <8 x i64> %i, <8 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
590 %x6 = trunc <8 x i64> %x5 to <8 x i32>
591 store <8 x i32> %x6, <8 x i32>* %res, align 1
592 ret void
593 }
594
595 define void @usat_trunc_qw_512_mem(<8 x i64> %i, <8 x i16>* %res) {
596 ; ALL-LABEL: usat_trunc_qw_512_mem:
597 ; ALL: ## BB#0:
598 ; ALL-NEXT: vpmovusqw %zmm0, (%rdi)
599 ; ALL-NEXT: retq
600 %x3 = icmp ult <8 x i64> %i, <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
601 %x5 = select <8 x i1> %x3, <8 x i64> %i, <8 x i64> <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
602 %x6 = trunc <8 x i64> %x5 to <8 x i16>
603 store <8 x i16> %x6, <8 x i16>* %res, align 1
604 ret void
605 }
606
607 define <32 x i8> @usat_trunc_db_1024(<32 x i32> %i) {
608 ; KNL-LABEL: usat_trunc_db_1024:
609 ; KNL: ## BB#0:
610 ; KNL-NEXT: vpmovusdb %zmm0, %xmm0
611 ; KNL-NEXT: vpmovusdb %zmm1, %xmm1
612 ; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
613 ; KNL-NEXT: retq
614 ;
615 ; SKX-LABEL: usat_trunc_db_1024:
616 ; SKX: ## BB#0:
617 ; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm2
618 ; SKX-NEXT: vpminud %zmm2, %zmm1, %zmm1
619 ; SKX-NEXT: vpminud %zmm2, %zmm0, %zmm0
620 ; SKX-NEXT: vpmovdw %zmm0, %ymm0
621 ; SKX-NEXT: vpmovdw %zmm1, %ymm1
622 ; SKX-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
623 ; SKX-NEXT: vpmovwb %zmm0, %ymm0
624 ; SKX-NEXT: retq
625 %x3 = icmp ult <32 x i32> %i,
626 %x5 = select <32 x i1> %x3, <32 x i32> %i, <32 x i32>
627 %x6 = trunc <32 x i32> %x5 to <32 x i8>
628 ret <32 x i8> %x6
629 }
630
631 define void @usat_trunc_db_1024_mem(<32 x i32> %i, <32 x i8>* %p) {
632 ; KNL-LABEL: usat_trunc_db_1024_mem:
633 ; KNL: ## BB#0:
634 ; KNL-NEXT: vpmovusdb %zmm0, %xmm0
635 ; KNL-NEXT: vpmovusdb %zmm1, %xmm1
636 ; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
637 ; KNL-NEXT: vmovdqu %ymm0, (%rdi)
638 ; KNL-NEXT: retq
639 ;
640 ; SKX-LABEL: usat_trunc_db_1024_mem:
641 ; SKX: ## BB#0:
642 ; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm2
643 ; SKX-NEXT: vpminud %zmm2, %zmm1, %zmm1
644 ; SKX-NEXT: vpminud %zmm2, %zmm0, %zmm0
645 ; SKX-NEXT: vpmovdw %zmm0, %ymm0
646 ; SKX-NEXT: vpmovdw %zmm1, %ymm1
647 ; SKX-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
648 ; SKX-NEXT: vpmovwb %zmm0, (%rdi)
649 ; SKX-NEXT: retq
650 %x3 = icmp ult <32 x i32> %i,
651 %x5 = select <32 x i1> %x3, <32 x i32> %i, <32 x i32>
652 %x6 = trunc <32 x i32> %x5 to <32 x i8>
653 store <32 x i8>%x6, <32 x i8>* %p, align 1
654 ret void
655 }
656
657 define <16 x i16> @usat_trunc_dw_512(<16 x i32> %i) {
658 ; ALL-LABEL: usat_trunc_dw_512:
659 ; ALL: ## BB#0:
660 ; ALL-NEXT: vpmovusdw %zmm0, %ymm0
661 ; ALL-NEXT: retq
662 %x3 = icmp ult <16 x i32> %i, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
663 %x5 = select <16 x i1> %x3, <16 x i32> %i, <16 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
664 %x6 = trunc <16 x i32> %x5 to <16 x i16>
665 ret <16 x i16> %x6
666 }
667
668 define <8 x i8> @usat_trunc_wb_128(<8 x i16> %i) {
669 ; ALL-LABEL: usat_trunc_wb_128:
670 ; ALL: ## BB#0:
671 ; ALL-NEXT: vpminuw {{.*}}(%rip), %xmm0, %xmm0
672 ; ALL-NEXT: retq
673 %x3 = icmp ult <8 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
674 %x5 = select <8 x i1> %x3, <8 x i16> %i, <8 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
675 %x6 = trunc <8 x i16> %x5 to <8 x i8>
676 ret <8 x i8>%x6
677 }
678
679 define <16 x i16> @usat_trunc_qw_1024(<16 x i64> %i) {
680 ; KNL-LABEL: usat_trunc_qw_1024:
681 ; KNL: ## BB#0:
682 ; KNL-NEXT: vpbroadcastq {{.*}}(%rip), %zmm2
683 ; KNL-NEXT: vpminuq %zmm2, %zmm1, %zmm1
684 ; KNL-NEXT: vpminuq %zmm2, %zmm0, %zmm0
685 ; KNL-NEXT: vpmovqd %zmm0, %ymm0
686 ; KNL-NEXT: vpmovqd %zmm1, %ymm1
687 ; KNL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
688 ; KNL-NEXT: vpmovdw %zmm0, %ymm0
689 ; KNL-NEXT: retq
690 ;
691 ; SKX-LABEL: usat_trunc_qw_1024:
692 ; SKX: ## BB#0:
693 ; SKX-NEXT: vpbroadcastq {{.*}}(%rip), %zmm2
694 ; SKX-NEXT: vpminuq %zmm2, %zmm1, %zmm1
695 ; SKX-NEXT: vpminuq %zmm2, %zmm0, %zmm0
696 ; SKX-NEXT: vpmovqd %zmm0, %ymm0
697 ; SKX-NEXT: vpmovqd %zmm1, %ymm1
698 ; SKX-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm0
699 ; SKX-NEXT: vpmovdw %zmm0, %ymm0
700 ; SKX-NEXT: retq
701 %x3 = icmp ult <16 x i64> %i, <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
702 %x5 = select <16 x i1> %x3, <16 x i64> %i, <16 x i64> <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
703 %x6 = trunc <16 x i64> %x5 to <16 x i16>
704 ret <16 x i16> %x6
705 }
706