llvm.org GIT mirror llvm / dcfedc3
Revert r291092 because it introduces a crash. See PR31589 for details. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@291478 91177308-0d34-0410-b5e6-96231b3b80d8 Michael Kuperstein 3 years ago
2 changed file(s) with 0 addition(s) and 170 deletion(s). Raw diff Collapse all Expand all
3108031080 return DAG.getNode(X86ISD::PCMPGT, SDLoc(N), VT, Shift.getOperand(0), Ones);
3108131081 }
3108231082
31083 /// Check if truncation with saturation form type \p SrcVT to \p DstVT
31084 /// is valid for the given \p Subtarget.
31085 static bool
31086 isSATValidOnSubtarget(EVT SrcVT, EVT DstVT, const X86Subtarget &Subtarget) {
31087 if (!Subtarget.hasAVX512())
31088 return false;
31089 EVT SrcElVT = SrcVT.getScalarType();
31090 EVT DstElVT = DstVT.getScalarType();
31091 if (SrcElVT.getSizeInBits() < 16 || SrcElVT.getSizeInBits() > 64)
31092 return false;
31093 if (DstElVT.getSizeInBits() < 8 || DstElVT.getSizeInBits() > 32)
31094 return false;
31095 if (SrcVT.is512BitVector() || Subtarget.hasVLX())
31096 return SrcElVT.getSizeInBits() >= 32 || Subtarget.hasBWI();
31097 return false;
31098 }
31099
31100 /// Detect a pattern of truncation with saturation:
31101 /// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type).
31102 /// Return the source value to be truncated or SDValue() if the pattern was not
31103 /// matched or the unsupported on the current target.
31104 static SDValue
31105 detectUSatPattern(SDValue In, EVT VT, const X86Subtarget &Subtarget) {
31106 if (In.getOpcode() != ISD::UMIN)
31107 return SDValue();
31108
31109 EVT InVT = In.getValueType();
31110 // FIXME: Scalar type may be supported if we move it to vector register.
31111 if (!InVT.isVector() || !InVT.isSimple())
31112 return SDValue();
31113
31114 if (!isSATValidOnSubtarget(InVT, VT, Subtarget))
31115 return SDValue();
31116
31117 //Saturation with truncation. We truncate from InVT to VT.
31118 assert(InVT.getScalarSizeInBits() > VT.getScalarSizeInBits() &&
31119 "Unexpected types for truncate operation");
31120
31121 SDValue SrcVal;
31122 APInt C;
31123 if (ISD::isConstantSplatVector(In.getOperand(0).getNode(), C))
31124 SrcVal = In.getOperand(1);
31125 else if (ISD::isConstantSplatVector(In.getOperand(1).getNode(), C))
31126 SrcVal = In.getOperand(0);
31127 else
31128 return SDValue();
31129
31130 // C should be equal to UINT32_MAX / UINT16_MAX / UINT8_MAX according
31131 // the element size of the destination type.
31132 return (C == ((uint64_t)1 << VT.getScalarSizeInBits()) - 1) ?
31133 SrcVal : SDValue();
31134 }
31135
3113631083 /// This function detects the AVG pattern between vectors of unsigned i8/i16,
3113731084 /// which is c = (a + b + 1) / 2, and replace this operation with the efficient
3113831085 /// X86ISD::AVG instruction.
3169831645 return DAG.getStore(St->getChain(), dl, Avg, St->getBasePtr(),
3169931646 St->getPointerInfo(), St->getAlignment(),
3170031647 St->getMemOperand()->getFlags());
31701
31702 if (SDValue Val =
31703 detectUSatPattern(St->getValue(), St->getMemoryVT(), Subtarget))
31704 return EmitTruncSStore(false /* Unsigned saturation */, St->getChain(),
31705 dl, Val, St->getBasePtr(),
31706 St->getMemoryVT(), St->getMemOperand(), DAG);
3170731648
3170831649 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3170931650 unsigned NumElems = VT.getVectorNumElements();
3232432265 // Try to detect AVG pattern first.
3232532266 if (SDValue Avg = detectAVGPattern(Src, VT, DAG, Subtarget, DL))
3232632267 return Avg;
32327
32328 // Try the truncation with unsigned saturation.
32329 if (SDValue Val = detectUSatPattern(Src, VT, Subtarget))
32330 return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, Val);
3233132268
3233232269 // The bitcast source is a direct mmx result.
3233332270 // Detect bitcasts between i32 to x86mmx
499499 store <8 x i8> %x, <8 x i8>* %res
500500 ret void
501501 }
502
503
define void @usat_trunc_wb_256_mem(<16 x i16> %i, <16 x i8>* %res) {
; KNL-LABEL: usat_trunc_wb_256_mem:
; KNL:       ## BB#0:
; KNL-NEXT:    vpminuw {{.*}}(%rip), %ymm0, %ymm0
; KNL-NEXT:    vpmovsxwd %ymm0, %zmm0
; KNL-NEXT:    vpmovdb %zmm0, %xmm0
; KNL-NEXT:    vmovdqu %xmm0, (%rdi)
; KNL-NEXT:    retq
;
; SKX-LABEL: usat_trunc_wb_256_mem:
; SKX:       ## BB#0:
; SKX-NEXT:    vpmovuswb %ymm0, (%rdi)
; SKX-NEXT:    retq
  %x3 = icmp ult <16 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %x5 = select <16 x i1> %x3, <16 x i16> %i, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %x6 = trunc <16 x i16> %x5 to <16 x i8>
  store <16 x i8> %x6, <16 x i8>* %res, align 1
  ret void
}
523
define <16 x i8> @usat_trunc_wb_256(<16 x i16> %i) {
; KNL-LABEL: usat_trunc_wb_256:
; KNL:       ## BB#0:
; KNL-NEXT:    vpminuw {{.*}}(%rip), %ymm0, %ymm0
; KNL-NEXT:    vpmovsxwd %ymm0, %zmm0
; KNL-NEXT:    vpmovdb %zmm0, %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: usat_trunc_wb_256:
; SKX:       ## BB#0:
; SKX-NEXT:    vpmovuswb %ymm0, %xmm0
; SKX-NEXT:    retq
  %x3 = icmp ult <16 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %x5 = select <16 x i1> %x3, <16 x i16> %i, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %x6 = trunc <16 x i16> %x5 to <16 x i8>
  ret <16 x i8> %x6
}
541
define void @usat_trunc_wb_128_mem(<8 x i16> %i, <8 x i8>* %res) {
; KNL-LABEL: usat_trunc_wb_128_mem:
; KNL:       ## BB#0:
; KNL-NEXT:    vpminuw {{.*}}(%rip), %xmm0, %xmm0
; KNL-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; KNL-NEXT:    vmovq %xmm0, (%rdi)
; KNL-NEXT:    retq
;
; SKX-LABEL: usat_trunc_wb_128_mem:
; SKX:       ## BB#0:
; SKX-NEXT:    vpmovuswb %xmm0, (%rdi)
; SKX-NEXT:    retq
  %x3 = icmp ult <8 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %x5 = select <8 x i1> %x3, <8 x i16> %i, <8 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %x6 = trunc <8 x i16> %x5 to <8 x i8>
  store <8 x i8> %x6, <8 x i8>* %res, align 1
  ret void
}
560
define void @usat_trunc_db_512_mem(<16 x i32> %i, <16 x i8>* %res) {
; ALL-LABEL: usat_trunc_db_512_mem:
; ALL:       ## BB#0:
; ALL-NEXT:    vpmovusdb %zmm0, (%rdi)
; ALL-NEXT:    retq
  %x3 = icmp ult <16 x i32> %i, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
  %x5 = select <16 x i1> %x3, <16 x i32> %i, <16 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
  %x6 = trunc <16 x i32> %x5 to <16 x i8>
  store <16 x i8> %x6, <16 x i8>* %res, align 1
  ret void
}
572
define void @usat_trunc_qb_512_mem(<8 x i64> %i, <8 x i8>* %res) {
; ALL-LABEL: usat_trunc_qb_512_mem:
; ALL:       ## BB#0:
; ALL-NEXT:    vpmovusqb %zmm0, (%rdi)
; ALL-NEXT:    retq
  %x3 = icmp ult <8 x i64> %i, <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
  %x5 = select <8 x i1> %x3, <8 x i64> %i, <8 x i64> <i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255, i64 255>
  %x6 = trunc <8 x i64> %x5 to <8 x i8>
  store <8 x i8> %x6, <8 x i8>* %res, align 1
  ret void
}
584
define void @usat_trunc_qd_512_mem(<8 x i64> %i, <8 x i32>* %res) {
; ALL-LABEL: usat_trunc_qd_512_mem:
; ALL:       ## BB#0:
; ALL-NEXT:    vpmovusqd %zmm0, (%rdi)
; ALL-NEXT:    retq
  %x3 = icmp ult <8 x i64> %i, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
  %x5 = select <8 x i1> %x3, <8 x i64> %i, <8 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
  %x6 = trunc <8 x i64> %x5 to <8 x i32>
  store <8 x i32> %x6, <8 x i32>* %res, align 1
  ret void
}
596
define void @usat_trunc_qw_512_mem(<8 x i64> %i, <8 x i16>* %res) {
; ALL-LABEL: usat_trunc_qw_512_mem:
; ALL:       ## BB#0:
; ALL-NEXT:    vpmovusqw %zmm0, (%rdi)
; ALL-NEXT:    retq
  %x3 = icmp ult <8 x i64> %i, <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
  %x5 = select <8 x i1> %x3, <8 x i64> %i, <8 x i64> <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
  %x6 = trunc <8 x i64> %x5 to <8 x i16>
  store <8 x i16> %x6, <8 x i16>* %res, align 1
  ret void
}
608