llvm.org GIT mirror llvm / 1e4c44b
[X86][SSE] Add OR(EXTRACTELT(X,0),OR(EXTRACTELT(X,1))) -> MOVMSK+CMP reduction combine git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@375463 91177308-0d34-0410-b5e6-96231b3b80d8 Simon Pilgrim 1 year, 1 month ago
2 changed file(s) with 30 addition(s) and 18 deletion(s). Raw diff Collapse all Expand all
3952639526 DAG.getBitcast(MVT::v4f32, N1)));
3952739527 }
3952839528
39529 // Match any-of bool scalar reductions into a bitcast/movmsk + cmp.
39530 // TODO: Support multiple SrcOps.
39531 if (VT == MVT::i1) {
39532 SmallVector<SDValue, 2> SrcOps;
39533 if (matchScalarReduction(SDValue(N, 0), ISD::OR, SrcOps) &&
39534 SrcOps.size() == 1) {
39535 SDLoc dl(N);
39536 unsigned NumElts = SrcOps[0].getValueType().getVectorNumElements();
39537 EVT MaskVT = EVT::getIntegerVT(*DAG.getContext(), NumElts);
39538 SDValue Mask = combineBitcastvxi1(DAG, MaskVT, SrcOps[0], dl, Subtarget);
39539 if (Mask) {
39540 APInt AllBits = APInt::getNullValue(NumElts);
39541 return DAG.getSetCC(dl, MVT::i1, Mask,
39542 DAG.getConstant(AllBits, dl, MaskVT), ISD::SETNE);
39543 }
39544 }
39545 }
39546
3952939547 if (DCI.isBeforeLegalizeOps())
3953039548 return SDValue();
3953139549
45124512 ; SSE2-NEXT: pcmpeqd %xmm1, %xmm0
45134513 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
45144514 ; SSE2-NEXT: pand %xmm0, %xmm1
4515 ; SSE2-NEXT: movmskpd %xmm1, %ecx
4516 ; SSE2-NEXT: xorl $3, %ecx
4517 ; SSE2-NEXT: movl %ecx, %eax
4518 ; SSE2-NEXT: shrb %al
4519 ; SSE2-NEXT: orb %cl, %al
4515 ; SSE2-NEXT: movmskpd %xmm1, %eax
4516 ; SSE2-NEXT: xorb $3, %al
4517 ; SSE2-NEXT: setne %al
45204518 ; SSE2-NEXT: retq
45214519 ;
45224520 ; AVX-LABEL: movmsk_or_v2i64:
45234521 ; AVX: # %bb.0:
45244522 ; AVX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
4525 ; AVX-NEXT: vmovmskpd %xmm0, %ecx
4526 ; AVX-NEXT: xorl $3, %ecx
4527 ; AVX-NEXT: movl %ecx, %eax
4528 ; AVX-NEXT: shrb %al
4529 ; AVX-NEXT: orb %cl, %al
4523 ; AVX-NEXT: vmovmskpd %xmm0, %eax
4524 ; AVX-NEXT: xorb $3, %al
4525 ; AVX-NEXT: setne %al
45304526 ; AVX-NEXT: retq
45314527 ;
45324528 ; KNL-LABEL: movmsk_or_v2i64:
46674663 ; SSE2-LABEL: movmsk_or_v2f64:
46684664 ; SSE2: # %bb.0:
46694665 ; SSE2-NEXT: cmplepd %xmm0, %xmm1
4670 ; SSE2-NEXT: movmskpd %xmm1, %ecx
4671 ; SSE2-NEXT: movl %ecx, %eax
4672 ; SSE2-NEXT: shrb %al
4673 ; SSE2-NEXT: orb %cl, %al
4666 ; SSE2-NEXT: movmskpd %xmm1, %eax
4667 ; SSE2-NEXT: testb %al, %al
4668 ; SSE2-NEXT: setne %al
46744669 ; SSE2-NEXT: retq
46754670 ;
46764671 ; AVX-LABEL: movmsk_or_v2f64:
46774672 ; AVX: # %bb.0:
46784673 ; AVX-NEXT: vcmplepd %xmm0, %xmm1, %xmm0
4679 ; AVX-NEXT: vmovmskpd %xmm0, %ecx
4680 ; AVX-NEXT: movl %ecx, %eax
4681 ; AVX-NEXT: shrb %al
4682 ; AVX-NEXT: orb %cl, %al
4674 ; AVX-NEXT: vmovmskpd %xmm0, %eax
4675 ; AVX-NEXT: testb %al, %al
4676 ; AVX-NEXT: setne %al
46834677 ; AVX-NEXT: retq
46844678 ;
46854679 ; KNL-LABEL: movmsk_or_v2f64: