llvm.org GIT mirror llvm / f09c389
Merging r371088 and r371095: ------------------------------------------------------------------------ r371088 | spatel | 2019-09-05 18:58:18 +0200 (Thu, 05 Sep 2019) | 1 line [x86] add test for horizontal math bug (PR43225); NFC ------------------------------------------------------------------------ ------------------------------------------------------------------------ r371095 | spatel | 2019-09-05 19:28:17 +0200 (Thu, 05 Sep 2019) | 3 lines [x86] fix horizontal math bug exposed by improved demanded elements analysis (PR43225) https://bugs.llvm.org/show_bug.cgi?id=43225 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_90@371178 91177308-0d34-0410-b5e6-96231b3b80d8 Hans Wennborg 1 year, 2 months ago
2 changed file(s) with 50 addition(s) and 5 deletion(s). Raw diff Collapse all Expand all
3359333593 }
3359433594
3359533595 /// Eliminate a redundant shuffle of a horizontal math op.
33596 static SDValue foldShuffleOfHorizOp(SDNode *N) {
33596 static SDValue foldShuffleOfHorizOp(SDNode *N, SelectionDAG &DAG) {
3359733597 unsigned Opcode = N->getOpcode();
3359833598 if (Opcode != X86ISD::MOVDDUP && Opcode != X86ISD::VBROADCAST)
3359933599 if (Opcode != ISD::VECTOR_SHUFFLE || !N->getOperand(1).isUndef())
3362433624 HOp.getOperand(0) != HOp.getOperand(1))
3362533625 return SDValue();
3362633626
33627 // The shuffle that we are eliminating may have allowed the horizontal op to
33628 // have an undemanded (undefined) operand. Duplicate the other (defined)
33629 // operand to ensure that the results are defined across all lanes without the
33630 // shuffle.
33631 auto updateHOp = [](SDValue HorizOp, SelectionDAG &DAG) {
33632 SDValue X;
33633 if (HorizOp.getOperand(0).isUndef()) {
33634 assert(!HorizOp.getOperand(1).isUndef() && "Not expecting foldable h-op");
33635 X = HorizOp.getOperand(1);
33636 } else if (HorizOp.getOperand(1).isUndef()) {
33637 assert(!HorizOp.getOperand(0).isUndef() && "Not expecting foldable h-op");
33638 X = HorizOp.getOperand(0);
33639 } else {
33640 return HorizOp;
33641 }
33642 return DAG.getNode(HorizOp.getOpcode(), SDLoc(HorizOp),
33643 HorizOp.getValueType(), X, X);
33644 };
33645
3362733646 // When the operands of a horizontal math op are identical, the low half of
3362833647 // the result is the same as the high half. If a target shuffle is also
3362933648 // replicating low and high halves, we don't need the shuffle.
3363433653 assert((HOp.getValueType() == MVT::v2f64 ||
3363533654 HOp.getValueType() == MVT::v4f64) && HOp.getValueType() == VT &&
3363633655 "Unexpected type for h-op");
33637 return HOp;
33656 return updateHOp(HOp, DAG);
3363833657 }
3363933658 return SDValue();
3364033659 }
3364833667 (isTargetShuffleEquivalent(Mask, {0, 0}) ||
3364933668 isTargetShuffleEquivalent(Mask, {0, 1, 0, 1}) ||
3365033669 isTargetShuffleEquivalent(Mask, {0, 1, 2, 3, 0, 1, 2, 3})))
33651 return HOp;
33670 return updateHOp(HOp, DAG);
3365233671
3365333672 if (HOp.getValueSizeInBits() == 256 &&
3365433673 (isTargetShuffleEquivalent(Mask, {0, 0, 2, 2}) ||
3365533674 isTargetShuffleEquivalent(Mask, {0, 1, 0, 1, 4, 5, 4, 5}) ||
3365633675 isTargetShuffleEquivalent(
3365733676 Mask, {0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 8, 9, 10, 11})))
33658 return HOp;
33677 return updateHOp(HOp, DAG);
3365933678
3366033679 return SDValue();
3366133680 }
3370933728 if (SDValue AddSub = combineShuffleToAddSubOrFMAddSub(N, Subtarget, DAG))
3371033729 return AddSub;
3371133730
33712 if (SDValue HAddSub = foldShuffleOfHorizOp(N))
33731 if (SDValue HAddSub = foldShuffleOfHorizOp(N, DAG))
3371333732 return HAddSub;
3371433733 }
3371533734
0 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
1 ; RUN: llc < %s -mtriple=x86_64-- -mattr=avx | FileCheck %s
2
3 ; Eliminating a shuffle means we have to replace an undef operand of a horizontal op.
4
5 define void @PR43225(<4 x double>* %p0, <4 x double>* %p1, <4 x double> %x, <4 x double> %y, <4 x double> %z) nounwind {
6 ; CHECK-LABEL: PR43225:
7 ; CHECK: # %bb.0:
8 ; CHECK-NEXT: vmovaps (%rdi), %ymm0
9 ; CHECK-NEXT: vmovaps (%rsi), %ymm0
10 ; CHECK-NEXT: vhsubpd %ymm2, %ymm2, %ymm0
11 ; CHECK-NEXT: vmovapd %ymm0, (%rdi)
12 ; CHECK-NEXT: vzeroupper
13 ; CHECK-NEXT: retq
14 %t39 = load volatile <4 x double>, <4 x double>* %p0, align 32
15 %shuffle11 = shufflevector <4 x double> %t39, <4 x double> %x, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
16 %t40 = load volatile <4 x double>, <4 x double>* %p1, align 32
17 %t41 = tail call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %shuffle11, <4 x double> %t40)
18 %t42 = tail call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %z, <4 x double> %t41)
19 %shuffle12 = shufflevector <4 x double> %t42, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
20 store volatile <4 x double> %shuffle12, <4 x double>* %p0, align 32
21 ret void
22 }
23
24 declare <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double>, <4 x double>)
25 declare <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double>, <4 x double>)