llvm.org GIT mirror llvm / ac2d13f
[PPC] Move the combine "a << (b % (sizeof(a) * 8)) -> (PPCshl a, b)" to the backend. NFC. Summary: Eli pointed out that it's unsafe to combine the shifts to ISD::SHL etc., because those are not defined for b >= sizeof(a) * 8, even after some of the combiners run. However, PPCISD::SHL defines that behavior (as the instructions themselves do). Move the combination to the backend. The tests in shift_mask.ll still pass. Reviewers: echristo, hfinkel, efriedma, iteratee Subscribers: nemanjai, llvm-commits Differential Revision: https://reviews.llvm.org/D33076 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@302937 91177308-0d34-0410-b5e6-96231b3b80d8 Tim Shen 3 years ago
5 changed file(s) with 108 addition(s) and 58 deletion(s). Raw diff Collapse all Expand all
20622062 return false;
20632063 }
20642064
2065 // Return true if the instruction that performs a << b actually performs
2066 // a << (b % (sizeof(a) * 8)).
2067 virtual bool supportsModuloShift(ISD::NodeType Inst, EVT ReturnType) const {
2068 assert((Inst == ISD::SHL || Inst == ISD::SRA || Inst == ISD::SRL) &&
2069 "Expect a shift instruction");
2070 return false;
2071 }
2072
20732065 //===--------------------------------------------------------------------===//
20742066 // Runtime Library hooks
20752067 //
53125312 }
53135313 }
53145314
5315 // If the target supports masking y in (shl, y),
5316 // fold (shl x, (and y, (numbits(x) - 1))) -> (shl x, y)
5317 if (TLI.isOperationLegal(ISD::SHL, VT) &&
5318 TLI.supportsModuloShift(ISD::SHL, VT) && N1->getOpcode() == ISD::AND) {
5319 if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1))) {
5320 if (Mask->getZExtValue() == OpSizeInBits - 1) {
5321 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N1->getOperand(0));
5322 }
5323 }
5324 }
5325
53265315 ConstantSDNode *N1C = isConstOrConstSplat(N1);
53275316
53285317 // fold (shl c1, c2) -> c1<<c2
55215510 EVT VT = N0.getValueType();
55225511 unsigned OpSizeInBits = VT.getScalarSizeInBits();
55235512
5524 // If the target supports masking y in (sra, y),
5525 // fold (sra x, (and y, (numbits(x) - 1))) -> (sra x, y)
5526 if (TLI.isOperationLegal(ISD::SRA, VT) &&
5527 TLI.supportsModuloShift(ISD::SRA, VT) && N1->getOpcode() == ISD::AND) {
5528 if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1))) {
5529 if (Mask->getZExtValue() == OpSizeInBits - 1) {
5530 return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, N1->getOperand(0));
5531 }
5532 }
5533 }
5534
55355513 // Arithmetic shifting an all-sign-bit value is a no-op.
55365514 // fold (sra 0, x) -> 0
55375515 // fold (sra -1, x) -> -1
56855663 SDValue N1 = N->getOperand(1);
56865664 EVT VT = N0.getValueType();
56875665 unsigned OpSizeInBits = VT.getScalarSizeInBits();
5688
5689 // If the target supports masking y in (srl, y),
5690 // fold (srl x, (and y, (numbits(x) - 1))) -> (srl x, y)
5691 if (TLI.isOperationLegal(ISD::SRL, VT) &&
5692 TLI.supportsModuloShift(ISD::SRL, VT) && N1->getOpcode() == ISD::AND) {
5693 if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1))) {
5694 if (Mask->getZExtValue() == OpSizeInBits - 1) {
5695 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1->getOperand(0));
5696 }
5697 }
5698 }
56995666
57005667 // fold vector ops
57015668 if (VT.isVector())
922922 setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
923923
924924 // We have target-specific dag combine patterns for the following nodes:
925 setTargetDAGCombine(ISD::SHL);
926 setTargetDAGCombine(ISD::SRA);
927 setTargetDAGCombine(ISD::SRL);
925928 setTargetDAGCombine(ISD::SINT_TO_FP);
926929 setTargetDAGCombine(ISD::BUILD_VECTOR);
927930 if (Subtarget.hasFPCVT())
1131111314 SDLoc dl(N);
1131211315 switch (N->getOpcode()) {
1131311316 default: break;
11317 case ISD::SHL:
11318 return combineSHL(N, DCI);
11319 case ISD::SRA:
11320 return combineSRA(N, DCI);
11321 case ISD::SRL:
11322 return combineSRL(N, DCI);
1131411323 case PPCISD::SHL:
1131511324 if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
1131611325 return N->getOperand(0);
1294312952 return Imm.isPosZero();
1294412953 }
1294512954 }
12955
12956 // For a vector shift operation op whose elements are w bits wide, fold
12957 // (op x, (and y, (w - 1))) -> (target op x, y)
12958 static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,
12959 SelectionDAG &DAG) {
12960 SDValue N0 = N->getOperand(0); // value being shifted
12961 SDValue N1 = N->getOperand(1); // shift amount
12962 EVT VT = N0.getValueType();
12963 unsigned OpSizeInBits = VT.getScalarSizeInBits(); // w: bit width of one element
12964 unsigned Opcode = N->getOpcode();
12965 unsigned TargetOpcode;
12966
12967 switch (Opcode) { // pick the PPCISD node matching the generic shift opcode
12968 default:
12969 llvm_unreachable("Unexpected shift operation");
12970 case ISD::SHL:
12971 TargetOpcode = PPCISD::SHL;
12972 break;
12973 case ISD::SRL:
12974 TargetOpcode = PPCISD::SRL;
12975 break;
12976 case ISD::SRA:
12977 TargetOpcode = PPCISD::SRA;
12978 break;
12979 }
12980
12981 if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) && // only legal vector shifts
12982 N1->getOpcode() == ISD::AND)
12983 if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
12984 if (Mask->getZExtValue() == OpSizeInBits - 1) // mask must be exactly w - 1
12985 return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0)); // shift by the unmasked amount
12986
12987 return SDValue(); // pattern not matched: return an empty SDValue
12988 }
12989
12990 SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const { // dispatched from the ISD::SHL case
12991 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG)) // try to drop a redundant mask on the shift amount
12992 return Value;
12993
12994 return SDValue(); // nothing folded: empty SDValue
12995 }
12996
12997 SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const { // dispatched from the ISD::SRA case
12998 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG)) // try to drop a redundant mask on the shift amount
12999 return Value;
13000
13001 return SDValue(); // nothing folded: empty SDValue
13002 }
13003
13004 SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const { // dispatched from the ISD::SRL case
13005 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG)) // try to drop a redundant mask on the shift amount
13006 return Value;
13007
13008 return SDValue(); // nothing folded: empty SDValue
13009 }
116116 /// at function entry, used for PIC code.
117117 GlobalBaseReg,
118118
119 /// These nodes represent the 32-bit PPC shifts that operate on 6-bit
120 /// shift amounts. These nodes are generated by the multi-precision shift
121 /// code.
119 /// These nodes represent PPC shifts.
120 ///
121 /// For scalar types, only the last `n + 1` bits of the shift amounts
122 /// are used, where n is log2(sizeof(element) * 8). See sld/slw, etc.
123 /// for exact behaviors.
124 ///
125 /// For vector types, only the last n bits are used. See vsld.
122126 SRL, SRA, SHL,
123127
124128 /// The combination of sra[wd]i and addze used to implemented signed
9981002 SDValue DAGCombineBuildVector(SDNode *N, DAGCombinerInfo &DCI) const;
9991003 SDValue DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const;
10001004 SDValue combineFPToIntToFP(SDNode *N, DAGCombinerInfo &DCI) const;
1005 SDValue combineSHL(SDNode *N, DAGCombinerInfo &DCI) const;
1006 SDValue combineSRA(SDNode *N, DAGCombinerInfo &DCI) const;
1007 SDValue combineSRL(SDNode *N, DAGCombinerInfo &DCI) const;
10011008
10021009 /// ConvertSETCCToSubtract - looks at SETCC that compares ints. It replaces
10031010 /// SETCC with integer subtraction when (1) there is a legal way of doing it
10161023 SDValue
10171024 combineElementTruncationToVectorTruncation(SDNode *N,
10181025 DAGCombinerInfo &DCI) const;
1019
1020 bool supportsModuloShift(ISD::NodeType Inst,
1021 EVT ReturnType) const override {
1022 assert((Inst == ISD::SHL || Inst == ISD::SRA || Inst == ISD::SRL) &&
1023 "Expect a shift instruction");
1024 assert(isOperationLegal(Inst, ReturnType));
1025 return ReturnType.isVector();
1026 }
10271026 };
10281027
10291028 namespace PPC {
986986 (v8i16 (VSLH $vA, $vB))>;
987987 def : Pat<(v4i32 (shl v4i32:$vA, v4i32:$vB)),
988988 (v4i32 (VSLW $vA, $vB))>;
989 def : Pat<(v16i8 (PPCshl v16i8:$vA, v16i8:$vB)),
990 (v16i8 (VSLB $vA, $vB))>;
991 def : Pat<(v8i16 (PPCshl v8i16:$vA, v8i16:$vB)),
992 (v8i16 (VSLH $vA, $vB))>;
993 def : Pat<(v4i32 (PPCshl v4i32:$vA, v4i32:$vB)),
994 (v4i32 (VSLW $vA, $vB))>;
989995
990996 def : Pat<(v16i8 (srl v16i8:$vA, v16i8:$vB)),
991997 (v16i8 (VSRB $vA, $vB))>;
993999 (v8i16 (VSRH $vA, $vB))>;
9941000 def : Pat<(v4i32 (srl v4i32:$vA, v4i32:$vB)),
9951001 (v4i32 (VSRW $vA, $vB))>;
1002 def : Pat<(v16i8 (PPCsrl v16i8:$vA, v16i8:$vB)),
1003 (v16i8 (VSRB $vA, $vB))>;
1004 def : Pat<(v8i16 (PPCsrl v8i16:$vA, v8i16:$vB)),
1005 (v8i16 (VSRH $vA, $vB))>;
1006 def : Pat<(v4i32 (PPCsrl v4i32:$vA, v4i32:$vB)),
1007 (v4i32 (VSRW $vA, $vB))>;
9961008
9971009 def : Pat<(v16i8 (sra v16i8:$vA, v16i8:$vB)),
9981010 (v16i8 (VSRAB $vA, $vB))>;
9991011 def : Pat<(v8i16 (sra v8i16:$vA, v8i16:$vB)),
10001012 (v8i16 (VSRAH $vA, $vB))>;
10011013 def : Pat<(v4i32 (sra v4i32:$vA, v4i32:$vB)),
1014 (v4i32 (VSRAW $vA, $vB))>;
1015 def : Pat<(v16i8 (PPCsra v16i8:$vA, v16i8:$vB)),
1016 (v16i8 (VSRAB $vA, $vB))>;
1017 def : Pat<(v8i16 (PPCsra v8i16:$vA, v8i16:$vB)),
1018 (v8i16 (VSRAH $vA, $vB))>;
1019 def : Pat<(v4i32 (PPCsra v4i32:$vA, v4i32:$vB)),
10021020 (v4i32 (VSRAW $vA, $vB))>;
10031021
10041022 // Float to integer and integer to float conversions
10711089 // Vector shifts
10721090 def VRLD : VX1_Int_Ty<196, "vrld", int_ppc_altivec_vrld, v2i64>;
10731091 def VSLD : VXForm_1<1476, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
1074 "vsld $vD, $vA, $vB", IIC_VecGeneral,
1075 [(set v2i64:$vD, (shl v2i64:$vA, v2i64:$vB))]>;
1092 "vsld $vD, $vA, $vB", IIC_VecGeneral, []>;
10761093 def VSRD : VXForm_1<1732, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
1077 "vsrd $vD, $vA, $vB", IIC_VecGeneral,
1078 [(set v2i64:$vD, (srl v2i64:$vA, v2i64:$vB))]>;
1094 "vsrd $vD, $vA, $vB", IIC_VecGeneral, []>;
10791095 def VSRAD : VXForm_1<964, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
1080 "vsrad $vD, $vA, $vB", IIC_VecGeneral,
1081 [(set v2i64:$vD, (sra v2i64:$vA, v2i64:$vB))]>;
1096 "vsrad $vD, $vA, $vB", IIC_VecGeneral, []>;
1097
1098 def : Pat<(v2i64 (shl v2i64:$vA, v2i64:$vB)),
1099 (v2i64 (VSLD $vA, $vB))>;
1100 def : Pat<(v2i64 (PPCshl v2i64:$vA, v2i64:$vB)),
1101 (v2i64 (VSLD $vA, $vB))>;
1102 def : Pat<(v2i64 (srl v2i64:$vA, v2i64:$vB)),
1103 (v2i64 (VSRD $vA, $vB))>;
1104 def : Pat<(v2i64 (PPCsrl v2i64:$vA, v2i64:$vB)),
1105 (v2i64 (VSRD $vA, $vB))>;
1106 def : Pat<(v2i64 (sra v2i64:$vA, v2i64:$vB)),
1107 (v2i64 (VSRAD $vA, $vB))>;
1108 def : Pat<(v2i64 (PPCsra v2i64:$vA, v2i64:$vB)),
1109 (v2i64 (VSRAD $vA, $vB))>;
10821110
10831111 // Vector Integer Arithmetic Instructions
10841112 let isCommutable = 1 in {