llvm.org GIT mirror llvm / e54a2b3
[DAGCombiner] visitRotate patch to optimize a pair of ROTR/ROTL instructions into one with a combined shift operand. For two ROTR operations with shifts C1 and C2, the combined shift operand will be (C1 + C2) % bitsize. Differential revision: https://reviews.llvm.org/D12833 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@307179 91177308-0d34-0410-b5e6-96231b3b80d8 Andrew Zhogin 2 years ago
3 changed file(s) with 24 addition(s) and 10 deletion(s). Raw diff Collapse all Expand all
52785278 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
52795279 return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
52805280 }
5281
5282 unsigned NextOp = N0.getOpcode();
5283 // fold (rot* (rot* x, c2), c1) -> (rot* x, (c1 +- c2) % bitsize)
5284 if (NextOp == ISD::ROTL || NextOp == ISD::ROTR)
5285 if (SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1))
5286 if (SDNode *C2 =
5287 DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
5288 bool SameSide = (N->getOpcode() == NextOp);
5289 unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
5290 if (SDValue CombinedShift =
5291 DAG.FoldConstantArithmetic(CombineOp, dl, VT, C1, C2)) {
5292 unsigned Bitsize = VT.getScalarSizeInBits();
5293 SDValue BitsizeC = DAG.getConstant(Bitsize, dl, VT);
5294 SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
5295 ISD::SREM, dl, VT, CombinedShift.getNode(), BitsizeC.getNode());
5296 return DAG.getNode(
5297 N->getOpcode(), dl, VT, N0->getOperand(0), CombinedShiftNorm);
5298 }
5299 }
52815300 return SDValue();
52825301 }
52835302
22 ; rotr (rotr x, 4), 6 -> rotr x, 10 -> ror r0, r0, #10
33 define i32 @test1(i32 %x) nounwind readnone {
44 ; CHECK-LABEL: test1:
5 ; CHECK: ror r0, r0, #4
6 ; CHECK: ror r0, r0, #6
5 ; CHECK: ror r0, r0, #10
76 ; CHECK: bx lr
87 entry:
98 %high_part.i = shl i32 %x, 28
1817 ; the same vector test
1918 define <2 x i32> @test2(<2 x i32> %x) nounwind readnone {
2019 ; CHECK-LABEL: test2:
21 ; CHECK: ror r0, r0, #4
22 ; CHECK: ror r1, r1, #4
23 ; CHECK: ror r0, r0, #6
24 ; CHECK: ror r1, r1, #6
20 ; CHECK: ror r0, r0, #10
21 ; CHECK: ror r1, r1, #10
2522 ; CHECK: bx lr
2623 entry:
2724 %high_part.i = shl <2 x i32> %x,
3434 define <4 x i32> @combine_vec_rot_rot_splat(<4 x i32> %x) {
3535 ; XOP-LABEL: combine_vec_rot_rot_splat:
3636 ; XOP: # BB#0:
37 ; XOP-NEXT: vprotd $29, %xmm0, %xmm0
38 ; XOP-NEXT: vprotd $10, %xmm0, %xmm0
37 ; XOP-NEXT: vprotd $7, %xmm0, %xmm0
3938 ; XOP-NEXT: retq
4039 ;
4140 ; AVX512-LABEL: combine_vec_rot_rot_splat:
5958 define <4 x i32> @combine_vec_rot_rot_splat_zero(<4 x i32> %x) {
6059 ; XOP-LABEL: combine_vec_rot_rot_splat_zero:
6160 ; XOP: # BB#0:
62 ; XOP-NEXT: vprotd $31, %xmm0, %xmm0
63 ; XOP-NEXT: vprotd $1, %xmm0, %xmm0
61 ; XOP-NEXT: vprotd $0, %xmm0, %xmm0
6462 ; XOP-NEXT: retq
6563 ;
6664 ; AVX512-LABEL: combine_vec_rot_rot_splat_zero: