llvm / 7b4bd48
[X86] Replace AND+IMM64 with SRL/SHL

Emit SHRQ/SHLQ instead of ANDQ with a 64-bit constant mask if the result
is unused and the mask has only higher/lower bits set. For example, with
this patch LLVM emits

  shrq $41, %rdi
  je

instead of

  movabsq $0xFFFFFE0000000000, %rcx
  testq %rcx, %rdi
  je

This reduces the number of instructions, code size and register pressure.
The transformation is applied only for cases where the mask cannot be
encoded as an immediate value within the TESTQ instruction.

Differential Revision: https://reviews.llvm.org/D28198

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@291806 91177308-0d34-0410-b5e6-96231b3b80d8

Nikolai Bozhenov
4 changed files with 111 additions and 18 deletions.
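As a standalone illustration (not part of the commit, helper names are made up here), the following minimal C++ sketch checks the equivalence the patch relies on: for a zero/non-zero test, ANDing with a mask whose set bits are exactly the top 23 bits of a 64-bit value gives the same answer as logically shifting right by 41.

#include <cassert>
#include <cstdint>

// Zero-flag check written as AND with a 64-bit immediate mask
// (lowered as movabsq + testq before this patch).
static bool highBitsSetViaAnd(uint64_t X) {
  return (X & 0xFFFFFE0000000000ULL) != 0;
}

// The same check written as a logical right shift (shrq $41 after the
// patch); the mask covers exactly the top 64 - 41 = 23 bits.
static bool highBitsSetViaShift(uint64_t X) {
  return (X >> 41) != 0;
}

int main() {
  const uint64_t Samples[] = {0, 1, (1ULL << 41) - 1, 1ULL << 41,
                              0xFFFFFE0000000000ULL, ~0ULL};
  for (uint64_t X : Samples)
    assert(highBitsSetViaAnd(X) == highBitsSetViaShift(X));
  return 0;
}

The analogous low-bits case (masks such as 0x0000FFFFFFFFFFFF) maps to a left shift instead, which is why the patch also emits SHLQ; test24 below exercises it.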
@@ -16017,6 +16017,12 @@
 }
 }
 
+// Sometimes flags can be set either with an AND or with an SRL/SHL
+// instruction. SRL/SHL variant should be preferred for masks longer than this
+// number of bits.
+const int ShiftToAndMaxMaskWidth = 32;
+const bool ZeroCheck = (X86CC == X86::COND_E || X86CC == X86::COND_NE);
+
 // NOTICE: In the code below we use ArithOp to hold the arithmetic operation
 // which may be the result of a CAST. We use the variable 'Op', which is the
 // non-casted variable when we check for possible users.
@@ -16065,7 +16071,7 @@
 // If we have a constant logical shift that's only used in a comparison
 // against zero turn it into an equivalent AND. This allows turning it into
 // a TEST instruction later.
-if ((X86CC == X86::COND_E || X86CC == X86::COND_NE) && Op->hasOneUse() &&
+if (ZeroCheck && Op->hasOneUse() &&
 isa<ConstantSDNode>(Op->getOperand(1)) && !hasNonFlagsUse(Op)) {
 EVT VT = Op.getValueType();
 unsigned BitWidth = VT.getSizeInBits();
@@ -16075,7 +16081,7 @@
 APInt Mask = ArithOp.getOpcode() == ISD::SRL
 ? APInt::getHighBitsSet(BitWidth, BitWidth - ShAmt)
 : APInt::getLowBitsSet(BitWidth, BitWidth - ShAmt);
-if (!Mask.isSignedIntN(32)) // Avoid large immediates.
+if (!Mask.isSignedIntN(ShiftToAndMaxMaskWidth))
 break;
 Op = DAG.getNode(ISD::AND, dl, VT, Op->getOperand(0),
 DAG.getConstant(Mask, dl, VT));
@@ -16084,18 +16090,59 @@
 
 case ISD::AND:
 // If the primary 'and' result isn't used, don't bother using X86ISD::AND,
-// because a TEST instruction will be better.
+// because a TEST instruction will be better. However, AND should be
+// preferred if the instruction can be combined into ANDN.
 if (!hasNonFlagsUse(Op)) {
 SDValue Op0 = ArithOp->getOperand(0);
 SDValue Op1 = ArithOp->getOperand(1);
 EVT VT = ArithOp.getValueType();
 bool isAndn = isBitwiseNot(Op0) || isBitwiseNot(Op1);
 bool isLegalAndnType = VT == MVT::i32 || VT == MVT::i64;
-
-// But if we can combine this into an ANDN operation, then create an AND
-// now and allow it to be pattern matched into an ANDN.
-if (!Subtarget.hasBMI() || !isAndn || !isLegalAndnType)
+bool isProperAndn = isAndn && isLegalAndnType && Subtarget.hasBMI();
+
+// If we cannot select an ANDN instruction, check if we can replace
+// AND+IMM64 with a shift before giving up. This is possible for masks
+// like 0xFF000000 or 0x00FFFFFF and if we care only about the zero flag.
+if (!isProperAndn) {
+if (!ZeroCheck)
+break;
+
+assert(!isa<ConstantSDNode>(Op0) && "AND node isn't canonicalized");
+auto *CN = dyn_cast<ConstantSDNode>(Op1);
+if (!CN)
+break;
+
+const APInt &Mask = CN->getAPIntValue();
+if (Mask.isSignedIntN(ShiftToAndMaxMaskWidth))
+break; // Prefer TEST instruction.
+
+unsigned BitWidth = Mask.getBitWidth();
+unsigned LeadingOnes = Mask.countLeadingOnes();
+unsigned TrailingZeros = Mask.countTrailingZeros();
+
+if (LeadingOnes + TrailingZeros == BitWidth) {
+assert(TrailingZeros < VT.getSizeInBits() &&
+"Shift amount should be less than the type width");
+MVT ShTy = getScalarShiftAmountTy(DAG.getDataLayout(), VT);
+SDValue ShAmt = DAG.getConstant(TrailingZeros, dl, ShTy);
+Op = DAG.getNode(ISD::SRL, dl, VT, Op0, ShAmt);
+break;
+}
+
+unsigned LeadingZeros = Mask.countLeadingZeros();
+unsigned TrailingOnes = Mask.countTrailingOnes();
+
+if (LeadingZeros + TrailingOnes == BitWidth) {
+assert(LeadingZeros < VT.getSizeInBits() &&
+"Shift amount should be less than the type width");
+MVT ShTy = getScalarShiftAmountTy(DAG.getDataLayout(), VT);
+SDValue ShAmt = DAG.getConstant(LeadingZeros, dl, ShTy);
+Op = DAG.getNode(ISD::SHL, dl, VT, Op0, ShAmt);
+break;
+}
+
 break;
+}
 }
 LLVM_FALLTHROUGH;
 case ISD::SUB:
@@ -16115,7 +16162,7 @@
 case ISD::XOR: Opcode = X86ISD::XOR; break;
 case ISD::AND: Opcode = X86ISD::AND; break;
 case ISD::OR: {
-if (!NeedTruncation && (X86CC == X86::COND_E || X86CC == X86::COND_NE)) {
+if (!NeedTruncation && ZeroCheck) {
 if (SDValue EFLAGS = LowerVectorAllZeroTest(Op, Subtarget, DAG))
 return EFLAGS;
 }
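The new ISD::AND handling above boils down to a shape test on the constant mask. Below is a rough standalone sketch of that test (not the patch's code: it assumes C++20 <bit>, uses a plain uint64_t in place of APInt, and the names classifyMask/ShiftForMask are illustrative only).

#include <bit>
#include <cstdint>
#include <optional>

enum class ShiftKind { Srl, Shl };
struct ShiftForMask { ShiftKind Kind; unsigned Amount; };

// Shape test mirroring the block above: a run of ones anchored at the top
// of the value (LeadingOnes + TrailingZeros == BitWidth) can be checked
// for zero with an SRL by TrailingZeros; a run anchored at the bottom with
// an SHL by LeadingZeros. Any other mask keeps using TEST. The patch only
// reaches this point for masks that do not fit a signed 32-bit immediate,
// so the Mask != 0 guard here is just for the standalone sketch.
static std::optional<ShiftForMask> classifyMask(uint64_t Mask) {
  constexpr unsigned BitWidth = 64;

  unsigned LeadingOnes = std::countl_one(Mask);
  unsigned TrailingZeros = std::countr_zero(Mask);
  if (Mask != 0 && LeadingOnes + TrailingZeros == BitWidth)
    return ShiftForMask{ShiftKind::Srl, TrailingZeros};

  unsigned LeadingZeros = std::countl_zero(Mask);
  unsigned TrailingOnes = std::countr_one(Mask);
  if (Mask != 0 && LeadingZeros + TrailingOnes == BitWidth)
    return ShiftForMask{ShiftKind::Shl, LeadingZeros};

  return std::nullopt; // Not a single edge-anchored run of ones.
}

For the masks used in the new tests, classifyMask(0xFFFFFE0000000000) yields {Srl, 41} and classifyMask(0x0000FFFFFFFFFFFF) yields {Shl, 16}, matching the shrq $41 and shlq $16 checks below.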
@@ -8,8 +8,7 @@
 ; CHECK: # BB#0:
 ; CHECK-NEXT: movq %rdi, %rax
 ; CHECK-NEXT: orq %rsi, %rax
-; CHECK-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000
-; CHECK-NEXT: testq %rcx, %rax
+; CHECK-NEXT: shrq $32, %rax
 ; CHECK-NEXT: je .LBB0_1
 ; CHECK-NEXT: # BB#2:
 ; CHECK-NEXT: movq %rdi, %rax
@@ -31,8 +30,7 @@
 ; CHECK: # BB#0:
 ; CHECK-NEXT: movq %rdi, %rax
 ; CHECK-NEXT: orq %rsi, %rax
-; CHECK-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000
-; CHECK-NEXT: testq %rcx, %rax
+; CHECK-NEXT: shrq $32, %rax
 ; CHECK-NEXT: je .LBB1_1
 ; CHECK-NEXT: # BB#2:
 ; CHECK-NEXT: movq %rdi, %rax
@@ -56,8 +54,7 @@
 ; CHECK: # BB#0:
 ; CHECK-NEXT: movq %rdi, %rax
 ; CHECK-NEXT: orq %rsi, %rax
-; CHECK-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000
-; CHECK-NEXT: testq %rcx, %rax
+; CHECK-NEXT: shrq $32, %rax
 ; CHECK-NEXT: je .LBB2_1
 ; CHECK-NEXT: # BB#2:
 ; CHECK-NEXT: movq %rdi, %rax
@@ -21,9 +21,8 @@
 define i64 @div64(i64 %a, i64 %b) {
 entry:
 ; CHECK-LABEL: div64:
-; CHECK-DAG: movabsq $-4294967296, [[REGMSK:%[a-z]+]]
-; CHECK-DAG: orq %{{.*}}, [[REG:%[a-z]+]]
-; CHECK: testq [[REGMSK]], [[REG]]
+; CHECK: orq %{{.*}}, [[REG:%[a-z]+]]
+; CHECK: shrq $32, [[REG]]
 ; CHECK: divl
 ;
 %div = sdiv i64 %a, %b
@@ -280,4 +280,54 @@
 ; CHECK: setne
 ; CHECK: testl
 ; CHECK: setne
 }
+
+define i32 @test21(i64 %val) {
+%and = and i64 %val, -2199023255552 ; 0xFFFFFE0000000000
+%cmp = icmp ne i64 %and, 0
+%ret = zext i1 %cmp to i32
+ret i32 %ret
+
+; CHECK-LABEL: test21
+; CHECK: shrq $41, %rdi
+; CHECK-NOT: test
+; CHECK: setne %al
+; CHECK: retq
+}
+
+; AND-to-SHR transformation is enabled for eq/ne condition codes only.
+define i32 @test22(i64 %val) {
+%and = and i64 %val, -2199023255552 ; 0xFFFFFE0000000000
+%cmp = icmp ult i64 %and, 0
+%ret = zext i1 %cmp to i32
+ret i32 %ret
+
+; CHECK-LABEL: test22
+; CHECK-NOT: shrq $41
+; CHECK: retq
+}
+
+define i32 @test23(i64 %val) {
+%and = and i64 %val, -1048576 ; 0xFFFFFFFFFFF00000
+%cmp = icmp ne i64 %and, 0
+%ret = zext i1 %cmp to i32
+ret i32 %ret
+
+; CHECK-LABEL: test23
+; CHECK: testq $-1048576, %rdi
+; CHECK: setne %al
+; CHECK: retq
+}
+
+define i32 @test24(i64 %val) {
+%and = and i64 %val, 281474976710655 ; 0x0000FFFFFFFFFFFF
+%cmp = icmp ne i64 %and, 0
+%ret = zext i1 %cmp to i32
+ret i32 %ret
+
+; CHECK-LABEL: test24
+; CHECK: shlq $16, %rdi
+; CHECK-NOT: test
+; CHECK: setne %al
+; CHECK: retq
+}