llvm.org GIT mirror llvm / 202be06
X86: If we have an instruction that sets a flag and a zero test on the input of that instruction, try to eliminate the test.

For example

  tzcntl %edi, %ebx
  testl %edi, %edi
  je .label

can be rewritten into

  tzcntl %edi, %ebx
  jb .label

A minor complication is that tzcnt sets CF instead of ZF when the input is zero, so we have to rewrite users of the flags from ZF to CF. Currently we recognize patterns using lzcnt, tzcnt and popcnt.

Differential Revision: http://reviews.llvm.org/D3454

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@208788 91177308-0d34-0410-b5e6-96231b3b80d8

Benjamin Kramer, 5 years ago
2 changed file(s) with 139 addition(s) and 5 deletion(s). Raw diff Collapse all Expand all
35583558 }
35593559 }
35603560
3561 /// isUseDefConvertible - check whether MI, an instruction that uses a register and sets EFLAGS, can stand in for a later comparison of that register against zero.
3562 /// Returns the condition code that holds after MI when its source operand was zero, or X86::COND_INVALID if MI is not one of the recognized LZCNT/POPCNT/TZCNT opcodes.
3563 static X86::CondCode isUseDefConvertible(MachineInstr *MI) {
3564 switch (MI->getOpcode()) {
3565 default: return X86::COND_INVALID; // not a recognized flag-setting use
3566 case X86::LZCNT16rr: case X86::LZCNT16rm:
3567 case X86::LZCNT32rr: case X86::LZCNT32rm:
3568 case X86::LZCNT64rr: case X86::LZCNT64rm:
3569 return X86::COND_B; // LZCNT reports a zero source through CF, so test "below"
3570 case X86::POPCNT16rr:case X86::POPCNT16rm:
3571 case X86::POPCNT32rr:case X86::POPCNT32rm:
3572 case X86::POPCNT64rr:case X86::POPCNT64rm:
3573 return X86::COND_E; // POPCNT reports a zero source through ZF, so test "equal"
3574 case X86::TZCNT16rr: case X86::TZCNT16rm:
3575 case X86::TZCNT32rr: case X86::TZCNT32rm:
3576 case X86::TZCNT64rr: case X86::TZCNT64rm:
3577 return X86::COND_B; // TZCNT sets CF (not ZF) when the source is zero
3578 }
3579 }
3580
35613581 /// optimizeCompareInstr - Check if there exists an earlier instruction that
35623582 /// operates on the same source operands and sets flags in the same way as
35633583 /// Compare; remove Compare if possible.
36243644 // If we are comparing against zero, check whether we can use MI to update
36253645 // EFLAGS. If MI is not in the same BB as CmpInstr, do not optimize.
36263646 bool IsCmpZero = (SrcReg2 == 0 && CmpValue == 0);
3627 if (IsCmpZero && (MI->getParent() != CmpInstr->getParent() ||
3628 !isDefConvertible(MI)))
3647 if (IsCmpZero && MI->getParent() != CmpInstr->getParent())
36293648 return false;
3649
3650 // If we have a use of the source register between the def and our compare
3651 // instruction we can eliminate the compare iff the use sets EFLAGS in the
3652 // right way.
3653 bool ShouldUpdateCC = false;
3654 X86::CondCode NewCC = X86::COND_INVALID;
3655 if (IsCmpZero && !isDefConvertible(MI)) {
3656 // Scan forward from the use until we hit the use we're looking for or the
3657 // compare instruction.
3658 for (MachineBasicBlock::iterator J = MI;; ++J) {
3659 // Do we have a convertible instruction?
3660 NewCC = isUseDefConvertible(J);
3661 if (NewCC != X86::COND_INVALID && J->getOperand(1).isReg() &&
3662 J->getOperand(1).getReg() == SrcReg) {
3663 assert(J->definesRegister(X86::EFLAGS) && "Must be an EFLAGS def!");
3664 ShouldUpdateCC = true; // Update CC later on.
3665 // This is not a def of SrcReg, but still a def of EFLAGS. Keep going
3666 // with the new def.
3667 MI = Def = J;
3668 break;
3669 }
3670
3671 if (J == I)
3672 return false;
3673 }
3674 }
36303675
36313676 // We are searching for an earlier instruction that can make CmpInstr
36323677 // redundant and that instruction will be saved in Sub.
37253770 // CF and OF are used, we can't perform this optimization.
37263771 return false;
37273772 }
3773
3774 // If we're updating the condition code check if we have to reverse the
3775 // condition.
3776 if (ShouldUpdateCC)
3777 switch (OldCC) {
3778 default:
3779 return false;
3780 case X86::COND_E:
3781 break;
3782 case X86::COND_NE:
3783 NewCC = GetOppositeBranchCondition(NewCC);
3784 break;
3785 }
37283786 } else if (IsSwapped) {
37293787 // If we have SUB(r1, r2) and CMP(r2, r1), the condition code needs
37303788 // to be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
37313789 // We swap the condition code and synthesize the new opcode.
3732 X86::CondCode NewCC = getSwappedCondition(OldCC);
3790 NewCC = getSwappedCondition(OldCC);
37333791 if (NewCC == X86::COND_INVALID) return false;
3734
3792 }
3793
3794 if ((ShouldUpdateCC || IsSwapped) && NewCC != OldCC) {
37353795 // Synthesize the new opcode.
37363796 bool HasMemoryOperand = Instr.hasOneMemOperand();
37373797 unsigned NewOpc;
None ; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=+bmi,+bmi2,+popcnt | FileCheck %s
0 ; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=+bmi,+bmi2,+popcnt,+lzcnt | FileCheck %s
11 declare void @foo(i32)
2 declare void @foo32(i32)
23 declare void @foo64(i64)
34
45 ; CHECK-LABEL: neg:
188189 return:
189190 ret void
190191 }
192
193 ; CHECK-LABEL: testCTZ
194 ; CHECK: tzcntq
195 ; CHECK-NOT: test
196 ; CHECK: cmovaeq
197 declare i64 @llvm.cttz.i64(i64, i1)
198 define i64 @testCTZ(i64 %v) nounwind {
199 %cnt = tail call i64 @llvm.cttz.i64(i64 %v, i1 true)
200 %tobool = icmp eq i64 %v, 0
201 %cond = select i1 %tobool, i64 255, i64 %cnt
202 ret i64 %cond
203 }
204
205 ; CHECK-LABEL: testCTZ2
206 ; CHECK: tzcntl
207 ; CHECK-NEXT: jb
208 ; CHECK: jmp foo
209 declare i32 @llvm.cttz.i32(i32, i1)
210 define void @testCTZ2(i32 %v) nounwind {
211 %cnt = tail call i32 @llvm.cttz.i32(i32 %v, i1 true)
212 %cmp = icmp eq i32 %v, 0
213 br i1 %cmp, label %return, label %bb
214
215 bb:
216 tail call void @foo(i32 %cnt)
217 br label %return
218
219 return:
220 tail call void @foo32(i32 %cnt)
221 ret void
222 }
223
224 ; CHECK-LABEL: testCTZ3
225 ; CHECK: tzcntl
226 ; CHECK-NEXT: jae
227 ; CHECK: jmp foo
228 define void @testCTZ3(i32 %v) nounwind {
229 %cnt = tail call i32 @llvm.cttz.i32(i32 %v, i1 true)
230 %cmp = icmp ne i32 %v, 0
231 br i1 %cmp, label %return, label %bb
232
233 bb:
234 tail call void @foo(i32 %cnt)
235 br label %return
236
237 return:
238 tail call void @foo32(i32 %cnt)
239 ret void
240 }
241
242 ; CHECK-LABEL: testCLZ
243 ; CHECK: lzcntq
244 ; CHECK-NOT: test
245 ; CHECK: cmovaeq
246 declare i64 @llvm.ctlz.i64(i64, i1)
247 define i64 @testCLZ(i64 %v) nounwind {
248 %cnt = tail call i64 @llvm.ctlz.i64(i64 %v, i1 true)
249 %tobool = icmp ne i64 %v, 0
250 %cond = select i1 %tobool, i64 %cnt, i64 255
251 ret i64 %cond
252 }
253
254 ; CHECK-LABEL: testPOPCNT
255 ; CHECK: popcntq
256 ; CHECK-NOT: test
257 ; CHECK: cmovneq
258 declare i64 @llvm.ctpop.i64(i64)
259 define i64 @testPOPCNT(i64 %v) nounwind {
260 %cnt = tail call i64 @llvm.ctpop.i64(i64 %v)
261 %tobool = icmp ne i64 %v, 0
262 %cond = select i1 %tobool, i64 %cnt, i64 255
263 ret i64 %cond
264 }