llvm.org GIT mirror llvm / 7a8ed97
For PR9438:

--- Merging r127350 into '.':
D    test/CodeGen/X86/2009-03-11-CoalescerBug.ll
--- Merging r127351 into '.':
A    test/CodeGen/X86/2011-03-09-Physreg-Coalescing.ll
U    test/CodeGen/X86/fold-pcmpeqd-2.ll
U    lib/CodeGen/SimpleRegisterCoalescing.cpp

git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_29@127384 91177308-0d34-0410-b5e6-96231b3b80d8

Bill Wendling 9 years ago
4 changed file(s) with 37 addition(s) and 91 deletion(s). Raw diff Collapse all Expand all
10371037 const TargetRegisterClass *RC = mri_->getRegClass(CP.getSrcReg());
10381038 unsigned Threshold = allocatableRCRegs_[RC].count() * 2;
10391039 unsigned Length = li_->getApproximateInstructionCount(JoinVInt);
1040 if (Length > Threshold &&
1041 std::distance(mri_->use_nodbg_begin(CP.getSrcReg()),
1042 mri_->use_nodbg_end()) * Threshold < Length) {
1040 if (Length > Threshold) {
10431041 // Before giving up coalescing, if definition of source is defined by
10441042 // trivial computation, try rematerializing it.
10451043 if (!CP.isFlipped() &&
+0
-85
test/CodeGen/X86/2009-03-11-CoalescerBug.ll less more
None ; RUN: llc < %s -mtriple=i386-apple-darwin9 -stats |& grep regcoalescing | grep commuting
1
2 @lookupTable5B = external global [64 x i32], align 32 ; <[64 x i32]*> [#uses=1]
3 @lookupTable3B = external global [16 x i32], align 32 ; <[16 x i32]*> [#uses=1]
4 @disparity0 = external global i32 ; [#uses=5]
5 @disparity1 = external global i32 ; [#uses=3]
6
7 define i32 @calc(i32 %theWord, i32 %k) nounwind {
8 entry:
9 %0 = lshr i32 %theWord, 3 ; [#uses=1]
10 %1 = and i32 %0, 31 ; [#uses=1]
11 %2 = shl i32 %k, 5 ; [#uses=1]
12 %3 = or i32 %1, %2 ; [#uses=1]
13 %4 = and i32 %theWord, 7 ; [#uses=1]
14 %5 = shl i32 %k, 3 ; [#uses=1]
15 %6 = or i32 %5, %4 ; [#uses=1]
16 %7 = getelementptr [64 x i32]* @lookupTable5B, i32 0, i32 %3 ; [#uses=1]
17 %8 = load i32* %7, align 4 ; [#uses=5]
18 %9 = getelementptr [16 x i32]* @lookupTable3B, i32 0, i32 %6 ; [#uses=1]
19 %10 = load i32* %9, align 4 ; [#uses=5]
20 %11 = and i32 %8, 65536 ; [#uses=1]
21 %12 = icmp eq i32 %11, 0 ; [#uses=1]
22 br i1 %12, label %bb1, label %bb
23
24 bb: ; preds = %entry
25 %13 = and i32 %8, 994 ; [#uses=1]
26 %14 = load i32* @disparity0, align 4 ; [#uses=2]
27 store i32 %14, i32* @disparity1, align 4
28 br label %bb8
29
30 bb1: ; preds = %entry
31 %15 = lshr i32 %8, 18 ; [#uses=1]
32 %16 = and i32 %15, 1 ; [#uses=1]
33 %17 = load i32* @disparity0, align 4 ; [#uses=4]
34 %18 = icmp eq i32 %16, %17 ; [#uses=1]
35 %not = select i1 %18, i32 0, i32 994 ; [#uses=1]
36 %.masked = and i32 %8, 994 ; [#uses=1]
37 %result.1 = xor i32 %not, %.masked ; [#uses=2]
38 %19 = and i32 %8, 524288 ; [#uses=1]
39 %20 = icmp eq i32 %19, 0 ; [#uses=1]
40 br i1 %20, label %bb7, label %bb6
41
42 bb6: ; preds = %bb1
43 %21 = xor i32 %17, 1 ; [#uses=2]
44 store i32 %21, i32* @disparity1, align 4
45 br label %bb8
46
47 bb7: ; preds = %bb1
48 store i32 %17, i32* @disparity1, align 4
49 br label %bb8
50
51 bb8: ; preds = %bb7, %bb6, %bb
52 %22 = phi i32 [ %17, %bb7 ], [ %21, %bb6 ], [ %14, %bb ] ; [#uses=4]
53 %result.0 = phi i32 [ %result.1, %bb7 ], [ %result.1, %bb6 ], [ %13, %bb ] ; [#uses=2]
54 %23 = and i32 %10, 65536 ; [#uses=1]
55 %24 = icmp eq i32 %23, 0 ; [#uses=1]
56 br i1 %24, label %bb10, label %bb9
57
58 bb9: ; preds = %bb8
59 %25 = and i32 %10, 29 ; [#uses=1]
60 %26 = or i32 %result.0, %25 ; [#uses=1]
61 store i32 %22, i32* @disparity0, align 4
62 ret i32 %26
63
64 bb10: ; preds = %bb8
65 %27 = lshr i32 %10, 18 ; [#uses=1]
66 %28 = and i32 %27, 1 ; [#uses=1]
67 %29 = icmp eq i32 %28, %22 ; [#uses=1]
68 %not13 = select i1 %29, i32 0, i32 29 ; [#uses=1]
69 %.masked20 = and i32 %10, 29 ; [#uses=1]
70 %.pn = xor i32 %not13, %.masked20 ; [#uses=1]
71 %result.3 = or i32 %.pn, %result.0 ; [#uses=2]
72 %30 = and i32 %10, 524288 ; [#uses=1]
73 %31 = icmp eq i32 %30, 0 ; [#uses=1]
74 br i1 %31, label %bb17, label %bb16
75
76 bb16: ; preds = %bb10
77 %32 = xor i32 %22, 1 ; [#uses=1]
78 store i32 %32, i32* @disparity0, align 4
79 ret i32 %result.3
80
81 bb17: ; preds = %bb10
82 store i32 %22, i32* @disparity0, align 4
83 ret i32 %result.3
84 }
0 ; RUN: llc -mcpu=yonah < %s
1 ; PR9438
2 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
3 target triple = "i386-unknown-freebsd9.0"
4
5 ; The 'call fastcc' ties down %ebx, %ecx, and %edx.
6 ; A MUL8r ties down %al, leaving no GR32_ABCD registers available.
7 ; The coalescer can easily overallocate physical registers,
8 ; and register allocation fails.
9
10 declare fastcc i8* @save_string(i8* %d, i8* nocapture %s) nounwind
11
12 define i32 @cvtchar(i8* nocapture %sp) nounwind {
13 %temp.i = alloca [2 x i8], align 1
14 %tmp1 = load i8* %sp, align 1
15 %div = udiv i8 %tmp1, 10
16 %rem = urem i8 %div, 10
17 %arrayidx.i = getelementptr inbounds [2 x i8]* %temp.i, i32 0, i32 0
18 store i8 %rem, i8* %arrayidx.i, align 1
19 %call.i = call fastcc i8* @save_string(i8* %sp, i8* %arrayidx.i) nounwind
20 ret i32 undef
21 }
None ; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah | not grep pcmpeqd
1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep pcmpeqd | count 1
0 ; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah | FileCheck %s
1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
22
3 ; This testcase should need to spill the -1 value on x86-32,
3 ; This testcase should need to spill the -1 value on both x86-32 and x86-64,
44 ; so it shouldn't use pcmpeqd to materialize an all-ones vector; it
55 ; should use a constant-pool load instead.
6
7 ; Constant pool all-ones vector:
8 ; CHECK: .long 4294967295
9 ; CHECK-NEXT: .long 4294967295
10 ; CHECK-NEXT: .long 4294967295
11 ; CHECK-NEXT: .long 4294967295
12
13 ; No pcmpeqd instructions, everybody uses the constant pool.
14 ; CHECK: program_1:
15 ; CHECK-NOT: pcmpeqd
616
717 %struct.__ImageExecInfo = type <{ <4 x i32>, <4 x float>, <2 x i64>, i8*, i8*, i8*, i32, i32, i32, i32, i32 }>
818 %struct._cl_image_format_t = type <{ i32, i32, i32 }>
5666 %bitcast11.i6 = bitcast <4 x float> %tmp83 to <4 x i32> ; <<4 x i32>> [#uses=1]
5767 %not.i7 = xor <4 x i32> zeroinitializer, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1]
5868 %andnps.i8 = and <4 x i32> %bitcast11.i6, %not.i7 ; <<4 x i32>> [#uses=1]
69 call void null(<4 x float> %mul313, <4 x float> %cmpunord.i11, <4 x float> %tmp83, <4 x float> zeroinitializer, %struct.__ImageExecInfo* null, <4 x i32> zeroinitializer) nounwind
5970 %orps.i9 = or <4 x i32> %andnps.i8, %andps.i5 ; <<4 x i32>> [#uses=1]
6071 %bitcast17.i10 = bitcast <4 x i32> %orps.i9 to <4 x float> ; <<4 x float>> [#uses=1]
6172 %tmp84 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %mul313, <4 x float> zeroinitializer) nounwind ; <<4 x float>> [#uses=1]