llvm.org GIT mirror llvm / b26c772
Kill and collapse outstanding DomainValues. DomainValues that are only used by "don't care" instructions are now collapsed to the first possible execution domain after all basic blocks have been processed. This typically means the PS domain on x86. For example, the vsel_i64 and vsel_double functions in sse2-blend.ll are completely collapsed to the PS domain instead of containing a mix of execution domains created by isel. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144037 91177308-0d34-0410-b5e6-96231b3b80d8 Jakob Stoklund Olesen 8 years ago
8 changed file(s) with 58 addition(s) and 28 deletion(s). Raw diff Collapse all Expand all
509509 leaveBasicBlock(MBB);
510510 }
511511
512 // Clear the LiveOuts vectors. Should we also collapse any remaining
513 // DomainValues?
514 for (LiveOutMap::const_iterator i = LiveOuts.begin(), e = LiveOuts.end();
515 i != e; ++i)
516 delete[] i->second;
512 // Clear the LiveOuts vectors and collapse any remaining DomainValues.
513 for (ReversePostOrderTraversal<MachineBasicBlock*>::rpo_iterator
514 MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) {
515 LiveOutMap::const_iterator FI = LiveOuts.find(*MBBI);
516 if (FI == LiveOuts.end())
517 continue;
518 assert(FI->second && "Null entry");
519 // The DomainValue is collapsed when the last reference is killed.
520 LiveRegs = FI->second;
521 for (unsigned i = 0, e = NumRegs; i != e; ++i)
522 if (LiveRegs[i])
523 Kill(i);
524 delete[] LiveRegs;
525 }
517526 LiveOuts.clear();
518527 Avail.clear();
519528 Allocator.DestroyAll();
314314
315315
316316 define void @test_x86_sse2_movnt_dq(i8* %a0, <2 x i64> %a1) {
317 ; CHECK: test_x86_sse2_movnt_dq
317318 ; CHECK: movl
318319 ; CHECK: vmovntdq
319 call void @llvm.x86.sse2.movnt.dq(i8* %a0, <2 x i64> %a1)
320 ; add operation forces the execution domain.
321 %a2 = add <2 x i64> %a1, <i64 1, i64 1>
322 call void @llvm.x86.sse2.movnt.dq(i8* %a0, <2 x i64> %a2)
320323 ret void
321324 }
322325 declare void @llvm.x86.sse2.movnt.dq(i8*, <2 x i64>) nounwind
323326
324327
325328 define void @test_x86_sse2_movnt_pd(i8* %a0, <2 x double> %a1) {
329 ; CHECK: test_x86_sse2_movnt_pd
326330 ; CHECK: movl
327331 ; CHECK: vmovntpd
328 call void @llvm.x86.sse2.movnt.pd(i8* %a0, <2 x double> %a1)
332 ; fadd operation forces the execution domain.
333 %a2 = fadd <2 x double> %a1, <double 0x0, double 0x0>
334 call void @llvm.x86.sse2.movnt.pd(i8* %a0, <2 x double> %a2)
329335 ret void
330336 }
331337 declare void @llvm.x86.sse2.movnt.pd(i8*, <2 x double>) nounwind
332338
333339
334340 define <2 x double> @test_x86_sse2_mul_sd(<2 x double> %a0, <2 x double> %a1) {
341 ; CHECK: test_x86_sse2_mul_sd
335342 ; CHECK: vmulsd
336343 %res = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
337344 ret <2 x double> %res
748755
749756
750757 define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) {
758 ; CHECK: test_x86_sse2_storel_dq
751759 ; CHECK: movl
752760 ; CHECK: vmovq
753761 call void @llvm.x86.sse2.storel.dq(i8* %a0, <4 x i32> %a1)
757765
758766
759767 define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
768 ; CHECK: test_x86_sse2_storeu_dq
760769 ; CHECK: movl
761770 ; CHECK: vmovdqu
762771 call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a1)
766775
767776
768777 define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) {
778 ; CHECK: test_x86_sse2_storeu_pd
769779 ; CHECK: movl
770780 ; CHECK: vmovupd
771 call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a1)
781 %a2 = fadd <2 x double> %a1, <double 0x0, double 0x0>
782 call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a2)
772783 ret void
773784 }
774785 declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind
775786
776787
777788 define <2 x double> @test_x86_sse2_sub_sd(<2 x double> %a0, <2 x double> %a1) {
789 ; CHECK: test_x86_sse2_sub_sd
778790 ; CHECK: vsubsd
779791 %res = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
780792 ret <2 x double> %res
164164 ; CHECK: vpandn %xmm
165165 define <2 x i64> @vpandn(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
166166 entry:
167 %y = xor <2 x i64> %a, <i64 -1, i64 -1>
167 ; Force the execution domain with an add.
168 %a2 = add <2 x i64> %a, <i64 1, i64 1>
169 %y = xor <2 x i64> %a2, <i64 -1, i64 -1>
168170 %x = and <2 x i64> %a, %y
169171 ret <2 x i64> %x
170172 }
172174 ; CHECK: vpand %xmm
173175 define <2 x i64> @vpand(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
174176 entry:
175 %x = and <2 x i64> %a, %b
177 ; Force the execution domain with an add.
178 %a2 = add <2 x i64> %a, <i64 1, i64 1>
179 %x = and <2 x i64> %a2, %b
176180 ret <2 x i64> %x
177181 }
178182
22 define void @f(<4 x float> %A, i8* %B, <2 x double> %C, i32 %D, <2 x i64> %E) {
33 ; CHECK: movntps
44 %cast = bitcast i8* %B to <4 x float>*
5 store <4 x float> %A, <4 x float>* %cast, align 16, !nontemporal !0
5 %A2 = fadd <4 x float> %A, <float 0x0, float 0x0, float 0x0, float 0x0>
6 store <4 x float> %A2, <4 x float>* %cast, align 16, !nontemporal !0
67 ; CHECK: movntdq
78 %cast1 = bitcast i8* %B to <2 x i64>*
8 store <2 x i64> %E, <2 x i64>* %cast1, align 16, !nontemporal !0
9 %E2 = add <2 x i64> %E, <i64 1, i64 1>
10 store <2 x i64> %E2, <2 x i64>* %cast1, align 16, !nontemporal !0
911 ; CHECK: movntpd
1012 %cast2 = bitcast i8* %B to <2 x double>*
11 store <2 x double> %C, <2 x double>* %cast2, align 16, !nontemporal !0
13 %C2 = fadd <2 x double> %C, <double 0x0, double 0x0>
14 store <2 x double> %C2, <2 x double>* %cast2, align 16, !nontemporal !0
1215 ; CHECK: movnti
1316 %cast3 = bitcast i8* %B to i32*
1417 store i32 %D, i32* %cast3, align 16, !nontemporal !0
0 ; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
11 ; CHECK-NOT: movapd
22 ; CHECK: movaps
3 ; CHECK-NOT: movaps
4 ; CHECK: movapd
3 ; CHECK-NOT: movapd
4 ; CHECK: movaps
55 ; CHECK-NOT: movap
66
77 define void @foo(<4 x float>* %p, <4 x float> %x) nounwind {
2525 ret void
2626 }
2727
28 ; FIXME: The -mattr=+sse2,-sse41 disable the ExecutionDepsFix pass causing the
29 ; mixed domains here.
28 ; Without forcing instructions, fall back to the preferred PS domain.
3029 ; CHECK: vsel_i64
3130 ; CHECK: xorps
32 ; CHECK: pand
31 ; CHECK: andps
3332 ; CHECK: andnps
3433 ; CHECK: orps
3534 ; CHECK: ret
4241 ret void
4342 }
4443
45 ; FIXME: The -mattr=+sse2,-sse41 disable the ExecutionDepsFix pass causing the
46 ; mixed domains here.
44 ; Without forcing instructions, fall back to the preferred PS domain.
4745 ; CHECK: vsel_double
4846 ; CHECK: xorps
49 ; CHECK: pand
47 ; CHECK: andps
5048 ; CHECK: andnps
5149 ; CHECK: orps
5250 ; CHECK: ret
53
5451
5552 define void@vsel_double(<4 x double>* %v1, <4 x double>* %v2) {
5653 %A = load <4 x double>* %v1
143143 %tmp7 = insertelement <2 x double> %tmp, double %b, i32 1 ; <<2 x double>> [#uses=1]
144144 ret <2 x double> %tmp7
145145 ; CHECK: test11:
146 ; CHECK: movapd 4(%esp), %xmm0
146 ; CHECK: movaps 4(%esp), %xmm0
147147 }
148148
149149 define void @test12() nounwind {
None ; RUN: llc < %s -march=x86 -mcpu=core2 -o %t
1 ; RUN: grep movq %t | count 1
2 ; RUN: grep pshufd %t | count 1
3 ; RUN: grep movupd %t | count 1
4 ; RUN: grep pshufhw %t | count 1
0 ; RUN: llc < %s -march=x86 -mcpu=core2 | FileCheck %s
51
2 ; CHECK: test_v4sf
3 ; CHECK: movq 8(%esp)
4 ; CHECK: pshufd $80
65 define void @test_v4sf(<4 x float>* %P, float %X, float %Y) nounwind {
76 %tmp = insertelement <4 x float> zeroinitializer, float %X, i32 0 ; <<4 x float>> [#uses=1]
87 %tmp2 = insertelement <4 x float> %tmp, float %X, i32 1 ; <<4 x float>> [#uses=1]
1211 ret void
1312 }
1413
14 ; CHECK: test_v2sd
15 ; CHECK: movups 8(%esp)
16 ; CHECK: movaps
1517 define void @test_v2sd(<2 x double>* %P, double %X, double %Y) nounwind {
1618 %tmp = insertelement <2 x double> zeroinitializer, double %X, i32 0 ; <<2 x double>> [#uses=1]
1719 %tmp2 = insertelement <2 x double> %tmp, double %Y, i32 1 ; <<2 x double>> [#uses=1]
1921 ret void
2022 }
2123
24 ; CHECK: test_v8i16
25 ; CHECK: pshufhw $-58
26 ; CHECK: movdqa
2227 define void @test_v8i16(<2 x i64>* %res, <2 x i64>* %A) nounwind {
2328 %tmp = load <2 x i64>* %A ; <<2 x i64>> [#uses=1]
2429 %tmp.upgrd.1 = bitcast <2 x i64> %tmp to <8 x i16> ; <<8 x i16>> [#uses=8]