llvm.org GIT mirror llvm / 638e97f
Teach the DAGCombiner how to fold 'vselect' dag nodes according to the following two rules: 1) fold (vselect (build_vector AllOnes), A, B) -> A 2) fold (vselect (build_vector AllZeros), A, B) -> B git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@198777 91177308-0d34-0410-b5e6-96231b3b80d8 Andrea Di Biagio 6 years ago
5 changed file(s) with 77 addition(s) and 28 deletion(s). Raw diff Collapse all Expand all
44014401 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
44024402 }
44034403
4404 // Fold (vselect (build_vector all_ones), N1, N2) -> N1
4405 if (ISD::isBuildVectorAllOnes(N0.getNode()))
4406 return N1;
4407 // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
4408 if (ISD::isBuildVectorAllZeros(N0.getNode()))
4409 return N2;
4410
44044411 return SDValue();
44054412 }
44064413
3939
4040 define <8 x i8> @bsl8xi8_const(<8 x i8> %a, <8 x i8> %b) {
4141 ;CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
42 %tmp1 = and <8 x i8> %a, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
43 %tmp2 = and <8 x i8> %b, < i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0 >
42 %tmp1 = and <8 x i8> %a, < i8 -1, i8 -1, i8 0, i8 0, i8 -1, i8 -1, i8 0, i8 0 >
43 %tmp2 = and <8 x i8> %b, < i8 0, i8 0, i8 -1, i8 -1, i8 0, i8 0, i8 -1, i8 -1 >
4444 %tmp3 = or <8 x i8> %tmp1, %tmp2
4545 ret <8 x i8> %tmp3
4646 }
4747
4848 define <16 x i8> @bsl16xi8_const(<16 x i8> %a, <16 x i8> %b) {
49 ;CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
50 %tmp1 = and <16 x i8> %a, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
51 %tmp2 = and <16 x i8> %b, < i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0 >
49 ;CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
50 %tmp1 = and <16 x i8> %a, < i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 0, i8 0, i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 0, i8 0, i8 0 >
51 %tmp2 = and <16 x i8> %b, < i8 0, i8 0, i8 0, i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 0, i8 0, i8 0, i8 -1, i8 -1, i8 -1, i8 -1 >
5252 %tmp3 = or <16 x i8> %tmp1, %tmp2
5353 ret <16 x i8> %tmp3
5454 }
443443 %tmp2 = or <2 x i64> %a, %tmp1
444444 ret <2 x i64> %tmp2
445445 }
446
446447 define <2 x i32> @bsl2xi32_const(<2 x i32> %a, <2 x i32> %b) {
447448 ;CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
448 %tmp1 = and <2 x i32> %a, < i32 -1, i32 -1 >
449 %tmp2 = and <2 x i32> %b, < i32 0, i32 0 >
449 %tmp1 = and <2 x i32> %a, < i32 -1, i32 0 >
450 %tmp2 = and <2 x i32> %b, < i32 0, i32 -1 >
450451 %tmp3 = or <2 x i32> %tmp1, %tmp2
451452 ret <2 x i32> %tmp3
452453 }
454455
455456 define <4 x i16> @bsl4xi16_const(<4 x i16> %a, <4 x i16> %b) {
456457 ;CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
457 %tmp1 = and <4 x i16> %a, < i16 -1, i16 -1, i16 -1,i16 -1 >
458 %tmp2 = and <4 x i16> %b, < i16 0, i16 0,i16 0, i16 0 >
458 %tmp1 = and <4 x i16> %a, < i16 -1, i16 0, i16 -1,i16 0 >
459 %tmp2 = and <4 x i16> %b, < i16 0, i16 -1,i16 0, i16 -1 >
459460 %tmp3 = or <4 x i16> %tmp1, %tmp2
460461 ret <4 x i16> %tmp3
461462 }
462463
463464 define <1 x i64> @bsl1xi64_const(<1 x i64> %a, <1 x i64> %b) {
464465 ;CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
465 %tmp1 = and <1 x i64> %a, < i64 -1 >
466 %tmp2 = and <1 x i64> %b, < i64 0 >
466 %tmp1 = and <1 x i64> %a, < i64 -16 >
467 %tmp2 = and <1 x i64> %b, < i64 15 >
467468 %tmp3 = or <1 x i64> %tmp1, %tmp2
468469 ret <1 x i64> %tmp3
469470 }
470471
471472 define <4 x i32> @bsl4xi32_const(<4 x i32> %a, <4 x i32> %b) {
472473 ;CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
473 %tmp1 = and <4 x i32> %a, < i32 -1, i32 -1, i32 -1, i32 -1 >
474 %tmp2 = and <4 x i32> %b, < i32 0, i32 0, i32 0, i32 0 >
474 %tmp1 = and <4 x i32> %a, < i32 -1, i32 0, i32 -1, i32 0 >
475 %tmp2 = and <4 x i32> %b, < i32 0, i32 -1, i32 0, i32 -1 >
475476 %tmp3 = or <4 x i32> %tmp1, %tmp2
476477 ret <4 x i32> %tmp3
477478 }
478479
479480 define <8 x i16> @bsl8xi16_const(<8 x i16> %a, <8 x i16> %b) {
480481 ;CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
481 %tmp1 = and <8 x i16> %a, < i16 -1, i16 -1, i16 -1,i16 -1, i16 -1, i16 -1, i16 -1,i16 -1 >
482 %tmp2 = and <8 x i16> %b, < i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0 >
482 %tmp1 = and <8 x i16> %a, < i16 -1, i16 -1, i16 0,i16 0, i16 -1, i16 -1, i16 0,i16 0 >
483 %tmp2 = and <8 x i16> %b, < i16 0, i16 0, i16 -1, i16 -1, i16 0, i16 0, i16 -1, i16 -1 >
483484 %tmp3 = or <8 x i16> %tmp1, %tmp2
484485 ret <8 x i16> %tmp3
485486 }
486487
487488 define <2 x i64> @bsl2xi64_const(<2 x i64> %a, <2 x i64> %b) {
488489 ;CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
489 %tmp1 = and <2 x i64> %a, < i64 -1, i64 -1 >
490 %tmp2 = and <2 x i64> %b, < i64 0, i64 0 >
490 %tmp1 = and <2 x i64> %a, < i64 -1, i64 0 >
491 %tmp2 = and <2 x i64> %b, < i64 0, i64 -1 >
491492 %tmp3 = or <2 x i64> %tmp1, %tmp2
492493 ret <2 x i64> %tmp3
493494 }
None ; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s
0 ; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 | FileCheck %s
11
2 target triple = "x86_64-unknown-linux-gnu"
3
4 ; Make sure that we don't crash when legalizng vselect and vsetcc and that
2 ; Make sure that we don't crash when legalizing vselect and vsetcc and that
53 ; we are able to generate vector blend instructions.
64
7 ; CHECK: simple_widen
8 ; CHECK: blend
5 ; CHECK-LABEL: simple_widen
6 ; CHECK-NOT: blend
97 ; CHECK: ret
108 define void @simple_widen() {
119 entry:
1412 ret void
1513 }
1614
17 ; CHECK: complex_inreg_work
15 ; CHECK-LABEL: complex_inreg_work
1816 ; CHECK: blend
1917 ; CHECK: ret
2018
2624 ret void
2725 }
2826
29 ; CHECK: zero_test
30 ; CHECK: blend
27 ; CHECK-LABEL: zero_test
28 ; CHECK: xorps %xmm0, %xmm0
3129 ; CHECK: ret
3230
3331 define void @zero_test() {
3735 ret void
3836 }
3937
40 ; CHECK: full_test
38 ; CHECK-LABEL: full_test
4139 ; CHECK: blend
4240 ; CHECK: ret
4341
0 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
11
22 ; CHECK-LABEL: test
3 ; CHECK: vmovdqu32
3 ; CHECK: vpxord
44 ; CHECK: ret
55 define <16 x i32> @test() {
66 entry:
129129 ; CHECK-NOT: psraw
130130 ; CHECK: ret
131131
132 ; Fold (vselect (build_vector AllOnes), N1, N2) -> N1
132133
134 define <4 x float> @test14(<4 x float> %a, <4 x float> %b) {
135 %1 = select <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %b
136 ret <4 x float> %1
137 }
138 ; CHECK-LABEL: test14
139 ; CHECK-NOT: psllw
140 ; CHECK-NOT: psraw
141 ; CHECK-NOT: pcmpeq
142 ; CHECK: ret
143
144 define <8 x i16> @test15(<8 x i16> %a, <8 x i16> %b) {
145 %1 = select <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> %a, <8 x i16> %b
146 ret <8 x i16> %1
147 }
148 ; CHECK-LABEL: test15
149 ; CHECK-NOT: psllw
150 ; CHECK-NOT: psraw
151 ; CHECK-NOT: pcmpeq
152 ; CHECK: ret
153
154 ; Fold (vselect (build_vector AllZeros), N1, N2) -> N2
155
156 define <4 x float> @test16(<4 x float> %a, <4 x float> %b) {
157 %1 = select <4 x i1> <i1 false, i1 false, i1 false, i1 false>, <4 x float> %a, <4 x float> %b
158 ret <4 x float> %1
159 }
160 ; CHECK-LABEL: test16
161 ; CHECK-NOT: psllw
162 ; CHECK-NOT: psraw
163 ; CHECK-NOT: xorps
164 ; CHECK: ret
165
166 define <8 x i16> @test17(<8 x i16> %a, <8 x i16> %b) {
167 %1 = select <8 x i1> <i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <8 x i16> %a, <8 x i16> %b
168 ret <8 x i16> %1
169 }
170 ; CHECK-LABEL: test17
171 ; CHECK-NOT: psllw
172 ; CHECK-NOT: psraw
173 ; CHECK-NOT: xorps
174 ; CHECK: ret
175