llvm.org GIT mirror llvm / 106b797
[X86] Add extra rules for combining vselect dag nodes into movsd. This improves the fix committed at revision 199683 adding the following new target specific combine rules: 1) fold (v4i32: vselect <0,0,-1,-1>, A, B) -> (v4i32 (bitcast (movsd (v2i64 (bitcast A)), (v2i64 (bitcast B))) )) 2) fold (v4f32: vselect <0,0,-1,-1>, A, B) -> (v4f32 (bitcast (movsd (v2f64 (bitcast A)), (v2f64 (bitcast B))) )) 3) fold (v4i32: vselect <-1,-1,0,0>, A, B) -> (v4i32 (bitcast (movsd (v2i64 (bitcast B)), (v2i64 (bitcast A))) )) 4) fold (v4f32: vselect <-1,-1,0,0>, A, B) -> (v4f32 (bitcast (movsd (v2f64 (bitcast B)), (v2f64 (bitcast A))) )) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@200324 91177308-0d34-0410-b5e6-96231b3b80d8 Andrea Di Biagio 6 years ago
3 changed file(s) with 75 addition(s) and 2 deletion(s). Raw diff Collapse all Expand all
1732317323 return getTargetShuffleNode(X86ISD::MOVSS, DL, VT, A, B, DAG);
1732417324 return getTargetShuffleNode(X86ISD::MOVSD, DL, VT, A, B, DAG);
1732517325 }
17326
17327 if (Subtarget->hasSSE2() && (VT == MVT::v4i32 || VT == MVT::v4f32)) {
17328 // fold (v4i32: vselect <0,0,-1,-1>, A, B) ->
17329 // (v4i32 (bitcast (movsd (v2i64 (bitcast A)),
17330 // (v2i64 (bitcast B)))))
17331 //
17332 // fold (v4f32: vselect <0,0,-1,-1>, A, B) ->
17333 // (v4f32 (bitcast (movsd (v2f64 (bitcast A)),
17334 // (v2f64 (bitcast B)))))
17335 //
17336 // fold (v4i32: vselect <-1,-1,0,0>, A, B) ->
17337 // (v4i32 (bitcast (movsd (v2i64 (bitcast B)),
17338 // (v2i64 (bitcast A)))))
17339 //
17340 // fold (v4f32: vselect <-1,-1,0,0>, A, B) ->
17341 // (v4f32 (bitcast (movsd (v2f64 (bitcast B)),
17342 // (v2f64 (bitcast A)))))
17343
17344 CanFold = (isZero(Cond.getOperand(0)) &&
17345 isZero(Cond.getOperand(1)) &&
17346 isAllOnes(Cond.getOperand(2)) &&
17347 isAllOnes(Cond.getOperand(3)));
17348
17349 if (!CanFold && isAllOnes(Cond.getOperand(0)) &&
17350 isAllOnes(Cond.getOperand(1)) &&
17351 isZero(Cond.getOperand(2)) &&
17352 isZero(Cond.getOperand(3))) {
17353 CanFold = true;
17354 std::swap(LHS, RHS);
17355 }
17356
17357 if (CanFold) {
17358 EVT NVT = (VT == MVT::v4i32) ? MVT::v2i64 : MVT::v2f64;
17359 SDValue NewA = DAG.getNode(ISD::BITCAST, DL, NVT, LHS);
17360 SDValue NewB = DAG.getNode(ISD::BITCAST, DL, NVT, RHS);
17361 SDValue Select = getTargetShuffleNode(X86ISD::MOVSD, DL, NVT, NewA,
17362 NewB, DAG);
17363 return DAG.getNode(ISD::BITCAST, DL, VT, Select);
17364 }
17365 }
1732617366 }
1732717367 }
1732817368
1212 ;CHECK: blendvps
1313 ;CHECK: ret
1414 define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) {
15 %vsel = select <4 x i1> false>, <4 x i8> %v1, <4 x i8> %v2
15 %vsel = select <4 x i1> true>, <4 x i8> %v1, <4 x i8> %v2
1616 ret <4 x i8> %vsel
1717 }
1818
2929 ;CHECK: blendvps
3030 ;CHECK: ret
3131 define <4 x i32> @vsel_i32(<4 x i32> %v1, <4 x i32> %v2) {
32 %vsel = select <4 x i1> false>, <4 x i32> %v1, <4 x i32> %v2
32 %vsel = select <4 x i1> true>, <4 x i32> %v1, <4 x i32> %v2
3333 ret <4 x i32> %vsel
3434 }
3535
0 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=sse2 | FileCheck %s
1
2 define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) {
3 %select = select <4 x i1> <i1 true, i1 true, i1 false, i1 false>, <4 x i32> %A, <4 x i32> %B
4 ret <4 x i32> %select
5 }
6 ; CHECK-LABEL: test1
7 ; CHECK: movsd
8 ; CHECK: ret
9
10 define <4 x i32> @test2(<4 x i32> %A, <4 x i32> %B) {
11 %select = select <4 x i1> <i1 false, i1 false, i1 true, i1 true>, <4 x i32> %A, <4 x i32> %B
12 ret <4 x i32> %select
13 }
14 ; CHECK-LABEL: test2
15 ; CHECK: movsd
16 ; CHECK-NEXT: ret
17
18 define <4 x float> @test3(<4 x float> %A, <4 x float> %B) {
19 %select = select <4 x i1> <i1 true, i1 true, i1 false, i1 false>, <4 x float> %A, <4 x float> %B
20 ret <4 x float> %select
21 }
22 ; CHECK-LABEL: test3
23 ; CHECK: movsd
24 ; CHECK: ret
25
26 define <4 x float> @test4(<4 x float> %A, <4 x float> %B) {
27 %select = select <4 x i1> <i1 false, i1 false, i1 true, i1 true>, <4 x float> %A, <4 x float> %B
28 ret <4 x float> %select
29 }
30 ; CHECK-LABEL: test4
31 ; CHECK: movsd
32 ; CHECK-NEXT: ret