llvm.org GIT mirror llvm / cae1ea6
[X86] Always prefer to lower a VECTOR_SHUFFLE into a BLENDI instead of SHUFP (or VPERM2X128). This patch teaches method 'LowerVECTOR_SHUFFLE' to give higher precedence to the check for 'isBlendMask'; the idea is that, when possible, we should first check whether a shuffle performs a blend and, if so, try to lower it into a BLENDI instead of selecting a SHUFP or (worse) a VPERM2X128. In general: - AVX VBLENDPS/D always have better latency and throughput than VPERM2F128; - BLENDPS/D instructions tend to have better 'reciprocal throughput' than the equivalent SHUFPS/D; - Both BLENDPS/D and SHUFPS/D are often decoded into the same number of micro-ops; however, a micro-op obtained from a BLENDPS/D can be scheduled to more than one execution port. This patch: - Moves the check for 'isBlendMask' immediately before the check for 'isSHUFPMask' within method 'LowerVECTOR_SHUFFLE'; - Updates existing tests for sse/avx shuffle/blend instructions to verify that we select (v)blendps/d when possible (instead of (v)shufps/d or vperm2f128). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211720 91177308-0d34-0410-b5e6-96231b3b80d8 Andrea Di Biagio 5 years ago
7 changed file(s) with 17 addition(s) and 17 deletion(s). Raw diff Collapse all Expand all
83368336 getShufflePSHUFLWImmediate(SVOp),
83378337 DAG);
83388338
8339 unsigned MaskValue;
8340 if (isBlendMask(M, VT, Subtarget->hasSSE41(), Subtarget->hasInt256(),
8341 &MaskValue))
8342 return LowerVECTOR_SHUFFLEtoBlend(SVOp, MaskValue, Subtarget, DAG);
8343
83398344 if (isSHUFPMask(M, VT))
83408345 return getTargetShuffleNode(X86ISD::SHUFP, dl, VT, V1, V2,
83418346 getShuffleSHUFImmediate(SVOp), DAG);
83728377 if (isVPERM2X128Mask(M, VT, HasFp256))
83738378 return getTargetShuffleNode(X86ISD::VPERM2X128, dl, VT, V1,
83748379 V2, getShuffleVPERM2X128Immediate(SVOp), DAG);
8375
8376 unsigned MaskValue;
8377 if (isBlendMask(M, VT, Subtarget->hasSSE41(), Subtarget->hasInt256(),
8378 &MaskValue))
8379 return LowerVECTOR_SHUFFLEtoBlend(SVOp, MaskValue, Subtarget, DAG);
83808380
83818381 if (Subtarget->hasSSE41() && isINSERTPSMask(M, VT))
83828382 return getINSERTPS(SVOp, dl, DAG);
53735373 // - the 1st and 3rd element from the first input vector (the 'fsub' node);
53745374 // - the 2nd and 4th element from the second input vector (the 'fadd' node).
53755375
5376 def : Pat<(v4f64 (X86Shufp (v4f64 (fsub VR256:$lhs, VR256:$rhs)),
5377 (v4f64 (fadd VR256:$lhs, VR256:$rhs)), (i8 10))),
5376 def : Pat<(v4f64 (X86Blendi (v4f64 (fsub VR256:$lhs, VR256:$rhs)),
5377 (v4f64 (fadd VR256:$lhs, VR256:$rhs)), (i32 10))),
53785378 (VADDSUBPDYrr VR256:$lhs, VR256:$rhs)>;
53795379 def : Pat<(v4f64 (X86Blendi (v4f64 (fsub VR256:$lhs, VR256:$rhs)),
53805380 (v4f64 (fadd VR256:$lhs, VR256:$rhs)), (i32 10))),
109109
110110 ;CHECK-LABEL: vsel_double4:
111111 ;CHECK-NOT: vinsertf128
112 ;CHECK: vshufpd $10
112 ;CHECK: vblendpd $10
113113 ;CHECK-NEXT: ret
114114 define <4 x double> @vsel_double4(<4 x double> %v1, <4 x double> %v2) {
115115 %vsel = select <4 x i1> , <4 x double> %v1, <4 x double> %v2
2424 %c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32>
2525 ret <4 x i64> %c
2626 ; CHECK-LABEL: test3:
27 ; CHECK: vperm2f128
27 ; CHECK: vblendpd
2828 ; CHECK: ret
2929 }
3030
88 }
99
1010 ; CHECK: _B
11 ; CHECK: vperm2f128 $48
11 ; CHECK: vblendps $240
1212 define <8 x float> @B(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
1313 entry:
1414 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32>
3131 ret <8 x i32> %shuffle
3232 }
3333
34 ; CHECK: vshufpd $10, %ymm
34 ; CHECK: vblendpd $10, %ymm
3535 define <4 x double> @B(<4 x double> %a, <4 x double> %b) nounwind uwtable readnone ssp {
3636 entry:
3737 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32>
3838 ret <4 x double> %shuffle
3939 }
4040
41 ; CHECK: vshufpd $10, (%{{.*}}), %ymm
41 ; CHECK: vblendpd $10, (%{{.*}}), %ymm
4242 define <4 x double> @B2(<4 x double>* %a, <4 x double>* %b) nounwind uwtable readnone ssp {
4343 entry:
4444 %a2 = load <4 x double>* %a
4747 ret <4 x double> %shuffle
4848 }
4949
50 ; CHECK: vshufpd $10, %ymm
50 ; CHECK: vblendpd $10, %ymm
5151 define <4 x i64> @B3(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
5252 entry:
5353 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32>
5454 ret <4 x i64> %shuffle
5555 }
5656
57 ; CHECK: vshufpd $10, (%{{.*}}), %ymm
57 ; CHECK: vblendpd $10, (%{{.*}}), %ymm
5858 define <4 x i64> @B4(<4 x i64>* %a, <4 x i64>* %b) nounwind uwtable readnone ssp {
5959 entry:
6060 %a2 = load <4 x i64>* %a
7070 ret <8 x float> %shuffle
7171 }
7272
73 ; CHECK: vshufpd $2, %ymm
73 ; CHECK: vblendpd $2, %ymm
7474 define <4 x double> @D(<4 x double> %a, <4 x double> %b) nounwind uwtable readnone ssp {
7575 entry:
7676 %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32>
7373 }
7474 ; CHECK-LABEL: test6
7575 ; CHECK-NOT: xorps
76 ; CHECK: shufps
76 ; CHECK: blendps $12
7777 ; CHECK-NEXT: ret
7878
7979
8585 }
8686 ; CHECK-LABEL: test7
8787 ; CHECK-NOT: xorps
88 ; CHECK: shufps
88 ; CHECK: blendps $12
8989 ; CHECK-NEXT: ret
9090
9191