llvm.org GIT mirror llvm / fd0096a
[X86] Fix a bug in the lowering of BLENDI introduced in r209043. ISD::VSELECT mask uses 1 to identify the first argument and 0 to identify the second argument. On the other hand, BLENDI uses 0 to identify the first argument and 1 to identify the second argument. Fix the generation of the blend mask to account for this difference. The bug did not show up with r209043, because we were not checking for the actual arguments of the blend instruction! This commit also fixes the test cases. Note: The same mask works for the BLENDr variant because the arguments are swapped during instruction selection (see the BLENDXXrr patterns). <rdar://problem/16975435> git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209324 91177308-0d34-0410-b5e6-96231b3b80d8 Quentin Colombet 5 years ago
3 changed file(s) with 48 addition(s) and 12 deletion(s). Raw diff Collapse all Expand all
79797979 return SDValue();
79807980 }
79817981
7982 // This function assumes its argument is a BUILD_VECTOR of constand or
7982 // This function assumes its argument is a BUILD_VECTOR of constants or
79837983 // undef SDNodes. i.e: ISD::isBuildVectorOfConstantSDNodes(BuildVector) is
79847984 // true.
79857985 static bool BUILD_VECTORtoBlendMask(BuildVectorSDNode *BuildVector,
80038003 Lane2Cond = !isZero(SndLaneEltCond);
80048004
80058005 if (Lane1Cond == Lane2Cond || Lane2Cond < 0)
8006 MaskValue |= !!Lane1Cond << i;
8006 // Lane1Cond != 0, means we want the first argument.
8007 // Lane1Cond == 0, means we want the second argument.
8008 // The encoding of this argument is 0 for the first argument, 1
8009 // for the second. Therefore, invert the condition.
8010 MaskValue |= !Lane1Cond << i;
80078011 else if (Lane1Cond < 0)
8008 MaskValue |= !!Lane2Cond << i;
8012 MaskValue |= !Lane2Cond << i;
80098013 else
80108014 return false;
80118015 }
22 ; AVX128 tests:
33
44 ;CHECK-LABEL: vsel_float:
5 ;CHECK: vblendps $5
5 ; select mask is <true, false, true, false>.
6 ; Big endian representation is 0101 = 5.
7 ; '1' means takes the first argument, '0' means takes the second argument.
8 ; This is the opposite of the intel syntax, thus we expect
9 ; the inverted mask: 1010 = 10.
10 ; According to the ABI:
11 ; v1 is in xmm0 => first argument is xmm0.
12 ; v2 is in xmm1 => second argument is xmm1.
13 ; result is in xmm0 => destination argument.
14 ;CHECK: vblendps $10, %xmm1, %xmm0, %xmm0
615 ;CHECK: ret
716 define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
817 %vsel = select <4 x i1> , <4 x float> %v1, <4 x float> %v2
1120
1221
1322 ;CHECK-LABEL: vsel_i32:
14 ;CHECK: vblendps $5
23 ;CHECK: vblendps $10, %xmm1, %xmm0, %xmm0
1524 ;CHECK: ret
1625 define <4 x i32> @vsel_i32(<4 x i32> %v1, <4 x i32> %v2) {
1726 %vsel = select <4 x i1> , <4 x i32> %v1, <4 x i32> %v2
5160
5261 ;CHECK-LABEL: vsel_float8:
5362 ;CHECK-NOT: vinsertf128
54 ;CHECK: vblendps $17
63 ; <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>
64 ; which translates into the boolean mask (big endian representation):
65 ; 00010001 = 17.
66 ; '1' means takes the first argument, '0' means takes the second argument.
67 ; This is the opposite of the intel syntax, thus we expect
68 ; the inverted mask: 11101110 = 238.
69 ;CHECK: vblendps $238, %ymm1, %ymm0, %ymm0
5570 ;CHECK: ret
5671 define <8 x float> @vsel_float8(<8 x float> %v1, <8 x float> %v2) {
5772 %vsel = select <8 x i1> , <8 x float> %v1, <8 x float> %v2
6075
6176 ;CHECK-LABEL: vsel_i328:
6277 ;CHECK-NOT: vinsertf128
63 ;CHECK: vblendps $17
78 ;CHECK: vblendps $238, %ymm1, %ymm0, %ymm0
6479 ;CHECK-NEXT: ret
6580 define <8 x i32> @vsel_i328(<8 x i32> %v1, <8 x i32> %v2) {
6681 %vsel = select <8 x i1> , <8 x i32> %v1, <8 x i32> %v2
6883 }
6984
7085 ;CHECK-LABEL: vsel_double8:
71 ;CHECK: vblendpd $1
72 ;CHECK: vblendpd $1
86 ; select mask is 2x: 0001 => intel mask: ~0001 = 14
87 ; ABI:
88 ; v1 is in ymm0 and ymm1.
89 ; v2 is in ymm2 and ymm3.
90 ; result is in ymm0 and ymm1.
91 ; Compute the low part: res.low = blend v1.low, v2.low, blendmask
92 ;CHECK: vblendpd $14, %ymm2, %ymm0, %ymm0
93 ; Compute the high part.
94 ;CHECK: vblendpd $14, %ymm3, %ymm1, %ymm1
7395 ;CHECK: ret
7496 define <8 x double> @vsel_double8(<8 x double> %v1, <8 x double> %v2) {
7597 %vsel = select <8 x i1> , <8 x double> %v1, <8 x double> %v2
7799 }
78100
79101 ;CHECK-LABEL: vsel_i648:
80 ;CHECK: vblendpd $1
81 ;CHECK: vblendpd $1
102 ;CHECK: vblendpd $14, %ymm2, %ymm0, %ymm0
103 ;CHECK: vblendpd $14, %ymm3, %ymm1, %ymm1
82104 ;CHECK: ret
83105 define <8 x i64> @vsel_i648(<8 x i64> %v1, <8 x i64> %v2) {
84106 %vsel = select <8 x i1> , <8 x i64> %v1, <8 x i64> %v2
2121 }
2222
2323 ;CHECK-LABEL: vsel_8xi16:
24 ;CHECK: pblendw $17
24 ; The select mask is
25 ; <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>
26 ; which translates into the boolean mask (big endian representation):
27 ; 00010001 = 17.
28 ; '1' means takes the first argument, '0' means takes the second argument.
29 ; This is the opposite of the intel syntax, thus we expect
30 ; the inverted mask: 11101110 = 238.
31 ; According to the ABI:
32 ; v1 is in xmm0 => first argument is xmm0.
33 ; v2 is in xmm1 => second argument is xmm1.
34 ;CHECK: pblendw $238, %xmm1, %xmm0
2535 ;CHECK: ret
2636 define <8 x i16> @vsel_8xi16(<8 x i16> %v1, <8 x i16> %v2) {
2737 %vsel = select <8 x i1> , <8 x i16> %v1, <8 x i16> %v2