llvm.org GIT mirror llvm / 0ee4a48
[x86] add tests for potential vector narrowing optimization (PR32790) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@302910 91177308-0d34-0410-b5e6-96231b3b80d8 Sanjay Patel 3 years ago
1 changed file(s) with 111 addition(s) and 0 deletion(s). Raw diff Collapse all Expand all
0 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512BW
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512DQ
7
8 ; AVX1 has support for 256-bit bitwise logic because the FP variants were included.
9 ; If using those ops requires extra insert/extract though, it's probably not worth it.
10
; Codegen test for a 256-bit integer chain: (%a + %b) & %c - %d on <8 x i32>.
; The assertions below record what each RUN configuration currently emits:
; SSE works on two 128-bit halves, AVX1 wraps a 256-bit vandps in
; extract/insert pairs around 128-bit vpaddd/vpsubd, and AVX2/AVX512 use
; three 256-bit ops.
11 define <8 x i32> @PR32790(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x i32> %d) {
12 ; SSE-LABEL: PR32790:
13 ; SSE: # BB#0:
14 ; SSE-NEXT: paddd %xmm2, %xmm0
15 ; SSE-NEXT: paddd %xmm3, %xmm1
16 ; SSE-NEXT: pand %xmm5, %xmm1
17 ; SSE-NEXT: pand %xmm4, %xmm0
18 ; SSE-NEXT: psubd %xmm6, %xmm0
19 ; SSE-NEXT: psubd %xmm7, %xmm1
20 ; SSE-NEXT: retq
21 ;
22 ; AVX1-LABEL: PR32790:
23 ; AVX1: # BB#0:
24 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
25 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
26 ; AVX1-NEXT: vpaddd %xmm4, %xmm5, %xmm4
27 ; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
28 ; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
29 ; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
30 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
31 ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
32 ; AVX1-NEXT: vpsubd %xmm2, %xmm1, %xmm1
33 ; AVX1-NEXT: vpsubd %xmm3, %xmm0, %xmm0
34 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
35 ; AVX1-NEXT: retq
36 ;
37 ; AVX2-LABEL: PR32790:
38 ; AVX2: # BB#0:
39 ; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
40 ; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
41 ; AVX2-NEXT: vpsubd %ymm3, %ymm0, %ymm0
42 ; AVX2-NEXT: retq
43 ;
44 ; AVX512-LABEL: PR32790:
45 ; AVX512: # BB#0:
46 ; AVX512-NEXT: vpaddd %ymm1, %ymm0, %ymm0
47 ; AVX512-NEXT: vpand %ymm2, %ymm0, %ymm0
48 ; AVX512-NEXT: vpsubd %ymm3, %ymm0, %ymm0
49 ; AVX512-NEXT: retq
50 %add = add <8 x i32> %a, %b ; element-wise %a + %b
51 %and = and <8 x i32> %add, %c ; bitwise mask of the sum with %c (the op AVX1 emits as 256-bit vandps)
52 %sub = sub <8 x i32> %and, %d ; element-wise subtract %d from the masked sum
53 ret <8 x i32> %sub
54 }
55
56 ; In a more extreme case, even the later AVX targets should not introduce insert/extract
57 ; merely because 256-bit ops are supported.
58
; Codegen test where every argument and the result are 128-bit (<4 x i32>),
; but the IR concatenates the inputs to 256 bits for a single 'and' and then
; splits the result again. Computes (%a & %c) - (%b & %d).
; The assertions below record what each RUN configuration currently emits:
; SSE uses two 128-bit pand ops, while the AVX targets build ymm values with
; vinsert*128 and split them again with vextract*128.
59 define <4 x i32> @do_not_use_256bit_op(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) {
60 ; SSE-LABEL: do_not_use_256bit_op:
61 ; SSE: # BB#0:
62 ; SSE-NEXT: pand %xmm3, %xmm1
63 ; SSE-NEXT: pand %xmm2, %xmm0
64 ; SSE-NEXT: psubd %xmm1, %xmm0
65 ; SSE-NEXT: retq
66 ;
67 ; AVX1-LABEL: do_not_use_256bit_op:
68 ; AVX1: # BB#0:
69 ; AVX1-NEXT: # kill: %XMM2 %XMM2 %YMM2
70 ; AVX1-NEXT: # kill: %XMM0 %XMM0 %YMM0
71 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
72 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm1
73 ; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
74 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
75 ; AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
76 ; AVX1-NEXT: vzeroupper
77 ; AVX1-NEXT: retq
78 ;
79 ; AVX2-LABEL: do_not_use_256bit_op:
80 ; AVX2: # BB#0:
81 ; AVX2-NEXT: # kill: %XMM2 %XMM2 %YMM2
82 ; AVX2-NEXT: # kill: %XMM0 %XMM0 %YMM0
83 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
84 ; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm1
85 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
86 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
87 ; AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
88 ; AVX2-NEXT: vzeroupper
89 ; AVX2-NEXT: retq
90 ;
91 ; AVX512-LABEL: do_not_use_256bit_op:
92 ; AVX512: # BB#0:
93 ; AVX512-NEXT: # kill: %XMM2 %XMM2 %YMM2
94 ; AVX512-NEXT: # kill: %XMM0 %XMM0 %YMM0
95 ; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
96 ; AVX512-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm1
97 ; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
98 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
99 ; AVX512-NEXT: vpsubd %xmm1, %xmm0, %xmm0
100 ; AVX512-NEXT: vzeroupper
101 ; AVX512-NEXT: retq
102 %concat1 = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; low half = %a, high half = %b
103 %concat2 = shufflevector <4 x i32> %c, <4 x i32> %d, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; low half = %c, high half = %d
104 %and = and <8 x i32> %concat1, %concat2 ; low half = %a & %c, high half = %b & %d
105 %extract1 = shufflevector <8 x i32> %and, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; low half: %a & %c
106 %extract2 = shufflevector <8 x i32> %and, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> ; high half: %b & %d
107 %sub = sub <4 x i32> %extract1, %extract2 ; (%a & %c) - (%b & %d), matching the psubd/vpsubd operand order asserted above
108 ret <4 x i32> %sub
109 }
110