llvm.org GIT mirror: llvm @ d7f7b7e

[X86][AVX] Tests tidyup

Cleanup/regenerate some tests for some upcoming patches.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@255432 91177308-0d34-0410-b5e6-96231b3b80d8

Simon Pilgrim, 4 years ago
2 changed files with 68 additions and 69 deletions.
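Both files below were regenerated with utils/update_llc_test_checks.py, per the NOTE line added in the second file. As a rough sketch of that workflow (the paths and the --llc-binary flag are assumptions here; the script's options have varied across LLVM revisions):

    # Rewrite a test's CHECK lines from the output of a freshly built llc.
    python utils/update_llc_test_checks.py --llc-binary=build/bin/llc \
        test/CodeGen/X86/some-test.ll

The script runs the test's RUN line and replaces the CHECK/CHECK-NEXT blocks with patterns matching current llc output, which is why the hand-written check blocks at the bottom of each function are deleted and regenerated blocks appear directly under each definition.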
File 1:

 define void @endless_loop() {
 ; CHECK-LABEL: endless_loop:
-; CHECK-NEXT:  # BB#0:
-; CHECK-NEXT:    vmovaps (%eax), %ymm0
-; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
-; CHECK-NEXT:    vmovsldup %xmm0, %xmm0 # xmm0 = xmm0[0,0,2,2]
-; CHECK-NEXT:    vmovddup %xmm0, %xmm1 # xmm1 = xmm0[0,0]
-; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1
-; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
-; CHECK-NEXT:    vblendps $128, %ymm1, %ymm2, %ymm1 # ymm1 = ymm2[0,1,2,3,4,5,6],ymm1[7]
-; CHECK-NEXT:    vxorps %ymm2, %ymm2, %ymm2
-; CHECK-NEXT:    vblendps $1, %ymm0, %ymm2, %ymm0 # ymm0 = ymm0[0],ymm2[1,2,3,4,5,6,7]
-; CHECK-NEXT:    vmovaps %ymm0, (%eax)
-; CHECK-NEXT:    vmovaps %ymm1, (%eax)
-; CHECK-NEXT:    vzeroupper
-; CHECK-NEXT:    retl
+; CHECK-NEXT:  # BB#0:
+; CHECK-NEXT:    vmovaps (%eax), %ymm0
+; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT:    vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
+; CHECK-NEXT:    vmovddup {{.*#+}} xmm1 = xmm0[0,0]
+; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1
+; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
+; CHECK-NEXT:    vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3,4,5,6],ymm1[7]
+; CHECK-NEXT:    vxorps %ymm2, %ymm2, %ymm2
+; CHECK-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm2[1,2,3,4,5,6,7]
+; CHECK-NEXT:    vmovaps %ymm0, (%eax)
+; CHECK-NEXT:    vmovaps %ymm1, (%eax)
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retl
 entry:
   %0 = load <8 x i32>, <8 x i32> addrspace(1)* undef, align 32
   %1 = shufflevector <8 x i32> %0, <8 x i32> undef, <16 x i32> ...
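The only change to this first file is the format of the shuffle comments on the CHECK lines: the hand-written operand+comment patterns are replaced by the `{{.*#+}}` form that update_llc_test_checks.py emits. `{{...}}` is FileCheck's inline regular-expression syntax, so `{{.*#+}}` matches everything up through the `#` that begins the assembler comment. For example, the directive

    ; CHECK-NEXT: vmovddup {{.*#+}} xmm1 = xmm0[0,0]

matches an output line such as

    vmovddup %xmm0, %xmm1   # xmm1 = xmm0[0,0]

without pinning the exact register operands ahead of the comment.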
File 2:

-target triple = "x86_64-unknown-unknown"
-; RUN: llc < %s -march=x86-64 -mattr=+avx | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
 
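The RUN-line change pins the whole target on the command line: -march only selects the architecture (inheriting the host OS), while -mtriple fixes the complete triple, so the in-file target triple line becomes redundant. Illustrative invocations (file name assumed):

    llc -march=x86-64 -mattr=+avx < test.ll                      # arch only; OS from host
    llc -mtriple=x86_64-unknown-unknown -mattr=+avx < test.ll    # fully pinned triple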
 ; When extracting multiple consecutive elements from a larger
 ; vector into a smaller one, do it efficiently. We should use
 ...
 
 ; Extracting the low elements only requires using the right kind of store.
 define void @low_v8f32_to_v4f32(<8 x float> %v, <4 x float>* %ptr) {
+; CHECK-LABEL: low_v8f32_to_v4f32:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vmovaps %xmm0, (%rdi)
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
   %ext0 = extractelement <8 x float> %v, i32 0
   %ext1 = extractelement <8 x float> %v, i32 1
   %ext2 = extractelement <8 x float> %v, i32 2
   %ext3 = extractelement <8 x float> %v, i32 3
   %ins0 = insertelement <4 x float> undef, float %ext0, i32 0
   %ins1 = insertelement <4 x float> %ins0, float %ext1, i32 1
   %ins2 = insertelement <4 x float> %ins1, float %ext2, i32 2
   %ins3 = insertelement <4 x float> %ins2, float %ext3, i32 3
   store <4 x float> %ins3, <4 x float>* %ptr, align 16
   ret void
-
-; CHECK-LABEL: low_v8f32_to_v4f32
-; CHECK: vmovaps
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
 }
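Each test builds the narrow vector through an extractelement/insertelement chain, which is semantically just a subvector extract. A minimal sketch of the same computation written directly as a shufflevector (hypothetical function, not part of the test file):

    ; Lanes 0-3 of the wide vector as one subvector extract.
    define <4 x float> @low_half(<8 x float> %v) {
      %lo = shufflevector <8 x float> %v, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
      ret <4 x float> %lo
    }

Since the low half already sits in xmm0 (the low 128 bits of ymm0), storing it needs no shuffle at all, which is exactly what the regenerated vmovaps check verifies.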
 
 ; Extracting the high elements requires just one AVX instruction.
 define void @high_v8f32_to_v4f32(<8 x float> %v, <4 x float>* %ptr) {
+; CHECK-LABEL: high_v8f32_to_v4f32:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vextractf128 $1, %ymm0, (%rdi)
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
   %ext0 = extractelement <8 x float> %v, i32 4
   %ext1 = extractelement <8 x float> %v, i32 5
   %ext2 = extractelement <8 x float> %v, i32 6
   %ext3 = extractelement <8 x float> %v, i32 7
   %ins0 = insertelement <4 x float> undef, float %ext0, i32 0
   %ins1 = insertelement <4 x float> %ins0, float %ext1, i32 1
   %ins2 = insertelement <4 x float> %ins1, float %ext2, i32 2
   %ins3 = insertelement <4 x float> %ins2, float %ext3, i32 3
   store <4 x float> %ins3, <4 x float>* %ptr, align 16
   ret void
-
-; CHECK-LABEL: high_v8f32_to_v4f32
-; CHECK: vextractf128
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
 }
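For the high half the chain reduces to lanes 4-7, and AVX's vextractf128 with immediate 1 can write that upper 128-bit subvector straight to memory. The direct equivalent of the chain, as a sketch (hypothetical function):

    ; Lanes 4-7; llc lowers a store of this to one
    ; memory-destination vextractf128 $1.
    define <4 x float> @high_half(<8 x float> %v) {
      %hi = shufflevector <8 x float> %v, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
      ret <4 x float> %hi
    }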
 
 ; Make sure the element type doesn't alter the codegen. Note that
 ; if we were actually using the vector in this function and
 ; had AVX2, we should generate vextracti128 (the int version).
 define void @high_v8i32_to_v4i32(<8 x i32> %v, <4 x i32>* %ptr) {
+; CHECK-LABEL: high_v8i32_to_v4i32:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vextractf128 $1, %ymm0, (%rdi)
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
   %ext0 = extractelement <8 x i32> %v, i32 4
   %ext1 = extractelement <8 x i32> %v, i32 5
   %ext2 = extractelement <8 x i32> %v, i32 6
   %ext3 = extractelement <8 x i32> %v, i32 7
   %ins0 = insertelement <4 x i32> undef, i32 %ext0, i32 0
   %ins1 = insertelement <4 x i32> %ins0, i32 %ext1, i32 1
   %ins2 = insertelement <4 x i32> %ins1, i32 %ext2, i32 2
   %ins3 = insertelement <4 x i32> %ins2, i32 %ext3, i32 3
   store <4 x i32> %ins3, <4 x i32>* %ptr, align 16
   ret void
-
-; CHECK-LABEL: high_v8i32_to_v4i32
-; CHECK: vextractf128
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
 }
 
 ; Make sure that element size doesn't alter the codegen.
 define void @high_v4f64_to_v2f64(<4 x double> %v, <2 x double>* %ptr) {
+; CHECK-LABEL: high_v4f64_to_v2f64:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vextractf128 $1, %ymm0, (%rdi)
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
   %ext0 = extractelement <4 x double> %v, i32 2
   %ext1 = extractelement <4 x double> %v, i32 3
   %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
   %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
   store <2 x double> %ins1, <2 x double>* %ptr, align 16
   ret void
-
-; CHECK-LABEL: high_v4f64_to_v2f64
-; CHECK: vextractf128
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
 }
 
 ; PR25320 Make sure that a widened (possibly legalized) vector correctly zero-extends upper elements.
 ; FIXME - Ideally these should just call VMOVD/VMOVQ/VMOVSS/VMOVSD
 
 define void @legal_vzmovl_2i32_8i32(<2 x i32>* %in, <8 x i32>* %out) {
+; CHECK-LABEL: legal_vzmovl_2i32_8i32:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
+; CHECK-NEXT:    vxorps %ymm1, %ymm1, %ymm1
+; CHECK-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
+; CHECK-NEXT:    vmovaps %ymm0, (%rsi)
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
   %ld = load <2 x i32>, <2 x i32>* %in, align 8
   %ext = extractelement <2 x i32> %ld, i64 0
   %ins = insertelement <8 x i32> <i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, i32 %ext, i64 0
   store <8 x i32> %ins, <8 x i32>* %out, align 32
   ret void
-
-; CHECK-LABEL: legal_vzmovl_2i32_8i32
-; CHECK: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
-; CHECK-NEXT: vxorps %ymm1, %ymm1, %ymm1
-; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
-; CHECK-NEXT: vmovaps %ymm0, (%rsi)
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
 }
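The insertelement into a mostly-zero constant vector above is the pattern the x86 backend treats as "move the low element and zero the rest" (the vzmovl of the test names). An equivalent spelling via shufflevector, as a sketch (hypothetical function; mask indices 9-15 select from the zero operand):

    define <8 x i32> @vzmovl_form(<8 x i32> %v) {
      ; Lane 0 from %v, lanes 1-7 from zeroinitializer.
      %r = shufflevector <8 x i32> %v, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
      ret <8 x i32> %r
    }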
 
 define void @legal_vzmovl_2i64_4i64(<2 x i64>* %in, <4 x i64>* %out) {
+; CHECK-LABEL: legal_vzmovl_2i64_4i64:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vmovupd (%rdi), %xmm0
+; CHECK-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
+; CHECK-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
+; CHECK-NEXT:    vmovapd %ymm0, (%rsi)
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
   %ld = load <2 x i64>, <2 x i64>* %in, align 8
   %ext = extractelement <2 x i64> %ld, i64 0
   %ins = insertelement <4 x i64> <i64 undef, i64 0, i64 0, i64 0>, i64 %ext, i64 0
   store <4 x i64> %ins, <4 x i64>* %out, align 32
   ret void
-
-; CHECK-LABEL: legal_vzmovl_2i64_4i64
-; CHECK: vmovupd (%rdi), %xmm0
-; CHECK-NEXT: vxorpd %ymm1, %ymm1, %ymm1
-; CHECK-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
-; CHECK-NEXT: vmovapd %ymm0, (%rsi)
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
 }
 
 define void @legal_vzmovl_2f32_8f32(<2 x float>* %in, <8 x float>* %out) {
+; CHECK-LABEL: legal_vzmovl_2f32_8f32:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    vxorps %ymm1, %ymm1, %ymm1
+; CHECK-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
+; CHECK-NEXT:    vmovaps %ymm0, (%rsi)
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
   %ld = load <2 x float>, <2 x float>* %in, align 8
   %ext = extractelement <2 x float> %ld, i64 0
   %ins = insertelement <8 x float> <float undef, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0>, float %ext, i64 0
   store <8 x float> %ins, <8 x float>* %out, align 32
   ret void
-
-; CHECK-LABEL: legal_vzmovl_2f32_8f32
-; CHECK: vmovq {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: vxorps %ymm1, %ymm1, %ymm1
-; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
-; CHECK-NEXT: vmovaps %ymm0, (%rsi)
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
 }
 
 define void @legal_vzmovl_2f64_4f64(<2 x double>* %in, <4 x double>* %out) {
+; CHECK-LABEL: legal_vzmovl_2f64_4f64:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vmovupd (%rdi), %xmm0
+; CHECK-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
+; CHECK-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
+; CHECK-NEXT:    vmovapd %ymm0, (%rsi)
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
   %ld = load <2 x double>, <2 x double>* %in, align 8
   %ext = extractelement <2 x double> %ld, i64 0
   %ins = insertelement <4 x double> <double undef, double 0.0, double 0.0, double 0.0>, double %ext, i64 0
   store <4 x double> %ins, <4 x double>* %out, align 32
   ret void
-
-; CHECK-LABEL: legal_vzmovl_2f64_4f64
-; CHECK: vmovupd (%rdi), %xmm0
-; CHECK-NEXT: vxorpd %ymm1, %ymm1, %ymm1
-; CHECK-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
-; CHECK-NEXT: vmovapd %ymm0, (%rsi)
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
 }
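Per the FIXME above, the ideal lowering for these cases would be a single zero-extending scalar move rather than the load+xor+blend sequence the checks currently pin down. A hedged sketch for the 2f64-to-4f64 case (illustrative assembly, not output llc produced at this revision):

    # VEX-encoded vmovsd zeroes bits 64-255 of ymm0, so the widened
    # store below is already correctly zero-extended.
    vmovsd (%rdi), %xmm0
    vmovapd %ymm0, (%rsi)
    vzeroupper
    retq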