llvm.org GIT mirror llvm / 83815ae
Merge a bunch of NEON tests into larger files so they run faster.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@83667 91177308-0d34-0410-b5e6-96231b3b80d8
Bob Wilson, 10 years ago
89 changed files with 3747 additions and 3871 deletions.
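Each merged file keeps the same llc/FileCheck harness that the deleted per-instruction files used, so every test still checks that the expected NEON instruction is emitted. As a minimal stand-alone sketch in that style (the RUN line, function, and intrinsic declaration below are copied from the vabal tests in this diff):

; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s

define <8 x i16> @vabals8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
;CHECK: vabals8:
;CHECK: vabal.s8
	%tmp1 = load <8 x i16>* %A
	%tmp2 = load <8 x i8>* %B
	%tmp3 = load <8 x i8>* %C
	%tmp4 = call <8 x i16> @llvm.arm.neon.vabals.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
	ret <8 x i16> %tmp4
}

declare <8 x i16> @llvm.arm.neon.vabals.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone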
134134 declare <16 x i8> @llvm.arm.neon.vabau.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
135135 declare <8 x i16> @llvm.arm.neon.vabau.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone
136136 declare <4 x i32> @llvm.arm.neon.vabau.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
137
138 define <8 x i16> @vabals8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
139 ;CHECK: vabals8:
140 ;CHECK: vabal.s8
141 %tmp1 = load <8 x i16>* %A
142 %tmp2 = load <8 x i8>* %B
143 %tmp3 = load <8 x i8>* %C
144 %tmp4 = call <8 x i16> @llvm.arm.neon.vabals.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
145 ret <8 x i16> %tmp4
146 }
147
148 define <4 x i32> @vabals16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
149 ;CHECK: vabals16:
150 ;CHECK: vabal.s16
151 %tmp1 = load <4 x i32>* %A
152 %tmp2 = load <4 x i16>* %B
153 %tmp3 = load <4 x i16>* %C
154 %tmp4 = call <4 x i32> @llvm.arm.neon.vabals.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
155 ret <4 x i32> %tmp4
156 }
157
158 define <2 x i64> @vabals32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
159 ;CHECK: vabals32:
160 ;CHECK: vabal.s32
161 %tmp1 = load <2 x i64>* %A
162 %tmp2 = load <2 x i32>* %B
163 %tmp3 = load <2 x i32>* %C
164 %tmp4 = call <2 x i64> @llvm.arm.neon.vabals.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
165 ret <2 x i64> %tmp4
166 }
167
168 define <8 x i16> @vabalu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
169 ;CHECK: vabalu8:
170 ;CHECK: vabal.u8
171 %tmp1 = load <8 x i16>* %A
172 %tmp2 = load <8 x i8>* %B
173 %tmp3 = load <8 x i8>* %C
174 %tmp4 = call <8 x i16> @llvm.arm.neon.vabalu.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
175 ret <8 x i16> %tmp4
176 }
177
178 define <4 x i32> @vabalu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
179 ;CHECK: vabalu16:
180 ;CHECK: vabal.u16
181 %tmp1 = load <4 x i32>* %A
182 %tmp2 = load <4 x i16>* %B
183 %tmp3 = load <4 x i16>* %C
184 %tmp4 = call <4 x i32> @llvm.arm.neon.vabalu.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
185 ret <4 x i32> %tmp4
186 }
187
188 define <2 x i64> @vabalu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
189 ;CHECK: vabalu32:
190 ;CHECK: vabal.u32
191 %tmp1 = load <2 x i64>* %A
192 %tmp2 = load <2 x i32>* %B
193 %tmp3 = load <2 x i32>* %C
194 %tmp4 = call <2 x i64> @llvm.arm.neon.vabalu.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
195 ret <2 x i64> %tmp4
196 }
197
198 declare <8 x i16> @llvm.arm.neon.vabals.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone
199 declare <4 x i32> @llvm.arm.neon.vabals.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
200 declare <2 x i64> @llvm.arm.neon.vabals.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
201
202 declare <8 x i16> @llvm.arm.neon.vabalu.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone
203 declare <4 x i32> @llvm.arm.neon.vabalu.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
204 declare <2 x i64> @llvm.arm.neon.vabalu.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
Deleted file: test/CodeGen/ARM/vabal.ll (0 additions, 69 deletions)
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
1
2 define <8 x i16> @vabals8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
3 ;CHECK: vabals8:
4 ;CHECK: vabal.s8
5 %tmp1 = load <8 x i16>* %A
6 %tmp2 = load <8 x i8>* %B
7 %tmp3 = load <8 x i8>* %C
8 %tmp4 = call <8 x i16> @llvm.arm.neon.vabals.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
9 ret <8 x i16> %tmp4
10 }
11
12 define <4 x i32> @vabals16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
13 ;CHECK: vabals16:
14 ;CHECK: vabal.s16
15 %tmp1 = load <4 x i32>* %A
16 %tmp2 = load <4 x i16>* %B
17 %tmp3 = load <4 x i16>* %C
18 %tmp4 = call <4 x i32> @llvm.arm.neon.vabals.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
19 ret <4 x i32> %tmp4
20 }
21
22 define <2 x i64> @vabals32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
23 ;CHECK: vabals32:
24 ;CHECK: vabal.s32
25 %tmp1 = load <2 x i64>* %A
26 %tmp2 = load <2 x i32>* %B
27 %tmp3 = load <2 x i32>* %C
28 %tmp4 = call <2 x i64> @llvm.arm.neon.vabals.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
29 ret <2 x i64> %tmp4
30 }
31
32 define <8 x i16> @vabalu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
33 ;CHECK: vabalu8:
34 ;CHECK: vabal.u8
35 %tmp1 = load <8 x i16>* %A
36 %tmp2 = load <8 x i8>* %B
37 %tmp3 = load <8 x i8>* %C
38 %tmp4 = call <8 x i16> @llvm.arm.neon.vabalu.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
39 ret <8 x i16> %tmp4
40 }
41
42 define <4 x i32> @vabalu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
43 ;CHECK: vabalu16:
44 ;CHECK: vabal.u16
45 %tmp1 = load <4 x i32>* %A
46 %tmp2 = load <4 x i16>* %B
47 %tmp3 = load <4 x i16>* %C
48 %tmp4 = call <4 x i32> @llvm.arm.neon.vabalu.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
49 ret <4 x i32> %tmp4
50 }
51
52 define <2 x i64> @vabalu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
53 ;CHECK: vabalu32:
54 ;CHECK: vabal.u32
55 %tmp1 = load <2 x i64>* %A
56 %tmp2 = load <2 x i32>* %B
57 %tmp3 = load <2 x i32>* %C
58 %tmp4 = call <2 x i64> @llvm.arm.neon.vabalu.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
59 ret <2 x i64> %tmp4
60 }
61
62 declare <8 x i16> @llvm.arm.neon.vabals.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone
63 declare <4 x i32> @llvm.arm.neon.vabals.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
64 declare <2 x i64> @llvm.arm.neon.vabals.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
65
66 declare <8 x i16> @llvm.arm.neon.vabalu.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone
67 declare <4 x i32> @llvm.arm.neon.vabalu.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
68 declare <2 x i64> @llvm.arm.neon.vabalu.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
144144 declare <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
145145
146146 declare <4 x float> @llvm.arm.neon.vabds.v4f32(<4 x float>, <4 x float>) nounwind readnone
147
148 define <8 x i16> @vabdls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
149 ;CHECK: vabdls8:
150 ;CHECK: vabdl.s8
151 %tmp1 = load <8 x i8>* %A
152 %tmp2 = load <8 x i8>* %B
153 %tmp3 = call <8 x i16> @llvm.arm.neon.vabdls.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
154 ret <8 x i16> %tmp3
155 }
156
157 define <4 x i32> @vabdls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
158 ;CHECK: vabdls16:
159 ;CHECK: vabdl.s16
160 %tmp1 = load <4 x i16>* %A
161 %tmp2 = load <4 x i16>* %B
162 %tmp3 = call <4 x i32> @llvm.arm.neon.vabdls.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
163 ret <4 x i32> %tmp3
164 }
165
166 define <2 x i64> @vabdls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
167 ;CHECK: vabdls32:
168 ;CHECK: vabdl.s32
169 %tmp1 = load <2 x i32>* %A
170 %tmp2 = load <2 x i32>* %B
171 %tmp3 = call <2 x i64> @llvm.arm.neon.vabdls.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
172 ret <2 x i64> %tmp3
173 }
174
175 define <8 x i16> @vabdlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
176 ;CHECK: vabdlu8:
177 ;CHECK: vabdl.u8
178 %tmp1 = load <8 x i8>* %A
179 %tmp2 = load <8 x i8>* %B
180 %tmp3 = call <8 x i16> @llvm.arm.neon.vabdlu.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
181 ret <8 x i16> %tmp3
182 }
183
184 define <4 x i32> @vabdlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
185 ;CHECK: vabdlu16:
186 ;CHECK: vabdl.u16
187 %tmp1 = load <4 x i16>* %A
188 %tmp2 = load <4 x i16>* %B
189 %tmp3 = call <4 x i32> @llvm.arm.neon.vabdlu.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
190 ret <4 x i32> %tmp3
191 }
192
193 define <2 x i64> @vabdlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
194 ;CHECK: vabdlu32:
195 ;CHECK: vabdl.u32
196 %tmp1 = load <2 x i32>* %A
197 %tmp2 = load <2 x i32>* %B
198 %tmp3 = call <2 x i64> @llvm.arm.neon.vabdlu.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
199 ret <2 x i64> %tmp3
200 }
201
202 declare <8 x i16> @llvm.arm.neon.vabdls.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
203 declare <4 x i32> @llvm.arm.neon.vabdls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
204 declare <2 x i64> @llvm.arm.neon.vabdls.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
205
206 declare <8 x i16> @llvm.arm.neon.vabdlu.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
207 declare <4 x i32> @llvm.arm.neon.vabdlu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
208 declare <2 x i64> @llvm.arm.neon.vabdlu.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
Deleted file: test/CodeGen/ARM/vabdl.ll (0 additions, 63 deletions)
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
1
2 define <8 x i16> @vabdls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
3 ;CHECK: vabdls8:
4 ;CHECK: vabdl.s8
5 %tmp1 = load <8 x i8>* %A
6 %tmp2 = load <8 x i8>* %B
7 %tmp3 = call <8 x i16> @llvm.arm.neon.vabdls.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
8 ret <8 x i16> %tmp3
9 }
10
11 define <4 x i32> @vabdls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
12 ;CHECK: vabdls16:
13 ;CHECK: vabdl.s16
14 %tmp1 = load <4 x i16>* %A
15 %tmp2 = load <4 x i16>* %B
16 %tmp3 = call <4 x i32> @llvm.arm.neon.vabdls.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
17 ret <4 x i32> %tmp3
18 }
19
20 define <2 x i64> @vabdls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
21 ;CHECK: vabdls32:
22 ;CHECK: vabdl.s32
23 %tmp1 = load <2 x i32>* %A
24 %tmp2 = load <2 x i32>* %B
25 %tmp3 = call <2 x i64> @llvm.arm.neon.vabdls.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
26 ret <2 x i64> %tmp3
27 }
28
29 define <8 x i16> @vabdlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
30 ;CHECK: vabdlu8:
31 ;CHECK: vabdl.u8
32 %tmp1 = load <8 x i8>* %A
33 %tmp2 = load <8 x i8>* %B
34 %tmp3 = call <8 x i16> @llvm.arm.neon.vabdlu.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
35 ret <8 x i16> %tmp3
36 }
37
38 define <4 x i32> @vabdlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
39 ;CHECK: vabdlu16:
40 ;CHECK: vabdl.u16
41 %tmp1 = load <4 x i16>* %A
42 %tmp2 = load <4 x i16>* %B
43 %tmp3 = call <4 x i32> @llvm.arm.neon.vabdlu.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
44 ret <4 x i32> %tmp3
45 }
46
47 define <2 x i64> @vabdlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
48 ;CHECK: vabdlu32:
49 ;CHECK: vabdl.u32
50 %tmp1 = load <2 x i32>* %A
51 %tmp2 = load <2 x i32>* %B
52 %tmp3 = call <2 x i64> @llvm.arm.neon.vabdlu.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
53 ret <2 x i64> %tmp3
54 }
55
56 declare <8 x i16> @llvm.arm.neon.vabdls.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
57 declare <4 x i32> @llvm.arm.neon.vabdls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
58 declare <2 x i64> @llvm.arm.neon.vabdls.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
59
60 declare <8 x i16> @llvm.arm.neon.vabdlu.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
61 declare <4 x i32> @llvm.arm.neon.vabdlu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
62 declare <2 x i64> @llvm.arm.neon.vabdlu.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
7373 declare <4 x i32> @llvm.arm.neon.vabs.v4i32(<4 x i32>) nounwind readnone
7474 declare <4 x float> @llvm.arm.neon.vabs.v4f32(<4 x float>) nounwind readnone
7575
76 define <8 x i8> @vqabss8(<8 x i8>* %A) nounwind {
77 ;CHECK: vqabss8:
78 ;CHECK: vqabs.s8
79 %tmp1 = load <8 x i8>* %A
80 %tmp2 = call <8 x i8> @llvm.arm.neon.vqabs.v8i8(<8 x i8> %tmp1)
81 ret <8 x i8> %tmp2
82 }
83
84 define <4 x i16> @vqabss16(<4 x i16>* %A) nounwind {
85 ;CHECK: vqabss16:
86 ;CHECK: vqabs.s16
87 %tmp1 = load <4 x i16>* %A
88 %tmp2 = call <4 x i16> @llvm.arm.neon.vqabs.v4i16(<4 x i16> %tmp1)
89 ret <4 x i16> %tmp2
90 }
91
92 define <2 x i32> @vqabss32(<2 x i32>* %A) nounwind {
93 ;CHECK: vqabss32:
94 ;CHECK: vqabs.s32
95 %tmp1 = load <2 x i32>* %A
96 %tmp2 = call <2 x i32> @llvm.arm.neon.vqabs.v2i32(<2 x i32> %tmp1)
97 ret <2 x i32> %tmp2
98 }
99
100 define <16 x i8> @vqabsQs8(<16 x i8>* %A) nounwind {
101 ;CHECK: vqabsQs8:
102 ;CHECK: vqabs.s8
103 %tmp1 = load <16 x i8>* %A
104 %tmp2 = call <16 x i8> @llvm.arm.neon.vqabs.v16i8(<16 x i8> %tmp1)
105 ret <16 x i8> %tmp2
106 }
107
108 define <8 x i16> @vqabsQs16(<8 x i16>* %A) nounwind {
109 ;CHECK: vqabsQs16:
110 ;CHECK: vqabs.s16
111 %tmp1 = load <8 x i16>* %A
112 %tmp2 = call <8 x i16> @llvm.arm.neon.vqabs.v8i16(<8 x i16> %tmp1)
113 ret <8 x i16> %tmp2
114 }
115
116 define <4 x i32> @vqabsQs32(<4 x i32>* %A) nounwind {
117 ;CHECK: vqabsQs32:
118 ;CHECK: vqabs.s32
119 %tmp1 = load <4 x i32>* %A
120 %tmp2 = call <4 x i32> @llvm.arm.neon.vqabs.v4i32(<4 x i32> %tmp1)
121 ret <4 x i32> %tmp2
122 }
123
124 declare <8 x i8> @llvm.arm.neon.vqabs.v8i8(<8 x i8>) nounwind readnone
125 declare <4 x i16> @llvm.arm.neon.vqabs.v4i16(<4 x i16>) nounwind readnone
126 declare <2 x i32> @llvm.arm.neon.vqabs.v2i32(<2 x i32>) nounwind readnone
127
128 declare <16 x i8> @llvm.arm.neon.vqabs.v16i8(<16 x i8>) nounwind readnone
129 declare <8 x i16> @llvm.arm.neon.vqabs.v8i16(<8 x i16>) nounwind readnone
130 declare <4 x i32> @llvm.arm.neon.vqabs.v4i32(<4 x i32>) nounwind readnone
Deleted file: test/CodeGen/ARM/vacge.ll (0 additions, 22 deletions)
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
1
2 define <2 x i32> @vacgef32(<2 x float>* %A, <2 x float>* %B) nounwind {
3 ;CHECK: vacgef32:
4 ;CHECK: vacge.f32
5 %tmp1 = load <2 x float>* %A
6 %tmp2 = load <2 x float>* %B
7 %tmp3 = call <2 x i32> @llvm.arm.neon.vacged(<2 x float> %tmp1, <2 x float> %tmp2)
8 ret <2 x i32> %tmp3
9 }
10
11 define <4 x i32> @vacgeQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
12 ;CHECK: vacgeQf32:
13 ;CHECK: vacge.f32
14 %tmp1 = load <4 x float>* %A
15 %tmp2 = load <4 x float>* %B
16 %tmp3 = call <4 x i32> @llvm.arm.neon.vacgeq(<4 x float> %tmp1, <4 x float> %tmp2)
17 ret <4 x i32> %tmp3
18 }
19
20 declare <2 x i32> @llvm.arm.neon.vacged(<2 x float>, <2 x float>) nounwind readnone
21 declare <4 x i32> @llvm.arm.neon.vacgeq(<4 x float>, <4 x float>) nounwind readnone
Deleted file: test/CodeGen/ARM/vacgt.ll (0 additions, 22 deletions)
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
1
2 define <2 x i32> @vacgtf32(<2 x float>* %A, <2 x float>* %B) nounwind {
3 ;CHECK: vacgtf32:
4 ;CHECK: vacgt.f32
5 %tmp1 = load <2 x float>* %A
6 %tmp2 = load <2 x float>* %B
7 %tmp3 = call <2 x i32> @llvm.arm.neon.vacgtd(<2 x float> %tmp1, <2 x float> %tmp2)
8 ret <2 x i32> %tmp3
9 }
10
11 define <4 x i32> @vacgtQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
12 ;CHECK: vacgtQf32:
13 ;CHECK: vacgt.f32
14 %tmp1 = load <4 x float>* %A
15 %tmp2 = load <4 x float>* %B
16 %tmp3 = call <4 x i32> @llvm.arm.neon.vacgtq(<4 x float> %tmp1, <4 x float> %tmp2)
17 ret <4 x i32> %tmp3
18 }
19
20 declare <2 x i32> @llvm.arm.neon.vacgtd(<2 x float>, <2 x float>) nounwind readnone
21 declare <4 x i32> @llvm.arm.neon.vacgtq(<4 x float>, <4 x float>) nounwind readnone
8888 %tmp3 = add <4 x float> %tmp1, %tmp2
8989 ret <4 x float> %tmp3
9090 }
91
92 define <8 x i8> @vaddhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
93 ;CHECK: vaddhni16:
94 ;CHECK: vaddhn.i16
95 %tmp1 = load <8 x i16>* %A
96 %tmp2 = load <8 x i16>* %B
97 %tmp3 = call <8 x i8> @llvm.arm.neon.vaddhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2)
98 ret <8 x i8> %tmp3
99 }
100
101 define <4 x i16> @vaddhni32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
102 ;CHECK: vaddhni32:
103 ;CHECK: vaddhn.i32
104 %tmp1 = load <4 x i32>* %A
105 %tmp2 = load <4 x i32>* %B
106 %tmp3 = call <4 x i16> @llvm.arm.neon.vaddhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2)
107 ret <4 x i16> %tmp3
108 }
109
110 define <2 x i32> @vaddhni64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
111 ;CHECK: vaddhni64:
112 ;CHECK: vaddhn.i64
113 %tmp1 = load <2 x i64>* %A
114 %tmp2 = load <2 x i64>* %B
115 %tmp3 = call <2 x i32> @llvm.arm.neon.vaddhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2)
116 ret <2 x i32> %tmp3
117 }
118
119 declare <8 x i8> @llvm.arm.neon.vaddhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
120 declare <4 x i16> @llvm.arm.neon.vaddhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
121 declare <2 x i32> @llvm.arm.neon.vaddhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
122
123 define <8 x i8> @vraddhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
124 ;CHECK: vraddhni16:
125 ;CHECK: vraddhn.i16
126 %tmp1 = load <8 x i16>* %A
127 %tmp2 = load <8 x i16>* %B
128 %tmp3 = call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2)
129 ret <8 x i8> %tmp3
130 }
131
132 define <4 x i16> @vraddhni32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
133 ;CHECK: vraddhni32:
134 ;CHECK: vraddhn.i32
135 %tmp1 = load <4 x i32>* %A
136 %tmp2 = load <4 x i32>* %B
137 %tmp3 = call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2)
138 ret <4 x i16> %tmp3
139 }
140
141 define <2 x i32> @vraddhni64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
142 ;CHECK: vraddhni64:
143 ;CHECK: vraddhn.i64
144 %tmp1 = load <2 x i64>* %A
145 %tmp2 = load <2 x i64>* %B
146 %tmp3 = call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2)
147 ret <2 x i32> %tmp3
148 }
149
150 declare <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
151 declare <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
152 declare <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
153
154 define <8 x i16> @vaddls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
155 ;CHECK: vaddls8:
156 ;CHECK: vaddl.s8
157 %tmp1 = load <8 x i8>* %A
158 %tmp2 = load <8 x i8>* %B
159 %tmp3 = call <8 x i16> @llvm.arm.neon.vaddls.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
160 ret <8 x i16> %tmp3
161 }
162
163 define <4 x i32> @vaddls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
164 ;CHECK: vaddls16:
165 ;CHECK: vaddl.s16
166 %tmp1 = load <4 x i16>* %A
167 %tmp2 = load <4 x i16>* %B
168 %tmp3 = call <4 x i32> @llvm.arm.neon.vaddls.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
169 ret <4 x i32> %tmp3
170 }
171
172 define <2 x i64> @vaddls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
173 ;CHECK: vaddls32:
174 ;CHECK: vaddl.s32
175 %tmp1 = load <2 x i32>* %A
176 %tmp2 = load <2 x i32>* %B
177 %tmp3 = call <2 x i64> @llvm.arm.neon.vaddls.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
178 ret <2 x i64> %tmp3
179 }
180
181 define <8 x i16> @vaddlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
182 ;CHECK: vaddlu8:
183 ;CHECK: vaddl.u8
184 %tmp1 = load <8 x i8>* %A
185 %tmp2 = load <8 x i8>* %B
186 %tmp3 = call <8 x i16> @llvm.arm.neon.vaddlu.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
187 ret <8 x i16> %tmp3
188 }
189
190 define <4 x i32> @vaddlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
191 ;CHECK: vaddlu16:
192 ;CHECK: vaddl.u16
193 %tmp1 = load <4 x i16>* %A
194 %tmp2 = load <4 x i16>* %B
195 %tmp3 = call <4 x i32> @llvm.arm.neon.vaddlu.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
196 ret <4 x i32> %tmp3
197 }
198
199 define <2 x i64> @vaddlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
200 ;CHECK: vaddlu32:
201 ;CHECK: vaddl.u32
202 %tmp1 = load <2 x i32>* %A
203 %tmp2 = load <2 x i32>* %B
204 %tmp3 = call <2 x i64> @llvm.arm.neon.vaddlu.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
205 ret <2 x i64> %tmp3
206 }
207
208 declare <8 x i16> @llvm.arm.neon.vaddls.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
209 declare <4 x i32> @llvm.arm.neon.vaddls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
210 declare <2 x i64> @llvm.arm.neon.vaddls.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
211
212 declare <8 x i16> @llvm.arm.neon.vaddlu.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
213 declare <4 x i32> @llvm.arm.neon.vaddlu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
214 declare <2 x i64> @llvm.arm.neon.vaddlu.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
215
216 define <8 x i16> @vaddws8(<8 x i16>* %A, <8 x i8>* %B) nounwind {
217 ;CHECK: vaddws8:
218 ;CHECK: vaddw.s8
219 %tmp1 = load <8 x i16>* %A
220 %tmp2 = load <8 x i8>* %B
221 %tmp3 = call <8 x i16> @llvm.arm.neon.vaddws.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2)
222 ret <8 x i16> %tmp3
223 }
224
225 define <4 x i32> @vaddws16(<4 x i32>* %A, <4 x i16>* %B) nounwind {
226 ;CHECK: vaddws16:
227 ;CHECK: vaddw.s16
228 %tmp1 = load <4 x i32>* %A
229 %tmp2 = load <4 x i16>* %B
230 %tmp3 = call <4 x i32> @llvm.arm.neon.vaddws.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2)
231 ret <4 x i32> %tmp3
232 }
233
234 define <2 x i64> @vaddws32(<2 x i64>* %A, <2 x i32>* %B) nounwind {
235 ;CHECK: vaddws32:
236 ;CHECK: vaddw.s32
237 %tmp1 = load <2 x i64>* %A
238 %tmp2 = load <2 x i32>* %B
239 %tmp3 = call <2 x i64> @llvm.arm.neon.vaddws.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2)
240 ret <2 x i64> %tmp3
241 }
242
243 define <8 x i16> @vaddwu8(<8 x i16>* %A, <8 x i8>* %B) nounwind {
244 ;CHECK: vaddwu8:
245 ;CHECK: vaddw.u8
246 %tmp1 = load <8 x i16>* %A
247 %tmp2 = load <8 x i8>* %B
248 %tmp3 = call <8 x i16> @llvm.arm.neon.vaddwu.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2)
249 ret <8 x i16> %tmp3
250 }
251
252 define <4 x i32> @vaddwu16(<4 x i32>* %A, <4 x i16>* %B) nounwind {
253 ;CHECK: vaddwu16:
254 ;CHECK: vaddw.u16
255 %tmp1 = load <4 x i32>* %A
256 %tmp2 = load <4 x i16>* %B
257 %tmp3 = call <4 x i32> @llvm.arm.neon.vaddwu.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2)
258 ret <4 x i32> %tmp3
259 }
260
261 define <2 x i64> @vaddwu32(<2 x i64>* %A, <2 x i32>* %B) nounwind {
262 ;CHECK: vaddwu32:
263 ;CHECK: vaddw.u32
264 %tmp1 = load <2 x i64>* %A
265 %tmp2 = load <2 x i32>* %B
266 %tmp3 = call <2 x i64> @llvm.arm.neon.vaddwu.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2)
267 ret <2 x i64> %tmp3
268 }
269
270 declare <8 x i16> @llvm.arm.neon.vaddws.v8i16(<8 x i16>, <8 x i8>) nounwind readnone
271 declare <4 x i32> @llvm.arm.neon.vaddws.v4i32(<4 x i32>, <4 x i16>) nounwind readnone
272 declare <2 x i64> @llvm.arm.neon.vaddws.v2i64(<2 x i64>, <2 x i32>) nounwind readnone
273
274 declare <8 x i16> @llvm.arm.neon.vaddwu.v8i16(<8 x i16>, <8 x i8>) nounwind readnone
275 declare <4 x i32> @llvm.arm.neon.vaddwu.v4i32(<4 x i32>, <4 x i16>) nounwind readnone
276 declare <2 x i64> @llvm.arm.neon.vaddwu.v2i64(<2 x i64>, <2 x i32>) nounwind readnone
Deleted file: test/CodeGen/ARM/vaddhn.ll (0 additions, 32 deletions)
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
1
2 define <8 x i8> @vaddhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
3 ;CHECK: vaddhni16:
4 ;CHECK: vaddhn.i16
5 %tmp1 = load <8 x i16>* %A
6 %tmp2 = load <8 x i16>* %B
7 %tmp3 = call <8 x i8> @llvm.arm.neon.vaddhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2)
8 ret <8 x i8> %tmp3
9 }
10
11 define <4 x i16> @vaddhni32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
12 ;CHECK: vaddhni32:
13 ;CHECK: vaddhn.i32
14 %tmp1 = load <4 x i32>* %A
15 %tmp2 = load <4 x i32>* %B
16 %tmp3 = call <4 x i16> @llvm.arm.neon.vaddhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2)
17 ret <4 x i16> %tmp3
18 }
19
20 define <2 x i32> @vaddhni64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
21 ;CHECK: vaddhni64:
22 ;CHECK: vaddhn.i64
23 %tmp1 = load <2 x i64>* %A
24 %tmp2 = load <2 x i64>* %B
25 %tmp3 = call <2 x i32> @llvm.arm.neon.vaddhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2)
26 ret <2 x i32> %tmp3
27 }
28
29 declare <8 x i8> @llvm.arm.neon.vaddhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
30 declare <4 x i16> @llvm.arm.neon.vaddhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
31 declare <2 x i32> @llvm.arm.neon.vaddhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
Deleted file: test/CodeGen/ARM/vaddl.ll (0 additions, 63 deletions)
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
1
2 define <8 x i16> @vaddls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
3 ;CHECK: vaddls8:
4 ;CHECK: vaddl.s8
5 %tmp1 = load <8 x i8>* %A
6 %tmp2 = load <8 x i8>* %B
7 %tmp3 = call <8 x i16> @llvm.arm.neon.vaddls.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
8 ret <8 x i16> %tmp3
9 }
10
11 define <4 x i32> @vaddls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
12 ;CHECK: vaddls16:
13 ;CHECK: vaddl.s16
14 %tmp1 = load <4 x i16>* %A
15 %tmp2 = load <4 x i16>* %B
16 %tmp3 = call <4 x i32> @llvm.arm.neon.vaddls.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
17 ret <4 x i32> %tmp3
18 }
19
20 define <2 x i64> @vaddls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
21 ;CHECK: vaddls32:
22 ;CHECK: vaddl.s32
23 %tmp1 = load <2 x i32>* %A
24 %tmp2 = load <2 x i32>* %B
25 %tmp3 = call <2 x i64> @llvm.arm.neon.vaddls.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
26 ret <2 x i64> %tmp3
27 }
28
29 define <8 x i16> @vaddlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
30 ;CHECK: vaddlu8:
31 ;CHECK: vaddl.u8
32 %tmp1 = load <8 x i8>* %A
33 %tmp2 = load <8 x i8>* %B
34 %tmp3 = call <8 x i16> @llvm.arm.neon.vaddlu.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
35 ret <8 x i16> %tmp3
36 }
37
38 define <4 x i32> @vaddlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
39 ;CHECK: vaddlu16:
40 ;CHECK: vaddl.u16
41 %tmp1 = load <4 x i16>* %A
42 %tmp2 = load <4 x i16>* %B
43 %tmp3 = call <4 x i32> @llvm.arm.neon.vaddlu.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
44 ret <4 x i32> %tmp3
45 }
46
47 define <2 x i64> @vaddlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
48 ;CHECK: vaddlu32:
49 ;CHECK: vaddl.u32
50 %tmp1 = load <2 x i32>* %A
51 %tmp2 = load <2 x i32>* %B
52 %tmp3 = call <2 x i64> @llvm.arm.neon.vaddlu.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
53 ret <2 x i64> %tmp3
54 }
55
56 declare <8 x i16> @llvm.arm.neon.vaddls.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
57 declare <4 x i32> @llvm.arm.neon.vaddls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
58 declare <2 x i64> @llvm.arm.neon.vaddls.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
59
60 declare <8 x i16> @llvm.arm.neon.vaddlu.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
61 declare <4 x i32> @llvm.arm.neon.vaddlu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
62 declare <2 x i64> @llvm.arm.neon.vaddlu.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
Deleted file: test/CodeGen/ARM/vaddw.ll (0 additions, 63 deletions)
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
1
2 define <8 x i16> @vaddws8(<8 x i16>* %A, <8 x i8>* %B) nounwind {
3 ;CHECK: vaddws8:
4 ;CHECK: vaddw.s8
5 %tmp1 = load <8 x i16>* %A
6 %tmp2 = load <8 x i8>* %B
7 %tmp3 = call <8 x i16> @llvm.arm.neon.vaddws.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2)
8 ret <8 x i16> %tmp3
9 }
10
11 define <4 x i32> @vaddws16(<4 x i32>* %A, <4 x i16>* %B) nounwind {
12 ;CHECK: vaddws16:
13 ;CHECK: vaddw.s16
14 %tmp1 = load <4 x i32>* %A
15 %tmp2 = load <4 x i16>* %B
16 %tmp3 = call <4 x i32> @llvm.arm.neon.vaddws.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2)
17 ret <4 x i32> %tmp3
18 }
19
20 define <2 x i64> @vaddws32(<2 x i64>* %A, <2 x i32>* %B) nounwind {
21 ;CHECK: vaddws32:
22 ;CHECK: vaddw.s32
23 %tmp1 = load <2 x i64>* %A
24 %tmp2 = load <2 x i32>* %B
25 %tmp3 = call <2 x i64> @llvm.arm.neon.vaddws.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2)
26 ret <2 x i64> %tmp3
27 }
28
29 define <8 x i16> @vaddwu8(<8 x i16>* %A, <8 x i8>* %B) nounwind {
30 ;CHECK: vaddwu8:
31 ;CHECK: vaddw.u8
32 %tmp1 = load <8 x i16>* %A
33 %tmp2 = load <8 x i8>* %B
34 %tmp3 = call <8 x i16> @llvm.arm.neon.vaddwu.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2)
35 ret <8 x i16> %tmp3
36 }
37
38 define <4 x i32> @vaddwu16(<4 x i32>* %A, <4 x i16>* %B) nounwind {
39 ;CHECK: vaddwu16:
40 ;CHECK: vaddw.u16
41 %tmp1 = load <4 x i32>* %A
42 %tmp2 = load <4 x i16>* %B
43 %tmp3 = call <4 x i32> @llvm.arm.neon.vaddwu.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2)
44 ret <4 x i32> %tmp3
45 }
46
47 define <2 x i64> @vaddwu32(<2 x i64>* %A, <2 x i32>* %B) nounwind {
48 ;CHECK: vaddwu32:
49 ;CHECK: vaddw.u32
50 %tmp1 = load <2 x i64>* %A
51 %tmp2 = load <2 x i32>* %B
52 %tmp3 = call <2 x i64> @llvm.arm.neon.vaddwu.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2)
53 ret <2 x i64> %tmp3
54 }
55
56 declare <8 x i16> @llvm.arm.neon.vaddws.v8i16(<8 x i16>, <8 x i8>) nounwind readnone
57 declare <4 x i32> @llvm.arm.neon.vaddws.v4i32(<4 x i32>, <4 x i16>) nounwind readnone
58 declare <2 x i64> @llvm.arm.neon.vaddws.v2i64(<2 x i64>, <2 x i32>) nounwind readnone
59
60 declare <8 x i16> @llvm.arm.neon.vaddwu.v8i16(<8 x i16>, <8 x i8>) nounwind readnone
61 declare <4 x i32> @llvm.arm.neon.vaddwu.v4i32(<4 x i32>, <4 x i16>) nounwind readnone
62 declare <2 x i64> @llvm.arm.neon.vaddwu.v2i64(<2 x i64>, <2 x i32>) nounwind readnone
Deleted file: test/CodeGen/ARM/vand.ll (0 additions, 73 deletions)
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
1
2 define <8 x i8> @v_andi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
3 ;CHECK: v_andi8:
4 ;CHECK: vand
5 %tmp1 = load <8 x i8>* %A
6 %tmp2 = load <8 x i8>* %B
7 %tmp3 = and <8 x i8> %tmp1, %tmp2
8 ret <8 x i8> %tmp3
9 }
10
11 define <4 x i16> @v_andi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
12 ;CHECK: v_andi16:
13 ;CHECK: vand
14 %tmp1 = load <4 x i16>* %A
15 %tmp2 = load <4 x i16>* %B
16 %tmp3 = and <4 x i16> %tmp1, %tmp2
17 ret <4 x i16> %tmp3
18 }
19
20 define <2 x i32> @v_andi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
21 ;CHECK: v_andi32:
22 ;CHECK: vand
23 %tmp1 = load <2 x i32>* %A
24 %tmp2 = load <2 x i32>* %B
25 %tmp3 = and <2 x i32> %tmp1, %tmp2
26 ret <2 x i32> %tmp3
27 }
28
29 define <1 x i64> @v_andi64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
30 ;CHECK: v_andi64:
31 ;CHECK: vand
32 %tmp1 = load <1 x i64>* %A
33 %tmp2 = load <1 x i64>* %B
34 %tmp3 = and <1 x i64> %tmp1, %tmp2
35 ret <1 x i64> %tmp3
36 }
37
38 define <16 x i8> @v_andQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
39 ;CHECK: v_andQi8:
40 ;CHECK: vand
41 %tmp1 = load <16 x i8>* %A
42 %tmp2 = load <16 x i8>* %B
43 %tmp3 = and <16 x i8> %tmp1, %tmp2
44 ret <16 x i8> %tmp3
45 }
46
47 define <8 x i16> @v_andQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
48 ;CHECK: v_andQi16:
49 ;CHECK: vand
50 %tmp1 = load <8 x i16>* %A
51 %tmp2 = load <8 x i16>* %B
52 %tmp3 = and <8 x i16> %tmp1, %tmp2
53 ret <8 x i16> %tmp3
54 }
55
56 define <4 x i32> @v_andQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
57 ;CHECK: v_andQi32:
58 ;CHECK: vand
59 %tmp1 = load <4 x i32>* %A
60 %tmp2 = load <4 x i32>* %B
61 %tmp3 = and <4 x i32> %tmp1, %tmp2
62 ret <4 x i32> %tmp3
63 }
64
65 define <2 x i64> @v_andQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
66 ;CHECK: v_andQi64:
67 ;CHECK: vand
68 %tmp1 = load <2 x i64>* %A
69 %tmp2 = load <2 x i64>* %B
70 %tmp3 = and <2 x i64> %tmp1, %tmp2
71 ret <2 x i64> %tmp3
72 }
Deleted file: test/CodeGen/ARM/vbic.ll (0 additions, 81 deletions)
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
1
2 define <8 x i8> @v_bici8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
3 ;CHECK: v_bici8:
4 ;CHECK: vbic
5 %tmp1 = load <8 x i8>* %A
6 %tmp2 = load <8 x i8>* %B
7 %tmp3 = xor <8 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
8 %tmp4 = and <8 x i8> %tmp1, %tmp3
9 ret <8 x i8> %tmp4
10 }
11
12 define <4 x i16> @v_bici16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
13 ;CHECK: v_bici16:
14 ;CHECK: vbic
15 %tmp1 = load <4 x i16>* %A
16 %tmp2 = load <4 x i16>* %B
17 %tmp3 = xor <4 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1 >
18 %tmp4 = and <4 x i16> %tmp1, %tmp3
19 ret <4 x i16> %tmp4
20 }
21
22 define <2 x i32> @v_bici32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
23 ;CHECK: v_bici32:
24 ;CHECK: vbic
25 %tmp1 = load <2 x i32>* %A
26 %tmp2 = load <2 x i32>* %B
27 %tmp3 = xor <2 x i32> %tmp2, < i32 -1, i32 -1 >
28 %tmp4 = and <2 x i32> %tmp1, %tmp3
29 ret <2 x i32> %tmp4
30 }
31
32 define <1 x i64> @v_bici64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
33 ;CHECK: v_bici64:
34 ;CHECK: vbic
35 %tmp1 = load <1 x i64>* %A
36 %tmp2 = load <1 x i64>* %B
37 %tmp3 = xor <1 x i64> %tmp2, < i64 -1 >
38 %tmp4 = and <1 x i64> %tmp1, %tmp3
39 ret <1 x i64> %tmp4
40 }
41
42 define <16 x i8> @v_bicQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
43 ;CHECK: v_bicQi8:
44 ;CHECK: vbic
45 %tmp1 = load <16 x i8>* %A
46 %tmp2 = load <16 x i8>* %B
47 %tmp3 = xor <16 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
48 %tmp4 = and <16 x i8> %tmp1, %tmp3
49 ret <16 x i8> %tmp4
50 }
51
52 define <8 x i16> @v_bicQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
53 ;CHECK: v_bicQi16:
54 ;CHECK: vbic
55 %tmp1 = load <8 x i16>* %A
56 %tmp2 = load <8 x i16>* %B
57 %tmp3 = xor <8 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 >
58 %tmp4 = and <8 x i16> %tmp1, %tmp3
59 ret <8 x i16> %tmp4
60 }
61
62 define <4 x i32> @v_bicQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
63 ;CHECK: v_bicQi32:
64 ;CHECK: vbic
65 %tmp1 = load <4 x i32>* %A
66 %tmp2 = load <4 x i32>* %B
67 %tmp3 = xor <4 x i32> %tmp2, < i32 -1, i32 -1, i32 -1, i32 -1 >
68 %tmp4 = and <4 x i32> %tmp1, %tmp3
69 ret <4 x i32> %tmp4
70 }
71
72 define <2 x i64> @v_bicQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
73 ;CHECK: v_bicQi64:
74 ;CHECK: vbic
75 %tmp1 = load <2 x i64>* %A
76 %tmp2 = load <2 x i64>* %B
77 %tmp3 = xor <2 x i64> %tmp2, < i64 -1, i64 -1 >
78 %tmp4 = and <2 x i64> %tmp1, %tmp3
79 ret <2 x i64> %tmp4
80 }
0 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
1
2 define <8 x i8> @v_andi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
3 ;CHECK: v_andi8:
4 ;CHECK: vand
5 %tmp1 = load <8 x i8>* %A
6 %tmp2 = load <8 x i8>* %B
7 %tmp3 = and <8 x i8> %tmp1, %tmp2
8 ret <8 x i8> %tmp3
9 }
10
11 define <4 x i16> @v_andi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
12 ;CHECK: v_andi16:
13 ;CHECK: vand
14 %tmp1 = load <4 x i16>* %A
15 %tmp2 = load <4 x i16>* %B
16 %tmp3 = and <4 x i16> %tmp1, %tmp2
17 ret <4 x i16> %tmp3
18 }
19
20 define <2 x i32> @v_andi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
21 ;CHECK: v_andi32:
22 ;CHECK: vand
23 %tmp1 = load <2 x i32>* %A
24 %tmp2 = load <2 x i32>* %B
25 %tmp3 = and <2 x i32> %tmp1, %tmp2
26 ret <2 x i32> %tmp3
27 }
28
29 define <1 x i64> @v_andi64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
30 ;CHECK: v_andi64:
31 ;CHECK: vand
32 %tmp1 = load <1 x i64>* %A
33 %tmp2 = load <1 x i64>* %B
34 %tmp3 = and <1 x i64> %tmp1, %tmp2
35 ret <1 x i64> %tmp3
36 }
37
38 define <16 x i8> @v_andQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
39 ;CHECK: v_andQi8:
40 ;CHECK: vand
41 %tmp1 = load <16 x i8>* %A
42 %tmp2 = load <16 x i8>* %B
43 %tmp3 = and <16 x i8> %tmp1, %tmp2
44 ret <16 x i8> %tmp3
45 }
46
47 define <8 x i16> @v_andQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
48 ;CHECK: v_andQi16:
49 ;CHECK: vand
50 %tmp1 = load <8 x i16>* %A
51 %tmp2 = load <8 x i16>* %B
52 %tmp3 = and <8 x i16> %tmp1, %tmp2
53 ret <8 x i16> %tmp3
54 }
55
56 define <4 x i32> @v_andQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
57 ;CHECK: v_andQi32:
58 ;CHECK: vand
59 %tmp1 = load <4 x i32>* %A
60 %tmp2 = load <4 x i32>* %B
61 %tmp3 = and <4 x i32> %tmp1, %tmp2
62 ret <4 x i32> %tmp3
63 }
64
65 define <2 x i64> @v_andQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
66 ;CHECK: v_andQi64:
67 ;CHECK: vand
68 %tmp1 = load <2 x i64>* %A
69 %tmp2 = load <2 x i64>* %B
70 %tmp3 = and <2 x i64> %tmp1, %tmp2
71 ret <2 x i64> %tmp3
72 }
73
74 define <8 x i8> @v_bici8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
75 ;CHECK: v_bici8:
76 ;CHECK: vbic
77 %tmp1 = load <8 x i8>* %A
78 %tmp2 = load <8 x i8>* %B
79 %tmp3 = xor <8 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
80 %tmp4 = and <8 x i8> %tmp1, %tmp3
81 ret <8 x i8> %tmp4
82 }
83
84 define <4 x i16> @v_bici16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
85 ;CHECK: v_bici16:
86 ;CHECK: vbic
87 %tmp1 = load <4 x i16>* %A
88 %tmp2 = load <4 x i16>* %B
89 %tmp3 = xor <4 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1 >
90 %tmp4 = and <4 x i16> %tmp1, %tmp3
91 ret <4 x i16> %tmp4
92 }
93
94 define <2 x i32> @v_bici32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
95 ;CHECK: v_bici32:
96 ;CHECK: vbic
97 %tmp1 = load <2 x i32>* %A
98 %tmp2 = load <2 x i32>* %B
99 %tmp3 = xor <2 x i32> %tmp2, < i32 -1, i32 -1 >
100 %tmp4 = and <2 x i32> %tmp1, %tmp3
101 ret <2 x i32> %tmp4
102 }
103
104 define <1 x i64> @v_bici64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
105 ;CHECK: v_bici64:
106 ;CHECK: vbic
107 %tmp1 = load <1 x i64>* %A
108 %tmp2 = load <1 x i64>* %B
109 %tmp3 = xor <1 x i64> %tmp2, < i64 -1 >
110 %tmp4 = and <1 x i64> %tmp1, %tmp3
111 ret <1 x i64> %tmp4
112 }
113
114 define <16 x i8> @v_bicQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
115 ;CHECK: v_bicQi8:
116 ;CHECK: vbic
117 %tmp1 = load <16 x i8>* %A
118 %tmp2 = load <16 x i8>* %B
119 %tmp3 = xor <16 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
120 %tmp4 = and <16 x i8> %tmp1, %tmp3
121 ret <16 x i8> %tmp4
122 }
123
124 define <8 x i16> @v_bicQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
125 ;CHECK: v_bicQi16:
126 ;CHECK: vbic
127 %tmp1 = load <8 x i16>* %A
128 %tmp2 = load <8 x i16>* %B
129 %tmp3 = xor <8 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 >
130 %tmp4 = and <8 x i16> %tmp1, %tmp3
131 ret <8 x i16> %tmp4
132 }
133
134 define <4 x i32> @v_bicQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
135 ;CHECK: v_bicQi32:
136 ;CHECK: vbic
137 %tmp1 = load <4 x i32>* %A
138 %tmp2 = load <4 x i32>* %B
139 %tmp3 = xor <4 x i32> %tmp2, < i32 -1, i32 -1, i32 -1, i32 -1 >
140 %tmp4 = and <4 x i32> %tmp1, %tmp3
141 ret <4 x i32> %tmp4
142 }
143
144 define <2 x i64> @v_bicQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
145 ;CHECK: v_bicQi64:
146 ;CHECK: vbic
147 %tmp1 = load <2 x i64>* %A
148 %tmp2 = load <2 x i64>* %B
149 %tmp3 = xor <2 x i64> %tmp2, < i64 -1, i64 -1 >
150 %tmp4 = and <2 x i64> %tmp1, %tmp3
151 ret <2 x i64> %tmp4
152 }
153
154 define <8 x i8> @v_eori8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
155 ;CHECK: v_eori8:
156 ;CHECK: veor
157 %tmp1 = load <8 x i8>* %A
158 %tmp2 = load <8 x i8>* %B
159 %tmp3 = xor <8 x i8> %tmp1, %tmp2
160 ret <8 x i8> %tmp3
161 }
162
163 define <4 x i16> @v_eori16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
164 ;CHECK: v_eori16:
165 ;CHECK: veor
166 %tmp1 = load <4 x i16>* %A
167 %tmp2 = load <4 x i16>* %B
168 %tmp3 = xor <4 x i16> %tmp1, %tmp2
169 ret <4 x i16> %tmp3
170 }
171
172 define <2 x i32> @v_eori32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
173 ;CHECK: v_eori32:
174 ;CHECK: veor
175 %tmp1 = load <2 x i32>* %A
176 %tmp2 = load <2 x i32>* %B
177 %tmp3 = xor <2 x i32> %tmp1, %tmp2
178 ret <2 x i32> %tmp3
179 }
180
181 define <1 x i64> @v_eori64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
182 ;CHECK: v_eori64:
183 ;CHECK: veor
184 %tmp1 = load <1 x i64>* %A
185 %tmp2 = load <1 x i64>* %B
186 %tmp3 = xor <1 x i64> %tmp1, %tmp2
187 ret <1 x i64> %tmp3
188 }
189
190 define <16 x i8> @v_eorQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
191 ;CHECK: v_eorQi8:
192 ;CHECK: veor
193 %tmp1 = load <16 x i8>* %A
194 %tmp2 = load <16 x i8>* %B
195 %tmp3 = xor <16 x i8> %tmp1, %tmp2
196 ret <16 x i8> %tmp3
197 }
198
199 define <8 x i16> @v_eorQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
200 ;CHECK: v_eorQi16:
201 ;CHECK: veor
202 %tmp1 = load <8 x i16>* %A
203 %tmp2 = load <8 x i16>* %B
204 %tmp3 = xor <8 x i16> %tmp1, %tmp2
205 ret <8 x i16> %tmp3
206 }
207
208 define <4 x i32> @v_eorQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
209 ;CHECK: v_eorQi32:
210 ;CHECK: veor
211 %tmp1 = load <4 x i32>* %A
212 %tmp2 = load <4 x i32>* %B
213 %tmp3 = xor <4 x i32> %tmp1, %tmp2
214 ret <4 x i32> %tmp3
215 }
216
217 define <2 x i64> @v_eorQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
218 ;CHECK: v_eorQi64:
219 ;CHECK: veor
220 %tmp1 = load <2 x i64>* %A
221 %tmp2 = load <2 x i64>* %B
222 %tmp3 = xor <2 x i64> %tmp1, %tmp2
223 ret <2 x i64> %tmp3
224 }
225
226 define <8 x i8> @v_mvni8(<8 x i8>* %A) nounwind {
227 ;CHECK: v_mvni8:
228 ;CHECK: vmvn
229 %tmp1 = load <8 x i8>* %A
230 %tmp2 = xor <8 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
231 ret <8 x i8> %tmp2
232 }
233
234 define <4 x i16> @v_mvni16(<4 x i16>* %A) nounwind {
235 ;CHECK: v_mvni16:
236 ;CHECK: vmvn
237 %tmp1 = load <4 x i16>* %A
238 %tmp2 = xor <4 x i16> %tmp1, < i16 -1, i16 -1, i16 -1, i16 -1 >
239 ret <4 x i16> %tmp2
240 }
241
242 define <2 x i32> @v_mvni32(<2 x i32>* %A) nounwind {
243 ;CHECK: v_mvni32:
244 ;CHECK: vmvn
245 %tmp1 = load <2 x i32>* %A
246 %tmp2 = xor <2 x i32> %tmp1, < i32 -1, i32 -1 >
247 ret <2 x i32> %tmp2
248 }
249
250 define <1 x i64> @v_mvni64(<1 x i64>* %A) nounwind {
251 ;CHECK: v_mvni64:
252 ;CHECK: vmvn
253 %tmp1 = load <1 x i64>* %A
254 %tmp2 = xor <1 x i64> %tmp1, < i64 -1 >
255 ret <1 x i64> %tmp2
256 }
257
258 define <16 x i8> @v_mvnQi8(<16 x i8>* %A) nounwind {
259 ;CHECK: v_mvnQi8:
260 ;CHECK: vmvn
261 %tmp1 = load <16 x i8>* %A
262 %tmp2 = xor <16 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
263 ret <16 x i8> %tmp2
264 }
265
266 define <8 x i16> @v_mvnQi16(<8 x i16>* %A) nounwind {
267 ;CHECK: v_mvnQi16:
268 ;CHECK: vmvn
269 %tmp1 = load <8 x i16>* %A
270 %tmp2 = xor <8 x i16> %tmp1, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 >
271 ret <8 x i16> %tmp2
272 }
273
274 define <4 x i32> @v_mvnQi32(<4 x i32>* %A) nounwind {
275 ;CHECK: v_mvnQi32:
276 ;CHECK: vmvn
277 %tmp1 = load <4 x i32>* %A
278 %tmp2 = xor <4 x i32> %tmp1, < i32 -1, i32 -1, i32 -1, i32 -1 >
279 ret <4 x i32> %tmp2
280 }
281
282 define <2 x i64> @v_mvnQi64(<2 x i64>* %A) nounwind {
283 ;CHECK: v_mvnQi64:
284 ;CHECK: vmvn
285 %tmp1 = load <2 x i64>* %A
286 %tmp2 = xor <2 x i64> %tmp1, < i64 -1, i64 -1 >
287 ret <2 x i64> %tmp2
288 }
289
290 define <8 x i8> @v_orri8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
291 ;CHECK: v_orri8:
292 ;CHECK: vorr
293 %tmp1 = load <8 x i8>* %A
294 %tmp2 = load <8 x i8>* %B
295 %tmp3 = or <8 x i8> %tmp1, %tmp2
296 ret <8 x i8> %tmp3
297 }
298
299 define <4 x i16> @v_orri16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
300 ;CHECK: v_orri16:
301 ;CHECK: vorr
302 %tmp1 = load <4 x i16>* %A
303 %tmp2 = load <4 x i16>* %B
304 %tmp3 = or <4 x i16> %tmp1, %tmp2
305 ret <4 x i16> %tmp3
306 }
307
308 define <2 x i32> @v_orri32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
309 ;CHECK: v_orri32:
310 ;CHECK: vorr
311 %tmp1 = load <2 x i32>* %A
312 %tmp2 = load <2 x i32>* %B
313 %tmp3 = or <2 x i32> %tmp1, %tmp2
314 ret <2 x i32> %tmp3
315 }
316
317 define <1 x i64> @v_orri64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
318 ;CHECK: v_orri64:
319 ;CHECK: vorr
320 %tmp1 = load <1 x i64>* %A
321 %tmp2 = load <1 x i64>* %B
322 %tmp3 = or <1 x i64> %tmp1, %tmp2
323 ret <1 x i64> %tmp3
324 }
325
326 define <16 x i8> @v_orrQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
327 ;CHECK: v_orrQi8:
328 ;CHECK: vorr
329 %tmp1 = load <16 x i8>* %A
330 %tmp2 = load <16 x i8>* %B
331 %tmp3 = or <16 x i8> %tmp1, %tmp2
332 ret <16 x i8> %tmp3
333 }
334
335 define <8 x i16> @v_orrQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
336 ;CHECK: v_orrQi16:
337 ;CHECK: vorr
338 %tmp1 = load <8 x i16>* %A
339 %tmp2 = load <8 x i16>* %B
340 %tmp3 = or <8 x i16> %tmp1, %tmp2
341 ret <8 x i16> %tmp3
342 }
343
344 define <4 x i32> @v_orrQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
345 ;CHECK: v_orrQi32:
346 ;CHECK: vorr
347 %tmp1 = load <4 x i32>* %A
348 %tmp2 = load <4 x i32>* %B
349 %tmp3 = or <4 x i32> %tmp1, %tmp2
350 ret <4 x i32> %tmp3
351 }
352
353 define <2 x i64> @v_orrQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
354 ;CHECK: v_orrQi64:
355 ;CHECK: vorr
356 %tmp1 = load <2 x i64>* %A
357 %tmp2 = load <2 x i64>* %B
358 %tmp3 = or <2 x i64> %tmp1, %tmp2
359 ret <2 x i64> %tmp3
360 }
361
362 define <8 x i8> @v_orni8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
363 ;CHECK: v_orni8:
364 ;CHECK: vorn
365 %tmp1 = load <8 x i8>* %A
366 %tmp2 = load <8 x i8>* %B
367 %tmp3 = xor <8 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
368 %tmp4 = or <8 x i8> %tmp1, %tmp3
369 ret <8 x i8> %tmp4
370 }
371
372 define <4 x i16> @v_orni16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
373 ;CHECK: v_orni16:
374 ;CHECK: vorn
375 %tmp1 = load <4 x i16>* %A
376 %tmp2 = load <4 x i16>* %B
377 %tmp3 = xor <4 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1 >
378 %tmp4 = or <4 x i16> %tmp1, %tmp3
379 ret <4 x i16> %tmp4
380 }
381
382 define <2 x i32> @v_orni32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
383 ;CHECK: v_orni32:
384 ;CHECK: vorn
385 %tmp1 = load <2 x i32>* %A
386 %tmp2 = load <2 x i32>* %B
387 %tmp3 = xor <2 x i32> %tmp2, < i32 -1, i32 -1 >
388 %tmp4 = or <2 x i32> %tmp1, %tmp3
389 ret <2 x i32> %tmp4
390 }
391
392 define <1 x i64> @v_orni64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
393 ;CHECK: v_orni64:
394 ;CHECK: vorn
395 %tmp1 = load <1 x i64>* %A
396 %tmp2 = load <1 x i64>* %B
397 %tmp3 = xor <1 x i64> %tmp2, < i64 -1 >
398 %tmp4 = or <1 x i64> %tmp1, %tmp3
399 ret <1 x i64> %tmp4
400 }
401
402 define <16 x i8> @v_ornQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
403 ;CHECK: v_ornQi8:
404 ;CHECK: vorn
405 %tmp1 = load <16 x i8>* %A
406 %tmp2 = load <16 x i8>* %B
407 %tmp3 = xor <16 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
408 %tmp4 = or <16 x i8> %tmp1, %tmp3
409 ret <16 x i8> %tmp4
410 }
411
412 define <8 x i16> @v_ornQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
413 ;CHECK: v_ornQi16:
414 ;CHECK: vorn
415 %tmp1 = load <8 x i16>* %A
416 %tmp2 = load <8 x i16>* %B
417 %tmp3 = xor <8 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 >
418 %tmp4 = or <8 x i16> %tmp1, %tmp3
419 ret <8 x i16> %tmp4
420 }
421
422 define <4 x i32> @v_ornQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
423 ;CHECK: v_ornQi32:
424 ;CHECK: vorn
425 %tmp1 = load <4 x i32>* %A
426 %tmp2 = load <4 x i32>* %B
427 %tmp3 = xor <4 x i32> %tmp2, < i32 -1, i32 -1, i32 -1, i32 -1 >
428 %tmp4 = or <4 x i32> %tmp1, %tmp3
429 ret <4 x i32> %tmp4
430 }
431
432 define <2 x i64> @v_ornQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
433 ;CHECK: v_ornQi64:
434 ;CHECK: vorn
435 %tmp1 = load <2 x i64>* %A
436 %tmp2 = load <2 x i64>* %B
437 %tmp3 = xor <2 x i64> %tmp2, < i64 -1, i64 -1 >
438 %tmp4 = or <2 x i64> %tmp1, %tmp3
439 ret <2 x i64> %tmp4
440 }
441
442 define <8 x i8> @vtsti8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
443 ;CHECK: vtsti8:
444 ;CHECK: vtst.i8
445 %tmp1 = load <8 x i8>* %A
446 %tmp2 = load <8 x i8>* %B
447 %tmp3 = and <8 x i8> %tmp1, %tmp2
448 %tmp4 = icmp ne <8 x i8> %tmp3, zeroinitializer
449 %tmp5 = sext <8 x i1> %tmp4 to <8 x i8>
450 ret <8 x i8> %tmp5
451 }
452
453 define <4 x i16> @vtsti16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
454 ;CHECK: vtsti16:
455 ;CHECK: vtst.i16
456 %tmp1 = load <4 x i16>* %A
457 %tmp2 = load <4 x i16>* %B
458 %tmp3 = and <4 x i16> %tmp1, %tmp2
459 %tmp4 = icmp ne <4 x i16> %tmp3, zeroinitializer
460 %tmp5 = sext <4 x i1> %tmp4 to <4 x i16>
461 ret <4 x i16> %tmp5
462 }
463
464 define <2 x i32> @vtsti32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
465 ;CHECK: vtsti32:
466 ;CHECK: vtst.i32
467 %tmp1 = load <2 x i32>* %A
468 %tmp2 = load <2 x i32>* %B
469 %tmp3 = and <2 x i32> %tmp1, %tmp2
470 %tmp4 = icmp ne <2 x i32> %tmp3, zeroinitializer
471 %tmp5 = sext <2 x i1> %tmp4 to <2 x i32>
472 ret <2 x i32> %tmp5
473 }
474
475 define <16 x i8> @vtstQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
476 ;CHECK: vtstQi8:
477 ;CHECK: vtst.i8
478 %tmp1 = load <16 x i8>* %A
479 %tmp2 = load <16 x i8>* %B
480 %tmp3 = and <16 x i8> %tmp1, %tmp2
481 %tmp4 = icmp ne <16 x i8> %tmp3, zeroinitializer
482 %tmp5 = sext <16 x i1> %tmp4 to <16 x i8>
483 ret <16 x i8> %tmp5
484 }
485
486 define <8 x i16> @vtstQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
487 ;CHECK: vtstQi16:
488 ;CHECK: vtst.i16
489 %tmp1 = load <8 x i16>* %A
490 %tmp2 = load <8 x i16>* %B
491 %tmp3 = and <8 x i16> %tmp1, %tmp2
492 %tmp4 = icmp ne <8 x i16> %tmp3, zeroinitializer
493 %tmp5 = sext <8 x i1> %tmp4 to <8 x i16>
494 ret <8 x i16> %tmp5
495 }
496
497 define <4 x i32> @vtstQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
498 ;CHECK: vtstQi32:
499 ;CHECK: vtst.i32
500 %tmp1 = load <4 x i32>* %A
501 %tmp2 = load <4 x i32>* %B
502 %tmp3 = and <4 x i32> %tmp1, %tmp2
503 %tmp4 = icmp ne <4 x i32> %tmp3, zeroinitializer
504 %tmp5 = sext <4 x i1> %tmp4 to <4 x i32>
505 ret <4 x i32> %tmp5
506 }
138138 %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
139139 ret <4 x i32> %tmp4
140140 }
141
142 define <2 x i32> @vacgef32(<2 x float>* %A, <2 x float>* %B) nounwind {
143 ;CHECK: vacgef32:
144 ;CHECK: vacge.f32
145 %tmp1 = load <2 x float>* %A
146 %tmp2 = load <2 x float>* %B
147 %tmp3 = call <2 x i32> @llvm.arm.neon.vacged(<2 x float> %tmp1, <2 x float> %tmp2)
148 ret <2 x i32> %tmp3
149 }
150
151 define <4 x i32> @vacgeQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
152 ;CHECK: vacgeQf32:
153 ;CHECK: vacge.f32
154 %tmp1 = load <4 x float>* %A
155 %tmp2 = load <4 x float>* %B
156 %tmp3 = call <4 x i32> @llvm.arm.neon.vacgeq(<4 x float> %tmp1, <4 x float> %tmp2)
157 ret <4 x i32> %tmp3
158 }
159
160 declare <2 x i32> @llvm.arm.neon.vacged(<2 x float>, <2 x float>) nounwind readnone
161 declare <4 x i32> @llvm.arm.neon.vacgeq(<4 x float>, <4 x float>) nounwind readnone
138138 %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
139139 ret <4 x i32> %tmp4
140140 }
141
142 define <2 x i32> @vacgtf32(<2 x float>* %A, <2 x float>* %B) nounwind {
143 ;CHECK: vacgtf32:
144 ;CHECK: vacgt.f32
145 %tmp1 = load <2 x float>* %A
146 %tmp2 = load <2 x float>* %B
147 %tmp3 = call <2 x i32> @llvm.arm.neon.vacgtd(<2 x float> %tmp1, <2 x float> %tmp2)
148 ret <2 x i32> %tmp3
149 }
150
151 define <4 x i32> @vacgtQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
152 ;CHECK: vacgtQf32:
153 ;CHECK: vacgt.f32
154 %tmp1 = load <4 x float>* %A
155 %tmp2 = load <4 x float>* %B
156 %tmp3 = call <4 x i32> @llvm.arm.neon.vacgtq(<4 x float> %tmp1, <4 x float> %tmp2)
157 ret <4 x i32> %tmp3
158 }
159
160 declare <2 x i32> @llvm.arm.neon.vacgtd(<2 x float>, <2 x float>) nounwind readnone
161 declare <4 x i32> @llvm.arm.neon.vacgtq(<4 x float>, <4 x float>) nounwind readnone
Deleted file: test/CodeGen/ARM/vcls.ll (0 additions, 57 deletions)
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
1
2 define <8 x i8> @vclss8(<8 x i8>* %A) nounwind {
3 ;CHECK: vclss8:
4 ;CHECK: vcls.s8
5 %tmp1 = load <8 x i8>* %A
6 %tmp2 = call <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8> %tmp1)
7 ret <8 x i8> %tmp2
8 }
9
10 define <4 x i16> @vclss16(<4 x i16>* %A) nounwind {
11 ;CHECK: vclss16:
12 ;CHECK: vcls.s16
13 %tmp1 = load <4 x i16>* %A
14 %tmp2 = call <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16> %tmp1)
15 ret <4 x i16> %tmp2
16 }
17
18 define <2 x i32> @vclss32(<2 x i32>* %A) nounwind {
19 ;CHECK: vclss32:
20 ;CHECK: vcls.s32
21 %tmp1 = load <2 x i32>* %A
22 %tmp2 = call <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32> %tmp1)
23 ret <2 x i32> %tmp2
24 }
25
26 define <16 x i8> @vclsQs8(<16 x i8>* %A) nounwind {
27 ;CHECK: vclsQs8:
28 ;CHECK: vcls.s8
29 %tmp1 = load <16 x i8>* %A
30 %tmp2 = call <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8> %tmp1)
31 ret <16 x i8> %tmp2
32 }
33
34 define <8 x i16> @vclsQs16(<8 x i16>* %A) nounwind {
35 ;CHECK: vclsQs16:
36 ;CHECK: vcls.s16
37 %tmp1 = load <8 x i16>* %A
38 %tmp2 = call <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16> %tmp1)
39 ret <8 x i16> %tmp2
40 }
41
42 define <4 x i32> @vclsQs32(<4 x i32>* %A) nounwind {
43 ;CHECK: vclsQs32:
44 ;CHECK: vcls.s32
45 %tmp1 = load <4 x i32>* %A
46 %tmp2 = call <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32> %tmp1)
47 ret <4 x i32> %tmp2
48 }
49
50 declare <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8>) nounwind readnone
51 declare <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16>) nounwind readnone
52 declare <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32>) nounwind readnone
53
54 declare <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8>) nounwind readnone
55 declare <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16>) nounwind readnone
56 declare <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32>) nounwind readnone
Deleted file: test/CodeGen/ARM/vclz.ll (0 additions, 57 deletions)
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
1
2 define <8 x i8> @vclz8(<8 x i8>* %A) nounwind {
3 ;CHECK: vclz8:
4 ;CHECK: vclz.i8
5 %tmp1 = load <8 x i8>* %A
6 %tmp2 = call <8 x i8> @llvm.arm.neon.vclz.v8i8(<8 x i8> %tmp1)
7 ret <8 x i8> %tmp2
8 }
9
10 define <4 x i16> @vclz16(<4 x i16>* %A) nounwind {
11 ;CHECK: vclz16:
12 ;CHECK: vclz.i16
13 %tmp1 = load <4 x i16>* %A
14 %tmp2 = call <4 x i16> @llvm.arm.neon.vclz.v4i16(<4 x i16> %tmp1)
15 ret <4 x i16> %tmp2
16 }
17
18 define <2 x i32> @vclz32(<2 x i32>* %A) nounwind {
19 ;CHECK: vclz32:
20 ;CHECK: vclz.i32
21 %tmp1 = load <2 x i32>* %A
22 %tmp2 = call <2 x i32> @llvm.arm.neon.vclz.v2i32(<2 x i32> %tmp1)
23 ret <2 x i32> %tmp2
24 }
25
26 define <16 x i8> @vclzQ8(<16 x i8>* %A) nounwind {
27 ;CHECK: vclzQ8:
28 ;CHECK: vclz.i8
29 %tmp1 = load <16 x i8>* %A
30 %tmp2 = call <16 x i8> @llvm.arm.neon.vclz.v16i8(<16 x i8> %tmp1)
31 ret <16 x i8> %tmp2
32 }
33
34 define <8 x i16> @vclzQ16(<8 x i16>* %A) nounwind {
35 ;CHECK: vclzQ16:
36 ;CHECK: vclz.i16
37 %tmp1 = load <8 x i16>* %A
38 %tmp2 = call <8 x i16> @llvm.arm.neon.vclz.v8i16(<8 x i16> %tmp1)
39 ret <8 x i16> %tmp2
40 }
41
42 define <4 x i32> @vclzQ32(<4 x i32>* %A) nounwind {
43 ;CHECK: vclzQ32:
44 ;CHECK: vclz.i32
45 %tmp1 = load <4 x i32>* %A
46 %tmp2 = call <4 x i32> @llvm.arm.neon.vclz.v4i32(<4 x i32> %tmp1)
47 ret <4 x i32> %tmp2
48 }
49
50 declare <8 x i8> @llvm.arm.neon.vclz.v8i8(<8 x i8>) nounwind readnone
51 declare <4 x i16> @llvm.arm.neon.vclz.v4i16(<4 x i16>) nounwind readnone
52 declare <2 x i32> @llvm.arm.neon.vclz.v2i32(<2 x i32>) nounwind readnone
53
54 declare <16 x i8> @llvm.arm.neon.vclz.v16i8(<16 x i8>) nounwind readnone
55 declare <8 x i16> @llvm.arm.neon.vclz.v8i16(<8 x i16>) nounwind readnone
56 declare <4 x i32> @llvm.arm.neon.vclz.v4i32(<4 x i32>) nounwind readnone
1717
1818 declare <8 x i8> @llvm.arm.neon.vcnt.v8i8(<8 x i8>) nounwind readnone
1919 declare <16 x i8> @llvm.arm.neon.vcnt.v16i8(<16 x i8>) nounwind readnone
20
21 define <8 x i8> @vclz8(<8 x i8>* %A) nounwind {
22 ;CHECK: vclz8:
23 ;CHECK: vclz.i8
24 %tmp1 = load <8 x i8>* %A
25 %tmp2 = call <8 x i8> @llvm.arm.neon.vclz.v8i8(<8 x i8> %tmp1)
26 ret <8 x i8> %tmp2
27 }
28
29 define <4 x i16> @vclz16(<4 x i16>* %A) nounwind {
30 ;CHECK: vclz16:
31 ;CHECK: vclz.i16
32 %tmp1 = load <4 x i16>* %A
33 %tmp2 = call <4 x i16> @llvm.arm.neon.vclz.v4i16(<4 x i16> %tmp1)
34 ret <4 x i16> %tmp2
35 }
36
37 define <2 x i32> @vclz32(<2 x i32>* %A) nounwind {
38 ;CHECK: vclz32:
39 ;CHECK: vclz.i32
40 %tmp1 = load <2 x i32>* %A
41 %tmp2 = call <2 x i32> @llvm.arm.neon.vclz.v2i32(<2 x i32> %tmp1)
42 ret <2 x i32> %tmp2
43 }
44
45 define <16 x i8> @vclzQ8(<16 x i8>* %A) nounwind {
46 ;CHECK: vclzQ8:
47 ;CHECK: vclz.i8
48 %tmp1 = load <16 x i8>* %A
49 %tmp2 = call <16 x i8> @llvm.arm.neon.vclz.v16i8(<16 x i8> %tmp1)
50 ret <16 x i8> %tmp2
51 }
52
53 define <8 x i16> @vclzQ16(<8 x i16>* %A) nounwind {
54 ;CHECK: vclzQ16:
55 ;CHECK: vclz.i16
56 %tmp1 = load <8 x i16>* %A
57 %tmp2 = call <8 x i16> @llvm.arm.neon.vclz.v8i16(<8 x i16> %tmp1)
58 ret <8 x i16> %tmp2
59 }
60
61 define <4 x i32> @vclzQ32(<4 x i32>* %A) nounwind {
62 ;CHECK: vclzQ32:
63 ;CHECK: vclz.i32
64 %tmp1 = load <4 x i32>* %A
65 %tmp2 = call <4 x i32> @llvm.arm.neon.vclz.v4i32(<4 x i32> %tmp1)
66 ret <4 x i32> %tmp2
67 }
68
69 declare <8 x i8> @llvm.arm.neon.vclz.v8i8(<8 x i8>) nounwind readnone
70 declare <4 x i16> @llvm.arm.neon.vclz.v4i16(<4 x i16>) nounwind readnone
71 declare <2 x i32> @llvm.arm.neon.vclz.v2i32(<2 x i32>) nounwind readnone
72
73 declare <16 x i8> @llvm.arm.neon.vclz.v16i8(<16 x i8>) nounwind readnone
74 declare <8 x i16> @llvm.arm.neon.vclz.v8i16(<8 x i16>) nounwind readnone
75 declare <4 x i32> @llvm.arm.neon.vclz.v4i32(<4 x i32>) nounwind readnone
76
77 define <8 x i8> @vclss8(<8 x i8>* %A) nounwind {
78 ;CHECK: vclss8:
79 ;CHECK: vcls.s8
80 %tmp1 = load <8 x i8>* %A
81 %tmp2 = call <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8> %tmp1)
82 ret <8 x i8> %tmp2
83 }
84
85 define <4 x i16> @vclss16(<4 x i16>* %A) nounwind {
86 ;CHECK: vclss16:
87 ;CHECK: vcls.s16
88 %tmp1 = load <4 x i16>* %A
89 %tmp2 = call <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16> %tmp1)
90 ret <4 x i16> %tmp2
91 }
92
93 define <2 x i32> @vclss32(<2 x i32>* %A) nounwind {
94 ;CHECK: vclss32:
95 ;CHECK: vcls.s32
96 %tmp1 = load <2 x i32>* %A
97 %tmp2 = call <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32> %tmp1)
98 ret <2 x i32> %tmp2
99 }
100
101 define <16 x i8> @vclsQs8(<16 x i8>* %A) nounwind {
102 ;CHECK: vclsQs8:
103 ;CHECK: vcls.s8
104 %tmp1 = load <16 x i8>* %A
105 %tmp2 = call <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8> %tmp1)
106 ret <16 x i8> %tmp2
107 }
108
109 define <8 x i16> @vclsQs16(<8 x i16>* %A) nounwind {
110 ;CHECK: vclsQs16:
111 ;CHECK: vcls.s16
112 %tmp1 = load <8 x i16>* %A
113 %tmp2 = call <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16> %tmp1)
114 ret <8 x i16> %tmp2
115 }
116
117 define <4 x i32> @vclsQs32(<4 x i32>* %A) nounwind {
118 ;CHECK: vclsQs32:
119 ;CHECK: vcls.s32
120 %tmp1 = load <4 x i32>* %A
121 %tmp2 = call <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32> %tmp1)
122 ret <4 x i32> %tmp2
123 }
124
125 declare <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8>) nounwind readnone
126 declare <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16>) nounwind readnone
127 declare <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32>) nounwind readnone
128
129 declare <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8>) nounwind readnone
130 declare <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16>) nounwind readnone
131 declare <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32>) nounwind readnone
6262 %tmp2 = uitofp <4 x i32> %tmp1 to <4 x float>
6363 ret <4 x float> %tmp2
6464 }
65
66 define <2 x i32> @vcvt_n_f32tos32(<2 x float>* %A) nounwind {
67 ;CHECK: vcvt_n_f32tos32:
68 ;CHECK: vcvt.s32.f32
69 %tmp1 = load <2 x float>* %A
70 %tmp2 = call <2 x i32> @llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> %tmp1, i32 1)
71 ret <2 x i32> %tmp2
72 }
73
74 define <2 x i32> @vcvt_n_f32tou32(<2 x float>* %A) nounwind {
75 ;CHECK: vcvt_n_f32tou32:
76 ;CHECK: vcvt.u32.f32
77 %tmp1 = load <2 x float>* %A
78 %tmp2 = call <2 x i32> @llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float> %tmp1, i32 1)
79 ret <2 x i32> %tmp2
80 }
81
82 define <2 x float> @vcvt_n_s32tof32(<2 x i32>* %A) nounwind {
83 ;CHECK: vcvt_n_s32tof32:
84 ;CHECK: vcvt.f32.s32
85 %tmp1 = load <2 x i32>* %A
86 %tmp2 = call <2 x float> @llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> %tmp1, i32 1)
87 ret <2 x float> %tmp2
88 }
89
90 define <2 x float> @vcvt_n_u32tof32(<2 x i32>* %A) nounwind {
91 ;CHECK: vcvt_n_u32tof32:
92 ;CHECK: vcvt.f32.u32
93 %tmp1 = load <2 x i32>* %A
94 %tmp2 = call <2 x float> @llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> %tmp1, i32 1)
95 ret <2 x float> %tmp2
96 }
97
98 declare <2 x i32> @llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float>, i32) nounwind readnone
99 declare <2 x i32> @llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float>, i32) nounwind readnone
100 declare <2 x float> @llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32>, i32) nounwind readnone
101 declare <2 x float> @llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32>, i32) nounwind readnone
102
103 define <4 x i32> @vcvtQ_n_f32tos32(<4 x float>* %A) nounwind {
104 ;CHECK: vcvtQ_n_f32tos32:
105 ;CHECK: vcvt.s32.f32
106 %tmp1 = load <4 x float>* %A
107 %tmp2 = call <4 x i32> @llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float> %tmp1, i32 1)
108 ret <4 x i32> %tmp2
109 }
110
111 define <4 x i32> @vcvtQ_n_f32tou32(<4 x float>* %A) nounwind {
112 ;CHECK: vcvtQ_n_f32tou32:
113 ;CHECK: vcvt.u32.f32
114 %tmp1 = load <4 x float>* %A
115 %tmp2 = call <4 x i32> @llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float> %tmp1, i32 1)
116 ret <4 x i32> %tmp2
117 }
118
119 define <4 x float> @vcvtQ_n_s32tof32(<4 x i32>* %A) nounwind {
120 ;CHECK: vcvtQ_n_s32tof32:
121 ;CHECK: vcvt.f32.s32
122 %tmp1 = load <4 x i32>* %A
123 %tmp2 = call <4 x float> @llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> %tmp1, i32 1)
124 ret <4 x float> %tmp2
125 }
126
127 define <4 x float> @vcvtQ_n_u32tof32(<4 x i32>* %A) nounwind {
128 ;CHECK: vcvtQ_n_u32tof32:
129 ;CHECK: vcvt.f32.u32
130 %tmp1 = load <4 x i32>* %A
131 %tmp2 = call <4 x float> @llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> %tmp1, i32 1)
132 ret <4 x float> %tmp2
133 }
134
135 declare <4 x i32> @llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float>, i32) nounwind readnone
136 declare <4 x i32> @llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float>, i32) nounwind readnone
137 declare <4 x float> @llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32>, i32) nounwind readnone
138 declare <4 x float> @llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32>, i32) nounwind readnone
139
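A hedged note on the second operand of these calls, assuming it is the number of fixed-point fraction bits as in the assembler's vcvt #n form: the float-to-fixed direction scales by 2^n and rounds toward zero, and the fixed-to-float direction divides by 2^n, so with n = 1 the pair <2.5, -1.75> converts to <5, -3> and back to <2.5, -1.5>. The sketch below is illustrative only and reuses the v2i32/v2f32 declarations above:

; illustrative only: round-trip through the 1-fraction-bit fixed-point format
define <2 x float> @vcvt_n_roundtrip(<2 x float> %a) nounwind {
  %fx = call <2 x i32> @llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> %a, i32 1)
  %fp = call <2 x float> @llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> %fx, i32 1)
  ret <2 x float> %fp
}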

+0 -76 test/CodeGen/ARM/vcvt_n.ll
None ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
1
2 define <2 x i32> @vcvt_f32tos32(<2 x float>* %A) nounwind {
3 ;CHECK: vcvt_f32tos32:
4 ;CHECK: vcvt.s32.f32
5 %tmp1 = load <2 x float>* %A
6 %tmp2 = call <2 x i32> @llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> %tmp1, i32 1)
7 ret <2 x i32> %tmp2
8 }
9
10 define <2 x i32> @vcvt_f32tou32(<2 x float>* %A) nounwind {
11 ;CHECK: vcvt_f32tou32:
12 ;CHECK: vcvt.u32.f32
13 %tmp1 = load <2 x float>* %A
14 %tmp2 = call <2 x i32> @llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float> %tmp1, i32 1)
15 ret <2 x i32> %tmp2
16 }
17
18 define <2 x float> @vcvt_s32tof32(<2 x i32>* %A) nounwind {
19 ;CHECK: vcvt_s32tof32:
20 ;CHECK: vcvt.f32.s32
21 %tmp1 = load <2 x i32>* %A
22 %tmp2 = call <2 x float> @llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> %tmp1, i32 1)
23 ret <2 x float> %tmp2
24 }
25
26 define <2 x float> @vcvt_u32tof32(<2 x i32>* %A) nounwind {
27 ;CHECK: vcvt_u32tof32:
28 ;CHECK: vcvt.f32.u32
29 %tmp1 = load <2 x i32>* %A
30 %tmp2 = call <2 x float> @llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> %tmp1, i32 1)
31 ret <2 x float> %tmp2
32 }
33
34 declare <2 x i32> @llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float>, i32) nounwind readnone
35 declare <2 x i32> @llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float>, i32) nounwind readnone
36 declare <2 x float> @llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32>, i32) nounwind readnone
37 declare <2 x float> @llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32>, i32) nounwind readnone
38
39 define <4 x i32> @vcvtQ_f32tos32(<4 x float>* %A) nounwind {
40 ;CHECK: vcvtQ_f32tos32:
41 ;CHECK: vcvt.s32.f32
42 %tmp1 = load <4 x float>* %A
43 %tmp2 = call <4 x i32> @llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float> %tmp1, i32 1)
44 ret <4 x i32> %tmp2
45 }
46
47 define <4 x i32> @vcvtQ_f32tou32(<4 x float>* %A) nounwind {
48 ;CHECK: vcvtQ_f32tou32:
49 ;CHECK: vcvt.u32.f32
50 %tmp1 = load <4 x float>* %A
51 %tmp2 = call <4 x i32> @llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float> %tmp1, i32 1)
52 ret <4 x i32> %tmp2
53 }
54
55 define <4 x float> @vcvtQ_s32tof32(<4 x i32>* %A) nounwind {
56 ;CHECK: vcvtQ_s32tof32:
57 ;CHECK: vcvt.f32.s32
58 %tmp1 = load <4 x i32>* %A
59 %tmp2 = call <4 x float> @llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> %tmp1, i32 1)
60 ret <4 x float> %tmp2
61 }
62
63 define <4 x float> @vcvtQ_u32tof32(<4 x i32>* %A) nounwind {
64 ;CHECK: vcvtQ_u32tof32:
65 ;CHECK: vcvt.f32.u32
66 %tmp1 = load <4 x i32>* %A
67 %tmp2 = call <4 x float> @llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> %tmp1, i32 1)
68 ret <4 x float> %tmp2
69 }
70
71 declare <4 x i32> @llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float>, i32) nounwind readnone
72 declare <4 x i32> @llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float>, i32) nounwind readnone
73 declare <4 x float> @llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32>, i32) nounwind readnone
74 declare <4 x float> @llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32>, i32) nounwind readnone
75
178178 %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer
179179 ret <4 x float> %tmp2
180180 }
181
182 define <8 x i8> @vduplane8(<8 x i8>* %A) nounwind {
183 ;CHECK: vduplane8:
184 ;CHECK: vdup.8
185 %tmp1 = load <8 x i8>* %A
186 %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
187 ret <8 x i8> %tmp2
188 }
189
190 define <4 x i16> @vduplane16(<4 x i16>* %A) nounwind {
191 ;CHECK: vduplane16:
192 ;CHECK: vdup.16
193 %tmp1 = load <4 x i16>* %A
194 %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
195 ret <4 x i16> %tmp2
196 }
197
198 define <2 x i32> @vduplane32(<2 x i32>* %A) nounwind {
199 ;CHECK: vduplane32:
200 ;CHECK: vdup.32
201 %tmp1 = load <2 x i32>* %A
202 %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> < i32 1, i32 1 >
203 ret <2 x i32> %tmp2
204 }
205
206 define <2 x float> @vduplanefloat(<2 x float>* %A) nounwind {
207 ;CHECK: vduplanefloat:
208 ;CHECK: vdup.32
209 %tmp1 = load <2 x float>* %A
210 %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> < i32 1, i32 1 >
211 ret <2 x float> %tmp2
212 }
213
214 define <16 x i8> @vduplaneQ8(<8 x i8>* %A) nounwind {
215 ;CHECK: vduplaneQ8:
216 ;CHECK: vdup.8
217 %tmp1 = load <8 x i8>* %A
218 %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <16 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
219 ret <16 x i8> %tmp2
220 }
221
222 define <8 x i16> @vduplaneQ16(<4 x i16>* %A) nounwind {
223 ;CHECK: vduplaneQ16:
224 ;CHECK: vdup.16
225 %tmp1 = load <4 x i16>* %A
226 %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
227 ret <8 x i16> %tmp2
228 }
229
230 define <4 x i32> @vduplaneQ32(<2 x i32>* %A) nounwind {
231 ;CHECK: vduplaneQ32:
232 ;CHECK: vdup.32
233 %tmp1 = load <2 x i32>* %A
234 %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
235 ret <4 x i32> %tmp2
236 }
237
238 define <4 x float> @vduplaneQfloat(<2 x float>* %A) nounwind {
239 ;CHECK: vduplaneQfloat:
240 ;CHECK: vdup.32
241 %tmp1 = load <2 x float>* %A
242 %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
243 ret <4 x float> %tmp2
244 }
245
246 define arm_apcscc <2 x i64> @foo(<2 x i64> %arg0_int64x1_t) nounwind readnone {
247 entry:
248 %0 = shufflevector <2 x i64> %arg0_int64x1_t, <2 x i64> undef, <2 x i32>
249 ret <2 x i64> %0
250 }
251
252 define arm_apcscc <2 x i64> @bar(<2 x i64> %arg0_int64x1_t) nounwind readnone {
253 entry:
254 %0 = shufflevector <2 x i64> %arg0_int64x1_t, <2 x i64> undef, <2 x i32>
255 ret <2 x i64> %0
256 }
257
258 define arm_apcscc <2 x double> @baz(<2 x double> %arg0_int64x1_t) nounwind readnone {
259 entry:
260 %0 = shufflevector <2 x double> %arg0_int64x1_t, <2 x double> undef, <2 x i32>
261 ret <2 x double> %0
262 }
263
264 define arm_apcscc <2 x double> @qux(<2 x double> %arg0_int64x1_t) nounwind readnone {
265 entry:
266 %0 = shufflevector <2 x double> %arg0_int64x1_t, <2 x double> undef, <2 x i32>
267 ret <2 x double> %0
268 }
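The vduplane tests above rely on a shufflevector whose mask repeats a single index being selected as a duplicate-from-lane. A minimal hedged sketch of that pattern with a concrete lane (lane 1), written in the same style as the tests but not taken from the diff:

; illustrative only: for %a = <i32 10, i32 20> the result is <i32 20, i32 20>
define <2 x i32> @dup_lane1_example(<2 x i32> %a) nounwind {
  %d = shufflevector <2 x i32> %a, <2 x i32> undef, <2 x i32> < i32 1, i32 1 >
  ret <2 x i32> %d
}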
+0 -89 test/CodeGen/ARM/vdup_lane.ll
None ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
1
2 define <8 x i8> @vduplane8(<8 x i8>* %A) nounwind {
3 ;CHECK: vduplane8:
4 ;CHECK: vdup.8
5 %tmp1 = load <8 x i8>* %A
6 %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
7 ret <8 x i8> %tmp2
8 }
9
10 define <4 x i16> @vduplane16(<4 x i16>* %A) nounwind {
11 ;CHECK: vduplane16:
12 ;CHECK: vdup.16
13 %tmp1 = load <4 x i16>* %A
14 %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
15 ret <4 x i16> %tmp2
16 }
17
18 define <2 x i32> @vduplane32(<2 x i32>* %A) nounwind {
19 ;CHECK: vduplane32:
20 ;CHECK: vdup.32
21 %tmp1 = load <2 x i32>* %A
22 %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> < i32 1, i32 1 >
23 ret <2 x i32> %tmp2
24 }
25
26 define <2 x float> @vduplanefloat(<2 x float>* %A) nounwind {
27 ;CHECK: vduplanefloat:
28 ;CHECK: vdup.32
29 %tmp1 = load <2 x float>* %A
30 %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> < i32 1, i32 1 >
31 ret <2 x float> %tmp2
32 }
33
34 define <16 x i8> @vduplaneQ8(<8 x i8>* %A) nounwind {
35 ;CHECK: vduplaneQ8:
36 ;CHECK: vdup.8
37 %tmp1 = load <8 x i8>* %A
38 %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <16 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
39 ret <16 x i8> %tmp2
40 }
41
42 define <8 x i16> @vduplaneQ16(<4 x i16>* %A) nounwind {
43 ;CHECK: vduplaneQ16:
44 ;CHECK: vdup.16
45 %tmp1 = load <4 x i16>* %A
46 %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
47 ret <8 x i16> %tmp2
48 }
49
50 define <4 x i32> @vduplaneQ32(<2 x i32>* %A) nounwind {
51 ;CHECK: vduplaneQ32:
52 ;CHECK: vdup.32
53 %tmp1 = load <2 x i32>* %A
54 %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
55 ret <4 x i32> %tmp2
56 }
57
58 define <4 x float> @vduplaneQfloat(<2 x float>* %A) nounwind {
59 ;CHECK: vduplaneQfloat:
60 ;CHECK: vdup.32
61 %tmp1 = load <2 x float>* %A
62 %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
63 ret <4 x float> %tmp2
64 }
65
66 define arm_apcscc <2 x i64> @foo(<2 x i64> %arg0_int64x1_t) nounwind readnone {
67 entry:
68 %0 = shufflevector <2 x i64> %arg0_int64x1_t, <2 x i64> undef, <2 x i32>
69 ret <2 x i64> %0
70 }
71
72 define arm_apcscc <2 x i64> @bar(<2 x i64> %arg0_int64x1_t) nounwind readnone {
73 entry:
74 %0 = shufflevector <2 x i64> %arg0_int64x1_t, <2 x i64> undef, <2 x i32>
75 ret <2 x i64> %0
76 }
77
78 define arm_apcscc <2 x double> @baz(<2 x double> %arg0_int64x1_t) nounwind readnone {
79 entry:
80 %0 = shufflevector <2 x double> %arg0_int64x1_t, <2 x double> undef, <2 x i32>
81 ret <2 x double> %0
82 }
83
84 define arm_apcscc <2 x double> @qux(<2 x double> %arg0_int64x1_t) nounwind readnone {
85 entry:
86 %0 = shufflevector <2 x double> %arg0_int64x1_t, <2 x double> undef, <2 x i32>
87 ret <2 x double> %0
88 }
+0 -73 test/CodeGen/ARM/veor.ll
None ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
1
2 define <8 x i8> @v_eori8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
3 ;CHECK: v_eori8:
4 ;CHECK: veor
5 %tmp1 = load <8 x i8>* %A
6 %tmp2 = load <8 x i8>* %B
7 %tmp3 = xor <8 x i8> %tmp1, %tmp2
8 ret <8 x i8> %tmp3
9 }
10
11 define <4 x i16> @v_eori16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
12 ;CHECK: v_eori16:
13 ;CHECK: veor
14 %tmp1 = load <4 x i16>* %A
15 %tmp2 = load <4 x i16>* %B
16 %tmp3 = xor <4 x i16> %tmp1, %tmp2
17 ret <4 x i16> %tmp3
18 }
19
20 define <2 x i32> @v_eori32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
21 ;CHECK: v_eori32:
22 ;CHECK: veor
23 %tmp1 = load <2 x i32>* %A
24 %tmp2 = load <2 x i32>* %B
25 %tmp3 = xor <2 x i32> %tmp1, %tmp2
26 ret <2 x i32> %tmp3
27 }
28
29 define <1 x i64> @v_eori64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
30 ;CHECK: v_eori64:
31 ;CHECK: veor
32 %tmp1 = load <1 x i64>* %A
33 %tmp2 = load <1 x i64>* %B
34 %tmp3 = xor <1 x i64> %tmp1, %tmp2
35 ret <1 x i64> %tmp3
36 }
37
38 define <16 x i8> @v_eorQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
39 ;CHECK: v_eorQi8:
40 ;CHECK: veor
41 %tmp1 = load <16 x i8>* %A
42 %tmp2 = load <16 x i8>* %B
43 %tmp3 = xor <16 x i8> %tmp1, %tmp2
44 ret <16 x i8> %tmp3
45 }
46
47 define <8 x i16> @v_eorQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
48 ;CHECK: v_eorQi16:
49 ;CHECK: veor
50 %tmp1 = load <8 x i16>* %A
51 %tmp2 = load <8 x i16>* %B
52 %tmp3 = xor <8 x i16> %tmp1, %tmp2
53 ret <8 x i16> %tmp3
54 }
55
56 define <4 x i32> @v_eorQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
57 ;CHECK: v_eorQi32:
58 ;CHECK: veor
59 %tmp1 = load <4 x i32>* %A
60 %tmp2 = load <4 x i32>* %B
61 %tmp3 = xor <4 x i32> %tmp1, %tmp2
62 ret <4 x i32> %tmp3
63 }
64
65 define <2 x i64> @v_eorQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
66 ;CHECK: v_eorQi64:
67 ;CHECK: veor
68 %tmp1 = load <2 x i64>* %A
69 %tmp2 = load <2 x i64>* %B
70 %tmp3 = xor <2 x i64> %tmp1, %tmp2
71 ret <2 x i64> %tmp3
72 }
None ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
0 ; RUN: llc < %s -mattr=+neon | FileCheck %s
1 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32"
2 target triple = "thumbv7-elf"
13
24 define i32 @vget_lanes8(<8 x i8>* %A) nounwind {
35 ;CHECK: vget_lanes8:
9092 %tmp3 = extractelement <4 x i32> %tmp2, i32 1
9193 ret i32 %tmp3
9294 }
95
96 define arm_aapcs_vfpcc void @test_vget_laneu16() nounwind {
97 entry:
98 ; CHECK: vmov.u16 r0, d0[1]
99 %arg0_uint16x4_t = alloca <4 x i16> ; <<4 x i16>*> [#uses=1]
100 %out_uint16_t = alloca i16 ; [#uses=1]
101 %"alloca point" = bitcast i32 0 to i32 ; [#uses=0]
102 %0 = load <4 x i16>* %arg0_uint16x4_t, align 8 ; <<4 x i16>> [#uses=1]
103 %1 = extractelement <4 x i16> %0, i32 1 ; [#uses=1]
104 store i16 %1, i16* %out_uint16_t, align 2
105 br label %return
106
107 return: ; preds = %entry
108 ret void
109 }
110
111 define arm_aapcs_vfpcc void @test_vget_laneu8() nounwind {
112 entry:
113 ; CHECK: vmov.u8 r0, d0[1]
114 %arg0_uint8x8_t = alloca <8 x i8> ; <<8 x i8>*> [#uses=1]
115 %out_uint8_t = alloca i8 ; [#uses=1]
116 %"alloca point" = bitcast i32 0 to i32 ; [#uses=0]
117 %0 = load <8 x i8>* %arg0_uint8x8_t, align 8 ; <<8 x i8>> [#uses=1]
118 %1 = extractelement <8 x i8> %0, i32 1 ; [#uses=1]
119 store i8 %1, i8* %out_uint8_t, align 1
120 br label %return
121
122 return: ; preds = %entry
123 ret void
124 }
125
126 define arm_aapcs_vfpcc void @test_vgetQ_laneu16() nounwind {
127 entry:
128 ; CHECK: vmov.u16 r0, d0[1]
129 %arg0_uint16x8_t = alloca <8 x i16> ; <<8 x i16>*> [#uses=1]
130 %out_uint16_t = alloca i16 ; [#uses=1]
131 %"alloca point" = bitcast i32 0 to i32 ; [#uses=0]
132 %0 = load <8 x i16>* %arg0_uint16x8_t, align 16 ; <<8 x i16>> [#uses=1]
133 %1 = extractelement <8 x i16> %0, i32 1 ; [#uses=1]
134 store i16 %1, i16* %out_uint16_t, align 2
135 br label %return
136
137 return: ; preds = %entry
138 ret void
139 }
140
141 define arm_aapcs_vfpcc void @test_vgetQ_laneu8() nounwind {
142 entry:
143 ; CHECK: vmov.u8 r0, d0[1]
144 %arg0_uint8x16_t = alloca <16 x i8> ; <<16 x i8>*> [#uses=1]
145 %out_uint8_t = alloca i8 ; [#uses=1]
146 %"alloca point" = bitcast i32 0 to i32 ; [#uses=0]
147 %0 = load <16 x i8>* %arg0_uint8x16_t, align 16 ; <<16 x i8>> [#uses=1]
148 %1 = extractelement <16 x i8> %0, i32 1 ; [#uses=1]
149 store i8 %1, i8* %out_uint8_t, align 1
150 br label %return
151
152 return: ; preds = %entry
153 ret void
154 }
155
156 define <8 x i8> @vset_lane8(<8 x i8>* %A, i8 %B) nounwind {
157 ;CHECK: vset_lane8:
158 ;CHECK: vmov.8
159 %tmp1 = load <8 x i8>* %A
160 %tmp2 = insertelement <8 x i8> %tmp1, i8 %B, i32 1
161 ret <8 x i8> %tmp2
162 }
163
164 define <4 x i16> @vset_lane16(<4 x i16>* %A, i16 %B) nounwind {
165 ;CHECK: vset_lane16:
166 ;CHECK: vmov.16
167 %tmp1 = load <4 x i16>* %A
168 %tmp2 = insertelement <4 x i16> %tmp1, i16 %B, i32 1
169 ret <4 x i16> %tmp2
170 }
171
172 define <2 x i32> @vset_lane32(<2 x i32>* %A, i32 %B) nounwind {
173 ;CHECK: vset_lane32:
174 ;CHECK: vmov.32
175 %tmp1 = load <2 x i32>* %A
176 %tmp2 = insertelement <2 x i32> %tmp1, i32 %B, i32 1
177 ret <2 x i32> %tmp2
178 }
179
180 define <16 x i8> @vsetQ_lane8(<16 x i8>* %A, i8 %B) nounwind {
181 ;CHECK: vsetQ_lane8:
182 ;CHECK: vmov.8
183 %tmp1 = load <16 x i8>* %A
184 %tmp2 = insertelement <16 x i8> %tmp1, i8 %B, i32 1
185 ret <16 x i8> %tmp2
186 }
187
188 define <8 x i16> @vsetQ_lane16(<8 x i16>* %A, i16 %B) nounwind {
189 ;CHECK: vsetQ_lane16:
190 ;CHECK: vmov.16
191 %tmp1 = load <8 x i16>* %A
192 %tmp2 = insertelement <8 x i16> %tmp1, i16 %B, i32 1
193 ret <8 x i16> %tmp2
194 }
195
196 define <4 x i32> @vsetQ_lane32(<4 x i32>* %A, i32 %B) nounwind {
197 ;CHECK: vsetQ_lane32:
198 ;CHECK: vmov.32
199 %tmp1 = load <4 x i32>* %A
200 %tmp2 = insertelement <4 x i32> %tmp1, i32 %B, i32 1
201 ret <4 x i32> %tmp2
202 }
203
204 define arm_aapcs_vfpcc <2 x float> @test_vset_lanef32(float %arg0_float32_t, <2 x float> %arg1_float32x2_t) nounwind {
205 ;CHECK: test_vset_lanef32:
206 ;CHECK: fcpys
207 ;CHECK: fcpys
208 entry:
209 %0 = insertelement <2 x float> %arg1_float32x2_t, float %arg0_float32_t, i32 1 ; <<2 x float>> [#uses=1]
210 ret <2 x float> %0
211 }
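The lane tests above pair extractelement (vget_lane, selected as a vmov to a core register) with insertelement (vset_lane, selected as a vmov into a lane). A hedged sketch combining the two, illustrative only and not part of the diff, that swaps lanes 0 and 1 of a <4 x i16> value:

; illustrative only
define <4 x i16> @swap_lanes01(<4 x i16> %a) nounwind {
  %e0 = extractelement <4 x i16> %a, i32 0
  %e1 = extractelement <4 x i16> %a, i32 1
  %t0 = insertelement <4 x i16> %a, i16 %e1, i32 0
  %t1 = insertelement <4 x i16> %t0, i16 %e0, i32 1
  ret <4 x i16> %t1
}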
+0 -63 test/CodeGen/ARM/vget_lane2.ll
None ; RUN: llc < %s -mattr=+neon | FileCheck %s
1 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32"
2 target triple = "thumbv7-elf"
3
4 define arm_aapcs_vfpcc void @test_vget_laneu16() nounwind {
5 entry:
6 ; CHECK: vmov.u16 r0, d0[1]
7 %arg0_uint16x4_t = alloca <4 x i16> ; <<4 x i16>*> [#uses=1]
8 %out_uint16_t = alloca i16 ; [#uses=1]
9 %"alloca point" = bitcast i32 0 to i32 ; [#uses=0]
10 %0 = load <4 x i16>* %arg0_uint16x4_t, align 8 ; <<4 x i16>> [#uses=1]
11 %1 = extractelement <4 x i16> %0, i32 1 ; [#uses=1]
12 store i16 %1, i16* %out_uint16_t, align 2
13 br label %return
14
15 return: ; preds = %entry
16 ret void
17 }
18
19 define arm_aapcs_vfpcc void @test_vget_laneu8() nounwind {
20 entry:
21 ; CHECK: vmov.u8 r0, d0[1]
22 %arg0_uint8x8_t = alloca <8 x i8> ; <<8 x i8>*> [#uses=1]
23 %out_uint8_t = alloca i8 ; [#uses=1]
24 %"alloca point" = bitcast i32 0 to i32 ; [#uses=0]
25 %0 = load <8 x i8>* %arg0_uint8x8_t, align 8 ; <<8 x i8>> [#uses=1]
26 %1 = extractelement <8 x i8> %0, i32 1 ; [#uses=1]
27 store i8 %1, i8* %out_uint8_t, align 1
28 br label %return
29
30 return: ; preds = %entry
31 ret void
32 }
33
34 define arm_aapcs_vfpcc void @test_vgetQ_laneu16() nounwind {
35 entry:
36 ; CHECK: vmov.u16 r0, d0[1]
37 %arg0_uint16x8_t = alloca <8 x i16> ; <<8 x i16>*> [#uses=1]
38 %out_uint16_t = alloca i16 ; [#uses=1]
39 %"alloca point" = bitcast i32 0 to i32 ; [#uses=0]
40 %0 = load <8 x i16>* %arg0_uint16x8_t, align 16 ; <<8 x i16>> [#uses=1]
41 %1 = extractelement <8 x i16> %0, i32 1 ; [#uses=1]
42 store i16 %1, i16* %out_uint16_t, align 2
43 br label %return
44
45 return: ; preds = %entry
46 ret void
47 }
48
49 define arm_aapcs_vfpcc void @test_vgetQ_laneu8() nounwind {
50 entry:
51 ; CHECK: vmov.u8 r0, d0[1]
52 %arg0_uint8x16_t = alloca <16 x i8> ; <<16 x i8>*> [#uses=1]
53 %out_uint8_t = alloca i8 ; [#uses=1]
54 %"alloca point" = bitcast i32 0 to i32 ; [#uses=0]
55 %0 = load <16 x i8>* %arg0_uint8x16_t, align 16 ; <<16 x i8>> [#uses=1]
56 %1 = extractelement <16 x i8> %0, i32 1 ; [#uses=1]
57 store i8 %1, i8* %out_uint8_t, align 1
58 br label %return
59
60 return: ; preds = %entry
61 ret void
62 }
122122 declare <16 x i8> @llvm.arm.neon.vhaddu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
123123 declare <8 x i16> @llvm.arm.neon.vhaddu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
124124 declare <4 x i32> @llvm.arm.neon.vhaddu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
125
126 define <8 x i8> @vrhadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
127 ;CHECK: vrhadds8:
128 ;CHECK: vrhadd.s8
129 %tmp1 = load <8 x i8>* %A
130 %tmp2 = load <8 x i8>* %B
131 %tmp3 = call <8 x i8> @llvm.arm.neon.vrhadds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
132 ret <8 x i8> %tmp3
133 }
134
135 define <4 x i16> @vrhadds16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
136 ;CHECK: vrhadds16:
137 ;CHECK: vrhadd.s16
138 %tmp1 = load <4 x i16>* %A
139 %tmp2 = load <4 x i16>* %B
140 %tmp3 = call <4 x i16> @llvm.arm.neon.vrhadds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
141 ret <4 x i16> %tmp3
142 }
143
144 define <2 x i32> @vrhadds32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
145 ;CHECK: vrhadds32:
146 ;CHECK: vrhadd.s32
147 %tmp1 = load <2 x i32>* %A
148 %tmp2 = load <2 x i32>* %B
149 %tmp3 = call <2 x i32> @llvm.arm.neon.vrhadds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
150 ret <2 x i32> %tmp3
151 }
152
153 define <8 x i8> @vrhaddu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
154 ;CHECK: vrhaddu8:
155 ;CHECK: vrhadd.u8
156 %tmp1 = load <8 x i8>* %A
157 %tmp2 = load <8 x i8>* %B
158 %tmp3 = call <8 x i8> @llvm.arm.neon.vrhaddu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
159 ret <8 x i8> %tmp3
160 }
161
162 define <4 x i16> @vrhaddu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
163 ;CHECK: vrhaddu16:
164 ;CHECK: vrhadd.u16
165 %tmp1 = load <4 x i16>* %A
166 %tmp2 = load <4 x i16>* %B
167 %tmp3 = call <4 x i16> @llvm.arm.neon.vrhaddu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
168 ret <4 x i16> %tmp3
169 }
170
171 define <2 x i32> @vrhaddu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
172 ;CHECK: vrhaddu32:
173 ;CHECK: vrhadd.u32
174 %tmp1 = load <2 x i32>* %A
175 %tmp2 = load <2 x i32>* %B
176 %tmp3 = call <2 x i32> @llvm.arm.neon.vrhaddu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
177 ret <2 x i32> %tmp3
178 }
179
180 define <16 x i8> @vrhaddQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
181 ;CHECK: vrhaddQs8:
182 ;CHECK: vrhadd.s8
183 %tmp1 = load <16 x i8>* %A
184 %tmp2 = load <16 x i8>* %B
185 %tmp3 = call <16 x i8> @llvm.arm.neon.vrhadds.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
186 ret <16 x i8> %tmp3
187 }
188
189 define <8 x i16> @vrhaddQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
190 ;CHECK: vrhaddQs16:
191 ;CHECK: vrhadd.s16
192 %tmp1 = load <8 x i16>* %A
193 %tmp2 = load <8 x i16>* %B
194 %tmp3 = call <8 x i16> @llvm.arm.neon.vrhadds.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
195 ret <8 x i16> %tmp3
196 }
197
198 define <4 x i32> @vrhaddQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
199 ;CHECK: vrhaddQs32:
200 ;CHECK: vrhadd.s32
201 %tmp1 = load <4 x i32>* %A
202 %tmp2 = load <4 x i32>* %B
203 %tmp3 = call <4 x i32> @llvm.arm.neon.vrhadds.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
204 ret <4 x i32> %tmp3
205 }
206
207 define <16 x i8> @vrhaddQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
208 ;CHECK: vrhaddQu8:
209 ;CHECK: vrhadd.u8
210 %tmp1 = load <16 x i8>* %A
211 %tmp2 = load <16 x i8>* %B
212 %tmp3 = call <16 x i8> @llvm.arm.neon.vrhaddu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
213 ret <16 x i8> %tmp3
214 }
215
216 define <8 x i16> @vrhaddQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
217 ;CHECK: vrhaddQu16:
218 ;CHECK: vrhadd.u16
219 %tmp1 = load <8 x i16>* %A
220 %tmp2 = load <8 x i16>* %B
221 %tmp3 = call <8 x i16> @llvm.arm.neon.vrhaddu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
222 ret <8 x i16> %tmp3
223 }
224
225 define <4 x i32> @vrhaddQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
226 ;CHECK: vrhaddQu32:
227 ;CHECK: vrhadd.u32
228 %tmp1 = load <4 x i32>* %A
229 %tmp2 = load <4 x i32>* %B
230 %tmp3 = call <4 x i32> @llvm.arm.neon.vrhaddu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
231 ret <4 x i32> %tmp3
232 }
233
234 declare <8 x i8> @llvm.arm.neon.vrhadds.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
235 declare <4 x i16> @llvm.arm.neon.vrhadds.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
236 declare <2 x i32> @llvm.arm.neon.vrhadds.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
237
238 declare <8 x i8> @llvm.arm.neon.vrhaddu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
239 declare <4 x i16> @llvm.arm.neon.vrhaddu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
240 declare <2 x i32> @llvm.arm.neon.vrhaddu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
241
242 declare <16 x i8> @llvm.arm.neon.vrhadds.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
243 declare <8 x i16> @llvm.arm.neon.vrhadds.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
244 declare <4 x i32> @llvm.arm.neon.vrhadds.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
245
246 declare <16 x i8> @llvm.arm.neon.vrhaddu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
247 declare <8 x i16> @llvm.arm.neon.vrhaddu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
248 declare <4 x i32> @llvm.arm.neon.vrhaddu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
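The rounding halving add computes (a + b + 1) >> 1 per lane in a widened intermediate, versus (a + b) >> 1 for the plain vhadd tested earlier in this file; for u8 operands 5 and 6 that is 6 versus 5, and 254 with 255 gives 255 without overflow. A hedged restatement of that arithmetic in generic IR, shown only to spell out the semantics, with no claim that the backend matches this form to vrhadd:

; illustrative only: unsigned (a + b + 1) >> 1 per lane in a 16-bit intermediate
define <8 x i8> @vrhaddu8_expanded(<8 x i8> %a, <8 x i8> %b) nounwind {
  %wa  = zext <8 x i8> %a to <8 x i16>
  %wb  = zext <8 x i8> %b to <8 x i16>
  %sum = add <8 x i16> %wa, %wb
  %rnd = add <8 x i16> %sum, < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1 >
  %hlf = udiv <8 x i16> %rnd, < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2 >
  %res = trunc <8 x i16> %hlf to <8 x i8>
  ret <8 x i8> %res
}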
+0 -147 test/CodeGen/ARM/vmax.ll
None ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
1
2 define <8 x i8> @vmaxs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
3 ;CHECK: vmaxs8:
4 ;CHECK: vmax.s8
5 %tmp1 = load <8 x i8>* %A
6 %tmp2 = load <8 x i8>* %B
7 %tmp3 = call <8 x i8> @llvm.arm.neon.vmaxs.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
8 ret <8 x i8> %tmp3
9 }
10
11 define <4 x i16> @vmaxs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
12 ;CHECK: vmaxs16:
13 ;CHECK: vmax.s16
14 %tmp1 = load <4 x i16>* %A
15 %tmp2 = load <4 x i16>* %B
16 %tmp3 = call <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
17 ret <4 x i16> %tmp3
18 }
19
20 define <2 x i32> @vmaxs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
21 ;CHECK: vmaxs32:
22 ;CHECK: vmax.s32
23 %tmp1 = load <2 x i32>* %A
24 %tmp2 = load <2 x i32>* %B
25 %tmp3 = call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
26 ret <2 x i32> %tmp3
27 }
28
29 define <8 x i8> @vmaxu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
30 ;CHECK: vmaxu8:
31 ;CHECK: vmax.u8
32 %tmp1 = load <8 x i8>* %A
33 %tmp2 = load <8 x i8>* %B
34 %tmp3 = call <8 x i8> @llvm.arm.neon.vmaxu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
35 ret <8 x i8> %tmp3
36 }
37
38 define <4 x i16> @vmaxu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
39 ;CHECK: vmaxu16:
40 ;CHECK: vmax.u16
41 %tmp1 = load <4 x i16>* %A
42 %tmp2 = load <4 x i16>* %B
43 %tmp3 = call <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
44 ret <4 x i16> %tmp3
45 }
46
47 define <2 x i32> @vmaxu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
48 ;CHECK: vmaxu32:
49 ;CHECK: vmax.u32
50 %tmp1 = load <2 x i32>* %A
51 %tmp2 = load <2 x i32>* %B
52 %tmp3 = call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
53 ret <2 x i32> %tmp3
54 }
55
56 define <2 x float> @vmaxf32(<2 x float>* %A, <2 x float>* %B) nounwind {
57 ;CHECK: vmaxf32:
58 ;CHECK: vmax.f32
59 %tmp1 = load <2 x float>* %A
60 %tmp2 = load <2 x float>* %B
61 %tmp3 = call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
62 ret <2 x float> %tmp3
63 }
64
65 define <16 x i8> @vmaxQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
66 ;CHECK: vmaxQs8:
67 ;CHECK: vmax.s8
68 %tmp1 = load <16 x i8>* %A
69 %tmp2 = load <16 x i8>* %B
70 %tmp3 = call <16 x i8> @llvm.arm.neon.vmaxs.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
71 ret <16 x i8> %tmp3
72 }
73
74 define <8 x i16> @vmaxQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
75 ;CHECK: vmaxQs16:
76 ;CHECK: vmax.s16
77 %tmp1 = load <8 x i16>* %A
78 %tmp2 = load <8 x i16>* %B
79 %tmp3 = call <8 x i16> @llvm.arm.neon.vmaxs.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
80 ret <8 x i16> %tmp3
81 }
82
83 define <4 x i32> @vmaxQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
84 ;CHECK: vmaxQs32:
85 ;CHECK: vmax.s32
86 %tmp1 = load <4 x i32>* %A
87 %tmp2 = load <4 x i32>* %B
88 %tmp3 = call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
89 ret <4 x i32> %tmp3
90 }
91
92 define <16 x i8> @vmaxQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
93 ;CHECK: vmaxQu8:
94 ;CHECK: vmax.u8
95 %tmp1 = load <16 x i8>* %A
96 %tmp2 = load <16 x i8>* %B
97 %tmp3 = call <16 x i8> @llvm.arm.neon.vmaxu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
98 ret <16 x i8> %tmp3
99 }
100
101 define <8 x i16> @vmaxQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
102 ;CHECK: vmaxQu16:
103 ;CHECK: vmax.u16
104 %tmp1 = load <8 x i16>* %A
105 %tmp2 = load <8 x i16>* %B
106 %tmp3 = call <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
107 ret <8 x i16> %tmp3
108 }
109
110 define <4 x i32> @vmaxQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
111 ;CHECK: vmaxQu32:
112 ;CHECK: vmax.u32
113 %tmp1 = load <4 x i32>* %A
114 %tmp2 = load <4 x i32>* %B
115 %tmp3 = call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
116 ret <4 x i32> %tmp3
117 }
118
119 define <4 x float> @vmaxQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
120 ;CHECK: vmaxQf32:
121 ;CHECK: vmax.f32
122 %tmp1 = load <4 x float>* %A
123 %tmp2 = load <4 x float>* %B
124 %tmp3 = call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
125 ret <4 x float> %tmp3
126 }
127
128 declare <8 x i8> @llvm.arm.neon.vmaxs.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
129 declare <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
130 declare <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
131
132 declare <8 x i8> @llvm.arm.neon.vmaxu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
133 declare <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
134 declare <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
135
136 declare <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float>, <2 x float>) nounwind readnone
137
138 declare <16 x i8> @llvm.arm.neon.vmaxs.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
139 declare <8 x i16> @llvm.arm.neon.vmaxs.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
140 declare <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
141
142 declare <16 x i8> @llvm.arm.neon.vmaxu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
143 declare <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
144 declare <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
145
146 declare <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float>, <4 x float>) nounwind readnone
+0 -147 test/CodeGen/ARM/vmin.ll
None ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
1
2 define <8 x i8> @vmins8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
3 ;CHECK: vmins8:
4 ;CHECK: vmin.s8
5 %tmp1 = load <8 x i8>* %A
6 %tmp2 = load <8 x i8>* %B
7 %tmp3 = call <8 x i8> @llvm.arm.neon.vmins.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
8 ret <8 x i8> %tmp3
9 }
10
11 define <4 x i16> @vmins16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
12 ;CHECK: vmins16:
13 ;CHECK: vmin.s16
14 %tmp1 = load <4 x i16>* %A
15 %tmp2 = load <4 x i16>* %B
16 %tmp3 = call <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
17 ret <4 x i16> %tmp3
18 }
19
20 define <2 x i32> @vmins32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
21 ;CHECK: vmins32:
22 ;CHECK: vmin.s32
23 %tmp1 = load <2 x i32>* %A
24 %tmp2 = load <2 x i32>* %B
25 %tmp3 = call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
26 ret <2 x i32> %tmp3
27 }
28
29 define <8 x i8> @vminu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
30 ;CHECK: vminu8:
31 ;CHECK: vmin.u8
32 %tmp1 = load <8 x i8>* %A
33 %tmp2 = load <8 x i8>* %B
34 %tmp3 = call <8 x i8> @llvm.arm.neon.vminu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
35 ret <8 x i8> %tmp3
36 }
37
38 define <4 x i16> @vminu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
39 ;CHECK: vminu16:
40 ;CHECK: vmin.u16
41 %tmp1 = load <4 x i16>* %A
42 %tmp2 = load <4 x i16>* %B
43 %tmp3 = call <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
44 ret <4 x i16> %tmp3
45 }
46
47 define <2 x i32> @vminu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
48 ;CHECK: vminu32:
49 ;CHECK: vmin.u32
50 %tmp1 = load <2 x i32>* %A
51 %tmp2 = load <2 x i32>* %B
52 %tmp3 = call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
53 ret <2 x i32> %tmp3
54 }
55
56 define <2 x float> @vminf32(<2 x float>* %A, <2 x float>* %B) nounwind {
57 ;CHECK: vminf32:
58 ;CHECK: vmin.f32
59 %tmp1 = load <2 x float>* %A
60 %tmp2 = load <2 x float>* %B
61 %tmp3 = call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
62 ret <2 x float> %tmp3
63 }
64
65 define <16 x i8> @vminQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
66 ;CHECK: vminQs8:
67 ;CHECK: vmin.s8
68 %tmp1 = load <16 x i8>* %A
69 %tmp2 = load <16 x i8>* %B
70 %tmp3 = call <16 x i8> @llvm.arm.neon.vmins.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
71 ret <16 x i8> %tmp3
72 }
73
74 define <8 x i16> @vminQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
75 ;CHECK: vminQs16:
76 ;CHECK: vmin.s16
77 %tmp1 = load <8 x i16>* %A
78 %tmp2 = load <8 x i16>* %B
79 %tmp3 = call <8 x i16> @llvm.arm.neon.vmins.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
80 ret <8 x i16> %tmp3
81 }
82
83 define <4 x i32> @vminQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
84 ;CHECK: vminQs32:
85 ;CHECK: vmin.s32
86 %tmp1 = load <4 x i32>* %A
87 %tmp2 = load <4 x i32>* %B
88 %tmp3 = call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
89 ret <4 x i32> %tmp3
90 }
91
92 define <16 x i8> @vminQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
93 ;CHECK: vminQu8:
94 ;CHECK: vmin.u8
95 %tmp1 = load <16 x i8>* %A
96 %tmp2 = load <16 x i8>* %B
97 %tmp3 = call <16 x i8> @llvm.arm.neon.vminu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
98 ret <16 x i8> %tmp3
99 }
100
101 define <8 x i16> @vminQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
102 ;CHECK: vminQu16:
103 ;CHECK: vmin.u16
104 %tmp1 = load <8 x i16>* %A
105 %tmp2 = load <8 x i16>* %B
106 %tmp3 = call <8 x i16> @llvm.arm.neon.vminu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
107 ret <8 x i16> %tmp3
108 }
109
110 define <4 x i32> @vminQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
111 ;CHECK: vminQu32:
112 ;CHECK: vmin.u32
113 %tmp1 = load <4 x i32>* %A
114 %tmp2 = load <4 x i32>* %B
115 %tmp3 = call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
116 ret <4 x i32> %tmp3
117 }
118
119 define <4 x float> @vminQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
120 ;CHECK: vminQf32:
121 ;CHECK: vmin.f32
122 %tmp1 = load <4 x float>* %A
123 %tmp2 = load <4 x float>* %B
124 %tmp3 = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
125 ret <4 x float> %tmp3
126 }
127
128 declare <8 x i8> @llvm.arm.neon.vmins.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
129 declare <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
130 declare <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
131
132 declare <8 x i8> @llvm.arm.neon.vminu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
133 declare <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
134 declare <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
135
136 declare <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float>, <2 x float>) nounwind readnone
137
138 declare <16 x i8> @llvm.arm.neon.vmins.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
139 declare <8 x i16> @llvm.arm.neon.vmins.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
140 declare <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
141
142 declare <16 x i8> @llvm.arm.neon.vminu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
143 declare <8 x i16> @llvm.arm.neon.vminu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
144 declare <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
145
146 declare <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float>, <4 x float>) nounwind readnone
0 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
1
2 define <8 x i8> @vmins8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
3 ;CHECK: vmins8:
4 ;CHECK: vmin.s8
5 %tmp1 = load <8 x i8>* %A
6 %tmp2 = load <8 x i8>* %B
7 %tmp3 = call <8 x i8> @llvm.arm.neon.vmins.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
8 ret <8 x i8> %tmp3
9 }
10
11 define <4 x i16> @vmins16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
12 ;CHECK: vmins16:
13 ;CHECK: vmin.s16
14 %tmp1 = load <4 x i16>* %A
15 %tmp2 = load <4 x i16>* %B
16 %tmp3 = call <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
17 ret <4 x i16> %tmp3
18 }
19
20 define <2 x i32> @vmins32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
21 ;CHECK: vmins32:
22 ;CHECK: vmin.s32
23 %tmp1 = load <2 x i32>* %A
24 %tmp2 = load <2 x i32>* %B
25 %tmp3 = call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
26 ret <2 x i32> %tmp3
27 }
28
29 define <8 x i8> @vminu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
30 ;CHECK: vminu8:
31 ;CHECK: vmin.u8
32 %tmp1 = load <8 x i8>* %A
33 %tmp2 = load <8 x i8>* %B
34 %tmp3 = call <8 x i8> @llvm.arm.neon.vminu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
35 ret <8 x i8> %tmp3
36 }
37
38 define <4 x i16> @vminu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
39 ;CHECK: vminu16:
40 ;CHECK: vmin.u16
41 %tmp1 = load <4 x i16>* %A
42 %tmp2 = load <4 x i16>* %B
43 %tmp3 = call <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
44 ret <4 x i16> %tmp3
45 }
46
47 define <2 x i32> @vminu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
48 ;CHECK: vminu32:
49 ;CHECK: vmin.u32
50 %tmp1 = load <2 x i32>* %A
51 %tmp2 = load <2 x i32>* %B
52 %tmp3 = call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
53 ret <2 x i32> %tmp3
54 }
55
56 define <2 x float> @vminf32(<2 x float>* %A, <2 x float>* %B) nounwind {
57 ;CHECK: vminf32:
58 ;CHECK: vmin.f32
59 %tmp1 = load <2 x float>* %A
60 %tmp2 = load <2 x float>* %B
61 %tmp3 = call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
62 ret <2 x float> %tmp3
63 }
64
65 define <16 x i8> @vminQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
66 ;CHECK: vminQs8:
67 ;CHECK: vmin.s8
68 %tmp1 = load <16 x i8>* %A
69 %tmp2 = load <16 x i8>* %B
70 %tmp3 = call <16 x i8> @llvm.arm.neon.vmins.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
71 ret <16 x i8> %tmp3
72 }
73
74 define <8 x i16> @vminQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
75 ;CHECK: vminQs16:
76 ;CHECK: vmin.s16
77 %tmp1 = load <8 x i16>* %A
78 %tmp2 = load <8 x i16>* %B
79 %tmp3 = call <8 x i16> @llvm.arm.neon.vmins.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
80 ret <8 x i16> %tmp3
81 }
82
83 define <4 x i32> @vminQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
84 ;CHECK: vminQs32:
85 ;CHECK: vmin.s32
86 %tmp1 = load <4 x i32>* %A
87 %tmp2 = load <4 x i32>* %B
88 %tmp3 = call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
89 ret <4 x i32> %tmp3
90 }
91
92 define <16 x i8> @vminQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
93 ;CHECK: vminQu8:
94 ;CHECK: vmin.u8
95 %tmp1 = load <16 x i8>* %A
96 %tmp2 = load <16 x i8>* %B
97 %tmp3 = call <16 x i8> @llvm.arm.neon.vminu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
98 ret <16 x i8> %tmp3
99 }
100
101 define <8 x i16> @vminQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
102 ;CHECK: vminQu16:
103 ;CHECK: vmin.u16
104 %tmp1 = load <8 x i16>* %A
105 %tmp2 = load <8 x i16>* %B
106 %tmp3 = call <8 x i16> @llvm.arm.neon.vminu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
107 ret <8 x i16> %tmp3
108 }
109
110 define <4 x i32> @vminQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
111 ;CHECK: vminQu32:
112 ;CHECK: vmin.u32
113 %tmp1 = load <4 x i32>* %A
114 %tmp2 = load <4 x i32>* %B
115 %tmp3 = call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
116 ret <4 x i32> %tmp3
117 }
118
119 define <4 x float> @vminQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
120 ;CHECK: vminQf32:
121 ;CHECK: vmin.f32
122 %tmp1 = load <4 x float>* %A
123 %tmp2 = load <4 x float>* %B
124 %tmp3 = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
125 ret <4 x float> %tmp3
126 }
127
128 declare <8 x i8> @llvm.arm.neon.vmins.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
129 declare <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
130 declare <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
131
132 declare <8 x i8> @llvm.arm.neon.vminu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
133 declare <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
134 declare <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
135
136 declare <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float>, <2 x float>) nounwind readnone
137
138 declare <16 x i8> @llvm.arm.neon.vmins.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
139 declare <8 x i16> @llvm.arm.neon.vmins.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
140 declare <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
141
142 declare <16 x i8> @llvm.arm.neon.vminu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
143 declare <8 x i16> @llvm.arm.neon.vminu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
144 declare <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
145
146 declare <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float>, <4 x float>) nounwind readnone
147
148 define <8 x i8> @vmaxs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
149 ;CHECK: vmaxs8:
150 ;CHECK: vmax.s8
151 %tmp1 = load <8 x i8>* %A
152 %tmp2 = load <8 x i8>* %B
153 %tmp3 = call <8 x i8> @llvm.arm.neon.vmaxs.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
154 ret <8 x i8> %tmp3
155 }
156
157 define <4 x i16> @vmaxs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
158 ;CHECK: vmaxs16:
159 ;CHECK: vmax.s16
160 %tmp1 = load <4 x i16>* %A
161 %tmp2 = load <4 x i16>* %B
162 %tmp3 = call <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
163 ret <4 x i16> %tmp3
164 }
165
166 define <2 x i32> @vmaxs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
167 ;CHECK: vmaxs32:
168 ;CHECK: vmax.s32
169 %tmp1 = load <2 x i32>* %A
170 %tmp2 = load <2 x i32>* %B
171 %tmp3 = call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
172 ret <2 x i32> %tmp3
173 }
174
175 define <8 x i8> @vmaxu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
176 ;CHECK: vmaxu8:
177 ;CHECK: vmax.u8
178 %tmp1 = load <8 x i8>* %A
179 %tmp2 = load <8 x i8>* %B
180 %tmp3 = call <8 x i8> @llvm.arm.neon.vmaxu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
181 ret <8 x i8> %tmp3
182 }
183
184 define <4 x i16> @vmaxu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
185 ;CHECK: vmaxu16:
186 ;CHECK: vmax.u16
187 %tmp1 = load <4 x i16>* %A
188 %tmp2 = load <4 x i16>* %B
189 %tmp3 = call <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
190 ret <4 x i16> %tmp3
191 }
192
193 define <2 x i32> @vmaxu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
194 ;CHECK: vmaxu32:
195 ;CHECK: vmax.u32
196 %tmp1 = load <2 x i32>* %A
197 %tmp2 = load <2 x i32>* %B
198 %tmp3 = call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
199 ret <2 x i32> %tmp3
200 }
201
202 define <2 x float> @vmaxf32(<2 x float>* %A, <2 x float>* %B) nounwind {
203 ;CHECK: vmaxf32:
204 ;CHECK: vmax.f32
205 %tmp1 = load <2 x float>* %A
206 %tmp2 = load <2 x float>* %B
207 %tmp3 = call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
208 ret <2 x float> %tmp3
209 }
210
211 define <16 x i8> @vmaxQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
212 ;CHECK: vmaxQs8:
213 ;CHECK: vmax.s8
214 %tmp1 = load <16 x i8>* %A
215 %tmp2 = load <16 x i8>* %B
216 %tmp3 = call <16 x i8> @llvm.arm.neon.vmaxs.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
217 ret <16 x i8> %tmp3
218 }
219
220 define <8 x i16> @vmaxQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
221 ;CHECK: vmaxQs16:
222 ;CHECK: vmax.s16
223 %tmp1 = load <8 x i16>* %A
224 %tmp2 = load <8 x i16>* %B
225 %tmp3 = call <8 x i16> @llvm.arm.neon.vmaxs.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
226 ret <8 x i16> %tmp3
227 }
228
229 define <4 x i32> @vmaxQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
230 ;CHECK: vmaxQs32:
231 ;CHECK: vmax.s32
232 %tmp1 = load <4 x i32>* %A
233 %tmp2 = load <4 x i32>* %B
234 %tmp3 = call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
235 ret <4 x i32> %tmp3
236 }
237
238 define <16 x i8> @vmaxQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
239 ;CHECK: vmaxQu8:
240 ;CHECK: vmax.u8
241 %tmp1 = load <16 x i8>* %A
242 %tmp2 = load <16 x i8>* %B
243 %tmp3 = call <16 x i8> @llvm.arm.neon.vmaxu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
244 ret <16 x i8> %tmp3
245 }
246
247 define <8 x i16> @vmaxQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
248 ;CHECK: vmaxQu16:
249 ;CHECK: vmax.u16
250 %tmp1 = load <8 x i16>* %A
251 %tmp2 = load <8 x i16>* %B
252 %tmp3 = call <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
253 ret <8 x i16> %tmp3
254 }
255
256 define <4 x i32> @vmaxQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
257 ;CHECK: vmaxQu32:
258 ;CHECK: vmax.u32
259 %tmp1 = load <4 x i32>* %A
260 %tmp2 = load <4 x i32>* %B
261 %tmp3 = call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
262 ret <4 x i32> %tmp3
263 }
264
265 define <4 x float> @vmaxQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
266 ;CHECK: vmaxQf32:
267 ;CHECK: vmax.f32
268 %tmp1 = load <4 x float>* %A
269 %tmp2 = load <4 x float>* %B
270 %tmp3 = call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
271 ret <4 x float> %tmp3
272 }
273
274 declare <8 x i8> @llvm.arm.neon.vmaxs.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
275 declare <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
276 declare <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
277
278 declare <8 x i8> @llvm.arm.neon.vmaxu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
279 declare <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
280 declare <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
281
282 declare <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float>, <2 x float>) nounwind readnone
283
284 declare <16 x i8> @llvm.arm.neon.vmaxs.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
285 declare <8 x i16> @llvm.arm.neon.vmaxs.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
286 declare <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
287
288 declare <16 x i8> @llvm.arm.neon.vmaxu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
289 declare <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
290 declare <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
291
292 declare <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float>, <4 x float>) nounwind readnone
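One common use of the paired min/max intrinsics exercised above is a per-lane clamp: vmax against the lower bound, then vmin against the upper bound. A minimal hedged sketch reusing the <2 x float> declarations from this file, illustrative only and not part of the diff:

; illustrative only: clamp each lane of %x to [0.0, 1.0]
define <2 x float> @clamp01_example(<2 x float> %x) nounwind {
  %lo = call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %x, <2 x float> zeroinitializer)
  %hi = call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %lo, <2 x float> < float 1.0, float 1.0 >)
  ret <2 x float> %hi
}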
8686 %tmp5 = add <4 x float> %tmp1, %tmp4
8787 ret <4 x float> %tmp5
8888 }
89
90 define <8 x i16> @vmlals8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
91 ;CHECK: vmlals8:
92 ;CHECK: vmlal.s8
93 %tmp1 = load <8 x i16>* %A
94 %tmp2 = load <8 x i8>* %B
95 %tmp3 = load <8 x i8>* %C
96 %tmp4 = call <8 x i16> @llvm.arm.neon.vmlals.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
97 ret <8 x i16> %tmp4
98 }
99
100 define <4 x i32> @vmlals16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
101 ;CHECK: vmlals16:
102 ;CHECK: vmlal.s16
103 %tmp1 = load <4 x i32>* %A
104 %tmp2 = load <4 x i16>* %B
105 %tmp3 = load <4 x i16>* %C
106 %tmp4 = call <4 x i32> @llvm.arm.neon.vmlals.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
107 ret <4 x i32> %tmp4
108 }
109
110 define <2 x i64> @vmlals32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
111 ;CHECK: vmlals32:
112 ;CHECK: vmlal.s32
113 %tmp1 = load <2 x i64>* %A
114 %tmp2 = load <2 x i32>* %B
115 %tmp3 = load <2 x i32>* %C
116 %tmp4 = call <2 x i64> @llvm.arm.neon.vmlals.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
117 ret <2 x i64> %tmp4
118 }
119
120 define <8 x i16> @vmlalu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
121 ;CHECK: vmlalu8:
122 ;CHECK: vmlal.u8
123 %tmp1 = load <8 x i16>* %A
124 %tmp2 = load <8 x i8>* %B
125 %tmp3 = load <8 x i8>* %C
126 %tmp4 = call <8 x i16> @llvm.arm.neon.vmlalu.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
127 ret <8 x i16> %tmp4
128 }
129
130 define <4 x i32> @vmlalu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
131 ;CHECK: vmlalu16:
132 ;CHECK: vmlal.u16
133 %tmp1 = load <4 x i32>* %A
134 %tmp2 = load <4 x i16>* %B
135 %tmp3 = load <4 x i16>* %C
136 %tmp4 = call <4 x i32> @llvm.arm.neon.vmlalu.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
137 ret <4 x i32> %tmp4
138 }
139
140 define <2 x i64> @vmlalu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
141 ;CHECK: vmlalu32:
142 ;CHECK: vmlal.u32
143 %tmp1 = load <2 x i64>* %A
144 %tmp2 = load <2 x i32>* %B
145 %tmp3 = load <2 x i32>* %C
146 %tmp4 = call <2 x i64> @llvm.arm.neon.vmlalu.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
147 ret <2 x i64> %tmp4
148 }
149
150 define arm_aapcs_vfpcc <4 x i32> @test_vmlal_lanes16(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %arg2_int16x4_t) nounwind readnone {
151 entry:
152 ; CHECK: test_vmlal_lanes16
153 ; CHECK: vmlal.s16 q0, d2, d3[1]
154 %0 = shufflevector <4 x i16> %arg2_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
155 %1 = tail call <4 x i32> @llvm.arm.neon.vmlals.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
156 ret <4 x i32> %1
157 }
158
159 define arm_aapcs_vfpcc <2 x i64> @test_vmlal_lanes32(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %arg2_int32x2_t) nounwind readnone {
160 entry:
161 ; CHECK: test_vmlal_lanes32
162 ; CHECK: vmlal.s32 q0, d2, d3[1]
163 %0 = shufflevector <2 x i32> %arg2_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
164 %1 = tail call <2 x i64> @llvm.arm.neon.vmlals.v2i64(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
165 ret <2 x i64> %1
166 }
167
168 define arm_aapcs_vfpcc <4 x i32> @test_vmlal_laneu16(<4 x i32> %arg0_uint32x4_t, <4 x i16> %arg1_uint16x4_t, <4 x i16> %arg2_uint16x4_t) nounwind readnone {
169 entry:
170 ; CHECK: test_vmlal_laneu16
171 ; CHECK: vmlal.u16 q0, d2, d3[1]
172 %0 = shufflevector <4 x i16> %arg2_uint16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
173 %1 = tail call <4 x i32> @llvm.arm.neon.vmlalu.v4i32(<4 x i32> %arg0_uint32x4_t, <4 x i16> %arg1_uint16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
174 ret <4 x i32> %1
175 }
176
177 define arm_aapcs_vfpcc <2 x i64> @test_vmlal_laneu32(<2 x i64> %arg0_uint64x2_t, <2 x i32> %arg1_uint32x2_t, <2 x i32> %arg2_uint32x2_t) nounwind readnone {
178 entry:
179 ; CHECK: test_vmlal_laneu32
180 ; CHECK: vmlal.u32 q0, d2, d3[1]
181 %0 = shufflevector <2 x i32> %arg2_uint32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
182 %1 = tail call <2 x i64> @llvm.arm.neon.vmlalu.v2i64(<2 x i64> %arg0_uint64x2_t, <2 x i32> %arg1_uint32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
183 ret <2 x i64> %1
184 }
185
186 declare <8 x i16> @llvm.arm.neon.vmlals.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone
187 declare <4 x i32> @llvm.arm.neon.vmlals.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
188 declare <2 x i64> @llvm.arm.neon.vmlals.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
189
190 declare <8 x i16> @llvm.arm.neon.vmlalu.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone
191 declare <4 x i32> @llvm.arm.neon.vmlalu.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
192 declare <2 x i64> @llvm.arm.neon.vmlalu.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
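What the vmlal tests above check is a widening multiply-accumulate: each product is formed in double-width lanes and added into the accumulator. A hedged restatement of the signed variant in generic IR, shown only to spell out the per-lane arithmetic, not as the pattern this commit's backend matches:

; illustrative only: acc + sext(a) * sext(b), lane-wise
define <4 x i32> @vmlals16_expanded(<4 x i32> %acc, <4 x i16> %a, <4 x i16> %b) nounwind {
  %wa  = sext <4 x i16> %a to <4 x i32>
  %wb  = sext <4 x i16> %b to <4 x i32>
  %mul = mul <4 x i32> %wa, %wb
  %sum = add <4 x i32> %acc, %mul
  ret <4 x i32> %sum
}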
+0 -69 test/CodeGen/ARM/vmlal.ll
None ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
1
2 define <8 x i16> @vmlals8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
3 ;CHECK: vmlals8:
4 ;CHECK: vmlal.s8
5 %tmp1 = load <8 x i16>* %A
6 %tmp2 = load <8 x i8>* %B
7 %tmp3 = load <8 x i8>* %C
8 %tmp4 = call <8 x i16> @llvm.arm.neon.vmlals.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
9 ret <8 x i16> %tmp4
10 }
11
12 define <4 x i32> @vmlals16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
13 ;CHECK: vmlals16:
14 ;CHECK: vmlal.s16
15 %tmp1 = load <4 x i32>* %A
16 %tmp2 = load <4 x i16>* %B
17 %tmp3 = load <4 x i16>* %C
18 %tmp4 = call <4 x i32> @llvm.arm.neon.vmlals.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
19 ret <4 x i32> %tmp4
20 }
21
22 define <2 x i64> @vmlals32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
23 ;CHECK: vmlals32:
24 ;CHECK: vmlal.s32
25 %tmp1 = load <2 x i64>* %A
26 %tmp2 = load <2 x i32>* %B
27 %tmp3 = load <2 x i32>* %C
28 %tmp4 = call <2 x i64> @llvm.arm.neon.vmlals.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
29 ret <2 x i64> %tmp4
30 }
31
32 define <8 x i16> @vmlalu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
33 ;CHECK: vmlalu8:
34 ;CHECK: vmlal.u8
35 %tmp1 = load <8 x i16>* %A
36 %tmp2 = load <8 x i8>* %B
37 %tmp3 = load <8 x i8>* %C
38 %tmp4 = call <8 x i16> @llvm.arm.neon.vmlalu.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
39 ret <8 x i16> %tmp4
40 }
41
42 define <4 x i32> @vmlalu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
43 ;CHECK: vmlalu16:
44 ;CHECK: vmlal.u16
45 %tmp1 = load <4 x i32>* %A
46 %tmp2 = load <4 x i16>* %B
47 %tmp3 = load <4 x i16>* %C
48 %tmp4 = call <4 x i32> @llvm.arm.neon.vmlalu.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
49 ret <4 x i32> %tmp4
50 }
51
52 define <2 x i64> @vmlalu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
53 ;CHECK: vmlalu32:
54 ;CHECK: vmlal.u32
55 %tmp1 = load <2 x i64>* %A
56 %tmp2 = load <2 x i32>* %B
57 %tmp3 = load <2 x i32>* %C
58 %tmp4 = call <2 x i64> @llvm.arm.neon.vmlalu.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
59 ret <2 x i64> %tmp4
60 }
61
62 declare <8 x i16> @llvm.arm.neon.vmlals.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone
63 declare <4 x i32> @llvm.arm.neon.vmlals.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
64 declare <2 x i64> @llvm.arm.neon.vmlals.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
65
66 declare <8 x i16> @llvm.arm.neon.vmlalu.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone
67 declare <4 x i32> @llvm.arm.neon.vmlalu.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
68 declare <2 x i64> @llvm.arm.neon.vmlalu.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
+0 -47 test/CodeGen/ARM/vmlal_lane.ll
None ; RUN: llc -mattr=+neon < %s | FileCheck %s
1 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32"
2 target triple = "thumbv7-elf"
3
4 define arm_aapcs_vfpcc <4 x i32> @test_vmlal_lanes16(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %arg2_int16x4_t) nounwind readnone {
5 entry:
6 ; CHECK: test_vmlal_lanes16
7 ; CHECK: vmlal.s16 q0, d2, d3[1]
8 %0 = shufflevector <4 x i16> %arg2_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
9 %1 = tail call <4 x i32> @llvm.arm.neon.vmlals.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
10 ret <4 x i32> %1
11 }
12
13 declare <4 x i32> @llvm.arm.neon.vmlals.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
14
15 define arm_aapcs_vfpcc <2 x i64> @test_vmlal_lanes32(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %arg2_int32x2_t) nounwind readnone {
16 entry:
17 ; CHECK: test_vmlal_lanes32
18 ; CHECK: vmlal.s32 q0, d2, d3[1]
19 %0 = shufflevector <2 x i32> %arg2_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
20 %1 = tail call <2 x i64> @llvm.arm.neon.vmlals.v2i64(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
21 ret <2 x i64> %1
22 }
23
24 declare <2 x i64> @llvm.arm.neon.vmlals.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
25
26 define arm_aapcs_vfpcc <4 x i32> @test_vmlal_laneu16(<4 x i32> %arg0_uint32x4_t, <4 x i16> %arg1_uint16x4_t, <4 x i16> %arg2_uint16x4_t) nounwind readnone {
27 entry:
28 ; CHECK: test_vmlal_laneu16
29 ; CHECK: vmlal.u16 q0, d2, d3[1]
30 %0 = shufflevector <4 x i16> %arg2_uint16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
31 %1 = tail call <4 x i32> @llvm.arm.neon.vmlalu.v4i32(<4 x i32> %arg0_uint32x4_t, <4 x i16> %arg1_uint16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
32 ret <4 x i32> %1
33 }
34
35 declare <4 x i32> @llvm.arm.neon.vmlalu.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
36
37 define arm_aapcs_vfpcc <2 x i64> @test_vmlal_laneu32(<2 x i64> %arg0_uint64x2_t, <2 x i32> %arg1_uint32x2_t, <2 x i32> %arg2_uint32x2_t) nounwind readnone {
38 entry:
39 ; CHECK: test_vmlal_laneu32
40 ; CHECK: vmlal.u32 q0, d2, d3[1]
41 %0 = shufflevector <2 x i32> %arg2_uint32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
42 %1 = tail call <2 x i64> @llvm.arm.neon.vmlalu.v2i64(<2 x i64> %arg0_uint64x2_t, <2 x i32> %arg1_uint32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
43 ret <2 x i64> %1
44 }
45
46 declare <2 x i64> @llvm.arm.neon.vmlalu.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
8686 %tmp5 = sub <4 x float> %tmp1, %tmp4
8787 ret <4 x float> %tmp5
8888 }
89
90 define <8 x i16> @vmlsls8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
91 ;CHECK: vmlsls8:
92 ;CHECK: vmlsl.s8
93 %tmp1 = load <8 x i16>* %A
94 %tmp2 = load <8 x i8>* %B
95 %tmp3 = load <8 x i8>* %C
96 %tmp4 = call <8 x i16> @llvm.arm.neon.vmlsls.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
97 ret <8 x i16> %tmp4
98 }
99
100 define <4 x i32> @vmlsls16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
101 ;CHECK: vmlsls16:
102 ;CHECK: vmlsl.s16
103 %tmp1 = load <4 x i32>* %A
104 %tmp2 = load <4 x i16>* %B
105 %tmp3 = load <4 x i16>* %C
106 %tmp4 = call <4 x i32> @llvm.arm.neon.vmlsls.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
107 ret <4 x i32> %tmp4
108 }
109
110 define <2 x i64> @vmlsls32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
111 ;CHECK: vmlsls32:
112 ;CHECK: vmlsl.s32
113 %tmp1 = load <2 x i64>* %A
114 %tmp2 = load <2 x i32>* %B
115 %tmp3 = load <2 x i32>* %C
116 %tmp4 = call <2 x i64> @llvm.arm.neon.vmlsls.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
117 ret <2 x i64> %tmp4
118 }
119
120 define <8 x i16> @vmlslu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
121 ;CHECK: vmlslu8:
122 ;CHECK: vmlsl.u8
123 %tmp1 = load <8 x i16>* %A
124 %tmp2 = load <8 x i8>* %B
125 %tmp3 = load <8 x i8>* %C
126 %tmp4 = call <8 x i16> @llvm.arm.neon.vmlslu.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
127 ret <8 x i16> %tmp4
128 }
129
130 define <4 x i32> @vmlslu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
131 ;CHECK: vmlslu16:
132 ;CHECK: vmlsl.u16
133 %tmp1 = load <4 x i32>* %A
134 %tmp2 = load <4 x i16>* %B
135 %tmp3 = load <4 x i16>* %C
136 %tmp4 = call <4 x i32> @llvm.arm.neon.vmlslu.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
137 ret <4 x i32> %tmp4
138 }
139
140 define <2 x i64> @vmlslu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
141 ;CHECK: vmlslu32:
142 ;CHECK: vmlsl.u32
143 %tmp1 = load <2 x i64>* %A
144 %tmp2 = load <2 x i32>* %B
145 %tmp3 = load <2 x i32>* %C
146 %tmp4 = call <2 x i64> @llvm.arm.neon.vmlslu.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
147 ret <2 x i64> %tmp4
148 }
149
150 define arm_aapcs_vfpcc <4 x i32> @test_vmlsl_lanes16(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %arg2_int16x4_t) nounwind readnone {
151 entry:
152 ; CHECK: test_vmlsl_lanes16
153 ; CHECK: vmlsl.s16 q0, d2, d3[1]
154 %0 = shufflevector <4 x i16> %arg2_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
155 %1 = tail call <4 x i32> @llvm.arm.neon.vmlsls.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
156 ret <4 x i32> %1
157 }
158
159 define arm_aapcs_vfpcc <2 x i64> @test_vmlsl_lanes32(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %arg2_int32x2_t) nounwind readnone {
160 entry:
161 ; CHECK: test_vmlsl_lanes32
162 ; CHECK: vmlsl.s32 q0, d2, d3[1]
163 %0 = shufflevector <2 x i32> %arg2_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
164 %1 = tail call <2 x i64> @llvm.arm.neon.vmlsls.v2i64(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
165 ret <2 x i64> %1
166 }
167
168 define arm_aapcs_vfpcc <4 x i32> @test_vmlsl_laneu16(<4 x i32> %arg0_uint32x4_t, <4 x i16> %arg1_uint16x4_t, <4 x i16> %arg2_uint16x4_t) nounwind readnone {
169 entry:
170 ; CHECK: test_vmlsl_laneu16
171 ; CHECK: vmlsl.u16 q0, d2, d3[1]
172 %0 = shufflevector <4 x i16> %arg2_uint16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
173 %1 = tail call <4 x i32> @llvm.arm.neon.vmlslu.v4i32(<4 x i32> %arg0_uint32x4_t, <4 x i16> %arg1_uint16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
174 ret <4 x i32> %1
175 }
176
177 define arm_aapcs_vfpcc <2 x i64> @test_vmlsl_laneu32(<2 x i64> %arg0_uint64x2_t, <2 x i32> %arg1_uint32x2_t, <2 x i32> %arg2_uint32x2_t) nounwind readnone {
178 entry:
179 ; CHECK: test_vmlsl_laneu32
180 ; CHECK: vmlsl.u32 q0, d2, d3[1]
181 %0 = shufflevector <2 x i32> %arg2_uint32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
182 %1 = tail call <2 x i64> @llvm.arm.neon.vmlslu.v2i64(<2 x i64> %arg0_uint64x2_t, <2 x i32> %arg1_uint32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
183 ret <2 x i64> %1
184 }
185
186 declare <8 x i16> @llvm.arm.neon.vmlsls.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone
187 declare <4 x i32> @llvm.arm.neon.vmlsls.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
188 declare <2 x i64> @llvm.arm.neon.vmlsls.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
189
190 declare <8 x i16> @llvm.arm.neon.vmlslu.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone
191 declare <4 x i32> @llvm.arm.neon.vmlslu.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
192 declare <2 x i64> @llvm.arm.neon.vmlslu.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
+0
-69
test/CodeGen/ARM/vmlsl.ll
None ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
1
2 define <8 x i16> @vmlsls8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
3 ;CHECK: vmlsls8:
4 ;CHECK: vmlsl.s8
5 %tmp1 = load <8 x i16>* %A
6 %tmp2 = load <8 x i8>* %B
7 %tmp3 = load <8 x i8>* %C
8 %tmp4 = call <8 x i16> @llvm.arm.neon.vmlsls.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
9 ret <8 x i16> %tmp4
10 }
11
12 define <4 x i32> @vmlsls16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
13 ;CHECK: vmlsls16:
14 ;CHECK: vmlsl.s16
15 %tmp1 = load <4 x i32>* %A
16 %tmp2 = load <4 x i16>* %B
17 %tmp3 = load <4 x i16>* %C
18 %tmp4 = call <4 x i32> @llvm.arm.neon.vmlsls.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
19 ret <4 x i32> %tmp4
20 }
21
22 define <2 x i64> @vmlsls32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
23 ;CHECK: vmlsls32:
24 ;CHECK: vmlsl.s32
25 %tmp1 = load <2 x i64>* %A
26 %tmp2 = load <2 x i32>* %B
27 %tmp3 = load <2 x i32>* %C
28 %tmp4 = call <2 x i64> @llvm.arm.neon.vmlsls.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
29 ret <2 x i64> %tmp4
30 }
31
32 define <8 x i16> @vmlslu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
33 ;CHECK: vmlslu8:
34 ;CHECK: vmlsl.u8
35 %tmp1 = load <8 x i16>* %A
36 %tmp2 = load <8 x i8>* %B
37 %tmp3 = load <8 x i8>* %C
38 %tmp4 = call <8 x i16> @llvm.arm.neon.vmlslu.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
39 ret <8 x i16> %tmp4
40 }
41
42 define <4 x i32> @vmlslu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
43 ;CHECK: vmlslu16:
44 ;CHECK: vmlsl.u16
45 %tmp1 = load <4 x i32>* %A
46 %tmp2 = load <4 x i16>* %B
47 %tmp3 = load <4 x i16>* %C
48 %tmp4 = call <4 x i32> @llvm.arm.neon.vmlslu.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
49 ret <4 x i32> %tmp4
50 }
51
52 define <2 x i64> @vmlslu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
53 ;CHECK: vmlslu32:
54 ;CHECK: vmlsl.u32
55 %tmp1 = load <2 x i64>* %A
56 %tmp2 = load <2 x i32>* %B
57 %tmp3 = load <2 x i32>* %C
58 %tmp4 = call <2 x i64> @llvm.arm.neon.vmlslu.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
59 ret <2 x i64> %tmp4
60 }
61
62 declare <8 x i16> @llvm.arm.neon.vmlsls.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone
63 declare <4 x i32> @llvm.arm.neon.vmlsls.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
64 declare <2 x i64> @llvm.arm.neon.vmlsls.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
65
66 declare <8 x i16> @llvm.arm.neon.vmlslu.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone
67 declare <4 x i32> @llvm.arm.neon.vmlslu.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
68 declare <2 x i64> @llvm.arm.neon.vmlslu.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
+0
-47
test/CodeGen/ARM/vmlsl_lane.ll
None ; RUN: llc -mattr=+neon < %s | FileCheck %s
1 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32"
2 target triple = "thumbv7-elf"
3
4 define arm_aapcs_vfpcc <4 x i32> @test_vmlsl_lanes16(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %arg2_int16x4_t) nounwind readnone {
5 entry:
6 ; CHECK: test_vmlsl_lanes16
7 ; CHECK: vmlsl.s16 q0, d2, d3[1]
8 %0 = shufflevector <4 x i16> %arg2_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
9 %1 = tail call <4 x i32> @llvm.arm.neon.vmlsls.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
10 ret <4 x i32> %1
11 }
12
13 declare <4 x i32> @llvm.arm.neon.vmlsls.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
14
15 define arm_aapcs_vfpcc <2 x i64> @test_vmlsl_lanes32(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %arg2_int32x2_t) nounwind readnone {
16 entry:
17 ; CHECK: test_vmlsl_lanes32
18 ; CHECK: vmlsl.s32 q0, d2, d3[1]
19 %0 = shufflevector <2 x i32> %arg2_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
20 %1 = tail call <2 x i64> @llvm.arm.neon.vmlsls.v2i64(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
21 ret <2 x i64> %1
22 }
23
24 declare <2 x i64> @llvm.arm.neon.vmlsls.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
25
26 define arm_aapcs_vfpcc <4 x i32> @test_vmlsl_laneu16(<4 x i32> %arg0_uint32x4_t, <4 x i16> %arg1_uint16x4_t, <4 x i16> %arg2_uint16x4_t) nounwind readnone {
27 entry:
28 ; CHECK: test_vmlsl_laneu16
29 ; CHECK: vmlsl.u16 q0, d2, d3[1]
30 %0 = shufflevector <4 x i16> %arg2_uint16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
31 %1 = tail call <4 x i32> @llvm.arm.neon.vmlslu.v4i32(<4 x i32> %arg0_uint32x4_t, <4 x i16> %arg1_uint16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
32 ret <4 x i32> %1
33 }
34
35 declare <4 x i32> @llvm.arm.neon.vmlslu.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
36
37 define arm_aapcs_vfpcc <2 x i64> @test_vmlsl_laneu32(<2 x i64> %arg0_uint64x2_t, <2 x i32> %arg1_uint32x2_t, <2 x i32> %arg2_uint32x2_t) nounwind readnone {
38 entry:
39 ; CHECK: test_vmlsl_laneu32
40 ; CHECK: vmlsl.u32 q0, d2, d3[1]
41 %0 = shufflevector <2 x i32> %arg2_uint32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
42 %1 = tail call <2 x i64> @llvm.arm.neon.vmlslu.v2i64(<2 x i64> %arg0_uint64x2_t, <2 x i32> %arg1_uint32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
43 ret <2 x i64> %1
44 }
45
46 declare <2 x i64> @llvm.arm.neon.vmlslu.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
132132 ;CHECK: vmov.i64
133133 ret <2 x i64> < i64 18374687574888349695, i64 18374687574888349695 >
134134 }
135
136 define <8 x i16> @vmovls8(<8 x i8>* %A) nounwind {
137 ;CHECK: vmovls8:
138 ;CHECK: vmovl.s8
139 %tmp1 = load <8 x i8>* %A
140 %tmp2 = call <8 x i16> @llvm.arm.neon.vmovls.v8i16(<8 x i8> %tmp1)
141 ret <8 x i16> %tmp2
142 }
143
144 define <4 x i32> @vmovls16(<4 x i16>* %A) nounwind {
145 ;CHECK: vmovls16:
146 ;CHECK: vmovl.s16
147 %tmp1 = load <4 x i16>* %A
148 %tmp2 = call <4 x i32> @llvm.arm.neon.vmovls.v4i32(<4 x i16> %tmp1)
149 ret <4 x i32> %tmp2
150 }
151
152 define <2 x i64> @vmovls32(<2 x i32>* %A) nounwind {
153 ;CHECK: vmovls32:
154 ;CHECK: vmovl.s32
155 %tmp1 = load <2 x i32>* %A
156 %tmp2 = call <2 x i64> @llvm.arm.neon.vmovls.v2i64(<2 x i32> %tmp1)
157 ret <2 x i64> %tmp2
158 }
159
160 define <8 x i16> @vmovlu8(<8 x i8>* %A) nounwind {
161 ;CHECK: vmovlu8:
162 ;CHECK: vmovl.u8
163 %tmp1 = load <8 x i8>* %A
164 %tmp2 = call <8 x i16> @llvm.arm.neon.vmovlu.v8i16(<8 x i8> %tmp1)
165 ret <8 x i16> %tmp2
166 }
167
168 define <4 x i32> @vmovlu16(<4 x i16>* %A) nounwind {
169 ;CHECK: vmovlu16:
170 ;CHECK: vmovl.u16
171 %tmp1 = load <4 x i16>* %A
172 %tmp2 = call <4 x i32> @llvm.arm.neon.vmovlu.v4i32(<4 x i16> %tmp1)
173 ret <4 x i32> %tmp2
174 }
175
176 define <2 x i64> @vmovlu32(<2 x i32>* %A) nounwind {
177 ;CHECK: vmovlu32:
178 ;CHECK: vmovl.u32
179 %tmp1 = load <2 x i32>* %A
180 %tmp2 = call <2 x i64> @llvm.arm.neon.vmovlu.v2i64(<2 x i32> %tmp1)
181 ret <2 x i64> %tmp2
182 }
183
184 declare <8 x i16> @llvm.arm.neon.vmovls.v8i16(<8 x i8>) nounwind readnone
185 declare <4 x i32> @llvm.arm.neon.vmovls.v4i32(<4 x i16>) nounwind readnone
186 declare <2 x i64> @llvm.arm.neon.vmovls.v2i64(<2 x i32>) nounwind readnone
187
188 declare <8 x i16> @llvm.arm.neon.vmovlu.v8i16(<8 x i8>) nounwind readnone
189 declare <4 x i32> @llvm.arm.neon.vmovlu.v4i32(<4 x i16>) nounwind readnone
190 declare <2 x i64> @llvm.arm.neon.vmovlu.v2i64(<2 x i32>) nounwind readnone
191
192 define <8 x i8> @vmovni16(<8 x i16>* %A) nounwind {
193 ;CHECK: vmovni16:
194 ;CHECK: vmovn.i16
195 %tmp1 = load <8 x i16>* %A
196 %tmp2 = call <8 x i8> @llvm.arm.neon.vmovn.v8i8(<8 x i16> %tmp1)
197 ret <8 x i8> %tmp2
198 }
199
200 define <4 x i16> @vmovni32(<4 x i32>* %A) nounwind {
201 ;CHECK: vmovni32:
202 ;CHECK: vmovn.i32
203 %tmp1 = load <4 x i32>* %A
204 %tmp2 = call <4 x i16> @llvm.arm.neon.vmovn.v4i16(<4 x i32> %tmp1)
205 ret <4 x i16> %tmp2
206 }
207
208 define <2 x i32> @vmovni64(<2 x i64>* %A) nounwind {
209 ;CHECK: vmovni64:
210 ;CHECK: vmovn.i64
211 %tmp1 = load <2 x i64>* %A
212 %tmp2 = call <2 x i32> @llvm.arm.neon.vmovn.v2i32(<2 x i64> %tmp1)
213 ret <2 x i32> %tmp2
214 }
215
216 declare <8 x i8> @llvm.arm.neon.vmovn.v8i8(<8 x i16>) nounwind readnone
217 declare <4 x i16> @llvm.arm.neon.vmovn.v4i16(<4 x i32>) nounwind readnone
218 declare <2 x i32> @llvm.arm.neon.vmovn.v2i32(<2 x i64>) nounwind readnone
219
220 define <8 x i8> @vqmovns16(<8 x i16>* %A) nounwind {
221 ;CHECK: vqmovns16:
222 ;CHECK: vqmovn.s16
223 %tmp1 = load <8 x i16>* %A
224 %tmp2 = call <8 x i8> @llvm.arm.neon.vqmovns.v8i8(<8 x i16> %tmp1)
225 ret <8 x i8> %tmp2
226 }
227
228 define <4 x i16> @vqmovns32(<4 x i32>* %A) nounwind {
229