llvm.org GIT mirror llvm / 3d69ff4
AArch64: Add missing scalar pair intrinsics, e.g. "float32_t vaddv_f32(float32x2_t a)", which is now matched into "faddp s0, v1.2s".
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@196198 91177308-0d34-0410-b5e6-96231b3b80d8
Hao Liu, 5 years ago
2 changed files with 166 additions and 0 deletions.
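For reference, the source-level intrinsics these new patterns serve look like the C sketch below. This is an illustration only, not part of the commit: it assumes an AArch64 target and that arm_neon.h exposes the ACLE reduction intrinsics vaddv_f32, vaddvq_f32 and vmaxv_f32 with the signatures shown; the function and variable names are invented for the example.

#include <arm_neon.h>
#include <stdio.h>

/* Per the new v2f32 pattern, a two-lane reduction should fold to a single
   "faddp s<d>, v<n>.2s". */
static float32_t add_pair(float32x2_t a) { return vaddv_f32(a); }

/* Per the new v4f32 pattern, a four-lane reduction should take two steps:
   "faddp v.4s, v.4s, v.4s" followed by "faddp s<d>, v<n>.2s". */
static float32_t add_quad(float32x4_t a) { return vaddvq_f32(a); }

/* The max/min/maxnm/minnm variants map the same way, e.g. vmaxv_f32 to
   "fmaxp s<d>, v<n>.2s". */
static float32_t max_pair(float32x2_t a) { return vmaxv_f32(a); }

int main(void) {
  float32x2_t p = {1.0f, 2.0f};
  float32x4_t q = {1.0f, 2.0f, 3.0f, 4.0f};
  printf("%f %f %f\n", add_pair(p), add_quad(q), max_pair(p));
  return 0;
}

Compiling this with, say, clang -O2 --target=aarch64-linux-gnu -S and inspecting the output should show the faddp/fmaxp forms that the regression tests below check for.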
 // Scalar Reduce Addition Pairwise (Integer)
 def : Pat<(v1i64 (int_aarch64_neon_vpadd (v2i64 VPR128:$Rn))),
           (ADDPvv_D_2D VPR128:$Rn)>;
+def : Pat<(v1i64 (int_aarch64_neon_vaddv (v2i64 VPR128:$Rn))),
+          (ADDPvv_D_2D VPR128:$Rn)>;
 
 // Scalar Reduce Addition Pairwise (Floating Point)
 defm FADDPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01101, "faddp", 0>;
...
 defm : Neon_ScalarPair_SD_size_patterns<
     int_aarch64_neon_vpfminnmq, FMINNMPvv_S_2S, FMINNMPvv_D_2D>;
 
+defm : Neon_ScalarPair_SD_size_patterns<
+    int_aarch64_neon_vaddv, FADDPvv_S_2S, FADDPvv_D_2D>;
+
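+// Reducing a v4f32 takes two steps: FADDP across the full .4s vector first,
+// then FADDP on the low .2s half (cf. the vaddvq_f32 test below).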
+def : Pat<(v1f32 (int_aarch64_neon_vaddv (v4f32 VPR128:$Rn))),
+          (FADDPvv_S_2S (v2f32
+             (EXTRACT_SUBREG
+                (v4f32 (FADDP_4S (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rn))),
+                sub_64)))>;
+
+defm : Neon_ScalarPair_SD_size_patterns<
+    int_aarch64_neon_vmaxv, FMAXPvv_S_2S, FMAXPvv_D_2D>;
+
+defm : Neon_ScalarPair_SD_size_patterns<
+    int_aarch64_neon_vminv, FMINPvv_S_2S, FMINPvv_D_2D>;
+
+defm : Neon_ScalarPair_SD_size_patterns<
+    int_aarch64_neon_vmaxnmv, FMAXNMPvv_S_2S, FMAXNMPvv_D_2D>;
+
+defm : Neon_ScalarPair_SD_size_patterns<
+    int_aarch64_neon_vminnmv, FMINNMPvv_S_2S, FMINNMPvv_D_2D>;
 
 // Scalar by element Arithmetic
 
   ret <1 x double> %val
 }
 
+define float @test_vaddv_f32(<2 x float> %a) {
+; CHECK-LABEL: test_vaddv_f32
+; CHECK: faddp {{s[0-9]+}}, {{v[0-9]+}}.2s
+  %1 = tail call <1 x float> @llvm.aarch64.neon.vaddv.v1f32.v2f32(<2 x float> %a)
+  %2 = extractelement <1 x float> %1, i32 0
+  ret float %2
+}
+
+define float @test_vaddvq_f32(<4 x float> %a) {
+; CHECK-LABEL: test_vaddvq_f32
+; CHECK: faddp {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK: faddp {{s[0-9]+}}, {{v[0-9]+}}.2s
+  %1 = tail call <1 x float> @llvm.aarch64.neon.vaddv.v1f32.v4f32(<4 x float> %a)
+  %2 = extractelement <1 x float> %1, i32 0
+  ret float %2
+}
+
+define double @test_vaddvq_f64(<2 x double> %a) {
+; CHECK-LABEL: test_vaddvq_f64
+; CHECK: faddp {{d[0-9]+}}, {{v[0-9]+}}.2d
+  %1 = tail call <1 x double> @llvm.aarch64.neon.vaddv.v1f64.v2f64(<2 x double> %a)
+  %2 = extractelement <1 x double> %1, i32 0
+  ret double %2
+}
+
+define float @test_vmaxv_f32(<2 x float> %a) {
+; CHECK-LABEL: test_vmaxv_f32
+; CHECK: fmaxp {{s[0-9]+}}, {{v[0-9]+}}.2s
+  %1 = tail call <1 x float> @llvm.aarch64.neon.vmaxv.v1f32.v2f32(<2 x float> %a)
+  %2 = extractelement <1 x float> %1, i32 0
+  ret float %2
+}
+
+define double @test_vmaxvq_f64(<2 x double> %a) {
+; CHECK-LABEL: test_vmaxvq_f64
+; CHECK: fmaxp {{d[0-9]+}}, {{v[0-9]+}}.2d
+  %1 = tail call <1 x double> @llvm.aarch64.neon.vmaxv.v1f64.v2f64(<2 x double> %a)
+  %2 = extractelement <1 x double> %1, i32 0
+  ret double %2
+}
+
+define float @test_vminv_f32(<2 x float> %a) {
+; CHECK-LABEL: test_vminv_f32
+; CHECK: fminp {{s[0-9]+}}, {{v[0-9]+}}.2s
+  %1 = tail call <1 x float> @llvm.aarch64.neon.vminv.v1f32.v2f32(<2 x float> %a)
+  %2 = extractelement <1 x float> %1, i32 0
+  ret float %2
+}
+
+define double @test_vminvq_f64(<2 x double> %a) {
+; CHECK-LABEL: test_vminvq_f64
+; CHECK: fminp {{d[0-9]+}}, {{v[0-9]+}}.2d
+  %1 = tail call <1 x double> @llvm.aarch64.neon.vminv.v1f64.v2f64(<2 x double> %a)
+  %2 = extractelement <1 x double> %1, i32 0
+  ret double %2
+}
+
+define double @test_vmaxnmvq_f64(<2 x double> %a) {
+; CHECK-LABEL: test_vmaxnmvq_f64
+; CHECK: fmaxnmp {{d[0-9]+}}, {{v[0-9]+}}.2d
+  %1 = tail call <1 x double> @llvm.aarch64.neon.vmaxnmv.v1f64.v2f64(<2 x double> %a)
+  %2 = extractelement <1 x double> %1, i32 0
+  ret double %2
+}
+
+define float @test_vmaxnmv_f32(<2 x float> %a) {
+; CHECK-LABEL: test_vmaxnmv_f32
+; CHECK: fmaxnmp {{s[0-9]+}}, {{v[0-9]+}}.2s
+  %1 = tail call <1 x float> @llvm.aarch64.neon.vmaxnmv.v1f32.v2f32(<2 x float> %a)
+  %2 = extractelement <1 x float> %1, i32 0
+  ret float %2
+}
+
+define double @test_vminnmvq_f64(<2 x double> %a) {
+; CHECK-LABEL: test_vminnmvq_f64
+; CHECK: fminnmp {{d[0-9]+}}, {{v[0-9]+}}.2d
+  %1 = tail call <1 x double> @llvm.aarch64.neon.vminnmv.v1f64.v2f64(<2 x double> %a)
+  %2 = extractelement <1 x double> %1, i32 0
+  ret double %2
+}
+
+define float @test_vminnmv_f32(<2 x float> %a) {
+; CHECK-LABEL: test_vminnmv_f32
+; CHECK: fminnmp {{s[0-9]+}}, {{v[0-9]+}}.2s
+  %1 = tail call <1 x float> @llvm.aarch64.neon.vminnmv.v1f32.v2f32(<2 x float> %a)
+  %2 = extractelement <1 x float> %1, i32 0
+  ret float %2
+}
+
+define <2 x i64> @test_vpaddq_s64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vpaddq_s64
+; CHECK: addp {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+  %1 = tail call <2 x i64> @llvm.arm.neon.vpadd.v2i64(<2 x i64> %a, <2 x i64> %b)
+  ret <2 x i64> %1
+}
+
+define <2 x i64> @test_vpaddq_u64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vpaddq_u64
+; CHECK: addp {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
+  %1 = tail call <2 x i64> @llvm.arm.neon.vpadd.v2i64(<2 x i64> %a, <2 x i64> %b)
+  ret <2 x i64> %1
+}
+
+define i64 @test_vaddvq_s64(<2 x i64> %a) {
+; CHECK-LABEL: test_vaddvq_s64
+; CHECK: addp {{d[0-9]+}}, {{v[0-9]+}}.2d
+  %1 = tail call <1 x i64> @llvm.aarch64.neon.vaddv.v1i64.v2i64(<2 x i64> %a)
+  %2 = extractelement <1 x i64> %1, i32 0
+  ret i64 %2
+}
+
+define i64 @test_vaddvq_u64(<2 x i64> %a) {
+; CHECK-LABEL: test_vaddvq_u64
+; CHECK: addp {{d[0-9]+}}, {{v[0-9]+}}.2d
+  %1 = tail call <1 x i64> @llvm.aarch64.neon.vaddv.v1i64.v2i64(<2 x i64> %a)
+  %2 = extractelement <1 x i64> %1, i32 0
+  ret i64 %2
+}
+
+declare <1 x i64> @llvm.aarch64.neon.vaddv.v1i64.v2i64(<2 x i64>)
+
+declare <2 x i64> @llvm.arm.neon.vpadd.v2i64(<2 x i64>, <2 x i64>)
+
+declare <1 x float> @llvm.aarch64.neon.vminnmv.v1f32.v2f32(<2 x float>)
+
+declare <1 x double> @llvm.aarch64.neon.vminnmv.v1f64.v2f64(<2 x double>)
+
+declare <1 x float> @llvm.aarch64.neon.vmaxnmv.v1f32.v2f32(<2 x float>)
+
+declare <1 x double> @llvm.aarch64.neon.vmaxnmv.v1f64.v2f64(<2 x double>)
+
+declare <1 x double> @llvm.aarch64.neon.vminv.v1f64.v2f64(<2 x double>)
+
+declare <1 x float> @llvm.aarch64.neon.vminv.v1f32.v2f32(<2 x float>)
+
+declare <1 x double> @llvm.aarch64.neon.vmaxv.v1f64.v2f64(<2 x double>)
+
+declare <1 x float> @llvm.aarch64.neon.vmaxv.v1f32.v2f32(<2 x float>)
+
+declare <1 x double> @llvm.aarch64.neon.vaddv.v1f64.v2f64(<2 x double>)
+
+declare <1 x float> @llvm.aarch64.neon.vaddv.v1f32.v4f32(<4 x float>)
+
+declare <1 x float> @llvm.aarch64.neon.vaddv.v1f32.v2f32(<2 x float>)