llvm.org GIT mirror llvm / ba3f83a
[ARM] [FIX] Add missing f16 vector operations lowering Summary: Add the missing <8xhalf> shufflevector pattern used when lowering through the concat_vectors DAG node. Also allow <8xhalf> and <4xhalf> vldup1 operations. These instructions are required for the v8.2a fp16 lowering of the vmul_n_f16, vmulq_n_f16 and vmulq_lane_f16 intrinsics. Reviewers: olista01, pbarrio, LukeGeeson, efriedma Reviewed By: efriedma Subscribers: efriedma, javed.absar, kristof.beyls, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D60319 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@358081 91177308-0d34-0410-b5e6-96231b3b80d8 Diogo N. Sampaio 9 months ago
3 changed file(s) with 42 addition(s) and 1 deletion(s). Raw diff Collapse all Expand all
22112211 case MVT::v8i8:
22122212 case MVT::v16i8: OpcodeIndex = 0; break;
22132213 case MVT::v4i16:
2214 case MVT::v8i16: OpcodeIndex = 1; break;
2214 case MVT::v8i16:
2215 case MVT::v4f16:
2216 case MVT::v8f16:
2217 OpcodeIndex = 1; break;
22152218 case MVT::v2f32:
22162219 case MVT::v2i32:
22172220 case MVT::v4f32:
75747574 def : Pat<(v16i8 (concat_vectors DPR:$Dn, DPR:$Dm)),
75757575 (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
75767576 def : Pat<(v4f32 (concat_vectors DPR:$Dn, DPR:$Dm)),
7577 (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
7578 def : Pat<(v8f16 (concat_vectors DPR:$Dn, DPR:$Dm)),
75777579 (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
75787580
75797581 //===----------------------------------------------------------------------===//
12221222 entry:
12231223 %shuffle.i = shufflevector <8 x half> %a, <8 x half> undef, <8 x i32>
12241224 ret <8 x half> %shuffle.i
1225 }
1226
1227 define <4 x half> @test_vld_dup1_4xhalf(half* %b) {
1228 ; CHECK-LABEL: test_vld_dup1_4xhalf:
1229 ; CHECK: vld1.16 {d0[]}, [r0:16]
1230 ; CHECK-NEXT: bx lr
1231
1232 entry:
1233 %b1 = load half, half* %b, align 2
1234 %vecinit = insertelement <4 x half> undef, half %b1, i32 0
1235 %vecinit2 = insertelement <4 x half> %vecinit, half %b1, i32 1
1236 %vecinit3 = insertelement <4 x half> %vecinit2, half %b1, i32 2
1237 %vecinit4 = insertelement <4 x half> %vecinit3, half %b1, i32 3
1238 ret <4 x half> %vecinit4
1239 }
1240
1241 define <8 x half> @test_vld_dup1_8xhalf(half* %b) local_unnamed_addr {
1242 ; CHECK-LABEL: test_vld_dup1_8xhalf:
1243 ; CHECK: vld1.16 {d0[], d1[]}, [r0:16]
1244 ; CHECK-NEXT: bx lr
1245
1246 entry:
1247 %b1 = load half, half* %b, align 2
1248 %vecinit = insertelement <8 x half> undef, half %b1, i32 0
1249 %vecinit8 = shufflevector <8 x half> %vecinit, <8 x half> undef, <8 x i32> zeroinitializer
1250 ret <8 x half> %vecinit8
1251 }
1252
1253 define <8 x half> @test_shufflevector8xhalf(<4 x half> %a) {
1254 ; CHECK-LABEL: test_shufflevector8xhalf:
1255 ; CHECK: vmov.f64 d1, d0
1256 ; CHECK-NEXT: bx lr
1257
1258 entry:
1259 %r = shufflevector <4 x half> %a, <4 x half> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1260 ret <8 x half> %r
12251261 }
12261262
12271263 declare <4 x half> @llvm.fabs.v4f16(<4 x half>)