llvm.org GIT mirror llvm / 4204ce0
[ARM][FIX] Add missing f16.lane.vldN/vstN lowering

Summary: Add missing D and Q lane VLDSTLane lowering for fp16 elements.

Reviewers: efriedma, kosarev, SjoerdMeijer, ostannard
Reviewed By: efriedma
Subscribers: javed.absar, kristof.beyls, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D60874

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@358962 91177308-0d34-0410-b5e6-96231b3b80d8

Diogo N. Sampaio 9 months ago
2 changed file(s) with 114 addition(s) and 0 deletion(s). Raw diff Collapse all Expand all
20912091 default: llvm_unreachable("unhandled vld/vst lane type");
20922092 // Double-register operations:
20932093 case MVT::v8i8: OpcodeIndex = 0; break;
2094 case MVT::v4f16:
20942095 case MVT::v4i16: OpcodeIndex = 1; break;
20952096 case MVT::v2f32:
20962097 case MVT::v2i32: OpcodeIndex = 2; break;
20972098 // Quad-register operations:
2099 case MVT::v8f16:
20982100 case MVT::v8i16: OpcodeIndex = 0; break;
20992101 case MVT::v4f32:
21002102 case MVT::v4i32: OpcodeIndex = 1; break;
13181318 declare <8 x half> @llvm.fma.v8f16(<8 x half>, <8 x half>, <8 x half>)
13191319 declare <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8>, <8 x i8>, <8 x i8>)
13201320 declare <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8>, <16 x i8>, <16 x i8>)
1321 declare { <8 x half>, <8 x half> } @llvm.arm.neon.vld2lane.v8f16.p0i8(i8*, <8 x half>, <8 x half>, i32, i32)
1322 declare { <4 x half>, <4 x half> } @llvm.arm.neon.vld2lane.v4f16.p0i8(i8*, <4 x half>, <4 x half>, i32, i32)
1323 declare { <8 x half>, <8 x half>, <8 x half> } @llvm.arm.neon.vld3lane.v8f16.p0i8(i8*, <8 x half>, <8 x half>, <8 x half>, i32, i32)
1324 declare { <4 x half>, <4 x half>, <4 x half> } @llvm.arm.neon.vld3lane.v4f16.p0i8(i8*, <4 x half>, <4 x half>, <4 x half>, i32, i32)
1325 declare { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @llvm.arm.neon.vld4lane.v8f16.p0i8(i8*, <8 x half>, <8 x half>, <8 x half>, <8 x half>, i32, i32)
1326 declare { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @llvm.arm.neon.vld4lane.v4f16.p0i8(i8*, <4 x half>, <4 x half>, <4 x half>, <4 x half>, i32, i32)
1327 declare void @llvm.arm.neon.vst2lane.p0i8.v8f16(i8*, <8 x half>, <8 x half>, i32, i32)
1328 declare void @llvm.arm.neon.vst2lane.p0i8.v4f16(i8*, <4 x half>, <4 x half>, i32, i32)
1329 declare void @llvm.arm.neon.vst3lane.p0i8.v8f16(i8*, <8 x half>, <8 x half>, <8 x half>, i32, i32)
1330 declare void @llvm.arm.neon.vst3lane.p0i8.v4f16(i8*, <4 x half>, <4 x half>, <4 x half>, i32, i32)
1331 declare void @llvm.arm.neon.vst4lane.p0i8.v8f16(i8*, <8 x half>, <8 x half>, <8 x half>, <8 x half>, i32, i32)
1332 declare void @llvm.arm.neon.vst4lane.p0i8.v4f16(i8*, <4 x half>, <4 x half>, <4 x half>, <4 x half>, i32, i32)
1333
1334 define { <8 x half>, <8 x half> } @test_vld2q_lane_f16(i8*, <8 x half>, <8 x half>) {
1335 ; CHECK-LABEL: test_vld2q_lane_f16:
1336 ; CHECK: vld2.16 {d1[3], d3[3]}, [r0]
1337 ; CHECK-NEXT: bx lr
1338 entry:
1339 %3 = tail call { <8 x half>, <8 x half> } @llvm.arm.neon.vld2lane.v8f16.p0i8(i8* %0, <8 x half> %1, <8 x half> %2, i32 7, i32 2)
1340 ret { <8 x half>, <8 x half> } %3
1341 }
1342
1343 define { <4 x half>, <4 x half> } @test_vld2_lane_f16(i8*, <4 x half>, <4 x half>) {
1344 ; CHECK-LABEL: test_vld2_lane_f16:
1345 ; CHECK: vld2.16 {d0[3], d1[3]}, [r0]
1346 ; CHECK-NEXT: bx lr
1347 entry:
1348 %3 = tail call { <4 x half>, <4 x half> } @llvm.arm.neon.vld2lane.v4f16.p0i8(i8* %0, <4 x half> %1, <4 x half> %2, i32 3, i32 2)
1349 ret { <4 x half>, <4 x half> } %3
1350 }
1351
1352 define { <8 x half>, <8 x half>, <8 x half> } @test_vld3q_lane_f16(i8*, <8 x half>, <8 x half>, <8 x half>) {
1353 ; CHECK-LABEL: test_vld3q_lane_f16:
1354 ; CHECK: vld3.16 {d1[3], d3[3], d5[3]}, [r0]
1355 ; CHECK-NEXT: bx lr
1356 entry:
1357 %4 = tail call { <8 x half>, <8 x half>, <8 x half> } @llvm.arm.neon.vld3lane.v8f16.p0i8(i8* %0, <8 x half> %1, <8 x half> %2, <8 x half> %3, i32 7, i32 2)
1358 ret { <8 x half>, <8 x half>, <8 x half> } %4
1359 }
1360
1361 define { <4 x half>, <4 x half>, <4 x half> } @test_vld3_lane_f16(i8*, <4 x half>, <4 x half>, <4 x half>) {
1362 ; CHECK-LABEL: test_vld3_lane_f16:
1363 ; CHECK: vld3.16 {d0[3], d1[3], d2[3]}, [r0]
1364 ; CHECK-NEXT: bx lr
1365 entry:
1366 %4 = tail call { <4 x half>, <4 x half>, <4 x half> } @llvm.arm.neon.vld3lane.v4f16.p0i8(i8* %0, <4 x half> %1, <4 x half> %2, <4 x half> %3, i32 3, i32 2)
1367 ret { <4 x half>, <4 x half>, <4 x half> } %4
1368 }
1369 define { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @test_vld4lane_v8f16_p0i8(i8*, <8 x half>, <8 x half>, <8 x half>, <8 x half>) {
1370 ; CHECK-LABEL: test_vld4lane_v8f16_p0i8:
1371 ; CHECK: vld4.16 {d1[3], d3[3], d5[3], d7[3]}, [r0]
1372 ; CHECK-NEXT: bx lr
1373 entry:
1374 %5 = tail call { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @llvm.arm.neon.vld4lane.v8f16.p0i8(i8* %0, <8 x half> %1, <8 x half> %2, <8 x half> %3, <8 x half> %4, i32 7, i32 2)
1375 ret { <8 x half>, <8 x half>, <8 x half>, <8 x half> } %5
1376 }
1377 define { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @test_vld4lane_v4f16_p0i8(i8*, <4 x half>, <4 x half>, <4 x half>, <4 x half>) {
1378 ; CHECK-LABEL: test_vld4lane_v4f16_p0i8:
1379 ; CHECK: vld4.16 {d0[3], d1[3], d2[3], d3[3]}, [r0]
1380 ; CHECK-NEXT: bx lr
1381 entry:
1382 %5 = tail call { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @llvm.arm.neon.vld4lane.v4f16.p0i8(i8* %0, <4 x half> %1, <4 x half> %2, <4 x half> %3, <4 x half> %4, i32 3, i32 2)
1383 ret { <4 x half>, <4 x half>, <4 x half>, <4 x half> } %5
1384 }
1385 define void @test_vst2lane_p0i8_v8f16(i8*, <8 x half>, <8 x half>) {
1386 ; CHECK-LABEL: test_vst2lane_p0i8_v8f16:
1387 ; CHECK: vst2.16 {d0[0], d2[0]}, [r0]
1388 ; CHECK-NEXT: bx lr
1389 entry:
1390 tail call void @llvm.arm.neon.vst2lane.p0i8.v8f16(i8* %0, <8 x half> %1, <8 x half> %2, i32 0, i32 1)
1391 ret void
1392 }
1393 define void @test_vst2lane_p0i8_v4f16(i8*, <4 x half>, <4 x half>) {
1394 ; CHECK-LABEL: test_vst2lane_p0i8_v4f16:
1395 ; CHECK: vst2.16 {d0[0], d1[0]}, [r0:32]
1396 ; CHECK-NEXT: bx lr
1397 entry:
1398 tail call void @llvm.arm.neon.vst2lane.p0i8.v4f16(i8* %0, <4 x half> %1, <4 x half> %2, i32 0, i32 0)
1399 ret void
1400 }
1401 define void @test_vst3lane_p0i8_v8f16(i8*, <8 x half>, <8 x half>, <8 x half>) {
1402 ; CHECK-LABEL: test_vst3lane_p0i8_v8f16:
1403 ; CHECK: vst3.16 {d0[0], d2[0], d4[0]}, [r0]
1404 ; CHECK-NEXT: bx lr
1405 entry:
1406 tail call void @llvm.arm.neon.vst3lane.p0i8.v8f16(i8* %0, <8 x half> %1, <8 x half> %2, <8 x half> %3, i32 0, i32 0)
1407 ret void
1408 }
1409 define void @test_vst3lane_p0i8_v4f16(i8*, <4 x half>, <4 x half>, <4 x half>) {
1410 ; CHECK-LABEL: test_vst3lane_p0i8_v4f16:
1411 ; CHECK: vst3.16 {d0[0], d1[0], d2[0]}, [r0]
1412 ; CHECK-NEXT: bx lr
1413 entry:
1414 tail call void @llvm.arm.neon.vst3lane.p0i8.v4f16(i8* %0, <4 x half> %1, <4 x half> %2, <4 x half> %3, i32 0, i32 0)
1415 ret void
1416 }
1417 define void @test_vst4lane_p0i8_v8f16(i8*, <8 x half>, <8 x half>, <8 x half>, <8 x half>) {
1418 ; CHECK-LABEL: test_vst4lane_p0i8_v8f16:
1419 ; CHECK: vst4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0:64]
1420 ; CHECK-NEXT: bx lr
1421 entry:
1422 tail call void @llvm.arm.neon.vst4lane.p0i8.v8f16(i8* %0, <8 x half> %1, <8 x half> %2, <8 x half> %3, <8 x half> %4, i32 0, i32 0)
1423 ret void
1424 }
1425 define void @test_vst4lane_p0i8_v4f16(i8*, <4 x half>, <4 x half>, <4 x half>, <4 x half>) {
1426 ; CHECK-LABEL: test_vst4lane_p0i8_v4f16:
1427 ; CHECK: vst4.16 {d0[0], d1[0], d2[0], d3[0]}, [r0:64]
1428 ; CHECK-NEXT: bx lr
1429 entry:
1430 tail call void @llvm.arm.neon.vst4lane.p0i8.v4f16(i8* %0, <4 x half> %1, <4 x half> %2, <4 x half> %3, <4 x half> %4, i32 0, i32 0)
1431 ret void
1432 }