llvm.org GIT mirror llvm / 973a074
Remove NEON vmovn intrinsic, replacing it with vector truncate operations. Auto-upgrade the old intrinsic and update tests. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@112507 91177308-0d34-0410-b5e6-96231b3b80d8 (Bob Wilson, 10 years ago)
6 changed file(s) with 51 addition(s) and 11 deletion(s). Raw diff Collapse all Expand all
302302 def int_arm_neon_vcvtfxu2fp : Neon_CvtFxToFP_Intrinsic;
303303
304304 // Narrowing and Lengthening Vector Moves.
305 def int_arm_neon_vmovn : Neon_1Arg_Narrow_Intrinsic;
306305 def int_arm_neon_vqmovns : Neon_1Arg_Narrow_Intrinsic;
307306 def int_arm_neon_vqmovnu : Neon_1Arg_Narrow_Intrinsic;
308307 def int_arm_neon_vqmovnsu : Neon_1Arg_Narrow_Intrinsic;
954954 : N2V
955955 (ins QPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
956956 [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src))))]>;
957
958 // Narrow 2-register operations.
959 class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
960 bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
961 InstrItinClass itin, string OpcodeStr, string Dt,
962 ValueType TyD, ValueType TyQ, SDNode OpNode>
963 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$dst),
964 (ins QPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
965 [(set DPR:$dst, (TyD (OpNode (TyQ QPR:$src))))]>;
957966
958967 // Narrow 2-register intrinsics.
959968 class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
15771586 v2i64, v2i64, OpNode, Commutable>;
15781587 }
15791588
1589
1590 // Neon Narrowing 2-register vector operations,
1591 // source operand element sizes of 16, 32 and 64 bits:
1592 multiclass N2VN_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
1593 bits<5> op11_7, bit op6, bit op4,
1594 InstrItinClass itin, string OpcodeStr, string Dt,
1595 SDNode OpNode> {
1596 def v8i8 : N2VN<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
1597 itin, OpcodeStr, !strconcat(Dt, "16"),
1598 v8i8, v8i16, OpNode>;
1599 def v4i16 : N2VN<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
1600 itin, OpcodeStr, !strconcat(Dt, "32"),
1601 v4i16, v4i32, OpNode>;
1602 def v2i32 : N2VN<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
1603 itin, OpcodeStr, !strconcat(Dt, "64"),
1604 v2i32, v2i64, OpNode>;
1605 }
15801606
15811607 // Neon Narrowing 2-register vector intrinsics,
15821608 // source operand element sizes of 16, 32 and 64 bits:
32203246 [(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]>;
32213247
32223248 // VMOVN : Vector Narrowing Move
3223 defm VMOVN : N2VNInt_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVD,
3224 "vmovn", "i", int_arm_neon_vmovn>;
3249 defm VMOVN : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVD,
3250 "vmovn", "i", trunc>;
32253251 // VQMOVN : Vector Saturating Narrowing Move
32263252 defm VQMOVNs : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD,
32273253 "vqmovn", "s", int_arm_neon_vqmovns>;
8787 ((Name.compare(14, 5, "vaddw", 5) == 0 ||
8888 Name.compare(14, 5, "vsubw", 5) == 0) &&
8989 (Name.compare(19, 2, "s.", 2) == 0 ||
90 Name.compare(19, 2, "u.", 2) == 0))) {
90 Name.compare(19, 2, "u.", 2) == 0)) ||
91
92 (Name.compare(14, 6, "vmovn.", 6) == 0)) {
9193
9294 // Calls to these are transformed into IR without intrinsics.
9395 NewFn = 0;
400402 else
401403 NewI = BinaryOperator::CreateSub(V0, V1,"upgraded."+CI->getName(),CI);
402404
405 } else if (Name.compare(14, 6, "vmovn.", 6) == 0) {
406 NewI = new TruncInst(CI->getArgOperand(0), CI->getType(),
407 "upgraded." + CI->getName(), CI);
403408 } else {
404409 llvm_unreachable("Unknown arm.neon function for CallInst upgrade.");
405410 }
7575 ; CHECK: zext <4 x i16>
7676 ; CHECK-NEXT: sub <4 x i32>
7777
78 ; vmovn should be auto-upgraded to trunc
79
80 ; CHECK: vmovni16
81 ; CHECK-NOT: arm.neon.vmovn.v8i8
82 ; CHECK: trunc <8 x i16>
83
84 ; CHECK: vmovni32
85 ; CHECK-NOT: arm.neon.vmovn.v4i16
86 ; CHECK: trunc <4 x i32>
87
88 ; CHECK: vmovni64
89 ; CHECK-NOT: arm.neon.vmovn.v2i32
90 ; CHECK: trunc <2 x i64>
91
7892 ; vld* and vst* intrinsic calls need an alignment argument (defaulted to 1)
7993
8094 ; CHECK: vld1i8
239239 ;CHECK: vmovni16:
240240 ;CHECK: vmovn.i16
241241 %tmp1 = load <8 x i16>* %A
242 %tmp2 = call <8 x i8> @llvm.arm.neon.vmovn.v8i8(<8 x i16> %tmp1)
242 %tmp2 = trunc <8 x i16> %tmp1 to <8 x i8>
243243 ret <8 x i8> %tmp2
244244 }
245245
247247 ;CHECK: vmovni32:
248248 ;CHECK: vmovn.i32
249249 %tmp1 = load <4 x i32>* %A
250 %tmp2 = call <4 x i16> @llvm.arm.neon.vmovn.v4i16(<4 x i32> %tmp1)
250 %tmp2 = trunc <4 x i32> %tmp1 to <4 x i16>
251251 ret <4 x i16> %tmp2
252252 }
253253
255255 ;CHECK: vmovni64:
256256 ;CHECK: vmovn.i64
257257 %tmp1 = load <2 x i64>* %A
258 %tmp2 = call <2 x i32> @llvm.arm.neon.vmovn.v2i32(<2 x i64> %tmp1)
258 %tmp2 = trunc <2 x i64> %tmp1 to <2 x i32>
259259 ret <2 x i32> %tmp2
260260 }
261
262 declare <8 x i8> @llvm.arm.neon.vmovn.v8i8(<8 x i16>) nounwind readnone
263 declare <4 x i16> @llvm.arm.neon.vmovn.v4i16(<4 x i32>) nounwind readnone
264 declare <2 x i32> @llvm.arm.neon.vmovn.v2i32(<2 x i64>) nounwind readnone
265261
266262 define <8 x i8> @vqmovns16(<8 x i16>* %A) nounwind {
267263 ;CHECK: vqmovns16: