llvm.org GIT mirror llvm / 06a6a30
This is one of the first steps toward replacing target-dependent intrinsics with target-independent intrinsics. The first instruction(s) to be handled are the vector versions of count leading zeros (ctlz). The changes here are to clang so that it generates a target-independent vector ctlz when it sees an ARM-dependent vector ctlz. The changes in llvm are to match the target-independent vector ctlz and, in VMCore/AutoUpgrade.cpp, to update any existing bc files containing ARM-dependent vector ctlzs with target-independent ctlzs. There are also changes to an existing test case in llvm for ARM vector count instructions and a new test for the bitcode upgrade. <rdar://problem/11831778> There is deliberately no test for the change to clang because, so far as I know, no consensus has been reached regarding how to test neon instructions in clang; q.v. <rdar://problem/8762292> git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@160200 91177308-0d34-0410-b5e6-96231b3b80d8 Joel Jones 8 years ago
4 changed file(s) with 57 addition(s) and 19 deletion(s). Raw diff Collapse all Expand all
48244824 // VCLZ : Vector Count Leading Zeros
48254825 defm VCLZ : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0,
48264826 IIC_VCNTiD, IIC_VCNTiQ, "vclz", "i",
4827 int_arm_neon_vclz>;
4827 ctlz>;
48284828 // VCNT : Vector Count One Bits
48294829 def VCNTd : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
48304830 IIC_VCNTiD, "vcnt", "8",
5151
5252 switch (Name[0]) {
5353 default: break;
54 case 'a': {
55 if (Name.startswith("arm.neon.vclz")) {
56 Type* args[2] = {
57 F->arg_begin()->getType(),
58 Type::getInt1Ty(F->getContext())
59 };
60 // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
61 // the end of the name. Change name from llvm.arm.neon.vclz.* to
62 // llvm.ctlz.*
63 FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
64 NewFn = Function::Create(fType, F->getLinkage(),
65 "llvm.ctlz." + Name.substr(14), F->getParent());
66 return true;
67 }
68 break;
69 }
5470 case 'c': {
5571 if (Name.startswith("ctlz.") && F->arg_size() == 1) {
5672 F->setName(Name + ".old");
294310 CI->eraseFromParent();
295311 return;
296312
313 case Intrinsic::arm_neon_vclz: {
314 // Change name from llvm.arm.neon.vclz.* to llvm.ctlz.*
315 CI->replaceAllUsesWith(Builder.CreateCall2(NewFn, CI->getArgOperand(0),
316 Builder.getFalse(),
317 "llvm.ctlz." + Name.substr(14)));
318 CI->eraseFromParent();
319 return;
320 }
321
297322 case Intrinsic::x86_xop_vfrcz_ss:
298323 case Intrinsic::x86_xop_vfrcz_sd:
299324 CI->replaceAllUsesWith(Builder.CreateCall(NewFn, CI->getArgOperand(1),
0 ; RUN: llvm-as < %s | llvm-dis | FileCheck %s
1 ; NB: currently tests only vclz, should also test vcnt and vcls
2
3 define <4 x i16> @vclz16(<4 x i16>* %A) nounwind {
4 ;CHECK: @vclz16
5 %tmp1 = load <4 x i16>* %A
6 %tmp2 = call <4 x i16> @llvm.arm.neon.vclz.v4i16(<4 x i16> %tmp1)
7 ;CHECK: {{call.*@llvm.ctlz.v4i16\(<4 x i16>.*, i1 false}}
8 ret <4 x i16> %tmp2
9 }
10
11 declare <4 x i16> @llvm.arm.neon.vclz.v4i16(<4 x i16>) nounwind readnone
0 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
1 ; NB: this tests vcnt, vclz, and vcls
12
23 define <8 x i8> @vcnt8(<8 x i8>* %A) nounwind {
34 ;CHECK: vcnt8:
2021
2122 define <8 x i8> @vclz8(<8 x i8>* %A) nounwind {
2223 ;CHECK: vclz8:
23 ;CHECK: vclz.i8
24 ;CHECK: vclz.i8 {{d[0-9]+}}, {{d[0-9]+}}
2425 %tmp1 = load <8 x i8>* %A
25 %tmp2 = call <8 x i8> @llvm.arm.neon.vclz.v8i8(<8 x i8> %tmp1)
26 %tmp2 = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %tmp1, i1 0)
2627 ret <8 x i8> %tmp2
2728 }
2829
2930 define <4 x i16> @vclz16(<4 x i16>* %A) nounwind {
3031 ;CHECK: vclz16:
31 ;CHECK: vclz.i16
32 ;CHECK: vclz.i16 {{d[0-9]+}}, {{d[0-9]+}}
3233 %tmp1 = load <4 x i16>* %A
33 %tmp2 = call <4 x i16> @llvm.arm.neon.vclz.v4i16(<4 x i16> %tmp1)
34 %tmp2 = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %tmp1, i1 0)
3435 ret <4 x i16> %tmp2
3536 }
3637
3738 define <2 x i32> @vclz32(<2 x i32>* %A) nounwind {
3839 ;CHECK: vclz32:
39 ;CHECK: vclz.i32
40 ;CHECK: vclz.i32 {{d[0-9]+}}, {{d[0-9]+}}
4041 %tmp1 = load <2 x i32>* %A
41 %tmp2 = call <2 x i32> @llvm.arm.neon.vclz.v2i32(<2 x i32> %tmp1)
42 %tmp2 = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %tmp1, i1 0)
4243 ret <2 x i32> %tmp2
4344 }
4445
4546 define <16 x i8> @vclzQ8(<16 x i8>* %A) nounwind {
4647 ;CHECK: vclzQ8:
47 ;CHECK: vclz.i8
48 ;CHECK: vclz.i8 {{q[0-9]+}}, {{q[0-9]+}}
4849 %tmp1 = load <16 x i8>* %A
49 %tmp2 = call <16 x i8> @llvm.arm.neon.vclz.v16i8(<16 x i8> %tmp1)
50 %tmp2 = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %tmp1, i1 0)
5051 ret <16 x i8> %tmp2
5152 }
5253
5354 define <8 x i16> @vclzQ16(<8 x i16>* %A) nounwind {
5455 ;CHECK: vclzQ16:
55 ;CHECK: vclz.i16
56 ;CHECK: vclz.i16 {{q[0-9]+}}, {{q[0-9]+}}
5657 %tmp1 = load <8 x i16>* %A
57 %tmp2 = call <8 x i16> @llvm.arm.neon.vclz.v8i16(<8 x i16> %tmp1)
58 %tmp2 = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %tmp1, i1 0)
5859 ret <8 x i16> %tmp2
5960 }
6061
6162 define <4 x i32> @vclzQ32(<4 x i32>* %A) nounwind {
6263 ;CHECK: vclzQ32:
63 ;CHECK: vclz.i32
64 ;CHECK: vclz.i32 {{q[0-9]+}}, {{q[0-9]+}}
6465 %tmp1 = load <4 x i32>* %A
65 %tmp2 = call <4 x i32> @llvm.arm.neon.vclz.v4i32(<4 x i32> %tmp1)
66 %tmp2 = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %tmp1, i1 0)
6667 ret <4 x i32> %tmp2
6768 }
6869
69 declare <8 x i8> @llvm.arm.neon.vclz.v8i8(<8 x i8>) nounwind readnone
70 declare <4 x i16> @llvm.arm.neon.vclz.v4i16(<4 x i16>) nounwind readnone
71 declare <2 x i32> @llvm.arm.neon.vclz.v2i32(<2 x i32>) nounwind readnone
70 declare <8 x i8> @llvm.ctlz.v8i8(<8 x i8>, i1) nounwind readnone
71 declare <4 x i16> @llvm.ctlz.v4i16(<4 x i16>, i1) nounwind readnone
72 declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1) nounwind readnone
7273
73 declare <16 x i8> @llvm.arm.neon.vclz.v16i8(<16 x i8>) nounwind readnone
74 declare <8 x i16> @llvm.arm.neon.vclz.v8i16(<8 x i16>) nounwind readnone
75 declare <4 x i32> @llvm.arm.neon.vclz.v4i32(<4 x i32>) nounwind readnone
74 declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>, i1) nounwind readnone
75 declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>, i1) nounwind readnone
76 declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) nounwind readnone
7677
7778 define <8 x i8> @vclss8(<8 x i8>* %A) nounwind {
7879 ;CHECK: vclss8: