llvm.org GIT mirror llvm / e68996a
[ARM] Add patterns for CTLZ on MVE. The CTLZ intrinsic can use the VCLZ instruction on MVE, which produces better results than expanding. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@371999 91177308-0d34-0410-b5e6-96231b3b80d8 Oliver Cruickshank 1 year, 8 days ago
3 changed file(s) with 150 addition(s) and 0 deletion(s). Raw diff Collapse all Expand all
260260 setOperationAction(ISD::SETCC, VT, Custom);
261261 setOperationAction(ISD::MLOAD, VT, Custom);
262262 setOperationAction(ISD::MSTORE, VT, Legal);
263 setOperationAction(ISD::CTLZ, VT, Legal);
263264
264265 // No native support for these.
265266 setOperationAction(ISD::UDIV, VT, Expand);
// "vclz" instances of MVE_VCLSCLZ, one per element-size suffix (i8, i16, i32).
17541754 def MVE_VCLZs8 : MVE_VCLSCLZ<"vclz", "i8", 0b00, 0b1>;
17551755 def MVE_VCLZs16 : MVE_VCLSCLZ<"vclz", "i16", 0b01, 0b1>;
17561756 def MVE_VCLZs32 : MVE_VCLSCLZ<"vclz", "i32", 0b10, 0b1>;
1757
// Selection patterns, guarded by the HasMVEInt predicate: a vector ctlz node
// on v16i8, v4i32 or v8i16 is matched to the MVE_VCLZ instance whose element
// size corresponds to the vector type (s8, s32 and s16 respectively).
1758 let Predicates = [HasMVEInt] in {
1759 def : Pat<(v16i8 ( ctlz (v16i8 MQPR:$val1))),
1760 (v16i8 ( MVE_VCLZs8 (v16i8 MQPR:$val1)))>;
1761 def : Pat<(v4i32 ( ctlz (v4i32 MQPR:$val1))),
1762 (v4i32 ( MVE_VCLZs32 (v4i32 MQPR:$val1)))>;
1763 def : Pat<(v8i16 ( ctlz (v8i16 MQPR:$val1))),
1764 (v8i16 ( MVE_VCLZs16 (v8i16 MQPR:$val1)))>;
1765 }
17571766
17581767 class MVE_VABSNEG_int size, bit negate,
17591768 list pattern=[]>
0 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
1 ; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -verify-machineinstrs -mattr=+mve %s -o - | FileCheck %s
2
; <2 x i64> ctlz with the i1 flag operand set to 0. The autogenerated
; assertions below expect per-lane scalar code (vmov to a core register, clz,
; add #32, predicated clzne) rather than a single vector instruction.
; NOTE(review): no v2i64 ctlz pattern is visible in this change, which is
; presumably why this case is still expanded - confirm against the lowering.
3 define arm_aapcs_vfpcc <2 x i64> @ctlz_2i64_0_t(<2 x i64> %src){
4 ; CHECK-LABEL: ctlz_2i64_0_t:
5 ; CHECK: @ %bb.0: @ %entry
6 ; CHECK-NEXT: vmov r0, s3
7 ; CHECK-NEXT: cmp r0, #0
8 ; CHECK-NEXT: cset r1, ne
9 ; CHECK-NEXT: lsls r1, r1, #31
10 ; CHECK-NEXT: vmov r1, s2
11 ; CHECK-NEXT: clz r1, r1
12 ; CHECK-NEXT: add.w r1, r1, #32
13 ; CHECK-NEXT: it ne
14 ; CHECK-NEXT: clzne r1, r0
15 ; CHECK-NEXT: vmov r0, s1
16 ; CHECK-NEXT: vmov s6, r1
17 ; CHECK-NEXT: cmp r0, #0
18 ; CHECK-NEXT: cset r1, ne
19 ; CHECK-NEXT: lsls r1, r1, #31
20 ; CHECK-NEXT: vmov r1, s0
21 ; CHECK-NEXT: clz r1, r1
22 ; CHECK-NEXT: add.w r1, r1, #32
23 ; CHECK-NEXT: it ne
24 ; CHECK-NEXT: clzne r1, r0
25 ; CHECK-NEXT: vmov s4, r1
26 ; CHECK-NEXT: vldr s5, .LCPI0_0
27 ; CHECK-NEXT: vmov.f32 s7, s5
28 ; CHECK-NEXT: vmov q0, q1
29 ; CHECK-NEXT: bx lr
30 ; CHECK-NEXT: .p2align 2
31 ; CHECK-NEXT: @ %bb.1:
32 ; CHECK-NEXT: .LCPI0_0:
33 ; CHECK-NEXT: .long 0 @ float 0
34 entry:
35 %0 = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %src, i1 0)
36 ret <2 x i64> %0
37 }
38
; <4 x i32> ctlz with the i1 flag operand set to 0. The assertions expect the
; whole operation to be a single "vclz.i32 q0, q0" followed by the return.
39 define arm_aapcs_vfpcc <4 x i32> @ctlz_4i32_0_t(<4 x i32> %src){
40 ; CHECK-LABEL: ctlz_4i32_0_t:
41 ; CHECK: @ %bb.0: @ %entry
42 ; CHECK-NEXT: vclz.i32 q0, q0
43 ; CHECK-NEXT: bx lr
44 entry:
45 %0 = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %src, i1 0)
46 ret <4 x i32> %0
47 }
48
; <8 x i16> ctlz with the i1 flag operand set to 0. The assertions expect the
; whole operation to be a single "vclz.i16 q0, q0" followed by the return.
49 define arm_aapcs_vfpcc <8 x i16> @ctlz_8i16_0_t(<8 x i16> %src){
50 ; CHECK-LABEL: ctlz_8i16_0_t:
51 ; CHECK: @ %bb.0: @ %entry
52 ; CHECK-NEXT: vclz.i16 q0, q0
53 ; CHECK-NEXT: bx lr
54 entry:
55 %0 = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %src, i1 0)
56 ret <8 x i16> %0
57 }
58
; <16 x i8> ctlz with the i1 flag operand set to 0. The assertions expect the
; whole operation to be a single "vclz.i8 q0, q0" followed by the return.
59 define arm_aapcs_vfpcc <16 x i8> @ctlz_16i8_0_t(<16 x i8> %src){
60 ; CHECK-LABEL: ctlz_16i8_0_t:
61 ; CHECK: @ %bb.0: @ %entry
62 ; CHECK-NEXT: vclz.i8 q0, q0
63 ; CHECK-NEXT: bx lr
64 entry:
65 %0 = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %src, i1 0)
66 ret <16 x i8> %0
67 }
68
; <2 x i64> ctlz with the i1 flag operand set to 1. The expected instruction
; sequence is the same per-lane scalar code as in ctlz_2i64_0_t; only the
; constant-pool label (.LCPI4_0) differs.
; NOTE(review): no v2i64 ctlz pattern is visible in this change, which is
; presumably why this case is still expanded - confirm against the lowering.
69 define arm_aapcs_vfpcc <2 x i64> @ctlz_2i64_1_t(<2 x i64> %src){
70 ; CHECK-LABEL: ctlz_2i64_1_t:
71 ; CHECK: @ %bb.0: @ %entry
72 ; CHECK-NEXT: vmov r0, s3
73 ; CHECK-NEXT: cmp r0, #0
74 ; CHECK-NEXT: cset r1, ne
75 ; CHECK-NEXT: lsls r1, r1, #31
76 ; CHECK-NEXT: vmov r1, s2
77 ; CHECK-NEXT: clz r1, r1
78 ; CHECK-NEXT: add.w r1, r1, #32
79 ; CHECK-NEXT: it ne
80 ; CHECK-NEXT: clzne r1, r0
81 ; CHECK-NEXT: vmov r0, s1
82 ; CHECK-NEXT: vmov s6, r1
83 ; CHECK-NEXT: cmp r0, #0
84 ; CHECK-NEXT: cset r1, ne
85 ; CHECK-NEXT: lsls r1, r1, #31
86 ; CHECK-NEXT: vmov r1, s0
87 ; CHECK-NEXT: clz r1, r1
88 ; CHECK-NEXT: add.w r1, r1, #32
89 ; CHECK-NEXT: it ne
90 ; CHECK-NEXT: clzne r1, r0
91 ; CHECK-NEXT: vmov s4, r1
92 ; CHECK-NEXT: vldr s5, .LCPI4_0
93 ; CHECK-NEXT: vmov.f32 s7, s5
94 ; CHECK-NEXT: vmov q0, q1
95 ; CHECK-NEXT: bx lr
96 ; CHECK-NEXT: .p2align 2
97 ; CHECK-NEXT: @ %bb.1:
98 ; CHECK-NEXT: .LCPI4_0:
99 ; CHECK-NEXT: .long 0 @ float 0
100 entry:
101 %0 = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %src, i1 1)
102 ret <2 x i64> %0
103 }
104
; <4 x i32> ctlz with the i1 flag operand set to 1. As in ctlz_4i32_0_t, the
; assertions expect a single "vclz.i32 q0, q0" followed by the return.
105 define arm_aapcs_vfpcc <4 x i32> @ctlz_4i32_1_t(<4 x i32> %src){
106 ; CHECK-LABEL: ctlz_4i32_1_t:
107 ; CHECK: @ %bb.0: @ %entry
108 ; CHECK-NEXT: vclz.i32 q0, q0
109 ; CHECK-NEXT: bx lr
110 entry:
111 %0 = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %src, i1 1)
112 ret <4 x i32> %0
113 }
114
; <8 x i16> ctlz with the i1 flag operand set to 1. As in ctlz_8i16_0_t, the
; assertions expect a single "vclz.i16 q0, q0" followed by the return.
115 define arm_aapcs_vfpcc <8 x i16> @ctlz_8i16_1_t(<8 x i16> %src){
116 ; CHECK-LABEL: ctlz_8i16_1_t:
117 ; CHECK: @ %bb.0: @ %entry
118 ; CHECK-NEXT: vclz.i16 q0, q0
119 ; CHECK-NEXT: bx lr
120 entry:
121 %0 = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %src, i1 1)
122 ret <8 x i16> %0
123 }
124
; <16 x i8> ctlz with the i1 flag operand set to 1. As in ctlz_16i8_0_t, the
; assertions expect a single "vclz.i8 q0, q0" followed by the return.
125 define arm_aapcs_vfpcc <16 x i8> @ctlz_16i8_1_t(<16 x i8> %src){
126 ; CHECK-LABEL: ctlz_16i8_1_t:
127 ; CHECK: @ %bb.0: @ %entry
128 ; CHECK-NEXT: vclz.i8 q0, q0
129 ; CHECK-NEXT: bx lr
130 entry:
131 %0 = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %src, i1 1)
132 ret <16 x i8> %0
133 }
134
135
; Declarations of the llvm.ctlz overloads called by the test functions in
; this file, one per vector type exercised (v2i64, v4i32, v8i16, v16i8).
; Each takes the source vector and an i1 flag operand.
136 declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1)
137 declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1)
138 declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>, i1)
139 declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>, i1)