llvm.org GIT mirror llvm / bb84648
[ARM] Add v4f16 and v8f16 types to the CallingConv Summary: The Procedure Call Standard for the Arm Architecture states that float16x4_t and float16x8_t behave just as uint16x4_t and uint16x8_t for argument passing. This patch adds the fp16 vectors to the ARMCallingConv.td file. Reviewers: miyuki, ostannard Reviewed By: ostannard Subscribers: ostannard, javed.absar, kristof.beyls, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D60720 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@359431 91177308-0d34-0410-b5e6-96231b3b80d8 Diogo N. Sampaio 8 months ago
2 changed file(s) with 244 addition(s) and 18 deletion(s). Raw diff Collapse all Expand all
2929 CCIfSwiftError>>,
3030
3131 // Handle all vector types as either f64 or v2f64.
32 CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
33 CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
32 CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
33 CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
3434
3535 // f64 and v2f64 are passed in adjacent GPRs, possibly split onto the stack
3636 CCIfType<[f64, v2f64], CCCustom<"CC_ARM_APCS_Custom_f64">>,
5555 CCIfSwiftError>>,
5656
5757 // Handle all vector types as either f64 or v2f64.
58 CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
59 CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
58 CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
59 CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
6060
6161 CCIfType<[f64, v2f64], CCCustom<"RetCC_ARM_APCS_Custom_f64">>,
6262
7070 let Entry = 1 in
7171 def FastCC_ARM_APCS : CallingConv<[
7272 // Handle all vector types as either f64 or v2f64.
73 CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
74 CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
73 CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
74 CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
7575
7676 CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
7777 CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
9090 let Entry = 1 in
9191 def RetFastCC_ARM_APCS : CallingConv<[
9292 // Handle all vector types as either f64 or v2f64.
93 CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
94 CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
93 CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
94 CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
9595
9696 CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
9797 CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
107107 let Entry = 1 in
108108 def CC_ARM_APCS_GHC : CallingConv<[
109109 // Handle all vector types as either f64 or v2f64.
110 CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
111 CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
110 CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
111 CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
112112
113113 CCIfType<[v2f64], CCAssignToReg<[Q4, Q5]>>,
114114 CCIfType<[f64], CCAssignToReg<[D8, D9, D10, D11]>>,
164164 CCIfNest>,
165165
166166 // Handle all vector types as either f64 or v2f64.
167 CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
168 CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
167 CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
168 CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
169169
170170 // Pass SwiftSelf in a callee saved register.
171171 CCIfSwiftSelf>>,
181181 let Entry = 1 in
182182 def RetCC_ARM_AAPCS : CallingConv<[
183183 // Handle all vector types as either f64 or v2f64.
184 CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
185 CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
184 CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
185 CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
186186
187187 // Pass SwiftSelf in a callee saved register.
188188 CCIfSwiftSelf>>,
207207 CCIfByVal>,
208208
209209 // Handle all vector types as either f64 or v2f64.
210 CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
211 CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
210 CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
211 CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
212212
213213 // Pass SwiftSelf in a callee saved register.
214214 CCIfSwiftSelf>>,
229229 let Entry = 1 in
230230 def RetCC_ARM_AAPCS_VFP : CallingConv<[
231231 // Handle all vector types as either f64 or v2f64.
232 CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
233 CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
232 CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
233 CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
234234
235235 // Pass SwiftSelf in a callee saved register.
236236 CCIfSwiftSelf>>,
0 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
1 ; RUN: llc -mtriple=armv8a -mattr=+armv8.2-a,+fullfp16,+neon -target-abi=aapcs-gnu -float-abi=soft -o - %s | FileCheck %s --check-prefix=SOFT
2 ; RUN: llc -mtriple=armv8a -mattr=+armv8.2-a,+fullfp16,+neon -target-abi=aapcs-gnu -float-abi=hard -o - %s | FileCheck %s --check-prefix=HARD
3 ; RUNTOADD: llc -mtriple=armeb-eabi -mattr=+armv8.2-a,+fullfp16,+neon -target-abi=aapcs-gnu -float-abi=soft -o - %s | FileCheck %s --check-prefix=SOFTEB
4 ; RUNTOADD: llc -mtriple=armeb-eabi -mattr=+armv8.2-a,+fullfp16,+neon -target-abi=aapcs-gnu -float-abi=hard -o - %s | FileCheck %s --check-prefix=HARDEB
5
6 declare <4 x half> @llvm.fabs.v4f16(<4 x half>)
7 declare <8 x half> @llvm.fabs.v8f16(<8 x half>)
8 declare void @use(double, float, <4 x half>, i16, <8 x half>)
9 define <4 x half> @test_vabs_f16(<4 x half> %a) {
10 ; SOFT-LABEL: test_vabs_f16:
11 ; SOFT: @ %bb.0: @ %entry
12 ; SOFT-NEXT: vmov d16, r0, r1
13 ; SOFT-NEXT: vabs.f16 d16, d16
14 ; SOFT-NEXT: vmov r0, r1, d16
15 ; SOFT-NEXT: bx lr
16 ;
17 ; HARD-LABEL: test_vabs_f16:
18 ; HARD: @ %bb.0: @ %entry
19 ; HARD-NEXT: vabs.f16 d0, d0
20 ; HARD-NEXT: bx lr
21 ;
22 ; SOFTEB-LABEL: test_vabs_f16:
23 ; SOFTEB: @ %bb.0: @ %entry
24 ; SOFTEB-NEXT: vmov d16, r1, r0
25 ; SOFTEB-NEXT: vrev64.16 d16, d16
26 ; SOFTEB-NEXT: vabs.f16 d16, d16
27 ; SOFTEB-NEXT: vrev64.16 d16, d16
28 ; SOFTEB-NEXT: vmov r1, r0, d16
29 ; SOFTEB-NEXT: bx lr
30 ;
31 ; HARDEB-LABEL: test_vabs_f16:
32 ; HARDEB: @ %bb.0: @ %entry
33 ; HARDEB-NEXT: vrev64.16 d16, d0
34 ; HARDEB-NEXT: vabs.f16 d16, d16
35 ; HARDEB-NEXT: vrev64.16 d0, d16
36 ; HARDEB-NEXT: bx lr
37 entry:
38 %vabs1.i = tail call <4 x half> @llvm.fabs.v4f16(<4 x half> %a)
39 ret <4 x half> %vabs1.i
40 }
41
42
43 define <8 x half> @test2_vabs_f16(<8 x half> %a) {
44 ; SOFT-LABEL: test2_vabs_f16:
45 ; SOFT: @ %bb.0: @ %entry
46 ; SOFT-NEXT: vmov d17, r2, r3
47 ; SOFT-NEXT: vmov d16, r0, r1
48 ; SOFT-NEXT: vabs.f16 q8, q8
49 ; SOFT-NEXT: vmov r0, r1, d16
50 ; SOFT-NEXT: vmov r2, r3, d17
51 ; SOFT-NEXT: bx lr
52 ;
53 ; HARD-LABEL: test2_vabs_f16:
54 ; HARD: @ %bb.0: @ %entry
55 ; HARD-NEXT: vabs.f16 q0, q0
56 ; HARD-NEXT: bx lr
57 ;
58 ; SOFTEB-LABEL: test2_vabs_f16:
59 ; SOFTEB: @ %bb.0: @ %entry
60 ; SOFTEB-NEXT: vmov d17, r3, r2
61 ; SOFTEB-NEXT: vmov d16, r1, r0
62 ; SOFTEB-NEXT: vrev64.16 q8, q8
63 ; SOFTEB-NEXT: vabs.f16 q8, q8
64 ; SOFTEB-NEXT: vrev64.16 q8, q8
65 ; SOFTEB-NEXT: vmov r1, r0, d16
66 ; SOFTEB-NEXT: vmov r3, r2, d17
67 ; SOFTEB-NEXT: bx lr
68 ;
69 ; HARDEB-LABEL: test2_vabs_f16:
70 ; HARDEB: @ %bb.0: @ %entry
71 ; HARDEB-NEXT: vrev64.16 q8, q0
72 ; HARDEB-NEXT: vabs.f16 q8, q8
73 ; HARDEB-NEXT: vrev64.16 q0, q8
74 ; HARDEB-NEXT: bx lr
75 entry:
76 %vabs1.i = tail call <8 x half> @llvm.fabs.v8f16(<8 x half> %a)
77 ret <8 x half> %vabs1.i
78 }
79
80 define void @test(double, float, i16, <4 x half>, <8 x half>) {
81 ; SOFT-LABEL: test:
82 ; SOFT: @ %bb.0: @ %entry
83 ; SOFT-NEXT: push {r11, lr}
84 ; SOFT-NEXT: sub sp, sp, #32
85 ; SOFT-NEXT: vldr d16, [sp, #40]
86 ; SOFT-NEXT: mov r12, #16
87 ; SOFT-NEXT: vabs.f16 d16, d16
88 ; SOFT-NEXT: mov lr, sp
89 ; SOFT-NEXT: vst1.16 {d16}, [lr:64], r12
90 ; SOFT-NEXT: add r12, sp, #48
91 ; SOFT-NEXT: vld1.64 {d16, d17}, [r12]
92 ; SOFT-NEXT: vabs.f16 q8, q8
93 ; SOFT-NEXT: str r3, [sp, #8]
94 ; SOFT-NEXT: vst1.64 {d16, d17}, [lr]
95 ; SOFT-NEXT: bl use
96 ; SOFT-NEXT: add sp, sp, #32
97 ; SOFT-NEXT: pop {r11, pc}
98 ;
99 ; HARD-LABEL: test:
100 ; HARD: @ %bb.0: @ %entry
101 ; HARD-NEXT: vabs.f16 q2, q2
102 ; HARD-NEXT: vabs.f16 d2, d2
103 ; HARD-NEXT: b use
104 ;
105 ; SOFTEB-LABEL: test:
106 ; SOFTEB: @ %bb.0: @ %entry
107 ; SOFTEB-NEXT: .save {r11, lr}
108 ; SOFTEB-NEXT: push {r11, lr}
109 ; SOFTEB-NEXT: .pad #32
110 ; SOFTEB-NEXT: sub sp, sp, #32
111 ; SOFTEB-NEXT: vldr d16, [sp, #40]
112 ; SOFTEB-NEXT: mov r12, #16
113 ; SOFTEB-NEXT: mov lr, sp
114 ; SOFTEB-NEXT: str r3, [sp, #8]
115 ; SOFTEB-NEXT: vrev64.16 d16, d16
116 ; SOFTEB-NEXT: vabs.f16 d16, d16
117 ; SOFTEB-NEXT: vst1.16 {d16}, [lr:64], r12
118 ; SOFTEB-NEXT: add r12, sp, #48
119 ; SOFTEB-NEXT: vld1.64 {d16, d17}, [r12]
120 ; SOFTEB-NEXT: vrev64.16 q8, q8
121 ; SOFTEB-NEXT: vabs.f16 q8, q8
122 ; SOFTEB-NEXT: vrev64.16 q8, q8
123 ; SOFTEB-NEXT: vst1.64 {d16, d17}, [lr]
124 ; SOFTEB-NEXT: bl use
125 ; SOFTEB-NEXT: add sp, sp, #32
126 ; SOFTEB-NEXT: pop {r11, pc}
127 ;
128 ; HARDEB-LABEL: test:
129 ; HARDEB: @ %bb.0: @ %entry
130 ; HARDEB-NEXT: vrev64.16 d16, d2
131 ; HARDEB-NEXT: vabs.f16 d16, d16
132 ; HARDEB-NEXT: vrev64.16 d2, d16
133 ; HARDEB-NEXT: vrev64.16 q8, q2
134 ; HARDEB-NEXT: vabs.f16 q8, q8
135 ; HARDEB-NEXT: vrev64.16 q2, q8
136 ; HARDEB-NEXT: b use
137 entry:
138 %5 = tail call <4 x half> @llvm.fabs.v4f16(<4 x half> %3)
139 %6 = tail call <8 x half> @llvm.fabs.v8f16(<8 x half> %4)
140 tail call void @use(double %0, float %1, <4 x half> %5, i16 %2, <8 x half> %6)
141 ret void
142 }
143
144 define void @many_args_test(double, float, i16, <4 x half>, <8 x half>, <8 x half>, <8 x half>) {
145 ; SOFT-LABEL: many_args_test:
146 ; SOFT: @ %bb.0: @ %entry
147 ; SOFT-NEXT: push {r11, lr}
148 ; SOFT-NEXT: sub sp, sp, #32
149 ; SOFT-NEXT: add r12, sp, #80
150 ; SOFT-NEXT: mov lr, sp
151 ; SOFT-NEXT: vld1.64 {d16, d17}, [r12]
152 ; SOFT-NEXT: add r12, sp, #48
153 ; SOFT-NEXT: vabs.f16 q8, q8
154 ; SOFT-NEXT: vld1.64 {d18, d19}, [r12]
155 ; SOFT-NEXT: add r12, sp, #64
156 ; SOFT-NEXT: str r3, [sp, #8]
157 ; SOFT-NEXT: vadd.f16 q8, q8, q9
158 ; SOFT-NEXT: vld1.64 {d18, d19}, [r12]
159 ; SOFT-NEXT: mov r12, #16
160 ; SOFT-NEXT: vmul.f16 q8, q9, q8
161 ; SOFT-NEXT: vldr d18, [sp, #40]
162 ; SOFT-NEXT: vst1.16 {d18}, [lr:64], r12
163 ; SOFT-NEXT: vst1.64 {d16, d17}, [lr]
164 ; SOFT-NEXT: bl use
165 ; SOFT-NEXT: add sp, sp, #32
166 ; SOFT-NEXT: pop {r11, pc}
167 ;
168 ; HARD-LABEL: many_args_test:
169 ; HARD: @ %bb.0: @ %entry
170 ; HARD-NEXT: mov r1, sp
171 ; HARD-NEXT: vld1.64 {d16, d17}, [r1]
172 ; HARD-NEXT: vabs.f16 q8, q8
173 ; HARD-NEXT: vadd.f16 q8, q8, q2
174 ; HARD-NEXT: vmul.f16 q2, q3, q8
175 ; HARD-NEXT: b use
176 ;
177 ; SOFTEB-LABEL: many_args_test:
178 ; SOFTEB: @ %bb.0: @ %entry
179 ; SOFTEB-NEXT: .save {r11, lr}
180 ; SOFTEB-NEXT: push {r11, lr}
181 ; SOFTEB-NEXT: .pad #32
182 ; SOFTEB-NEXT: sub sp, sp, #32
183 ; SOFTEB-NEXT: vldr d16, [sp, #40]
184 ; SOFTEB-NEXT: mov r12, #16
185 ; SOFTEB-NEXT: mov lr, sp
186 ; SOFTEB-NEXT: str r3, [sp, #8]
187 ; SOFTEB-NEXT: vrev64.16 d16, d16
188 ; SOFTEB-NEXT: vst1.16 {d16}, [lr:64], r12
189 ; SOFTEB-NEXT: add r12, sp, #80
190 ; SOFTEB-NEXT: vld1.64 {d16, d17}, [r12]
191 ; SOFTEB-NEXT: add r12, sp, #48
192 ; SOFTEB-NEXT: vrev64.16 q8, q8
193 ; SOFTEB-NEXT: vabs.f16 q8, q8
194 ; SOFTEB-NEXT: vld1.64 {d18, d19}, [r12]
195 ; SOFTEB-NEXT: add r12, sp, #64
196 ; SOFTEB-NEXT: vrev64.16 q9, q9
197 ; SOFTEB-NEXT: vadd.f16 q8, q8, q9
198 ; SOFTEB-NEXT: vld1.64 {d18, d19}, [r12]
199 ; SOFTEB-NEXT: vrev64.16 q9, q9
200 ; SOFTEB-NEXT: vmul.f16 q8, q9, q8
201 ; SOFTEB-NEXT: vrev64.16 q8, q8
202 ; SOFTEB-NEXT: vst1.64 {d16, d17}, [lr]
203 ; SOFTEB-NEXT: bl use
204 ; SOFTEB-NEXT: add sp, sp, #32
205 ; SOFTEB-NEXT: pop {r11, pc}
206 ;
207 ; HARDEB-LABEL: many_args_test:
208 ; HARDEB: @ %bb.0: @ %entry
209 ; HARDEB-NEXT: mov r1, sp
210 ; HARDEB-NEXT: vld1.64 {d16, d17}, [r1]
211 ; HARDEB-NEXT: vrev64.16 q8, q8
212 ; HARDEB-NEXT: vabs.f16 q8, q8
213 ; HARDEB-NEXT: vrev64.16 q9, q2
214 ; HARDEB-NEXT: vadd.f16 q8, q8, q9
215 ; HARDEB-NEXT: vrev64.16 q9, q3
216 ; HARDEB-NEXT: vmul.f16 q8, q9, q8
217 ; HARDEB-NEXT: vrev64.16 q2, q8
218 ; HARDEB-NEXT: b use
219 entry:
220 %7 = tail call <8 x half> @llvm.fabs.v8f16(<8 x half> %6)
221 %8 = fadd <8 x half> %7, %4
222 %9 = fmul <8 x half> %5, %8
223 tail call void @use(double %0, float %1, <4 x half> %3, i16 %2, <8 x half> %9)
224 ret void
225 }