llvm.org GIT mirror llvm / 9105f66
AArch64/ARM64: remove AArch64 from tree prior to renaming ARM64.

I'm doing this in two phases for a better "git blame" record. This commit removes the previous AArch64 backend and redirects all functionality to ARM64. It also deduplicates test-lines and removes orphaned AArch64 tests. The next step will be "git mv ARM64 AArch64" and rewire most of the tests. Hopefully LLVM is still functional, though it would be even better if no-one ever had to care because the rename happens straight afterwards.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209576 91177308-0d34-0410-b5e6-96231b3b80d8

Tim Northover, 5 years ago
355 changed file(s) with 76 addition(s) and 67376 deletion(s).
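The commit message above describes a two-phase rename. As a rough sketch of that workflow (illustrative shell commands only, not the exact ones recorded in the repository history):

    # Phase 1 (this commit): delete the old AArch64 backend and point builds and
    # tests at the ARM64 implementation instead.
    git rm -r lib/Target/AArch64
    git commit

    # Phase 2 (the follow-up commit): rename ARM64 into AArch64's place so that
    # "git blame" keeps tracking the surviving code.
    git mv lib/Target/ARM64 lib/Target/AArch64
    git commit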
126 126 set(LLVM_LIBDIR_SUFFIX "" CACHE STRING "Define suffix of library directory name (32/64)" )
127 127
128 128 set(LLVM_ALL_TARGETS
129 AArch64
130 129 ARM64
131 130 ARM
132 131 CppBackend
143 142 )
144 143
145 144 # List of targets with JIT support:
146 set(LLVM_TARGETS_WITH_JIT X86 PowerPC AArch64 ARM64 ARM Mips SystemZ)
145 set(LLVM_TARGETS_WITH_JIT X86 PowerPC ARM64 ARM Mips SystemZ)
147 146
148 147 set(LLVM_TARGETS_TO_BUILD "all"
149 148 CACHE STRING "Semicolon-separated list of targets to build, or \"all\".")
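With AArch64 gone from LLVM_ALL_TARGETS, a CMake build that previously requested the AArch64 target has to ask for ARM64 until the follow-up rename lands. A minimal sketch (paths and generator are placeholders; LLVM_TARGETS_TO_BUILD is the cache variable shown above):

    cmake -G Ninja ../llvm -DLLVM_TARGETS_TO_BUILD="ARM64;ARM;X86"
    ninja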
420 420 powerpc*-*) llvm_cv_target_arch="PowerPC" ;;
421 421 arm64*-*) llvm_cv_target_arch="ARM64" ;;
422 422 arm*-*) llvm_cv_target_arch="ARM" ;;
423 aarch64*-*) llvm_cv_target_arch="AArch64" ;;
423 aarch64*-*) llvm_cv_target_arch="ARM64" ;;
424 424 mips-* | mips64-*) llvm_cv_target_arch="Mips" ;;
425 425 mipsel-* | mips64el-*) llvm_cv_target_arch="Mips" ;;
426 426 xcore-*) llvm_cv_target_arch="XCore" ;;
456 456 powerpc*-*) host_arch="PowerPC" ;;
457 457 arm64*-*) host_arch="ARM64" ;;
458 458 arm*-*) host_arch="ARM" ;;
459 aarch64*-*) host_arch="AArch64" ;;
459 aarch64*-*) host_arch="ARM64" ;;
460 460 mips-* | mips64-*) host_arch="Mips" ;;
461 461 mipsel-* | mips64el-*) host_arch="Mips" ;;
462 462 xcore-*) host_arch="XCore" ;;
785 785 PowerPC) AC_SUBST(TARGET_HAS_JIT,1) ;;
786 786 x86_64) AC_SUBST(TARGET_HAS_JIT,1) ;;
787 787 ARM) AC_SUBST(TARGET_HAS_JIT,1) ;;
788 AArch64) AC_SUBST(TARGET_HAS_JIT,0) ;;
789 788 Mips) AC_SUBST(TARGET_HAS_JIT,1) ;;
790 789 XCore) AC_SUBST(TARGET_HAS_JIT,0) ;;
791 790 MSP430) AC_SUBST(TARGET_HAS_JIT,0) ;;
796 795 esac
797 796 fi
798 797
799 TARGETS_WITH_JIT="AArch64 ARM ARM64 Mips PowerPC SystemZ X86"
798 TARGETS_WITH_JIT="ARM ARM64 Mips PowerPC SystemZ X86"
800 799 AC_SUBST(TARGETS_WITH_JIT,$TARGETS_WITH_JIT)
801 800
802 801 dnl Allow enablement of building and installing docs
949 948 fi
950 949
951 950 dnl List all possible targets
952 ALL_TARGETS="X86 Sparc PowerPC AArch64 ARM ARM64 Mips XCore MSP430 CppBackend NVPTX Hexagon SystemZ R600"
951 ALL_TARGETS="X86 Sparc PowerPC ARM ARM64 Mips XCore MSP430 CppBackend NVPTX Hexagon SystemZ R600"
953 952 AC_SUBST(ALL_TARGETS,$ALL_TARGETS)
954 953
955 954 dnl Allow specific targets to be specified for building (or not)
970 969 x86_64) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
971 970 sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;;
972 971 powerpc) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;;
973 aarch64) TARGETS_TO_BUILD="AArch64 $TARGETS_TO_BUILD" ;;
972 aarch64) TARGETS_TO_BUILD="ARM64 $TARGETS_TO_BUILD" ;;
974 973 arm64) TARGETS_TO_BUILD="ARM64 $TARGETS_TO_BUILD" ;;
975 974 arm) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;;
976 975 mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
989 988 x86_64) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
990 989 Sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;;
991 990 PowerPC) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;;
992 AArch64) TARGETS_TO_BUILD="AArch64 $TARGETS_TO_BUILD" ;;
991 AArch64) TARGETS_TO_BUILD="ARM64 $TARGETS_TO_BUILD" ;;
993 992 ARM) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;;
994 993 Mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
995 994 XCore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;;
4152 4152 powerpc*-*) llvm_cv_target_arch="PowerPC" ;;
4153 4153 arm64*-*) llvm_cv_target_arch="ARM64" ;;
4154 4154 arm*-*) llvm_cv_target_arch="ARM" ;;
4155 aarch64*-*) llvm_cv_target_arch="AArch64" ;;
4155 aarch64*-*) llvm_cv_target_arch="ARM64" ;;
4156 4156 mips-* | mips64-*) llvm_cv_target_arch="Mips" ;;
4157 4157 mipsel-* | mips64el-*) llvm_cv_target_arch="Mips" ;;
4158 4158 xcore-*) llvm_cv_target_arch="XCore" ;;
4189 4189 powerpc*-*) host_arch="PowerPC" ;;
4190 4190 arm64*-*) host_arch="ARM64" ;;
4191 4191 arm*-*) host_arch="ARM" ;;
4192 aarch64*-*) host_arch="AArch64" ;;
4192 aarch64*-*) host_arch="ARM64" ;;
4193 4193 mips-* | mips64-*) host_arch="Mips" ;;
4194 4194 mipsel-* | mips64el-*) host_arch="Mips" ;;
4195 4195 xcore-*) host_arch="XCore" ;;
5102 5102 ;;
5103 5103 ARM) TARGET_HAS_JIT=1
5104 5104 ;;
5105 AArch64) TARGET_HAS_JIT=0
5106 ;;
5107 5105 Mips) TARGET_HAS_JIT=1
5108 5106 ;;
5109 5107 XCore) TARGET_HAS_JIT=0
5121 5119 esac
5122 5120 fi
5123 5121
5124 TARGETS_WITH_JIT="AArch64 ARM ARM64 Mips PowerPC SystemZ X86"
5122 TARGETS_WITH_JIT="ARM ARM64 Mips PowerPC SystemZ X86"
5125 5123 TARGETS_WITH_JIT=$TARGETS_WITH_JIT
5126 5124
5127 5125
5358 5356
5359 5357 fi
5360 5358
5361 ALL_TARGETS="X86 Sparc PowerPC AArch64 ARM ARM64 Mips XCore MSP430 CppBackend NVPTX Hexagon SystemZ R600"
5359 ALL_TARGETS="X86 Sparc PowerPC ARM ARM64 Mips XCore MSP430 CppBackend NVPTX Hexagon SystemZ R600"
5362 5360 ALL_TARGETS=$ALL_TARGETS
5363 5361
5364 5362
5381 5379 x86_64) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
5382 5380 sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;;
5383 5381 powerpc) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;;
5384 aarch64) TARGETS_TO_BUILD="AArch64 $TARGETS_TO_BUILD" ;;
5382 aarch64) TARGETS_TO_BUILD="ARM64 $TARGETS_TO_BUILD" ;;
5385 5383 arm64) TARGETS_TO_BUILD="ARM64 $TARGETS_TO_BUILD" ;;
5386 5384 arm) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;;
5387 5385 mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
5400 5398 x86_64) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
5401 5399 Sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;;
5402 5400 PowerPC) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;;
5403 AArch64) TARGETS_TO_BUILD="AArch64 $TARGETS_TO_BUILD" ;;
5401 AArch64) TARGETS_TO_BUILD="ARM64 $TARGETS_TO_BUILD" ;;
5404 5402 ARM) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;;
5405 5403 Mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
5406 5404 XCore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;;
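The autoconf path now behaves the same way: both the aarch64 and arm64 spellings select the ARM64 backend. A hedged example invocation (using configure's --enable-targets option as it existed at the time), where either line builds the same backend:

    ../llvm/configure --enable-targets=x86,aarch64
    ../llvm/configure --enable-targets=x86,arm64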
533 533 include "llvm/IR/IntrinsicsX86.td"
534 534 include "llvm/IR/IntrinsicsARM.td"
535 535 include "llvm/IR/IntrinsicsARM64.td"
536 include "llvm/IR/IntrinsicsAArch64.td"
537 536 include "llvm/IR/IntrinsicsXCore.td"
538 537 include "llvm/IR/IntrinsicsHexagon.td"
539 538 include "llvm/IR/IntrinsicsNVVM.td"
+0 -407 include/llvm/IR/IntrinsicsAArch64.td
0 //===- IntrinsicsAArch64.td - Defines AArch64 intrinsics -----------*- tablegen -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines all of the AArch64-specific intrinsics.
10 //
11 //===----------------------------------------------------------------------===//
12
13 //===----------------------------------------------------------------------===//
14 // Advanced SIMD (NEON)
15
16 let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
17
18 // Vector Absolute Compare (Floating Point)
19 def int_aarch64_neon_vacgeq :
20 Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
21 def int_aarch64_neon_vacgtq :
22 Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
23
24 // Vector saturating accumulate
25 def int_aarch64_neon_suqadd : Neon_2Arg_Intrinsic;
26 def int_aarch64_neon_usqadd : Neon_2Arg_Intrinsic;
27
28 // Vector Bitwise reverse
29 def int_aarch64_neon_rbit : Neon_1Arg_Intrinsic;
30
31 // Vector extract and narrow
32 def int_aarch64_neon_xtn :
33 Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
34
35 // Vector floating-point convert
36 def int_aarch64_neon_frintn : Neon_1Arg_Intrinsic;
37 def int_aarch64_neon_fsqrt : Neon_1Arg_Intrinsic;
38 def int_aarch64_neon_vcvtxn :
39 Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
40 def int_aarch64_neon_vcvtzs :
41 Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
42 def int_aarch64_neon_vcvtzu :
43 Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
44
45 // Vector maxNum (Floating Point)
46 def int_aarch64_neon_vmaxnm : Neon_2Arg_Intrinsic;
47
48 // Vector minNum (Floating Point)
49 def int_aarch64_neon_vminnm : Neon_2Arg_Intrinsic;
50
51 // Vector Pairwise maxNum (Floating Point)
52 def int_aarch64_neon_vpmaxnm : Neon_2Arg_Intrinsic;
53
54 // Vector Pairwise minNum (Floating Point)
55 def int_aarch64_neon_vpminnm : Neon_2Arg_Intrinsic;
56
57 // Vector Multiply Extended and Scalar Multiply Extended (Floating Point)
58 def int_aarch64_neon_vmulx :
59 Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>]>;
60
61 class Neon_N2V_Intrinsic
62 : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_i32_ty],
63 [IntrNoMem]>;
64 class Neon_N3V_Intrinsic
65 : Intrinsic<[llvm_anyvector_ty],
66 [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
67 [IntrNoMem]>;
68 class Neon_N2V_Narrow_Intrinsic
69 : Intrinsic<[llvm_anyvector_ty],
70 [LLVMExtendedType<0>, llvm_i32_ty],
71 [IntrNoMem]>;
72
73 // Vector rounding shift right by immediate (Signed)
74 def int_aarch64_neon_vsrshr : Neon_N2V_Intrinsic;
75 def int_aarch64_neon_vurshr : Neon_N2V_Intrinsic;
76 def int_aarch64_neon_vsqshlu : Neon_N2V_Intrinsic;
77
78 def int_aarch64_neon_vsri : Neon_N3V_Intrinsic;
79 def int_aarch64_neon_vsli : Neon_N3V_Intrinsic;
80
81 def int_aarch64_neon_vsqshrun : Neon_N2V_Narrow_Intrinsic;
82 def int_aarch64_neon_vrshrn : Neon_N2V_Narrow_Intrinsic;
83 def int_aarch64_neon_vsqrshrun : Neon_N2V_Narrow_Intrinsic;
84 def int_aarch64_neon_vsqshrn : Neon_N2V_Narrow_Intrinsic;
85 def int_aarch64_neon_vuqshrn : Neon_N2V_Narrow_Intrinsic;
86 def int_aarch64_neon_vsqrshrn : Neon_N2V_Narrow_Intrinsic;
87 def int_aarch64_neon_vuqrshrn : Neon_N2V_Narrow_Intrinsic;
88
89 // Vector across
90 class Neon_Across_Intrinsic
91 : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
92
93 def int_aarch64_neon_saddlv : Neon_Across_Intrinsic;
94 def int_aarch64_neon_uaddlv : Neon_Across_Intrinsic;
95 def int_aarch64_neon_smaxv : Neon_Across_Intrinsic;
96 def int_aarch64_neon_umaxv : Neon_Across_Intrinsic;
97 def int_aarch64_neon_sminv : Neon_Across_Intrinsic;
98 def int_aarch64_neon_uminv : Neon_Across_Intrinsic;
99 def int_aarch64_neon_vaddv : Neon_Across_Intrinsic;
100 def int_aarch64_neon_vmaxv :
101 Intrinsic<[llvm_float_ty], [llvm_v4f32_ty], [IntrNoMem]>;
102 def int_aarch64_neon_vminv :
103 Intrinsic<[llvm_float_ty], [llvm_v4f32_ty], [IntrNoMem]>;
104 def int_aarch64_neon_vmaxnmv :
105 Intrinsic<[llvm_float_ty], [llvm_v4f32_ty], [IntrNoMem]>;
106 def int_aarch64_neon_vminnmv :
107 Intrinsic<[llvm_float_ty], [llvm_v4f32_ty], [IntrNoMem]>;
108
109 // Vector Table Lookup.
110 def int_aarch64_neon_vtbl1 :
111 Intrinsic<[llvm_anyvector_ty],
112 [llvm_v16i8_ty, LLVMMatchType<0>], [IntrNoMem]>;
113
114 def int_aarch64_neon_vtbl2 :
115 Intrinsic<[llvm_anyvector_ty],
116 [llvm_v16i8_ty, llvm_v16i8_ty, LLVMMatchType<0>],
117 [IntrNoMem]>;
118
119 def int_aarch64_neon_vtbl3 :
120 Intrinsic<[llvm_anyvector_ty],
121 [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty,
122 LLVMMatchType<0>], [IntrNoMem]>;
123
124 def int_aarch64_neon_vtbl4 :
125 Intrinsic<[llvm_anyvector_ty],
126 [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty,
127 llvm_v16i8_ty, LLVMMatchType<0>], [IntrNoMem]>;
128
129 // Vector Table Extension.
130 // Some elements of the destination vector may not be updated, so the original
131 // value of that vector is passed as the first argument. The next 1-4
132 // arguments after that are the table.
133 def int_aarch64_neon_vtbx1 :
134 Intrinsic<[llvm_anyvector_ty],
135 [LLVMMatchType<0>, llvm_v16i8_ty, LLVMMatchType<0>],
136 [IntrNoMem]>;
137
138 def int_aarch64_neon_vtbx2 :
139 Intrinsic<[llvm_anyvector_ty],
140 [LLVMMatchType<0>, llvm_v16i8_ty, llvm_v16i8_ty,
141 LLVMMatchType<0>], [IntrNoMem]>;
142
143 def int_aarch64_neon_vtbx3 :
144 Intrinsic<[llvm_anyvector_ty],
145 [LLVMMatchType<0>, llvm_v16i8_ty, llvm_v16i8_ty,
146 llvm_v16i8_ty, LLVMMatchType<0>], [IntrNoMem]>;
147
148 def int_aarch64_neon_vtbx4 :
149 Intrinsic<[llvm_anyvector_ty],
150 [LLVMMatchType<0>, llvm_v16i8_ty, llvm_v16i8_ty,
151 llvm_v16i8_ty, llvm_v16i8_ty, LLVMMatchType<0>],
152 [IntrNoMem]>;
153
154 // Vector Load/store
155 def int_aarch64_neon_vld1x2 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
156 [llvm_ptr_ty, llvm_i32_ty],
157 [IntrReadArgMem]>;
158 def int_aarch64_neon_vld1x3 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
159 LLVMMatchType<0>],
160 [llvm_ptr_ty, llvm_i32_ty],
161 [IntrReadArgMem]>;
162 def int_aarch64_neon_vld1x4 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
163 LLVMMatchType<0>, LLVMMatchType<0>],
164 [llvm_ptr_ty, llvm_i32_ty],
165 [IntrReadArgMem]>;
166
167 def int_aarch64_neon_vst1x2 : Intrinsic<[],
168 [llvm_ptr_ty, llvm_anyvector_ty,
169 LLVMMatchType<0>, llvm_i32_ty],
170 [IntrReadWriteArgMem]>;
171 def int_aarch64_neon_vst1x3 : Intrinsic<[],
172 [llvm_ptr_ty, llvm_anyvector_ty,
173 LLVMMatchType<0>, LLVMMatchType<0>,
174 llvm_i32_ty], [IntrReadWriteArgMem]>;
175 def int_aarch64_neon_vst1x4 : Intrinsic<[],
176 [llvm_ptr_ty, llvm_anyvector_ty,
177 LLVMMatchType<0>, LLVMMatchType<0>,
178 LLVMMatchType<0>, llvm_i32_ty],
179 [IntrReadWriteArgMem]>;
180
181 // Scalar Add
182 def int_aarch64_neon_vaddds :
183 Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
184 def int_aarch64_neon_vadddu :
185 Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
186
187
188 // Scalar Sub
189 def int_aarch64_neon_vsubds :
190 Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
191 def int_aarch64_neon_vsubdu :
192 Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
193
194
195 // Scalar Shift
196 // Scalar Shift Left
197 def int_aarch64_neon_vshlds :
198 Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
199 def int_aarch64_neon_vshldu :
200 Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
201
202 // Scalar Saturating Shift Left
203 def int_aarch64_neon_vqshls : Neon_2Arg_Intrinsic;
204 def int_aarch64_neon_vqshlu : Neon_2Arg_Intrinsic;
205
206 // Scalar Shift Rounding Left
207 def int_aarch64_neon_vrshlds :
208 Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
209 def int_aarch64_neon_vrshldu :
210 Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
211
212 // Scalar Saturating Rounding Shift Left
213 def int_aarch64_neon_vqrshls : Neon_2Arg_Intrinsic;
214 def int_aarch64_neon_vqrshlu : Neon_2Arg_Intrinsic;
215
216 // Scalar Reduce Pairwise Add.
217 def int_aarch64_neon_vpadd :
218 Intrinsic<[llvm_v1i64_ty], [llvm_v2i64_ty],[IntrNoMem]>;
219 def int_aarch64_neon_vpfadd :
220 Intrinsic<[llvm_anyfloat_ty], [llvm_anyvector_ty], [IntrNoMem]>;
221
222 // Scalar Reduce Pairwise Floating Point Max/Min.
223 def int_aarch64_neon_vpmax :
224 Intrinsic<[llvm_anyfloat_ty], [llvm_anyvector_ty], [IntrNoMem]>;
225 def int_aarch64_neon_vpmin :
226 Intrinsic<[llvm_anyfloat_ty], [llvm_anyvector_ty], [IntrNoMem]>;
227
228 // Scalar Reduce Pairwise Floating Point Maxnm/Minnm.
229 def int_aarch64_neon_vpfmaxnm :
230 Intrinsic<[llvm_anyfloat_ty], [llvm_anyvector_ty], [IntrNoMem]>;
231 def int_aarch64_neon_vpfminnm :
232 Intrinsic<[llvm_anyfloat_ty], [llvm_anyvector_ty], [IntrNoMem]>;
233
234 // Scalar Signed Integer Convert To Floating-point
235 def int_aarch64_neon_vcvtint2fps :
236 Intrinsic<[llvm_anyfloat_ty], [llvm_anyvector_ty], [IntrNoMem]>;
237
238 // Scalar Unsigned Integer Convert To Floating-point
239 def int_aarch64_neon_vcvtint2fpu :
240 Intrinsic<[llvm_anyfloat_ty], [llvm_anyvector_ty], [IntrNoMem]>;
241
242 // Scalar Floating-point Convert
243 def int_aarch64_neon_fcvtxn :
244 Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
245 def int_aarch64_neon_fcvtns :
246 Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
247 def int_aarch64_neon_fcvtnu :
248 Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
249 def int_aarch64_neon_fcvtps :
250 Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
251 def int_aarch64_neon_fcvtpu :
252 Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
253 def int_aarch64_neon_fcvtms :
254 Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
255 def int_aarch64_neon_fcvtmu :
256 Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
257 def int_aarch64_neon_fcvtas :
258 Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
259 def int_aarch64_neon_fcvtau :
260 Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
261 def int_aarch64_neon_fcvtzs :
262 Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
263 def int_aarch64_neon_fcvtzu :
264 Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
265
266 // Scalar Floating-point Reciprocal Estimate.
267 def int_aarch64_neon_vrecpe :
268 Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
269
270 // Scalar Floating-point Reciprocal Exponent
271 def int_aarch64_neon_vrecpx :
272 Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
273
274 // Scalar Floating-point Reciprocal Square Root Estimate
275 def int_aarch64_neon_vrsqrte :
276 Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
277
278 // Scalar Floating-point Reciprocal Step
279 def int_aarch64_neon_vrecps :
280 Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
281 [IntrNoMem]>;
282
283 // Scalar Floating-point Reciprocal Square Root Step
284 def int_aarch64_neon_vrsqrts :
285 Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
286 [IntrNoMem]>;
287
288 // Compare with vector operands.
289 class Neon_Cmp_Intrinsic :
290 Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty, llvm_anyvector_ty],
291 [IntrNoMem]>;
292
293 // Floating-point compare with scalar operands.
294 class Neon_Float_Cmp_Intrinsic :
295 Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty, llvm_anyfloat_ty],
296 [IntrNoMem]>;
297
298 // Scalar Compare Equal
299 def int_aarch64_neon_vceq : Neon_Cmp_Intrinsic;
300 def int_aarch64_neon_fceq : Neon_Float_Cmp_Intrinsic;
301
302 // Scalar Compare Greater-Than or Equal
303 def int_aarch64_neon_vcge : Neon_Cmp_Intrinsic;
304 def int_aarch64_neon_vchs : Neon_Cmp_Intrinsic;
305 def int_aarch64_neon_fcge : Neon_Float_Cmp_Intrinsic;
306 def int_aarch64_neon_fchs : Neon_Float_Cmp_Intrinsic;
307
308 // Scalar Compare Less-Than or Equal
309 def int_aarch64_neon_vclez : Neon_Cmp_Intrinsic;
310 def int_aarch64_neon_fclez : Neon_Float_Cmp_Intrinsic;
311
312 // Scalar Compare Less-Than
313 def int_aarch64_neon_vcltz : Neon_Cmp_Intrinsic;
314 def int_aarch64_neon_fcltz : Neon_Float_Cmp_Intrinsic;
315
316 // Scalar Compare Greater-Than
317 def int_aarch64_neon_vcgt : Neon_Cmp_Intrinsic;
318 def int_aarch64_neon_vchi : Neon_Cmp_Intrinsic;
319 def int_aarch64_neon_fcgt : Neon_Float_Cmp_Intrinsic;
320 def int_aarch64_neon_fchi : Neon_Float_Cmp_Intrinsic;
321
322 // Scalar Compare Bitwise Test Bits
323 def int_aarch64_neon_vtstd : Neon_Cmp_Intrinsic;
324
325 // Scalar Floating-point Absolute Compare Greater Than Or Equal
326 def int_aarch64_neon_vcage : Neon_Cmp_Intrinsic;
327 def int_aarch64_neon_fcage : Neon_Float_Cmp_Intrinsic;
328
329 // Scalar Floating-point Absolute Compare Greater Than
330 def int_aarch64_neon_vcagt : Neon_Cmp_Intrinsic;
331 def int_aarch64_neon_fcagt : Neon_Float_Cmp_Intrinsic;
332
333 // Scalar Signed Saturating Accumulated of Unsigned Value
334 def int_aarch64_neon_vuqadd : Neon_2Arg_Intrinsic;
335
336 // Scalar Unsigned Saturating Accumulated of Signed Value
337 def int_aarch64_neon_vsqadd : Neon_2Arg_Intrinsic;
338
339 // Scalar Absolute Value
340 def int_aarch64_neon_vabs :
341 Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty], [IntrNoMem]>;
342
343 // Scalar Absolute Difference
344 def int_aarch64_neon_vabd :
345 Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
346 [IntrNoMem]>;
347
348 // Scalar Negate Value
349 def int_aarch64_neon_vneg :
350 Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty], [IntrNoMem]>;
351
352 // Signed Saturating Doubling Multiply-Add Long
353 def int_aarch64_neon_vqdmlal : Neon_3Arg_Long_Intrinsic;
354
355 // Signed Saturating Doubling Multiply-Subtract Long
356 def int_aarch64_neon_vqdmlsl : Neon_3Arg_Long_Intrinsic;
357
358 def int_aarch64_neon_vmull_p64 :
359 Intrinsic<[llvm_v16i8_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
360
361 class Neon_2Arg_ShiftImm_Intrinsic
362 : Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_i32_ty], [IntrNoMem]>;
363
364 class Neon_3Arg_ShiftImm_Intrinsic
365 : Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty, llvm_i32_ty],
366 [IntrNoMem]>;
367
368 // Scalar Shift Right (Immediate)
369 def int_aarch64_neon_vshrds_n : Neon_2Arg_ShiftImm_Intrinsic;
370 def int_aarch64_neon_vshrdu_n : Neon_2Arg_ShiftImm_Intrinsic;
371
372 // Scalar Shift Right and Accumulate (Immediate)
373 def int_aarch64_neon_vsrads_n : Neon_3Arg_ShiftImm_Intrinsic;
374 def int_aarch64_neon_vsradu_n : Neon_3Arg_ShiftImm_Intrinsic;
375
376 // Scalar Rounding Shift Right and Accumulate (Immediate)
377 def int_aarch64_neon_vrsrads_n : Neon_3Arg_ShiftImm_Intrinsic;
378 def int_aarch64_neon_vrsradu_n : Neon_3Arg_ShiftImm_Intrinsic;
379
380 // Scalar Shift Left (Immediate)
381 def int_aarch64_neon_vshld_n : Neon_2Arg_ShiftImm_Intrinsic;
382
383 // Scalar Saturating Shift Left (Immediate)
384 def int_aarch64_neon_vqshls_n : Neon_N2V_Intrinsic;
385 def int_aarch64_neon_vqshlu_n : Neon_N2V_Intrinsic;
386
387 // Scalar Signed Saturating Shift Left Unsigned (Immediate)
388 def int_aarch64_neon_vqshlus_n : Neon_N2V_Intrinsic;
389
390 // Scalar Signed Fixed-point Convert To Floating-Point (Immediate)
391 def int_aarch64_neon_vcvtfxs2fp_n :
392 Intrinsic<[llvm_anyfloat_ty], [llvm_anyvector_ty, llvm_i32_ty], [IntrNoMem]>;
393
394 // Scalar Unsigned Fixed-point Convert To Floating-Point (Immediate)
395 def int_aarch64_neon_vcvtfxu2fp_n :
396 Intrinsic<[llvm_anyfloat_ty], [llvm_anyvector_ty, llvm_i32_ty], [IntrNoMem]>;
397
398 // Scalar Floating-point Convert To Signed Fixed-point (Immediate)
399 def int_aarch64_neon_vcvtfp2fxs_n :
400 Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty, llvm_i32_ty], [IntrNoMem]>;
401
402 // Scalar Floating-point Convert To Unsigned Fixed-point (Immediate)
403 def int_aarch64_neon_vcvtfp2fxu_n :
404 Intrinsic<[llvm_anyvector_ty], [llvm_anyfloat_ty, llvm_i32_ty], [IntrNoMem]>;
405
406 }
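With the whole IntrinsicsAArch64.td file removed, the deduplicated tests mentioned in the commit message are expected to exercise the ARM64 backend for both triple spellings. A hedged example of how such a test is typically driven (hypothetical file name; llc and -mtriple are standard LLVM tools/flags):

    llc -mtriple=arm64-none-linux-gnu < neon-test.ll
    llc -mtriple=aarch64-none-linux-gnu < neon-test.ll   # now also served by the ARM64 backend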
+0 -46 lib/Target/AArch64/AArch64.h
0 //==-- AArch64.h - Top-level interface for AArch64 representation -*- C++ -*-=//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the entry points for global functions defined in the LLVM
10 // AArch64 back-end.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #ifndef LLVM_TARGET_AARCH64_H
15 #define LLVM_TARGET_AARCH64_H
16
17 #include "MCTargetDesc/AArch64MCTargetDesc.h"
18 #include "llvm/Target/TargetMachine.h"
19
20 namespace llvm {
21
22 class AArch64AsmPrinter;
23 class FunctionPass;
24 class AArch64TargetMachine;
25 class MachineInstr;
26 class MCInst;
27
28 FunctionPass *createAArch64ISelDAG(AArch64TargetMachine &TM,
29 CodeGenOpt::Level OptLevel);
30
31 FunctionPass *createAArch64CleanupLocalDynamicTLSPass();
32
33 FunctionPass *createAArch64BranchFixupPass();
34
35 /// \brief Creates an AArch64-specific Target Transformation Info pass.
36 ImmutablePass *createAArch64TargetTransformInfoPass(
37 const AArch64TargetMachine *TM);
38
39 void LowerAArch64MachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
40 AArch64AsmPrinter &AP);
41
42
43 }
44
45 #endif
+0 -83 lib/Target/AArch64/AArch64.td
0 //===- AArch64.td - Describe the AArch64 Target Machine -------*- tblgen -*-==//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This is the top level entry point for the AArch64 target.
10 //
11 //===----------------------------------------------------------------------===//
12
13 //===----------------------------------------------------------------------===//
14 // Target-independent interfaces
15 //===----------------------------------------------------------------------===//
16
17 include "llvm/Target/Target.td"
18
19 //===----------------------------------------------------------------------===//
20 // AArch64 Subtarget features.
21 //
22
23 def FeatureFPARMv8 : SubtargetFeature<"fp-armv8", "HasFPARMv8", "true",
24 "Enable ARMv8 FP">;
25
26 def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true",
27 "Enable Advanced SIMD instructions", [FeatureFPARMv8]>;
28
29 def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true",
30 "Enable cryptographic instructions">;
31
32 //===----------------------------------------------------------------------===//
33 // AArch64 Processors
34 //
35
36 include "AArch64Schedule.td"
37
38 class ProcNoItin<string Name, list<SubtargetFeature> Features>
39  : Processor<Name, NoItineraries, Features>;
40
41 def : Processor<"generic", GenericItineraries, [FeatureFPARMv8, FeatureNEON]>;
42
43 def ProcA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53",
44 "Cortex-A53 ARM processors",
45 [FeatureFPARMv8,
46 FeatureNEON,
47 FeatureCrypto]>;
48
49 def ProcA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57",
50 "Cortex-A57 ARM processors",
51 [FeatureFPARMv8,
52 FeatureNEON,
53 FeatureCrypto]>;
54
55 def : ProcessorModel<"cortex-a53", CortexA53Model, [ProcA53]>;
56 def : Processor<"cortex-a57", NoItineraries, [ProcA57]>;
57
58 //===----------------------------------------------------------------------===//
59 // Register File Description
60 //===----------------------------------------------------------------------===//
61
62 include "AArch64RegisterInfo.td"
63
64 include "AArch64CallingConv.td"
65
66 //===----------------------------------------------------------------------===//
67 // Instruction Descriptions
68 //===----------------------------------------------------------------------===//
69
70 include "AArch64InstrInfo.td"
71
72 def AArch64InstrInfo : InstrInfo {
73 let noNamedPositionallyEncodedOperands = 1;
74 }
75
76 //===----------------------------------------------------------------------===//
77 // Declare the target which we are implementing
78 //===----------------------------------------------------------------------===//
79
80 def AArch64 : Target {
81 let InstructionSet = AArch64InstrInfo;
82 }
+0 -303 lib/Target/AArch64/AArch64AsmPrinter.cpp
0 //===-- AArch64AsmPrinter.cpp - Print machine code to an AArch64 .s file --===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains a printer that converts from our internal representation
10 // of machine-dependent LLVM code to GAS-format AArch64 assembly language.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "AArch64AsmPrinter.h"
15 #include "InstPrinter/AArch64InstPrinter.h"
16 #include "llvm/ADT/SmallString.h"
17 #include "llvm/CodeGen/MachineModuleInfoImpls.h"
18 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
19 #include "llvm/IR/DebugInfo.h"
20 #include "llvm/IR/Mangler.h"
21 #include "llvm/MC/MCAsmInfo.h"
22 #include "llvm/MC/MCInst.h"
23 #include "llvm/MC/MCSymbol.h"
24 #include "llvm/Support/TargetRegistry.h"
25
26 using namespace llvm;
27
28 #define DEBUG_TYPE "asm-printer"
29
30 /// Try to print a floating-point register as if it belonged to a specified
31 /// register-class. For example the inline asm operand modifier "b" requires its
32 /// argument to be printed as "bN".
33 static bool printModifiedFPRAsmOperand(const MachineOperand &MO,
34 const TargetRegisterInfo *TRI,
35 char RegType, raw_ostream &O) {
36 if (!MO.isReg())
37 return true;
38
39 for (MCRegAliasIterator AR(MO.getReg(), TRI, true); AR.isValid(); ++AR) {
40 if (AArch64::FPR8RegClass.contains(*AR)) {
41 O << RegType << TRI->getEncodingValue(MO.getReg());
42 return false;
43 }
44 }
45
46 // The register doesn't correspond to anything floating-point like.
47 return true;
48 }
49
50 /// Implements the 'w' and 'x' inline asm operand modifiers, which print a GPR
51 /// with the obvious type and an immediate 0 as either wzr or xzr.
52 static bool printModifiedGPRAsmOperand(const MachineOperand &MO,
53 const TargetRegisterInfo *TRI,
54 const TargetRegisterClass &RegClass,
55 raw_ostream &O) {
56 char Prefix = &RegClass == &AArch64::GPR32RegClass ? 'w' : 'x';
57
58 if (MO.isImm() && MO.getImm() == 0) {
59 O << Prefix << "zr";
60 return false;
61 } else if (MO.isReg()) {
62 if (MO.getReg() == AArch64::XSP || MO.getReg() == AArch64::WSP) {
63 O << (Prefix == 'x' ? "sp" : "wsp");
64 return false;
65 }
66
67 for (MCRegAliasIterator AR(MO.getReg(), TRI, true); AR.isValid(); ++AR) {
68 if (RegClass.contains(*AR)) {
69 O << AArch64InstPrinter::getRegisterName(*AR);
70 return false;
71 }
72 }
73 }
74
75 return true;
76 }
77
78 bool AArch64AsmPrinter::printSymbolicAddress(const MachineOperand &MO,
79 bool PrintImmediatePrefix,
80 StringRef Suffix, raw_ostream &O) {
81 StringRef Name;
82 StringRef Modifier;
83 switch (MO.getType()) {
84 default:
85 return true;
86 case MachineOperand::MO_GlobalAddress:
87 Name = getSymbol(MO.getGlobal())->getName();
88
89 // Global variables may be accessed either via a GOT or in various fun and
90 // interesting TLS-model specific ways. Set the prefix modifier as
91 // appropriate here.
92 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(MO.getGlobal())) {
93 Reloc::Model RelocM = TM.getRelocationModel();
94 if (GV->isThreadLocal()) {
95 switch (TM.getTLSModel(GV)) {
96 case TLSModel::GeneralDynamic:
97 Modifier = "tlsdesc";
98 break;
99 case TLSModel::LocalDynamic:
100 Modifier = "dtprel";
101 break;
102 case TLSModel::InitialExec:
103 Modifier = "gottprel";
104 break;
105 case TLSModel::LocalExec:
106 Modifier = "tprel";
107 break;
108 }
109 } else if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) {
110 Modifier = "got";
111 }
112 }
113 break;
114 case MachineOperand::MO_BlockAddress:
115 Name = GetBlockAddressSymbol(MO.getBlockAddress())->getName();
116 break;
117 case MachineOperand::MO_ConstantPoolIndex:
118 Name = GetCPISymbol(MO.getIndex())->getName();
119 break;
120 }
121
122 // Some instructions (notably ADRP) don't take the # prefix for
123 // immediates. Only print it if asked to.
124 if (PrintImmediatePrefix)
125 O << '#';
126
127 // Only need the joining "_" if both the prefix and the suffix are
128 // non-null. This little block simply takes care of the four possible
129 // combinations involved there.
130 if (Modifier == "" && Suffix == "")
131 O << Name;
132 else if (Modifier == "" && Suffix != "")
133 O << ":" << Suffix << ':' << Name;
134 else if (Modifier != "" && Suffix == "")
135 O << ":" << Modifier << ':' << Name;
136 else
137 O << ":" << Modifier << '_' << Suffix << ':' << Name;
138
139 return false;
140 }
141
142 bool AArch64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
143 unsigned AsmVariant,
144 const char *ExtraCode, raw_ostream &O) {
145 const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
146
147 if (!ExtraCode)
148 ExtraCode = "";
149
150 switch(ExtraCode[0]) {
151 default:
152 if (!AsmPrinter::PrintAsmOperand(MI, OpNum, AsmVariant, ExtraCode, O))
153 return false;
154 break;
155 case 'w':
156 // Output 32-bit general register operand, constant zero as wzr, or stack
157 // pointer as wsp. Ignored when used with other operand types.
158 if (!printModifiedGPRAsmOperand(MI->getOperand(OpNum), TRI,
159 AArch64::GPR32RegClass, O))
160 return false;
161 break;
162 case 'x':
163 // Output 64-bit general register operand, constant zero as xzr, or stack
164 // pointer as sp. Ignored when used with other operand types.
165 if (!printModifiedGPRAsmOperand(MI->getOperand(OpNum), TRI,
166 AArch64::GPR64RegClass, O))
167 return false;
168 break;
169 case 'H':
170 // Output higher numbered of a 64-bit general register pair
171 case 'Q':
172 // Output least significant register of a 64-bit general register pair
173 case 'R':
174 // Output most significant register of a 64-bit general register pair
175
176 // FIXME note: these three operand modifiers will require, to some extent,
177 // adding a paired GPR64 register class. Initial investigation suggests that
178 // assertions are hit unless it has a type and is made legal for that type
179 // in ISelLowering. After that step is made, the number of modifications
180 // needed explodes (operation legality, calling conventions, stores, reg
181 // copies ...).
182 llvm_unreachable("FIXME: Unimplemented register pairs");
183 case 'b':
184 case 'h':
185 case 's':
186 case 'd':
187 case 'q':
188 if (!printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI,
189 ExtraCode[0], O))
190 return false;
191 break;
192 case 'A':
193 // Output symbolic address with appropriate relocation modifier (also
194 // suitable for ADRP).
195 if (!printSymbolicAddress(MI->getOperand(OpNum), false, "", O))
196 return false;
197 break;
198 case 'L':
199 // Output bits 11:0 of symbolic address with appropriate :lo12: relocation
200 // modifier.
201 if (!printSymbolicAddress(MI->getOperand(OpNum), true, "lo12", O))
202 return false;
203 break;
204 case 'G':
205 // Output bits 23:12 of symbolic address with appropriate :hi12: relocation
206 // modifier (currently only for TLS local exec).
207 if (!printSymbolicAddress(MI->getOperand(OpNum), true, "hi12", O))
208 return false;
209 break;
210 case 'a':
211 return PrintAsmMemoryOperand(MI, OpNum, AsmVariant, ExtraCode, O);
212 }
213
214 // There's actually no operand modifier, which leads to a slightly eclectic
215 // set of behaviour which we have to handle here.
216 const MachineOperand &MO = MI->getOperand(OpNum);
217 switch (MO.getType()) {
218 default:
219 llvm_unreachable("Unexpected operand for inline assembly");
220 case MachineOperand::MO_Register:
221 // GCC prints the unmodified operand of a 'w' constraint as the vector
222 // register. Technically, we could allocate the argument as a VPR128, but
223 // that leads to extremely dodgy copies being generated to get the data
224 // there.
225 if (printModifiedFPRAsmOperand(MO, TRI, 'v', O))
226 O << AArch64InstPrinter::getRegisterName(MO.getReg());
227 break;
228 case MachineOperand::MO_Immediate:
229 O << '#' << MO.getImm();
230 break;
231 case MachineOperand::MO_FPImmediate:
232 assert(MO.getFPImm()->isExactlyValue(0.0) && "Only FP 0.0 expected");
233 O << "#0.0";
234 break;
235 case MachineOperand::MO_BlockAddress:
236 case MachineOperand::MO_ConstantPoolIndex:
237 case MachineOperand::MO_GlobalAddress:
238 return printSymbolicAddress(MO, false, "", O);
239 }
240
241 return false;
242 }
243
244 bool AArch64AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
245 unsigned OpNum,
246 unsigned AsmVariant,
247 const char *ExtraCode,
248 raw_ostream &O) {
249 // Currently both the memory constraints (m and Q) behave the same and amount
250 // to the address as a single register. In future, we may allow "m" to provide
251 // both a base and an offset.
252 const MachineOperand &MO = MI->getOperand(OpNum);
253 assert(MO.isReg() && "unexpected inline assembly memory operand");
254 O << '[' << AArch64InstPrinter::getRegisterName(MO.getReg()) << ']';
255 return false;
256 }
257
258 #include "AArch64GenMCPseudoLowering.inc"
259
260 void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
261 // Do any auto-generated pseudo lowerings.
262 if (emitPseudoExpansionLowering(OutStreamer, MI))
263 return;
264
265 MCInst TmpInst;
266 LowerAArch64MachineInstrToMCInst(MI, TmpInst, *this);
267 EmitToStreamer(OutStreamer, TmpInst);
268 }
269
270 void AArch64AsmPrinter::EmitEndOfAsmFile(Module &M) {
271 if (Subtarget->isTargetELF()) {
272 const TargetLoweringObjectFileELF &TLOFELF =
273 static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering());
274
275 MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>();
276
277 // Output stubs for external and common global variables.
278 MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
279 if (!Stubs.empty()) {
280 OutStreamer.SwitchSection(TLOFELF.getDataRelSection());
281 const DataLayout *TD = TM.getDataLayout();
282
283 for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
284 OutStreamer.EmitLabel(Stubs[i].first);
285 OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(),
286 TD->getPointerSize(0));
287 }
288 Stubs.clear();
289 }
290 }
291 }
292
293 bool AArch64AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
294 return AsmPrinter::runOnMachineFunction(MF);
295 }
296
297 // Force static initialization.
298 extern "C" void LLVMInitializeAArch64AsmPrinter() {
299 RegisterAsmPrinter<AArch64AsmPrinter> X(TheAArch64leTarget);
300 RegisterAsmPrinter<AArch64AsmPrinter> Y(TheAArch64beTarget);
301 }
302
+0 -76 lib/Target/AArch64/AArch64AsmPrinter.h
0 // AArch64AsmPrinter.h - Print machine code to an AArch64 .s file -*- C++ -*-=//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the AArch64 assembly printer class.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #ifndef LLVM_AARCH64ASMPRINTER_H
14 #define LLVM_AARCH64ASMPRINTER_H
15
16 #include "AArch64.h"
17 #include "AArch64TargetMachine.h"
18 #include "llvm/CodeGen/AsmPrinter.h"
19 #include "llvm/MC/MCStreamer.h"
20 #include "llvm/Support/Compiler.h"
21
22 namespace llvm {
23
24 class MCOperand;
25
26 class LLVM_LIBRARY_VISIBILITY AArch64AsmPrinter : public AsmPrinter {
27
28 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
29 /// make the right decision when printing asm code for different targets.
30 const AArch64Subtarget *Subtarget;
31
32 // emitPseudoExpansionLowering - tblgen'erated.
33 bool emitPseudoExpansionLowering(MCStreamer &OutStreamer,
34 const MachineInstr *MI);
35
36 public:
37 explicit AArch64AsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
38 : AsmPrinter(TM, Streamer) {
39 Subtarget = &TM.getSubtarget<AArch64Subtarget>();
40 }
41
42 bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const;
43
44 MCOperand lowerSymbolOperand(const MachineOperand &MO,
45 const MCSymbol *Sym) const;
46
47 void EmitInstruction(const MachineInstr *MI) override;
48 void EmitEndOfAsmFile(Module &M) override;
49
50 bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
51 unsigned AsmVariant, const char *ExtraCode,
52 raw_ostream &O) override;
53 bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
54 unsigned AsmVariant, const char *ExtraCode,
55 raw_ostream &O) override;
56
57 /// printSymbolicAddress - Given some kind of reasonably bare symbolic
58 /// reference, print out the appropriate asm string to represent it. If
59 /// appropriate, a relocation-specifier will be produced, composed of a
60 /// general class derived from the MO parameter and an instruction-specific
61 /// suffix, provided in Suffix. E.g. ":got_lo12:" if a Suffix of "lo12" is
62 /// given.
63 bool printSymbolicAddress(const MachineOperand &MO,
64 bool PrintImmediatePrefix,
65 StringRef Suffix, raw_ostream &O);
66
67 const char *getPassName() const override {
68 return "AArch64 Assembly Printer";
69 }
70
71 bool runOnMachineFunction(MachineFunction &MF) override;
72 };
73 } // end namespace llvm
74
75 #endif
+0 -601 lib/Target/AArch64/AArch64BranchFixupPass.cpp
0 //===-- AArch64BranchFixupPass.cpp - AArch64 branch fixup -----------------===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains a pass that fixes AArch64 branches which have ended up out
10 // of range for their immediate operands.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "AArch64.h"
15 #include "AArch64InstrInfo.h"
16 #include "Utils/AArch64BaseInfo.h"
17 #include "llvm/ADT/Statistic.h"
18 #include "llvm/CodeGen/MachineFunctionPass.h"
19 #include "llvm/CodeGen/MachineInstrBuilder.h"
20 #include "llvm/CodeGen/MachineRegisterInfo.h"
21 #include "llvm/Support/Debug.h"
22 #include "llvm/Support/Format.h"
23 #include "llvm/Support/raw_ostream.h"
24 using namespace llvm;
25
26 #define DEBUG_TYPE "aarch64-branch-fixup"
27
28 STATISTIC(NumSplit, "Number of uncond branches inserted");
29 STATISTIC(NumCBrFixed, "Number of cond branches fixed");
30
31 /// Return the worst case padding that could result from unknown offset bits.
32 /// This does not include alignment padding caused by known offset bits.
33 ///
34 /// @param LogAlign log2(alignment)
35 /// @param KnownBits Number of known low offset bits.
36 static inline unsigned UnknownPadding(unsigned LogAlign, unsigned KnownBits) {
37 if (KnownBits < LogAlign)
38 return (1u << LogAlign) - (1u << KnownBits);
39 return 0;
40 }
41
42 namespace {
43 /// Due to limited PC-relative displacements, conditional branches to distant
44 /// blocks may need converting into an unconditional equivalent. For example:
45 /// tbz w1, #0, far_away
46 /// becomes
47 /// tbnz w1, #0, skip
48 /// b far_away
49 /// skip:
50 class AArch64BranchFixup : public MachineFunctionPass {
51 /// Information about the offset and size of a single basic block.
52 struct BasicBlockInfo {
53 /// Distance from the beginning of the function to the beginning of this
54 /// basic block.
55 ///
56 /// Offsets are computed assuming worst case padding before an aligned
57 /// block. This means that subtracting basic block offsets always gives a
58 /// conservative estimate of the real distance which may be smaller.
59 ///
60 /// Because worst case padding is used, the computed offset of an aligned
61 /// block may not actually be aligned.
62 unsigned Offset;
63
64 /// Size of the basic block in bytes. If the block contains inline
65 /// assembly, this is a worst case estimate.
66 ///
67 /// The size does not include any alignment padding whether from the
68 /// beginning of the block, or from an aligned jump table at the end.
69 unsigned Size;
70
71 /// The number of low bits in Offset that are known to be exact. The
72 /// remaining bits of Offset are an upper bound.
73 uint8_t KnownBits;
74
75 /// When non-zero, the block contains instructions (inline asm) of unknown
76 /// size. The real size may be smaller than Size bytes by a multiple of 1
77 /// << Unalign.
78 uint8_t Unalign;
79
80 BasicBlockInfo() : Offset(0), Size(0), KnownBits(0), Unalign(0) {}
81
82 /// Compute the number of known offset bits internally to this block.
83 /// This number should be used to predict worst case padding when
84 /// splitting the block.
85 unsigned internalKnownBits() const {
86 unsigned Bits = Unalign ? Unalign : KnownBits;
87 // If the block size isn't a multiple of the known bits, assume the
88 // worst case padding.
89 if (Size & ((1u << Bits) - 1))
90 Bits = countTrailingZeros(Size);
91 return Bits;
92 }
93
94 /// Compute the offset immediately following this block. If LogAlign is
95 /// specified, return the offset the successor block will get if it has
96 /// this alignment.
97 unsigned postOffset(unsigned LogAlign = 0) const {
98 unsigned PO = Offset + Size;
99 if (!LogAlign)
100 return PO;
101 // Add alignment padding from the terminator.
102 return PO + UnknownPadding(LogAlign, internalKnownBits());
103 }
104
105 /// Compute the number of known low bits of postOffset. If this block
106 /// contains inline asm, the number of known bits drops to the
107 /// instruction alignment. An aligned terminator may increase the number
108 /// of known bits.
109 /// If LogAlign is given, also consider the alignment of the next block.
110 unsigned postKnownBits(unsigned LogAlign = 0) const {
111 return std::max(LogAlign, internalKnownBits());
112 }
113 };
114
115 std::vector<BasicBlockInfo> BBInfo;
116
117 /// One per immediate branch, keeping the machine instruction pointer,
118 /// conditional or unconditional, the max displacement, and (if IsCond is
119 /// true) the corresponding inverted branch opcode.
120 struct ImmBranch {
121 MachineInstr *MI;
122 unsigned OffsetBits : 31;
123 bool IsCond : 1;
124 ImmBranch(MachineInstr *mi, unsigned offsetbits, bool cond)
125 : MI(mi), OffsetBits(offsetbits), IsCond(cond) {}
126 };
127
128 /// Keep track of all the immediate branch instructions.
129 ///
130 std::vector<ImmBranch> ImmBranches;
131
132 MachineFunction *MF;
133 const AArch64InstrInfo *TII;
134 public:
135 static char ID;
136 AArch64BranchFixup() : MachineFunctionPass(ID) {}
137
138 bool runOnMachineFunction(MachineFunction &MF) override;
139
140 const char *getPassName() const override {
141 return "AArch64 branch fixup pass";
142 }
143
144 private:
145 void initializeFunctionInfo();
146 MachineBasicBlock *splitBlockBeforeInstr(MachineInstr *MI);
147 void adjustBBOffsetsAfter(MachineBasicBlock *BB);
148 bool isBBInRange(MachineInstr *MI, MachineBasicBlock *BB,
149 unsigned OffsetBits);
150 bool fixupImmediateBr(ImmBranch &Br);
151 bool fixupConditionalBr(ImmBranch &Br);
152
153 void computeBlockSize(MachineBasicBlock *MBB);
154 unsigned getOffsetOf(MachineInstr *MI) const;
155 void dumpBBs();
156 void verify();
157 };
158 char AArch64BranchFixup::ID = 0;
159 }
160
161 /// check BBOffsets
162 void AArch64BranchFixup::verify() {
163 #ifndef NDEBUG
164 for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
165 MBBI != E; ++MBBI) {
166 MachineBasicBlock *MBB = MBBI;
167 unsigned MBBId = MBB->getNumber();
168 assert(!MBBId || BBInfo[MBBId - 1].postOffset() <= BBInfo[MBBId].Offset);
169 }
170 #endif
171 }
172
173 /// print block size and offset information - debugging
174 void AArch64BranchFixup::dumpBBs() {
175 DEBUG({
176 for (unsigned J = 0, E = BBInfo.size(); J !=E; ++J) {
177 const BasicBlockInfo &BBI = BBInfo[J];
178 dbgs() << format("%08x BB#%u\t", BBI.Offset, J)
179 << " kb=" << unsigned(BBI.KnownBits)
180 << " ua=" << unsigned(BBI.Unalign)
181 << format(" size=%#x\n", BBInfo[J].Size);
182 }
183 });
184 }
185
186 /// Returns an instance of the branch fixup pass.
187 FunctionPass *llvm::createAArch64BranchFixupPass() {
188 return new AArch64BranchFixup();
189 }
190
191 bool AArch64BranchFixup::runOnMachineFunction(MachineFunction &mf) {
192 MF = &mf;
193 DEBUG(dbgs() << "***** AArch64BranchFixup ******");
194 TII = (const AArch64InstrInfo*)MF->getTarget().getInstrInfo();
195
196 // This pass invalidates liveness information when it splits basic blocks.
197 MF->getRegInfo().invalidateLiveness();
198
199 // Renumber all of the machine basic blocks in the function, guaranteeing that
200 // the numbers agree with the position of the block in the function.
201 MF->RenumberBlocks();
202
203 // Do the initial scan of the function, building up information about the
204 // sizes of each block and location of each immediate branch.
205 initializeFunctionInfo();
206
207 // Iteratively fix up branches until there is no change.
208 unsigned NoBRIters = 0;
209 bool MadeChange = false;
210 while (true) {
211 DEBUG(dbgs() << "Beginning iteration #" << NoBRIters << '\n');
212 bool BRChange = false;
213 for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i)
214 BRChange |= fixupImmediateBr(ImmBranches[i]);
215 if (BRChange && ++NoBRIters > 30)
216 report_fatal_error("Branch Fix Up pass failed to converge!");
217 DEBUG(dumpBBs());
218
219 if (!BRChange)
220 break;
221 MadeChange = true;
222 }
223
224 // After a while, this might be made debug-only, but it is not expensive.
225 verify();
226
227 DEBUG(dbgs() << '\n'; dumpBBs());
228
229 BBInfo.clear();
230 ImmBranches.clear();
231
232 return MadeChange;
233 }
234
235 /// Return true if the specified basic block can fallthrough into the block
236 /// immediately after it.
237 static bool BBHasFallthrough(MachineBasicBlock *MBB) {
238 // Get the next machine basic block in the function.
239 MachineFunction::iterator MBBI = MBB;
240 // Can't fall off end of function.
241 if (std::next(MBBI) == MBB->getParent()->end())
242 return false;
243
244 MachineBasicBlock *NextBB = std::next(MBBI);
245 for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
246 E = MBB->succ_end(); I != E; ++I)
247 if (*I == NextBB)
248 return true;
249
250 return false;
251 }
252
253 /// Do the initial scan of the function, building up information about the sizes
254 /// of each block, and each immediate branch.
255 void AArch64BranchFixup::initializeFunctionInfo() {
256 BBInfo.clear();
257 BBInfo.resize(MF->getNumBlockIDs());
258
259 // First thing, compute the size of all basic blocks, and see if the function
260 // has any inline assembly in it. If so, we have to be conservative about
261 // alignment assumptions, as we don't know for sure the size of any
262 // instructions in the inline assembly.
263 for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I)
264 computeBlockSize(I);
265
266 // The known bits of the entry block offset are determined by the function
267 // alignment.
268 BBInfo.front().KnownBits = MF->getAlignment();
269
270 // Compute block offsets and known bits.
271 adjustBBOffsetsAfter(MF->begin());
272
273 // Now go back through the instructions and build up our data structures.
274 for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
275 MBBI != E; ++MBBI) {
276 MachineBasicBlock &MBB = *MBBI;
277
278 for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
279 I != E; ++I) {
280 if (I->isDebugValue())
281 continue;
282
283 int Opc = I->getOpcode();
284 if (I->isBranch()) {
285 bool IsCond = false;
286
287 // The offsets encoded in instructions here scale by the instruction
288 // size (4 bytes), effectively increasing their range by 2 bits.
289 unsigned Bits = 0;
290 switch (Opc) {
291 default:
292 continue; // Ignore other JT branches
293 case AArch64::TBZxii:
294 case AArch64::TBZwii:
295 case AArch64::TBNZxii:
296 case AArch64::TBNZwii:
297 IsCond = true;
298 Bits = 14 + 2;
299 break;
300 case AArch64::Bcc:
301 case AArch64::CBZx:
302 case AArch64::CBZw:
303 case AArch64::CBNZx:
304 case AArch64::CBNZw:
305 IsCond = true;
306 Bits = 19 + 2;
307 break;
308 case AArch64::Bimm:
309 Bits = 26 + 2;
310 break;
311 }
312
313 // Record this immediate branch.
314 ImmBranches.push_back(ImmBranch(I, Bits, IsCond));
315 }
316 }
317 }
318 }
319
320 /// Compute the size and some alignment information for MBB. This function
321 /// updates BBInfo directly.
322 void AArch64BranchFixup::computeBlockSize(MachineBasicBlock *MBB) {
323 BasicBlockInfo &BBI = BBInfo[MBB->getNumber()];
324 BBI.Size = 0;
325 BBI.Unalign = 0;
326
327 for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
328 ++I) {
329 BBI.Size += TII->getInstSizeInBytes(*I);
330 // For inline asm, GetInstSizeInBytes returns a conservative estimate.
331 // The actual size may be smaller, but still a multiple of the instr size.
332 if (I->isInlineAsm())
333 BBI.Unalign = 2;
334 }
335 }
336
337 /// Return the current offset of the specified machine instruction from the
338 /// start of the function. This offset changes as stuff is moved around inside
339 /// the function.
340 unsigned AArch64BranchFixup::getOffsetOf(MachineInstr *MI) const {
341 MachineBasicBlock *MBB = MI->getParent();
342
343 // The offset is composed of two things: the sum of the sizes of all MBB's
344 // before this instruction's block, and the offset from the start of the block
345 // it is in.
346 unsigned Offset = BBInfo[MBB->getNumber()].Offset;
347
348 // Sum instructions before MI in MBB.
349 for (MachineBasicBlock::iterator I = MBB->begin(); &*I != MI; ++I) {
350 assert(I != MBB->end() && "Didn't find MI in its own basic block?");
351 Offset += TII->getInstSizeInBytes(*I);
352 }
353 return Offset;
354 }
355
356 /// Split the basic block containing MI into two blocks, which are joined by
357 /// an unconditional branch. Update data structures and renumber blocks to
358 /// account for this change and returns the newly created block.
359 MachineBasicBlock *
360 AArch64BranchFixup::splitBlockBeforeInstr(MachineInstr *MI) {
361 MachineBasicBlock *OrigBB = MI->getParent();
362
363 // Create a new MBB for the code after the OrigBB.
364 MachineBasicBlock *NewBB =
365 MF->CreateMachineBasicBlock(OrigBB->getBasicBlock());
366 MachineFunction::iterator MBBI = OrigBB; ++MBBI;
367 MF->insert(MBBI, NewBB);
368
369 // Splice the instructions starting with MI over to NewBB.
370 NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end());
371
372 // Add an unconditional branch from OrigBB to NewBB.
373 // Note the new unconditional branch is not being recorded.
374 // There doesn't seem to be meaningful DebugInfo available; this doesn't
375 // correspond to anything in the source.
376 BuildMI(OrigBB, DebugLoc(), TII->get(AArch64::Bimm)).addMBB(NewBB);
377 ++NumSplit;
378
379 // Update the CFG. All succs of OrigBB are now succs of NewBB.
380 NewBB->transferSuccessors(OrigBB);
381
382 // OrigBB branches to NewBB.
383 OrigBB->addSuccessor(NewBB);
384
385 // Update internal data structures to account for the newly inserted MBB.
386 MF->RenumberBlocks(NewBB);
387
388 // Insert an entry into BBInfo to align it properly with the (newly
389 // renumbered) block numbers.
390 BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo());
391
392 // Figure out how large the OrigBB is. As the first half of the original
393 // block, it cannot contain a tablejump. The size includes
394 // the new jump we added. (It should be possible to do this without
395 // recounting everything, but it's very confusing, and this is rarely
396 // executed.)
397 computeBlockSize(OrigBB);
398
399 // Figure out how large the NewMBB is. As the second half of the original
400 // block, it may contain a tablejump.
401 computeBlockSize(NewBB);
402
403 // All BBOffsets following these blocks must be modified.
404 adjustBBOffsetsAfter(OrigBB);
405
406 return NewBB;
407 }
408
409 void AArch64BranchFixup::adjustBBOffsetsAfter(MachineBasicBlock *BB) {
410 unsigned BBNum = BB->getNumber();
411 for(unsigned i = BBNum + 1, e = MF->getNumBlockIDs(); i < e; ++i) {
412 // Get the offset and known bits at the end of the layout predecessor.
413 // Include the alignment of the current block.
414 unsigned LogAlign = MF->getBlockNumbered(i)->getAlignment();
415 unsigned Offset = BBInfo[i - 1].postOffset(LogAlign);
416 unsigned KnownBits = BBInfo[i - 1].postKnownBits(LogAlign);
417
418 // This is where block i begins. Stop if the offset is already correct,
419 // and we have updated 2 blocks. This is the maximum number of blocks
420 // changed before calling this function.
421 if (i > BBNum + 2 &&
422 BBInfo[i].Offset == Offset &&
423 BBInfo[i].KnownBits == KnownBits)
424 break;
425
426 BBInfo[i].Offset = Offset;
427 BBInfo[i].KnownBits = KnownBits;
428 }
429 }
430
431 /// Returns true if the distance between specific MI and specific BB can fit in
432 /// MI's displacement field.
433 bool AArch64BranchFixup::isBBInRange(MachineInstr *MI,
434 MachineBasicBlock *DestBB,
435 unsigned OffsetBits) {
436 int64_t BrOffset = getOffsetOf(MI);
437 int64_t DestOffset = BBInfo[DestBB->getNumber()].Offset;
438
439 DEBUG(dbgs() << "Branch of destination BB#" << DestBB->getNumber()
440 << " from BB#" << MI->getParent()->getNumber()
441 << " bits available=" << OffsetBits
442 << " from " << getOffsetOf(MI) << " to " << DestOffset
443 << " offset " << int(DestOffset-BrOffset) << "\t" << *MI);
444
445 return isIntN(OffsetBits, DestOffset - BrOffset);
446 }
447
448 /// Fix up an immediate branch whose destination is too far away to fit in its
449 /// displacement field.
450 bool AArch64BranchFixup::fixupImmediateBr(ImmBranch &Br) {
451 MachineInstr *MI = Br.MI;
452 MachineBasicBlock *DestBB = nullptr;
453 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
454 if (MI->getOperand(i).isMBB()) {
455 DestBB = MI->getOperand(i).getMBB();
456 break;
457 }
458 }
459 assert(DestBB && "Branch with no destination BB?");
460
461 // Check to see if the DestBB is already in-range.
462 if (isBBInRange(MI, DestBB, Br.OffsetBits))
463 return false;
464
465 assert(Br.IsCond && "Only conditional branches should need fixup");
466 return fixupConditionalBr(Br);
467 }
468
469 /// Fix up a conditional branch whose destination is too far away to fit in its
470 /// displacement field. It is converted to an inverse conditional branch + an
471 /// unconditional branch to the destination.
472 bool
473 AArch64BranchFixup::fixupConditionalBr(ImmBranch &Br) {
474 MachineInstr *MI = Br.MI;
475 MachineBasicBlock *MBB = MI->getParent();
476 unsigned CondBrMBBOperand = 0;
477
478 // The general idea is to add an unconditional branch to the destination and
479 // invert the conditional branch to jump over it. Complications occur around
480 // fallthrough and unreachable ends to the block.
481 // b.lt L1
482 // =>
483 // b.ge L2
484 // b L1
485 // L2:
486
487 // First we invert the conditional branch, by creating a replacement if
488 // necessary. This if statement contains all the special handling of different
489 // branch types.
490 if (MI->getOpcode() == AArch64::Bcc) {
491 // The basic block is operand number 1 for Bcc
492 CondBrMBBOperand = 1;
493
494 A64CC::CondCodes CC = (A64CC::CondCodes)MI->getOperand(0).getImm();
495 CC = A64InvertCondCode(CC);
496 MI->getOperand(0).setImm(CC);
497 } else {
498 MachineInstrBuilder InvertedMI;
499 int InvertedOpcode;
500 switch (MI->getOpcode()) {
501 default: llvm_unreachable("Unknown branch type");
502 case AArch64::TBZxii: InvertedOpcode = AArch64::TBNZxii; break;
503 case AArch64::TBZwii: InvertedOpcode = AArch64::TBNZwii; break;
504 case AArch64::TBNZxii: InvertedOpcode = AArch64::TBZxii; break;
505 case AArch64::TBNZwii: InvertedOpcode = AArch64::TBZwii; break;
506 case AArch64::CBZx: InvertedOpcode = AArch64::CBNZx; break;
507 case AArch64::CBZw: InvertedOpcode = AArch64::CBNZw; break;
508 case AArch64::CBNZx: InvertedOpcode = AArch64::CBZx; break;
509 case AArch64::CBNZw: InvertedOpcode = AArch64::CBZw; break;
510 }
511
512 InvertedMI = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(InvertedOpcode));
513 for (unsigned i = 0, e= MI->getNumOperands(); i != e; ++i) {
514 InvertedMI.addOperand(MI->getOperand(i));
515 if (MI->getOperand(i).isMBB())
516 CondBrMBBOperand = i;
517 }
518
519 MI->eraseFromParent();
520 MI = Br.MI = InvertedMI;
521 }
522
523 // If the branch is at the end of its MBB and that has a fall-through block,
524 // direct the updated conditional branch to the fall-through
525 // block. Otherwise, split the MBB before the next instruction.
526 MachineInstr *BMI = &MBB->back();
527 bool NeedSplit = (BMI != MI) || !BBHasFallthrough(MBB);
528
529 ++NumCBrFixed;
530 if (BMI != MI) {
531 if (std::next(MachineBasicBlock::iterator(MI)) == std::prev(MBB->end()) &&
532 BMI->getOpcode() == AArch64::Bimm) {
533 // Last MI in the BB is an unconditional branch. We can swap destinations:
534 // b.eq L1 (temporarily b.ne L1 after first change)
535 // b L2
536 // =>
537 // b.ne L2
538 // b L1
539 MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB();
540 if (isBBInRange(MI, NewDest, Br.OffsetBits)) {
541 DEBUG(dbgs() << " Invert Bcc condition and swap its destination with "
542 << *BMI);
543 MachineBasicBlock *DestBB = MI->getOperand(CondBrMBBOperand).getMBB();
544 BMI->getOperand(0).setMBB(DestBB);
545 MI->getOperand(CondBrMBBOperand).setMBB(NewDest);
546 return true;
547 }
548 }
549 }
550
551 if (NeedSplit) {
552 MachineBasicBlock::iterator MBBI = MI; ++MBBI;
553 splitBlockBeforeInstr(MBBI);
554 // No need for the branch to the next block. We're adding an unconditional
555 // branch to the destination.
556 int delta = TII->getInstSizeInBytes(MBB->back());
557 BBInfo[MBB->getNumber()].Size -= delta;
558 MBB->back().eraseFromParent();
559 // BBInfo[SplitBB].Offset is wrong temporarily, fixed below
560 }
561
562 // After splitting and removing the unconditional branch from the original BB,
563 // the structure is now:
564 // oldbb:
565 // [things]
566 // b.invertedCC L1
567 // splitbb/fallthroughbb:
568 // [old b L2/real continuation]
569 //
570 // We now have to change the conditional branch to point to splitbb and add an
571 // unconditional branch after it to L1, giving the final structure:
572 // oldbb:
573 // [things]
574 // b.invertedCC splitbb
575 // b L1
576 // splitbb/fallthroughbb:
577 // [old b L2/real continuation]
578 MachineBasicBlock *NextBB = std::next(MachineFunction::iterator(MBB));
579
580 DEBUG(dbgs() << " Insert B to BB#"
581 << MI->getOperand(CondBrMBBOperand).getMBB()->getNumber()
582 << " also invert condition and change dest. to BB#"
583 << NextBB->getNumber() << "\n");
584
585 // Insert a new unconditional branch and fixup the destination of the
586 // conditional one. Also update the ImmBranch as well as adding a new entry
587 // for the new branch.
588 BuildMI(MBB, DebugLoc(), TII->get(AArch64::Bimm))
589 .addMBB(MI->getOperand(CondBrMBBOperand).getMBB());
590 MI->getOperand(CondBrMBBOperand).setMBB(NextBB);
591
592 BBInfo[MBB->getNumber()].Size += TII->getInstSizeInBytes(MBB->back());
593
594 // Bimm encodes a 26-bit immediate that specifies a multiple of 4, giving 26 + 2 usable offset bits.
595 unsigned OffsetBits = 26 + 2;
596 ImmBranches.push_back(ImmBranch(&MBB->back(), OffsetBits, false));
597
598 adjustBBOffsetsAfter(MBB);
599 return true;
600 }
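
As a quick sanity check on the "26 + 2" above, assuming only the architectural encoding of the unconditional B instruction (nothing specific to this pass): a 26-bit immediate scaled by 4 gives a signed reach of +/- 128 MiB.

#include <cstdint>
#include <cstdio>

int main() {
  unsigned OffsetBits = 26 + 2;                   // 26-bit immediate, 4-byte units
  int64_t Reach = int64_t(1) << (OffsetBits - 1); // signed range is +/- 2^27 bytes
  std::printf("B reach: +/- %lld bytes (%lld MiB)\n",
              (long long)Reach, (long long)(Reach >> 20));
  // Prints: B reach: +/- 134217728 bytes (128 MiB)
}
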
+0
-197
lib/Target/AArch64/AArch64CallingConv.td
None //==-- AArch64CallingConv.td - Calling Conventions for ARM ----*- tblgen -*-==//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 // This describes the calling conventions for the AArch64 architecture.
9 //===----------------------------------------------------------------------===//
10
11
12 // The AArch64 Procedure Call Standard is unfortunately specified at a slightly
13 // higher level of abstraction than LLVM's target interface presents. In
14 // particular, it refers (like other ABIs, in fact) directly to
15 // structs. However, generic LLVM code takes the liberty of lowering structure
16 // arguments to the component fields before we see them.
17 //
18 // As a result, the obvious direct map from LLVM IR to PCS concepts can't be
19 // implemented, so the goals of this calling convention are, in decreasing
20 // priority order:
21 // 1. Expose *some* way to express the concepts required to implement the
22 // generic PCS from a front-end.
23 // 2. Provide a sane ABI for pure LLVM.
24 // 3. Follow the generic PCS as closely as is naturally possible.
25 //
26 // The suggested front-end implementation of PCS features is:
27 // * Integer, float and vector arguments of all sizes which end up in
28 // registers are passed and returned via the natural LLVM type.
29 // * Structure arguments with size <= 16 bytes are passed and returned in
30 // registers as similar integer or composite types. For example:
31 // [1 x i64], [2 x i64] or [1 x i128] (if alignment 16 needed).
32 // * HFAs in registers follow rules similar to small structs: appropriate
33 // composite types.
34 // * Structure arguments with size > 16 bytes are passed via a pointer,
35 // handled completely by the front-end.
36 // * Structure return values > 16 bytes via an sret pointer argument.
37 // * Other stack-based arguments (not large structs) are passed using byval
38 // pointers. Padding arguments are added beforehand to guarantee a large
39 // struct doesn't later use integer registers.
40 //
41 // N.b. this means that it is the front-end's responsibility (if it cares about
42 // PCS compliance) to check whether enough registers are available for an
43 // argument when deciding how to pass it.
44
45 class CCIfAlign<string Align, CCAction A>:
46 CCIf<"ArgFlags.getOrigAlign() == " # Align, A>;
47
48 def CC_A64_APCS : CallingConv<[
49 // SRet is an LLVM-specific concept, so it takes precedence over general ABI
50 // concerns. However, this rule will be used by C/C++ frontends to implement
51 // structure return.
52 CCIfSRet<CCAssignToReg<[X8]>>,
53
54 // Put ByVal arguments directly on the stack. Minimum size and alignment of a
55 // slot is 64-bit.
56 CCIfByVal<CCPassByVal<8, 8>>,
57
58 // Canonicalise the various types that live in different floating-point
59 // registers. This makes sense because the PCS does not distinguish Short
60 // Vectors and Floating-point types.
61 CCIfType<[v1i16, v2i8], CCBitConvertToType<f16>>,
62 CCIfType<[v1i32, v4i8, v2i16], CCBitConvertToType<f32>>,
63 CCIfType<[v8i8, v4i16, v2i32, v2f32, v1i64, v1f64], CCBitConvertToType<f64>>,
64 CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
65 CCBitConvertToType<f128>>,
66
67 // PCS: "C.1: If the argument is a Half-, Single-, Double- or Quad- precision
68 // Floating-point or Short Vector Type and the NSRN is less than 8, then the
69 // argument is allocated to the least significant bits of register
70 // v[NSRN]. The NSRN is incremented by one. The argument has now been
71 // allocated."
72 CCIfType<[v1i8], CCAssignToReg<[B0, B1, B2, B3, B4, B5, B6, B7]>>,
73 CCIfType<[f16], CCAssignToReg<[H0, H1, H2, H3, H4, H5, H6, H7]>>,
74 CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7]>>,
75 CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
76 CCIfType<[f128], CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
77
78 // PCS: "C.2: If the argument is an HFA and there are sufficient unallocated
79 // SIMD and Floating-point registers (NSRN - number of elements < 8), then the
80 // argument is allocated to SIMD and Floating-point registers (with one
81 // register per element of the HFA). The NSRN is incremented by the number of
82 // registers used. The argument has now been allocated."
83 //
84 // N.b. As above, this rule is the responsibility of the front-end.
85
86 // "C.3: If the argument is an HFA then the NSRN is set to 8 and the size of
87 // the argument is rounded up to the nearest multiple of 8 bytes."
88 //
89 // "C.4: If the argument is an HFA, a Quad-precision Floating-point or Short
90 // Vector Type then the NSAA is rounded up to the larger of 8 or the Natural
91 // Alignment of the Argument's type."
92 //
93 // It is expected that these will be satisfied by adding dummy arguments to
94 // the prototype.
95
96 // PCS: "C.5: If the argument is a Half- or Single- precision Floating-point
97 // type then the size of the argument is set to 8 bytes. The effect is as if
98 // the argument had been copied to the least significant bits of a 64-bit
99 // register and the remaining bits filled with unspecified values."
100 CCIfType<[f16, f32], CCPromoteToType<f64>>,
101
102 // PCS: "C.6: If the argument is an HFA, a Half-, Single-, Double- or Quad-
103 // precision Floating-point or Short Vector Type, then the argument is copied
104 // to memory at the adjusted NSAA. The NSAA is incremented by the size of the
105 // argument. The argument has now been allocated."
106 CCIfType<[f64], CCAssignToStack<8, 8>>,
107 CCIfType<[f128], CCAssignToStack<16, 16>>,
108
109 // PCS: "C.7: If the argument is an Integral Type, the size of the argument is
110 // less than or equal to 8 bytes and the NGRN is less than 8, the argument is
111 // copied to the least significant bits of x[NGRN]. The NGRN is incremented by
112 // one. The argument has now been allocated."
113
114 // First we implement C.8 and C.9 (128-bit types get even registers). i128 is
115 // represented as two i64s, the first one being split. If we delayed this
116 // operation C.8 would never be reached.
117 CCIfType<[i64],
118 CCIfSplit<CCAssignToRegWithShadow<[X0, X2, X4, X6], [X0, X1, X3, X5]>>>,
119
120 // Note: the promotion also implements C.14.
121 CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
122
123 // And now the real implementation of C.7
124 CCIfType<[i64], CCAssignToReg<[X0, X1, X2, X3, X4, X5, X6, X7]>>,
125
126 // PCS: "C.8: If the argument has an alignment of 16 then the NGRN is rounded
127 // up to the next even number."
128 //
129 // "C.9: If the argument is an Integral Type, the size of the argument is
130 // equal to 16 and the NGRN is less than 7, the argument is copied to x[NGRN]
131 // and x[NGRN+1], x[NGRN] shall contain the lower addressed double-word of the
132 // memory representation of the argument. The NGRN is incremented by two. The
133 // argument has now been allocated."
134 //
135 // Subtlety here: what if alignment is 16 but it is not an integral type? All
136 // floating-point types have been allocated already, which leaves composite
137 // types: this is why a front-end may need to produce i128 for a struct <= 16
138 // bytes.
139
140 // PCS: "C.10 If the argument is a Composite Type and the size in double-words
141 // of the argument is not more than 8 minus NGRN, then the argument is copied
142 // into consecutive general-purpose registers, starting at x[NGRN]. The
143 // argument is passed as though it had been loaded into the registers from a
144 // double-word aligned address with an appropriate sequence of LDR
145 // instructions loading consecutive registers from memory (the contents of any
146 // unused parts of the registers are unspecified by this standard). The NGRN
147 // is incremented by the number of registers used. The argument has now been
148 // allocated."
149 //
150 // Another one that's the responsibility of the front-end (sigh).
151
152 // PCS: "C.11: The NGRN is set to 8."
153 CCCustom<"CC_AArch64NoMoreRegs">,
154
155 // PCS: "C.12: The NSAA is rounded up to the larger of 8 or the Natural
156 // Alignment of the argument's type."
157 //
158 // PCS: "C.13: If the argument is a composite type then the argument is copied
159 // to memory at the adjusted NSAA. The NSAA is incremented by the size of the
160 // argument. The argument has now been allocated."
161 //
162 // Note that the effect of this corresponds to a memcpy rather than register
163 // stores so that the struct ends up correctly addressable at the adjusted
164 // NSAA.
165
166 // PCS: "C.14: If the size of the argument is less than 8 bytes then the size
167 // of the argument is set to 8 bytes. The effect is as if the argument was
168 // copied to the least significant bits of a 64-bit register and the remaining
169 // bits filled with unspecified values."
170 //
171 // Integer types were widened above. Floating-point and composite types have
172 // already been allocated completely. Nothing to do.
173
174 // PCS: "C.15: The argument is copied to memory at the adjusted NSAA. The NSAA
175 // is incremented by the size of the argument. The argument has now been
176 // allocated."
177 CCIfType<[i64], CCIfSplit<CCAssignToStackWithShadow<8, 16, [X7]>>>,
178 CCIfType<[i64], CCAssignToStack<8, 8>>
179
180 ]>;
181
182 // According to the PCS, X19-X30 are callee-saved, however only the low 64-bits
183 // of vector registers (8-15) are callee-saved. The order here is picked up
184 // by PrologEpilogInserter.cpp to allocate stack slots, starting from top of
185 // stack upon entry. This gives the customary layout of x30 at [sp-8], x29 at
186 // [sp-16], ...
187 def CSR_PCS : CalleeSavedRegs<(add (sequence "X%u", 30, 19),
188 (sequence "D%u", 15, 8))>;
189
190
191 // TLS descriptor calls are extremely restricted in their changes, to allow
192 // optimisations in the (hopefully) more common fast path where no real action
193 // is needed. They actually have to preserve all registers, except for the
194 // unavoidable X30 and the return register X0.
195 def TLSDesc : CalleeSavedRegs<(add (sequence "X%u", 29, 1),
196 (sequence "Q%u", 31, 0))>;
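
The comments above repeatedly push small-aggregate handling onto the front-end. A hedged sketch of that decision (the enum and function names are illustrative; the real logic lives in a front-end such as Clang, not in this file): structs of at most 16 bytes become [1 x i64], [2 x i64] or i128, anything larger goes indirect.

#include <cassert>
#include <cstdint>

enum class SmallAggKind { OneI64, TwoI64, I128, Indirect };

// Classify an aggregate the way the notes above expect a front-end to:
// <= 8 bytes -> [1 x i64], <= 16 bytes -> [2 x i64], 16-byte alignment -> i128
// (so rules C.8/C.9 land it in an even GPR pair), larger -> indirect/sret.
static SmallAggKind classifyAggregate(uint64_t SizeInBytes,
                                      uint64_t AlignInBytes) {
  if (SizeInBytes > 16)
    return SmallAggKind::Indirect;
  if (AlignInBytes == 16)
    return SmallAggKind::I128;
  return SizeInBytes <= 8 ? SmallAggKind::OneI64 : SmallAggKind::TwoI64;
}

int main() {
  assert(classifyAggregate(12, 4) == SmallAggKind::TwoI64);
  assert(classifyAggregate(16, 16) == SmallAggKind::I128);
  assert(classifyAggregate(24, 8) == SmallAggKind::Indirect);
}
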
+0
-626
lib/Target/AArch64/AArch64FrameLowering.cpp
None //===- AArch64FrameLowering.cpp - AArch64 Frame Information ---------------===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the AArch64 implementation of TargetFrameLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "AArch64.h"
14 #include "AArch64FrameLowering.h"
15 #include "AArch64InstrInfo.h"
16 #include "AArch64MachineFunctionInfo.h"
17 #include "llvm/CodeGen/MachineFrameInfo.h"
18 #include "llvm/CodeGen/MachineFunction.h"
19 #include "llvm/CodeGen/MachineInstrBuilder.h"
20 #include "llvm/CodeGen/MachineMemOperand.h"
21 #include "llvm/CodeGen/MachineModuleInfo.h"
22 #include "llvm/CodeGen/MachineRegisterInfo.h"
23 #include "llvm/CodeGen/RegisterScavenging.h"
24 #include "llvm/IR/Function.h"
25 #include "llvm/MC/MachineLocation.h"
26 #include "llvm/Support/Debug.h"
27 #include "llvm/Support/ErrorHandling.h"
28
29 using namespace llvm;
30
31 void AArch64FrameLowering::splitSPAdjustments(uint64_t Total,
32 uint64_t &Initial,
33 uint64_t &Residual) const {
34 // 0x1f0 here is a pessimistic (i.e. realistic) boundary: x-register LDP
35 // instructions have a 7-bit signed immediate scaled by 8, giving a reach of
36 // 0x1f8, but stack adjustment should always be a multiple of 16.
37 if (Total <= 0x1f0) {
38 Initial = Total;
39 Residual = 0;
40 } else {
41 Initial = 0x1f0;
42 Residual = Total - Initial;
43 }
44 }
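
The 0x1f0 figure above can be sanity-checked from the comment's own premises (this is only a restatement, not new behaviour): a 7-bit signed LDP/STP immediate scaled by 8 tops out at 0x1f8, and the largest 16-byte-aligned adjustment below that is 0x1f0.

#include <cassert>
#include <cstdint>

int main() {
  int64_t MaxImm7 = 63;            // largest positive 7-bit signed value
  int64_t MaxReach = MaxImm7 * 8;  // scaled by 8 for x-register LDP/STP
  assert(MaxReach == 0x1f8);
  assert((MaxReach & ~int64_t(0xf)) == 0x1f0); // round down to a multiple of 16
}
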
45
46 void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const {
47 AArch64MachineFunctionInfo *FuncInfo =
48 MF.getInfo<AArch64MachineFunctionInfo>();
49 MachineBasicBlock &MBB = MF.front();
50 MachineBasicBlock::iterator MBBI = MBB.begin();
51 MachineFrameInfo *MFI = MF.getFrameInfo();
52 const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
53 DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
54
55 MachineModuleInfo &MMI = MF.getMMI();
56 const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
57 bool NeedsFrameMoves = MMI.hasDebugInfo()
58 || MF.getFunction()->needsUnwindTableEntry();
59
60 uint64_t NumInitialBytes, NumResidualBytes;
61
62 // Currently we expect the stack to be laid out by
63 // sub sp, sp, #initial
64 // stp x29, x30, [sp, #offset]
65 // ...
66 // str xxx, [sp, #offset]
67 // sub sp, sp, #rest (possibly via extra instructions).
68 if (MFI->getCalleeSavedInfo().size()) {
69 // If there are callee-saved registers, we want to store them efficiently as
70 // a block, and virtual base assignment happens too early to do it for us so
71 // we adjust the stack in two phases: first just for callee-saved fiddling,
72 // then to allocate the rest of the frame.
73 splitSPAdjustments(MFI->getStackSize(), NumInitialBytes, NumResidualBytes);
74 } else {
75 // If there aren't any callee-saved registers, two-phase adjustment is
76 // inefficient. It's more efficient to adjust with NumInitialBytes too
77 // because when we're in a "callee pops argument space" situation, that pop
78 // must be tacked onto Initial for correctness.
79 NumInitialBytes = MFI->getStackSize();
80 NumResidualBytes = 0;
81 }
82
83 // Tell everyone else how much adjustment we're expecting them to use. In
84 // particular if an adjustment is required for a tail call the epilogue could
85 // have a different view of things.
86 FuncInfo->setInitialStackAdjust(NumInitialBytes);
87
88 emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16, -NumInitialBytes,
89 MachineInstr::FrameSetup);
90
91 if (NeedsFrameMoves && NumInitialBytes) {
92 // We emit this update even if the CFA is set from a frame pointer later so
93 // that the CFA is valid in the interim.
94 MachineLocation Dst(MachineLocation::VirtualFP);
95 unsigned Reg = MRI->getDwarfRegNum(AArch64::XSP, true);
96 unsigned CFIIndex = MMI.addFrameInst(
97 MCCFIInstruction::createDefCfa(nullptr, Reg, -NumInitialBytes));
98 BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
99 .addCFIIndex(CFIIndex);
100 }
101
102 // Otherwise we need to set the frame pointer and/or add a second stack
103 // adjustment.
104
105 bool FPNeedsSetting = hasFP(MF);
106 for (; MBBI != MBB.end(); ++MBBI) {
107 // Note that this search makes strong assumptions about the operation used
108 // to store the frame-pointer: it must be "STP x29, x30, ...". This could
109 // change in future, but until then there's no point in implementing
110 // untestable more generic cases.
111 if (FPNeedsSetting && MBBI->getOpcode() == AArch64::LSPair64_STR
112 && MBBI->getOperand(0).getReg() == AArch64::X29) {
113 int64_t X29FrameIdx = MBBI->getOperand(2).getIndex();
114 FuncInfo->setFramePointerOffset(MFI->getObjectOffset(X29FrameIdx));
115
116 ++MBBI;
117 emitRegUpdate(MBB, MBBI, DL, TII, AArch64::X29, AArch64::XSP,
118 AArch64::X29,
119 NumInitialBytes + MFI->getObjectOffset(X29FrameIdx),
120 MachineInstr::FrameSetup);
121
122 // The offset adjustment used when emitting debugging locations relative
123 // to whatever frame base is set. AArch64 uses the default frame base (FP
124 // or SP) and this adjusts the calculations to be correct.
125 MFI->setOffsetAdjustment(- MFI->getObjectOffset(X29FrameIdx)
126 - MFI->getStackSize());
127
128 if (NeedsFrameMoves) {
129 unsigned Reg = MRI->getDwarfRegNum(AArch64::X29, true);
130 unsigned Offset = MFI->getObjectOffset(X29FrameIdx);
131 unsigned CFIIndex = MMI.addFrameInst(
132 MCCFIInstruction::createDefCfa(nullptr, Reg, Offset));
133 BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
134 .addCFIIndex(CFIIndex);
135 }
136
137 FPNeedsSetting = false;
138 }
139
140 if (!MBBI->getFlag(MachineInstr::FrameSetup))
141 break;
142 }
143
144 assert(!FPNeedsSetting && "Frame pointer couldn't be set");
145
146 emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16, -NumResidualBytes,
147 MachineInstr::FrameSetup);
148
149 // Now we emit the rest of the frame setup information, if necessary: we've
150 // already noted the FP and initial SP moves so we're left with the prologue's
151 // final SP update and callee-saved register locations.
152 if (!NeedsFrameMoves)
153 return;
154
155 // The rest of the stack adjustment
156 if (!hasFP(MF) && NumResidualBytes) {
157 MachineLocation Dst(MachineLocation::VirtualFP);
158 unsigned Reg = MRI->getDwarfRegNum(AArch64::XSP, true);
159 unsigned Offset = NumResidualBytes + NumInitialBytes;
160 unsigned CFIIndex =
161 MMI.addFrameInst(MCCFIInstruction::createDefCfa(nullptr, Reg, -Offset));
162 BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
163 .addCFIIndex(CFIIndex);
164 }
165
166 // And any callee-saved registers (it's fine to leave them to the end here,
167 // because the old values are still valid at this point).
168 const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
169 if (CSI.size()) {
170 for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(),
171 E = CSI.end(); I != E; ++I) {
172 unsigned Offset = MFI->getObjectOffset(I->getFrameIdx());
173 unsigned Reg = MRI->getDwarfRegNum(I->getReg(), true);
174 unsigned CFIIndex = MMI.addFrameInst(
175 MCCFIInstruction::createOffset(nullptr, Reg, Offset));
176 BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
177 .addCFIIndex(CFIIndex);
178 }
179 }
180 }
181
182 void
183 AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
184 MachineBasicBlock &MBB) const {
185 AArch64MachineFunctionInfo *FuncInfo =
186 MF.getInfo<AArch64MachineFunctionInfo>();
187
188 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
189 DebugLoc DL = MBBI->getDebugLoc();
190 const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
191 MachineFrameInfo &MFI = *MF.getFrameInfo();
192 unsigned RetOpcode = MBBI->getOpcode();
193
194 // Initial and residual are named for consistency with the prologue. Note that
195 // in the epilogue, the residual adjustment is executed first.
196 uint64_t NumInitialBytes = FuncInfo->getInitialStackAdjust();
197 uint64_t NumResidualBytes = MFI.getStackSize() - NumInitialBytes;
198 uint64_t ArgumentPopSize = 0;
199 if (RetOpcode == AArch64::TC_RETURNdi ||
200 RetOpcode == AArch64::TC_RETURNxi) {
201 MachineOperand &JumpTarget = MBBI->getOperand(0);
202 MachineOperand &StackAdjust = MBBI->getOperand(1);
203
204 MachineInstrBuilder MIB;
205 if (RetOpcode == AArch64::TC_RETURNdi) {
206 MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::TAIL_Bimm));
207 if (JumpTarget.isGlobal()) {
208 MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
209 JumpTarget.getTargetFlags());
210 } else {
211 assert(JumpTarget.isSymbol() && "unexpected tail call destination");
212 MIB.addExternalSymbol(JumpTarget.getSymbolName(),
213 JumpTarget.getTargetFlags());
214 }
215 } else {
216 assert(RetOpcode == AArch64::TC_RETURNxi && JumpTarget.isReg()
217 && "Unexpected tail call");
218
219 MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::TAIL_BRx));
220 MIB.addReg(JumpTarget.getReg(), RegState::Kill);
221 }
222
223 // Add the extra operands onto the new tail call instruction even though
224 // they're not used directly (so that liveness is tracked properly etc).
225 for (unsigned i = 2, e = MBBI->getNumOperands(); i != e; ++i)
226 MIB->addOperand(MBBI->getOperand(i));
227
228
229 // Delete the pseudo instruction TC_RETURN.
230 MachineInstr *NewMI = std::prev(MBBI);
231 MBB.erase(MBBI);
232 MBBI = NewMI;
233
234 // For a tail-call in a callee-pops-arguments environment, some or all of
235 // the stack may actually be in use for the call's arguments; this is
236 // calculated during LowerCall and consumed here...
237 ArgumentPopSize = StackAdjust.getImm();
238 } else {
239 // ... otherwise the amount to pop is *all* of the argument space,
240 // conveniently stored in the MachineFunctionInfo by
241 // LowerFormalArguments. This will, of course, be zero for the C calling
242 // convention.
243 ArgumentPopSize = FuncInfo->getArgumentStackToRestore();
244 }
245
246 assert(NumInitialBytes % 16 == 0 && NumResidualBytes % 16 == 0
247 && "refusing to adjust stack by misaligned amt");
248
249 // We may need to address callee-saved registers differently, so find out the
250 // bound on the frame indices.
251 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
252 int MinCSFI = 0;
253 int MaxCSFI = -1;
254
255 if (CSI.size()) {
256 MinCSFI = CSI[0].getFrameIdx();
257 MaxCSFI = CSI[CSI.size() - 1].getFrameIdx();
258 }
259
260 // The "residual" stack update comes first from this direction and guarantees
261 // that SP is NumInitialBytes below its value on function entry, either by a
262 // direct update or restoring it from the frame pointer.
263 if (NumInitialBytes + ArgumentPopSize != 0) {
264 emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16,
265 NumInitialBytes + ArgumentPopSize);
266 --MBBI;
267 }
268
269
270 // MBBI now points to the instruction just past the last callee-saved
271 // restoration (either RET/B if NumInitialBytes == 0, or the "ADD sp, sp"
272 // otherwise).
273
274 // Now we need to find out where to put the bulk of the stack adjustment
275 MachineBasicBlock::iterator FirstEpilogue = MBBI;
276 while (MBBI != MBB.begin()) {
277 --MBBI;
278
279 unsigned FrameOp;
280 for (FrameOp = 0; FrameOp < MBBI->getNumOperands(); ++FrameOp) {
281 if (MBBI->getOperand(FrameOp).isFI())
282 break;
283 }
284
285 // If this instruction doesn't have a frame index we've reached the end of
286 // the callee-save restoration.
287 if (FrameOp == MBBI->getNumOperands())
288 break;
289
290 // Likewise if it *is* a local reference, but not to a callee-saved object.
291 int FrameIdx = MBBI->getOperand(FrameOp).getIndex();
292 if (FrameIdx < MinCSFI || FrameIdx > MaxCSFI)
293 break;
294
295 FirstEpilogue = MBBI;
296 }
297
298 if (MF.getFrameInfo()->hasVarSizedObjects()) {
299 int64_t StaticFrameBase;
300 StaticFrameBase = -(NumInitialBytes + FuncInfo->getFramePointerOffset());
301 emitRegUpdate(MBB, FirstEpilogue, DL, TII,
302 AArch64::XSP, AArch64::X29, AArch64::NoRegister,
303 StaticFrameBase);
304 } else {
305 emitSPUpdate(MBB, FirstEpilogue, DL,TII, AArch64::X16, NumResidualBytes);
306 }
307 }
308
309 int64_t
310 AArch64FrameLowering::resolveFrameIndexReference(MachineFunction &MF,
311 int FrameIndex,
312 unsigned &FrameReg,
313 int SPAdj,
314 bool IsCalleeSaveOp) const {
315 AArch64MachineFunctionInfo *FuncInfo =
316 MF.getInfo<AArch64MachineFunctionInfo>();
317 MachineFrameInfo *MFI = MF.getFrameInfo();
318
319 int64_t TopOfFrameOffset = MFI->getObjectOffset(FrameIndex);
320
321 assert(!(IsCalleeSaveOp && FuncInfo->getInitialStackAdjust() == 0)
322 && "callee-saved register in unexpected place");
323
324 // If the frame for this function is particularly large, we adjust the stack
325 // in two phases which means the callee-save related operations see a
326 // different (intermediate) stack size.
327 int64_t FrameRegPos;
328 if (IsCalleeSaveOp) {
329 FrameReg = AArch64::XSP;
330 FrameRegPos = -static_cast<int64_t>(FuncInfo->getInitialStackAdjust());
331 } else if (useFPForAddressing(MF)) {
332 // Have to use the frame pointer since we have no idea where SP is.
333 FrameReg = AArch64::X29;
334 FrameRegPos = FuncInfo->getFramePointerOffset();
335 } else {
336 FrameReg = AArch64::XSP;
337 FrameRegPos = -static_cast<int64_t>(MFI->getStackSize()) + SPAdj;
338 }
339
340 return TopOfFrameOffset - FrameRegPos;
341 }
342
343 void
344 AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
345 RegScavenger *RS) const {
346 const AArch64RegisterInfo *RegInfo =
347 static_cast<const AArch64RegisterInfo *>(MF.getTarget().getRegisterInfo());
348 MachineFrameInfo *MFI = MF.getFrameInfo();
349 const AArch64InstrInfo &TII =
350 *static_cast<const AArch64InstrInfo *>(MF.getTarget().getInstrInfo());
351
352 if (hasFP(MF)) {
353 MF.getRegInfo().setPhysRegUsed(AArch64::X29);
354 MF.getRegInfo().setPhysRegUsed(AArch64::X30);
355 }
356
357 // If addressing of local variables is going to be more complicated than
358 // shoving a base register and an offset into the instruction then we may well
359 // need to scavenge registers. We should either specifically add a
360 // callee-save register for this purpose or allocate an extra spill slot.
361 bool BigStack =
362 MFI->estimateStackSize(MF) >= TII.estimateRSStackLimit(MF)
363 || MFI->hasVarSizedObjects() // Access will be from X29: messes things up
364 || (MFI->adjustsStack() && !hasReservedCallFrame(MF));
365
366 if (!BigStack)
367 return;
368
369 // We certainly need some slack space for the scavenger, preferably an extra
370 // register.
371 const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs();
372 MCPhysReg ExtraReg = AArch64::NoRegister;
373
374 for (unsigned i = 0; CSRegs[i]; ++i) {
375 if (AArch64::GPR64RegClass.contains(CSRegs[i]) &&
376 !MF.getRegInfo().isPhysRegUsed(CSRegs[i])) {
377 ExtraReg = CSRegs[i];
378 break;
379 }
380 }
381
382 if (ExtraReg != 0) {
383 MF.getRegInfo().setPhysRegUsed(ExtraReg);
384 } else {
385 assert(RS && "Expect register scavenger to be available");
386
387 // Create a stack slot for scavenging purposes. PrologEpilogInserter
388 // helpfully places it near either SP or FP for us to avoid
389 // an infinite regression during scavenging.
390 const TargetRegisterClass *RC = &AArch64::GPR64RegClass;
391 RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
392 RC->getAlignment(),
393 false));
394 }
395 }
396
397 bool AArch64FrameLowering::determinePrologueDeath(MachineBasicBlock &MBB,
398 unsigned Reg) const {
399 // If @llvm.returnaddress is called then it will refer to X30 by some means;
400 // the prologue store does not kill the register.
401 if (Reg == AArch64::X30) {
402 if (MBB.getParent()->getFrameInfo()->isReturnAddressTaken()
403 && MBB.getParent()->getRegInfo().isLiveIn(Reg))
404 return false;
405 }
406
407 // In all other cases, physical registers are dead after they've been saved
408 // but live at the beginning of the prologue block.
409 MBB.addLiveIn(Reg);
410 return true;
411 }
412
413 void
414 AArch64FrameLowering::emitFrameMemOps(bool isPrologue, MachineBasicBlock &MBB,
415 MachineBasicBlock::iterator MBBI,
416 const std::vector<CalleeSavedInfo> &CSI,
417 const TargetRegisterInfo *TRI,
418 const LoadStoreMethod PossClasses[],
419 unsigned NumClasses) const {
420 DebugLoc DL = MBB.findDebugLoc(MBBI);
421 MachineFunction &MF = *MBB.getParent();
422 MachineFrameInfo &MFI = *MF.getFrameInfo();
423 const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
424
425 // A certain amount of implicit contract is present here. The actual stack
426 // offsets haven't been allocated officially yet, so for strictly correct code
427 // we rely on the fact that the elements of CSI are allocated in order
428 // starting at SP, purely as dictated by size and alignment. In practice since
429 // this function handles the only accesses to those slots it's not quite so
430 // important.
431 //
432 // We have also ordered the Callee-saved register list in AArch64CallingConv
433 // so that the above scheme puts registers in order: in particular we want
434 // &X30 to be &X29+8 for an ABI-correct frame record (PCS 5.2.2)
435 for (unsigned i = 0, e = CSI.size(); i < e; ++i) {
436 unsigned Reg = CSI[i].getReg();
437
438 // First we need to find out which register class the register belongs to so
439 // that we can use the correct load/store instructions.
440 unsigned ClassIdx;
441 for (ClassIdx = 0; ClassIdx < NumClasses; ++ClassIdx) {
442 if (PossClasses[ClassIdx].RegClass->contains(Reg))
443 break;
444 }
445 assert(ClassIdx != NumClasses
446 && "Asked to store register in unexpected class");
447 const TargetRegisterClass &TheClass = *PossClasses[ClassIdx].RegClass;
448
449 // Now we need to decide whether it's possible to emit a paired instruction:
450 // for this we want the next register to be in the same class.
451 MachineInstrBuilder NewMI;
452 bool Pair = false;
453 if (i + 1 < CSI.size() && TheClass.contains(CSI[i+1].getReg())) {
454 Pair = true;
455 unsigned StLow = 0, StHigh = 0;
456 if (isPrologue) {
457 // Most of these registers will be live-in to the MBB and killed by our
458 // store, though there are exceptions (see determinePrologueDeath).
459 StLow = getKillRegState(determinePrologueDeath(MBB, CSI[i+1].getReg()));
460 StHigh = getKillRegState(determinePrologueDeath(MBB, CSI[i].getReg()));
461 } else {
462 StLow = RegState::Define;
463 StHigh = RegState::Define;
464 }
465
466 NewMI = BuildMI(MBB, MBBI, DL, TII.get(PossClasses[ClassIdx].PairOpcode))
467 .addReg(CSI[i+1].getReg(), StLow)
468 .addReg(CSI[i].getReg(), StHigh);
469
470 // If it's a paired op, we've consumed two registers
471 ++i;
472 } else {
473 unsigned State;
474 if (isPrologue) {
475 State = getKillRegState(determinePrologueDeath(MBB, CSI[i].getReg()));
476 } else {
477 State = RegState::Define;
478 }
479
480 NewMI = BuildMI(MBB, MBBI, DL,
481 TII.get(PossClasses[ClassIdx].SingleOpcode))
482 .addReg(CSI[i].getReg(), State);
483 }
484
485 // Note that the FrameIdx refers to the second register in a pair: it will
486 // be allocated the smaller numeric address and so is the one an LDP/STP
487 // address must use.
488 int FrameIdx = CSI[i].getFrameIdx();
489 MachineMemOperand::MemOperandFlags Flags;
490 Flags = isPrologue ? MachineMemOperand::MOStore : MachineMemOperand::MOLoad;
491 MachineMemOperand *MMO =
492 MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
493 Flags,
494 Pair ? TheClass.getSize() * 2 : TheClass.getSize(),
495 MFI.getObjectAlignment(FrameIdx));
496
497 NewMI.addFrameIndex(FrameIdx)
498 .addImm(0) // address-register offset
499 .addMemOperand(MMO);
500
501 if (isPrologue)
502 NewMI.setMIFlags(MachineInstr::FrameSetup);
503
504 // For aesthetic reasons, during an epilogue we want to emit complementary
505 // operations to the prologue, but in the opposite order. So we still
506 // iterate through the CalleeSavedInfo list in order, but we put the
507 // instructions successively earlier in the MBB.
508 if (!isPrologue)
509 --MBBI;
510 }
511 }
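
Stripped of the MachineInstr plumbing, the pairing walk in emitFrameMemOps is a simple greedy scan. A hedged sketch follows (the Saved struct and the placeholder comments are illustrative only, not LLVM API):

#include <vector>

struct Saved { int Reg; int Class; };

// Greedily pair adjacent callee-saved registers of the same class, mirroring
// the loop above: a pair consumes two entries, anything else gets a single op.
static void emitSaves(const std::vector<Saved> &CSI) {
  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    if (i + 1 < e && CSI[i + 1].Class == CSI[i].Class) {
      // would emit: STP CSI[i+1].Reg, CSI[i].Reg, [sp, #slot(i)]
      ++i; // a paired op consumes two registers
    } else {
      // would emit: STR CSI[i].Reg, [sp, #slot(i)]
    }
  }
}

int main() {
  emitSaves({{30, 0}, {29, 0}, {8, 1}}); // X30/X29 pair up, D8 stays single
}
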
512
513 bool
514 AArch64FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
515 MachineBasicBlock::iterator MBBI,
516 const std::vector<CalleeSavedInfo> &CSI,
517 const TargetRegisterInfo *TRI) const {
518 if (CSI.empty())
519 return false;
520
521 static const LoadStoreMethod PossibleClasses[] = {
522 {&AArch64::GPR64RegClass, AArch64::LSPair64_STR, AArch64::LS64_STR},
523 {&AArch64::FPR64RegClass, AArch64::LSFPPair64_STR, AArch64::LSFP64_STR},
524 };
525 const unsigned NumClasses = llvm::array_lengthof(PossibleClasses);
526
527 emitFrameMemOps(/* isPrologue = */ true, MBB, MBBI, CSI, TRI,
528 PossibleClasses, NumClasses);
529
530 return true;
531 }
532
533 bool
534 AArch64FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
535 MachineBasicBlock::iterator MBBI,
536 const std::vector<CalleeSavedInfo> &CSI,
537 const TargetRegisterInfo *TRI) const {
538
539 if (CSI.empty())
540 return false;
541
542 static const LoadStoreMethod PossibleClasses[] = {
543 {&AArch64::GPR64RegClass, AArch64::LSPair64_LDR, AArch64::LS64_LDR},
544 {&AArch64::FPR64RegClass, AArch64::LSFPPair64_LDR, AArch64::LSFP64_LDR},
545 };
546 const unsigned NumClasses = llvm::array_lengthof(PossibleClasses);
547
548 emitFrameMemOps(/* isPrologue = */ false, MBB, MBBI, CSI, TRI,
549 PossibleClasses, NumClasses);
550
551 return true;
552 }
553
554 bool
555 AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
556 const MachineFrameInfo *MFI = MF.getFrameInfo();
557 const TargetRegisterInfo *RI = MF.getTarget().getRegisterInfo();
558
559 // This is a decision of ABI compliance. The AArch64 PCS gives various options
560 // for conformance, and even at the most stringent level more or less permits
561 // elimination for leaf functions because there's no loss of functionality
562 // (for debugging etc.).
563 if (MF.getTarget().Options.DisableFramePointerElim(MF) && MFI->hasCalls())
564 return true;
565
566 // The following are hard-limits: incorrect code will be generated if we try
567 // to omit the frame.
568 return (RI->needsStackRealignment(MF) ||
569 MFI->hasVarSizedObjects() ||
570 MFI->isFrameAddressTaken());
571 }
572
573 bool
574 AArch64FrameLowering::useFPForAddressing(const MachineFunction &MF) const {
575 return MF.getFrameInfo()->hasVarSizedObjects();
576 }
577
578 bool
579 AArch64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
580 const MachineFrameInfo *MFI = MF.getFrameInfo();
581
582 // Of the various reasons for having a frame pointer, it's actually only
583 // variable-sized objects that prevent reservation of a call frame.
584 return !(hasFP(MF) && MFI->hasVarSizedObjects());
585 }
586
587 void
588 AArch64FrameLowering::eliminateCallFramePseudoInstr(
589 MachineFunction &MF,
590 MachineBasicBlock &MBB,
591 MachineBasicBlock::iterator MI) const {
592 const AArch64InstrInfo &TII =
593 *static_cast<const AArch64InstrInfo *>(MF.getTarget().getInstrInfo());
594 DebugLoc dl = MI->getDebugLoc();
595 int Opcode = MI->getOpcode();
596 bool IsDestroy = Opcode == TII.getCallFrameDestroyOpcode();
597 uint64_t CalleePopAmount = IsDestroy ? MI->getOperand(1).getImm() : 0;
598
599 if (!hasReservedCallFrame(MF)) {
600 unsigned Align = getStackAlignment();
601
602 int64_t Amount = MI->getOperand(0).getImm();
603 Amount = RoundUpToAlignment(Amount, Align);
604 if (!IsDestroy) Amount = -Amount;
605
606 // N.b. if CalleePopAmount is valid but zero (i.e. callee would pop, but it
607 // doesn't have to pop anything), then the first operand will be zero too so
608 // this adjustment is a no-op.
609 if (CalleePopAmount == 0) {
610 // FIXME: in-function stack adjustment for calls is limited to 12-bits
611 // because there's no guaranteed temporary register available. Mostly call
612 // frames will be allocated at the start of a function so this is OK, but
613 // it is a limitation that needs dealing with.
614 assert(Amount > -0xfff && Amount < 0xfff && "call frame too large");
615 emitSPUpdate(MBB, MI, dl, TII, AArch64::NoRegister, Amount);
616 }
617 } else if (CalleePopAmount != 0) {
618 // If the calling convention demands that the callee pops arguments from the
619 // stack, we want to add it back if we have a reserved call frame.
620 assert(CalleePopAmount < 0xfff && "call frame too large");
621 emitSPUpdate(MBB, MI, dl, TII, AArch64::NoRegister, -CalleePopAmount);
622 }
623
624 MBB.erase(MI);
625 }
+0
-108
lib/Target/AArch64/AArch64FrameLowering.h
None //==- AArch64FrameLowering.h - Define frame lowering for AArch64 -*- C++ -*--=//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This class implements the AArch64-specific parts of the TargetFrameLowering
10 // class.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #ifndef LLVM_AARCH64_FRAMEINFO_H
15 #define LLVM_AARCH64_FRAMEINFO_H
16
17 #include "AArch64Subtarget.h"
18 #include "llvm/Target/TargetFrameLowering.h"
19
20 namespace llvm {
21 class AArch64Subtarget;
22
23 class AArch64FrameLowering : public TargetFrameLowering {
24 private:
25 // In order to unify the spilling and restoring of callee-saved registers into
26 // emitFrameMemOps, we need to be able to specify which instructions to use
27 // for the relevant memory operations on each register class. An array of the
28 // following struct is populated and passed in to achieve this.
29 struct LoadStoreMethod {
30 const TargetRegisterClass *RegClass; // E.g. GPR64RegClass
31
32 // The preferred instruction.
33 unsigned PairOpcode; // E.g. LSPair64_STR
34
35 // Sometimes only a single register can be handled at once.
36 unsigned SingleOpcode; // E.g. LS64_STR
37 };
38 protected:
39 const AArch64Subtarget &STI;
40
41 public:
42 explicit AArch64FrameLowering(const AArch64Subtarget &sti)
43 : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 16, 0, 16),
44 STI(sti) {
45 }
46
47 /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
48 /// the function.
49 void emitPrologue(MachineFunction &MF) const override;
50 void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
51
52 /// Decides how much stack adjustment to perform in each phase of the prologue
53 /// and epilogue.
54 void splitSPAdjustments(uint64_t Total, uint64_t &Initial,
55 uint64_t &Residual) const;
56
57 int64_t resolveFrameIndexReference(MachineFunction &MF, int FrameIndex,
58 unsigned &FrameReg, int SPAdj,
59 bool IsCalleeSaveOp) const;
60
61 void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
62 RegScavenger *RS) const override;
63
64 bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
65 MachineBasicBlock::iterator MI,
66 const std::vector<CalleeSavedInfo> &CSI,
67 const TargetRegisterInfo *TRI) const override;
68 bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
69 MachineBasicBlock::iterator MI,
70 const std::vector<CalleeSavedInfo> &CSI,
71 const TargetRegisterInfo *TRI) const override;
72
73 void
74 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
75 MachineBasicBlock::iterator MI) const override;
76
77 /// If the register is X30 (i.e. LR) and the return address is used in the
78 /// function then the callee-save store doesn't actually kill the register,
79 /// otherwise it does.
80 bool determinePrologueDeath(MachineBasicBlock &MBB, unsigned Reg) const;
81
82 /// This function emits the loads or stores required during prologue and
83 /// epilogue as efficiently as possible.
84 ///
85 /// The operations involved in setting up and tearing down the frame are
86 /// similar enough to warrant a shared function, particularly as discrepancies
87 /// between the two would be disastrous.
88 void emitFrameMemOps(bool isStore, MachineBasicBlock &MBB,
89 MachineBasicBlock::iterator MI,
90 const std::vector<CalleeSavedInfo> &CSI,
91 const TargetRegisterInfo *TRI,
92 const LoadStoreMethod PossibleClasses[],
93 unsigned NumClasses) const;
94
95
96 bool hasFP(const MachineFunction &MF) const override;
97
98 bool useFPForAddressing(const MachineFunction &MF) const;
99
100 /// On AArch64 a call frame can normally be reserved; only variable-sized objects force dynamic call-frame adjustment.
101 bool hasReservedCallFrame(const MachineFunction &MF) const override;
102
103 };
104
105 } // End llvm namespace
106
107 #endif
+0
-1576
lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
None //===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines an instruction selector for the AArch64 target.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "AArch64.h"
14 #include "AArch64InstrInfo.h"
15 #include "AArch64Subtarget.h"
16 #include "AArch64TargetMachine.h"
17 #include "Utils/AArch64BaseInfo.h"
18 #include "llvm/ADT/APSInt.h"
19 #include "llvm/CodeGen/SelectionDAGISel.h"
20 #include "llvm/IR/GlobalValue.h"
21 #include "llvm/Support/Debug.h"
22 #include "llvm/Support/raw_ostream.h"
23
24 using namespace llvm;
25
26 #define DEBUG_TYPE "aarch64-isel"
27
28 //===--------------------------------------------------------------------===//
29 /// AArch64 specific code to select AArch64 machine instructions for
30 /// SelectionDAG operations.
31 ///
32 namespace {
33
34 class AArch64DAGToDAGISel : public SelectionDAGISel {
35 AArch64TargetMachine &TM;
36
37 /// Keep a pointer to the AArch64Subtarget around so that we can
38 /// make the right decision when generating code for different targets.
39 const AArch64Subtarget *Subtarget;
40
41 public:
42 explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
43 CodeGenOpt::Level OptLevel)
44 : SelectionDAGISel(tm, OptLevel), TM(tm),
45 Subtarget(&TM.getSubtarget<AArch64Subtarget>()) {
46 }
47
48 const char *getPassName() const override {
49 return "AArch64 Instruction Selection";
50 }
51
52 // Include the pieces autogenerated from the target description.
53 #include "AArch64GenDAGISel.inc"
54
55 template<unsigned MemSize>
56 bool SelectOffsetUImm12(SDValue N, SDValue &UImm12) {
57 const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
58 if (!CN || CN->getZExtValue() % MemSize != 0
59 || CN->getZExtValue() / MemSize > 0xfff)
60 return false;
61
62 UImm12 = CurDAG->getTargetConstant(CN->getZExtValue() / MemSize, MVT::i64);
63 return true;
64 }
65
66 template<unsigned RegWidth>
67 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
68 return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
69 }
70
71 /// Used for pre-lowered address-reference nodes, so we already know
72 /// the fields match. This operand's job is simply to add an
73 /// appropriate shift operand to the MOVZ/MOVK instruction.
74 template<unsigned LogShift>
75 bool SelectMOVWAddressRef(SDValue N, SDValue &Imm, SDValue &Shift) {
76 Imm = N;
77 Shift = CurDAG->getTargetConstant(LogShift, MVT::i32);
78 return true;
79 }
80
81 bool SelectFPZeroOperand(SDValue N, SDValue &Dummy);
82
83 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
84 unsigned RegWidth);
85
86 bool SelectInlineAsmMemoryOperand(const SDValue &Op,
87 char ConstraintCode,
88 std::vector<SDValue> &OutOps) override;
89
90 bool SelectLogicalImm(SDValue N, SDValue &Imm);
91
92 template<unsigned RegWidth>
93 bool SelectTSTBOperand(SDValue N, SDValue &FixedPos) {
94 return SelectTSTBOperand(N, FixedPos, RegWidth);
95 }
96
97 bool SelectTSTBOperand(SDValue N, SDValue &FixedPos, unsigned RegWidth);
98
99 SDNode *SelectAtomic(SDNode *N, unsigned Op8, unsigned Op16, unsigned Op32,
100 unsigned Op64);
101
102 /// Put the given constant into a pool and return a DAG which will give its
103 /// address.
104 SDValue getConstantPoolItemAddress(SDLoc DL, const Constant *CV);
105
106 SDNode *TrySelectToMoveImm(SDNode *N);
107 SDNode *LowerToFPLitPool(SDNode *Node);
108 SDNode *SelectToLitPool(SDNode *N);
109
110 SDNode* Select(SDNode*) override;
111 private:
112 /// Get the opcode for table lookup instruction
113 unsigned getTBLOpc(bool IsExt, bool Is64Bit, unsigned NumOfVec);
114
115 /// Select NEON table lookup intrinsics. NumVecs should be 1, 2, 3 or 4.
116 /// IsExt is to indicate if the result will be extended with an argument.
117 SDNode *SelectVTBL(SDNode *N, unsigned NumVecs, bool IsExt);
118
119 /// Select NEON load intrinsics. NumVecs should be 1, 2, 3 or 4.
120 SDNode *SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
121 const uint16_t *Opcode);
122
123 /// Select NEON store intrinsics. NumVecs should be 1, 2, 3 or 4.
124 SDNode *SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
125 const uint16_t *Opcodes);
126
127 /// Form sequences of consecutive 64/128-bit registers for use in NEON
128 /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
129 /// between 1 and 4 elements. If it contains a single element, that element is returned
130 /// unchanged; otherwise a REG_SEQUENCE value is returned.
131 SDValue createDTuple(ArrayRef<SDValue> Vecs);
132 SDValue createQTuple(ArrayRef<SDValue> Vecs);
133
134 /// Generic helper for the createDTuple/createQTuple
135 /// functions. Those should almost always be called instead.
136 SDValue createTuple(ArrayRef<SDValue> Vecs, unsigned RegClassIDs[],
137 unsigned SubRegs[]);
138
139 /// Select NEON load-duplicate intrinsics. NumVecs should be 2, 3 or 4.
140 /// The opcode array specifies the instructions used for load.
141 SDNode *SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
142 const uint16_t *Opcodes);
143
144 /// Select NEON load/store lane intrinsics. NumVecs should be 2, 3 or 4.
145 /// The opcode arrays specify the instructions used for load/store.
146 SDNode *SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
147 unsigned NumVecs, const uint16_t *Opcodes);
148
149 SDValue getTargetSubregToReg(int SRIdx, SDLoc DL, EVT VT, EVT VTD,
150 SDValue Operand);
151 };
152 }
153
154 bool
155 AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
156 unsigned RegWidth) {
157 const ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N);
158 if (!CN) return false;
159
160 // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
161 // is between 1 and 32 for a destination w-register, or 1 and 64 for an
162 // x-register.
163 //
164 // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
165 // want THIS_NODE to be 2^fbits. This is much easier to deal with using
166 // integers.
167 bool IsExact;
168
169 // fbits is between 1 and 64 in the worst-case, which means the fmul
170 // could have 2^64 as an actual operand. Need 65 bits of precision.
171 APSInt IntVal(65, true);
172 CN->getValueAPF().convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
173
174 // N.b. isPowerOf2 also checks for > 0.
175 if (!IsExact || !IntVal.isPowerOf2()) return false;
176 unsigned FBits = IntVal.logBase2();
177
178 // Checks above should have guaranteed that we haven't lost information in
179 // finding FBits, but it must still be in range.
180 if (FBits == 0 || FBits > RegWidth) return false;
181
182 FixedPos = CurDAG->getTargetConstant(64 - FBits, MVT::i32);
183 return true;
184 }
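
In plain double arithmetic the test above looks like the sketch below (illustrative only; the real code goes through APFloat/APSInt so that multipliers up to 2^64 stay exact): the fmul operand must be exactly 2^FBits with 1 <= FBits <= RegWidth.

#include <cassert>
#include <cmath>

// Returns true and sets FBits if Multiplier == 2^FBits for some FBits in
// [1, RegWidth]; frexp yields exactly 0.5 only for positive powers of two.
static bool getFixedPointScale(double Multiplier, unsigned RegWidth,
                               unsigned &FBits) {
  int Exp = 0;
  if (std::frexp(Multiplier, &Exp) != 0.5)
    return false;
  if (Exp < 2 || unsigned(Exp - 1) > RegWidth) // Exp - 1 is the power of two
    return false;
  FBits = unsigned(Exp - 1);
  return true;
}

int main() {
  unsigned FBits = 0;
  assert(getFixedPointScale(65536.0, 32, FBits) && FBits == 16);
  assert(!getFixedPointScale(3.0, 32, FBits)); // not a power of two
  assert(!getFixedPointScale(1.0, 32, FBits)); // fbits must be at least 1
}
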
185
186 bool
187 AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op,
188 char ConstraintCode,
189 std::vector<SDValue> &OutOps) {
190 switch (ConstraintCode) {
191 default: llvm_unreachable("Unrecognised AArch64 memory constraint");
192 case 'm':
193 // FIXME: more freedom is actually permitted for 'm'. We can go
194 // hunting for a base and an offset if we want. Of course, since
195 // we don't really know how the operand is going to be used we're
196 // probably restricted to the load/store pair's simm7 as an offset
197 // range anyway.
198 case 'Q':
199 OutOps.push_back(Op);
200 }
201
202 return false;
203 }
204
205 bool
206 AArch64DAGToDAGISel::SelectFPZeroOperand(SDValue N, SDValue &Dummy) {
207 ConstantFPSDNode *Imm = dyn_cast<ConstantFPSDNode>(N);
208 if (!Imm || !Imm->getValueAPF().isPosZero())
209 return false;
210
211 // Doesn't actually carry any information, but keeps TableGen quiet.
212 Dummy = CurDAG->getTargetConstant(0, MVT::i32);
213 return true;
214 }
215
216 bool AArch64DAGToDAGISel::SelectLogicalImm(SDValue N, SDValue &Imm) {
217 uint32_t Bits;
218 uint32_t RegWidth = N.getValueType().getSizeInBits();
219
220 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
221 if (!CN) return false;
222
223 if (!A64Imms::isLogicalImm(RegWidth, CN->getZExtValue(), Bits))
224 return false;
225
226 Imm = CurDAG->getTargetConstant(Bits, MVT::i32);
227 return true;
228 }
229
230 SDNode *AArch64DAGToDAGISel::TrySelectToMoveImm(SDNode *Node) {
231 SDNode *ResNode;
232 SDLoc dl(Node);
233 EVT DestType = Node->getValueType(0);
234 unsigned DestWidth = DestType.getSizeInBits();
235
236 unsigned MOVOpcode;
237 EVT MOVType;
238 int UImm16, Shift;
239 uint32_t LogicalBits;
240
241 uint64_t BitPat = cast<ConstantSDNode>(Node)->getZExtValue();
242 if (A64Imms::isMOVZImm(DestWidth, BitPat, UImm16, Shift)) {
243 MOVType = DestType;
244 MOVOpcode = DestWidth == 64 ? AArch64::MOVZxii : AArch64::MOVZwii;
245 } else if (A64Imms::isMOVNImm(DestWidth, BitPat, UImm16, Shift)) {
246 MOVType = DestType;
247 MOVOpcode = DestWidth == 64 ? AArch64::MOVNxii : AArch64::MOVNwii;
248 } else if (DestWidth == 64 && A64Imms::isMOVNImm(32, BitPat, UImm16, Shift)) {
249 // To get something like 0x0000_0000_ffff_1234 into a 64-bit register we can
250 // use a 32-bit instruction: "movn w0, 0xedcb".
251 MOVType = MVT::i32;
252 MOVOpcode = AArch64::MOVNwii;
253 } else if (A64Imms::isLogicalImm(DestWidth, BitPat, LogicalBits)) {
254 MOVOpcode = DestWidth == 64 ? AArch64::ORRxxi : AArch64::ORRwwi;
255 uint16_t ZR = DestWidth == 64 ? AArch64::XZR : AArch64::WZR;
256
257 return CurDAG->getMachineNode(MOVOpcode, dl, DestType,
258 CurDAG->getRegister(ZR, DestType),
259 CurDAG->getTargetConstant(LogicalBits, MVT::i32));
260 } else {
261 // Can't handle it in one instruction. There's scope for permitting two (or
262 // more) instructions, but that'll need more thought.
263 return nullptr;
264 }
265
266 ResNode = CurDAG->getMachineNode(MOVOpcode, dl, MOVType,
267 CurDAG->getTargetConstant(UImm16, MVT::i32),
268 CurDAG->getTargetConstant(Shift, MVT::i32));
269
270 if (MOVType != DestType) {
271 ResNode = CurDAG->getMachineNode(TargetOpcode::SUBREG_TO_REG, dl,
272 MVT::i64, MVT::i32, MVT::Other,
273 CurDAG->getTargetConstant(0, MVT::i64),
274 SDValue(ResNode, 0),
275 CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32));
276 }
277
278 return ResNode;
279 }
280
281 SDValue
282 AArch64DAGToDAGISel::getConstantPoolItemAddress(SDLoc DL,
283 const Constant *CV) {
284 EVT PtrVT = getTargetLowering()->getPointerTy();
285
286 switch (getTargetLowering()->getTargetMachine().getCodeModel()) {
287 case CodeModel::Small: {
288 unsigned Alignment =
289 getTargetLowering()->getDataLayout()->getABITypeAlignment(CV->getType());
290 return CurDAG->getNode(
291 AArch64ISD::WrapperSmall, DL, PtrVT,
292 CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_NO_FLAG),
293 CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_LO12),
294 CurDAG->getConstant(Alignment, MVT::i32));
295 }
296 case CodeModel::Large: {
297 SDNode *LitAddr;
298 LitAddr = CurDAG->getMachineNode(
299 AArch64::MOVZxii, DL, PtrVT,
300 CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G3),
301 CurDAG->getTargetConstant(3, MVT::i32));
302 LitAddr = CurDAG->getMachineNode(
303 AArch64::MOVKxii, DL, PtrVT, SDValue(LitAddr, 0),
304 CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G2_NC),
305 CurDAG->getTargetConstant(2, MVT::i32));
306 LitAddr = CurDAG->getMachineNode(
307 AArch64::MOVKxii, DL, PtrVT, SDValue(LitAddr, 0),
308 CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G1_NC),
309 CurDAG->getTargetConstant(1, MVT::i32));
310 LitAddr = CurDAG->getMachineNode(
311 AArch64::MOVKxii, DL, PtrVT, SDValue(LitAddr, 0),
312 CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G0_NC),
313 CurDAG->getTargetConstant(0, MVT::i32));
314 return SDValue(LitAddr, 0);
315 }
316 default:
317 llvm_unreachable("Only small and large code models supported now");
318 }
319 }
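
For the large code model path, the MOVZ/MOVK chain above assembles the 64-bit address 16 bits at a time. A small illustration of the arithmetic it performs (the function is a stand-in for the instruction semantics, not LLVM API):

#include <cassert>
#include <cstdint>

// MOVZ writes one 16-bit chunk and zeroes the rest; each MOVK patches one
// further chunk while leaving the others alone.
static uint64_t materialize(uint16_t G3, uint16_t G2, uint16_t G1, uint16_t G0) {
  uint64_t X = uint64_t(G3) << 48;                     // MOVZ Xd, #G3, lsl #48
  X = (X & ~(0xffffULL << 32)) | (uint64_t(G2) << 32); // MOVK Xd, #G2, lsl #32
  X = (X & ~(0xffffULL << 16)) | (uint64_t(G1) << 16); // MOVK Xd, #G1, lsl #16
  X = (X & ~0xffffULL) | G0;                           // MOVK Xd, #G0
  return X;
}

int main() {
  assert(materialize(0x1234, 0x5678, 0x9abc, 0xdef0) == 0x123456789abcdef0ULL);
}
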
320
321 SDNode *AArch64DAGToDAGISel::SelectToLitPool(SDNode *Node) {
322 SDLoc DL(Node);
323 uint64_t UnsignedVal = cast<ConstantSDNode>(Node)->getZExtValue();
324 int64_t SignedVal = cast<ConstantSDNode>(Node)->getSExtValue();
325 EVT DestType = Node->getValueType(0);
326
327 // Since we may end up loading a 64-bit constant from a 32-bit entry the
328 // constant in the pool may have a different type to the eventual node.
329 ISD::LoadExtType Extension;
330 EVT MemType;
331
332 assert((DestType == MVT::i64 || DestType == MVT::i32)
333 && "Only expect integer constants at the moment");
334
335 if (DestType == MVT::i32) {
336 Extension = ISD::NON_EXTLOAD;
337 MemType = MVT::i32;
338 } else if (UnsignedVal <= UINT32_MAX) {
339 Extension = ISD::ZEXTLOAD;
340 MemType = MVT::i32;
341 } else if (SignedVal >= INT32_MIN && SignedVal <= INT32_MAX) {
342 Extension = ISD::SEXTLOAD;
343 MemType = MVT::i32;
344 } else {
345 Extension = ISD::NON_EXTLOAD;
346 MemType = MVT::i64;
347 }
348
349 Constant *CV = ConstantInt::get(Type::getIntNTy(*CurDAG->getContext(),
350 MemType.getSizeInBits()),
351 UnsignedVal);
352 SDValue PoolAddr = getConstantPoolItemAddress(DL, CV);
353 unsigned Alignment =
354 getTargetLowering()->getDataLayout()->getABITypeAlignment(CV->getType());
355
356 return CurDAG->getExtLoad(Extension, DL, DestType, CurDAG->getEntryNode(),
357 PoolAddr,
358 MachinePointerInfo::getConstantPool(), MemType,
359 /* isVolatile = */ false,
360 /* isNonTemporal = */ false,
361 Alignment).getNode();
362 }
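// Worked example for the classification above: an i64 constant
// 0x00000000deadbeef fits in an unsigned 32-bit pool entry and is loaded with
// ZEXTLOAD; 0xffffffff80000000 (INT32_MIN sign-extended) gets a 32-bit entry
// loaded with SEXTLOAD; any other 64-bit value needs a full i64 entry and a
// plain load.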
363
364 SDNode *AArch64DAGToDAGISel::LowerToFPLitPool(SDNode *Node) {
365 SDLoc DL(Node);
366 const ConstantFP *FV = cast<ConstantFPSDNode>(Node)->getConstantFPValue();
367 EVT DestType = Node->getValueType(0);
368
369 unsigned Alignment =
370 getTargetLowering()->getDataLayout()->getABITypeAlignment(FV->getType());
371 SDValue PoolAddr = getConstantPoolItemAddress(DL, FV);
372
373 return CurDAG->getLoad(DestType, DL, CurDAG->getEntryNode(), PoolAddr,
374 MachinePointerInfo::getConstantPool(),
375 /* isVolatile = */ false,
376 /* isNonTemporal = */ false,
377 /* isInvariant = */ true,
378 Alignment).getNode();
379 }
380
381 bool
382 AArch64DAGToDAGISel::SelectTSTBOperand(SDValue N, SDValue &FixedPos,
383 unsigned RegWidth) {
384 const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
385 if (!CN) return false;
386
387 uint64_t Val = CN->getZExtValue();
388
389 if (!isPowerOf2_64(Val)) return false;
390
391 unsigned TestedBit = Log2_64(Val);
392 // Checks above should have guaranteed that we haven't lost information in
393 // finding TestedBit, but it must still be in range.
394 if (TestedBit >= RegWidth) return false;
395
396 FixedPos = CurDAG->getTargetConstant(TestedBit, MVT::i64);
397 return true;
398 }
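// For illustration: with N = 0x80 the value is a power of two, FixedPos
// becomes 7, and the operand is usable by the test-bit-and-branch (TBZ/TBNZ)
// patterns; 0x180 has two bits set and is rejected, as is 1ULL << 40 when
// RegWidth is 32.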
399
400 SDNode *AArch64DAGToDAGISel::SelectAtomic(SDNode *Node, unsigned Op8,
401 unsigned Op16,unsigned Op32,
402 unsigned Op64) {
403 // Mostly direct translation to the given operations, except that we preserve
404 // the AtomicOrdering for use later on.
405 AtomicSDNode *AN = cast<AtomicSDNode>(Node);
406 EVT VT = AN->getMemoryVT();
407
408 unsigned Op;
409 if (VT == MVT::i8)
410 Op = Op8;
411 else if (VT == MVT::i16)
412 Op = Op16;
413 else if (VT == MVT::i32)
414 Op = Op32;
415 else if (VT == MVT::i64)
416 Op = Op64;
417 else
418 llvm_unreachable("Unexpected atomic operation");
419
420 SmallVector<SDValue, 4> Ops;
421 for (unsigned i = 1; i < AN->getNumOperands(); ++i)
422 Ops.push_back(AN->getOperand(i));
423
424 Ops.push_back(CurDAG->getTargetConstant(AN->getOrdering(), MVT::i32));
425 Ops.push_back(AN->getOperand(0)); // Chain moves to the end
426
427 return CurDAG->SelectNodeTo(Node, Op, AN->getValueType(0), MVT::Other, Ops);
428 }
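// For illustration: an i32 atomic operation selects Op32, keeps every operand
// except the chain (which moves to the end), and inserts an extra i32 operand
// carrying the AtomicOrdering, so the ordering is still available when the
// resulting pseudo-instruction is expanded later.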
429
430 SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
431 static unsigned RegClassIDs[] = { AArch64::DPairRegClassID,
432 AArch64::DTripleRegClassID,
433 AArch64::DQuadRegClassID };
434 static unsigned SubRegs[] = { AArch64::dsub_0, AArch64::dsub_1,
435 AArch64::dsub_2, AArch64::dsub_3 };
436
437 return createTuple(Regs, RegClassIDs, SubRegs);
438 }
439
440 SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
441 static unsigned RegClassIDs[] = { AArch64::QPairRegClassID,
442 AArch64::QTripleRegClassID,
443 AArch64::QQuadRegClassID };
444 static unsigned SubRegs[] = { AArch64::qsub_0, AArch64::qsub_1,
445 AArch64::qsub_2, AArch64::qsub_3 };
446
447 return createTuple(Regs, RegClassIDs, SubRegs);
448 }
449
450 SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
451 unsigned RegClassIDs[],
452 unsigned SubRegs[]) {
453 // There's no special register-class for a vector-list of 1 element: it's just
454 // a vector.
455 if (Regs.size() == 1)
456 return Regs[0];
457
458 assert(Regs.size() >= 2 && Regs.size() <= 4);
459
460 SDLoc DL(Regs[0].getNode());
461
462 SmallVector<SDValue, 4> Ops;
463
464 // First operand of REG_SEQUENCE is the desired RegClass.
465 Ops.push_back(
466 CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], MVT::i32));
467
468 // Then we get pairs of source & subregister-position for the components.
469 for (unsigned i = 0; i < Regs.size(); ++i) {
470 Ops.push_back(Regs[i]);
471 Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], MVT::i32));
472 }
473
474 SDNode *N =
475 CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
476 return SDValue(N, 0);
477 }
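// For illustration: createDTuple with three registers {D0, D1, D2} (names are
// placeholders) picks RegClassIDs[1], i.e. DTripleRegClassID, and emits
//   REG_SEQUENCE DTripleRegClassID, D0, dsub_0, D1, dsub_1, D2, dsub_2
// producing one untyped super-register value covering all three vectors.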
478
479
480 // Get the register stride update opcode of a VLD/VST instruction that
481 // is otherwise equivalent to the given fixed stride updating instruction.
482 static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
483 switch (Opc) {
484 default: break;
485 case AArch64::LD1WB_8B_fixed: return AArch64::LD1WB_8B_register;
486 case AArch64::LD1WB_4H_fixed: return AArch64::LD1WB_4H_register;
487 case AArch64::LD1WB_2S_fixed: return AArch64::LD1WB_2S_register;
488 case AArch64::LD1WB_1D_fixed: return AArch64::LD1WB_1D_register;
489 case AArch64::LD1WB_16B_fixed: return AArch64::LD1WB_16B_register;
490 case AArch64::LD1WB_8H_fixed: return AArch64::LD1WB_8H_register;
491 case AArch64::LD1WB_4S_fixed: return AArch64::LD1WB_4S_register;
492 case AArch64::LD1WB_2D_fixed: return AArch64::LD1WB_2D_register;
493
494 case AArch64::LD2WB_8B_fixed: return AArch64::LD2WB_8B_register;
495 case AArch64::LD2WB_4H_fixed: return AArch64::LD2WB_4H_register;
496 case AArch64::LD2WB_2S_fixed: return AArch64::LD2WB_2S_register;
497 case AArch64::LD2WB_16B_fixed: return AArch64::LD2WB_16B_register;
498 case AArch64::LD2WB_8H_fixed: return AArch64::LD2WB_8H_register;
499 case AArch64::LD2WB_4S_fixed: return AArch64::LD2WB_4S_register;
500 case AArch64::LD2WB_2D_fixed: return AArch64::LD2WB_2D_register;
501
502 case AArch64::LD3WB_8B_fixed: return AArch64::LD3WB_8B_register;
503 case AArch64::LD3WB_4H_fixed: return AArch64::LD3WB_4H_register;
504 case AArch64::LD3WB_2S_fixed: return AArch64::LD3WB_2S_register;
505 case AArch64::LD3WB_16B_fixed: return AArch64::LD3WB_16B_register;
506 case AArch64::LD3WB_8H_fixed: return AArch64::LD3WB_8H_register;
507 case AArch64::LD3WB_4S_fixed: return AArch64::LD3WB_4S_register;
508 case AArch64::LD3WB_2D_fixed: return AArch64::LD3WB_2D_register;
509
510 case AArch64::LD4WB_8B_fixed: return AArch64::LD4WB_8B_register;
511 case AArch64::LD4WB_4H_fixed: return AArch64::LD4WB_4H_register;
512 case AArch64::LD4WB_2S_fixed: return AArch64::LD4WB_2S_register;
513 case AArch64::LD4WB_16B_fixed: return AArch64::LD4WB_16B_register;
514 case AArch64::LD4WB_8H_fixed: return AArch64::LD4WB_8H_register;
515 case AArch64::LD4WB_4S_fixed: return AArch64::LD4WB_4S_register;
516 case AArch64::LD4WB_2D_fixed: return AArch64::LD4WB_2D_register;
517
518 case AArch64::LD1x2WB_8B_fixed: return AArch64::LD1x2WB_8B_register;
519 case AArch64::LD1x2WB_4H_fixed: return AArch64::LD1x2WB_4H_register;
520 case AArch64::LD1x2WB_2S_fixed: return AArch64::LD1x2WB_2S_register;
521 case AArch64::LD1x2WB_1D_fixed: return AArch64::LD1x2WB_1D_register;
522 case AArch64::LD1x2WB_16B_fixed: return AArch64::LD1x2WB_16B_register;
523 case AArch64::LD1x2WB_8H_fixed: return AArch64::LD1x2WB_8H_register;
524 case AArch64::LD1x2WB_4S_fixed: return AArch64::LD1x2WB_4S_register;
525 case AArch64::LD1x2WB_2D_fixed: return AArch64::LD1x2WB_2D_register;
526
527 case AArch64::LD1x3WB_8B_fixed: return AArch64::LD1x3WB_8B_register;
528 case AArch64::LD1x3WB_4H_fixed: return AArch64::LD1x3WB_4H_register;
529 case AArch64::LD1x3WB_2S_fixed: return AArch64::LD1x3WB_2S_register;
530 case AArch64::LD1x3WB_1D_fixed: return AArch64::LD1x3WB_1D_register;
531 case AArch64::LD1x3WB_16B_fixed: return AArch64::LD1x3WB_16B_register;
532 case AArch64::LD1x3WB_8H_fixed: return AArch64::LD1x3WB_8H_register;
533 case AArch64::LD1x3WB_4S_fixed: return AArch64::LD1x3WB_4S_register;
534 case AArch64::LD1x3WB_2D_fixed: return AArch64::LD1x3WB_2D_register;
535
536 case AArch64::LD1x4WB_8B_fixed: return AArch64::LD1x4WB_8B_register;
537 case AArch64::LD1x4WB_4H_fixed: return AArch64::LD1x4WB_4H_register;
538 case AArch64::LD1x4WB_2S_fixed: return AArch64::LD1x4WB_2S_register;
539 case AArch64::LD1x4WB_1D_fixed: return AArch64::LD1x4WB_1D_register;
540 case AArch64::LD1x4WB_16B_fixed: return AArch64::LD1x4WB_16B_register;
541 case AArch64::LD1x4WB_8H_fixed: return AArch64::LD1x4WB_8H_register;
542 case AArch64::LD1x4WB_4S_fixed: return AArch64::LD1x4WB_4S_register;
543 case AArch64::LD1x4WB_2D_fixed: return AArch64::LD1x4WB_2D_register;
544
545 case AArch64::ST1WB_8B_fixed: return AArch64::ST1WB_8B_register;
546 case AArch64::ST1WB_4H_fixed: return AArch64::ST1WB_4H_register;
547 case AArch64::ST1WB_2S_fixed: return AArch64::ST1WB_2S_register;
548 case AArch64::ST1WB_1D_fixed: return AArch64::ST1WB_1D_register;
549 case AArch64::ST1WB_16B_fixed: return AArch64::ST1WB_16B_register;
550 case AArch64::ST1WB_8H_fixed: return AArch64::ST1WB_8H_register;
551 case AArch64::ST1WB_4S_fixed: return AArch64::ST1WB_4S_register;
552 case AArch64::ST1WB_2D_fixed: return AArch64::ST1WB_2D_register;
553
554 case AArch64::ST2WB_8B_fixed: return AArch64::ST2WB_8B_register;
555 case AArch64::ST2WB_4H_fixed: return AArch64::ST2WB_4H_register;
556 case AArch64::ST2WB_2S_fixed: return AArch64::ST2WB_2S_register;
557 case AArch64::ST2WB_16B_fixed: return AArch64::ST2WB_16B_register;
558 case AArch64::ST2WB_8H_fixed: return AArch64::ST2WB_8H_register;
559 case AArch64::ST2WB_4S_fixed: return AArch64::ST2WB_4S_register;
560 case AArch64::ST2WB_2D_fixed: return AArch64::ST2WB_2D_register;
561
562 case AArch64::ST3WB_8B_fixed: return AArch64::ST3WB_8B_register;
563 case AArch64::ST3WB_4H_fixed: return AArch64::ST3WB_4H_register;
564 case AArch64::ST3WB_2S_fixed: return AArch64::ST3WB_2S_register;
565 case AArch64::ST3WB_16B_fixed: return AArch64::ST3WB_16B_register;
566 case AArch64::ST3WB_8H_fixed: return AArch64::ST3WB_8H_register;
567 case AArch64::ST3WB_4S_fixed: return AArch64::ST3WB_4S_register;
568 case AArch64::ST3WB_2D_fixed: return AArch64::ST3WB_2D_register;
569
570 case AArch64::ST4WB_8B_fixed: return AArch64::ST4WB_8B_register;
571 case AArch64::ST4WB_4H_fixed: return AArch64::ST4WB_4H_register;
572 case AArch64::ST4WB_2S_fixed: return AArch64::ST4WB_2S_register;
573 case AArch64::ST4WB_16B_fixed: return AArch64::ST4WB_16B_register;
574 case AArch64::ST4WB_8H_fixed: return AArch64::ST4WB_8H_register;
575 case AArch64::ST4WB_4S_fixed: return AArch64::ST4WB_4S_register;
576 case AArch64::ST4WB_2D_fixed: return AArch64::ST4WB_2D_register;
577
578 case AArch64::ST1x2WB_8B_fixed: return AArch64::ST1x2WB_8B_register;
579 case AArch64::ST1x2WB_4H_fixed: return AArch64::ST1x2WB_4H_register;
580 case AArch64::ST1x2WB_2S_fixed: return AArch64::ST1x2WB_2S_register;
581 case AArch64::ST1x2WB_1D_fixed: return AArch64::ST1x2WB_1D_register;
582 case AArch64::ST1x2WB_16B_fixed: return AArch64::ST1x2WB_16B_register;
583 case AArch64::ST1x2WB_8H_fixed: return AArch64::ST1x2WB_8H_register;
584 case AArch64::ST1x2WB_4S_fixed: return AArch64::ST1x2WB_4S_register;
585 case AArch64::ST1x2WB_2D_fixed: return AArch64::ST1x2WB_2D_register;
586
587 case AArch64::ST1x3WB_8B_fixed: return AArch64::ST1x3WB_8B_register;
588 case AArch64::ST1x3WB_4H_fixed: return AArch64::ST1x3WB_4H_register;
589 case AArch64::ST1x3WB_2S_fixed: return AArch64::ST1x3WB_2S_register;
590 case AArch64::ST1x3WB_1D_fixed: return AArch64::ST1x3WB_1D_register;
591 case AArch64::ST1x3WB_16B_fixed: return AArch64::ST1x3WB_16B_register;
592 case AArch64::ST1x3WB_8H_fixed: return AArch64::ST1x3WB_8H_register;
593 case AArch64::ST1x3WB_4S_fixed: return AArch64::ST1x3WB_4S_register;
594 case AArch64::ST1x3WB_2D_fixed: return AArch64::ST1x3WB_2D_register;
595
596 case AArch64::ST1x4WB_8B_fixed: return AArch64::ST1x4WB_8B_register;
597 case AArch64::ST1x4WB_4H_fixed: return AArch64::ST1x4WB_4H_register;
598 case AArch64::ST1x4WB_2S_fixed: return AArch64::ST1x4WB_2S_register;
599 case AArch64::ST1x4WB_1D_fixed: return AArch64::ST1x4WB_1D_register;
600 case AArch64::ST1x4WB_16B_fixed: return AArch64::ST1x4WB_16B_register;
601 case AArch64::ST1x4WB_8H_fixed: return AArch64::ST1x4WB_8H_register;
602 case AArch64::ST1x4WB_4S_fixed: return AArch64::ST1x4WB_4S_register;
603 case AArch64::ST1x4WB_2D_fixed: return AArch64::ST1x4WB_2D_register;
604
605 // Post-index of duplicate loads
606 case AArch64::LD2R_WB_8B_fixed: return AArch64::LD2R_WB_8B_register;
607 case AArch64::LD2R_WB_4H_fixed: return AArch64::LD2R_WB_4H_register;
608 case AArch64::LD2R_WB_2S_fixed: return AArch64::LD2R_WB_2S_register;
609 case AArch64::LD2R_WB_1D_fixed: return AArch64::LD2R_WB_1D_register;
610 case AArch64::LD2R_WB_16B_fixed: return AArch64::LD2R_WB_16B_register;
611 case AArch64::LD2R_WB_8H_fixed: return AArch64::LD2R_WB_8H_register;
612 case AArch64::LD2R_WB_4S_fixed: return AArch64::LD2R_WB_4S_register;
613 case AArch64::LD2R_WB_2D_fixed: return AArch64::LD2R_WB_2D_register;
614
615 case AArch64::LD3R_WB_8B_fixed: return AArch64::LD3R_WB_8B_register;
616 case AArch64::LD3R_WB_4H_fixed: return AArch64::LD3R_WB_4H_register;
617 case AArch64::LD3R_WB_2S_fixed: return AArch64::LD3R_WB_2S_register;
618 case AArch64::LD3R_WB_1D_fixed: return AArch64::LD3R_WB_1D_register;
619 case AArch64::LD3R_WB_16B_fixed: return AArch64::LD3R_WB_16B_register;
620 case AArch64::LD3R_WB_8H_fixed: return AArch64::LD3R_WB_8H_register;
621 case AArch64::LD3R_WB_4S_fixed: return AArch64::LD3R_WB_4S_register;
622 case AArch64::LD3R_WB_2D_fixed: return AArch64::LD3R_WB_2D_register;
623
624 case AArch64::LD4R_WB_8B_fixed: return AArch64::LD4R_WB_8B_register;
625 case AArch64::LD4R_WB_4H_fixed: return AArch64::LD4R_WB_4H_register;
626 case AArch64::LD4R_WB_2S_fixed: return AArch64::LD4R_WB_2S_register;
627 case AArch64::LD4R_WB_1D_fixed: return AArch64::LD4R_WB_1D_register;
628 case AArch64::LD4R_WB_16B_fixed: return AArch64::LD4R_WB_16B_register;
629 case AArch64::LD4R_WB_8H_fixed: return AArch64::LD4R_WB_8H_register;
630 case AArch64::LD4R_WB_4S_fixed: return AArch64::LD4R_WB_4S_register;
631 case AArch64::LD4R_WB_2D_fixed: return AArch64::LD4R_WB_2D_register;
632
633 // Post-index of lane loads
634 case AArch64::LD2LN_WB_B_fixed: return AArch64::LD2LN_WB_B_register;
635 case AArch64::LD2LN_WB_H_fixed: return AArch64::LD2LN_WB_H_register;
636 case AArch64::LD2LN_WB_S_fixed: return AArch64::LD2LN_WB_S_register;
637 case AArch64::LD2LN_WB_D_fixed: return AArch64::LD2LN_WB_D_register;
638
639 case AArch64::LD3LN_WB_B_fixed: return AArch64::LD3LN_WB_B_register;
640 case AArch64::LD3LN_WB_H_fixed: return AArch64::LD3LN_WB_H_register;
641 case AArch64::LD3LN_WB_S_fixed: return AArch64::LD3LN_WB_S_register;
642 case AArch64::LD3LN_WB_D_fixed: return AArch64::LD3LN_WB_D_register;
643
644 case AArch64::LD4LN_WB_B_fixed: return AArch64::LD4LN_WB_B_register;
645 case AArch64::LD4LN_WB_H_fixed: return AArch64::LD4LN_WB_H_register;
646 case AArch64::LD4LN_WB_S_fixed: return AArch64::LD4LN_WB_S_register;
647 case AArch64::LD4LN_WB_D_fixed: return AArch64::LD4LN_WB_D_register;
648
649 // Post-index of lane stores
650 case AArch64::ST2LN_WB_B_fixed: return AArch64::ST2LN_WB_B_register;
651 case AArch64::ST2LN_WB_H_fixed: return AArch64::ST2LN_WB_H_register;
652 case AArch64::ST2LN_WB_S_fixed: return AArch64::ST2LN_WB_S_register;
653 case AArch64::ST2LN_WB_D_fixed: return AArch64::ST2LN_WB_D_register;
654
655 case AArch64::ST3LN_WB_B_fixed: return AArch64::ST3LN_WB_B_register;
656 case AArch64::ST3LN_WB_H_fixed: return AArch64::ST3LN_WB_H_register;
657 case AArch64::ST3LN_WB_S_fixed: return AArch64::ST3LN_WB_S_register;
658 case AArch64::ST3LN_WB_D_fixed: return AArch64::ST3LN_WB_D_register;
659
660 case AArch64::ST4LN_WB_B_fixed: return AArch64::ST4LN_WB_B_register;
661 case AArch64::ST4LN_WB_H_fixed: return AArch64::ST4LN_WB_H_register;
662 case AArch64::ST4LN_WB_S_fixed: return AArch64::ST4LN_WB_S_register;
663 case AArch64::ST4LN_WB_D_fixed: return AArch64::ST4LN_WB_D_register;
664 }
665 return Opc; // If not one we handle, return it unchanged.
666 }
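// For illustration: a post-incremented LD2 of two 8B vectors whose increment
// lives in a register (rather than being the fixed transfer size) is selected
// as LD2WB_8B_register instead of LD2WB_8B_fixed; the Select* routines below
// make that switch whenever the increment operand is not a constant.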
667
668 SDNode *AArch64DAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating,
669 unsigned NumVecs,
670 const uint16_t *Opcodes) {
671 assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
672
673 EVT VT = N->getValueType(0);
674 unsigned OpcodeIndex;
675 bool is64BitVector = VT.is64BitVector();
676 switch (VT.getScalarType().getSizeInBits()) {
677 case 8: OpcodeIndex = is64BitVector ? 0 : 4; break;
678 case 16: OpcodeIndex = is64BitVector ? 1 : 5; break;
679 case 32: OpcodeIndex = is64BitVector ? 2 : 6; break;
680 case 64: OpcodeIndex = is64BitVector ? 3 : 7; break;
681 default: llvm_unreachable("unhandled vector load type");
682 }
683 unsigned Opc = Opcodes[OpcodeIndex];
684
685 SmallVector<SDValue, 2> Ops;
686 unsigned AddrOpIdx = isUpdating ? 1 : 2;
687 Ops.push_back(N->getOperand(AddrOpIdx)); // Push back the Memory Address
688
689 if (isUpdating) {
690 SDValue Inc = N->getOperand(AddrOpIdx + 1);
691 if (!isa<ConstantSDNode>(Inc.getNode())) // Increment in Register
692 Opc = getVLDSTRegisterUpdateOpcode(Opc);
693 Ops.push_back(Inc);
694 }
695
696 Ops.push_back(N->getOperand(0)); // Push back the Chain
697
698 SmallVector<EVT, 3> ResTys;
699 // Push back the type of return super register
700 if (NumVecs == 1)
701 ResTys.push_back(VT);
702 else if (NumVecs == 3)
703 ResTys.push_back(MVT::Untyped);
704 else {
705 EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,
706 is64BitVector ? NumVecs : NumVecs * 2);
707 ResTys.push_back(ResTy);
708 }
709
710 if (isUpdating)
711 ResTys.push_back(MVT::i64); // Type of the updated register
712 ResTys.push_back(MVT::Other); // Type of the Chain
713 SDLoc dl(N);
714 SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
715
716 // Transfer memoperands.
717 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
718 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
719 cast<MachineSDNode>(VLd)->setMemRefs(MemOp, MemOp + 1);
720
721 if (NumVecs == 1)
722 return VLd;
723
724 // If NumVecs > 1, the return result is a super register containing 2-4
725 // consecutive vector registers.
726 SDValue SuperReg = SDValue(VLd, 0);
727
728 unsigned Sub0 = is64BitVector ? AArch64::dsub_0 : AArch64::qsub_0;
729 for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
730 ReplaceUses(SDValue(N, Vec),
731 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
732 // Update users of the Chain
733 ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
734 if (isUpdating)
735 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
736
737 return nullptr;
738 }
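// For illustration: an ld3 of v4i32 vectors uses opcode index 6, returns all
// three vectors in one MVT::Untyped super-register, and the loop above peels
// them back out with qsub_0..qsub_2 EXTRACT_SUBREGs before rewiring the chain
// (and, for post-indexed forms, the updated base register).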
739
740 SDNode *AArch64DAGToDAGISel::SelectVST(SDNode *N, bool isUpdating,
741 unsigned NumVecs,
742 const uint16_t *Opcodes) {
743 assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
744 SDLoc dl(N);
745
746 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
747 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
748
749 unsigned AddrOpIdx = isUpdating ? 1 : 2;
750 unsigned Vec0Idx = 3;
751 EVT VT = N->getOperand(Vec0Idx).getValueType();
752 unsigned OpcodeIndex;
753 bool is64BitVector = VT.is64BitVector();
754 switch (VT.getScalarType().getSizeInBits()) {
755 case 8: OpcodeIndex = is64BitVector ? 0 : 4; break;
756 case 16: OpcodeIndex = is64BitVector ? 1 : 5; break;
757 case 32: OpcodeIndex = is64BitVector ? 2 : 6; break;
758 case 64: OpcodeIndex = is64BitVector ? 3 : 7; break;
759 default: llvm_unreachable("unhandled vector store type");
760 }
761 unsigned Opc = Opcodes[OpcodeIndex];
762
763 SmallVector<EVT, 2> ResTys;
764 if (isUpdating)
765 ResTys.push_back(MVT::i64);
766 ResTys.push_back(MVT::Other); // Type for the Chain
767
768 SmallVector<SDValue, 2> Ops;
769 Ops.push_back(N->getOperand(AddrOpIdx)); // Push back the Memory Address
770
771 if (isUpdating) {
772 SDValue Inc = N->getOperand(AddrOpIdx + 1);
773 if (!isa<ConstantSDNode>(Inc.getNode())) // Increment in Register
774 Opc = getVLDSTRegisterUpdateOpcode(Opc);
775 Ops.push_back(Inc);
776 }
777
778 SmallVector<SDValue, 2> Regs(N->op_begin() + Vec0Idx,
779 N->op_begin() + Vec0Idx + NumVecs);
780 SDValue SrcReg = is64BitVector ? createDTuple(Regs) : createQTuple(Regs);
781 Ops.push_back(SrcReg);
782
783 // Push back the Chain
784 Ops.push_back(N->getOperand(0));
785
786 // Transfer memoperands.
787 SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
788 cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1);
789
790 return VSt;
791 }
792
793 SDValue
794 AArch64DAGToDAGISel::getTargetSubregToReg(int SRIdx, SDLoc DL, EVT VT, EVT VTD,
795 SDValue Operand) {
796 SDNode *Reg = CurDAG->getMachineNode(TargetOpcode::SUBREG_TO_REG, DL,
797 VT, VTD, MVT::Other,
798 CurDAG->getTargetConstant(0, MVT::i64),
799 Operand,
800 CurDAG->getTargetConstant(AArch64::sub_64, MVT::i32));
801 return SDValue(Reg, 0);
802 }
803
804 SDNode *AArch64DAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating,
805 unsigned NumVecs,
806 const uint16_t *Opcodes) {
807 assert(NumVecs >=2 && NumVecs <= 4 && "Load Dup NumVecs out-of-range");
808 SDLoc dl(N);
809
810 EVT VT = N->getValueType(0);
811 unsigned OpcodeIndex;
812 bool is64BitVector = VT.is64BitVector();
813 switch (VT.getScalarType().getSizeInBits()) {
814 case 8: OpcodeIndex = is64BitVector ? 0 : 4; break;
815 case 16: OpcodeIndex = is64BitVector ? 1 : 5; break;
816 case 32: OpcodeIndex = is64BitVector ? 2 : 6; break;
817 case 64: OpcodeIndex = is64BitVector ? 3 : 7; break;
818 default: llvm_unreachable("unhandled vector duplicate lane load type");
819 }
820 unsigned Opc = Opcodes[OpcodeIndex];
821
822 SDValue SuperReg;
823 SmallVector<SDValue, 2> Ops;
824 Ops.push_back(N->getOperand(1)); // Push back the Memory Address
825 if (isUpdating) {
826 SDValue Inc = N->getOperand(2);
827 if (!isa<ConstantSDNode>(Inc.getNode())) // Increment in Register
828 Opc = getVLDSTRegisterUpdateOpcode(Opc);
829 Ops.push_back(Inc);
830 }
831 Ops.push_back(N->getOperand(0)); // Push back the Chain
832
833 SmallVector<EVT, 2> ResTys;
834 // Push back the type of return super register
835 if (NumVecs == 3)
836 ResTys.push_back(MVT::Untyped);
837 else {
838 EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,
839 is64BitVector ? NumVecs : NumVecs * 2);
840 ResTys.push_back(ResTy);
841 }
842 if (isUpdating)
843 ResTys.push_back(MVT::i64); // Type of the updated register
844 ResTys.push_back(MVT::Other); // Type of the Chain
845 SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
846
847 // Transfer memoperands.
848 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
849 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
850 cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1);
851
852 SuperReg = SDValue(VLdDup, 0);
853 unsigned Sub0 = is64BitVector ? AArch64::dsub_0 : AArch64::qsub_0;
854 // Update uses of each register in the super register
855 for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
856 ReplaceUses(SDValue(N, Vec),
857 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
858 // Update uses of the Chain
859 ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
860 if (isUpdating)
861 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
862 return nullptr;
863 }
864
865 // Load/store lane instructions only exist for 128-bit vector types. If the
866 // operation uses a 64-bit vector, we still select the 128-bit instruction:
867 // SUBREG_TO_REG widens the input to a 128-bit vector, and EXTRACT_SUBREG
868 // recovers the 64-bit vector from the 128-bit result.
869 SDNode *AArch64DAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
870 bool isUpdating, unsigned NumVecs,
871 const uint16_t *Opcodes) {
872 assert(NumVecs >= 2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
873 SDLoc dl(N);
874 unsigned AddrOpIdx = isUpdating ? 1 : 2;
875 unsigned Vec0Idx = 3;
876
877 SDValue Chain = N->getOperand(0);
878 unsigned Lane =
879 cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
880 EVT VT = N->getOperand(Vec0Idx).getValueType();
881 bool is64BitVector = VT.is64BitVector();
882 EVT VT64; // 64-bit Vector Type
883
884 if (is64BitVector) {
885 VT64 = VT;
886 VT = EVT::getVectorVT(*CurDAG->getContext(), VT.getVectorElementType(),
887 VT.getVectorNumElements() * 2);
888 }
889
890 unsigned OpcodeIndex;
891 switch (VT.getScalarType().getSizeInBits()) {
892 case 8: OpcodeIndex = 0; break;
893 case 16: OpcodeIndex = 1; break;
894 case 32: OpcodeIndex = 2; break;
895 case 64: OpcodeIndex = 3; break;
896 default: llvm_unreachable("unhandled vector lane load/store type");
897 }
898 unsigned Opc = Opcodes[OpcodeIndex];
899
900 SmallVector<EVT, 3> ResTys;
901 if (IsLoad) {
902 // Push back the type of return super register
903 if (NumVecs == 3)
904 ResTys.push_back(MVT::Untyped);
905 else {
906 EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,
907 is64BitVector ? NumVecs : NumVecs * 2);
908 ResTys.push_back(ResTy);
909 }
910 }
911 if (isUpdating)
912 ResTys.push_back(MVT::i64); // Type of the updated register
913 ResTys.push_back(MVT::Other); // Type of Chain
914 SmallVector<SDValue, 2> Ops;
915 Ops.push_back(N->getOperand(AddrOpIdx)); // Push back the Memory Address
916 if (isUpdating) {
917 SDValue Inc = N->getOperand(AddrOpIdx + 1);
918 if (!isa<ConstantSDNode>(Inc.getNode())) // Increment in Register
919 Opc = getVLDSTRegisterUpdateOpcode(Opc);
920 Ops.push_back(Inc);
921 }
922
923 SmallVector<SDValue, 2> Regs(N->op_begin() + Vec0Idx,
924 N->op_begin() + Vec0Idx + NumVecs);
925 if (is64BitVector)
926 for (unsigned i = 0; i < Regs.size(); i++)
927 Regs[i] = getTargetSubregToReg(AArch64::sub_64, dl, VT, VT64, Regs[i]);
928 SDValue SuperReg = createQTuple(Regs);
929
930 Ops.push_back(SuperReg); // Source Reg
931 SDValue LaneValue = CurDAG->getTargetConstant(Lane, MVT::i32);
932 Ops.push_back(LaneValue);
933 Ops.push_back(Chain); // Push back the Chain
934
935 SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
936 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
937 MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
938 cast<MachineSDNode>(VLdLn)->setMemRefs(MemOp, MemOp + 1);
939 if (!IsLoad)
940 return VLdLn;
941
942 // Extract the subregisters.
943 SuperReg = SDValue(VLdLn, 0);
944 unsigned Sub0 = AArch64::qsub_0;
945 // Update uses of each register in the super register
946 for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
947 SDValue SUB0 = CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg);
948 if (is64BitVector) {
949 SUB0 = CurDAG->getTargetExtractSubreg(AArch64::sub_64, dl, VT64, SUB0);
950 }
951 ReplaceUses(SDValue(N, Vec), SUB0);
952 }
953 ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
954 if (isUpdating)
955 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
956 return nullptr;
957 }
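// For illustration: a lane load of v8i8 vectors (64-bit) is widened first:
// each input register is wrapped into a v16i8 with SUBREG_TO_REG on sub_64,
// the 128-bit lane instruction is selected, and each 64-bit result is then
// recovered with an extra EXTRACT_SUBREG on sub_64, as in the loop above.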
958
959 unsigned AArch64DAGToDAGISel::getTBLOpc(bool IsExt, bool Is64Bit,
960 unsigned NumOfVec) {
961 assert(NumOfVec >= 1 && NumOfVec <= 4 && "TBL/TBX NumOfVec out-of-range");
962
963 unsigned Opc = 0;
964 switch (NumOfVec) {
965 default:
966 break;
967 case 1:
968 if (IsExt)
969 Opc = Is64Bit ? AArch64::TBX1_8b : AArch64::TBX1_16b;
970 else
971 Opc = Is64Bit ? AArch64::TBL1_8b : AArch64::TBL1_16b;
972 break;
973 case 2:
974 if (IsExt)
975 Opc = Is64Bit ? AArch64::TBX2_8b : AArch64::TBX2_16b;
976 else
977 Opc = Is64Bit ? AArch64::TBL2_8b : AArch64::TBL2_16b;
978 break;
979 case 3:
980 if (IsExt)
981 Opc = Is64Bit ? AArch64::TBX3_8b : AArch64::TBX3_16b;
982 else
983 Opc = Is64Bit ? AArch64::TBL3_8b : AArch64::TBL3_16b;
984 break;
985 case 4:
986 if (IsExt)
987 Opc = Is64Bit ? AArch64::TBX4_8b : AArch64::TBX4_16b;
988 else
989 Opc = Is64Bit ? AArch64::TBL4_8b : AArch64::TBL4_16b;
990 break;
991 }
992
993 return Opc;
994 }
995
996 SDNode *AArch64DAGToDAGISel::SelectVTBL(SDNode *N, unsigned NumVecs,
997 bool IsExt) {
998 assert(NumVecs >= 1 && NumVecs <= 4 && "VTBL NumVecs out-of-range");
999 SDLoc dl(N);
1000
1001 // The lookup table for vtbl/vtbx must consist of 128-bit vectors; check that.
1002 unsigned Vec0Idx = IsExt ? 2 : 1;
1003 assert(!N->getOperand(Vec0Idx + 0).getValueType().is64BitVector() &&
1004 "The element of lookup table for vtbl and vtbx must be 128-bit");
1005
1006 // Check whether the return value type is a 64-bit vector
1007 EVT ResVT = N->getValueType(0);
1008 bool is64BitRes = ResVT.is64BitVector();
1009
1010 // Create new SDValue for vector list
1011 SmallVector<SDValue, 2> Regs(N->op_begin() + Vec0Idx,
1012 N->op_begin() + Vec0Idx + NumVecs);
1013 SDValue TblReg = createQTuple(Regs);
1014 unsigned Opc = getTBLOpc(IsExt, is64BitRes, NumVecs);
1015
1016 SmallVector<SDValue, 2> Ops;
1017 if (IsExt)
1018