llvm.org GIT mirror llvm / 7b837d8
ARM64: initial backend import

This adds a second implementation of the AArch64 architecture to LLVM, accessible in parallel via the "arm64" triple. The plan over the coming weeks & months is to merge the two into a single backend, during which time thorough code review should naturally occur. Everything will be easier with the target in-tree though, hence this commit.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@205090 91177308-0d34-0410-b5e6-96231b3b80d8

Tim Northover, 5 years ago
394 changed file(s) with 105888 addition(s) and 32 deletion(s).
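The change is visible at the triple level: after this patch, "arm64" parses to its own architecture enum, side by side with the pre-existing "aarch64" triple (see the Triple.h/Triple.cpp hunks below). A minimal C++ sketch, not part of the commit, illustrating the behaviour the diff adds:

#include "llvm/ADT/Triple.h"
#include <cassert>

int main() {
  // "arm64" now selects the new backend's architecture enum...
  llvm::Triple T("arm64-apple-ios7.0");
  assert(T.getArch() == llvm::Triple::arm64);
  assert(T.isArch64Bit());              // reported as a 64-bit architecture

  // ...while "aarch64" keeps selecting the existing AArch64 target.
  llvm::Triple U("aarch64-linux-gnu");
  assert(U.getArch() == llvm::Triple::aarch64);
  return 0;
}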
127127
128128 set(LLVM_ALL_TARGETS
129129 AArch64
130 ARM64
130131 ARM
131132 CppBackend
132133 Hexagon
142143 )
143144
144145 # List of targets with JIT support:
145 set(LLVM_TARGETS_WITH_JIT X86 PowerPC AArch64 ARM Mips SystemZ)
146 set(LLVM_TARGETS_WITH_JIT X86 PowerPC AArch64 ARM64 ARM Mips SystemZ)
146147
147148 set(LLVM_TARGETS_TO_BUILD "all"
148149 CACHE STRING "Semicolon-separated list of targets to build, or \"all\".")
418418 amd64-* | x86_64-*) llvm_cv_target_arch="x86_64" ;;
419419 sparc*-*) llvm_cv_target_arch="Sparc" ;;
420420 powerpc*-*) llvm_cv_target_arch="PowerPC" ;;
421 arm64*-*) llvm_cv_target_arch="ARM64" ;;
421422 arm*-*) llvm_cv_target_arch="ARM" ;;
422423 aarch64*-*) llvm_cv_target_arch="AArch64" ;;
423424 mips-* | mips64-*) llvm_cv_target_arch="Mips" ;;
453454 amd64-* | x86_64-*) host_arch="x86_64" ;;
454455 sparc*-*) host_arch="Sparc" ;;
455456 powerpc*-*) host_arch="PowerPC" ;;
457 arm64*-*) host_arch="ARM64" ;;
456458 arm*-*) host_arch="ARM" ;;
457459 aarch64*-*) host_arch="AArch64" ;;
458460 mips-* | mips64-*) host_arch="Mips" ;;
794796 esac
795797 fi
796798
797 TARGETS_WITH_JIT="AArch64 ARM Mips PowerPC SystemZ X86"
799 TARGETS_WITH_JIT="AArch64 ARM ARM64 Mips PowerPC SystemZ X86"
798800 AC_SUBST(TARGETS_WITH_JIT,$TARGETS_WITH_JIT)
799801
800802 dnl Allow enablement of building and installing docs
947949 fi
948950
949951 dnl List all possible targets
950 ALL_TARGETS="X86 Sparc PowerPC AArch64 ARM Mips XCore MSP430 CppBackend NVPTX Hexagon SystemZ R600"
952 ALL_TARGETS="X86 Sparc PowerPC AArch64 ARM ARM64 Mips XCore MSP430 CppBackend NVPTX Hexagon SystemZ R600"
951953 AC_SUBST(ALL_TARGETS,$ALL_TARGETS)
952954
953955 dnl Allow specific targets to be specified for building (or not)
954956 TARGETS_TO_BUILD=""
955957 AC_ARG_ENABLE([targets],AS_HELP_STRING([--enable-targets],
956958 [Build specific host targets: all or target1,target2,... Valid targets are:
957 host, x86, x86_64, sparc, powerpc, arm, aarch64, mips, hexagon,
959 host, x86, x86_64, sparc, powerpc, arm64, arm, aarch64, mips, hexagon,
958960 xcore, msp430, nvptx, systemz, r600, and cpp (default=all)]),,
959961 enableval=all)
960962 if test "$enableval" = host-only ; then
969971 sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;;
970972 powerpc) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;;
971973 aarch64) TARGETS_TO_BUILD="AArch64 $TARGETS_TO_BUILD" ;;
974 arm64) TARGETS_TO_BUILD="ARM64 $TARGETS_TO_BUILD" ;;
972975 arm) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;;
973976 mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
974977 mipsel) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
365365 set(LLVM_NATIVE_ARCH PowerPC)
366366 elseif (LLVM_NATIVE_ARCH MATCHES "aarch64")
367367 set(LLVM_NATIVE_ARCH AArch64)
368 elseif (LLVM_NATIVE_ARCH MATCHES "arm64")
369 set(LLVM_NATIVE_ARCH ARM64)
368370 elseif (LLVM_NATIVE_ARCH MATCHES "arm")
369371 set(LLVM_NATIVE_ARCH ARM)
370372 elseif (LLVM_NATIVE_ARCH MATCHES "mips")
14461446 Enable crash handling overrides (default is YES)
14471447 --enable-targets Build specific host targets: all or
14481448 target1,target2,... Valid targets are: host, x86,
1449 x86_64, sparc, powerpc, arm, aarch64, mips, hexagon,
1450 xcore, msp430, nvptx, systemz, r600, and cpp
1451 (default=all)
1449 x86_64, sparc, powerpc, arm64, arm, aarch64, mips,
1450 hexagon, xcore, msp430, nvptx, systemz, r600, and
1451 cpp (default=all)
14521452 --enable-experimental-targets
14531453 Build experimental host targets: disable or
14541454 target1,target2,... (default=disable)
41504150 amd64-* | x86_64-*) llvm_cv_target_arch="x86_64" ;;
41514151 sparc*-*) llvm_cv_target_arch="Sparc" ;;
41524152 powerpc*-*) llvm_cv_target_arch="PowerPC" ;;
4153 arm64*-*) llvm_cv_target_arch="ARM64" ;;
41534154 arm*-*) llvm_cv_target_arch="ARM" ;;
41544155 aarch64*-*) llvm_cv_target_arch="AArch64" ;;
41554156 mips-* | mips64-*) llvm_cv_target_arch="Mips" ;;
41864187 amd64-* | x86_64-*) host_arch="x86_64" ;;
41874188 sparc*-*) host_arch="Sparc" ;;
41884189 powerpc*-*) host_arch="PowerPC" ;;
4190 arm64*-*) host_arch="ARM64" ;;
41894191 arm*-*) host_arch="ARM" ;;
41904192 aarch64*-*) host_arch="AArch64" ;;
41914193 mips-* | mips64-*) host_arch="Mips" ;;
51195121 esac
51205122 fi
51215123
5122 TARGETS_WITH_JIT="AArch64 ARM Mips PowerPC SystemZ X86"
5124 TARGETS_WITH_JIT="AArch64 ARM ARM64 Mips PowerPC SystemZ X86"
51235125 TARGETS_WITH_JIT=$TARGETS_WITH_JIT
51245126
51255127
53565358
53575359 fi
53585360
5359 ALL_TARGETS="X86 Sparc PowerPC AArch64 ARM Mips XCore MSP430 CppBackend NVPTX Hexagon SystemZ R600"
5361 ALL_TARGETS="X86 Sparc PowerPC AArch64 ARM ARM64 Mips XCore MSP430 CppBackend NVPTX Hexagon SystemZ R600"
53605362 ALL_TARGETS=$ALL_TARGETS
53615363
53625364
53805382 sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;;
53815383 powerpc) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;;
53825384 aarch64) TARGETS_TO_BUILD="AArch64 $TARGETS_TO_BUILD" ;;
5385 arm64) TARGETS_TO_BUILD="ARM64 $TARGETS_TO_BUILD" ;;
53835386 arm) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;;
53845387 mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
53855388 mipsel) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
4747
4848 arm, // ARM (little endian): arm, armv.*, xscale
4949 armeb, // ARM (big endian): armeb
50 arm64, // ARM: arm64
5051 aarch64, // AArch64 (little endian): aarch64
5152 aarch64_be, // AArch64 (big endian): aarch64_be
5253 hexagon, // Hexagon: hexagon
528528 include "llvm/IR/IntrinsicsPowerPC.td"
529529 include "llvm/IR/IntrinsicsX86.td"
530530 include "llvm/IR/IntrinsicsARM.td"
531 include "llvm/IR/IntrinsicsARM64.td"
531532 include "llvm/IR/IntrinsicsAArch64.td"
532533 include "llvm/IR/IntrinsicsXCore.td"
533534 include "llvm/IR/IntrinsicsHexagon.td"
0 //===- IntrinsicsARM64.td - Defines ARM64 intrinsics -------*- tablegen -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines all of the ARM64-specific intrinsics.
10 //
11 //===----------------------------------------------------------------------===//
12
13 let TargetPrefix = "arm64" in {
14
15 def int_arm64_ldxr : Intrinsic<[llvm_i64_ty], [llvm_anyptr_ty]>;
16 def int_arm64_stxr : Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_anyptr_ty]>;
17 def int_arm64_clrex : Intrinsic<[]>;
18
19 def int_arm64_ldxp : Intrinsic<[llvm_i64_ty, llvm_i64_ty], [llvm_ptr_ty]>;
20 def int_arm64_stxp : Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_i64_ty,
21 llvm_ptr_ty]>;
22
23 def int_arm64_sdiv : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
24 LLVMMatchType<0>], [IntrNoMem]>;
25 def int_arm64_udiv : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
26 LLVMMatchType<0>], [IntrNoMem]>;
27 }
28
29 //===----------------------------------------------------------------------===//
30 // Advanced SIMD (NEON)
31
32 let TargetPrefix = "arm64" in { // All intrinsics start with "llvm.arm64.".
33 class AdvSIMD_2Scalar_Float_Intrinsic
34 : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
35 [IntrNoMem]>;
36
37 class AdvSIMD_FPToIntRounding_Intrinsic
38 : Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
39
40 class AdvSIMD_1IntArg_Intrinsic
41 : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrNoMem]>;
42 class AdvSIMD_1FloatArg_Intrinsic
43 : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
44 class AdvSIMD_1VectorArg_Intrinsic
45 : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>], [IntrNoMem]>;
46 class AdvSIMD_1VectorArg_Expand_Intrinsic
47 : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
48 class AdvSIMD_1VectorArg_Long_Intrinsic
49 : Intrinsic<[llvm_anyvector_ty], [LLVMTruncatedType<0>], [IntrNoMem]>;
50 class AdvSIMD_1IntArg_Narrow_Intrinsic
51 : Intrinsic<[llvm_anyint_ty], [llvm_anyint_ty], [IntrNoMem]>;
52 class AdvSIMD_1VectorArg_Narrow_Intrinsic
53 : Intrinsic<[llvm_anyint_ty], [LLVMExtendedType<0>], [IntrNoMem]>;
54 class AdvSIMD_1VectorArg_Int_Across_Intrinsic
55 : Intrinsic<[llvm_anyint_ty], [llvm_anyvector_ty], [IntrNoMem]>;
56 class AdvSIMD_1VectorArg_Float_Across_Intrinsic
57 : Intrinsic<[llvm_anyfloat_ty], [llvm_anyvector_ty], [IntrNoMem]>;
58
59 class AdvSIMD_2IntArg_Intrinsic
60 : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
61 [IntrNoMem]>;
62 class AdvSIMD_2FloatArg_Intrinsic
63 : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
64 [IntrNoMem]>;
65 class AdvSIMD_2VectorArg_Intrinsic
66 : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
67 [IntrNoMem]>;
68 class AdvSIMD_2VectorArg_Compare_Intrinsic
69 : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty, LLVMMatchType<1>],
70 [IntrNoMem]>;
71 class AdvSIMD_2Arg_FloatCompare_Intrinsic
72 : Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty, LLVMMatchType<1>],
73 [IntrNoMem]>;
74 class AdvSIMD_2VectorArg_Long_Intrinsic
75 : Intrinsic<[llvm_anyvector_ty],
76 [LLVMTruncatedType<0>,
77 LLVMTruncatedType<0>],
78 [IntrNoMem]>;
79 class AdvSIMD_2VectorArg_Wide_Intrinsic
80 : Intrinsic<[llvm_anyvector_ty],
81 [LLVMMatchType<0>, LLVMTruncatedType<0>],
82 [IntrNoMem]>;
83 class AdvSIMD_2VectorArg_Narrow_Intrinsic
84 : Intrinsic<[llvm_anyvector_ty],
85 [LLVMExtendedType<0>, LLVMExtendedType<0>],
86 [IntrNoMem]>;
87 class AdvSIMD_2Arg_Scalar_Narrow_Intrinsic
88 : Intrinsic<[llvm_anyint_ty],
89 [LLVMExtendedType<0>, llvm_i32_ty],
90 [IntrNoMem]>;
91 class AdvSIMD_2VectorArg_Scalar_Expand_BySize_Intrinsic
92 : Intrinsic<[llvm_anyvector_ty],
93 [llvm_anyvector_ty],
94 [IntrNoMem]>;
95 class AdvSIMD_2VectorArg_Scalar_Wide_BySize_Intrinsic
96 : Intrinsic<[llvm_anyvector_ty],
97 [LLVMTruncatedType<0>],
98 [IntrNoMem]>;
99 class AdvSIMD_2VectorArg_Scalar_Wide_Intrinsic
100 : Intrinsic<[llvm_anyvector_ty],
101 [LLVMTruncatedType<0>, llvm_i32_ty],
102 [IntrNoMem]>;
103 class AdvSIMD_2VectorArg_Tied_Narrow_Intrinsic
104 : Intrinsic<[llvm_anyvector_ty],
105 [LLVMHalfElementsVectorType<0>, llvm_anyvector_ty],
106 [IntrNoMem]>;
107
108 class AdvSIMD_3VectorArg_Intrinsic
109 : Intrinsic<[llvm_anyvector_ty],
110 [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
111 [IntrNoMem]>;
112 class AdvSIMD_3VectorArg_Scalar_Intrinsic
113 : Intrinsic<[llvm_anyvector_ty],
114 [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
115 [IntrNoMem]>;
116 class AdvSIMD_3VectorArg_Tied_Narrow_Intrinsic
117 : Intrinsic<[llvm_anyvector_ty],
118 [LLVMHalfElementsVectorType<0>, llvm_anyvector_ty,
119 LLVMMatchType<1>], [IntrNoMem]>;
120 class AdvSIMD_3VectorArg_Scalar_Tied_Narrow_Intrinsic
121 : Intrinsic<[llvm_anyvector_ty],
122 [LLVMHalfElementsVectorType<0>, llvm_anyvector_ty, llvm_i32_ty],
123 [IntrNoMem]>;
124 class AdvSIMD_CvtFxToFP_Intrinsic
125 : Intrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty, llvm_i32_ty],
126 [IntrNoMem]>;
127 class AdvSIMD_CvtFPToFx_Intrinsic
128 : Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty, llvm_i32_ty],
129 [IntrNoMem]>;
130 }
131
132 // Arithmetic ops
133
134 let Properties = [IntrNoMem] in {
135 // Vector Add Across Lanes
136 def int_arm64_neon_saddv : AdvSIMD_1VectorArg_Int_Across_Intrinsic;
137 def int_arm64_neon_uaddv : AdvSIMD_1VectorArg_Int_Across_Intrinsic;
138 def int_arm64_neon_faddv : AdvSIMD_1VectorArg_Float_Across_Intrinsic;
139
140 // Vector Long Add Across Lanes
141 def int_arm64_neon_saddlv : AdvSIMD_1VectorArg_Int_Across_Intrinsic;
142 def int_arm64_neon_uaddlv : AdvSIMD_1VectorArg_Int_Across_Intrinsic;
143
144 // Vector Halving Add
145 def int_arm64_neon_shadd : AdvSIMD_2VectorArg_Intrinsic;
146 def int_arm64_neon_uhadd : AdvSIMD_2VectorArg_Intrinsic;
147
148 // Vector Rounding Halving Add
149 def int_arm64_neon_srhadd : AdvSIMD_2VectorArg_Intrinsic;
150 def int_arm64_neon_urhadd : AdvSIMD_2VectorArg_Intrinsic;
151
152 // Vector Saturating Add
153 def int_arm64_neon_sqadd : AdvSIMD_2IntArg_Intrinsic;
154 def int_arm64_neon_suqadd : AdvSIMD_2IntArg_Intrinsic;
155 def int_arm64_neon_usqadd : AdvSIMD_2IntArg_Intrinsic;
156 def int_arm64_neon_uqadd : AdvSIMD_2IntArg_Intrinsic;
157
158 // Vector Add High-Half
159 // FIXME: this is a legacy intrinsic for aarch64_simd.h. Remove it when that
160 // header is no longer supported.
161 def int_arm64_neon_addhn : AdvSIMD_2VectorArg_Narrow_Intrinsic;
162
163 // Vector Rounding Add High-Half
164 def int_arm64_neon_raddhn : AdvSIMD_2VectorArg_Narrow_Intrinsic;
165
166 // Vector Saturating Doubling Multiply High
167 def int_arm64_neon_sqdmulh : AdvSIMD_2IntArg_Intrinsic;
168
169 // Vector Saturating Rounding Doubling Multiply High
170 def int_arm64_neon_sqrdmulh : AdvSIMD_2IntArg_Intrinsic;
171
172 // Vector Polynomial Multiply
173 def int_arm64_neon_pmul : AdvSIMD_2VectorArg_Intrinsic;
174
175 // Vector Long Multiply
176 def int_arm64_neon_smull : AdvSIMD_2VectorArg_Long_Intrinsic;
177 def int_arm64_neon_umull : AdvSIMD_2VectorArg_Long_Intrinsic;
178 def int_arm64_neon_pmull : AdvSIMD_2VectorArg_Long_Intrinsic;
179
180 // Vector Extending Multiply
181 def int_arm64_neon_fmulx : AdvSIMD_2FloatArg_Intrinsic;
182
183 // Vector Saturating Doubling Long Multiply
184 def int_arm64_neon_sqdmull : AdvSIMD_2VectorArg_Long_Intrinsic;
185 def int_arm64_neon_sqdmulls_scalar
186 : Intrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
187
188 // Vector Halving Subtract
189 def int_arm64_neon_shsub : AdvSIMD_2VectorArg_Intrinsic;
190 def int_arm64_neon_uhsub : AdvSIMD_2VectorArg_Intrinsic;
191
192 // Vector Saturating Subtract
193 def int_arm64_neon_sqsub : AdvSIMD_2IntArg_Intrinsic;
194 def int_arm64_neon_uqsub : AdvSIMD_2IntArg_Intrinsic;
195
196 // Vector Subtract High-Half
197 // FIXME: this is a legacy intrinsic for aarch64_simd.h. Remove it when that
198 // header is no longer supported.
199 def int_arm64_neon_subhn : AdvSIMD_2VectorArg_Narrow_Intrinsic;
200
201 // Vector Rounding Subtract High-Half
202 def int_arm64_neon_rsubhn : AdvSIMD_2VectorArg_Narrow_Intrinsic;
203
204 // Vector Compare Absolute Greater-than-or-equal
205 def int_arm64_neon_facge : AdvSIMD_2Arg_FloatCompare_Intrinsic;
206
207 // Vector Compare Absolute Greater-than
208 def int_arm64_neon_facgt : AdvSIMD_2Arg_FloatCompare_Intrinsic;
209
210 // Vector Absolute Difference
211 def int_arm64_neon_sabd : AdvSIMD_2VectorArg_Intrinsic;
212 def int_arm64_neon_uabd : AdvSIMD_2VectorArg_Intrinsic;
213 def int_arm64_neon_fabd : AdvSIMD_2VectorArg_Intrinsic;
214
215 // Scalar Absolute Difference
216 def int_arm64_sisd_fabd : AdvSIMD_2Scalar_Float_Intrinsic;
217
218 // Vector Max
219 def int_arm64_neon_smax : AdvSIMD_2VectorArg_Intrinsic;
220 def int_arm64_neon_umax : AdvSIMD_2VectorArg_Intrinsic;
221 def int_arm64_neon_fmax : AdvSIMD_2VectorArg_Intrinsic;
222 def int_arm64_neon_fmaxnmp : AdvSIMD_2VectorArg_Intrinsic;
223
224 // Vector Max Across Lanes
225 def int_arm64_neon_smaxv : AdvSIMD_1VectorArg_Int_Across_Intrinsic;
226 def int_arm64_neon_umaxv : AdvSIMD_1VectorArg_Int_Across_Intrinsic;
227 def int_arm64_neon_fmaxv : AdvSIMD_1VectorArg_Float_Across_Intrinsic;
228 def int_arm64_neon_fmaxnmv : AdvSIMD_1VectorArg_Float_Across_Intrinsic;
229
230 // Vector Min
231 def int_arm64_neon_smin : AdvSIMD_2VectorArg_Intrinsic;
232 def int_arm64_neon_umin : AdvSIMD_2VectorArg_Intrinsic;
233 def int_arm64_neon_fmin : AdvSIMD_2VectorArg_Intrinsic;
234 def int_arm64_neon_fminnmp : AdvSIMD_2VectorArg_Intrinsic;
235
236 // Vector Min/Max Number
237 def int_arm64_neon_fminnm : AdvSIMD_2FloatArg_Intrinsic;
238 def int_arm64_neon_fmaxnm : AdvSIMD_2FloatArg_Intrinsic;
239
240 // Vector Min Across Lanes
241 def int_arm64_neon_sminv : AdvSIMD_1VectorArg_Int_Across_Intrinsic;
242 def int_arm64_neon_uminv : AdvSIMD_1VectorArg_Int_Across_Intrinsic;
243 def int_arm64_neon_fminv : AdvSIMD_1VectorArg_Float_Across_Intrinsic;
244 def int_arm64_neon_fminnmv : AdvSIMD_1VectorArg_Float_Across_Intrinsic;
245
246 // Pairwise Add
247 def int_arm64_neon_addp : AdvSIMD_2VectorArg_Intrinsic;
248
249 // Long Pairwise Add
250 // FIXME: In theory, we shouldn't need intrinsics for saddlp or
251 // uaddlp, but tblgen's type inference currently can't handle the
252 // pattern fragments this ends up generating.
253 def int_arm64_neon_saddlp : AdvSIMD_1VectorArg_Expand_Intrinsic;
254 def int_arm64_neon_uaddlp : AdvSIMD_1VectorArg_Expand_Intrinsic;
255
256 // Folding Maximum
257 def int_arm64_neon_smaxp : AdvSIMD_2VectorArg_Intrinsic;
258 def int_arm64_neon_umaxp : AdvSIMD_2VectorArg_Intrinsic;
259 def int_arm64_neon_fmaxp : AdvSIMD_2VectorArg_Intrinsic;
260
261 // Folding Minimum
262 def int_arm64_neon_sminp : AdvSIMD_2VectorArg_Intrinsic;
263 def int_arm64_neon_uminp : AdvSIMD_2VectorArg_Intrinsic;
264 def int_arm64_neon_fminp : AdvSIMD_2VectorArg_Intrinsic;
265
266 // Reciprocal Estimate/Step
267 def int_arm64_neon_frecps : AdvSIMD_2FloatArg_Intrinsic;
268 def int_arm64_neon_frsqrts : AdvSIMD_2FloatArg_Intrinsic;
269
270 // Vector Saturating Shift Left
271 def int_arm64_neon_sqshl : AdvSIMD_2IntArg_Intrinsic;
272 def int_arm64_neon_uqshl : AdvSIMD_2IntArg_Intrinsic;
273
274 // Vector Rounding Shift Left
275 def int_arm64_neon_srshl : AdvSIMD_2IntArg_Intrinsic;
276 def int_arm64_neon_urshl : AdvSIMD_2IntArg_Intrinsic;
277
278 // Vector Saturating Rounding Shift Left
279 def int_arm64_neon_sqrshl : AdvSIMD_2IntArg_Intrinsic;
280 def int_arm64_neon_uqrshl : AdvSIMD_2IntArg_Intrinsic;
281
282 // Vector Signed->Unsigned Shift Left by Constant
283 def int_arm64_neon_sqshlu : AdvSIMD_2IntArg_Intrinsic;
284
285 // Vector Signed->Unsigned Narrowing Saturating Shift Right by Constant
286 def int_arm64_neon_sqshrun : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic;
287
288 // Vector Signed->Unsigned Rounding Narrowing Saturating Shift Right by Const
289 def int_arm64_neon_sqrshrun : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic;
290
291 // Vector Narrowing Shift Right by Constant
292 def int_arm64_neon_sqshrn : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic;
293 def int_arm64_neon_uqshrn : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic;
294
295 // Vector Rounding Narrowing Shift Right by Constant
296 def int_arm64_neon_rshrn : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic;
297
298 // Vector Rounding Narrowing Saturating Shift Right by Constant
299 def int_arm64_neon_sqrshrn : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic;
300 def int_arm64_neon_uqrshrn : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic;
301
302 // Vector Shift Left
303 def int_arm64_neon_sshl : AdvSIMD_2IntArg_Intrinsic;
304 def int_arm64_neon_ushl : AdvSIMD_2IntArg_Intrinsic;
305
306 // Vector Widening Shift Left by Constant
307 def int_arm64_neon_shll : AdvSIMD_2VectorArg_Scalar_Wide_BySize_Intrinsic;
308 def int_arm64_neon_sshll : AdvSIMD_2VectorArg_Scalar_Wide_Intrinsic;
309 def int_arm64_neon_ushll : AdvSIMD_2VectorArg_Scalar_Wide_Intrinsic;
310
311 // Vector Shift Right by Constant and Insert
312 def int_arm64_neon_vsri : AdvSIMD_3VectorArg_Scalar_Intrinsic;
313
314 // Vector Shift Left by Constant and Insert
315 def int_arm64_neon_vsli : AdvSIMD_3VectorArg_Scalar_Intrinsic;
316
317 // Vector Saturating Narrow
318 def int_arm64_neon_scalar_sqxtn: AdvSIMD_1IntArg_Narrow_Intrinsic;
319 def int_arm64_neon_scalar_uqxtn : AdvSIMD_1IntArg_Narrow_Intrinsic;
320 def int_arm64_neon_sqxtn : AdvSIMD_1VectorArg_Narrow_Intrinsic;
321 def int_arm64_neon_uqxtn : AdvSIMD_1VectorArg_Narrow_Intrinsic;
322
323 // Vector Saturating Extract and Unsigned Narrow
324 def int_arm64_neon_scalar_sqxtun : AdvSIMD_1IntArg_Narrow_Intrinsic;
325 def int_arm64_neon_sqxtun : AdvSIMD_1VectorArg_Narrow_Intrinsic;
326
327 // Vector Absolute Value
328 def int_arm64_neon_abs : AdvSIMD_1VectorArg_Intrinsic;
329
330 // Vector Saturating Absolute Value
331 def int_arm64_neon_sqabs : AdvSIMD_1IntArg_Intrinsic;
332
333 // Vector Saturating Negation
334 def int_arm64_neon_sqneg : AdvSIMD_1IntArg_Intrinsic;
335
336 // Vector Count Leading Sign Bits
337 def int_arm64_neon_cls : AdvSIMD_1VectorArg_Intrinsic;
338
339 // Vector Reciprocal Estimate
340 def int_arm64_neon_urecpe : AdvSIMD_1VectorArg_Intrinsic;
341 def int_arm64_neon_frecpe : AdvSIMD_1VectorArg_Intrinsic;
342
343 // Vector Square Root Estimate
344 def int_arm64_neon_ursqrte : AdvSIMD_1VectorArg_Intrinsic;
345 def int_arm64_neon_frsqrte : AdvSIMD_1VectorArg_Intrinsic;
346
347 // Vector Bitwise Reverse
348 def int_arm64_neon_rbit : AdvSIMD_1VectorArg_Intrinsic;
349
350 // Vector Conversions Between Half-Precision and Single-Precision.
351 def int_arm64_neon_vcvtfp2hf
352 : Intrinsic<[llvm_v4i16_ty], [llvm_v4f32_ty], [IntrNoMem]>;
353 def int_arm64_neon_vcvthf2fp
354 : Intrinsic<[llvm_v4f32_ty], [llvm_v4i16_ty], [IntrNoMem]>;
355
356 // Vector Conversions Between Floating-point and Fixed-point.
357 def int_arm64_neon_vcvtfp2fxs : AdvSIMD_CvtFPToFx_Intrinsic;
358 def int_arm64_neon_vcvtfp2fxu : AdvSIMD_CvtFPToFx_Intrinsic;
359 def int_arm64_neon_vcvtfxs2fp : AdvSIMD_CvtFxToFP_Intrinsic;
360 def int_arm64_neon_vcvtfxu2fp : AdvSIMD_CvtFxToFP_Intrinsic;
361
362 // Vector FP->Int Conversions
363 def int_arm64_neon_fcvtas : AdvSIMD_FPToIntRounding_Intrinsic;
364 def int_arm64_neon_fcvtau : AdvSIMD_FPToIntRounding_Intrinsic;
365 def int_arm64_neon_fcvtms : AdvSIMD_FPToIntRounding_Intrinsic;
366 def int_arm64_neon_fcvtmu : AdvSIMD_FPToIntRounding_Intrinsic;
367 def int_arm64_neon_fcvtns : AdvSIMD_FPToIntRounding_Intrinsic;
368 def int_arm64_neon_fcvtnu : AdvSIMD_FPToIntRounding_Intrinsic;
369 def int_arm64_neon_fcvtps : AdvSIMD_FPToIntRounding_Intrinsic;
370 def int_arm64_neon_fcvtpu : AdvSIMD_FPToIntRounding_Intrinsic;
371 def int_arm64_neon_fcvtzs : AdvSIMD_FPToIntRounding_Intrinsic;
372 def int_arm64_neon_fcvtzu : AdvSIMD_FPToIntRounding_Intrinsic;
373
374 // Vector FP Rounding: only ties to even is unrepresented by a normal
375 // intrinsic.
376 def int_arm64_neon_frintn : AdvSIMD_1FloatArg_Intrinsic;
377
378 // Scalar FP->Int conversions
379
380 // Vector FP Inexact Narrowing
381 def int_arm64_neon_fcvtxn : AdvSIMD_1VectorArg_Expand_Intrinsic;
382
383 // Scalar FP Inexact Narrowing
384 def int_arm64_sisd_fcvtxn : Intrinsic<[llvm_float_ty], [llvm_double_ty],
385 [IntrNoMem]>;
386 }
387
388 let TargetPrefix = "arm64" in { // All intrinsics start with "llvm.arm64.".
389 class AdvSIMD_2Vector2Index_Intrinsic
390 : Intrinsic<[llvm_anyvector_ty],
391 [llvm_anyvector_ty, llvm_i64_ty, LLVMMatchType<0>, llvm_i64_ty],
392 [IntrNoMem]>;
393 }
394
395 // Vector element to element moves
396 def int_arm64_neon_vcopy_lane: AdvSIMD_2Vector2Index_Intrinsic;
397
398 let TargetPrefix = "arm64" in { // All intrinsics start with "llvm.arm64.".
399 class AdvSIMD_1Vec_Load_Intrinsic
400 : Intrinsic<[llvm_anyvector_ty], [LLVMAnyPointerType<LLVMMatchType<0>>],
401 [IntrReadArgMem]>;
402 class AdvSIMD_1Vec_Store_Lane_Intrinsic
403 : Intrinsic<[], [llvm_anyvector_ty, llvm_i64_ty, llvm_anyptr_ty],
404 [IntrReadWriteArgMem, NoCapture<2>]>;
405
406 class AdvSIMD_2Vec_Load_Intrinsic
407 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
408 [LLVMAnyPointerType<LLVMMatchType<0>>],
409 [IntrReadArgMem]>;
410 class AdvSIMD_2Vec_Load_Lane_Intrinsic
411 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
412 [LLVMMatchType<0>, LLVMMatchType<0>,
413 llvm_i64_ty, llvm_anyptr_ty],
414 [IntrReadArgMem]>;
415 class AdvSIMD_2Vec_Store_Intrinsic
416 : Intrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>,
417 LLVMAnyPointerType<LLVMMatchType<0>>],
418 [IntrReadWriteArgMem, NoCapture<2>]>;
419 class AdvSIMD_2Vec_Store_Lane_Intrinsic
420 : Intrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>,
421 llvm_i64_ty, llvm_anyptr_ty],
422 [IntrReadWriteArgMem, NoCapture<3>]>;
423
424 class AdvSIMD_3Vec_Load_Intrinsic
425 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>],
426 [LLVMAnyPointerType<LLVMMatchType<0>>],
427 [IntrReadArgMem]>;
428 class AdvSIMD_3Vec_Load_Lane_Intrinsic
429 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>],
430 [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>,
431 llvm_i64_ty, llvm_anyptr_ty],
432 [IntrReadArgMem]>;
433 class AdvSIMD_3Vec_Store_Intrinsic
434 : Intrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>,
435 LLVMMatchType<0>, LLVMAnyPointerType<LLVMMatchType<0>>],
436 [IntrReadWriteArgMem, NoCapture<3>]>;
437 class AdvSIMD_3Vec_Store_Lane_Intrinsic
438 : Intrinsic<[], [llvm_anyvector_ty,
439 LLVMMatchType<0>, LLVMMatchType<0>,
440 llvm_i64_ty, llvm_anyptr_ty],
441 [IntrReadWriteArgMem, NoCapture<4>]>;
442
443 class AdvSIMD_4Vec_Load_Intrinsic
444 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
445 LLVMMatchType<0>, LLVMMatchType<0>],
446 [LLVMAnyPointerType<LLVMMatchType<0>>],
447 [IntrReadArgMem]>;
448 class AdvSIMD_4Vec_Load_Lane_Intrinsic
449 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
450 LLVMMatchType<0>, LLVMMatchType<0>],
451 [LLVMMatchType<0>, LLVMMatchType<0>,
452 LLVMMatchType<0>, LLVMMatchType<0>,
453 llvm_i64_ty, llvm_anyptr_ty],
454 [IntrReadArgMem]>;
455 class AdvSIMD_4Vec_Store_Intrinsic
456 : Intrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>,
457 LLVMMatchType<0>, LLVMMatchType<0>,
458 LLVMAnyPointerType<LLVMMatchType<0>>],
459 [IntrReadWriteArgMem, NoCapture<4>]>;
460 class AdvSIMD_4Vec_Store_Lane_Intrinsic
461 : Intrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>,
462 LLVMMatchType<0>, LLVMMatchType<0>,
463 llvm_i64_ty, llvm_anyptr_ty],
464 [IntrReadWriteArgMem, NoCapture<5>]>;
465 }
466
467 // Memory ops
468
469 def int_arm64_neon_ld1x2 : AdvSIMD_2Vec_Load_Intrinsic;
470 def int_arm64_neon_ld1x3 : AdvSIMD_3Vec_Load_Intrinsic;
471 def int_arm64_neon_ld1x4 : AdvSIMD_4Vec_Load_Intrinsic;
472
473 def int_arm64_neon_st1x2 : AdvSIMD_2Vec_Store_Intrinsic;
474 def int_arm64_neon_st1x3 : AdvSIMD_3Vec_Store_Intrinsic;
475 def int_arm64_neon_st1x4 : AdvSIMD_4Vec_Store_Intrinsic;
476
477 def int_arm64_neon_ld2 : AdvSIMD_2Vec_Load_Intrinsic;
478 def int_arm64_neon_ld3 : AdvSIMD_3Vec_Load_Intrinsic;
479 def int_arm64_neon_ld4 : AdvSIMD_4Vec_Load_Intrinsic;
480
481 def int_arm64_neon_ld2lane : AdvSIMD_2Vec_Load_Lane_Intrinsic;
482 def int_arm64_neon_ld3lane : AdvSIMD_3Vec_Load_Lane_Intrinsic;
483 def int_arm64_neon_ld4lane : AdvSIMD_4Vec_Load_Lane_Intrinsic;
484
485 def int_arm64_neon_ld2r : AdvSIMD_2Vec_Load_Intrinsic;
486 def int_arm64_neon_ld3r : AdvSIMD_3Vec_Load_Intrinsic;
487 def int_arm64_neon_ld4r : AdvSIMD_4Vec_Load_Intrinsic;
488
489 def int_arm64_neon_st2 : AdvSIMD_2Vec_Store_Intrinsic;
490 def int_arm64_neon_st3 : AdvSIMD_3Vec_Store_Intrinsic;
491 def int_arm64_neon_st4 : AdvSIMD_4Vec_Store_Intrinsic;
492
493 def int_arm64_neon_st2lane : AdvSIMD_2Vec_Store_Lane_Intrinsic;
494 def int_arm64_neon_st3lane : AdvSIMD_3Vec_Store_Lane_Intrinsic;
495 def int_arm64_neon_st4lane : AdvSIMD_4Vec_Store_Lane_Intrinsic;
496
497 let TargetPrefix = "arm64" in { // All intrinsics start with "llvm.arm64.".
498 class AdvSIMD_Tbl1_Intrinsic
499 : Intrinsic<[llvm_anyvector_ty], [llvm_v16i8_ty, LLVMMatchType<0>],
500 [IntrNoMem]>;
501 class AdvSIMD_Tbl2_Intrinsic
502 : Intrinsic<[llvm_anyvector_ty],
503 [llvm_v16i8_ty, llvm_v16i8_ty, LLVMMatchType<0>], [IntrNoMem]>;
504 class AdvSIMD_Tbl3_Intrinsic
505 : Intrinsic<[llvm_anyvector_ty],
506 [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty,
507 LLVMMatchType<0>],
508 [IntrNoMem]>;
509 class AdvSIMD_Tbl4_Intrinsic
510 : Intrinsic<[llvm_anyvector_ty],
511 [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty,
512 LLVMMatchType<0>],
513 [IntrNoMem]>;
514
515 class AdvSIMD_Tbx1_Intrinsic
516 : Intrinsic<[llvm_anyvector_ty],
517 [LLVMMatchType<0>, llvm_v16i8_ty, LLVMMatchType<0>],
518 [IntrNoMem]>;
519 class AdvSIMD_Tbx2_Intrinsic
520 : Intrinsic<[llvm_anyvector_ty],
521 [LLVMMatchType<0>, llvm_v16i8_ty, llvm_v16i8_ty,
522 LLVMMatchType<0>],
523 [IntrNoMem]>;
524 class AdvSIMD_Tbx3_Intrinsic
525 : Intrinsic<[llvm_anyvector_ty],
526 [LLVMMatchType<0>, llvm_v16i8_ty, llvm_v16i8_ty,
527 llvm_v16i8_ty, LLVMMatchType<0>],
528 [IntrNoMem]>;
529 class AdvSIMD_Tbx4_Intrinsic
530 : Intrinsic<[llvm_anyvector_ty],
531 [LLVMMatchType<0>, llvm_v16i8_ty, llvm_v16i8_ty,
532 llvm_v16i8_ty, llvm_v16i8_ty, LLVMMatchType<0>],
533 [IntrNoMem]>;
534 }
535 def int_arm64_neon_tbl1 : AdvSIMD_Tbl1_Intrinsic;
536 def int_arm64_neon_tbl2 : AdvSIMD_Tbl2_Intrinsic;
537 def int_arm64_neon_tbl3 : AdvSIMD_Tbl3_Intrinsic;
538 def int_arm64_neon_tbl4 : AdvSIMD_Tbl4_Intrinsic;
539
540 def int_arm64_neon_tbx1 : AdvSIMD_Tbx1_Intrinsic;
541 def int_arm64_neon_tbx2 : AdvSIMD_Tbx2_Intrinsic;
542 def int_arm64_neon_tbx3 : AdvSIMD_Tbx3_Intrinsic;
543 def int_arm64_neon_tbx4 : AdvSIMD_Tbx4_Intrinsic;
544
545 let TargetPrefix = "arm64" in {
546 class Crypto_AES_DataKey_Intrinsic
547 : Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
548
549 class Crypto_AES_Data_Intrinsic
550 : Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>;
551
552 // SHA intrinsic taking 5 words of the hash (v4i32, i32) and 4 of the schedule
553 // (v4i32).
554 class Crypto_SHA_5Hash4Schedule_Intrinsic
555 : Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty, llvm_v4i32_ty],
556 [IntrNoMem]>;
557
558 // SHA intrinsic taking a single word of the hash (i32) and returning
559 // a single word.
560 class Crypto_SHA_1Hash_Intrinsic
561 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
562
563 // SHA intrinsic taking 8 words of the schedule
564 class Crypto_SHA_8Schedule_Intrinsic
565 : Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
566
567 // SHA intrinsic taking 12 words of the schedule
568 class Crypto_SHA_12Schedule_Intrinsic
569 : Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
570 [IntrNoMem]>;
571
572 // SHA intrinsic taking 8 words of the hash and 4 of the schedule.
573 class Crypto_SHA_8Hash4Schedule_Intrinsic
574 : Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
575 [IntrNoMem]>;
576 }
577
578 // AES
579 def int_arm64_crypto_aese : Crypto_AES_DataKey_Intrinsic;
580 def int_arm64_crypto_aesd : Crypto_AES_DataKey_Intrinsic;
581 def int_arm64_crypto_aesmc : Crypto_AES_Data_Intrinsic;
582 def int_arm64_crypto_aesimc : Crypto_AES_Data_Intrinsic;
583
584 // SHA1
585 def int_arm64_crypto_sha1c : Crypto_SHA_5Hash4Schedule_Intrinsic;
586 def int_arm64_crypto_sha1p : Crypto_SHA_5Hash4Schedule_Intrinsic;
587 def int_arm64_crypto_sha1m : Crypto_SHA_5Hash4Schedule_Intrinsic;
588 def int_arm64_crypto_sha1h : Crypto_SHA_1Hash_Intrinsic;
589
590 def int_arm64_crypto_sha1su0 : Crypto_SHA_12Schedule_Intrinsic;
591 def int_arm64_crypto_sha1su1 : Crypto_SHA_8Schedule_Intrinsic;
592
593 // SHA256
594 def int_arm64_crypto_sha256h : Crypto_SHA_8Hash4Schedule_Intrinsic;
595 def int_arm64_crypto_sha256h2 : Crypto_SHA_8Hash4Schedule_Intrinsic;
596 def int_arm64_crypto_sha256su0 : Crypto_SHA_8Schedule_Intrinsic;
597 def int_arm64_crypto_sha256su1 : Crypto_SHA_12Schedule_Intrinsic;
598
599 //===----------------------------------------------------------------------===//
600 // CRC32
601
602 let TargetPrefix = "arm64" in {
603
604 def int_arm64_crc32b : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
605 [IntrNoMem]>;
606 def int_arm64_crc32cb : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
607 [IntrNoMem]>;
608 def int_arm64_crc32h : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
609 [IntrNoMem]>;
610 def int_arm64_crc32ch : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
611 [IntrNoMem]>;
612 def int_arm64_crc32w : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
613 [IntrNoMem]>;
614 def int_arm64_crc32cw : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
615 [IntrNoMem]>;
616 def int_arm64_crc32x : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty],
617 [IntrNoMem]>;
618 def int_arm64_crc32cx : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty],
619 [IntrNoMem]>;
620 }
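Because TargetPrefix is "arm64", every definition above surfaces in IR under the "llvm.arm64." prefix (e.g. llvm.arm64.crc32b). A hedged C++ sketch of emitting a call to one of the non-overloaded CRC32 intrinsics; the enumerator name Intrinsic::arm64_crc32b is the TableGen-generated spelling assumed here, not something spelled out in the diff:

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"

using namespace llvm;

// Emit a call to llvm.arm64.crc32b : (i32 crc, i32 byte) -> i32.
Value *emitCrc32b(Module &M, IRBuilder<> &B, Value *Crc, Value *Byte) {
  // crc32b is not overloaded, so no extra type list is needed.
  // Intrinsic::arm64_crc32b is an assumption about the generated enum name.
  Function *F = Intrinsic::getDeclaration(&M, Intrinsic::arm64_crc32b);
  return B.CreateCall2(F, Crc, Byte, "crc");
}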
157157 VK_TLSLDM,
158158 VK_TPOFF,
159159 VK_DTPOFF,
160 VK_TLVP, // Mach-O thread local variable relocation
160 VK_TLVP, // Mach-O thread local variable relocations
161 VK_TLVPPAGE,
162 VK_TLVPPAGEOFF,
163 VK_PAGE,
164 VK_PAGEOFF,
165 VK_GOTPAGE,
166 VK_GOTPAGEOFF,
161167 VK_SECREL,
162168 VK_WEAKREF, // The link between the symbols in .weakref foo, bar
163169
407407 ARM_RELOC_HALF = 8,
408408 ARM_RELOC_HALF_SECTDIFF = 9,
409409
410 // Constant values for the r_type field in an ARM64 architecture
411 // llvm::MachO::relocation_info or llvm::MachO::scattered_relocation_info
412 // structure.
413
414 // For pointers.
415 ARM64_RELOC_UNSIGNED = 0,
416 // Must be followed by an ARM64_RELOC_UNSIGNED
417 ARM64_RELOC_SUBTRACTOR = 1,
418 // A B/BL instruction with 26-bit displacement.
419 ARM64_RELOC_BRANCH26 = 2,
420 // PC-rel distance to page of target.
421 ARM64_RELOC_PAGE21 = 3,
422 // Offset within page, scaled by r_length.
423 ARM64_RELOC_PAGEOFF12 = 4,
424 // PC-rel distance to page of GOT slot.
425 ARM64_RELOC_GOT_LOAD_PAGE21 = 5,
426 // Offset within page of GOT slot, scaled by r_length.
427 ARM64_RELOC_GOT_LOAD_PAGEOFF12 = 6,
428 // For pointers to GOT slots.
429 ARM64_RELOC_POINTER_TO_GOT = 7,
430 // PC-rel distance to page of TLVP slot.
431 ARM64_RELOC_TLVP_LOAD_PAGE21 = 8,
432 // Offset within page of TLVP slot, scaled by r_length.
433 ARM64_RELOC_TLVP_LOAD_PAGEOFF12 = 9,
434 // Must be followed by ARM64_RELOC_PAGE21 or ARM64_RELOC_PAGEOFF12.
435 ARM64_RELOC_ADDEND = 10,
436
437
410438 // Constant values for the r_type field in an x86_64 architecture
411439 // llvm::MachO::relocation_info or llvm::MachO::scattered_relocation_info
412440 // structure
913941 /* CPU_TYPE_MIPS = 8, */
914942 CPU_TYPE_MC98000 = 10, // Old Motorola PowerPC
915943 CPU_TYPE_ARM = 12,
944 CPU_TYPE_ARM64 = CPU_TYPE_ARM | CPU_ARCH_ABI64,
916945 CPU_TYPE_SPARC = 14,
917946 CPU_TYPE_POWERPC = 18,
918947 CPU_TYPE_POWERPC64 = CPU_TYPE_POWERPC | CPU_ARCH_ABI64
9861015 CPU_SUBTYPE_ARM_V7EM = 16
9871016 };
9881017
1018 enum CPUSubTypeARM64 {
1019 CPU_SUBTYPE_ARM64_ALL = 0
1020 };
1021
9891022 enum CPUSubTypeSPARC {
9901023 CPU_SUBTYPE_SPARC_ALL = 0
9911024 };
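The new cputype is simply the existing ARM value with the 64-bit ABI flag set: MachO.h already defines CPU_TYPE_ARM = 12 and CPU_ARCH_ABI64 = 0x01000000, so CPU_TYPE_ARM64 works out to 0x0100000C. A small C++11 sketch (not part of the diff) making that explicit:

#include "llvm/Support/MachO.h"

// CPU_TYPE_ARM64 == CPU_TYPE_ARM | CPU_ARCH_ABI64 == 12 | 0x01000000.
static_assert(llvm::MachO::CPU_TYPE_ARM64 == 0x0100000C,
              "arm64 cputype is CPU_TYPE_ARM with the 64-bit ABI bit set");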
361361 def extractelt : SDNode<"ISD::EXTRACT_VECTOR_ELT", SDTVecExtract>;
362362 def insertelt : SDNode<"ISD::INSERT_VECTOR_ELT", SDTVecInsert>;
363363
364
365364 def fadd : SDNode<"ISD::FADD" , SDTFPBinOp, [SDNPCommutative]>;
366365 def fsub : SDNode<"ISD::FSUB" , SDTFPBinOp>;
367366 def fmul : SDNode<"ISD::FMUL" , SDTFPBinOp, [SDNPCommutative]>;
465464 def vector_insert : SDNode<"ISD::INSERT_VECTOR_ELT",
466465 SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisPtrTy<3>]>, []>;
467466 def concat_vectors : SDNode<"ISD::CONCAT_VECTORS",
468 SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<1, 2>]>,[]>;
467 SDTypeProfile<1, 2, [SDTCisSubVecOfVec<1, 0>, SDTCisSameAs<1, 2>]>,[]>;
469468
470469 // This operator does not do subvector type checking. The ARM
471470 // backend, at least, needs it.
9595 #define LLVMDisassembler_VariantKind_ARM_LO16 2 /* :lower16: */
9696
9797 /**
98 * The ARM64 target VariantKinds.
99 */
100 #define LLVMDisassembler_VariantKind_ARM64_PAGE 1 /* @page */
101 #define LLVMDisassembler_VariantKind_ARM64_PAGEOFF 2 /* @pageoff */
102 #define LLVMDisassembler_VariantKind_ARM64_GOTPAGE 3 /* @gotpage */
103 #define LLVMDisassembler_VariantKind_ARM64_GOTPAGEOFF 4 /* @gotpageoff */
104 #define LLVMDisassembler_VariantKind_ARM64_TLVP 5 /* @tlvppage */
105 #define LLVMDisassembler_VariantKind_ARM64_TLVOFF 6 /* @tlvppageoff */
106
107 /**
98108 * The type for the symbol lookup function. This may be called by the
99109 * disassembler for things like adding a comment for a PC plus a constant
100110 * offset load instruction to use a symbol name instead of a load address value.
122132 /* The input reference is from a PC relative load instruction. */
123133 #define LLVMDisassembler_ReferenceType_In_PCrel_Load 2
124134
135 /* The input reference is from an ARM64::ADRP instruction. */
136 #define LLVMDisassembler_ReferenceType_In_ARM64_ADRP 0x100000001
137 /* The input reference is from an ARM64::ADDXri instruction. */
138 #define LLVMDisassembler_ReferenceType_In_ARM64_ADDXri 0x100000002
139 /* The input reference is from an ARM64::LDRXui instruction. */
140 #define LLVMDisassembler_ReferenceType_In_ARM64_LDRXui 0x100000003
141 /* The input reference is from an ARM64::LDRXl instruction. */
142 #define LLVMDisassembler_ReferenceType_In_ARM64_LDRXl 0x100000004
143 /* The input reference is from an ARM64::ADR instruction. */
144 #define LLVMDisassembler_ReferenceType_In_ARM64_ADR 0x100000005
145
125146 /* The output reference is to a symbol stub. */
126147 #define LLVMDisassembler_ReferenceType_Out_SymbolStub 1
127148 /* The output reference is to a symbol address in a literal pool. */
165165 case Triple::thumb:
166166 resolveARMRelocation(LocalAddress, FinalAddress, (uintptr_t)Value, isPCRel,
167167 MachoType, Size, Addend);
168 break;
169 case Triple::arm64:
170 resolveARM64Relocation(LocalAddress, FinalAddress, (uintptr_t)Value,
171 isPCRel, MachoType, Size, Addend);
168172 break;
169173 }
170174 }
287291 case MachO::ARM_RELOC_SECTDIFF:
288292 case MachO::ARM_RELOC_LOCAL_SECTDIFF:
289293 case MachO::ARM_RELOC_PB_LA_PTR:
294 return Error("Relocation type not implemented yet!");
295 }
296 return false;
297 }
298
299 bool RuntimeDyldMachO::resolveARM64Relocation(uint8_t *LocalAddress,
300 uint64_t FinalAddress,
301 uint64_t Value, bool isPCRel,
302 unsigned Type, unsigned Size,
303 int64_t Addend) {
304 // If the relocation is PC-relative, the value to be encoded is the
305 // pointer difference.
306 if (isPCRel)
307 Value -= FinalAddress;
308
309 switch (Type) {
310 default:
311 llvm_unreachable("Invalid relocation type!");
312 case MachO::ARM64_RELOC_UNSIGNED: {
313 // Mask in the target value a byte at a time (we don't have an alignment
314 // guarantee for the target address, so this is safest).
315 uint8_t *p = (uint8_t *)LocalAddress;
316 for (unsigned i = 0; i < Size; ++i) {
317 *p++ = (uint8_t)Value;
318 Value >>= 8;
319 }
320 break;
321 }
322 case MachO::ARM64_RELOC_BRANCH26: {
323 // Mask the value into the target address. We know instructions are
324 // 32-bit aligned, so we can do it all at once.
325 uint32_t *p = (uint32_t *)LocalAddress;
326 // The low two bits of the value are not encoded.
327 Value >>= 2;
328 // Mask the value to 26 bits.
329 Value &= 0x3ffffff;
330 // Insert the value into the instruction.
331 *p = (*p & ~0x3ffffff) | Value;
332 break;
333 }
334 case MachO::ARM64_RELOC_SUBTRACTOR:
335 case MachO::ARM64_RELOC_PAGE21:
336 case MachO::ARM64_RELOC_PAGEOFF12:
337 case MachO::ARM64_RELOC_GOT_LOAD_PAGE21:
338 case MachO::ARM64_RELOC_GOT_LOAD_PAGEOFF12:
339 case MachO::ARM64_RELOC_POINTER_TO_GOT:
340 case MachO::ARM64_RELOC_TLVP_LOAD_PAGE21:
341 case MachO::ARM64_RELOC_TLVP_LOAD_PAGEOFF12:
342 case MachO::ARM64_RELOC_ADDEND:
290343 return Error("Relocation type not implemented yet!");
291344 }
292345 return false;
3333 bool resolveARMRelocation(uint8_t *LocalAddress, uint64_t FinalAddress,
3434 uint64_t Value, bool isPCRel, unsigned Type,
3535 unsigned Size, int64_t Addend);
36 bool resolveARM64Relocation(uint8_t *LocalAddress, uint64_t FinalAddress,
37 uint64_t Value, bool IsPCRel, unsigned Type,
38 unsigned Size, int64_t Addend);
3639
3740 void resolveRelocation(const SectionEntry &Section, uint64_t Offset,
3841 uint64_t Value, uint32_t Type, int64_t Addend,
320320 MCpu = "core2";
321321 else if (Triple.getArch() == llvm::Triple::x86)
322322 MCpu = "yonah";
323 else if (Triple.getArch() == llvm::Triple::arm64)
324 MCpu = "cyclone";
323325 }
324326
325327 TargetMach = march->createTargetMachine(TripleStr, MCpu, FeatureStr, Options,
167167 CPU = "core2";
168168 else if (Triple.getArch() == llvm::Triple::x86)
169169 CPU = "yonah";
170 else if (Triple.getArch() == llvm::Triple::arm64)
171 CPU = "cyclone";
170172 }
171173
172174 TargetMachine *target = march->createTargetMachine(TripleStr, CPU, FeatureStr,
178178 case VK_TPOFF: return "TPOFF";
179179 case VK_DTPOFF: return "DTPOFF";
180180 case VK_TLVP: return "TLVP";
181 case VK_TLVPPAGE: return "TLVPPAGE";
182 case VK_TLVPPAGEOFF: return "TLVPPAGEOFF";
183 case VK_PAGE: return "PAGE";
184 case VK_PAGEOFF: return "PAGEOFF";
185 case VK_GOTPAGE: return "GOTPAGE";
186 case VK_GOTPAGEOFF: return "GOTPAGEOFF";
181187 case VK_SECREL: return "SECREL32";
182188 case VK_WEAKREF: return "WEAKREF";
183189 case VK_ARM_NONE: return "none";
299305 .Case("dtpoff", VK_DTPOFF)
300306 .Case("TLVP", VK_TLVP)
301307 .Case("tlvp", VK_TLVP)
308 .Case("TLVPPAGE", VK_TLVPPAGE)
309 .Case("tlvppage", VK_TLVPPAGE)
310 .Case("TLVPPAGEOFF", VK_TLVPPAGEOFF)
311 .Case("tlvppageoff", VK_TLVPPAGEOFF)
312 .Case("PAGE", VK_PAGE)
313 .Case("page", VK_PAGE)
314 .Case("PAGEOFF", VK_PAGEOFF)
315 .Case("pageoff", VK_PAGEOFF)
316 .Case("GOTPAGE", VK_GOTPAGE)
317 .Case("gotpage", VK_GOTPAGE)
318 .Case("GOTPAGEOFF", VK_GOTPAGEOFF)
319 .Case("gotpageoff", VK_GOTPAGEOFF)
302320 .Case("IMGREL", VK_COFF_IMGREL32)
303321 .Case("imgrel", VK_COFF_IMGREL32)
304322 .Case("SECREL32", VK_SECREL)
2020 // MachO
2121 IsFunctionEHFrameSymbolPrivate = false;
2222 SupportsWeakOmittedEHFrame = false;
23
24 if (T.isOSDarwin() && T.getArch() == Triple::arm64)
25 SupportsCompactUnwindWithoutEHFrame = true;
2326
2427 PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel
2528 | dwarf::DW_EH_PE_sdata4;
145148
146149 COFFDebugSymbolsSection = 0;
147150
148 if (T.isMacOSX() && !T.isMacOSXVersionLT(10, 6)) {
151 if ((T.isMacOSX() && !T.isMacOSXVersionLT(10, 6)) ||
152 (T.isOSDarwin() && T.getArch() == Triple::arm64)) {
149153 CompactUnwindSection =
150154 Ctx->getMachOSection("__LD", "__compact_unwind",
151155 MachO::S_ATTR_DEBUG,
153157
154158 if (T.getArch() == Triple::x86_64 || T.getArch() == Triple::x86)
155159 CompactUnwindDwarfEHFrameOnly = 0x04000000;
160 else if (T.getArch() == Triple::arm64)
161 CompactUnwindDwarfEHFrameOnly = 0x03000000;
156162 }
157163
158164 // Debug Information.
762768 // cellspu-apple-darwin. Perhaps we should fix in Triple?
763769 if ((Arch == Triple::x86 || Arch == Triple::x86_64 ||
764770 Arch == Triple::arm || Arch == Triple::thumb ||
771 Arch == Triple::arm64 ||
765772 Arch == Triple::ppc || Arch == Triple::ppc64 ||
766773 Arch == Triple::UnknownArch) &&
767774 (T.isOSDarwin() || T.isOSBinFormatMachO())) {
1212 //===----------------------------------------------------------------------===//
1313
1414 #include "llvm/Object/MachO.h"
15 #include "llvm/ADT/STLExtras.h"
1516 #include "llvm/ADT/Triple.h"
1617 #include "llvm/Support/DataExtractor.h"
1718 #include "llvm/Support/Format.h"
928929 "ARM_RELOC_HALF_SECTDIFF" };
929930
930931 if (RType > 9)
932 res = "Unknown";
933 else
934 res = Table[RType];
935 break;
936 }
937 case Triple::arm64:
938 case Triple::aarch64: {
939 static const char *const Table[] = {
940 "ARM64_RELOC_UNSIGNED", "ARM64_RELOC_SUBTRACTOR",
941 "ARM64_RELOC_BRANCH26", "ARM64_RELOC_PAGE21",
942 "ARM64_RELOC_PAGEOFF12", "ARM64_RELOC_GOT_LOAD_PAGE21",
943 "ARM64_RELOC_GOT_LOAD_PAGEOFF12", "ARM64_RELOC_POINTER_TO_GOT",
944 "ARM64_RELOC_TLVP_LOAD_PAGE21", "ARM64_RELOC_TLVP_LOAD_PAGEOFF12",
945 "ARM64_RELOC_ADDEND"
946 };
947
948 if (RType >= array_lengthof(Table))
931949 res = "Unknown";
932950 else
933951 res = Table[RType];
12551273 switch (CPUType) {
12561274 case llvm::MachO::CPU_TYPE_X86_64:
12571275 return "Mach-O 64-bit x86-64";
1276 case llvm::MachO::CPU_TYPE_ARM64:
1277 return "Mach-O arm64";
12581278 case llvm::MachO::CPU_TYPE_POWERPC64:
12591279 return "Mach-O 64-bit ppc64";
12601280 default:
12701290 return Triple::x86_64;
12711291 case llvm::MachO::CPU_TYPE_ARM:
12721292 return Triple::arm;
1293 case llvm::MachO::CPU_TYPE_ARM64:
1294 return Triple::arm64;
12731295 case llvm::MachO::CPU_TYPE_POWERPC:
12741296 return Triple::ppc;
12751297 case llvm::MachO::CPU_TYPE_POWERPC64:
2222 case aarch64_be: return "aarch64_be";
2323 case arm: return "arm";
2424 case armeb: return "armeb";
25 case arm64: return "arm64";
2526 case hexagon: return "hexagon";
2627 case mips: return "mips";
2728 case mipsel: return "mipsel";
6566 case thumb:
6667 case thumbeb: return "arm";
6768
69 case arm64: return "arm64";
70
6871 case ppc64:
6972 case ppc64le:
7073 case ppc: return "ppc";
9093
9194 case nvptx: return "nvptx";
9295 case nvptx64: return "nvptx";
96
9397 case le32: return "le32";
9498 case amdil: return "amdil";
9599 case spir: return "spir";
172176 .Case("aarch64_be", aarch64_be)
173177 .Case("arm", arm)
174178 .Case("armeb", armeb)
179 .Case("arm64", arm64)
175180 .Case("mips", mips)
176181 .Case("mipsel", mipsel)
177182 .Case("mips64", mips64)
218223 .Cases("armv6", "thumbv6", "armv6")
219224 .Cases("armv7", "thumbv7", "armv7")
220225 .Case("armeb", "armeb")
226 .Case("arm64", "arm64")
221227 .Case("r600", "r600")
222228 .Case("nvptx", "nvptx")
223229 .Case("nvptx64", "nvptx64")
249255 .StartsWith("thumbv", Triple::thumb)
250256 .Case("thumbeb", Triple::thumbeb)
251257 .StartsWith("thumbebv", Triple::thumbeb)
258 .Case("arm64", Triple::arm64)
252259 .Case("msp430", Triple::msp430)
253260 .Cases("mips", "mipseb", "mipsallegrex", Triple::mips)
254261 .Cases("mipsel", "mipsallegrexel", Triple::mipsel)
680687 break;
681688 case IOS:
682689 getOSVersion(Major, Minor, Micro);
683 // Default to 5.0.
690 // Default to 5.0 (or 7.0 for arm64).
684691 if (Major == 0)
685 Major = 5;
692 Major = (getArch() == arm64) ? 7 : 5;
686693 break;
687694 }
688695 }
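With this change, an iOS triple that carries no explicit OS version now defaults to 7.0 when the architecture is arm64 and stays at 5.0 otherwise. A minimal C++ sketch (not from the commit) of the resulting Triple::getiOSVersion behaviour:

#include "llvm/ADT/Triple.h"
#include <cassert>

void checkDefaultiOSVersion() {
  unsigned Major, Minor, Micro;
  llvm::Triple("arm64-apple-ios").getiOSVersion(Major, Minor, Micro);
  assert(Major == 7);   // arm64 default bumped to 7.0
  llvm::Triple("armv7-apple-ios").getiOSVersion(Major, Minor, Micro);
  assert(Major == 5);   // other architectures keep the 5.0 default
}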
770777 case llvm::Triple::spir:
771778 return 32;
772779
780 case llvm::Triple::arm64:
773781 case llvm::Triple::aarch64:
774782 case llvm::Triple::aarch64_be:
775783 case llvm::Triple::mips64:
837845 case Triple::sparcv9: T.setArch(Triple::sparc); break;
838846 case Triple::x86_64: T.setArch(Triple::x86); break;
839847 case Triple::spir64: T.setArch(Triple::spir); break;
848 case Triple::arm64: T.setArch(Triple::arm); break;
840849 }
841850 return T;
842851 }
846855 switch (getArch()) {
847856 case Triple::UnknownArch:
848857 case Triple::amdil:
849 case Triple::arm:
850858 case Triple::armeb:
851859 case Triple::hexagon:
852860 case Triple::le32:
870878 case Triple::sparcv9:
871879 case Triple::systemz:
872880 case Triple::x86_64:
881 case Triple::arm64:
873882 // Already 64-bit.
874883 break;
875884
880889 case Triple::sparc: T.setArch(Triple::sparcv9); break;
881890 case Triple::x86: T.setArch(Triple::x86_64); break;
882891 case Triple::spir: T.setArch(Triple::spir64); break;
892 case Triple::arm: T.setArch(Triple::arm64); break;
883893 }
884894 return T;
885895 }
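These two hunks make arm and arm64 map to each other through Triple's pointer-width conversion helpers. A minimal C++ sketch (not from the commit):

#include "llvm/ADT/Triple.h"
#include <cassert>

void checkArchVariants() {
  // 32-bit ARM widens to the new arm64 architecture...
  llvm::Triple T("armv7-apple-ios");
  assert(T.get64BitArchVariant().getArch() == llvm::Triple::arm64);

  // ...and arm64 narrows back to 32-bit ARM.
  llvm::Triple U("arm64-apple-ios");
  assert(U.get32BitArchVariant().getArch() == llvm::Triple::arm);
}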
204204 void* start = NearBlock ? (unsigned char*)NearBlock->base() +
205205 NearBlock->size() : 0;
206206
207 #if defined(__APPLE__) && defined(__arm__)
207 #if defined(__APPLE__) && (defined(__arm__) || defined(__arm64__))
208208 void *pa = ::mmap(start, PageSize*NumPages, PROT_READ|PROT_EXEC,
209209 flags, fd, 0);
210210 #else
219219 return MemoryBlock();
220220 }
221221
222 #if defined(__APPLE__) && defined(__arm__)
222 #if defined(__APPLE__) && (defined(__arm__) || defined(__arm64__))
223223 kern_return_t kr = vm_protect(mach_task_self(), (vm_address_t)pa,
224224 (vm_size_t)(PageSize*NumPages), 0,
225225 VM_PROT_READ | VM_PROT_EXECUTE | VM_PROT_COPY);
252252 }
253253
254254 bool Memory::setWritable (MemoryBlock &M, std::string *ErrMsg) {
255 #if defined(__APPLE__) && defined(__arm__)
255 #if defined(__APPLE__) && (defined(__arm__) || defined(__arm64__))
256256 if (M.Address == 0 || M.Size == 0) return false;
257257 Memory::InvalidateInstructionCache(M.Address, M.Size);
258258 kern_return_t kr = vm_protect(mach_task_self(), (vm_address_t)M.Address,
264264 }
265265
266266 bool Memory::setExecutable (MemoryBlock &M, std::string *ErrMsg) {
267 #if defined(__APPLE__) && defined(__arm__)
267 #if defined(__APPLE__) && (defined(__arm__) || defined(__arm64__))
268268 if (M.Address == 0 || M.Size == 0) return false;
269269 Memory::InvalidateInstructionCache(M.Address, M.Size);
270270 kern_return_t kr = vm_protect(mach_task_self(), (vm_address_t)M.Address,
279279 }
280280
281281 bool Memory::setRangeWritable(const void *Addr, size_t Size) {
282 #if defined(__APPLE__) && defined(__arm__)
282 #if defined(__APPLE__) && (defined(__arm__) || defined(__arm64__))
283283 kern_return_t kr = vm_protect(mach_task_self(), (vm_address_t)Addr,
284284 (vm_size_t)Size, 0,
285285 VM_PROT_READ | VM_PROT_WRITE);
290290 }
291291
292292 bool Memory::setRangeExecutable(const void *Addr, size_t Size) {
293 #if defined(__APPLE__) && defined(__arm__)
293 #if defined(__APPLE__) && (defined(__arm__) || defined(__arm64__))
294294 kern_return_t kr = vm_protect(mach_task_self(), (vm_address_t)Addr,
295295 (vm_size_t)Size, 0,
296296 VM_PROT_READ | VM_PROT_EXECUTE | VM_PROT_COPY);
310310 #if defined(__APPLE__)
311311
312312 # if (defined(__POWERPC__) || defined (__ppc__) || \
313 defined(_POWER) || defined(_ARCH_PPC)) || defined(__arm__)
313 defined(_POWER) || defined(_ARCH_PPC) || defined(__arm__) || \
314 defined(__arm64__))
314315 sys_icache_invalidate(const_cast<void *>(Addr), Len);
315316 # endif
316317
0 //===-- ARM64.h - Top-level interface for ARM64 representation --*- C++ -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the entry points for global functions defined in the LLVM
10 // ARM64 back-end.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #ifndef TARGET_ARM64_H
15 #define TARGET_ARM64_H
16
17 #include "MCTargetDesc/ARM64BaseInfo.h"
18 #include "MCTargetDesc/ARM64MCTargetDesc.h"
19 #include "llvm/Target/TargetMachine.h"
20 #include "llvm/Support/DataTypes.h"
21
22 namespace llvm {
23
24 class ARM64TargetMachine;
25 class FunctionPass;
26 class MachineFunctionPass;
27
28 FunctionPass *createARM64DeadRegisterDefinitions();
29 FunctionPass *createARM64ConditionalCompares();
30 FunctionPass *createARM64AdvSIMDScalar();
31 FunctionPass *createARM64BranchRelaxation();
32 FunctionPass *createARM64ISelDag(ARM64TargetMachine &TM,
33 CodeGenOpt::Level OptLevel);
34 FunctionPass *createARM64StorePairSuppressPass();
35 FunctionPass *createARM64ExpandPseudoPass();
36 FunctionPass *createARM64LoadStoreOptimizationPass();
37 ModulePass *createARM64PromoteConstantPass();
38 FunctionPass *createARM64AddressTypePromotionPass();
39 /// \brief Creates an ARM-specific Target Transformation Info pass.
40 ImmutablePass *createARM64TargetTransformInfoPass(const ARM64TargetMachine *TM);
41
42 FunctionPass *createARM64CleanupLocalDynamicTLSPass();
43
44 FunctionPass *createARM64CollectLOHPass();
45 } // end namespace llvm
46
47 #endif
0 //===- ARM64.td - Describe the ARM64 Target Machine --------*- tablegen -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //
10 //===----------------------------------------------------------------------===//
11
12 //===----------------------------------------------------------------------===//
13 // Target-independent interfaces which we are implementing
14 //===----------------------------------------------------------------------===//
15
16 include "llvm/Target/Target.td"
17
18 //===----------------------------------------------------------------------===//
19 // ARM64 Subtarget features.
20 //
21
22 /// Cyclone has register move instructions which are "free".
23 def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true",
24 "Has zereo-cycle register moves">;
25
26 /// Cyclone has instructions which zero registers for "free".
27 def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true",
28 "Has zero-cycle zeroing instructions">;
29
30 //===----------------------------------------------------------------------===//
31 // Register File Description
32 //===----------------------------------------------------------------------===//
33
34 include "ARM64RegisterInfo.td"
35 include "ARM64CallingConvention.td"
36
37 //===----------------------------------------------------------------------===//
38 // Instruction Descriptions
39 //===----------------------------------------------------------------------===//
40
41 include "ARM64Schedule.td"
42 include "ARM64InstrInfo.td"
43
44 def ARM64InstrInfo : InstrInfo;
45
46 //===----------------------------------------------------------------------===//
47 // ARM64 Processors supported.
48 //
49 include "ARM64SchedCyclone.td"
50
51 def : ProcessorModel<"arm64-generic", NoSchedModel, []>;
52
53 def : ProcessorModel<"cyclone", CycloneModel, [FeatureZCRegMove, FeatureZCZeroing]>;
54
55 //===----------------------------------------------------------------------===//
56 // Assembly parser
57 //===----------------------------------------------------------------------===//
58
59 def GenericAsmParserVariant : AsmParserVariant {
60 int Variant = 0;
61 string Name = "generic";
62 }
63
64 def AppleAsmParserVariant : AsmParserVariant {
65 int Variant = 1;
66 string Name = "apple-neon";
67 }
68
69 //===----------------------------------------------------------------------===//
70 // Assembly printer
71 //===----------------------------------------------------------------------===//
72 // ARM64 uses the MC printer for asm output, so make sure the TableGen
73 // AsmWriter bits get associated with the correct class.
74 def GenericAsmWriter : AsmWriter {
75 string AsmWriterClassName = "InstPrinter";
76 int Variant = 0;
77 bit isMCAsmWriter = 1;
78 }
79
80 def AppleAsmWriter : AsmWriter {
81 let AsmWriterClassName = "AppleInstPrinter";
82 int Variant = 1;
83 int isMCAsmWriter = 1;
84 }
85
86 //===----------------------------------------------------------------------===//
87 // Target Declaration
88 //===----------------------------------------------------------------------===//
89
90 def ARM64 : Target {
91 let InstructionSet = ARM64InstrInfo;
92 let AssemblyParserVariants = [GenericAsmParserVariant, AppleAsmParserVariant];
93 let AssemblyWriters = [GenericAsmWriter, AppleAsmWriter];
94 }
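Once the backend is registered, the new target is reachable through the normal TargetRegistry lookup path (the LTO changes above pick "cyclone" as the default CPU for it). A hedged C++ sketch; the LLVMInitializeARM64* initializers are assumed to follow the usual LLVMInitialize<Target>* naming convention for a target named ARM64 and are pulled in here via InitializeAllTargets():

#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include <string>

const llvm::Target *lookupArm64Backend() {
  llvm::InitializeAllTargetInfos();
  llvm::InitializeAllTargets();
  std::string Err;
  // The "arm64" triple resolves to the new backend; "aarch64" keeps
  // resolving to the existing AArch64 backend.
  return llvm::TargetRegistry::lookupTarget("arm64-apple-ios7.0", Err);
}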
0
1 //===-- ARM64AddressTypePromotion.cpp --- Promote type for addr accesses -===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass tries to promote the computations used to obtain a sign-extended
11 // value used in memory accesses.
12 // E.g.
13 // a = add nsw i32 b, 3
14 // d = sext i32 a to i64
15 // e = getelementptr ..., i64 d
16 //
17 // =>
18 // f = sext i32 b to i64
19 // a = add nsw i64 f, 3
20 // e = getelementptr ..., i64 a
21 //
22 // This is legal to do if the computations are marked with either the nsw or
23 // nuw flag.
24 // Moreover, the current heuristic is simple: it does not create new sext
25 // operations, i.e., it gives up when a sext would have forked (e.g., if
26 // a = add i32 b, c, two sexts are required to promote the computation).
27 //
28 // FIXME: This pass may be useful for other targets too.
29 // ===---------------------------------------------------------------------===//
30
31 #define DEBUG_TYPE "arm64-type-promotion"
32 #include "ARM64.h"
33 #include "llvm/ADT/DenseMap.h"
34 #include "llvm/ADT/SmallPtrSet.h"
35 #include "llvm/ADT/SmallVector.h"
36 #include "llvm/IR/Constants.h"
37 #include "llvm/IR/Dominators.h"
38 #include "llvm/IR/Function.h"
39 #include "llvm/IR/Instructions.h"
40 #include "llvm/IR/Module.h"
41 #include "llvm/IR/Operator.h"
42 #include "llvm/Pass.h"
43 #include "llvm/Support/CommandLine.h"
44 #include "llvm/Support/Debug.h"
45
46 using namespace llvm;
47
48 static cl::opt<bool>
49 EnableAddressTypePromotion("arm64-type-promotion", cl::Hidden,
50 cl::desc("Enable the type promotion pass"),
51 cl::init(true));
52 static cl::opt<bool>
53 EnableMerge("arm64-type-promotion-merge", cl::Hidden,
54 cl::desc("Enable merging of redundant sexts when one is dominating"
55 " the other."),
56 cl::init(true));
57
58 //===----------------------------------------------------------------------===//
59 // ARM64AddressTypePromotion
60 //===----------------------------------------------------------------------===//
61
62 namespace llvm {
63 void initializeARM64AddressTypePromotionPass(PassRegistry &);
64 }
65
66 namespace {
67 class ARM64AddressTypePromotion : public FunctionPass {
68
69 public:
70 static char ID;
71 ARM64AddressTypePromotion()
72 : FunctionPass(ID), Func(NULL), ConsideredSExtType(NULL) {
73 initializeARM64AddressTypePromotionPass(*PassRegistry::getPassRegistry());
74 }
75
76 virtual const char *getPassName() const {
77 return "ARM64 Address Type Promotion";
78 }
79
80 /// Iterate over the function and promote the computation of interesting
81 /// sext instructions.
82 bool runOnFunction(Function &F);
83
84 private:
85 /// The current function.
86 Function *Func;
87 /// Filter out all sexts that do not have this type.
88 /// Currently initialized with Int64Ty.
89 Type *ConsideredSExtType;
90
91 // This transformation requires dominator info.
92 virtual void getAnalysisUsage(AnalysisUsage &AU) const {
93 AU.setPreservesCFG();
94 AU.addRequired<DominatorTreeWrapperPass>();
95 AU.addPreserved<DominatorTreeWrapperPass>();
96 FunctionPass::getAnalysisUsage(AU);
97 }
98
99 typedef SmallPtrSet<Instruction *, 32> SetOfInstructions;
100 typedef SmallVector<Instruction *, 16> Instructions;
101 typedef DenseMap<Value *, Instructions> ValueToInsts;
102
103 /// Check if it is profitable to move a sext through this instruction.
104 /// Currently, we consider it profitable if:
105 /// - Inst is used only once (no need to insert a truncate).
106 /// - Inst has only one operand that will require a sext operation (we do
107 /// not create new sext operations).
108 bool shouldGetThrough(const Instruction *Inst);
109
110 /// Check if it is possible and legal to move a sext through this
111 /// instruction.
112 /// Current heuristic considers that we can get through:
113 /// - Arithmetic operation marked with the nsw or nuw flag.
114 /// - Other sext operation.
115 /// - Truncate operation if it was just dropping sign extended bits.
116 bool canGetThrough(const Instruction *Inst);
117
118 /// Move sext operations through safe to sext instructions.
119 bool propagateSignExtension(Instructions &SExtInsts);
120
121 /// Should this sext be considered for code motion?
122 /// We look for sexts with ConsideredSExtType and uses in at least one
123 /// GetElementPtrInst.
124 bool shouldConsiderSExt(const Instruction *SExt) const;
125
126 /// Collect all interesting sext operations, i.e., the ones with the right
127 /// type and used in memory accesses.
128 /// More precisely, a sext instruction is considered interesting if it
129 /// is used in a "complex" getelementptr or if there exists at least one
130 /// other sext instruction that sign extended the same initial value.
131 /// A getelementptr is considered "complex" if it has more than 2
132 /// operands.
133 void analyzeSExtension(Instructions &SExtInsts);
134
135 /// Merge redundant sign extension operations in common dominator.
136 void mergeSExts(ValueToInsts &ValToSExtendedUses,
137 SetOfInstructions &ToRemove);
138 };
139 } // end anonymous namespace.
140
141 char ARM64AddressTypePromotion::ID = 0;
142
143 INITIALIZE_PASS_BEGIN(ARM64AddressTypePromotion, "arm64-type-promotion",
144 "ARM64 Type Promotion Pass", false, false)
145 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
146 INITIALIZE_PASS_END(ARM64AddressTypePromotion, "arm64-type-promotion",
147 "ARM64 Type Promotion Pass", false, false)
148
149 FunctionPass *llvm::createARM64AddressTypePromotionPass() {
150 return new ARM64AddressTypePromotion();
151 }
152
153 bool ARM64AddressTypePromotion::canGetThrough(const Instruction *Inst) {
154 if (isa<SExtInst>(Inst))
155 return true;
156
157 const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst);
158 if (BinOp && isa<OverflowingBinaryOperator>(BinOp) &&
159 (BinOp->hasNoUnsignedWrap() || BinOp->hasNoSignedWrap()))
160 return true;
161
162 // sext(trunc(sext)) --> sext
163 if (isa<TruncInst>(Inst) && isa<SExtInst>(Inst->getOperand(0))) {
164 const Instruction *Opnd = cast<Instruction>(Inst->getOperand(0));
165 // Check that the truncate just drops sign extended bits.
166 if (Inst->getType()->getIntegerBitWidth() >=
167 Opnd->getOperand(0)->getType()->getIntegerBitWidth() &&
168 Inst->getOperand(0)->getType()->getIntegerBitWidth() <=
169 ConsideredSExtType->getIntegerBitWidth())
170 return true;
171 }
172
173 return false;
174 }
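//
// Worked example for the trunc case above (an illustration, not taken from the
// original file): in
//   %s1 = sext i16 %x to i64
//   %t  = trunc i64 %s1 to i32
//   %s2 = sext i32 %t to i64
// the trunc only drops bits produced by the inner sign extension, the
// bit-width checks (32 >= 16 and 64 <= 64) pass, and %s2 is equivalent to
// sext i16 %x to i64, so the sext can be moved through the trunc.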
175
176 bool ARM64AddressTypePromotion::shouldGetThrough(const Instruction *Inst) {
177 // If the type of the sext is the same as the considered one, this sext
178 // will become useless.
179 // Otherwise, we will have to do something to preserve the original value,
180 // unless it is used once.
181 if (isa<SExtInst>(Inst) &&
182 (Inst->getType() == ConsideredSExtType || Inst->hasOneUse()))
183 return true;
184
185 // If the Inst is used more than once, we may need to insert truncate
186 // operations and we don't do that at the moment.
187 if (!Inst->hasOneUse())
188 return false;
189
190 // This truncate is used only once, thus if we can get through, it will become
191 // useless.
192 if (isa<TruncInst>(Inst))
193 return true;
194
195 // If both operands are not constant, a new sext will be created here.
196 // Current heuristic is: each step should be profitable.
197 // Therefore we don't allow increasing the number of sexts even if it may
198 // be profitable later on.
199 if (isa<BinaryOperator>(Inst) && isa<ConstantInt>(Inst->getOperand(1)))
200 return true;
201
202 return false;
203 }
204
205 static bool shouldSExtOperand(const Instruction *Inst, int OpIdx) {
206 if (isa<GetElementPtrInst>(Inst) && OpIdx == 0)
207 return false;
208 return true;
209 }
210
211 bool
212 ARM64AddressTypePromotion::shouldConsiderSExt(const Instruction *SExt) const {
213 if (SExt->getType() != ConsideredSExtType)
214 return false;
215
216 for (Value::const_use_iterator UseIt = SExt->use_begin(),
217 EndUseIt = SExt->use_end();
218 UseIt != EndUseIt; ++UseIt) {
219 if (isa<GetElementPtrInst>(*UseIt))
220 return true;
221 }
222
223 return false;
224 }
225
226 // Input:
227 // - SExtInsts contains all the sext instructions that are used directly in
228 // GetElementPtrInst, i.e., access to memory.
229 // Algorithm:
230 // - For each sext operation in SExtInsts:
231 // Let var be the operand of sext.
232 // while it is profitable (see shouldGetThrough), legal, and safe
233 // (see canGetThrough) to move sext through var's definition:
234 // * promote the type of var's definition.
235 // * fold var into sext uses.
236 // * move sext above var's definition.
237 // * update sext operand to use the operand of var that should be sign
238 // extended (by construction there is only one).
239 //
240 // E.g.,
241 // a = ... i32 c, 3
242 // b = sext i32 a to i64 <- is it legal/safe/profitable to get through 'a'
243 // ...
244 // = b
245 // => Yes, update the code
246 // b = sext i32 c to i64
247 // a = ... i64 b, 3
248 // ...
249 // = a
250 // Iterate on 'c'.
251 bool
252 ARM64AddressTypePromotion::propagateSignExtension(Instructions &SExtInsts) {
253 DEBUG(dbgs() << "*** Propagate Sign Extension ***\n");
254
255 bool LocalChange = false;
256 SetOfInstructions ToRemove;
257 ValueToInsts ValToSExtendedUses;
258 while (!SExtInsts.empty()) {
259 // Get through simple chain.
260 Instruction *SExt = SExtInsts.pop_back_val();
261
262 DEBUG(dbgs() << "Consider:\n" << *SExt << '\n');
263
264 // If this SExt has already been merged continue.
265 if (SExt->use_empty() && ToRemove.count(SExt)) {
266 DEBUG(dbgs() << "No uses => marked as delete\n");
267 continue;
268 }
269
270 // Now try to get through the chain of definitions.
271 while (isa<Instruction>(SExt->getOperand(0))) {
272 Instruction *Inst = dyn_cast<Instruction>(SExt->getOperand(0));
273 DEBUG(dbgs() << "Try to get through:\n" << *Inst << '\n');
274 if (!canGetThrough(Inst) || !shouldGetThrough(Inst)) {
275 // We cannot get through something that is not an Instruction
276 // or not safe to SExt.
277 DEBUG(dbgs() << "Cannot get through\n");
278 break;
279 }
280
281 LocalChange = true;
282 // If this is a sign extend, it becomes useless.
283 if (isa<SExtInst>(Inst) || isa<TruncInst>(Inst)) {
284 DEBUG(dbgs() << "SExt or trunc, mark it as to remove\n");
285 // We cannot use replaceAllUsesWith here because we may trigger some
286 // assertion on the type as all involved sext operations may not have
287 // been moved yet.
288 while (!Inst->use_empty()) {
289 Value::use_iterator UseIt = Inst->use_begin();
290 Instruction *UseInst = dyn_cast<Instruction>(*UseIt);
291 assert(UseInst && "Use of sext is not an Instruction!");
292 UseInst->setOperand(UseIt->getOperandNo(), SExt);
293 }
294 ToRemove.insert(Inst);
295 SExt->setOperand(0, Inst->getOperand(0));
296 SExt->moveBefore(Inst);
297 continue;
298 }
299
300 // Get through the Instruction:
301 // 1. Update its type.
302 // 2. Replace the uses of SExt by Inst.
303 // 3. Sign extend each operand that needs to be sign extended.
304
305 // Step #1.
306 Inst->mutateType(SExt->getType());
307 // Step #2.
308 SExt->replaceAllUsesWith(Inst);
309 // Step #3.
310 Instruction *SExtForOpnd = SExt;
311
312 DEBUG(dbgs() << "Propagate SExt to operands\n");
313 for (int OpIdx = 0, EndOpIdx = Inst->getNumOperands(); OpIdx != EndOpIdx;
314 ++OpIdx) {
315 DEBUG(dbgs() << "Operand:\n" << *(Inst->getOperand(OpIdx)) << '\n');
316 if (Inst->getOperand(OpIdx)->getType() == SExt->getType() ||
317 !shouldSExtOperand(Inst, OpIdx)) {
318 DEBUG(dbgs() << "No need to propagate\n");
319 continue;
320 }
321 // Check if we can statically sign extend the operand.
322 Value *Opnd = Inst->getOperand(OpIdx);
323 if (const ConstantInt *Cst = dyn_cast<ConstantInt>(Opnd)) {
324 DEBUG(dbgs() << "Statically sign extend\n");
325 Inst->setOperand(OpIdx, ConstantInt::getSigned(SExt->getType(),
326 Cst->getSExtValue()));
327 continue;
328 }
329 // UndefValue are typed, so we have to statically sign extend them.
330 if (isa<UndefValue>(Opnd)) {
331 DEBUG(dbgs() << "Statically sign extend\n");
332 Inst->setOperand(OpIdx, UndefValue::get(SExt->getType()));
333 continue;
334 }
335
336 // Otherwise we have to explicitly sign extend it.
337 assert(SExtForOpnd &&
338 "Only one operand should have been sign extended");
339
340 SExtForOpnd->setOperand(0, Opnd);
341
342 DEBUG(dbgs() << "Move before:\n" << *Inst << "\nSign extend\n");
343 // Move the sign extension before the insertion point.
344 SExtForOpnd->moveBefore(Inst);
345 Inst->setOperand(OpIdx, SExtForOpnd);
346 // If more sext are required, new instructions will have to be created.
347 SExtForOpnd = NULL;
348 }
349 if (SExtForOpnd == SExt) {
350 DEBUG(dbgs() << "Sign extension is useless now\n");
351 ToRemove.insert(SExt);
352 break;
353 }
354 }
355
356 // If the sext now has the same type as its operand, connect its uses to
357 // its argument and delete it.
358 // This can happen for an Instruction all of whose uses are sign extended.
359 if (!ToRemove.count(SExt) &&
360 SExt->getType() == SExt->getOperand(0)->getType()) {
361 DEBUG(dbgs() << "Sign extension is useless, attach its use to "
362 "its argument\n");
363 SExt->replaceAllUsesWith(SExt->getOperand(0));
364 ToRemove.insert(SExt);
365 } else
366 ValToSExtendedUses[SExt->getOperand(0)].push_back(SExt);
367 }
368
369 if (EnableMerge)
370 mergeSExts(ValToSExtendedUses, ToRemove);
371
372 // Remove all instructions marked as ToRemove.
373 for (SetOfInstructions::iterator ToRemoveIt = ToRemove.begin(),
374 EndToRemoveIt = ToRemove.end();
375 ToRemoveIt != EndToRemoveIt; ++ToRemoveIt)
376 (*ToRemoveIt)->eraseFromParent();
377 return LocalChange;
378 }
379
380 void ARM64AddressTypePromotion::mergeSExts(ValueToInsts &ValToSExtendedUses,
381 SetOfInstructions &ToRemove) {
382 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
383
384 for (ValueToInsts::iterator It = ValToSExtendedUses.begin(),
385 EndIt = ValToSExtendedUses.end();
386 It != EndIt; ++It) {
387 Instructions &Insts = It->second;
388 Instructions CurPts;
389 for (Instructions::iterator IIt = Insts.begin(), EndIIt = Insts.end();
390 IIt != EndIIt; ++IIt) {
391 if (ToRemove.count(*IIt))
392 continue;
393 bool inserted = false;
394 for (Instructions::iterator CurPtsIt = CurPts.begin(),
395 EndCurPtsIt = CurPts.end();
396 CurPtsIt != EndCurPtsIt; ++CurPtsIt) {
397 if (DT.dominates(*IIt, *CurPtsIt)) {
398 DEBUG(dbgs() << "Replace all uses of:\n" << **CurPtsIt << "\nwith:\n"
399 << **IIt << '\n');
400 (*CurPtsIt)->replaceAllUsesWith(*IIt);
401 ToRemove.insert(*CurPtsIt);
402 *CurPtsIt = *IIt;
403 inserted = true;
404 break;
405 }
406 if (!DT.dominates(*CurPtsIt, *IIt))
407 // Give up if we would need to merge in a common dominator, as the
408 // experiments show it is not profitable.
409 continue;
410
411 DEBUG(dbgs() << "Replace all uses of:\n" << **IIt << "\nwith:\n"
412 << **CurPtsIt << '\n');
413 (*IIt)->replaceAllUsesWith(*CurPtsIt);
414 ToRemove.insert(*IIt);
415 inserted = true;
416 break;
417 }
418 if (!inserted)
419 CurPts.push_back(*IIt);
420 }
421 }
422 }
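//
// Illustration (not from the original sources): if the same value is sign
// extended twice and one extension dominates the other,
//   %s1 = sext i32 %x to i64   ; in a dominating block
//   ...
//   %s2 = sext i32 %x to i64   ; redundant
// mergeSExts rewrites every use of %s2 to use %s1 and marks %s2 for removal.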
423
424 void ARM64AddressTypePromotion::analyzeSExtension(Instructions &SExtInsts) {
425 DEBUG(dbgs() << "*** Analyze Sign Extensions ***\n");
426
427 DenseMap<Value *, Instruction *> SeenChains;
428
429 for (Function::iterator IBB = Func->begin(), IEndBB = Func->end();
430 IBB != IEndBB; ++IBB) {
431 for (BasicBlock::iterator II = IBB->begin(), IEndI = IBB->end();
432 II != IEndI; ++II) {
433
434 // Collect all sext operation per type.
435 if (!isa<SExtInst>(II) || !shouldConsiderSExt(II))
436 continue;
437 Instruction *SExt = II;
438
439 DEBUG(dbgs() << "Found:\n" << (*II) << '\n');
440
441 // Cases where we actually perform the optimization:
442 // 1. SExt is used in a getelementptr with more than 2 operands =>
443 // likely we can merge some computations if they are done on 64 bits.
444 // 2. The beginning of the SExt chain is sign extended several times. =>
445 // code sharing is possible.
446
447 bool insert = false;
448 // #1.
449 for (Value::use_iterator UseIt = SExt->use_begin(),
450 EndUseIt = SExt->use_end();
451 UseIt != EndUseIt; ++UseIt) {
452 const Instruction *Inst = dyn_cast<GetElementPtrInst>(*UseIt);
453 if (Inst && Inst->getNumOperands() > 2) {
454 DEBUG(dbgs() << "Interesting use in GetElementPtrInst\n" << *Inst
455 << '\n');
456 insert = true;
457 break;
458 }
459 }
460
461 // #2.
462 // Check the head of the chain.
463 Instruction *Inst = SExt;
464 Value *Last;
465 do {
466 int OpdIdx = 0;
467 const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst);
468 if (BinOp && isa<ConstantInt>(BinOp->getOperand(0)))
469 OpdIdx = 1;
470 Last = Inst->getOperand(OpdIdx);
471 Inst = dyn_cast(Last);
472 } while (Inst && canGetThrough(Inst) && shouldGetThrough(Inst));
473
474 DEBUG(dbgs() << "Head of the chain:\n" << *Last << '\n');
475 DenseMap<Value *, Instruction *>::iterator AlreadySeen =
476 SeenChains.find(Last);
477 if (insert || AlreadySeen != SeenChains.end()) {
478 DEBUG(dbgs() << "Insert\n");
479 SExtInsts.push_back(II);
480 if (AlreadySeen != SeenChains.end() && AlreadySeen->second != NULL) {
481 DEBUG(dbgs() << "Insert chain member\n");
482 SExtInsts.push_back(AlreadySeen->second);
483 SeenChains[Last] = NULL;
484 }
485 } else {
486 DEBUG(dbgs() << "Record its chain membership\n");
487 SeenChains[Last] = SExt;
488 }
489 }
490 }
491 }
492
493 bool ARM64AddressTypePromotion::runOnFunction(Function &F) {
494 if (!EnableAddressTypePromotion || F.isDeclaration())
495 return false;
496 Func = &F;
497 ConsideredSExtType = Type::getInt64Ty(Func->getContext());
498
499 DEBUG(dbgs() << "*** " << getPassName() << ": " << Func->getName() << '\n');
500
501 Instructions SExtInsts;
502 analyzeSExtension(SExtInsts);
503 return propagateSignExtension(SExtInsts);
504 }
0 //===-- ARM64AdvSIMDScalar.cpp - Use AdvSIMD scalar integer instructions -===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 // When profitable, replace GPR targeting i64 instructions with their
9 // AdvSIMD scalar equivalents. Generally speaking, "profitable" is defined
10 // as minimizing the number of cross-class register copies.
11 //===----------------------------------------------------------------------===//
12
13 //===----------------------------------------------------------------------===//
14 // TODO: Graph based predicate heuristics.
15 // Walking the instruction list linearly will get many, perhaps most, of
16 // the cases, but to do a truly thorough job of this, we need a more
17 // holistic approach.
18 //
19 // This optimization is very similar in spirit to the register allocator's
20 // spill placement, only here we're determining where to place cross-class
21 // register copies rather than spills. As such, a similar approach is
22 // called for.
23 //
24 // We want to build up a set of graphs of all instructions which are candidates
25 // for transformation along with instructions which generate their inputs and
26 // consume their outputs. For each edge in the graph, we assign a weight
27 // based on whether there is a copy required there (weight zero if not) and
28 // the block frequency of the block containing the defining or using
29 // instruction, whichever is less. Our optimization is then a graph problem
30 // to minimize the total weight of all the graphs, then transform instructions
31 // and add or remove copy instructions as called for to implement the
32 // solution.
33 //===----------------------------------------------------------------------===//
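//
// A minimal motivating example (illustrative only, not from the original
// sources): for an i64 add whose operands are produced in FP/SIMD registers
// and whose result is consumed there as well, the GPR form
//   fmov x0, d0
//   fmov x1, d1
//   add  x2, x0, x1
//   fmov d2, x2
// requires three cross-class copies, while the AdvSIMD scalar form
//   add  d2, d0, d1
// requires none.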
34
35 #define DEBUG_TYPE "arm64-simd-scalar"
36 #include "ARM64.h"
37 #include "ARM64InstrInfo.h"
38 #include "ARM64RegisterInfo.h"
39 #include "llvm/ADT/Statistic.h"
40 #include "llvm/CodeGen/MachineFunctionPass.h"
41 #include "llvm/CodeGen/MachineFunction.h"
42 #include "llvm/CodeGen/MachineInstr.h"
43 #include "llvm/CodeGen/MachineInstrBuilder.h"
44 #include "llvm/CodeGen/MachineRegisterInfo.h"
45 #include "llvm/Support/CommandLine.h"
46 #include "llvm/Support/Debug.h"
47 #include "llvm/Support/raw_ostream.h"
48 using namespace llvm;
49
50 static cl::opt<bool>
51 AdvSIMDScalar("arm64-simd-scalar",
52 cl::desc("enable use of AdvSIMD scalar integer instructions"),
53 cl::init(false), cl::Hidden);
54 // Allow forcing all i64 operations with equivalent SIMD instructions to use
55 // them. For stress-testing the transformation function.
56 static cl::opt<bool>
57 TransformAll("arm64-simd-scalar-force-all",
58 cl::desc("Force use of AdvSIMD scalar instructions everywhere"),
59 cl::init(false), cl::Hidden);
60
61 STATISTIC(NumScalarInsnsUsed, "Number of scalar instructions used");
62 STATISTIC(NumCopiesDeleted, "Number of cross-class copies deleted");
63 STATISTIC(NumCopiesInserted, "Number of cross-class copies inserted");
64
65 namespace {
66 class ARM64AdvSIMDScalar : public MachineFunctionPass {
67 MachineRegisterInfo *MRI;
68 const ARM64InstrInfo *TII;
69
70 private:
71 // isProfitableToTransform - Predicate function to determine whether an
72 // instruction should be transformed to its equivalent AdvSIMD scalar
73 // instruction. "add Xd, Xn, Xm" ==> "add Dd, Da, Db", for example.
74 bool isProfitableToTransform(const MachineInstr *MI) const;
75
76 // transformInstruction - Perform the transformation of an instruction
77 // to its equivalent AdvSIMD scalar instruction. Update inputs and outputs
78 // to be the correct register class, minimizing cross-class copies.
79 void transformInstruction(MachineInstr *MI);
80
81 // processMachineBasicBlock - Main optimization loop.
82 bool processMachineBasicBlock(MachineBasicBlock *MBB);
83
84 public:
85 static char ID; // Pass identification, replacement for typeid.
86 explicit ARM64AdvSIMDScalar() : MachineFunctionPass(ID) {}
87
88 virtual bool runOnMachineFunction(MachineFunction &F);
89
90 const char *getPassName() const {
91 return "AdvSIMD scalar operation optimization";
92 }
93
94 virtual void getAnalysisUsage(AnalysisUsage &AU) const {
95 AU.setPreservesCFG();
96 MachineFunctionPass::getAnalysisUsage(AU);
97 }
98 };
99 char ARM64AdvSIMDScalar::ID = 0;
100 } // end anonymous namespace
101
102 static bool isGPR64(unsigned Reg, unsigned SubReg,
103 const MachineRegisterInfo *MRI) {
104 if (SubReg)
105 return false;
106 if (TargetRegisterInfo::isVirtualRegister(Reg))
107 return MRI->getRegClass(Reg)->hasSuperClassEq(&ARM64::GPR64RegClass);
108 return ARM64::GPR64RegClass.contains(Reg);
109 }
110
111 static bool isFPR64(unsigned Reg, unsigned SubReg,
112 const MachineRegisterInfo *MRI) {
113 if (TargetRegisterInfo::isVirtualRegister(Reg))
114 return (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM64::FPR64RegClass) &&
115 SubReg == 0) ||
116 (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM64::FPR128RegClass) &&
117 SubReg == ARM64::dsub);
118 // Physical register references just check the register class directly.
119 return (ARM64::FPR64RegClass.contains(Reg) && SubReg == 0) ||
120 (ARM64::FPR128RegClass.contains(Reg) && SubReg == ARM64::dsub);
121 }
122
123 // getSrcFromCopy - Get the original source register for a GPR64 <--> FPR64
124 // copy instruction. Return zero_reg if the instruction is not a copy.
125 static unsigned getSrcFromCopy(const MachineInstr *MI,
126 const MachineRegisterInfo *MRI,
127 unsigned &SubReg) {
128 SubReg = 0;
129 // The "FMOV Xd, Dn" instruction is the typical form.
130 if (MI->getOpcode() == ARM64::FMOVDXr || MI->getOpcode() == ARM64::FMOVXDr)
131 return MI->getOperand(1).getReg();
132 // A lane zero extract "UMOV.d Xd, Vn[0]" is equivalent. We shouldn't see
133 // these at this stage, but it's easy to check for.
134 if (MI->getOpcode() == ARM64::UMOVvi64 && MI->getOperand(2).getImm() == 0) {
135 SubReg = ARM64::dsub;
136 return MI->getOperand(1).getReg();
137 }
138 // Or just a plain COPY instruction. This can be directly to/from FPR64,
139 // or it can be a dsub subreg reference to an FPR128.
140 if (MI->getOpcode() == ARM64::COPY) {
141 if (isFPR64(MI->getOperand(0).getReg(), MI->getOperand(0).getSubReg(),
142 MRI) &&
143 isGPR64(MI->getOperand(1).getReg(), MI->getOperand(1).getSubReg(), MRI))
144 return MI->getOperand(1).getReg();
145 if (isGPR64(MI->getOperand(0).getReg(), MI->getOperand(0).getSubReg(),
146 MRI) &&
147 isFPR64(MI->getOperand(1).getReg(), MI->getOperand(1).getSubReg(),
148 MRI)) {
149 SubReg = ARM64::dsub;
150 return MI->getOperand(1).getReg();
151 }
152 }
153
154 // Otherwise, this is some other kind of instruction.
155 return 0;
156 }
157
158 // getTransformOpcode - For any opcode for which there is an AdvSIMD equivalent
159 // that we're considering transforming to, return that AdvSIMD opcode. For all
160 // others, return the original opcode.
161 static int getTransformOpcode(unsigned Opc) {
162 switch (Opc) {
163 default:
164 break;
165 // FIXME: Lots more possibilities.
166 case ARM64::ADDXrr:
167 return ARM64::ADDv1i64;
168 case ARM64::SUBXrr:
169 return ARM64::SUBv1i64;
170 }
171 // No AdvSIMD equivalent, so just return the original opcode.
172 return Opc;
173 }
174
175 static bool isTransformable(const MachineInstr *MI) {
176 int Opc = MI->getOpcode();
177 return Opc != getTransformOpcode(Opc);
178 }
179
180 // isProfitableToTransform - Predicate function to determine whether an
181 // instruction should be transformed to its equivalent AdvSIMD scalar
182 // instruction. "add Xd, Xn, Xm" ==> "add Dd, Da, Db", for example.
183 bool ARM64AdvSIMDScalar::isProfitableToTransform(const MachineInstr *MI) const {
184 // If this instruction isn't eligible to be transformed (no SIMD equivalent),
185 // early exit since that's the common case.
186 if (!isTransformable(MI))
187 return false;
188
189 // Count the number of copies we'll need to add and approximate the number
190 // of copies that a transform will enable us to remove.
191 unsigned NumNewCopies = 3;
192 unsigned NumRemovableCopies = 0;
193
194 unsigned OrigSrc0 = MI->getOperand(1).getReg();
195 unsigned OrigSrc1 = MI->getOperand(2).getReg();
196 unsigned Src0 = 0, SubReg0;
197 unsigned Src1 = 0, SubReg1;
198 if (!MRI->def_empty(OrigSrc0)) {
199 MachineRegisterInfo::def_instr_iterator Def =
200 MRI->def_instr_begin(OrigSrc0);
201 assert(std::next(Def) == MRI->def_instr_end() && "Multiple def in SSA!");
202 Src0 = getSrcFromCopy(&*Def, MRI, SubReg0);
203 // If the source was from a copy, we don't need to insert a new copy.
204 if (Src0)
205 --NumNewCopies;
206 // If there are no other users of the original source, we can delete
207 // that instruction.
208 if (Src0 && MRI->hasOneNonDBGUse(OrigSrc0))
209 ++NumRemovableCopies;
210 }
211 if (!MRI->def_empty(OrigSrc1)) {
212 MachineRegisterInfo::def_instr_iterator Def =
213 MRI->def_instr_begin(OrigSrc1);
214 assert(std::next(Def) == MRI->def_instr_end() && "Multiple def in SSA!");
215 Src1 = getSrcFromCopy(&*Def, MRI, SubReg1);
216 if (Src1)
217 --NumNewCopies;
218 // If there are no other users of the original source, we can delete
219 // that instruction.
220 if (Src1 && MRI->hasOneNonDBGUse(OrigSrc1))
221 ++NumRemovableCopies;
222 }
223
224 // If any of the uses of the original instructions is a cross class copy,
225 // that's a copy that will be removable if we transform. Likewise, if
226 // any of the uses is a transformable instruction, it's likely the transforms
227 // will chain, enabling us to save a copy there, too. This is an aggressive
228 // heuristic that approximates the graph based cost analysis described above.
229 unsigned Dst = MI->getOperand(0).getReg();
230 bool AllUsesAreCopies = true;
231 for (MachineRegisterInfo::use_instr_nodbg_iterator
232 Use = MRI->use_instr_nodbg_begin(Dst),
233 E = MRI->use_instr_nodbg_end();
234 Use != E; ++Use) {
235 unsigned SubReg;
236 if (getSrcFromCopy(&*Use, MRI, SubReg) || isTransformable(&*Use))
237 ++NumRemovableCopies;
238 // If the use is an INSERT_SUBREG, that's still something that can
239 // directly use the FPR64, so we don't invalidate AllUsesAreCopies. It's
240 // preferable to have it use the FPR64 in most cases, as if the source
241 // vector is an IMPLICIT_DEF, the INSERT_SUBREG just goes away entirely.
242 // Ditto for a lane insert.
243 else if (Use->getOpcode() == ARM64::INSERT_SUBREG ||
244 Use->getOpcode() == ARM64::INSvi64gpr)
245 ;
246 else
247 AllUsesAreCopies = false;
248 }
249 // If all of the uses of the original destination register are copies to
250 // FPR64, then we won't end up having a new copy back to GPR64 either.
251 if (AllUsesAreCopies)
252 --NumNewCopies;
253
254 // If a transform will not increase the number of cross-class copies required,
255 // return true.
256 if (NumNewCopies <= NumRemovableCopies)
257 return true;
258
259 // Finally, even if we otherwise wouldn't transform, check if we're forcing
260 // transformation of everything.
261 return TransformAll;
262 }
263
264 static MachineInstr *insertCopy(const ARM64InstrInfo *TII, MachineInstr *MI,
265 unsigned Dst, unsigned Src, bool IsKill) {
266 MachineInstrBuilder MIB =
267 BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(ARM64::COPY),
268 Dst)
269 .addReg(Src, getKillRegState(IsKill));
270 DEBUG(dbgs() << " adding copy: " << *MIB);
271 ++NumCopiesInserted;
272 return MIB;
273 }
274
275 // transformInstruction - Perform the transformation of an instruction
276 // to its equivalent AdvSIMD scalar instruction. Update inputs and outputs
277 // to be the correct register class, minimizing cross-class copies.
278 void ARM64AdvSIMDScalar::transformInstruction(MachineInstr *MI) {
279 DEBUG(dbgs() << "Scalar transform: " << *MI);
280
281 MachineBasicBlock *MBB = MI->getParent();
282 int OldOpc = MI->getOpcode();
283 int NewOpc = getTransformOpcode(OldOpc);
284 assert(OldOpc != NewOpc && "transform an instruction to itself?!");
285
286 // Check if we need a copy for the source registers.
287 unsigned OrigSrc0 = MI->getOperand(1).getReg();
288 unsigned OrigSrc1 = MI->getOperand(2).getReg();
289 unsigned Src0 = 0, SubReg0;
290 unsigned Src1 = 0, SubReg1;
291 if (!MRI->def_empty(OrigSrc0)) {
292 MachineRegisterInfo::def_instr_iterator Def =
293 MRI->def_instr_begin(OrigSrc0);
294 assert(std::next(Def) == MRI->def_instr_end() && "Multiple def in SSA!");
295 Src0 = getSrcFromCopy(&*Def, MRI, SubReg0);
296 // If there are no other users of the original source, we can delete
297 // that instruction.
298 if (Src0 && MRI->hasOneNonDBGUse(OrigSrc0)) {
299 assert(Src0 && "Can't delete copy w/o a valid original source!");
300 Def->eraseFromParent();
301 ++NumCopiesDeleted;
302 }
303 }
304 if (!MRI->def_empty(OrigSrc1)) {
305 MachineRegisterInfo::def_instr_iterator Def =
306 MRI->def_instr_begin(OrigSrc1);
307 assert(std::next(Def) == MRI->def_instr_end() && "Multiple def in SSA!");
308 Src1 = getSrcFromCopy(&*Def, MRI, SubReg1);
309 // If there are no other users of the original source, we can delete
310 // that instruction.
311 if (Src1 && MRI->hasOneNonDBGUse(OrigSrc1)) {
312 assert(Src1 && "Can't delete copy w/o a valid original source!");
313 Def->eraseFromParent();
314 ++NumCopiesDeleted;
315 }
316 }
317 // If we weren't able to reference the original source directly, create a
318 // copy.
319 if (!Src0) {
320 SubReg0 = 0;
321 Src0 = MRI->createVirtualRegister(&ARM64::FPR64RegClass);
322 insertCopy(TII, MI, Src0, OrigSrc0, true);
323 }
324 if (!Src1) {
325 SubReg1 = 0;
326 Src1 = MRI->createVirtualRegister(&ARM64::FPR64RegClass);
327 insertCopy(TII, MI, Src1, OrigSrc1, true);
328 }
329
330 // Create a vreg for the destination.
331 // FIXME: No need to do this if the ultimate user expects an FPR64.
332 // Check for that and avoid the copy if possible.
333 unsigned Dst = MRI->createVirtualRegister(&ARM64::FPR64RegClass);
334
335 // For now, all of the new instructions have the same simple three-register
336 // form, so no need to special case based on what instruction we're
337 // building.
338 BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(NewOpc), Dst)
339 .addReg(Src0, getKillRegState(true), SubReg0)
340 .addReg(Src1, getKillRegState(true), SubReg1);
341
342 // Now copy the result back out to a GPR.
343 // FIXME: Try to avoid this if all uses could actually just use the FPR64
344 // directly.
345 insertCopy(TII, MI, MI->getOperand(0).getReg(), Dst, true);
346
347 // Erase the old instruction.
348 MI->eraseFromParent();
349
350 ++NumScalarInsnsUsed;
351 }
352
353 // processMachineBasicBlock - Main optimization loop.
354 bool ARM64AdvSIMDScalar::processMachineBasicBlock(MachineBasicBlock *MBB) {
355 bool Changed = false;
356 for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;) {
357 MachineInstr *MI = I;
358 ++I;
359 if (isProfitableToTransform(MI)) {
360 transformInstruction(MI);
361 Changed = true;
362 }
363 }
364 return Changed;
365 }
366
367 // runOnMachineFunction - Pass entry point from PassManager.
368 bool ARM64AdvSIMDScalar::runOnMachineFunction(MachineFunction &mf) {
369 // Early exit if pass disabled.
370 if (!AdvSIMDScalar)
371 return false;
372
373 bool Changed = false;
374 DEBUG(dbgs() << "***** ARM64AdvSIMDScalar *****\n");
375
376 const TargetMachine &TM = mf.getTarget();
377 MRI = &mf.getRegInfo();
378 TII = static_cast<const ARM64InstrInfo *>(TM.getInstrInfo());
379
380 // Just check things on a one-block-at-a-time basis.
381 for (MachineFunction::iterator I = mf.begin(), E = mf.end(); I != E; ++I)
382 if (processMachineBasicBlock(I))
383 Changed = true;
384 return Changed;
385 }
386
387 // createARM64AdvSIMDScalar - Factory function used by ARM64TargetMachine
388 // to add the pass to the PassManager.
389 FunctionPass *llvm::createARM64AdvSIMDScalar() {
390 return new ARM64AdvSIMDScalar();
391 }
0 //===-- ARM64AsmPrinter.cpp - ARM64 LLVM assembly writer ------------------===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains a printer that converts from our internal representation
10 // of machine-dependent LLVM code to the ARM64 assembly language.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #define DEBUG_TYPE "asm-printer"
15 #include "ARM64.h"
16 #include "ARM64MachineFunctionInfo.h"
17 #include "ARM64MCInstLower.h"
18 #include "ARM64RegisterInfo.h"
19 #include "InstPrinter/ARM64InstPrinter.h"
20 #include "llvm/ADT/SmallString.h"
21 #include "llvm/ADT/StringSwitch.h"
22 #include "llvm/ADT/Twine.h"
23 #include "llvm/CodeGen/AsmPrinter.h"
24 #include "llvm/CodeGen/MachineInstr.h"
25 #include "llvm/CodeGen/StackMaps.h"
26 #include "llvm/IR/DataLayout.h"
27 #include "llvm/IR/DebugInfo.h"
28 #include "llvm/MC/MCAsmInfo.h"
29 #include "llvm/MC/MCContext.h"
30 #include "llvm/MC/MCInst.h"
31 #include "llvm/MC/MCInstBuilder.h"
32 #include "llvm/MC/MCLinkerOptimizationHint.h"
33 #include "llvm/MC/MCStreamer.h"
34 #include "llvm/Support/Debug.h"
35 #include "llvm/Support/TargetRegistry.h"
36 using namespace llvm;
37
38 namespace {
39
40 class ARM64AsmPrinter : public AsmPrinter {
41 ARM64MCInstLower MCInstLowering;
42 StackMaps SM;
43
44 public:
45 ARM64AsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
46 : AsmPrinter(TM, Streamer), MCInstLowering(OutContext, *Mang, *this),
47 SM(*this), ARM64FI(NULL), LOHLabelCounter(0) {}
48
49 virtual const char *getPassName() const { return "ARM64 Assembly Printer"; }
50
51 /// \brief Wrapper for MCInstLowering.lowerOperand() for the
52 /// tblgen'erated pseudo lowering.
53 bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const {
54 return MCInstLowering.lowerOperand(MO, MCOp);
55 }
56
57 void LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
58 const MachineInstr &MI);
59 void LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
60 const MachineInstr &MI);
61 /// \brief tblgen'erated driver function for lowering simple MI->MC
62 /// pseudo instructions.
63 bool emitPseudoExpansionLowering(MCStreamer &OutStreamer,
64 const MachineInstr *MI);
65
66 void EmitInstruction(const MachineInstr *MI);
67
68 void getAnalysisUsage(AnalysisUsage &AU) const {
69 AsmPrinter::getAnalysisUsage(AU);
70 AU.setPreservesAll();
71 }
72
73 bool runOnMachineFunction(MachineFunction &F) {
74 ARM64FI = F.getInfo<ARM64FunctionInfo>();
75 return AsmPrinter::runOnMachineFunction(F);
76 }
77
78 private:
79 MachineLocation getDebugValueLocation(const MachineInstr *MI) const;
80 void printOperand(const MachineInstr *MI, unsigned OpNum, raw_ostream &O);
81 bool printAsmMRegister(const MachineOperand &MO, char Mode, raw_ostream &O);
82 bool printAsmRegInClass(const MachineOperand &MO,
83 const TargetRegisterClass *RC, bool isVector,
84 raw_ostream &O);
85
86 bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
87 unsigned AsmVariant, const char *ExtraCode,
88 raw_ostream &O);
89 bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
90 unsigned AsmVariant, const char *ExtraCode,
91 raw_ostream &O);
92
93 void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
94
95 void EmitFunctionBodyEnd();
96
97 MCSymbol *GetCPISymbol(unsigned CPID) const;
98 void EmitEndOfAsmFile(Module &M);
99 ARM64FunctionInfo *ARM64FI;
100
101 /// \brief Emit the LOHs contained in ARM64FI.
102 void EmitLOHs();
103
104 typedef std::map<const MachineInstr *, MCSymbol *> MInstToMCSymbol;
105 MInstToMCSymbol LOHInstToLabel;
106 unsigned LOHLabelCounter;
107 };
108
109 } // end of anonymous namespace
110
111 //===----------------------------------------------------------------------===//
112
113 void ARM64AsmPrinter::EmitEndOfAsmFile(Module &M) {
114 // Funny Darwin hack: This flag tells the linker that no global symbols
115 // contain code that falls through to other global symbols (e.g. the obvious
116 // implementation of multiple entry points). If this doesn't occur, the
117 // linker can safely perform dead code stripping. Since LLVM never
118 // generates code that does this, it is always safe to set.
119 OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
120 SM.serializeToStackMapSection();
121 }
122
123 MachineLocation
124 ARM64AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const {
125 MachineLocation Location;
126 assert(MI->getNumOperands() == 4 && "Invalid no. of machine operands!");
127 // Frame address. Currently handles register +- offset only.
128 if (MI->getOperand(0).isReg() && MI->getOperand(1).isImm())
129 Location.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm());
130 else {
131 DEBUG(dbgs() << "DBG_VALUE instruction ignored! " << *MI << "\n");
132 }
133 return Location;
134 }
135
136 void ARM64AsmPrinter::EmitLOHs() {
137 const ARM64FunctionInfo::MILOHDirectives &LOHs =
138 const_cast<ARM64FunctionInfo *>(ARM64FI)
139 ->getLOHContainer()
140 .getDirectives();
141 SmallVector<MCSymbol *, 3> MCArgs;
142
143 for (ARM64FunctionInfo::MILOHDirectives::const_iterator It = LOHs.begin(),
144 EndIt = LOHs.end();
145 It != EndIt; ++It) {
146 const ARM64FunctionInfo::MILOHArgs &MIArgs = It->getArgs();
147 for (ARM64FunctionInfo::MILOHArgs::const_iterator
148 MIArgsIt = MIArgs.begin(),
149 EndMIArgsIt = MIArgs.end();
150 MIArgsIt != EndMIArgsIt; ++MIArgsIt) {
151 MInstToMCSymbol::iterator LabelIt = LOHInstToLabel.find(*MIArgsIt);
152 assert(LabelIt != LOHInstToLabel.end() &&
153 "Label hasn't been inserted for LOH related instruction");
154 MCArgs.push_back(LabelIt->second);
155 }
156 OutStreamer.EmitLOHDirective(It->getKind(), MCArgs);
157 MCArgs.clear();
158 }
159 }
160
161 void ARM64AsmPrinter::EmitFunctionBodyEnd() {
162 if (!ARM64FI->getLOHRelated().empty())
163 EmitLOHs();
164 }
165
166 /// GetCPISymbol - Return the symbol for the specified constant pool entry.
167 MCSymbol *ARM64AsmPrinter::GetCPISymbol(unsigned CPID) const {
168 // Darwin uses a linker-private symbol name for constant-pools (to
169 // avoid addends on the relocation?), ELF has no such concept and
170 // uses a normal private symbol.
171 if (getDataLayout().getLinkerPrivateGlobalPrefix()[0])
172 return OutContext.GetOrCreateSymbol(
173 Twine(getDataLayout().getLinkerPrivateGlobalPrefix()) + "CPI" +
174 Twine(getFunctionNumber()) + "_" + Twine(CPID));
175
176 return OutContext.GetOrCreateSymbol(
177 Twine(getDataLayout().getPrivateGlobalPrefix()) + "CPI" +
178 Twine(getFunctionNumber()) + "_" + Twine(CPID));
179 }
180
181 void ARM64AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNum,
182 raw_ostream &O) {
183 const MachineOperand &MO = MI->getOperand(OpNum);
184 switch (MO.getType()) {
185 default:
186 assert(0 && "<unknown operand type>");
187 case MachineOperand::MO_Register: {
188 unsigned Reg = MO.getReg();
189 assert(TargetRegisterInfo::isPhysicalRegister(Reg));
190 assert(!MO.getSubReg() && "Subregs should be eliminated!");
191 O << ARM64InstPrinter::getRegisterName(Reg);
192 break;
193 }
194 case MachineOperand::MO_Immediate: {
195 int64_t Imm = MO.getImm();
196 O << '#' << Imm;
197 break;
198 }
199 }
200 }
201
202 bool ARM64AsmPrinter::printAsmMRegister(const MachineOperand &MO, char Mode,
203 raw_ostream &O) {
204 unsigned Reg = MO.getReg();
205 switch (Mode) {
206 default:
207 return true; // Unknown mode.
208 case 'w':
209 Reg = getWRegFromXReg(Reg);
210 break;
211 case 'x':
212 Reg = getXRegFromWReg(Reg);
213 break;
214 }
215
216 O << ARM64InstPrinter::getRegisterName(Reg);
217 return false;
218 }
219
220 // Prints the register in MO using class RC, i.e. the register in RC whose
221 // encoding (index) matches MO's register. This should not be used for
222 // cross-class printing.
223 bool ARM64AsmPrinter::printAsmRegInClass(const MachineOperand &MO,
224 const TargetRegisterClass *RC,
225 bool isVector, raw_ostream &O) {
226 assert(MO.isReg() && "Should only get here with a register!");
227 const ARM64RegisterInfo *RI =
228 static_cast<const ARM64RegisterInfo *>(TM.getRegisterInfo());
229 unsigned Reg = MO.getReg();
230 unsigned RegToPrint = RC->getRegister(RI->getEncodingValue(Reg));
231 assert(RI->regsOverlap(RegToPrint, Reg));
232 O << ARM64InstPrinter::getRegisterName(
233 RegToPrint, isVector ? ARM64::vreg : ARM64::NoRegAltName);
234 return false;
235 }
236
237 bool ARM64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
238 unsigned AsmVariant,
239 const char *ExtraCode, raw_ostream &O) {
240 const MachineOperand &MO = MI->getOperand(OpNum);
241 // Does this asm operand have a single letter operand modifier?
242 if (ExtraCode && ExtraCode[0]) {
243 if (ExtraCode[1] != 0)
244 return true; // Unknown modifier.
245
246 switch (ExtraCode[0]) {
247 default:
248 return true; // Unknown modifier.
249 case 'w': // Print W register
250 case 'x': // Print X register
251 if (MO.isReg())
252 return printAsmMRegister(MO, ExtraCode[0], O);
253 if (MO.isImm() && MO.getImm() == 0) {
254 unsigned Reg = ExtraCode[0] == 'w' ? ARM64::WZR : ARM64::XZR;
255 O << ARM64InstPrinter::getRegisterName(Reg);
256 return false;
257 }
258 printOperand(MI, OpNum, O);
259 return false;
260 case 'b': // Print B register.
261 case 'h': // Print H register.
262 case 's': // Print S register.
263 case 'd': // Print D register.
264 case 'q': // Print Q register.
265 if (MO.isReg()) {
266 const TargetRegisterClass *RC;
267 switch (ExtraCode[0]) {
268 case 'b':
269 RC = &ARM64::FPR8RegClass;
270 break;
271 case 'h':
272 RC = &ARM64::FPR16RegClass;
273 break;
274 case 's':
275 RC = &ARM64::FPR32RegClass;
276 break;
277 case 'd':
278 RC = &ARM64::FPR64RegClass;
279 break;
280 case 'q':
281 RC = &ARM64::FPR128RegClass;
282 break;
283 default:
284 return true;
285 }
286 return printAsmRegInClass(MO, RC, false /* vector */, O);
287 }
288 printOperand(MI, OpNum, O);
289 return false;
290 }
291 }
292
293 // According to ARM, we should emit x and v registers unless we have a
294 // modifier.
295 if (MO.isReg()) {
296 unsigned Reg = MO.getReg();
297
298 // If this is a w or x register, print an x register.
299 if (ARM64::GPR32allRegClass.contains(Reg) ||
300 ARM64::GPR64allRegClass.contains(Reg))
301 return printAsmMRegister(MO, 'x', O);
302
303 // If this is a b, h, s, d, or q register, print it as a v register.
304 return printAsmRegInClass(MO, &ARM64::FPR128RegClass, true /* vector */, O);
305 }
306
307 printOperand(MI, OpNum, O);
308 return false;
309 }
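//
// Illustration (not part of the original file): in C source, an operand
// modifier picks the register form printed above, e.g.
//   asm("add %w0, %w1, %w2" : "=r"(res) : "r"(a), "r"(b));
// prints the 32-bit 'w' register names, while omitting the modifier prints
// the 64-bit 'x' names for GPR operands.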
310
311 bool ARM64AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
312 unsigned OpNum, unsigned AsmVariant,
313 const char *ExtraCode,
314 raw_ostream &O) {
315 if (ExtraCode && ExtraCode[0])
316 return true; // Unknown modifier.
317
318 const MachineOperand &MO = MI->getOperand(OpNum);
319 assert(MO.isReg() && "unexpected inline asm memory operand");
320 O << "[" << ARM64InstPrinter::getRegisterName(MO.getReg()) << "]";
321 return false;
322 }
323
324 void ARM64AsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
325 raw_ostream &OS) {
326 unsigned NOps = MI->getNumOperands();
327 assert(NOps == 4);
328 OS << '\t' << MAI->getCommentString() << "DEBUG_VALUE: ";
329 // Cast away const; DIVariable etc. do not take const operands for some reason.
330 DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps - 1).getMetadata()));
331 OS << V.getName();
332 OS << " <- ";
333 // Frame address. Currently handles register +- offset only.
334 assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm());
335 OS << '[';
336 printOperand(MI, 0, OS);
337 OS << '+';
338 printOperand(MI, 1, OS);
339 OS << ']';
340 OS << "+";
341 printOperand(MI, NOps - 2, OS);
342 }
343
344 void ARM64AsmPrinter::LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
345 const MachineInstr &MI) {
346 unsigned NumNOPBytes = MI.getOperand(1).getImm();
347
348 SM.recordStackMap(MI);
349 // Emit padding.
350 assert(NumNOPBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
351 for (unsigned i = 0; i < NumNOPBytes; i += 4)
352 EmitToStreamer(OutStreamer, MCInstBuilder(ARM64::HINT).addImm(0));
353 }
354
355 // Lower a patchpoint of the form:
356 // [<def>], <id>, <numBytes>, <target>, <numArgs>
357 void ARM64AsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
358 const MachineInstr &MI) {
359 SM.recordPatchPoint(MI);
360
361 PatchPointOpers Opers(&MI);
362
363 int64_t CallTarget = Opers.getMetaOper(PatchPointOpers::TargetPos).getImm();
364 unsigned EncodedBytes = 0;
365 if (CallTarget) {
366 assert((CallTarget & 0xFFFFFFFFFFFF) == CallTarget &&
367 "High 16 bits of call target should be zero.");
368 unsigned ScratchReg = MI.getOperand(Opers.getNextScratchIdx()).getReg();
369 EncodedBytes = 16;
370 // Materialize the jump address:
371 EmitToStreamer(OutStreamer, MCInstBuilder(ARM64::MOVZWi)
372 .addReg(ScratchReg)
373 .addImm((CallTarget >> 32) & 0xFFFF)
374 .addImm(32));
375 EmitToStreamer(OutStreamer, MCInstBuilder(ARM64::MOVKWi)
376 .addReg(ScratchReg)
377 .addReg(ScratchReg)
378 .addImm((CallTarget >> 16) & 0xFFFF)
379 .addImm(16));
380 EmitToStreamer(OutStreamer, MCInstBuilder(ARM64::MOVKWi)
381 .addReg(ScratchReg)
382 .addReg(ScratchReg)
383 .addImm(CallTarget & 0xFFFF)
384 .addImm(0));
385 EmitToStreamer(OutStreamer, MCInstBuilder(ARM64::BLR).addReg(ScratchReg));
386 }
387 // Emit padding.
388 unsigned NumBytes = Opers.getMetaOper(PatchPointOpers::NBytesPos).getImm();
389 assert(NumBytes >= EncodedBytes &&
390 "Patchpoint can't request size less than the length of a call.");
391 assert((NumBytes - EncodedBytes) % 4 == 0 &&
392 "Invalid number of NOP bytes requested!");
393 for (unsigned i = EncodedBytes; i < NumBytes; i += 4)
394 EmitToStreamer(OutStreamer, MCInstBuilder(ARM64::HINT).addImm(0));
395 }
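//
// Worked example (illustrative values, not from the original sources): for a
// 48-bit CallTarget of 0x123456789ABC the code above materializes the address
// into the scratch register (written Xscratch here) in three 16-bit pieces:
//   movz Xscratch, #0x1234, lsl #32
//   movk Xscratch, #0x5678, lsl #16
//   movk Xscratch, #0x9abc
//   blr  Xscratch
// i.e. 16 bytes of encoded instructions; the remainder of the patchpoint is
// padded with HINT #0 (nop).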
396
397 // Simple pseudo-instructions have their lowering (with expansion to real
398 // instructions) auto-generated.
399 #include "ARM64GenMCPseudoLowering.inc"
400
401 static unsigned getRealIndexedOpcode(unsigned Opc) {
402 switch (Opc) {
403 case ARM64::LDRXpre_isel: return ARM64::LDRXpre;
404 case ARM64::LDRWpre_isel: return ARM64::LDRWpre;
405 case ARM64::LDRDpre_isel: return ARM64::LDRDpre;
406 case ARM64::LDRSpre_isel: return ARM64::LDRSpre;
407 case ARM64::LDRBBpre_isel: return ARM64::LDRBBpre;
408 case ARM64::LDRHHpre_isel: return ARM64::LDRHHpre;
409 case ARM64::LDRSBWpre_isel: return ARM64::LDRSBWpre;
410 case ARM64::LDRSBXpre_isel: return ARM64::LDRSBXpre;
411 case ARM64::LDRSHWpre_isel: return ARM64::LDRSHWpre;
412 case ARM64::LDRSHXpre_isel: return ARM64::LDRSHXpre;
413 case ARM64::LDRSWpre_isel: return ARM64::LDRSWpre;
414
415 case ARM64::LDRDpost_isel: return ARM64::LDRDpost;
416 case ARM64::LDRSpost_isel: return ARM64::LDRSpost;
417 case ARM64::LDRXpost_isel: return ARM64::LDRXpost;
418 case ARM64::LDRWpost_isel: return ARM64::LDRWpost;
419 case ARM64::LDRHHpost_isel: return ARM64::LDRHHpost;
420 case ARM64::LDRBBpost_isel: return ARM64::LDRBBpost;
421 case ARM64::LDRSWpost_isel: return ARM64::LDRSWpost;
422 case ARM64::LDRSHWpost_isel: return ARM64::LDRSHWpost;
423 case ARM64::LDRSHXpost_isel: return ARM64::LDRSHXpost;
424 case ARM64::LDRSBWpost_isel: return ARM64::LDRSBWpost;
425 case ARM64::LDRSBXpost_isel: return ARM64::LDRSBXpost;
426
427 case ARM64::STRXpre_isel: return ARM64::STRXpre;
428 case ARM64::STRWpre_isel: return ARM64::STRWpre;
429 case ARM64::STRHHpre_isel: return ARM64::STRHHpre;
430 case ARM64::STRBBpre_isel: return ARM64::STRBBpre;
431 case ARM64::STRDpre_isel: return ARM64::STRDpre;
432 case ARM64::STRSpre_isel: return ARM64::STRSpre;
433 }
434 llvm_unreachable("Unexpected pre-indexed opcode!");
435 }
436
437 void ARM64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
438 // Do any auto-generated pseudo lowerings.
439 if (emitPseudoExpansionLowering(OutStreamer, MI))
440 return;
441
442 if (ARM64FI->getLOHRelated().count(MI)) {
443 // Generate a label for LOH related instruction
444 MCSymbol *LOHLabel = GetTempSymbol("loh", LOHLabelCounter++);
445 // Associate the instruction with the label
446 LOHInstToLabel[MI] = LOHLabel;
447 OutStreamer.EmitLabel(LOHLabel);
448 }
449
450 // Do any manual lowerings.
451 switch (MI->getOpcode()) {
452 default:
453 break;
454 case ARM64::DBG_VALUE: {
455 if (isVerbose() && OutStreamer.hasRawTextSupport()) {
456 SmallString<128> TmpStr;
457 raw_svector_ostream OS(TmpStr);
458 PrintDebugValueComment(MI, OS);
459 OutStreamer.EmitRawText(StringRef(OS.str()));
460 }
461 return;
462 }
463 // Indexed loads and stores use a pseudo to handle complex operand
464 // tricks and writeback to the base register. We strip off the writeback
465 // operand and switch the opcode here. Post-indexed stores were handled by the
466 // tablegen'erated pseudos above. (The complex operand <--> simple
467 // operand isel is beyond tablegen's ability, so we do these manually).
468 case ARM64::LDRHHpre_isel:
469 case ARM64::LDRBBpre_isel:
470 case ARM64::LDRXpre_isel:
471 case ARM64::LDRWpre_isel:
472 case ARM64::LDRDpre_isel:
473 case ARM64::LDRSpre_isel:
474 case ARM64::LDRSBWpre_isel:
475 case ARM64::LDRSBXpre_isel:
476 case ARM64::LDRSHWpre_isel:
477 case ARM64::LDRSHXpre_isel:
478 case ARM64::LDRSWpre_isel:
479 case ARM64::LDRDpost_isel:
480 case ARM64::LDRSpost_isel:
481 case ARM64::LDRXpost_isel:
482 case ARM64::LDRWpost_isel:
483 case ARM64::LDRHHpost_isel:
484 case ARM64::LDRBBpost_isel:
485 case ARM64::LDRSWpost_isel:
486 case ARM64::LDRSHWpost_isel:
487 case ARM64::LDRSHXpost_isel:
488 case ARM64::LDRSBWpost_isel:
489 case ARM64::LDRSBXpost_isel: {
490 MCInst TmpInst;
491 // For loads, the writeback operand to be skipped is the second.
492 TmpInst.setOpcode(getRealIndexedOpcode(MI->getOpcode()));
493 TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
494 TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(2).getReg()));
495 TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(3).getImm()));
496 EmitToStreamer(OutStreamer, TmpInst);
497 return;
498 }
499 case ARM64::STRXpre_isel:
500 case ARM64::STRWpre_isel:
501 case ARM64::STRHHpre_isel:
502 case ARM64::STRBBpre_isel:
503 case ARM64::STRDpre_isel:
504 case ARM64::STRSpre_isel: {
505 MCInst TmpInst;
506 // For stores, the writeback operand to be skipped is the first.
507 TmpInst.setOpcode(getRealIndexedOpcode(MI->getOpcode()));
508 TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg()));
509 TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(2).getReg()));
510 TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(3).getImm()));
511 EmitToStreamer(OutStreamer, TmpInst);
512 return;
513 }
514
515 // Tail calls use pseudo instructions so they have the proper code-gen
516 // attributes (isCall, isReturn, etc.). We lower them to the real
517 // instruction here.
518 case ARM64::TCRETURNri: {
519 MCInst TmpInst;
520 TmpInst.setOpcode(ARM64::BR);
521 TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
522 EmitToStreamer(OutStreamer, TmpInst);
523 return;
524 }
525 case ARM64::TCRETURNdi: {
526 MCOperand Dest;
527 MCInstLowering.lowerOperand(MI->getOperand(0), Dest);
528 MCInst TmpInst;
529 TmpInst.setOpcode(ARM64::B);
530 TmpInst.addOperand(Dest);
531 EmitToStreamer(OutStreamer, TmpInst);
532 return;
533 }
534 case ARM64::TLSDESC_BLR: {
535 MCOperand Callee, Sym;
536 MCInstLowering.lowerOperand(MI->getOperand(0), Callee);
537 MCInstLowering.lowerOperand(MI->getOperand(1), Sym);
538
539 // First emit a relocation-annotation. This expands to no code, but requests
540 // that the following instruction get an R_AARCH64_TLSDESC_CALL relocation.
541 MCInst TLSDescCall;
542 TLSDescCall.setOpcode(ARM64::TLSDESCCALL);
543 TLSDescCall.addOperand(Sym);
544 EmitToStreamer(OutStreamer, TLSDescCall);
545
546 // Other than that it's just a normal indirect call to the function loaded
547 // from the descriptor.
548 MCInst BLR;
549 BLR.setOpcode(ARM64::BLR);
550 BLR.addOperand(Callee);
551 EmitToStreamer(OutStreamer, BLR);
552
553 return;
554 }
555
556 case TargetOpcode::STACKMAP:
557 return LowerSTACKMAP(OutStreamer, SM, *MI);
558
559 case TargetOpcode::PATCHPOINT:
560 return LowerPATCHPOINT(OutStreamer, SM, *MI);
561 }
562
563 // Finally, do the automated lowerings for everything else.
564 MCInst TmpInst;
565 MCInstLowering.Lower(MI, TmpInst);
566 EmitToStreamer(OutStreamer, TmpInst);
567 }
568
569 // Force static initialization.
570 extern "C" void LLVMInitializeARM64AsmPrinter() {
571 RegisterAsmPrinter<ARM64AsmPrinter> X(TheARM64Target);
572 }
0 //===-- ARM64BranchRelaxation.cpp - ARM64 branch relaxation ---------------===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //===----------------------------------------------------------------------===//
10
11 #define DEBUG_TYPE "arm64-branch-relax"
12 #include "ARM64.h"
13 #include "ARM64InstrInfo.h"
14 #include "ARM64MachineFunctionInfo.h"
15 #include "llvm/ADT/SmallVector.h"
16 #include "llvm/CodeGen/MachineFunctionPass.h"
17 #include "llvm/CodeGen/MachineInstrBuilder.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/ErrorHandling.h"
20 #include "llvm/Support/Format.h"
21 #include "llvm/Support/raw_ostream.h"
22 #include "llvm/ADT/Statistic.h"
23 #include "llvm/Support/CommandLine.h"
24 using namespace llvm;
25
26 static cl::opt<bool>
27 BranchRelaxation("arm64-branch-relax", cl::Hidden, cl::init(true),
28 cl::desc("Relax out of range conditional branches"));
29
30 static cl::opt<unsigned>
31 TBZDisplacementBits("arm64-tbz-offset-bits", cl::Hidden, cl::init(14),
32 cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
33
34 static cl::opt<unsigned>
35 CBZDisplacementBits("arm64-cbz-offset-bits", cl::Hidden, cl::init(19),
36 cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
37
38 static cl::opt<unsigned>
39 BCCDisplacementBits("arm64-bcc-offset-bits", cl::Hidden, cl::init(19),
40 cl::desc("Restrict range of Bcc instructions (DEBUG)"));
41
42 STATISTIC(NumSplit, "Number of basic blocks split");
43 STATISTIC(NumRelaxed, "Number of conditional branches relaxed");
44
45 namespace {
46 class ARM64BranchRelaxation : public MachineFunctionPass {
47 /// BasicBlockInfo - Information about the offset and size of a single
48 /// basic block.
49 struct BasicBlockInfo {
50 /// Offset - Distance from the beginning of the function to the beginning
51 /// of this basic block.
52 ///
53 /// The offset is always aligned as required by the basic block.
54 unsigned Offset;
55
56 /// Size - Size of the basic block in bytes. If the block contains
57 /// inline assembly, this is a worst case estimate.
58 ///
59 /// The size does not include any alignment padding whether from the
60 /// beginning of the block, or from an aligned jump table at the end.
61 unsigned Size;
62
63 BasicBlockInfo() : Offset(0), Size(0) {}
64
65 /// Compute the offset immediately following this block. If LogAlign is
66 /// specified, return the offset the successor block will get if it has
67 /// this alignment.
68 unsigned postOffset(unsigned LogAlign = 0) const {
69 unsigned PO = Offset + Size;
70 unsigned Align = 1 << LogAlign;
71 return (PO + Align - 1) / Align * Align;
72 }
73 };
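//
// Worked example (illustrative numbers): with Offset = 0x104, Size = 0x6 and
// LogAlign = 2, PO = 0x10A and postOffset() rounds up to the next 4-byte
// boundary, returning 0x10C.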
74
75 SmallVector<BasicBlockInfo, 16> BlockInfo;
76
77 MachineFunction *MF;
78 const ARM64InstrInfo *TII;
79
80 bool relaxBranchInstructions();
81 void scanFunction();
82 MachineBasicBlock *splitBlockBeforeInstr(MachineInstr *MI);
83 void adjustBlockOffsets(MachineBasicBlock *BB);
84 bool isBlockInRange(MachineInstr *MI, MachineBasicBlock *BB, unsigned Disp);
85 bool fixupConditionalBranch(MachineInstr *MI);
86 void computeBlockSize(MachineBasicBlock *MBB);
87 unsigned getInstrOffset(MachineInstr *MI) const;
88 void dumpBBs();
89 void verify();
90
91 public:
92 static char ID;
93 ARM64BranchRelaxation() : MachineFunctionPass(ID) {}
94
95 virtual bool runOnMachineFunction(MachineFunction &MF);
96
97 virtual const char *getPassName() const {
98 return "ARM64 branch relaxation pass";
99 }
100 };
101 char ARM64BranchRelaxation::ID = 0;
102 }
103
104 /// verify - check BBOffsets, BBSizes, alignment of islands
105 void ARM64BranchRelaxation::verify() {
106 #ifndef NDEBUG
107 unsigned PrevNum = MF->begin()->getNumber();
108 for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); MBBI != E;
109 ++MBBI) {
110 MachineBasicBlock *MBB = MBBI;
111 unsigned Align = MBB->getAlignment();
112 unsigned Num = MBB->getNumber();
113 assert(BlockInfo[Num].Offset % (1u << Align) == 0);
114 assert(!Num || BlockInfo[PrevNum].postOffset() <= BlockInfo[Num].Offset);
115 PrevNum = Num;
116 }
117 #endif
118 }
119
120 /// print block size and offset information - debugging
121 void ARM64BranchRelaxation::dumpBBs() {
122 for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); MBBI != E;
123 ++MBBI) {
124 const BasicBlockInfo &BBI = BlockInfo[MBBI->getNumber()];
125 dbgs() << format("BB#%u\toffset=%08x\t", MBBI->getNumber(), BBI.Offset)
126 << format("size=%#x\n", BBI.Size);
127 }
128 }
129
130 /// BBHasFallthrough - Return true if the specified basic block can fallthrough
131 /// into the block immediately after it.
132 static bool BBHasFallthrough(MachineBasicBlock *MBB) {
133 // Get the next machine basic block in the function.
134 MachineFunction::iterator MBBI = MBB;
135 // Can't fall off end of function.
136 if (std::next(MBBI) == MBB->getParent()->end())
137 return false;
138
139 MachineBasicBlock *NextBB = std::next(MBBI);
140 for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
141 E = MBB->succ_end();
142 I != E; ++I)
143 if (*I == NextBB)
144 return true;
145
146 return false;
147 }
148
149 /// scanFunction - Do the initial scan of the function, building up
150 /// information about each block.
151 void ARM64BranchRelaxation::scanFunction() {
152 BlockInfo.clear();
153 BlockInfo.resize(MF->getNumBlockIDs());
154
155 // First thing, compute the size of all basic blocks, and see if the function
156 // has any inline assembly in it. If so, we have to be conservative about
157 // alignment assumptions, as we don't know for sure the size of any
158 // instructions in the inline assembly.
159 for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I)
160 computeBlockSize(I);
161
162 // Compute block offsets.
163 adjustBlockOffsets(MF->begin());
164 }
165
166 /// computeBlockSize - Compute the size for MBB.
167 /// This function updates BlockInfo directly.
168 void ARM64BranchRelaxation::computeBlockSize(MachineBasicBlock *MBB) {
169 unsigned Size = 0;
170 for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
171 ++I)
172 Size += TII->GetInstSizeInBytes(I);
173 BlockInfo[MBB->getNumber()].Size = Size;
174 }
175
176 /// getInstrOffset - Return the current offset of the specified machine
177 /// instruction from the start of the function. This offset changes as stuff is
178 /// moved around inside the function.
179 unsigned ARM64BranchRelaxation::getInstrOffset(MachineInstr *MI) const {
180 MachineBasicBlock *MBB = MI->getParent();
181
182 // The offset is composed of two things: the sum of the sizes of all MBB's
183 // before this instruction's block, and the offset from the start of the block
184 // it is in.
185 unsigned Offset = BlockInfo[MBB->getNumber()].Offset;
186
187 // Sum instructions before MI in MBB.
188 for (MachineBasicBlock::iterator I = MBB->begin(); &*I != MI; ++I) {
189 assert(I != MBB->end() && "Didn't find MI in its own basic block?");
190 Offset += TII->GetInstSizeInBytes(I);
191 }
192 return Offset;
193 }
194
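/// adjustBlockOffsets - Recompute the Offset of every block laid out after
/// Start, propagating each block's post-offset (including any alignment
/// padding) to its layout successor.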
195 void ARM64BranchRelaxation::adjustBlockOffsets(MachineBasicBlock *Start) {
196 unsigned PrevNum = Start->getNumber();
197 MachineFunction::iterator MBBI = Start, E = MF->end();
198 for (++MBBI; MBBI != E; ++MBBI) {
199 MachineBasicBlock *MBB = MBBI;
200 unsigned Num = MBB->getNumber();
201 if (!Num) // block zero is never changed from offset zero.
202 continue;
203 // Get the offset at the end of the layout predecessor, rounded up to
204 // the alignment of the current block.
205 unsigned LogAlign = MBBI->getAlignment();
206 BlockInfo[Num].Offset = BlockInfo[PrevNum].postOffset(LogAlign);
207 PrevNum = Num;
208 }
209 }
210
211 /// Split the basic block containing MI into two blocks, which are joined by
212 /// an unconditional branch. Update data structures and renumber blocks to
213 /// account for this change, and return the newly created block.
214 /// NOTE: Successor list of the original BB is out of date after this function,
215 /// and must be updated by the caller! The caller performs further transforms
216 /// immediately after this call, so there is no point updating it here.
217 MachineBasicBlock *
218 ARM64BranchRelaxation::splitBlockBeforeInstr(MachineInstr *MI) {
219 MachineBasicBlock *OrigBB = MI->getParent();
220
221 // Create a new MBB for the code after the OrigBB.
222 MachineBasicBlock *NewBB =
223 MF->CreateMachineBasicBlock(OrigBB->getBasicBlock());
224 MachineFunction::iterator MBBI = OrigBB;
225 ++MBBI;
226 MF->insert(MBBI, NewBB);
227
228 // Splice the instructions starting with MI over to NewBB.
229 NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end());
230
231 // Add an unconditional branch from OrigBB to NewBB.
232 // Note the new unconditional branch is not being recorded.
233 // There doesn't seem to be meaningful DebugInfo available; this doesn't
234 // correspond to anything in the source.
235 BuildMI(OrigBB, DebugLoc(), TII->get(ARM64::B)).addMBB(NewBB);
236
237 // Insert an entry into BlockInfo to align it properly with the block numbers.
238 BlockInfo.insert(BlockInfo.begin() + NewBB->getNumber(), BasicBlockInfo());
239
240 // Figure out how large the OrigBB is. As the first half of the original
241 // block, it cannot contain a tablejump. The size includes
242 // the new jump we added. (It should be possible to do this without
243 // recounting everything, but it's very confusing, and this is rarely
244 // executed.)
245 computeBlockSize(OrigBB);
246
247 // Figure out how large the NewBB is. As the second half of the original
248 // block, it may contain a tablejump.
249 computeBlockSize(NewBB);
250
251 // All BBOffsets following these blocks must be modified.
252 adjustBlockOffsets(OrigBB);
253
254 ++NumSplit;
255
256 return NewBB;
257 }
258
259 /// isBlockInRange - Returns true if the distance between the specified MI
260 /// and the specified destination BB fits in MI's displacement field.
261 bool ARM64BranchRelaxation::isBlockInRange(MachineInstr *MI,
262 MachineBasicBlock *DestBB,
263 unsigned Bits) {
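  // The largest byte offset a Bits-bit signed, word-scaled branch immediate
  // can encode: (2^(Bits-1) - 1) * 4 bytes, e.g. 1 MiB - 4 for Bits = 19.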
264 unsigned MaxOffs = ((1 << (Bits - 1)) - 1) << 2;
265 unsigned BrOffset = getInstrOffset(MI);
266 unsigned DestOffset = BlockInfo[DestBB->getNumber()].Offset;
267
268 DEBUG(dbgs() << "Branch of destination BB#" << DestBB->getNumber()
269 << " from BB#" << MI->getParent()->getNumber()
270 << " max delta=" << MaxOffs << " from " << getInstrOffset(MI)
271 << " to " << DestOffset << " offset "
272 << int(DestOffset - BrOffset) << "\t" << *MI);
273
274 // Branch before the Dest.
275 if (BrOffset <= DestOffset)
276 return (DestOffset - BrOffset <= MaxOffs);
277 return (BrOffset - DestOffset <= MaxOffs);
278 }
279
280 static bool isConditionalBranch(unsigned Opc) {
281 switch (Opc) {
282 default:
283 return false;
284 case ARM64::TBZ:
285 case ARM64::TBNZ:
286 case ARM64::CBZW:
287 case ARM64::CBNZW:
288 case ARM64::CBZX:
289 case ARM64::CBNZX:
290 case ARM64::Bcc:
291 return true;
292 }
293 }
294
295 static MachineBasicBlock *getDestBlock(MachineInstr *MI) {
296 switch (MI->getOpcode()) {
297 default:
298 assert(0 && "unexpected opcode!");
299 case ARM64::TBZ:
300 case ARM64::TBNZ:
301 return MI->getOperand(2).getMBB();
302 case ARM64::CBZW:
303 case ARM64::CBNZW:
304 case ARM64::CBZX:
305 case ARM64::CBNZX:
306 case ARM64::Bcc:
307 return MI->getOperand(1).getMBB();
308 }
309 }
310
311 static unsigned getOppositeConditionOpcode(unsigned Opc) {
312 switch (Opc) {
313 default:
314 assert(0 && "unexpected opcode!");
315 case ARM64::TBNZ: return ARM64::TBZ;
316 case ARM64::TBZ: return ARM64::TBNZ;
317 case ARM64::CBNZW: return ARM64::CBZW;
318 case ARM64::CBNZX: return ARM64::CBZX;
319 case ARM64::CBZW: return ARM64::CBNZW;
320 case ARM64::CBZX: return ARM64::CBNZX;
321 case ARM64::Bcc: return ARM64::Bcc; // Condition is an operand for Bcc.
322 }
323 }
324
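// The *DisplacementBits values used below are defined earlier in this file;
// architecturally, TBZ/TBNZ carry a 14-bit immediate and CBZ/CBNZ/B.cond a
// 19-bit immediate, each scaled by the 4-byte instruction size.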
325 static unsigned getBranchDisplacementBits(unsigned Opc) {
326 switch (Opc) {
327 default:
328 assert(0 && "unexpected opcode!");
329 case ARM64::TBNZ:
330 case ARM64::TBZ:
331 return TBZDisplacementBits;
332 case ARM64::CBNZW:
333 case ARM64::CBZW:
334 case ARM64::CBNZX:
335 case ARM64::CBZX:
336 return CBZDisplacementBits;
337 case ARM64::Bcc:
338 return BCCDisplacementBits;
339 }
340 }
341
342 static inline void invertBccCondition(MachineInstr *MI) {
343 assert(MI->getOpcode() == ARM64::Bcc && "Unexpected opcode!");
344 ARM64CC::CondCode CC = (ARM64CC::CondCode)MI->getOperand(0).getImm();
345 CC = ARM64CC::getInvertedCondCode(CC);
346 MI->getOperand(0).setImm((int64_t)CC);
347 }
348
349 /// fixupConditionalBranch - Fix up a conditional branch whose destination is
350 /// too far away to fit in its displacement field. It is converted to an inverse
351 /// conditional branch + an unconditional branch to the destination.
352 bool ARM64BranchRelaxation::fixupConditionalBranch(MachineInstr *MI) {
353 MachineBasicBlock *DestBB = getDestBlock(MI);
354
355 // Add an unconditional branch to the destination and invert the branch
356 // condition to jump over it:
357 // tbz L1
358 // =>
359 // tbnz L2
360 // b L1
361 // L2:
362
363 // If the branch is at the end of its MBB and that has a fall-through block,
364 // direct the updated conditional branch to the fall-through block. Otherwise,
365 // split the MBB before the next instruction.
366 MachineBasicBlock *MBB = MI->getParent();
367 MachineInstr *BMI = &MBB->back();
368 bool NeedSplit = (BMI != MI) || !BBHasFallthrough(MBB);
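  // A split is needed unless MI is the last instruction in MBB and MBB already
  // falls through to its layout successor, which can then serve as L2.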
369
370 if (BMI != MI) {
371 if (std::next(MachineBasicBlock::iterator(MI)) ==
372 std::prev(MBB->getLastNonDebugInstr()) &&
373 BMI->getOpcode() == ARM64::B) {
374 // Last MI in the BB is an unconditional branch. Can we simply invert the
375 // condition and swap destinations:
376 // beq L1
377 // b L2
378 // =>
379 // bne L2
380 // b L1
381 MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB();
382 if (isBlockInRange(MI, NewDest,
383 getBranchDisplacementBits(MI->getOpcode()))) {
384 DEBUG(dbgs() << " Invert condition and swap its destination with "
385 << *BMI);
386 BMI->getOperand(0).setMBB(DestBB);
387 unsigned OpNum =
388 (MI->getOpcode() == ARM64::TBZ || MI->getOpcode() == ARM64::TBNZ)
389 ? 2
390 : 1;
391 MI->getOperand(OpNum).setMBB(NewDest);
392 MI->setDesc(TII->get(getOppositeConditionOpcode(MI->getOpcode())));
393 if (MI->getOpcode() == ARM64::Bcc)
394 invertBccCondition(MI);
395 return true;
396 }
397 }
398 }
399
400 if (NeedSplit) {
401 // Analyze the branch so we know how to update the successor lists.
402 MachineBasicBlock *TBB, *FBB;
403 SmallVector<MachineOperand, 4> Cond;
404 TII->AnalyzeBranch(*MBB, TBB, FBB, Cond, false);
405
406 MachineBasicBlock *NewBB = splitBlockBeforeInstr(MI);
407 // No need for the branch to the next block. We're adding an unconditional
408 // branch to the destination.
409 int delta = TII->GetInstSizeInBytes(&MBB->back());
410 BlockInfo[MBB->getNumber()].Size -= delta;
411 MBB->back().eraseFromParent();
412 // BlockInfo[SplitBB].Offset is wrong temporarily, fixed below
413
414 // Update the successor lists according to the transformation to follow.
415 // Do it here since if there's no split, no update is needed.
416 MBB->replaceSuccessor(FBB, NewBB);
417 NewBB->addSuccessor(FBB);
418 }
419 MachineBasicBlock *NextBB = std::next(MachineFunction::iterator(MBB));
420
421 DEBUG(dbgs() << " Insert B to BB#" << DestBB->getNumber()
422 << ", invert condition and change dest. to BB#"
423 << NextBB->getNumber() << "\n");
424
425 // Insert a new conditional branch and a new unconditional branch.
426 MachineInstrBuilder MIB = BuildMI(
427 MBB, DebugLoc(), TII->get(getOppositeConditionOpcode(MI->getOpcode())))
428 .addOperand(MI->getOperand(0));
429 if (MI->getOpcode() == ARM64::TBZ || MI->getOpcode() == ARM64::TBNZ)
430 MIB.addOperand(MI->getOperand(1));
431 if (MI->getOpcode() == ARM64::Bcc)
432 invertBccCondition(MIB);
433 MIB.addMBB(NextBB);
434 BlockInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(&MBB->back());
435 BuildMI(MBB, DebugLoc(), TII->get(ARM64::B)).addMBB(DestBB);
436 BlockInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(&MBB->back());
437
438 // Remove the old conditional branch. It may or may not still be in MBB.
439 BlockInfo[MI->getParent()->getNumber()].Size -= TII->GetInstSizeInBytes(MI);
440 MI->eraseFromParent();
441
442 // Finally, keep the block offsets up to date.
443 adjustBlockOffsets(MBB);
444 return true;
445 }
446
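/// relaxBranchInstructions - Scan each block's terminator and fix up any
/// conditional branch whose destination is out of range. Returns true if
/// any branch was relaxed.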
447 bool ARM64BranchRelaxation::relaxBranchInstructions() {
448 bool Changed = false;
449 // Relaxing branches involves creating new basic blocks, so re-evaluate
450 // end() for termination.
451 for (MachineFunction::iterator I = MF->begin(); I != MF->end(); ++I) {
452 MachineInstr *MI = I->getFirstTerminator();
453 if (isConditionalBranch(MI->getOpcode()) &&
454 !isBlockInRange(MI, getDestBlock(MI),
455 getBranchDisplacementBits(MI->getOpcode()))) {
456 fixupConditionalBranch(MI);
457 ++NumRelaxed;
458 Changed = true;
459 }
460 }
461 return Changed;
462 }
463
464 bool ARM64BranchRelaxation::runOnMachineFunction(MachineFunction &mf) {
465 MF = &mf;
466
467 // If the pass is disabled, just bail early.
468 if (!BranchRelaxation)
469 return false;
470
471 DEBUG(dbgs() << "***** ARM64BranchRelaxation *****\n");
472
473 TII = (const ARM64InstrInfo *)MF->getTarget().getInstrInfo();
474
475 // Renumber all of the machine basic blocks in the function, guaranteeing that
476 // the numbers agree with the position of the block in the function.
477 MF->RenumberBlocks();
478
479 // Do the initial scan of the function, building up information about the
480 // sizes of each block.
481 scanFunction();
482
483 DEBUG(dbgs() << " Basic blocks before relaxation\n");
484 DEBUG(dumpBBs());
485
486 bool MadeChange = false;
487 while (relaxBranchInstructions())
488 MadeChange = true;
489
490 // This could eventually be made debug-only, but it is not expensive.
491 verify();
492
493 DEBUG(dbgs() << " Basic blocks after relaxation\n");
494 DEBUG(dbgs() << '\n'; dumpBBs());
495
496 BlockInfo.clear();
497
498 return MadeChange;
499 }
500
501 /// createARM64BranchRelaxation - Returns an instance of the ARM64 branch
502 /// relaxation pass.
503 FunctionPass *llvm::createARM64BranchRelaxation() {
504 return new ARM64BranchRelaxation();
505 }
0 //=== ARM64CallingConv.h - Custom Calling Convention Routines -*- C++ -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the custom routines for the ARM64 Calling Convention that
10 // aren't done by tablegen.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #ifndef ARM64CALLINGCONV_H
15 #define ARM64CALLINGCONV_H
16
17 #include "ARM64InstrInfo.h"
18 #include "llvm/IR/CallingConv.h"
19 #include "llvm/CodeGen/CallingConvLower.h"
20