llvm.org GIT mirror: llvm / commit 29f94c7
AArch64/ARM64: move ARM64 into AArch64's place

This commit starts with a "git mv ARM64 AArch64" and continues out from there, renaming the C++ classes, intrinsics, and other target-local objects for consistency.

"ARM64" test directories are also moved, and tests that began their life in ARM64 use an arm64 triple, those from AArch64 use an aarch64 triple. Both should be equivalent though.

This finishes the AArch64 merge, and everyone should feel free to continue committing as normal now.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209577 91177308-0d34-0410-b5e6-96231b3b80d8

Tim Northover, 5 years ago
1063 changed files with 132,791 additions and 132,897 deletions.
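In practice the rename is mechanical: the IR-level intrinsic prefix changes from "llvm.arm64." to "llvm.aarch64.", and either triple now selects the merged backend. A minimal before/after sketch (the function name is illustrative, not taken from this commit), using the CRC32 intrinsic whose definition appears in the files below:

; Before this commit (ARM64 spelling):
;   target triple = "arm64-apple-ios"
;   %r = call i32 @llvm.arm64.crc32b(i32 %crc, i32 %byte)

; After this commit (AArch64 spelling; arm64 triples remain equivalent):
target triple = "aarch64-none-linux-gnu"

declare i32 @llvm.aarch64.crc32b(i32, i32)

define i32 @crc32_step(i32 %crc, i32 %byte) {
  %r = call i32 @llvm.aarch64.crc32b(i32 %crc, i32 %byte)
  ret i32 %r
}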
126126 set(LLVM_LIBDIR_SUFFIX "" CACHE STRING "Define suffix of library directory name (32/64)" )
127127
128128 set(LLVM_ALL_TARGETS
129 ARM64
129 AArch64
130130 ARM
131131 CppBackend
132132 Hexagon
142142 )
143143
144144 # List of targets with JIT support:
145 set(LLVM_TARGETS_WITH_JIT X86 PowerPC ARM64 ARM Mips SystemZ)
145 set(LLVM_TARGETS_WITH_JIT X86 PowerPC AArch64 ARM Mips SystemZ)
146146
147147 set(LLVM_TARGETS_TO_BUILD "all"
148148 CACHE STRING "Semicolon-separated list of targets to build, or \"all\".")
418418 amd64-* | x86_64-*) llvm_cv_target_arch="x86_64" ;;
419419 sparc*-*) llvm_cv_target_arch="Sparc" ;;
420420 powerpc*-*) llvm_cv_target_arch="PowerPC" ;;
421 arm64*-*) llvm_cv_target_arch="ARM64" ;;
421 arm64*-*) llvm_cv_target_arch="AArch64" ;;
422422 arm*-*) llvm_cv_target_arch="ARM" ;;
423 aarch64*-*) llvm_cv_target_arch="ARM64" ;;
423 aarch64*-*) llvm_cv_target_arch="AArch64" ;;
424424 mips-* | mips64-*) llvm_cv_target_arch="Mips" ;;
425425 mipsel-* | mips64el-*) llvm_cv_target_arch="Mips" ;;
426426 xcore-*) llvm_cv_target_arch="XCore" ;;
454454 amd64-* | x86_64-*) host_arch="x86_64" ;;
455455 sparc*-*) host_arch="Sparc" ;;
456456 powerpc*-*) host_arch="PowerPC" ;;
457 arm64*-*) host_arch="ARM64" ;;
457 arm64*-*) host_arch="AArch64" ;;
458458 arm*-*) host_arch="ARM" ;;
459 aarch64*-*) host_arch="ARM64" ;;
459 aarch64*-*) host_arch="AArch64" ;;
460460 mips-* | mips64-*) host_arch="Mips" ;;
461461 mipsel-* | mips64el-*) host_arch="Mips" ;;
462462 xcore-*) host_arch="XCore" ;;
795795 esac
796796 fi
797797
798 TARGETS_WITH_JIT="ARM ARM64 Mips PowerPC SystemZ X86"
798 TARGETS_WITH_JIT="ARM AArch64 Mips PowerPC SystemZ X86"
799799 AC_SUBST(TARGETS_WITH_JIT,$TARGETS_WITH_JIT)
800800
801801 dnl Allow enablement of building and installing docs
948948 fi
949949
950950 dnl List all possible targets
951 ALL_TARGETS="X86 Sparc PowerPC ARM ARM64 Mips XCore MSP430 CppBackend NVPTX Hexagon SystemZ R600"
951 ALL_TARGETS="X86 Sparc PowerPC ARM AArch64 Mips XCore MSP430 CppBackend NVPTX Hexagon SystemZ R600"
952952 AC_SUBST(ALL_TARGETS,$ALL_TARGETS)
953953
954954 dnl Allow specific targets to be specified for building (or not)
969969 x86_64) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
970970 sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;;
971971 powerpc) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;;
972 aarch64) TARGETS_TO_BUILD="ARM64 $TARGETS_TO_BUILD" ;;
973 arm64) TARGETS_TO_BUILD="ARM64 $TARGETS_TO_BUILD" ;;
972 aarch64) TARGETS_TO_BUILD="AArch64 $TARGETS_TO_BUILD" ;;
973 arm64) TARGETS_TO_BUILD="AArch64 $TARGETS_TO_BUILD" ;;
974974 arm) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;;
975975 mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
976976 mipsel) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
988988 x86_64) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
989989 Sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;;
990990 PowerPC) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;;
991 AArch64) TARGETS_TO_BUILD="ARM64 $TARGETS_TO_BUILD" ;;
991 AArch64) TARGETS_TO_BUILD="AArch64 $TARGETS_TO_BUILD" ;;
992992 ARM) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;;
993993 Mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
994994 XCore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;;
371371 elseif (LLVM_NATIVE_ARCH MATCHES "aarch64")
372372 set(LLVM_NATIVE_ARCH AArch64)
373373 elseif (LLVM_NATIVE_ARCH MATCHES "arm64")
374 set(LLVM_NATIVE_ARCH ARM64)
374 set(LLVM_NATIVE_ARCH AArch64)
375375 elseif (LLVM_NATIVE_ARCH MATCHES "arm")
376376 set(LLVM_NATIVE_ARCH ARM)
377377 elseif (LLVM_NATIVE_ARCH MATCHES "mips")
41504150 amd64-* | x86_64-*) llvm_cv_target_arch="x86_64" ;;
41514151 sparc*-*) llvm_cv_target_arch="Sparc" ;;
41524152 powerpc*-*) llvm_cv_target_arch="PowerPC" ;;
4153 arm64*-*) llvm_cv_target_arch="ARM64" ;;
4153 arm64*-*) llvm_cv_target_arch="AArch64" ;;
41544154 arm*-*) llvm_cv_target_arch="ARM" ;;
4155 aarch64*-*) llvm_cv_target_arch="ARM64" ;;
4155 aarch64*-*) llvm_cv_target_arch="AArch64" ;;
41564156 mips-* | mips64-*) llvm_cv_target_arch="Mips" ;;
41574157 mipsel-* | mips64el-*) llvm_cv_target_arch="Mips" ;;
41584158 xcore-*) llvm_cv_target_arch="XCore" ;;
41874187 amd64-* | x86_64-*) host_arch="x86_64" ;;
41884188 sparc*-*) host_arch="Sparc" ;;
41894189 powerpc*-*) host_arch="PowerPC" ;;
4190 arm64*-*) host_arch="ARM64" ;;
4190 arm64*-*) host_arch="AArch64" ;;
41914191 arm*-*) host_arch="ARM" ;;
4192 aarch64*-*) host_arch="ARM64" ;;
4192 aarch64*-*) host_arch="AArch64" ;;
41934193 mips-* | mips64-*) host_arch="Mips" ;;
41944194 mipsel-* | mips64el-*) host_arch="Mips" ;;
41954195 xcore-*) host_arch="XCore" ;;
51195119 esac
51205120 fi
51215121
5122 TARGETS_WITH_JIT="ARM ARM64 Mips PowerPC SystemZ X86"
5122 TARGETS_WITH_JIT="ARM AArch64 Mips PowerPC SystemZ X86"
51235123 TARGETS_WITH_JIT=$TARGETS_WITH_JIT
51245124
51255125
53565356
53575357 fi
53585358
5359 ALL_TARGETS="X86 Sparc PowerPC ARM ARM64 Mips XCore MSP430 CppBackend NVPTX Hexagon SystemZ R600"
5359 ALL_TARGETS="X86 Sparc PowerPC ARM AArch64 Mips XCore MSP430 CppBackend NVPTX Hexagon SystemZ R600"
53605360 ALL_TARGETS=$ALL_TARGETS
53615361
53625362
53795379 x86_64) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
53805380 sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;;
53815381 powerpc) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;;
5382 aarch64) TARGETS_TO_BUILD="ARM64 $TARGETS_TO_BUILD" ;;
5383 arm64) TARGETS_TO_BUILD="ARM64 $TARGETS_TO_BUILD" ;;
5382 aarch64) TARGETS_TO_BUILD="AArch64 $TARGETS_TO_BUILD" ;;
5383 arm64) TARGETS_TO_BUILD="AArch64 $TARGETS_TO_BUILD" ;;
53845384 arm) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;;
53855385 mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
53865386 mipsel) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
53985398 x86_64) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
53995399 Sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;;
54005400 PowerPC) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;;
5401 AArch64) TARGETS_TO_BUILD="ARM64 $TARGETS_TO_BUILD" ;;
5401 AArch64) TARGETS_TO_BUILD="AArch64 $TARGETS_TO_BUILD" ;;
54025402 ARM) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;;
54035403 Mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
54045404 XCore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;;
68766876 allocatable registers are not supported.
68776877
68786878 Warning: So far it only works with the stack pointer on selected
6879 architectures (ARM, ARM64, AArch64, PowerPC and x86_64). Significant amount of
6879 architectures (ARM, AArch64, PowerPC and x86_64). Significant amount of
68806880 work is needed to support other registers and even more so, allocatable
68816881 registers.
68826882
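The warning above is from the documentation for the named-register intrinsics (llvm.read_register / llvm.write_register). As a rough illustration of the supported case, reading the stack pointer looks like the sketch below; the function name is hypothetical and the metadata spelling follows the IR syntax of that era:

declare i64 @llvm.read_register.i64(metadata) nounwind

define i64 @get_stack_pointer() nounwind {
  ; Reads the current value of the physical register named by !0 ("sp").
  %sp = call i64 @llvm.read_register.i64(metadata !0)
  ret i64 %sp
}

!0 = metadata !{metadata !"sp\00"}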
532532 include "llvm/IR/IntrinsicsPowerPC.td"
533533 include "llvm/IR/IntrinsicsX86.td"
534534 include "llvm/IR/IntrinsicsARM.td"
535 include "llvm/IR/IntrinsicsARM64.td"
535 include "llvm/IR/IntrinsicsAArch64.td"
536536 include "llvm/IR/IntrinsicsXCore.td"
537537 include "llvm/IR/IntrinsicsHexagon.td"
538538 include "llvm/IR/IntrinsicsNVVM.td"
0 //===- IntrinsicsAARCH64.td - Defines AARCH64 intrinsics ---*- tablegen -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines all of the AARCH64-specific intrinsics.
10 //
11 //===----------------------------------------------------------------------===//
12
13 let TargetPrefix = "aarch64" in {
14
15 def int_aarch64_ldxr : Intrinsic<[llvm_i64_ty], [llvm_anyptr_ty]>;
16 def int_aarch64_ldaxr : Intrinsic<[llvm_i64_ty], [llvm_anyptr_ty]>;
17 def int_aarch64_stxr : Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_anyptr_ty]>;
18 def int_aarch64_stlxr : Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_anyptr_ty]>;
19
20 def int_aarch64_ldxp : Intrinsic<[llvm_i64_ty, llvm_i64_ty], [llvm_ptr_ty]>;
21 def int_aarch64_ldaxp : Intrinsic<[llvm_i64_ty, llvm_i64_ty], [llvm_ptr_ty]>;
22 def int_aarch64_stxp : Intrinsic<[llvm_i32_ty],
23 [llvm_i64_ty, llvm_i64_ty, llvm_ptr_ty]>;
24 def int_aarch64_stlxp : Intrinsic<[llvm_i32_ty],
25 [llvm_i64_ty, llvm_i64_ty, llvm_ptr_ty]>;
26
27 def int_aarch64_clrex : Intrinsic<[]>;
28
29 def int_aarch64_sdiv : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
30 LLVMMatchType<0>], [IntrNoMem]>;
31 def int_aarch64_udiv : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
32 LLVMMatchType<0>], [IntrNoMem]>;
33 }
34
35 //===----------------------------------------------------------------------===//
36 // Advanced SIMD (NEON)
37
38 let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
39 class AdvSIMD_2Scalar_Float_Intrinsic
40 : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
41 [IntrNoMem]>;
42
43 class AdvSIMD_FPToIntRounding_Intrinsic
44 : Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
45
46 class AdvSIMD_1IntArg_Intrinsic
47 : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrNoMem]>;
48 class AdvSIMD_1FloatArg_Intrinsic
49 : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
50 class AdvSIMD_1VectorArg_Intrinsic
51 : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>], [IntrNoMem]>;
52 class AdvSIMD_1VectorArg_Expand_Intrinsic
53 : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
54 class AdvSIMD_1VectorArg_Long_Intrinsic
55 : Intrinsic<[llvm_anyvector_ty], [LLVMTruncatedType<0>], [IntrNoMem]>;
56 class AdvSIMD_1IntArg_Narrow_Intrinsic
57 : Intrinsic<[llvm_anyint_ty], [llvm_anyint_ty], [IntrNoMem]>;
58 class AdvSIMD_1VectorArg_Narrow_Intrinsic
59 : Intrinsic<[llvm_anyint_ty], [LLVMExtendedType<0>], [IntrNoMem]>;
60 class AdvSIMD_1VectorArg_Int_Across_Intrinsic
61 : Intrinsic<[llvm_anyint_ty], [llvm_anyvector_ty], [IntrNoMem]>;
62 class AdvSIMD_1VectorArg_Float_Across_Intrinsic
63 : Intrinsic<[llvm_anyfloat_ty], [llvm_anyvector_ty], [IntrNoMem]>;
64
65 class AdvSIMD_2IntArg_Intrinsic
66 : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
67 [IntrNoMem]>;
68 class AdvSIMD_2FloatArg_Intrinsic
69 : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
70 [IntrNoMem]>;
71 class AdvSIMD_2VectorArg_Intrinsic
72 : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
73 [IntrNoMem]>;
74 class AdvSIMD_2VectorArg_Compare_Intrinsic
75 : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty, LLVMMatchType<1>],
76 [IntrNoMem]>;
77 class AdvSIMD_2Arg_FloatCompare_Intrinsic
78 : Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty, LLVMMatchType<1>],
79 [IntrNoMem]>;
80 class AdvSIMD_2VectorArg_Long_Intrinsic
81 : Intrinsic<[llvm_anyvector_ty],
82 [LLVMTruncatedType<0>, LLVMTruncatedType<0>],
83 [IntrNoMem]>;
84 class AdvSIMD_2VectorArg_Wide_Intrinsic
85 : Intrinsic<[llvm_anyvector_ty],
86 [LLVMMatchType<0>, LLVMTruncatedType<0>],
87 [IntrNoMem]>;
88 class AdvSIMD_2VectorArg_Narrow_Intrinsic
89 : Intrinsic<[llvm_anyvector_ty],
90 [LLVMExtendedType<0>, LLVMExtendedType<0>],
91 [IntrNoMem]>;
92 class AdvSIMD_2Arg_Scalar_Narrow_Intrinsic
93 : Intrinsic<[llvm_anyint_ty],
94 [LLVMExtendedType<0>, llvm_i32_ty],
95 [IntrNoMem]>;
96 class AdvSIMD_2VectorArg_Scalar_Expand_BySize_Intrinsic
97 : Intrinsic<[llvm_anyvector_ty],
98 [llvm_anyvector_ty],
99 [IntrNoMem]>;
100 class AdvSIMD_2VectorArg_Scalar_Wide_BySize_Intrinsic
101 : Intrinsic<[llvm_anyvector_ty],
102 [LLVMTruncatedType<0>],
103 [IntrNoMem]>;
104 class AdvSIMD_2VectorArg_Scalar_Wide_Intrinsic
105 : Intrinsic<[llvm_anyvector_ty],
106 [LLVMTruncatedType<0>, llvm_i32_ty],
107 [IntrNoMem]>;
108 class AdvSIMD_2VectorArg_Tied_Narrow_Intrinsic
109 : Intrinsic<[llvm_anyvector_ty],
110 [LLVMHalfElementsVectorType<0>, llvm_anyvector_ty],
111 [IntrNoMem]>;
112
113 class AdvSIMD_3VectorArg_Intrinsic
114 : Intrinsic<[llvm_anyvector_ty],
115 [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
116 [IntrNoMem]>;
117 class AdvSIMD_3VectorArg_Scalar_Intrinsic
118 : Intrinsic<[llvm_anyvector_ty],
119 [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
120 [IntrNoMem]>;
121 class AdvSIMD_3VectorArg_Tied_Narrow_Intrinsic
122 : Intrinsic<[llvm_anyvector_ty],
123 [LLVMHalfElementsVectorType<0>, llvm_anyvector_ty,
124 LLVMMatchType<1>], [IntrNoMem]>;
125 class AdvSIMD_3VectorArg_Scalar_Tied_Narrow_Intrinsic
126 : Intrinsic<[llvm_anyvector_ty],
127 [LLVMHalfElementsVectorType<0>, llvm_anyvector_ty, llvm_i32_ty],
128 [IntrNoMem]>;
129 class AdvSIMD_CvtFxToFP_Intrinsic
130 : Intrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty, llvm_i32_ty],
131 [IntrNoMem]>;
132 class AdvSIMD_CvtFPToFx_Intrinsic
133 : Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty, llvm_i32_ty],
134 [IntrNoMem]>;
135 }
136
137 // Arithmetic ops
138
139 let Properties = [IntrNoMem] in {
140 // Vector Add Across Lanes
141 def int_aarch64_neon_saddv : AdvSIMD_1VectorArg_Int_Across_Intrinsic;
142 def int_aarch64_neon_uaddv : AdvSIMD_1VectorArg_Int_Across_Intrinsic;
143 def int_aarch64_neon_faddv : AdvSIMD_1VectorArg_Float_Across_Intrinsic;
144
145 // Vector Long Add Across Lanes
146 def int_aarch64_neon_saddlv : AdvSIMD_1VectorArg_Int_Across_Intrinsic;
147 def int_aarch64_neon_uaddlv : AdvSIMD_1VectorArg_Int_Across_Intrinsic;
148
149 // Vector Halving Add
150 def int_aarch64_neon_shadd : AdvSIMD_2VectorArg_Intrinsic;
151 def int_aarch64_neon_uhadd : AdvSIMD_2VectorArg_Intrinsic;
152
153 // Vector Rounding Halving Add
154 def int_aarch64_neon_srhadd : AdvSIMD_2VectorArg_Intrinsic;
155 def int_aarch64_neon_urhadd : AdvSIMD_2VectorArg_Intrinsic;
156
157 // Vector Saturating Add
158 def int_aarch64_neon_sqadd : AdvSIMD_2IntArg_Intrinsic;
159 def int_aarch64_neon_suqadd : AdvSIMD_2IntArg_Intrinsic;
160 def int_aarch64_neon_usqadd : AdvSIMD_2IntArg_Intrinsic;
161 def int_aarch64_neon_uqadd : AdvSIMD_2IntArg_Intrinsic;
162
163 // Vector Add High-Half
164 // FIXME: this is a legacy intrinsic for aarch64_simd.h. Remove it when that
165 // header is no longer supported.
166 def int_aarch64_neon_addhn : AdvSIMD_2VectorArg_Narrow_Intrinsic;
167
168 // Vector Rounding Add High-Half
169 def int_aarch64_neon_raddhn : AdvSIMD_2VectorArg_Narrow_Intrinsic;
170
171 // Vector Saturating Doubling Multiply High
172 def int_aarch64_neon_sqdmulh : AdvSIMD_2IntArg_Intrinsic;
173
174 // Vector Saturating Rounding Doubling Multiply High
175 def int_aarch64_neon_sqrdmulh : AdvSIMD_2IntArg_Intrinsic;
176
177 // Vector Polynomial Multiply
178 def int_aarch64_neon_pmul : AdvSIMD_2VectorArg_Intrinsic;
179
180 // Vector Long Multiply
181 def int_aarch64_neon_smull : AdvSIMD_2VectorArg_Long_Intrinsic;
182 def int_aarch64_neon_umull : AdvSIMD_2VectorArg_Long_Intrinsic;
183 def int_aarch64_neon_pmull : AdvSIMD_2VectorArg_Long_Intrinsic;
184
185 // 64-bit polynomial multiply really returns an i128, which is not legal. Fake
186 // it with a v16i8.
187 def int_aarch64_neon_pmull64 :
188 Intrinsic<[llvm_v16i8_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
189
190 // Vector Extending Multiply
191 def int_aarch64_neon_fmulx : AdvSIMD_2FloatArg_Intrinsic {
192 let Properties = [IntrNoMem, Commutative];
193 }
194
195 // Vector Saturating Doubling Long Multiply
196 def int_aarch64_neon_sqdmull : AdvSIMD_2VectorArg_Long_Intrinsic;
197 def int_aarch64_neon_sqdmulls_scalar
198 : Intrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
199
200 // Vector Halving Subtract
201 def int_aarch64_neon_shsub : AdvSIMD_2VectorArg_Intrinsic;
202 def int_aarch64_neon_uhsub : AdvSIMD_2VectorArg_Intrinsic;
203
204 // Vector Saturating Subtract
205 def int_aarch64_neon_sqsub : AdvSIMD_2IntArg_Intrinsic;
206 def int_aarch64_neon_uqsub : AdvSIMD_2IntArg_Intrinsic;
207
208 // Vector Subtract High-Half
209 // FIXME: this is a legacy intrinsic for aarch64_simd.h. Remove it when that
210 // header is no longer supported.
211 def int_aarch64_neon_subhn : AdvSIMD_2VectorArg_Narrow_Intrinsic;
212
213 // Vector Rounding Subtract High-Half
214 def int_aarch64_neon_rsubhn : AdvSIMD_2VectorArg_Narrow_Intrinsic;
215
216 // Vector Compare Absolute Greater-than-or-equal
217 def int_aarch64_neon_facge : AdvSIMD_2Arg_FloatCompare_Intrinsic;
218
219 // Vector Compare Absolute Greater-than
220 def int_aarch64_neon_facgt : AdvSIMD_2Arg_FloatCompare_Intrinsic;
221
222 // Vector Absolute Difference
223 def int_aarch64_neon_sabd : AdvSIMD_2VectorArg_Intrinsic;
224 def int_aarch64_neon_uabd : AdvSIMD_2VectorArg_Intrinsic;
225 def int_aarch64_neon_fabd : AdvSIMD_2VectorArg_Intrinsic;
226
227 // Scalar Absolute Difference
228 def int_aarch64_sisd_fabd : AdvSIMD_2Scalar_Float_Intrinsic;
229
230 // Vector Max
231 def int_aarch64_neon_smax : AdvSIMD_2VectorArg_Intrinsic;
232 def int_aarch64_neon_umax : AdvSIMD_2VectorArg_Intrinsic;
233 def int_aarch64_neon_fmax : AdvSIMD_2VectorArg_Intrinsic;
234 def int_aarch64_neon_fmaxnmp : AdvSIMD_2VectorArg_Intrinsic;
235
236 // Vector Max Across Lanes
237 def int_aarch64_neon_smaxv : AdvSIMD_1VectorArg_Int_Across_Intrinsic;
238 def int_aarch64_neon_umaxv : AdvSIMD_1VectorArg_Int_Across_Intrinsic;
239 def int_aarch64_neon_fmaxv : AdvSIMD_1VectorArg_Float_Across_Intrinsic;
240 def int_aarch64_neon_fmaxnmv : AdvSIMD_1VectorArg_Float_Across_Intrinsic;
241
242 // Vector Min
243 def int_aarch64_neon_smin : AdvSIMD_2VectorArg_Intrinsic;
244 def int_aarch64_neon_umin : AdvSIMD_2VectorArg_Intrinsic;
245 def int_aarch64_neon_fmin : AdvSIMD_2VectorArg_Intrinsic;
246 def int_aarch64_neon_fminnmp : AdvSIMD_2VectorArg_Intrinsic;
247
248 // Vector Min/Max Number
249 def int_aarch64_neon_fminnm : AdvSIMD_2FloatArg_Intrinsic;
250 def int_aarch64_neon_fmaxnm : AdvSIMD_2FloatArg_Intrinsic;
251
252 // Vector Min Across Lanes
253 def int_aarch64_neon_sminv : AdvSIMD_1VectorArg_Int_Across_Intrinsic;
254 def int_aarch64_neon_uminv : AdvSIMD_1VectorArg_Int_Across_Intrinsic;
255 def int_aarch64_neon_fminv : AdvSIMD_1VectorArg_Float_Across_Intrinsic;
256 def int_aarch64_neon_fminnmv : AdvSIMD_1VectorArg_Float_Across_Intrinsic;
257
258 // Pairwise Add
259 def int_aarch64_neon_addp : AdvSIMD_2VectorArg_Intrinsic;
260
261 // Long Pairwise Add
262 // FIXME: In theory, we shouldn't need intrinsics for saddlp or
263 // uaddlp, but tblgen's type inference currently can't handle the
264 // pattern fragments this ends up generating.
265 def int_aarch64_neon_saddlp : AdvSIMD_1VectorArg_Expand_Intrinsic;
266 def int_aarch64_neon_uaddlp : AdvSIMD_1VectorArg_Expand_Intrinsic;
267
268 // Folding Maximum
269 def int_aarch64_neon_smaxp : AdvSIMD_2VectorArg_Intrinsic;
270 def int_aarch64_neon_umaxp : AdvSIMD_2VectorArg_Intrinsic;
271 def int_aarch64_neon_fmaxp : AdvSIMD_2VectorArg_Intrinsic;
272
273 // Folding Minimum
274 def int_aarch64_neon_sminp : AdvSIMD_2VectorArg_Intrinsic;
275 def int_aarch64_neon_uminp : AdvSIMD_2VectorArg_Intrinsic;
276 def int_aarch64_neon_fminp : AdvSIMD_2VectorArg_Intrinsic;
277
278 // Reciprocal Estimate/Step
279 def int_aarch64_neon_frecps : AdvSIMD_2FloatArg_Intrinsic;
280 def int_aarch64_neon_frsqrts : AdvSIMD_2FloatArg_Intrinsic;
281
282 // Reciprocal Exponent
283 def int_aarch64_neon_frecpx : AdvSIMD_1FloatArg_Intrinsic;
284
285 // Vector Saturating Shift Left
286 def int_aarch64_neon_sqshl : AdvSIMD_2IntArg_Intrinsic;
287 def int_aarch64_neon_uqshl : AdvSIMD_2IntArg_Intrinsic;
288
289 // Vector Rounding Shift Left
290 def int_aarch64_neon_srshl : AdvSIMD_2IntArg_Intrinsic;
291 def int_aarch64_neon_urshl : AdvSIMD_2IntArg_Intrinsic;
292
293 // Vector Saturating Rounding Shift Left
294 def int_aarch64_neon_sqrshl : AdvSIMD_2IntArg_Intrinsic;
295 def int_aarch64_neon_uqrshl : AdvSIMD_2IntArg_Intrinsic;
296
297 // Vector Signed->Unsigned Shift Left by Constant
298 def int_aarch64_neon_sqshlu : AdvSIMD_2IntArg_Intrinsic;
299
300 // Vector Signed->Unsigned Narrowing Saturating Shift Right by Constant
301 def int_aarch64_neon_sqshrun : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic;
302
303 // Vector Signed->Unsigned Rounding Narrowing Saturating Shift Right by Const
304 def int_aarch64_neon_sqrshrun : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic;
305
306 // Vector Narrowing Shift Right by Constant
307 def int_aarch64_neon_sqshrn : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic;
308 def int_aarch64_neon_uqshrn : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic;
309
310 // Vector Rounding Narrowing Shift Right by Constant
311 def int_aarch64_neon_rshrn : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic;
312
313 // Vector Rounding Narrowing Saturating Shift Right by Constant
314 def int_aarch64_neon_sqrshrn : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic;
315 def int_aarch64_neon_uqrshrn : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic;
316
317 // Vector Shift Left
318 def int_aarch64_neon_sshl : AdvSIMD_2IntArg_Intrinsic;
319 def int_aarch64_neon_ushl : AdvSIMD_2IntArg_Intrinsic;
320
321 // Vector Widening Shift Left by Constant
322 def int_aarch64_neon_shll : AdvSIMD_2VectorArg_Scalar_Wide_BySize_Intrinsic;
323 def int_aarch64_neon_sshll : AdvSIMD_2VectorArg_Scalar_Wide_Intrinsic;
324 def int_aarch64_neon_ushll : AdvSIMD_2VectorArg_Scalar_Wide_Intrinsic;
325
326 // Vector Shift Right by Constant and Insert
327 def int_aarch64_neon_vsri : AdvSIMD_3VectorArg_Scalar_Intrinsic;
328
329 // Vector Shift Left by Constant and Insert
330 def int_aarch64_neon_vsli : AdvSIMD_3VectorArg_Scalar_Intrinsic;
331
332 // Vector Saturating Narrow
333 def int_aarch64_neon_scalar_sqxtn: AdvSIMD_1IntArg_Narrow_Intrinsic;
334 def int_aarch64_neon_scalar_uqxtn : AdvSIMD_1IntArg_Narrow_Intrinsic;
335 def int_aarch64_neon_sqxtn : AdvSIMD_1VectorArg_Narrow_Intrinsic;
336 def int_aarch64_neon_uqxtn : AdvSIMD_1VectorArg_Narrow_Intrinsic;
337
338 // Vector Saturating Extract and Unsigned Narrow
339 def int_aarch64_neon_scalar_sqxtun : AdvSIMD_1IntArg_Narrow_Intrinsic;
340 def int_aarch64_neon_sqxtun : AdvSIMD_1VectorArg_Narrow_Intrinsic;
341
342 // Vector Absolute Value
343 def int_aarch64_neon_abs : AdvSIMD_1IntArg_Intrinsic;
344
345 // Vector Saturating Absolute Value
346 def int_aarch64_neon_sqabs : AdvSIMD_1IntArg_Intrinsic;
347
348 // Vector Saturating Negation
349 def int_aarch64_neon_sqneg : AdvSIMD_1IntArg_Intrinsic;
350
351 // Vector Count Leading Sign Bits
352 def int_aarch64_neon_cls : AdvSIMD_1VectorArg_Intrinsic;
353
354 // Vector Reciprocal Estimate
355 def int_aarch64_neon_urecpe : AdvSIMD_1VectorArg_Intrinsic;
356 def int_aarch64_neon_frecpe : AdvSIMD_1FloatArg_Intrinsic;
357
358 // Vector Square Root Estimate
359 def int_aarch64_neon_ursqrte : AdvSIMD_1VectorArg_Intrinsic;
360 def int_aarch64_neon_frsqrte : AdvSIMD_1FloatArg_Intrinsic;
361
362 // Vector Bitwise Reverse
363 def int_aarch64_neon_rbit : AdvSIMD_1VectorArg_Intrinsic;
364
365 // Vector Conversions Between Half-Precision and Single-Precision.
366 def int_aarch64_neon_vcvtfp2hf
367 : Intrinsic<[llvm_v4i16_ty], [llvm_v4f32_ty], [IntrNoMem]>;
368 def int_aarch64_neon_vcvthf2fp
369 : Intrinsic<[llvm_v4f32_ty], [llvm_v4i16_ty], [IntrNoMem]>;
370
371 // Vector Conversions Between Floating-point and Fixed-point.
372 def int_aarch64_neon_vcvtfp2fxs : AdvSIMD_CvtFPToFx_Intrinsic;
373 def int_aarch64_neon_vcvtfp2fxu : AdvSIMD_CvtFPToFx_Intrinsic;
374 def int_aarch64_neon_vcvtfxs2fp : AdvSIMD_CvtFxToFP_Intrinsic;
375 def int_aarch64_neon_vcvtfxu2fp : AdvSIMD_CvtFxToFP_Intrinsic;
376
377 // Vector FP->Int Conversions
378 def int_aarch64_neon_fcvtas : AdvSIMD_FPToIntRounding_Intrinsic;
379 def int_aarch64_neon_fcvtau : AdvSIMD_FPToIntRounding_Intrinsic;
380 def int_aarch64_neon_fcvtms : AdvSIMD_FPToIntRounding_Intrinsic;
381 def int_aarch64_neon_fcvtmu : AdvSIMD_FPToIntRounding_Intrinsic;
382 def int_aarch64_neon_fcvtns : AdvSIMD_FPToIntRounding_Intrinsic;
383 def int_aarch64_neon_fcvtnu : AdvSIMD_FPToIntRounding_Intrinsic;
384 def int_aarch64_neon_fcvtps : AdvSIMD_FPToIntRounding_Intrinsic;
385 def int_aarch64_neon_fcvtpu : AdvSIMD_FPToIntRounding_Intrinsic;
386 def int_aarch64_neon_fcvtzs : AdvSIMD_FPToIntRounding_Intrinsic;
387 def int_aarch64_neon_fcvtzu : AdvSIMD_FPToIntRounding_Intrinsic;
388
389 // Vector FP Rounding: only ties to even is unrepresented by a normal
390 // intrinsic.
391 def int_aarch64_neon_frintn : AdvSIMD_1FloatArg_Intrinsic;
392
393 // Scalar FP->Int conversions
394
395 // Vector FP Inexact Narrowing
396 def int_aarch64_neon_fcvtxn : AdvSIMD_1VectorArg_Expand_Intrinsic;
397
398 // Scalar FP Inexact Narrowing
399 def int_aarch64_sisd_fcvtxn : Intrinsic<[llvm_float_ty], [llvm_double_ty],
400 [IntrNoMem]>;
401 }
402
403 let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
404 class AdvSIMD_2Vector2Index_Intrinsic
405 : Intrinsic<[llvm_anyvector_ty],
406 [llvm_anyvector_ty, llvm_i64_ty, LLVMMatchType<0>, llvm_i64_ty],
407 [IntrNoMem]>;
408 }
409
410 // Vector element to element moves
411 def int_aarch64_neon_vcopy_lane: AdvSIMD_2Vector2Index_Intrinsic;
412
413 let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
414 class AdvSIMD_1Vec_Load_Intrinsic
415 : Intrinsic<[llvm_anyvector_ty], [LLVMAnyPointerType<LLVMMatchType<0>>],
416 [IntrReadArgMem]>;
417 class AdvSIMD_1Vec_Store_Lane_Intrinsic
418 : Intrinsic<[], [llvm_anyvector_ty, llvm_i64_ty, llvm_anyptr_ty],
419 [IntrReadWriteArgMem, NoCapture<2>]>;
420
421 class AdvSIMD_2Vec_Load_Intrinsic
422 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
423 [LLVMAnyPointerType<LLVMMatchType<0>>],
424 [IntrReadArgMem]>;
425 class AdvSIMD_2Vec_Load_Lane_Intrinsic
426 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
427 [LLVMMatchType<0>, LLVMMatchType<0>,
428 llvm_i64_ty, llvm_anyptr_ty],
429 [IntrReadArgMem]>;
430 class AdvSIMD_2Vec_Store_Intrinsic
431 : Intrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>,
432 LLVMAnyPointerType<LLVMMatchType<0>>],
433 [IntrReadWriteArgMem, NoCapture<2>]>;
434 class AdvSIMD_2Vec_Store_Lane_Intrinsic
435 : Intrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>,
436 llvm_i64_ty, llvm_anyptr_ty],
437 [IntrReadWriteArgMem, NoCapture<3>]>;
438
439 class AdvSIMD_3Vec_Load_Intrinsic
440 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>],
441 [LLVMAnyPointerType<LLVMMatchType<0>>],
442 [IntrReadArgMem]>;
443 class AdvSIMD_3Vec_Load_Lane_Intrinsic
444 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>],
445 [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>,
446 llvm_i64_ty, llvm_anyptr_ty],
447 [IntrReadArgMem]>;
448 class AdvSIMD_3Vec_Store_Intrinsic
449 : Intrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>,
450 LLVMMatchType<0>, LLVMAnyPointerType<LLVMMatchType<0>>],
451 [IntrReadWriteArgMem, NoCapture<3>]>;
452 class AdvSIMD_3Vec_Store_Lane_Intrinsic
453 : Intrinsic<[], [llvm_anyvector_ty,
454 LLVMMatchType<0>, LLVMMatchType<0>,
455 llvm_i64_ty, llvm_anyptr_ty],
456 [IntrReadWriteArgMem, NoCapture<4>]>;
457
458 class AdvSIMD_4Vec_Load_Intrinsic
459 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
460 LLVMMatchType<0>, LLVMMatchType<0>],
461 [LLVMAnyPointerType<LLVMMatchType<0>>],
462 [IntrReadArgMem]>;
463 class AdvSIMD_4Vec_Load_Lane_Intrinsic
464 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
465 LLVMMatchType<0>, LLVMMatchType<0>],
466 [LLVMMatchType<0>, LLVMMatchType<0>,
467 LLVMMatchType<0>, LLVMMatchType<0>,
468 llvm_i64_ty, llvm_anyptr_ty],
469 [IntrReadArgMem]>;
470 class AdvSIMD_4Vec_Store_Intrinsic
471 : Intrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>,
472 LLVMMatchType<0>, LLVMMatchType<0>,
473 LLVMAnyPointerType<LLVMMatchType<0>>],
474 [IntrReadWriteArgMem, NoCapture<4>]>;
475 class AdvSIMD_4Vec_Store_Lane_Intrinsic
476 : Intrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>,
477 LLVMMatchType<0>, LLVMMatchType<0>,
478 llvm_i64_ty, llvm_anyptr_ty],
479 [IntrReadWriteArgMem, NoCapture<5>]>;
480 }
481
482 // Memory ops
483
484 def int_aarch64_neon_ld1x2 : AdvSIMD_2Vec_Load_Intrinsic;
485 def int_aarch64_neon_ld1x3 : AdvSIMD_3Vec_Load_Intrinsic;
486 def int_aarch64_neon_ld1x4 : AdvSIMD_4Vec_Load_Intrinsic;
487
488 def int_aarch64_neon_st1x2 : AdvSIMD_2Vec_Store_Intrinsic;
489 def int_aarch64_neon_st1x3 : AdvSIMD_3Vec_Store_Intrinsic;
490 def int_aarch64_neon_st1x4 : AdvSIMD_4Vec_Store_Intrinsic;
491
492 def int_aarch64_neon_ld2 : AdvSIMD_2Vec_Load_Intrinsic;
493 def int_aarch64_neon_ld3 : AdvSIMD_3Vec_Load_Intrinsic;
494 def int_aarch64_neon_ld4 : AdvSIMD_4Vec_Load_Intrinsic;
495
496 def int_aarch64_neon_ld2lane : AdvSIMD_2Vec_Load_Lane_Intrinsic;
497 def int_aarch64_neon_ld3lane : AdvSIMD_3Vec_Load_Lane_Intrinsic;
498 def int_aarch64_neon_ld4lane : AdvSIMD_4Vec_Load_Lane_Intrinsic;
499
500 def int_aarch64_neon_ld2r : AdvSIMD_2Vec_Load_Intrinsic;
501 def int_aarch64_neon_ld3r : AdvSIMD_3Vec_Load_Intrinsic;
502 def int_aarch64_neon_ld4r : AdvSIMD_4Vec_Load_Intrinsic;
503
504 def int_aarch64_neon_st2 : AdvSIMD_2Vec_Store_Intrinsic;
505 def int_aarch64_neon_st3 : AdvSIMD_3Vec_Store_Intrinsic;
506 def int_aarch64_neon_st4 : AdvSIMD_4Vec_Store_Intrinsic;
507
508 def int_aarch64_neon_st2lane : AdvSIMD_2Vec_Store_Lane_Intrinsic;
509 def int_aarch64_neon_st3lane : AdvSIMD_3Vec_Store_Lane_Intrinsic;
510 def int_aarch64_neon_st4lane : AdvSIMD_4Vec_Store_Lane_Intrinsic;
511
512 let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
513 class AdvSIMD_Tbl1_Intrinsic
514 : Intrinsic<[llvm_anyvector_ty], [llvm_v16i8_ty, LLVMMatchType<0>],
515 [IntrNoMem]>;
516 class AdvSIMD_Tbl2_Intrinsic
517 : Intrinsic<[llvm_anyvector_ty],
518 [llvm_v16i8_ty, llvm_v16i8_ty, LLVMMatchType<0>], [IntrNoMem]>;
519 class AdvSIMD_Tbl3_Intrinsic
520 : Intrinsic<[llvm_anyvector_ty],
521 [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty,
522 LLVMMatchType<0>],
523 [IntrNoMem]>;
524 class AdvSIMD_Tbl4_Intrinsic
525 : Intrinsic<[llvm_anyvector_ty],
526 [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty,
527 LLVMMatchType<0>],
528 [IntrNoMem]>;
529
530 class AdvSIMD_Tbx1_Intrinsic
531 : Intrinsic<[llvm_anyvector_ty],
532 [LLVMMatchType<0>, llvm_v16i8_ty, LLVMMatchType<0>],
533 [IntrNoMem]>;
534 class AdvSIMD_Tbx2_Intrinsic
535 : Intrinsic<[llvm_anyvector_ty],
536 [LLVMMatchType<0>, llvm_v16i8_ty, llvm_v16i8_ty,
537 LLVMMatchType<0>],
538 [IntrNoMem]>;
539 class AdvSIMD_Tbx3_Intrinsic
540 : Intrinsic<[llvm_anyvector_ty],
541 [LLVMMatchType<0>, llvm_v16i8_ty, llvm_v16i8_ty,
542 llvm_v16i8_ty, LLVMMatchType<0>],
543 [IntrNoMem]>;
544 class AdvSIMD_Tbx4_Intrinsic
545 : Intrinsic<[llvm_anyvector_ty],
546 [LLVMMatchType<0>, llvm_v16i8_ty, llvm_v16i8_ty,
547 llvm_v16i8_ty, llvm_v16i8_ty, LLVMMatchType<0>],
548 [IntrNoMem]>;
549 }
550 def int_aarch64_neon_tbl1 : AdvSIMD_Tbl1_Intrinsic;
551 def int_aarch64_neon_tbl2 : AdvSIMD_Tbl2_Intrinsic;
552 def int_aarch64_neon_tbl3 : AdvSIMD_Tbl3_Intrinsic;
553 def int_aarch64_neon_tbl4 : AdvSIMD_Tbl4_Intrinsic;
554
555 def int_aarch64_neon_tbx1 : AdvSIMD_Tbx1_Intrinsic;
556 def int_aarch64_neon_tbx2 : AdvSIMD_Tbx2_Intrinsic;
557 def int_aarch64_neon_tbx3 : AdvSIMD_Tbx3_Intrinsic;
558 def int_aarch64_neon_tbx4 : AdvSIMD_Tbx4_Intrinsic;
559
560 let TargetPrefix = "aarch64" in {
561 class Crypto_AES_DataKey_Intrinsic
562 : Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
563
564 class Crypto_AES_Data_Intrinsic
565 : Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>;
566
567 // SHA intrinsic taking 5 words of the hash (v4i32, i32) and 4 of the schedule
568 // (v4i32).
569 class Crypto_SHA_5Hash4Schedule_Intrinsic
570 : Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty, llvm_v4i32_ty],
571 [IntrNoMem]>;
572
573 // SHA intrinsic taking 1 word of the hash (i32) and producing 1 word
574 // (i32).
575 class Crypto_SHA_1Hash_Intrinsic
576 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
577
578 // SHA intrinsic taking 8 words of the schedule
579 class Crypto_SHA_8Schedule_Intrinsic
580 : Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
581
582 // SHA intrinsic taking 12 words of the schedule
583 class Crypto_SHA_12Schedule_Intrinsic
584 : Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
585 [IntrNoMem]>;
586
587 // SHA intrinsic taking 8 words of the hash and 4 of the schedule.
588 class Crypto_SHA_8Hash4Schedule_Intrinsic
589 : Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
590 [IntrNoMem]>;
591 }
592
593 // AES
594 def int_aarch64_crypto_aese : Crypto_AES_DataKey_Intrinsic;
595 def int_aarch64_crypto_aesd : Crypto_AES_DataKey_Intrinsic;
596 def int_aarch64_crypto_aesmc : Crypto_AES_Data_Intrinsic;
597 def int_aarch64_crypto_aesimc : Crypto_AES_Data_Intrinsic;
598
599 // SHA1
600 def int_aarch64_crypto_sha1c : Crypto_SHA_5Hash4Schedule_Intrinsic;
601 def int_aarch64_crypto_sha1p : Crypto_SHA_5Hash4Schedule_Intrinsic;
602 def int_aarch64_crypto_sha1m : Crypto_SHA_5Hash4Schedule_Intrinsic;
603 def int_aarch64_crypto_sha1h : Crypto_SHA_1Hash_Intrinsic;
604
605 def int_aarch64_crypto_sha1su0 : Crypto_SHA_12Schedule_Intrinsic;
606 def int_aarch64_crypto_sha1su1 : Crypto_SHA_8Schedule_Intrinsic;
607
608 // SHA256
609 def int_aarch64_crypto_sha256h : Crypto_SHA_8Hash4Schedule_Intrinsic;
610 def int_aarch64_crypto_sha256h2 : Crypto_SHA_8Hash4Schedule_Intrinsic;
611 def int_aarch64_crypto_sha256su0 : Crypto_SHA_8Schedule_Intrinsic;
612 def int_aarch64_crypto_sha256su1 : Crypto_SHA_12Schedule_Intrinsic;
613
614 //===----------------------------------------------------------------------===//
615 // CRC32
616
617 let TargetPrefix = "aarch64" in {
618
619 def int_aarch64_crc32b : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
620 [IntrNoMem]>;
621 def int_aarch64_crc32cb : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
622 [IntrNoMem]>;
623 def int_aarch64_crc32h : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
624 [IntrNoMem]>;
625 def int_aarch64_crc32ch : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
626 [IntrNoMem]>;
627 def int_aarch64_crc32w : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
628 [IntrNoMem]>;
629 def int_aarch64_crc32cw : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
630 [IntrNoMem]>;
631 def int_aarch64_crc32x : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty],
632 [IntrNoMem]>;
633 def int_aarch64_crc32cx : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty],
634 [IntrNoMem]>;
635 }
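To make the renamed definitions above concrete, here is a short hand-written IR sketch (the function names and loop structure are illustrative only; front ends normally reach these intrinsics through builtins) calling the exclusive load/store pair and a NEON add-across-lanes defined in this file:

declare i64 @llvm.aarch64.ldxr.p0i32(i32*)
declare i32 @llvm.aarch64.stxr.p0i32(i64, i32*)
declare i32 @llvm.aarch64.neon.saddv.i32.v4i32(<4 x i32>)

; Atomically increment *%p using an exclusive-monitor retry loop.
define void @atomic_inc(i32* %p) {
entry:
  br label %retry
retry:
  %old64 = call i64 @llvm.aarch64.ldxr.p0i32(i32* %p)
  %old = trunc i64 %old64 to i32
  %new = add i32 %old, 1
  %new64 = zext i32 %new to i64
  ; stxr returns 0 on success, 1 if the exclusive monitor was lost.
  %failed = call i32 @llvm.aarch64.stxr.p0i32(i64 %new64, i32* %p)
  %again = icmp ne i32 %failed, 0
  br i1 %again, label %retry, label %done
done:
  ret void
}

; Sum the four lanes of a <4 x i32> vector into a scalar.
define i32 @sum_lanes(<4 x i32> %v) {
  %s = call i32 @llvm.aarch64.neon.saddv.i32.v4i32(<4 x i32> %v)
  ret i32 %s
}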
include/llvm/IR/IntrinsicsARM64.td  +0  -636  (file deleted)
0 //===- IntrinsicsARM64.td - Defines ARM64 intrinsics -------*- tablegen -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines all of the ARM64-specific intrinsics.
10 //
11 //===----------------------------------------------------------------------===//
12
13 let TargetPrefix = "arm64" in {
14
15 def int_arm64_ldxr : Intrinsic<[llvm_i64_ty], [llvm_anyptr_ty]>;
16 def int_arm64_ldaxr : Intrinsic<[llvm_i64_ty], [llvm_anyptr_ty]>;
17 def int_arm64_stxr : Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_anyptr_ty]>;
18 def int_arm64_stlxr : Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_anyptr_ty]>;
19
20 def int_arm64_ldxp : Intrinsic<[llvm_i64_ty, llvm_i64_ty], [llvm_ptr_ty]>;
21 def int_arm64_ldaxp : Intrinsic<[llvm_i64_ty, llvm_i64_ty], [llvm_ptr_ty]>;
22 def int_arm64_stxp : Intrinsic<[llvm_i32_ty],
23 [llvm_i64_ty, llvm_i64_ty, llvm_ptr_ty]>;
24 def int_arm64_stlxp : Intrinsic<[llvm_i32_ty],
25 [llvm_i64_ty, llvm_i64_ty, llvm_ptr_ty]>;
26
27 def int_arm64_clrex : Intrinsic<[]>;
28
29 def int_arm64_sdiv : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
30 LLVMMatchType<0>], [IntrNoMem]>;
31 def int_arm64_udiv : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
32 LLVMMatchType<0>], [IntrNoMem]>;
33 }
34
35 //===----------------------------------------------------------------------===//
36 // Advanced SIMD (NEON)
37
38 let TargetPrefix = "arm64" in { // All intrinsics start with "llvm.arm64.".
39 class AdvSIMD_2Scalar_Float_Intrinsic
40 : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
41 [IntrNoMem]>;
42
43 class AdvSIMD_FPToIntRounding_Intrinsic
44 : Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
45
46 class AdvSIMD_1IntArg_Intrinsic
47 : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrNoMem]>;
48 class AdvSIMD_1FloatArg_Intrinsic
49 : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
50 class AdvSIMD_1VectorArg_Intrinsic
51 : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>], [IntrNoMem]>;
52 class AdvSIMD_1VectorArg_Expand_Intrinsic
53 : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
54 class AdvSIMD_1VectorArg_Long_Intrinsic
55 : Intrinsic<[llvm_anyvector_ty], [LLVMTruncatedType<0>], [IntrNoMem]>;
56 class AdvSIMD_1IntArg_Narrow_Intrinsic
57 : Intrinsic<[llvm_anyint_ty], [llvm_anyint_ty], [IntrNoMem]>;
58 class AdvSIMD_1VectorArg_Narrow_Intrinsic
59 : Intrinsic<[llvm_anyint_ty], [LLVMExtendedType<0>], [IntrNoMem]>;
60 class AdvSIMD_1VectorArg_Int_Across_Intrinsic
61 : Intrinsic<[llvm_anyint_ty], [llvm_anyvector_ty], [IntrNoMem]>;
62 class AdvSIMD_1VectorArg_Float_Across_Intrinsic
63 : Intrinsic<[llvm_anyfloat_ty], [llvm_anyvector_ty], [IntrNoMem]>;
64
65 class AdvSIMD_2IntArg_Intrinsic
66 : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
67 [IntrNoMem]>;
68 class AdvSIMD_2FloatArg_Intrinsic
69 : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
70 [IntrNoMem]>;
71 class AdvSIMD_2VectorArg_Intrinsic
72 : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
73 [IntrNoMem]>;
74 class AdvSIMD_2VectorArg_Compare_Intrinsic
75 : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty, LLVMMatchType<1>],
76 [IntrNoMem]>;
77 class AdvSIMD_2Arg_FloatCompare_Intrinsic
78 : Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty, LLVMMatchType<1>],
79 [IntrNoMem]>;
80 class AdvSIMD_2VectorArg_Long_Intrinsic
81 : Intrinsic<[llvm_anyvector_ty],
82 [LLVMTruncatedType<0>, LLVMTruncatedType<0>],
83 [IntrNoMem]>;
84 class AdvSIMD_2VectorArg_Wide_Intrinsic
85 : Intrinsic<[llvm_anyvector_ty],
86 [LLVMMatchType<0>, LLVMTruncatedType<0>],
87 [IntrNoMem]>;
88 class AdvSIMD_2VectorArg_Narrow_Intrinsic
89 : Intrinsic<[llvm_anyvector_ty],
90 [LLVMExtendedType<0>, LLVMExtendedType<0>],
91 [IntrNoMem]>;
92 class AdvSIMD_2Arg_Scalar_Narrow_Intrinsic
93 : Intrinsic<[llvm_anyint_ty],
94 [LLVMExtendedType<0>, llvm_i32_ty],
95 [IntrNoMem]>;
96 class AdvSIMD_2VectorArg_Scalar_Expand_BySize_Intrinsic
97 : Intrinsic<[llvm_anyvector_ty],
98 [llvm_anyvector_ty],
99 [IntrNoMem]>;
100 class AdvSIMD_2VectorArg_Scalar_Wide_BySize_Intrinsic
101 : Intrinsic<[llvm_anyvector_ty],
102 [LLVMTruncatedType<0>],
103 [IntrNoMem]>;
104 class AdvSIMD_2VectorArg_Scalar_Wide_Intrinsic
105 : Intrinsic<[llvm_anyvector_ty],
106 [LLVMTruncatedType<0>, llvm_i32_ty],
107 [IntrNoMem]>;
108 class AdvSIMD_2VectorArg_Tied_Narrow_Intrinsic
109 : Intrinsic<[llvm_anyvector_ty],
110 [LLVMHalfElementsVectorType<0>, llvm_anyvector_ty],
111 [IntrNoMem]>;
112
113 class AdvSIMD_3VectorArg_Intrinsic
114 : Intrinsic<[llvm_anyvector_ty],
115 [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
116 [IntrNoMem]>;
117 class AdvSIMD_3VectorArg_Scalar_Intrinsic
118 : Intrinsic<[llvm_anyvector_ty],
119 [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
120 [IntrNoMem]>;
121 class AdvSIMD_3VectorArg_Tied_Narrow_Intrinsic
122 : Intrinsic<[llvm_anyvector_ty],
123 [LLVMHalfElementsVectorType<0>, llvm_anyvector_ty,
124 LLVMMatchType<1>], [IntrNoMem]>;
125 class AdvSIMD_3VectorArg_Scalar_Tied_Narrow_Intrinsic
126 : Intrinsic<[llvm_anyvector_ty],
127 [LLVMHalfElementsVectorType<0>, llvm_anyvector_ty, llvm_i32_ty],
128 [IntrNoMem]>;
129 class AdvSIMD_CvtFxToFP_Intrinsic
130 : Intrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty, llvm_i32_ty],
131 [IntrNoMem]>;
132 class AdvSIMD_CvtFPToFx_Intrinsic
133 : Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty, llvm_i32_ty],
134 [IntrNoMem]>;
135 }
136
137 // Arithmetic ops
138
139 let Properties = [IntrNoMem] in {
140 // Vector Add Across Lanes
141 def int_arm64_neon_saddv : AdvSIMD_1VectorArg_Int_Across_Intrinsic;
142 def int_arm64_neon_uaddv : AdvSIMD_1VectorArg_Int_Across_Intrinsic;
143 def int_arm64_neon_faddv : AdvSIMD_1VectorArg_Float_Across_Intrinsic;
144
145 // Vector Long Add Across Lanes
146 def int_arm64_neon_saddlv : AdvSIMD_1VectorArg_Int_Across_Intrinsic;
147 def int_arm64_neon_uaddlv : AdvSIMD_1VectorArg_Int_Across_Intrinsic;
148
149 // Vector Halving Add
150 def int_arm64_neon_shadd : AdvSIMD_2VectorArg_Intrinsic;
151 def int_arm64_neon_uhadd : AdvSIMD_2VectorArg_Intrinsic;
152
153 // Vector Rounding Halving Add
154 def int_arm64_neon_srhadd : AdvSIMD_2VectorArg_Intrinsic;
155 def int_arm64_neon_urhadd : AdvSIMD_2VectorArg_Intrinsic;
156
157 // Vector Saturating Add
158 def int_arm64_neon_sqadd : AdvSIMD_2IntArg_Intrinsic;
159 def int_arm64_neon_suqadd : AdvSIMD_2IntArg_Intrinsic;
160 def int_arm64_neon_usqadd : AdvSIMD_2IntArg_Intrinsic;
161 def int_arm64_neon_uqadd : AdvSIMD_2IntArg_Intrinsic;
162
163 // Vector Add High-Half
164 // FIXME: this is a legacy intrinsic for aarch64_simd.h. Remove it when that
165 // header is no longer supported.
166 def int_arm64_neon_addhn : AdvSIMD_2VectorArg_Narrow_Intrinsic;
167
168 // Vector Rounding Add High-Half
169 def int_arm64_neon_raddhn : AdvSIMD_2VectorArg_Narrow_Intrinsic;
170
171 // Vector Saturating Doubling Multiply High
172 def int_arm64_neon_sqdmulh : AdvSIMD_2IntArg_Intrinsic;
173
174 // Vector Saturating Rounding Doubling Multiply High
175 def int_arm64_neon_sqrdmulh : AdvSIMD_2IntArg_Intrinsic;
176
177 // Vector Polynomial Multiply
178 def int_arm64_neon_pmul : AdvSIMD_2VectorArg_Intrinsic;
179
180 // Vector Long Multiply
181 def int_arm64_neon_smull : AdvSIMD_2VectorArg_Long_Intrinsic;
182 def int_arm64_neon_umull : AdvSIMD_2VectorArg_Long_Intrinsic;
183 def int_arm64_neon_pmull : AdvSIMD_2VectorArg_Long_Intrinsic;
184
185 // 64-bit polynomial multiply really returns an i128, which is not legal. Fake
186 // it with a v16i8.
187 def int_arm64_neon_pmull64 :
188 Intrinsic<[llvm_v16i8_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
189
190 // Vector Extending Multiply
191 def int_arm64_neon_fmulx : AdvSIMD_2FloatArg_Intrinsic {
192 let Properties = [IntrNoMem, Commutative];
193 }
194
195 // Vector Saturating Doubling Long Multiply
196 def int_arm64_neon_sqdmull : AdvSIMD_2VectorArg_Long_Intrinsic;
197 def int_arm64_neon_sqdmulls_scalar
198 : Intrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
199
200 // Vector Halving Subtract
201 def int_arm64_neon_shsub : AdvSIMD_2VectorArg_Intrinsic;
202 def int_arm64_neon_uhsub : AdvSIMD_2VectorArg_Intrinsic;
203
204 // Vector Saturating Subtract
205 def int_arm64_neon_sqsub : AdvSIMD_2IntArg_Intrinsic;
206 def int_arm64_neon_uqsub : AdvSIMD_2IntArg_Intrinsic;
207
208 // Vector Subtract High-Half
209 // FIXME: this is a legacy intrinsic for aarch64_simd.h. Remove it when that
210 // header is no longer supported.
211 def int_arm64_neon_subhn : AdvSIMD_2VectorArg_Narrow_Intrinsic;
212
213 // Vector Rounding Subtract High-Half
214 def int_arm64_neon_rsubhn : AdvSIMD_2VectorArg_Narrow_Intrinsic;
215
216 // Vector Compare Absolute Greater-than-or-equal
217 def int_arm64_neon_facge : AdvSIMD_2Arg_FloatCompare_Intrinsic;
218
219 // Vector Compare Absolute Greater-than
220 def int_arm64_neon_facgt : AdvSIMD_2Arg_FloatCompare_Intrinsic;
221
222 // Vector Absolute Difference
223 def int_arm64_neon_sabd : AdvSIMD_2VectorArg_Intrinsic;
224 def int_arm64_neon_uabd : AdvSIMD_2VectorArg_Intrinsic;
225 def int_arm64_neon_fabd : AdvSIMD_2VectorArg_Intrinsic;
226
227 // Scalar Absolute Difference
228 def int_arm64_sisd_fabd : AdvSIMD_2Scalar_Float_Intrinsic;
229
230 // Vector Max
231 def int_arm64_neon_smax : AdvSIMD_2VectorArg_Intrinsic;
232 def int_arm64_neon_umax : AdvSIMD_2VectorArg_Intrinsic;
233 def int_arm64_neon_fmax : AdvSIMD_2VectorArg_Intrinsic;
234 def int_arm64_neon_fmaxnmp : AdvSIMD_2VectorArg_Intrinsic;
235
236 // Vector Max Across Lanes
237 def int_arm64_neon_smaxv : AdvSIMD_1VectorArg_Int_Across_Intrinsic;
238 def int_arm64_neon_umaxv : AdvSIMD_1VectorArg_Int_Across_Intrinsic;
239 def int_arm64_neon_fmaxv : AdvSIMD_1VectorArg_Float_Across_Intrinsic;
240 def int_arm64_neon_fmaxnmv : AdvSIMD_1VectorArg_Float_Across_Intrinsic;
241
242 // Vector Min
243 def int_arm64_neon_smin : AdvSIMD_2VectorArg_Intrinsic;
244 def int_arm64_neon_umin : AdvSIMD_2VectorArg_Intrinsic;
245 def int_arm64_neon_fmin : AdvSIMD_2VectorArg_Intrinsic;
246 def int_arm64_neon_fminnmp : AdvSIMD_2VectorArg_Intrinsic;
247
248 // Vector Min/Max Number
249 def int_arm64_neon_fminnm : AdvSIMD_2FloatArg_Intrinsic;
250 def int_arm64_neon_fmaxnm : AdvSIMD_2FloatArg_Intrinsic;
251
252 // Vector Min Across Lanes
253 def int_arm64_neon_sminv : AdvSIMD_1VectorArg_Int_Across_Intrinsic;
254 def int_arm64_neon_uminv : AdvSIMD_1VectorArg_Int_Across_Intrinsic;
255 def int_arm64_neon_fminv : AdvSIMD_1VectorArg_Float_Across_Intrinsic;
256 def int_arm64_neon_fminnmv : AdvSIMD_1VectorArg_Float_Across_Intrinsic;
257
258 // Pairwise Add
259 def int_arm64_neon_addp : AdvSIMD_2VectorArg_Intrinsic;
260
261 // Long Pairwise Add
262 // FIXME: In theory, we shouldn't need intrinsics for saddlp or
263 // uaddlp, but tblgen's type inference currently can't handle the
264 // pattern fragments this ends up generating.
265 def int_arm64_neon_saddlp : AdvSIMD_1VectorArg_Expand_Intrinsic;
266 def int_arm64_neon_uaddlp : AdvSIMD_1VectorArg_Expand_Intrinsic;
267
268 // Folding Maximum
269 def int_arm64_neon_smaxp : AdvSIMD_2VectorArg_Intrinsic;
270 def int_arm64_neon_umaxp : AdvSIMD_2VectorArg_Intrinsic;
271 def int_arm64_neon_fmaxp : AdvSIMD_2VectorArg_Intrinsic;
272
273 // Folding Minimum
274 def int_arm64_neon_sminp : AdvSIMD_2VectorArg_Intrinsic;
275 def int_arm64_neon_uminp : AdvSIMD_2VectorArg_Intrinsic;
276 def int_arm64_neon_fminp : AdvSIMD_2VectorArg_Intrinsic;
277
278 // Reciprocal Estimate/Step
279 def int_arm64_neon_frecps : AdvSIMD_2FloatArg_Intrinsic;
280 def int_arm64_neon_frsqrts : AdvSIMD_2FloatArg_Intrinsic;
281
282 // Reciprocal Exponent
283 def int_arm64_neon_frecpx : AdvSIMD_1FloatArg_Intrinsic;
284
285 // Vector Saturating Shift Left
286 def int_arm64_neon_sqshl : AdvSIMD_2IntArg_Intrinsic;
287 def int_arm64_neon_uqshl : AdvSIMD_2IntArg_Intrinsic;
288
289 // Vector Rounding Shift Left
290 def int_arm64_neon_srshl : AdvSIMD_2IntArg_Intrinsic;
291 def int_arm64_neon_urshl : AdvSIMD_2IntArg_Intrinsic;
292
293 // Vector Saturating Rounding Shift Left
294 def int_arm64_neon_sqrshl : AdvSIMD_2IntArg_Intrinsic;
295 def int_arm64_neon_uqrshl : AdvSIMD_2IntArg_Intrinsic;
296
297 // Vector Signed->Unsigned Shift Left by Constant
298 def int_arm64_neon_sqshlu : AdvSIMD_2IntArg_Intrinsic;
299
300 // Vector Signed->Unsigned Narrowing Saturating Shift Right by Constant
301 def int_arm64_neon_sqshrun : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic;
302
303 // Vector Signed->Unsigned Rounding Narrowing Saturating Shift Right by Const
304 def int_arm64_neon_sqrshrun : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic;
305
306 // Vector Narrowing Shift Right by Constant
307 def int_arm64_neon_sqshrn : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic;
308 def int_arm64_neon_uqshrn : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic;
309
310 // Vector Rounding Narrowing Shift Right by Constant
311 def int_arm64_neon_rshrn : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic;
312
313 // Vector Rounding Narrowing Saturating Shift Right by Constant
314 def int_arm64_neon_sqrshrn : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic;
315 def int_arm64_neon_uqrshrn : AdvSIMD_2Arg_Scalar_Narrow_Intrinsic;
316
317 // Vector Shift Left
318 def int_arm64_neon_sshl : AdvSIMD_2IntArg_Intrinsic;
319 def int_arm64_neon_ushl : AdvSIMD_2IntArg_Intrinsic;
320
321 // Vector Widening Shift Left by Constant
322 def int_arm64_neon_shll : AdvSIMD_2VectorArg_Scalar_Wide_BySize_Intrinsic;
323 def int_arm64_neon_sshll : AdvSIMD_2VectorArg_Scalar_Wide_Intrinsic;
324 def int_arm64_neon_ushll : AdvSIMD_2VectorArg_Scalar_Wide_Intrinsic;
325
326 // Vector Shift Right by Constant and Insert
327 def int_arm64_neon_vsri : AdvSIMD_3VectorArg_Scalar_Intrinsic;
328
329 // Vector Shift Left by Constant and Insert
330 def int_arm64_neon_vsli : AdvSIMD_3VectorArg_Scalar_Intrinsic;
331
332 // Vector Saturating Narrow
333 def int_arm64_neon_scalar_sqxtn: AdvSIMD_1IntArg_Narrow_Intrinsic;
334 def int_arm64_neon_scalar_uqxtn : AdvSIMD_1IntArg_Narrow_Intrinsic;
335 def int_arm64_neon_sqxtn : AdvSIMD_1VectorArg_Narrow_Intrinsic;
336 def int_arm64_neon_uqxtn : AdvSIMD_1VectorArg_Narrow_Intrinsic;
337
338 // Vector Saturating Extract and Unsigned Narrow
339 def int_arm64_neon_scalar_sqxtun : AdvSIMD_1IntArg_Narrow_Intrinsic;
340 def int_arm64_neon_sqxtun : AdvSIMD_1VectorArg_Narrow_Intrinsic;
341
342 // Vector Absolute Value
343 def int_arm64_neon_abs : AdvSIMD_1IntArg_Intrinsic;
344
345 // Vector Saturating Absolute Value
346 def int_arm64_neon_sqabs : AdvSIMD_1IntArg_Intrinsic;
347
348 // Vector Saturating Negation
349 def int_arm64_neon_sqneg : AdvSIMD_1IntArg_Intrinsic;
350
351 // Vector Count Leading Sign Bits
352 def int_arm64_neon_cls : AdvSIMD_1VectorArg_Intrinsic;
353
354 // Vector Reciprocal Estimate
355 def int_arm64_neon_urecpe : AdvSIMD_1VectorArg_Intrinsic;
356 def int_arm64_neon_frecpe : AdvSIMD_1FloatArg_Intrinsic;
357
358 // Vector Square Root Estimate
359 def int_arm64_neon_ursqrte : AdvSIMD_1VectorArg_Intrinsic;
360 def int_arm64_neon_frsqrte : AdvSIMD_1FloatArg_Intrinsic;
361
362 // Vector Bitwise Reverse
363 def int_arm64_neon_rbit : AdvSIMD_1VectorArg_Intrinsic;
364
365 // Vector Conversions Between Half-Precision and Single-Precision.
366 def int_arm64_neon_vcvtfp2hf
367 : Intrinsic<[llvm_v4i16_ty], [llvm_v4f32_ty], [IntrNoMem]>;
368 def int_arm64_neon_vcvthf2fp
369 : Intrinsic<[llvm_v4f32_ty], [llvm_v4i16_ty], [IntrNoMem]>;
370
371 // Vector Conversions Between Floating-point and Fixed-point.
372 def int_arm64_neon_vcvtfp2fxs : AdvSIMD_CvtFPToFx_Intrinsic;
373 def int_arm64_neon_vcvtfp2fxu : AdvSIMD_CvtFPToFx_Intrinsic;
374 def int_arm64_neon_vcvtfxs2fp : AdvSIMD_CvtFxToFP_Intrinsic;
375 def int_arm64_neon_vcvtfxu2fp : AdvSIMD_CvtFxToFP_Intrinsic;
376
377 // Vector FP->Int Conversions
378 def int_arm64_neon_fcvtas : AdvSIMD_FPToIntRounding_Intrinsic;
379 def int_arm64_neon_fcvtau : AdvSIMD_FPToIntRounding_Intrinsic;
380 def int_arm64_neon_fcvtms : AdvSIMD_FPToIntRounding_Intrinsic;
381 def int_arm64_neon_fcvtmu : AdvSIMD_FPToIntRounding_Intrinsic;
382 def int_arm64_neon_fcvtns : AdvSIMD_FPToIntRounding_Intrinsic;
383 def int_arm64_neon_fcvtnu : AdvSIMD_FPToIntRounding_Intrinsic;
384 def int_arm64_neon_fcvtps : AdvSIMD_FPToIntRounding_Intrinsic;
385 def int_arm64_neon_fcvtpu : AdvSIMD_FPToIntRounding_Intrinsic;
386 def int_arm64_neon_fcvtzs : AdvSIMD_FPToIntRounding_Intrinsic;
387 def int_arm64_neon_fcvtzu : AdvSIMD_FPToIntRounding_Intrinsic;
388
389 // Vector FP Rounding: only ties to even is unrepresented by a normal
390 // intrinsic.
391 def int_arm64_neon_frintn : AdvSIMD_1FloatArg_Intrinsic;
392
393 // Scalar FP->Int conversions
394
395 // Vector FP Inexact Narrowing
396 def int_arm64_neon_fcvtxn : AdvSIMD_1VectorArg_Expand_Intrinsic;
397
398 // Scalar FP Inexact Narrowing
399 def int_arm64_sisd_fcvtxn : Intrinsic<[llvm_float_ty], [llvm_double_ty],
400 [IntrNoMem]>;
401 }
402
403 let TargetPrefix = "arm64" in { // All intrinsics start with "llvm.arm64.".
404 class AdvSIMD_2Vector2Index_Intrinsic
405 : Intrinsic<[llvm_anyvector_ty],
406 [llvm_anyvector_ty, llvm_i64_ty, LLVMMatchType<0>, llvm_i64_ty],
407 [IntrNoMem]>;
408 }
409
410 // Vector element to element moves
411 def int_arm64_neon_vcopy_lane: AdvSIMD_2Vector2Index_Intrinsic;
412
413 let TargetPrefix = "arm64" in { // All intrinsics start with "llvm.arm64.".
414 class AdvSIMD_1Vec_Load_Intrinsic
415 : Intrinsic<[llvm_anyvector_ty], [LLVMAnyPointerType<LLVMMatchType<0>>],
416 [IntrReadArgMem]>;
417 class AdvSIMD_1Vec_Store_Lane_Intrinsic
418 : Intrinsic<[], [llvm_anyvector_ty, llvm_i64_ty, llvm_anyptr_ty],
419 [IntrReadWriteArgMem, NoCapture<2>]>;
420
421 class AdvSIMD_2Vec_Load_Intrinsic
422 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
423 [LLVMAnyPointerType<LLVMMatchType<0>>],
424 [IntrReadArgMem]>;
425 class AdvSIMD_2Vec_Load_Lane_Intrinsic
426 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
427 [LLVMMatchType<0>, LLVMMatchType<0>,
428 llvm_i64_ty, llvm_anyptr_ty],
429 [IntrReadArgMem]>;
430 class AdvSIMD_2Vec_Store_Intrinsic
431 : Intrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>,
432 LLVMAnyPointerType<LLVMMatchType<0>>],
433 [IntrReadWriteArgMem, NoCapture<2>]>;
434 class AdvSIMD_2Vec_Store_Lane_Intrinsic
435 : Intrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>,
436 llvm_i64_ty, llvm_anyptr_ty],
437 [IntrReadWriteArgMem, NoCapture<3>]>;
438
439 class AdvSIMD_3Vec_Load_Intrinsic
440 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>],
441 [LLVMAnyPointerType<LLVMMatchType<0>>],
442 [IntrReadArgMem]>;
443 class AdvSIMD_3Vec_Load_Lane_Intrinsic
444 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>],
445 [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>,
446 llvm_i64_ty, llvm_anyptr_ty],
447 [IntrReadArgMem]>;
448 class AdvSIMD_3Vec_Store_Intrinsic
449 : Intrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>,
450 LLVMMatchType<0>, LLVMAnyPointerType<LLVMMatchType<0>>],
451 [IntrReadWriteArgMem, NoCapture<3>]>;
452 class AdvSIMD_3Vec_Store_Lane_Intrinsic
453 : Intrinsic<[], [llvm_anyvector_ty,
454 LLVMMatchType<0>, LLVMMatchType<0>,
455 llvm_i64_ty, llvm_anyptr_ty],
456 [IntrReadWriteArgMem, NoCapture<4>]>;
457
458 class AdvSIMD_4Vec_Load_Intrinsic
459 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
460 LLVMMatchType<0>, LLVMMatchType<0>],
461 [LLVMAnyPointerType<LLVMMatchType<0>>],
462 [IntrReadArgMem]>;
463 class AdvSIMD_4Vec_Load_Lane_Intrinsic
464 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
465 LLVMMatchType<0>, LLVMMatchType<0>],
466 [LLVMMatchType<0>, LLVMMatchType<0>,
467 LLVMMatchType<0>, LLVMMatchType<0>,
468 llvm_i64_ty, llvm_anyptr_ty],
469 [IntrReadArgMem]>;
470 class AdvSIMD_4Vec_Store_Intrinsic
471 : Intrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>,
472 LLVMMatchType<0>, LLVMMatchType<0>,
473 LLVMAnyPointerType<LLVMMatchType<0>>],
474 [IntrReadWriteArgMem, NoCapture<4>]>;
475 class AdvSIMD_4Vec_Store_Lane_Intrinsic
476 : Intrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>,
477 LLVMMatchType<0>, LLVMMatchType<0>,
478 llvm_i64_ty, llvm_anyptr_ty],
479 [IntrReadWriteArgMem, NoCapture<5>]>;
480 }
481
482 // Memory ops
483
484 def int_arm64_neon_ld1x2 : AdvSIMD_2Vec_Load_Intrinsic;
485 def int_arm64_neon_ld1x3 : AdvSIMD_3Vec_Load_Intrinsic;
486 def int_arm64_neon_ld1x4 : AdvSIMD_4Vec_Load_Intrinsic;
487
488 def int_arm64_neon_st1x2 : AdvSIMD_2Vec_Store_Intrinsic;
489 def int_arm64_neon_st1x3 : AdvSIMD_3Vec_Store_Intrinsic;
490 def int_arm64_neon_st1x4 : AdvSIMD_4Vec_Store_Intrinsic;
491
492 def int_arm64_neon_ld2 : AdvSIMD_2Vec_Load_Intrinsic;
493 def int_arm64_neon_ld3 : AdvSIMD_3Vec_Load_Intrinsic;
494 def int_arm64_neon_ld4 : AdvSIMD_4Vec_Load_Intrinsic;
495
496 def int_arm64_neon_ld2lane : AdvSIMD_2Vec_Load_Lane_Intrinsic;
497 def int_arm64_neon_ld3lane : AdvSIMD_3Vec_Load_Lane_Intrinsic;
498 def int_arm64_neon_ld4lane : AdvSIMD_4Vec_Load_Lane_Intrinsic;
499
500 def int_arm64_neon_ld2r : AdvSIMD_2Vec_Load_Intrinsic;
501 def int_arm64_neon_ld3r : AdvSIMD_3Vec_Load_Intrinsic;
502 def int_arm64_neon_ld4r : AdvSIMD_4Vec_Load_Intrinsic;
503
504 def int_arm64_neon_st2 : AdvSIMD_2Vec_Store_Intrinsic;
505 def int_arm64_neon_st3 : AdvSIMD_3Vec_Store_Intrinsic;
506 def int_arm64_neon_st4 : AdvSIMD_4Vec_Store_Intrinsic;
507
508 def int_arm64_neon_st2lane : AdvSIMD_2Vec_Store_Lane_Intrinsic;
509 def int_arm64_neon_st3lane : AdvSIMD_3Vec_Store_Lane_Intrinsic;
510 def int_arm64_neon_st4lane : AdvSIMD_4Vec_Store_Lane_Intrinsic;
511
512 let TargetPrefix = "arm64" in { // All intrinsics start with "llvm.arm64.".
513 class AdvSIMD_Tbl1_Intrinsic
514 : Intrinsic<[llvm_anyvector_ty], [llvm_v16i8_ty, LLVMMatchType<0>],
515 [IntrNoMem]>;
516 class AdvSIMD_Tbl2_Intrinsic
517 : Intrinsic<[llvm_anyvector_ty],
518 [llvm_v16i8_ty, llvm_v16i8_ty, LLVMMatchType<0>], [IntrNoMem]>;
519 class AdvSIMD_Tbl3_Intrinsic
520 : Intrinsic<[llvm_anyvector_ty],
521 [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty,
522 LLVMMatchType<0>],
523 [IntrNoMem]>;
524 class AdvSIMD_Tbl4_Intrinsic
525 : Intrinsic<[llvm_anyvector_ty],
526 [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty,
527 LLVMMatchType<0>],
528 [IntrNoMem]>;
529
530 class AdvSIMD_Tbx1_Intrinsic
531 : Intrinsic<[llvm_anyvector_ty],
532 [LLVMMatchType<0>, llvm_v16i8_ty, LLVMMatchType<0>],
533 [IntrNoMem]>;
534 class AdvSIMD_Tbx2_Intrinsic
535 : Intrinsic<[llvm_anyvector_ty],
536 [LLVMMatchType<0>, llvm_v16i8_ty, llvm_v16i8_ty,
537 LLVMMatchType<0>],
538 [IntrNoMem]>;
539 class AdvSIMD_Tbx3_Intrinsic
540 : Intrinsic<[llvm_anyvector_ty],
541 [LLVMMatchType<0>, llvm_v16i8_ty, llvm_v16i8_ty,
542 llvm_v16i8_ty, LLVMMatchType<0>],
543 [IntrNoMem]>;
544 class AdvSIMD_Tbx4_Intrinsic
545 : Intrinsic<[llvm_anyvector_ty],
546 [LLVMMatchType<0>, llvm_v16i8_ty, llvm_v16i8_ty,
547 llvm_v16i8_ty, llvm_v16i8_ty, LLVMMatchType<0>],
548 [IntrNoMem]>;
549 }
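// In short: the TblN classes take N table registers (each v16i8) followed by
// the index vector, while the TbxN classes additionally take the existing
// destination vector as their first operand so that out-of-range indices can
// leave the corresponding elements unchanged.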
550 def int_arm64_neon_tbl1 : AdvSIMD_Tbl1_Intrinsic;
551 def int_arm64_neon_tbl2 : AdvSIMD_Tbl2_Intrinsic;
552 def int_arm64_neon_tbl3 : AdvSIMD_Tbl3_Intrinsic;
553 def int_arm64_neon_tbl4 : AdvSIMD_Tbl4_Intrinsic;
554
555 def int_arm64_neon_tbx1 : AdvSIMD_Tbx1_Intrinsic;
556 def int_arm64_neon_tbx2 : AdvSIMD_Tbx2_Intrinsic;
557 def int_arm64_neon_tbx3 : AdvSIMD_Tbx3_Intrinsic;
558 def int_arm64_neon_tbx4 : AdvSIMD_Tbx4_Intrinsic;
559
560 let TargetPrefix = "arm64" in {
561 class Crypto_AES_DataKey_Intrinsic
562 : Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
563
564 class Crypto_AES_Data_Intrinsic
565 : Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>;
566
567 // SHA intrinsic taking 5 words of the hash (v4i32, i32) and 4 of the schedule
568 // (v4i32).
569 class Crypto_SHA_5Hash4Schedule_Intrinsic
570 : Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty, llvm_v4i32_ty],
571 [IntrNoMem]>;
572
573 // SHA intrinsic taking a single word of the hash (i32), used for the SHA1
574 // fixed-rotate operation (sha1h).
575 class Crypto_SHA_1Hash_Intrinsic
576 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
577
578 // SHA intrinsic taking 8 words of the schedule
579 class Crypto_SHA_8Schedule_Intrinsic
580 : Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
581
582 // SHA intrinsic taking 12 words of the schedule
583 class Crypto_SHA_12Schedule_Intrinsic
584 : Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
585 [IntrNoMem]>;
586
587 // SHA intrinsic taking 8 words of the hash and 4 of the schedule.
588 class Crypto_SHA_8Hash4Schedule_Intrinsic
589 : Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
590 [IntrNoMem]>;
591 }
592
593 // AES
594 def int_arm64_crypto_aese : Crypto_AES_DataKey_Intrinsic;
595 def int_arm64_crypto_aesd : Crypto_AES_DataKey_Intrinsic;
596 def int_arm64_crypto_aesmc : Crypto_AES_Data_Intrinsic;
597 def int_arm64_crypto_aesimc : Crypto_AES_Data_Intrinsic;
598
599 // SHA1
600 def int_arm64_crypto_sha1c : Crypto_SHA_5Hash4Schedule_Intrinsic;
601 def int_arm64_crypto_sha1p : Crypto_SHA_5Hash4Schedule_Intrinsic;
602 def int_arm64_crypto_sha1m : Crypto_SHA_5Hash4Schedule_Intrinsic;
603 def int_arm64_crypto_sha1h : Crypto_SHA_1Hash_Intrinsic;
604
605 def int_arm64_crypto_sha1su0 : Crypto_SHA_12Schedule_Intrinsic;
606 def int_arm64_crypto_sha1su1 : Crypto_SHA_8Schedule_Intrinsic;
607
608 // SHA256
609 def int_arm64_crypto_sha256h : Crypto_SHA_8Hash4Schedule_Intrinsic;
610 def int_arm64_crypto_sha256h2 : Crypto_SHA_8Hash4Schedule_Intrinsic;
611 def int_arm64_crypto_sha256su0 : Crypto_SHA_8Schedule_Intrinsic;
612 def int_arm64_crypto_sha256su1 : Crypto_SHA_12Schedule_Intrinsic;
613
614 //===----------------------------------------------------------------------===//
615 // CRC32
616
617 let TargetPrefix = "arm64" in {
618
619 def int_arm64_crc32b : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
620 [IntrNoMem]>;
621 def int_arm64_crc32cb : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
622 [IntrNoMem]>;
623 def int_arm64_crc32h : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
624 [IntrNoMem]>;
625 def int_arm64_crc32ch : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
626 [IntrNoMem]>;
627 def int_arm64_crc32w : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
628 [IntrNoMem]>;
629 def int_arm64_crc32cw : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
630 [IntrNoMem]>;
631 def int_arm64_crc32x : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty],
632 [IntrNoMem]>;
633 def int_arm64_crc32cx : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty],
634 [IntrNoMem]>;
635 }
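// Illustrative IR use (not part of this file): each step folds one unit of
// data into the running checksum, e.g.
//   %crc = call i32 @llvm.arm64.crc32cb(i32 %acc, i32 %byte)
// where the b/h/w/x suffix selects the width of the data operand and the
// "c" variants use the CRC-32C (Castagnoli) polynomial.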
167167 case Triple::thumb:
168168 resolveARMRelocation(RE, Value);
169169 break;
170 case Triple::aarch64:
170171 case Triple::arm64:
171 resolveARM64Relocation(RE, Value);
172 resolveAArch64Relocation(RE, Value);
172173 break;
173174 }
174175 }
288289 return false;
289290 }
290291
291 bool RuntimeDyldMachO::resolveARM64Relocation(const RelocationEntry &RE,
292 uint64_t Value) {
292 bool RuntimeDyldMachO::resolveAArch64Relocation(const RelocationEntry &RE,
293 uint64_t Value) {
293294 const SectionEntry &Section = Sections[RE.SectionID];
294295 uint8_t* LocalAddress = Section.Address + RE.Offset;
295296
4040 bool resolveI386Relocation(const RelocationEntry &RE, uint64_t Value);
4141 bool resolveX86_64Relocation(const RelocationEntry &RE, uint64_t Value);
4242 bool resolveARMRelocation(const RelocationEntry &RE, uint64_t Value);
43 bool resolveARM64Relocation(const RelocationEntry &RE, uint64_t Value);
43 bool resolveAArch64Relocation(const RelocationEntry &RE, uint64_t Value);
4444
4545 // Populate stubs in __jump_table section.
4646 void populateJumpTable(MachOObjectFile &Obj, const SectionRef &JTSection,
311311 MCpu = "core2";
312312 else if (Triple.getArch() == llvm::Triple::x86)
313313 MCpu = "yonah";
314 else if (Triple.getArch() == llvm::Triple::arm64)
314 else if (Triple.getArch() == llvm::Triple::arm64 ||
315 Triple.getArch() == llvm::Triple::aarch64)
315316 MCpu = "cyclone";
316317 }
317318
167167 CPU = "core2";
168168 else if (Triple.getArch() == llvm::Triple::x86)
169169 CPU = "yonah";
170 else if (Triple.getArch() == llvm::Triple::arm64)
170 else if (Triple.getArch() == llvm::Triple::arm64 ||
171 Triple.getArch() == llvm::Triple::aarch64)
171172 CPU = "cyclone";
172173 }
173174
2222 IsFunctionEHFrameSymbolPrivate = false;
2323 SupportsWeakOmittedEHFrame = false;
2424
25 if (T.isOSDarwin() && T.getArch() == Triple::arm64)
25 if (T.isOSDarwin() &&
26 (T.getArch() == Triple::arm64 || T.getArch() == Triple::aarch64))
2627 SupportsCompactUnwindWithoutEHFrame = true;
2728
2829 PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel
150151 COFFDebugSymbolsSection = nullptr;
151152
152153 if ((T.isMacOSX() && !T.isMacOSXVersionLT(10, 6)) ||
153 (T.isOSDarwin() && T.getArch() == Triple::arm64)) {
154 (T.isOSDarwin() &&
155 (T.getArch() == Triple::arm64 || T.getArch() == Triple::aarch64))) {
154156 CompactUnwindSection =
155157 Ctx->getMachOSection("__LD", "__compact_unwind",
156158 MachO::S_ATTR_DEBUG,
158160
159161 if (T.getArch() == Triple::x86_64 || T.getArch() == Triple::x86)
160162 CompactUnwindDwarfEHFrameOnly = 0x04000000;
161 else if (T.getArch() == Triple::arm64)
163 else if (T.getArch() == Triple::arm64 || T.getArch() == Triple::aarch64)
162164 CompactUnwindDwarfEHFrameOnly = 0x03000000;
163165 }
164166
784786 // cellspu-apple-darwin. Perhaps we should fix in Triple?
785787 if ((Arch == Triple::x86 || Arch == Triple::x86_64 ||
786788 Arch == Triple::arm || Arch == Triple::thumb ||
787 Arch == Triple::arm64 ||
789 Arch == Triple::arm64 || Arch == Triple::aarch64 ||
788790 Arch == Triple::ppc || Arch == Triple::ppc64 ||
789791 Arch == Triple::UnknownArch) &&
790792 (T.isOSDarwin() || T.isOSBinFormatMachO())) {
0 //==-- AArch64.h - Top-level interface for AArch64 --------------*- C++ -*-==//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the entry points for global functions defined in the LLVM
10 // AArch64 back-end.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #ifndef TARGET_AArch64_H
15 #define TARGET_AArch64_H
16
17 #include "Utils/AArch64BaseInfo.h"
18 #include "MCTargetDesc/AArch64MCTargetDesc.h"
19 #include "llvm/Target/TargetMachine.h"
20 #include "llvm/Support/DataTypes.h"
21
22 namespace llvm {
23
24 class AArch64TargetMachine;
25 class FunctionPass;
26 class MachineFunctionPass;
27
28 FunctionPass *createAArch64DeadRegisterDefinitions();
29 FunctionPass *createAArch64ConditionalCompares();
30 FunctionPass *createAArch64AdvSIMDScalar();
31 FunctionPass *createAArch64BranchRelaxation();
32 FunctionPass *createAArch64ISelDag(AArch64TargetMachine &TM,
33 CodeGenOpt::Level OptLevel);
34 FunctionPass *createAArch64StorePairSuppressPass();
35 FunctionPass *createAArch64ExpandPseudoPass();
36 FunctionPass *createAArch64LoadStoreOptimizationPass();
37 ModulePass *createAArch64PromoteConstantPass();
38 FunctionPass *createAArch64AddressTypePromotionPass();
39 /// \brief Creates an AArch64-specific Target Transformation Info pass.
40 ImmutablePass *
41 createAArch64TargetTransformInfoPass(const AArch64TargetMachine *TM);
42
43 FunctionPass *createAArch64CleanupLocalDynamicTLSPass();
44
45 FunctionPass *createAArch64CollectLOHPass();
46 } // end namespace llvm
47
48 #endif
0 //=- AArch64.td - Describe the AArch64 Target Machine --------*- tablegen -*-=//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //
10 //===----------------------------------------------------------------------===//
11
12 //===----------------------------------------------------------------------===//
13 // Target-independent interfaces which we are implementing
14 //===----------------------------------------------------------------------===//
15
16 include "llvm/Target/Target.td"
17
18 //===----------------------------------------------------------------------===//
19 // AArch64 Subtarget features.
20 //
21
22 def FeatureFPARMv8 : SubtargetFeature<"fp-armv8", "HasFPARMv8", "true",
23 "Enable ARMv8 FP">;
24
25 def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true",
26 "Enable Advanced SIMD instructions", [FeatureFPARMv8]>;
27
28 def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true",
29 "Enable cryptographic instructions">;
30
31 def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true",
32 "Enable ARMv8 CRC-32 checksum instructions">;
33
34 /// Cyclone has register move instructions which are "free".
35 def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true",
36 "Has zero-cycle register moves">;
37
38 /// Cyclone has instructions which zero registers for "free".
39 def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true",
40 "Has zero-cycle zeroing instructions">;
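// Note: the first template argument of each SubtargetFeature above
// ("fp-armv8", "neon", "crypto", "crc", "zcm", "zcz") is the name used on
// the command line, e.g. (illustrative) -mattr=+neon,+crc.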
41
42 //===----------------------------------------------------------------------===//
43 // Register File Description
44 //===----------------------------------------------------------------------===//
45
46 include "AArch64RegisterInfo.td"
47 include "AArch64CallingConvention.td"
48
49 //===----------------------------------------------------------------------===//
50 // Instruction Descriptions
51 //===----------------------------------------------------------------------===//
52
53 include "AArch64Schedule.td"
54 include "AArch64InstrInfo.td"
55
56 def AArch64InstrInfo : InstrInfo;
57
58 //===----------------------------------------------------------------------===//
59 // AArch64 Processors supported.
60 //
61 include "AArch64SchedA53.td"
62 include "AArch64SchedCyclone.td"
63
64 def ProcA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53",
65 "Cortex-A53 ARM processors",
66 [FeatureFPARMv8,
67 FeatureNEON,
68 FeatureCrypto,
69 FeatureCRC]>;
70
71 def ProcA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57",
72 "Cortex-A57 ARM processors",
73 [FeatureFPARMv8,
74 FeatureNEON,
75 FeatureCrypto,
76 FeatureCRC]>;
77
78 def ProcCyclone : SubtargetFeature<"cyclone", "ARMProcFamily", "Cyclone",
79 "Cyclone",
80 [FeatureFPARMv8,
81 FeatureNEON,
82 FeatureCrypto,
83 FeatureCRC,
84 FeatureZCRegMove, FeatureZCZeroing]>;
85
86 def : ProcessorModel<"generic", NoSchedModel, [FeatureFPARMv8,
87 FeatureNEON,
88 FeatureCRC]>;
89
90 def : ProcessorModel<"cortex-a53", CortexA53Model, [ProcA53]>;
91 def : ProcessorModel<"cortex-a57", NoSchedModel, [ProcA57]>;
92 def : ProcessorModel<"cyclone", CycloneModel, [ProcCyclone]>;
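// Note: the ProcessorModel names above are what -mcpu selects, e.g.
// (illustrative) llc -mtriple=arm64-apple-ios -mcpu=cyclone.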
93
94 //===----------------------------------------------------------------------===//
95 // Assembly parser
96 //===----------------------------------------------------------------------===//
97
98 def GenericAsmParserVariant : AsmParserVariant {
99 int Variant = 0;
100 string Name = "generic";
101 }
102
103 def AppleAsmParserVariant : AsmParserVariant {
104 int Variant = 1;
105 string Name = "apple-neon";
106 }
107
108 //===----------------------------------------------------------------------===//
109 // Assembly printer
110 //===----------------------------------------------------------------------===//
111 // AArch64 Uses the MC printer for asm output, so make sure the TableGen
112 // AsmWriter bits get associated with the correct class.
113 def GenericAsmWriter : AsmWriter {
114 string AsmWriterClassName = "InstPrinter";
115 int Variant = 0;
116 bit isMCAsmWriter = 1;
117 }
118
119 def AppleAsmWriter : AsmWriter {
120 let AsmWriterClassName = "AppleInstPrinter";
121 int Variant = 1;
122 int isMCAsmWriter = 1;
123 }
124
125 //===----------------------------------------------------------------------===//
126 // Target Declaration
127 //===----------------------------------------------------------------------===//
128
129 def AArch64 : Target {
130 let InstructionSet = AArch64InstrInfo;
131 let AssemblyParserVariants = [GenericAsmParserVariant, AppleAsmParserVariant];
132 let AssemblyWriters = [GenericAsmWriter, AppleAsmWriter];
133 }
0 //===-- AArch64AddressTypePromotion.cpp --- Promote type for addr accesses -==//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass tries to promote the computations used to obtain a sign-extended
10 // value that is then used in memory accesses.
11 // E.g.
12 // a = add nsw i32 b, 3
13 // d = sext i32 a to i64
14 // e = getelementptr ..., i64 d
15 //
16 // =>
17 // f = sext i32 b to i64
18 // a = add nsw i64 f, 3
19 // e = getelementptr ..., i64 a
20 //
21 // This is legal to do if the computations are marked with either the nsw or
22 // the nuw flag.
23 // Moreover, the current heuristic is simple: it does not create new sext
24 // operations, i.e., it gives up when a sext would have forked (e.g., if
25 // a = add i32 b, c, two sexts are required to promote the computation).
26 //
27 // FIXME: This pass may be useful for other targets too.
28 // ===---------------------------------------------------------------------===//
29
30 #include "AArch64.h"
31 #include "llvm/ADT/DenseMap.h"
32 #include "llvm/ADT/SmallPtrSet.h"
33 #include "llvm/ADT/SmallVector.h"
34 #include "llvm/IR/Constants.h"
35 #include "llvm/IR/Dominators.h"
36 #include "llvm/IR/Function.h"
37 #include "llvm/IR/Instructions.h"
38 #include "llvm/IR/Module.h"
39 #include "llvm/IR/Operator.h"
40 #include "llvm/Pass.h"
41 #include "llvm/Support/CommandLine.h"
42 #include "llvm/Support/Debug.h"
43
44 using namespace llvm;
45
46 #define DEBUG_TYPE "aarch64-type-promotion"
47
48 static cl::opt<bool>
49 EnableAddressTypePromotion("aarch64-type-promotion", cl::Hidden,
50 cl::desc("Enable the type promotion pass"),
51 cl::init(true));
52 static cl::opt<bool>
53 EnableMerge("aarch64-type-promotion-merge", cl::Hidden,
54 cl::desc("Enable merging of redundant sexts when one is dominating"
55 " the other."),
56 cl::init(true));
57
58 //===----------------------------------------------------------------------===//
59 // AArch64AddressTypePromotion
60 //===----------------------------------------------------------------------===//
61
62 namespace llvm {
63 void initializeAArch64AddressTypePromotionPass(PassRegistry &);
64 }
65
66 namespace {
67 class AArch64AddressTypePromotion : public FunctionPass {
68
69 public:
70 static char ID;
71 AArch64AddressTypePromotion()
72 : FunctionPass(ID), Func(nullptr), ConsideredSExtType(nullptr) {
73 initializeAArch64AddressTypePromotionPass(*PassRegistry::getPassRegistry());
74 }
75
76 const char *getPassName() const override {
77 return "AArch64 Address Type Promotion";
78 }
79
80 /// Iterate over the function and promote the computation of interesting
81 /// sext instructions.
82 bool runOnFunction(Function &F) override;
83
84 private:
85 /// The current function.
86 Function *Func;
87 /// Filter out all sexts that do not have this type.
88 /// Currently initialized with Int64Ty.
89 Type *ConsideredSExtType;
90
91 // This transformation requires dominator info.
92 void getAnalysisUsage(AnalysisUsage &AU) const override {
93 AU.setPreservesCFG();
94 AU.addRequired<DominatorTreeWrapperPass>();
95 AU.addPreserved<DominatorTreeWrapperPass>();
96 FunctionPass::getAnalysisUsage(AU);
97 }
98
99 typedef SmallPtrSet<Instruction *, 32> SetOfInstructions;
100 typedef SmallVector<Instruction *, 16> Instructions;
101 typedef DenseMap<Value *, Instructions> ValueToInsts;
102
103 /// Check if it is profitable to move a sext through this instruction.
104 /// Currently, we consider it is profitable if:
105 /// - Inst is used only once (no need to insert truncate).
106 /// - Inst has only one operand that will require a sext operation (we
107 /// do not create new sext operations).
108 bool shouldGetThrough(const Instruction *Inst);
109
110 /// Check if it is possible and legal to move a sext through this
111 /// instruction.
112 /// Current heuristic considers that we can get through:
113 /// - Arithmetic operation marked with the nsw or nuw flag.
114 /// - Other sext operation.
115 /// - Truncate operation if it was just dropping sign extended bits.
116 bool canGetThrough(const Instruction *Inst);
117
118 /// Move sext operations through safe to sext instructions.
119 bool propagateSignExtension(Instructions &SExtInsts);
120
121 /// Should this sext be considered for code motion?
122 /// We look for sexts with ConsideredSExtType and uses in at least one
123 /// GetElementPtrInst.
124 bool shouldConsiderSExt(const Instruction *SExt) const;
125
126 /// Collect all interesting sext operations, i.e., the ones with the right
127 /// type and used in memory accesses.
128 /// More precisely, a sext instruction is considered as interesting if it
129 /// is used in a "complex" getelementptr or if there exists at least one
130 /// other sext instruction that sign extends the same initial value.
131 /// A getelementptr is considered "complex" if it has more than 2
132 /// operands.
133 void analyzeSExtension(Instructions &SExtInsts);
134
135 /// Merge redundant sign extension operations in common dominator.
136 void mergeSExts(ValueToInsts &ValToSExtendedUses,
137 SetOfInstructions &ToRemove);
138 };
139 } // end anonymous namespace.
140
141 char AArch64AddressTypePromotion::ID = 0;
142
143 INITIALIZE_PASS_BEGIN(AArch64AddressTypePromotion, "aarch64-type-promotion",
144 "AArch64 Type Promotion Pass", false, false)
145 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
146 INITIALIZE_PASS_END(AArch64AddressTypePromotion, "aarch64-type-promotion",
147 "AArch64 Type Promotion Pass", false, false)
148
149 FunctionPass *llvm::createAArch64AddressTypePromotionPass() {
150 return new AArch64AddressTypePromotion();
151 }
152
153 bool AArch64AddressTypePromotion::canGetThrough(const Instruction *Inst) {
154 if (isa<SExtInst>(Inst))
155 return true;
156
157 const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst);
158 if (BinOp && isa<OverflowingBinaryOperator>(BinOp) &&
159 (BinOp->hasNoUnsignedWrap() || BinOp->hasNoSignedWrap()))
160 return true;
161
162 // sext(trunc(sext)) --> sext
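// Illustrative example (not from the original source), with
// ConsideredSExtType == i64:
//   %s = sext i32 %x to i64
//   %t = trunc i64 %s to i48
// Here the trunc only drops bits that are copies of the sign bit, so a later
// sext of %t to i64 can be rewritten in terms of %x.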
163 if (isa<TruncInst>(Inst) && isa<SExtInst>(Inst->getOperand(0))) {
164 const Instruction *Opnd = cast<Instruction>(Inst->getOperand(0));
165 // Check that the truncate just drop sign extended bits.
166 if (Inst->getType()->getIntegerBitWidth() >=
167 Opnd->getOperand(0)->getType()->getIntegerBitWidth() &&
168 Inst->getOperand(0)->getType()->getIntegerBitWidth() <=
169 ConsideredSExtType->getIntegerBitWidth())
170 return true;
171 }
172
173 return false;
174 }
175
176 bool AArch64AddressTypePromotion::shouldGetThrough(const Instruction *Inst) {
177 // If the type of the sext is the same as the considered one, this sext
178 // will become useless.
179 // Otherwise, we will have to do something to preserve the original value,
180 // unless it is used once.
181 if (isa<SExtInst>(Inst) &&
182 (Inst->getType() == ConsideredSExtType || Inst->hasOneUse()))
183 return true;
184
185 // If the Inst is used more than once, we may need to insert truncate
186 // operations and we don't do that at the moment.
187 if (!Inst->hasOneUse())
188 return false;
189
190 // This truncate is used only once, thus if we can get through, it will become
191 // useless.
192 if (isa<TruncInst>(Inst))
193 return true;
194
195 // If both operands are not constant, a new sext will be created here.
196 // Current heuristic is: each step should be profitable.
197 // Therefore we don't allow to increase the number of sext even if it may
198 // be profitable later on.
199 if (isa<BinaryOperator>(Inst) && isa<ConstantInt>(Inst->getOperand(1)))
200 return true;
201
202 return false;
203 }
204
205 static bool shouldSExtOperand(const Instruction *Inst, int OpIdx) {
206 if (isa<GetElementPtrInst>(Inst) && OpIdx == 0)
207 return false;
208 return true;
209 }
210
211 bool
212 AArch64AddressTypePromotion::shouldConsiderSExt(const Instruction *SExt) const {
213 if (SExt->getType() != ConsideredSExtType)
214 return false;
215
216 for (const Use &U : SExt->uses()) {
217 if (isa<GetElementPtrInst>(U.getUser()))
218 return true;
219 }
220
221 return false;
222 }
223
224 // Input:
225 // - SExtInsts contains all the sext instructions that are used directly in
226 // GetElementPtrInst, i.e., access to memory.
227 // Algorithm:
228 // - For each sext operation in SExtInsts:
229 // Let var be the operand of sext.
230 // while it is profitable (see shouldGetThrough), legal, and safe
231 // (see canGetThrough) to move sext through var's definition:
232 // * promote the type of var's definition.
233 // * fold var into sext uses.
234 // * move sext above var's definition.
235 // * update sext operand to use the operand of var that should be sign
236 // extended (by construction there is only one).
237 //
238 // E.g.,
239 // a = ... i32 c, 3
240 // b = sext i32 a to i64 <- is it legal/safe/profitable to get through 'a'
241 // ...
242 // = b
243 // => Yes, update the code
244 // b = sext i32 c to i64
245 // a = ... i64 b, 3
246 // ...
247 // = a
248 // Iterate on 'c'.
249 bool
250 AArch64AddressTypePromotion::propagateSignExtension(Instructions &SExtInsts) {
251 DEBUG(dbgs() << "*** Propagate Sign Extension ***\n");
252
253 bool LocalChange = false;
254 SetOfInstructions ToRemove;
255 ValueToInsts ValToSExtendedUses;
256 while (!SExtInsts.empty()) {
257 // Get through simple chain.
258 Instruction *SExt = SExtInsts.pop_back_val();
259
260 DEBUG(dbgs() << "Consider:\n" << *SExt << '\n');
261
262 // If this SExt has already been merged continue.
263 if (SExt->use_empty() && ToRemove.count(SExt)) {
264 DEBUG(dbgs() << "No uses => marked as delete\n");
265 continue;
266 }
267
268 // Now try to get through the chain of definitions.
269 while (isa<Instruction>(SExt->getOperand(0))) {
270 Instruction *Inst = dyn_cast<Instruction>(SExt->getOperand(0));
271 DEBUG(dbgs() << "Try to get through:\n" << *Inst << '\n');
272 if (!canGetThrough(Inst) || !shouldGetThrough(Inst)) {
273 // We cannot get through something that is not an Instruction
274 // or not safe to SExt.
275 DEBUG(dbgs() << "Cannot get through\n");
276 break;
277 }
278
279 LocalChange = true;
280 // If this is a sign extend, it becomes useless.
281 if (isa<SExtInst>(Inst) || isa<TruncInst>(Inst)) {
282 DEBUG(dbgs() << "SExt or trunc, mark it as to remove\n");
283 // We cannot use replaceAllUsesWith here because we may trigger some
284 // assertion on the type, as all the involved sext operations may not have
285 // been moved yet.
286 while (!Inst->use_empty()) {
287 Value::use_iterator UseIt = Inst->use_begin();
288 Instruction *UseInst = dyn_cast<Instruction>(UseIt->getUser());
289 assert(UseInst && "Use of sext is not an Instruction!");
290 UseInst->setOperand(UseIt->getOperandNo(), SExt);
291 }
292 ToRemove.insert(Inst);
293 SExt->setOperand(0, Inst->getOperand(0));
294 SExt->moveBefore(Inst);
295 continue;
296 }
297
298 // Get through the Instruction:
299 // 1. Update its type.
300 // 2. Replace the uses of SExt by Inst.
301 // 3. Sign extend each operand that needs to be sign extended.
302
303 // Step #1.
304 Inst->mutateType(SExt->getType());
305 // Step #2.
306 SExt->replaceAllUsesWith(Inst);
307 // Step #3.
308 Instruction *SExtForOpnd = SExt;
309
310 DEBUG(dbgs() << "Propagate SExt to operands\n");
311 for (int OpIdx = 0, EndOpIdx = Inst->getNumOperands(); OpIdx != EndOpIdx;
312 ++OpIdx) {
313 DEBUG(dbgs() << "Operand:\n" << *(Inst->getOperand(OpIdx)) << '\n');
314 if (Inst->getOperand(OpIdx)->getType() == SExt->getType() ||
315 !shouldSExtOperand(Inst, OpIdx)) {
316 DEBUG(dbgs() << "No need to propagate\n");
317 continue;
318 }
319 // Check if we can statically sign extend the operand.
320 Value *Opnd = Inst->getOperand(OpIdx);
321 if (const ConstantInt *Cst = dyn_cast<ConstantInt>(Opnd)) {
322 DEBUG(dbgs() << "Statically sign extend\n");
323 Inst->setOperand(OpIdx, ConstantInt::getSigned(SExt->getType(),
324 Cst->getSExtValue()));
325 continue;
326 }
327 // UndefValues are typed, so we have to statically sign extend them.
328 if (isa<UndefValue>(Opnd)) {
329 DEBUG(dbgs() << "Statically sign extend\n");
330 Inst->setOperand(OpIdx, UndefValue::get(SExt->getType()));
331 continue;
332 }
333
334 // Otherwise we have to explicitly sign extend it.
335 assert(SExtForOpnd &&
336 "Only one operand should have been sign extended");
337
338 SExtForOpnd->setOperand(0, Opnd);
339
340 DEBUG(dbgs() << "Move before:\n" << *Inst << "\nSign extend\n");
341 // Move the sign extension before the insertion point.
342 SExtForOpnd->moveBefore(Inst);
343 Inst->setOperand(OpIdx, SExtForOpnd);
344 // If more sext are required, new instructions will have to be created.
345 SExtForOpnd = nullptr;
346 }
347 if (SExtForOpnd == SExt) {
348 DEBUG(dbgs() << "Sign extension is useless now\n");
349 ToRemove.insert(SExt);
350 break;
351 }
352 }
353
354 // If the use is already of the right type, connect its uses to its argument
355 // and delete it.
356 // This can happen for an Instruction all of whose uses are sign extended.
357 if (!ToRemove.count(SExt) &&
358 SExt->getType() == SExt->getOperand(0)->getType()) {
359 DEBUG(dbgs() << "Sign extension is useless, attach its use to "
360 "its argument\n");
361 SExt->replaceAllUsesWith(SExt->getOperand(0));
362 ToRemove.insert(SExt);
363 } else
364 ValToSExtendedUses[SExt->getOperand(0)].push_back(SExt);
365 }
366
367 if (EnableMerge)
368 mergeSExts(ValToSExtendedUses, ToRemove);
369
370 // Remove all instructions marked as ToRemove.
371 for (Instruction *I: ToRemove)
372 I->eraseFromParent();
373 return LocalChange;
374 }
375
376 void AArch64AddressTypePromotion::mergeSExts(ValueToInsts &ValToSExtendedUses,
377 SetOfInstructions &ToRemove) {
378 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
379
380 for (auto &Entry : ValToSExtendedUses) {
381 Instructions &Insts = Entry.second;
382 Instructions CurPts;
383 for (Instruction *Inst : Insts) {
384 if (ToRemove.count(Inst))
385 continue;
386 bool inserted = false;
387 for (auto Pt : CurPts) {
388 if (DT.dominates(Inst, Pt)) {
389 DEBUG(dbgs() << "Replace all uses of:\n" << *Pt << "\nwith:\n"
390 << *Inst << '\n');
391 (Pt)->replaceAllUsesWith(Inst);
392 ToRemove.insert(Pt);
393 Pt = Inst;
394 inserted = true;
395 break;
396 }
397 if (!DT.dominates(Pt, Inst))
398 // Give up if we need to merge in a common dominator as the
399 // experiments show it is not profitable.
400 continue;
401
402 DEBUG(dbgs() << "Replace all uses of:\n" << *Inst << "\nwith:\n"
403 << *Pt << '\n');
404 Inst->replaceAllUsesWith(Pt);
405 ToRemove.insert(Inst);
406 inserted = true;
407 break;
408 }
409 if (!inserted)
410 CurPts.push_back(Inst);
411 }
412 }
413 }
414
415 void AArch64AddressTypePromotion::analyzeSExtension(Instructions &SExtInsts) {
416 DEBUG(dbgs() << "*** Analyze Sign Extensions ***\n");
417
418 DenseMap<Value *, Instruction *> SeenChains;
419
420 for (auto &BB : *Func) {
421 for (auto &II : BB) {
422 Instruction *SExt = &II;
423
424 // Collect all sext operation per type.
425 if (!isa<SExtInst>(SExt) || !shouldConsiderSExt(SExt))
426 continue;
427
428 DEBUG(dbgs() << "Found:\n" << (*SExt) << '\n');
429
430 // Cases where we actually perform the optimization:
431 // 1. SExt is used in a getelementptr with more than 2 operands =>
432 // likely we can merge some computation if they are done on 64 bits.
433 // 2. The value at the start of the SExt chain is sign extended several
434 // times. => code sharing is possible.
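// Illustrative example of case #2 (not from the original source):
//   %s1 = sext i32 %x to i64 ; feeds a getelementptr
//   %s2 = sext i32 %x to i64 ; feeds another getelementptr
// Both chains start at %x, so the first sext is remembered in SeenChains and,
// when the second one is seen, both are queued for promotion.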
435
436 bool insert = false;
437 // #1.
438 for (const Use &U : SExt->uses()) {
439 const Instruction *Inst = dyn_cast<GetElementPtrInst>(U.getUser());
440 if (Inst && Inst->getNumOperands() > 2) {
441 DEBUG(dbgs() << "Interesting use in GetElementPtrInst\n" << *Inst
442 << '\n');
443 insert = true;
444 break;
445 }
446 }
447
448 // #2.
449 // Check the head of the chain.
450 Instruction *Inst = SExt;
451 Value *Last;
452 do {
453 int OpdIdx = 0;
454 const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst);
455 if (BinOp && isa<ConstantInt>(BinOp->getOperand(0)))
456 OpdIdx = 1;
457 Last = Inst->getOperand(OpdIdx);
458 Inst = dyn_cast<Instruction>(Last);
459 } while (Inst && canGetThrough(Inst) && shouldGetThrough(Inst));
460
461 DEBUG(dbgs() << "Head of the chain:\n" << *Last << '\n');
462 DenseMap<Value *, Instruction *>::iterator AlreadySeen =
463 SeenChains.find(Last);
464 if (insert || AlreadySeen != SeenChains.end()) {
465 DEBUG(dbgs() << "Insert\n");
466 SExtInsts.push_back(SExt);
467 if (AlreadySeen != SeenChains.end() && AlreadySeen->second != nullptr) {
468 DEBUG(dbgs() << "Insert chain member\n");
469 SExtInsts.push_back(AlreadySeen->second);
470 SeenChains[Last] = nullptr;
471 }
472 } else {
473 DEBUG(dbgs() << "Record its chain membership\n");
474 SeenChains[Last] = SExt;
475 }
476 }
477 }
478 }
479
480 bool AArch64AddressTypePromotion::runOnFunction(Function &F) {
481 if (!EnableAddressTypePromotion || F.isDeclaration())
482 return false;
483 Func = &F;
484 ConsideredSExtType = Type::getInt64Ty(Func->getContext());
485
486 DEBUG(dbgs() << "*** " << getPassName() << ": " << Func->getName() << '\n');
487
488 Instructions SExtInsts;
489 analyzeSExtension(SExtInsts);
490 return propagateSignExtension(SExtInsts);
491 }
0 //===-- AArch64AdvSIMDScalar.cpp - Use AdvSIMD scalar instructions --------===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 // When profitable, replace GPR targeting i64 instructions with their
9 // AdvSIMD scalar equivalents. Generally speaking, "profitable" is defined
10 // as minimizing the number of cross-class register copies.
11 //===----------------------------------------------------------------------===//
12
13 //===----------------------------------------------------------------------===//
14 // TODO: Graph based predicate heuristics.
15 // Walking the instruction list linearly will get many, perhaps most, of
16 // the cases, but to do a truly thorough job of this, we need a more
17 // holistic approach.
18 //
19 // This optimization is very similar in spirit to the register allocator's
20 // spill placement, only here we're determining where to place cross-class
21 // register copies rather than spills. As such, a similar approach is
22 // called for.
23 //
24 // We want to build up a set of graphs of all instructions which are candidates
25 // for transformation along with instructions which generate their inputs and
26 // consume their outputs. For each edge in the graph, we assign a weight
27 // based on whether there is a copy required there (weight zero if not) and
28 // the block frequency of the block containing the defining or using
29 // instruction, whichever is less. Our optimization is then a graph problem
30 // to minimize the total weight of all the graphs, then transform instructions
31 // and add or remove copy instructions as called for to implement the
32 // solution.
33 //===----------------------------------------------------------------------===//
34
35 #include "AArch64.h"
36 #include "AArch64InstrInfo.h"
37 #include "AArch64RegisterInfo.h"
38 #include "llvm/ADT/Statistic.h"
39 #include "llvm/CodeGen/MachineFunctionPass.h"
40 #include "llvm/CodeGen/MachineFunction.h"
41 #include "llvm/CodeGen/MachineInstr.h"
42 #include "llvm/CodeGen/MachineInstrBuilder.h"
43 #include "llvm/CodeGen/MachineRegisterInfo.h"
44 #include "llvm/Support/CommandLine.h"
45 #include "llvm/Support/Debug.h"
46 #include "llvm/Support/raw_ostream.h"
47 using namespace llvm;
48
49 #define DEBUG_TYPE "aarch64-simd-scalar"
50
51 // Allow forcing all i64 operations with equivalent SIMD instructions to use
52 // them. For stress-testing the transformation function.
53 static cl::opt<bool>
54 TransformAll("aarch64-simd-scalar-force-all",
55 cl::desc("Force use of AdvSIMD scalar instructions everywhere"),
56 cl::init(false), cl::Hidden);
57
58 STATISTIC(NumScalarInsnsUsed, "Number of scalar instructions used");
59 STATISTIC(NumCopiesDeleted, "Number of cross-class copies deleted");
60 STATISTIC(NumCopiesInserted, "Number of cross-class copies inserted");
61
62 namespace {
63 class AArch64AdvSIMDScalar : public MachineFunctionPass {
64 MachineRegisterInfo *MRI;
65 const AArch64InstrInfo *TII;
66
67 private:
68 // isProfitableToTransform - Predicate function to determine whether an
69 // instruction should be transformed to its equivalent AdvSIMD scalar
70 // instruction. "add Xd, Xn, Xm" ==> "add Dd, Da, Db", for example.
71 bool isProfitableToTransform(const MachineInstr *MI) const;
72
73 // transformInstruction - Perform the transformation of an instruction
74 // to its equivalent AdvSIMD scalar instruction. Update inputs and outputs
75 // to be the correct register class, minimizing cross-class copies.
76 void transformInstruction(MachineInstr *MI);
77
78 // processMachineBasicBlock - Main optimization loop.
79 bool processMachineBasicBlock(MachineBasicBlock *MBB);
80
81 public:
82 static char ID; // Pass identification, replacement for typeid.
83 explicit AArch64AdvSIMDScalar() : MachineFunctionPass(ID) {}
84
85 bool runOnMachineFunction(MachineFunction &F) override;
86
87 const char *getPassName() const override {
88 return "AdvSIMD Scalar Operation Optimization";
89 }
90
91 void getAnalysisUsage(AnalysisUsage &AU) const override {
92 AU.setPreservesCFG();
93 MachineFunctionPass::getAnalysisUsage(AU);
94 }
95 };
96 char AArch64AdvSIMDScalar::ID = 0;
97 } // end anonymous namespace
98
99 static bool isGPR64(unsigned Reg, unsigned SubReg,
100 const MachineRegisterInfo *MRI) {
101 if (SubReg)
102 return false;
103 if (TargetRegisterInfo::isVirtualRegister(Reg))
104 return MRI->getRegClass(Reg)->hasSuperClassEq(&AArch64::GPR64RegClass);
105 return AArch64::GPR64RegClass.contains(Reg);
106 }
107
108 static bool isFPR64(unsigned Reg, unsigned SubReg,
109 const MachineRegisterInfo *MRI) {
110 if (TargetRegisterInfo::isVirtualRegister(Reg))
111 return (MRI->getRegClass(Reg)->hasSuperClassEq(&AArch64::FPR64RegClass) &&
112 SubReg == 0) ||
113 (MRI->getRegClass(Reg)->hasSuperClassEq(&AArch64::FPR128RegClass) &&
114 SubReg == AArch64::dsub);
115 // Physical register references just check the register class directly.
116 return (AArch64::FPR64RegClass.contains(Reg) && SubReg == 0) ||
117 (AArch64::FPR128RegClass.contains(Reg) && SubReg == AArch64::dsub);
118 }
119
120 // getSrcFromCopy - Get the original source register for a GPR64 <--> FPR64
121 // copy instruction. Return zero_reg if the instruction is not a copy.
122 static unsigned getSrcFromCopy(const MachineInstr *MI,
123 const MachineRegisterInfo *MRI,
124 unsigned &SubReg) {
125 SubReg = 0;
126 // The "FMOV Xd, Dn" instruction is the typical form.
127 if (MI->getOpcode() == AArch64::FMOVDXr ||
128 MI->getOpcode() == AArch64::FMOVXDr)
129 return MI->getOperand(1).getReg();
130 // A lane zero extract "UMOV.d Xd, Vn[0]" is equivalent. We shouldn't see
131 // these at this stage, but it's easy to check for.
132 if (MI->getOpcode() == AArch64::UMOVvi64 && MI->getOperand(2).getImm() == 0) {
133 SubReg = AArch64::dsub;
134 return MI->getOperand(1).getReg();
135 }
136 // Or just a plain COPY instruction. This can be directly to/from FPR64,
137 // or it can be a dsub subreg reference to an FPR128.
138 if (MI->getOpcode() == AArch64::COPY) {
139 if (isFPR64(MI->getOperand(0).getReg(), MI->getOperand(0).getSubReg(),
140 MRI) &&
141 isGPR64(MI->getOperand(1).getReg(), MI->getOperand(1).getSubReg(), MRI))
142 return MI->getOperand(1).getReg();
143 if (isGPR64(MI->getOperand(0).getReg(), MI->getOperand(0).getSubReg(),
144 MRI) &&
145 isFPR64(MI->getOperand(1).getReg(), MI->getOperand(1).getSubReg(),
146 MRI)) {
147 SubReg = MI->getOperand(1).getSubReg();
148 return MI->getOperand(1).getReg();
149 }
150 }
151
152 // Otherwise, this is some other kind of instruction.
153 return 0;
154 }
155
156 // getTransformOpcode - For any opcode for which there is an AdvSIMD equivalent
157 // that we're considering transforming to, return that AdvSIMD opcode. For all
158 // others, return the original opcode.
159 static int getTransformOpcode(unsigned Opc) {
160 switch (Opc) {
161 default:
162 break;
163 // FIXME: Lots more possibilities.
164 case AArch64::ADDXrr:
165 return AArch64::ADDv1i64;
166 case AArch64::SUBXrr:
167 return AArch64::SUBv1i64;
168 }
169 // No AdvSIMD equivalent, so just return the original opcode.
170 return Opc;
171 }
172
173 static bool isTransformable(const MachineInstr *MI) {
174 int Opc = MI->getOpcode();
175 return Opc != getTransformOpcode(Opc);
176 }
177
178 // isProfitableToTransform - Predicate function to determine whether an
179 // instruction should be transformed to its equivalent AdvSIMD scalar
180 // instruction. "add Xd, Xn, Xm" ==> "add Dd, Da, Db", for example.
181 bool
182 AArch64AdvSIMDScalar::isProfitableToTransform(const MachineInstr *MI) const {
183 // If this instruction isn't eligible to be transformed (no SIMD equivalent),
184 // early exit since that's the common case.
185 if (!isTransformable(MI))
186 return false;
187
188 // Count the number of copies we'll need to add and approximate the number
189 // of copies that a transform will enable us to remove.
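// The initial estimate of three new copies assumes one GPR64->FPR64 copy per
// source operand plus one FPR64->GPR64 copy for the result; each count is
// adjusted below as existing copies turn out to be reusable or removable.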
190 unsigned NumNewCopies = 3;
191 unsigned NumRemovableCopies = 0;
192
193 unsigned OrigSrc0 = MI->getOperand(1).getReg();
194 unsigned OrigSrc1 = MI->getOperand(2).getReg();
195 unsigned Src0 = 0, SubReg0;
196 unsigned Src1 = 0, SubReg1;
197 if (!MRI->def_empty(OrigSrc0)) {
198 MachineRegisterInfo::def_instr_iterator Def =
199 MRI->def_instr_begin(OrigSrc0);
200 assert(std::next(Def) == MRI->def_instr_end() && "Multiple def in SSA!");
201 Src0 = getSrcFromCopy(&*Def, MRI, SubReg0);
202 // If the source was from a copy, we don't need to insert a new copy.
203 if (Src0)
204 --NumNewCopies;
205 // If there are no other users of the original source, we can delete
206 // that instruction.
207 if (Src0 && MRI->hasOneNonDBGUse(OrigSrc0))
208 ++NumRemovableCopies;
209 }
210 if (!MRI->def_empty(OrigSrc1)) {
211 MachineRegisterInfo::def_instr_iterator Def =
212 MRI->def_instr_begin(OrigSrc1);
213 assert(std::next(Def) == MRI->def_instr_end() && "Multiple def in SSA!");
214 Src1 = getSrcFromCopy(&*Def, MRI, SubReg1);
215 if (Src1)
216 --NumNewCopies;
217 // If there are no other users of the original source, we can delete
218 // that instruction.
219 if (Src1 && MRI->hasOneNonDBGUse(OrigSrc1))
220 ++NumRemovableCopies;
221 }
222
223 // If any of the uses of the original instructions is a cross class copy,
224 // that's a copy that will be removable if we transform. Likewise, if
225 // any of the uses is a transformable instruction, it's likely the transforms
226 // will chain, enabling us to save a copy there, too. This is an aggressive
227 // heuristic that approximates the graph based cost analysis described above.
228 unsigned Dst = MI->getOperand(0).getReg();
229 bool AllUsesAreCopies = true;
230 for (MachineRegisterInfo::use_instr_nodbg_iterator
231 Use = MRI->use_instr_nodbg_begin(Dst),
232 E = MRI->use_instr_nodbg_end();
233 Use != E; ++Use) {
234 unsigned SubReg;
235 if (getSrcFromCopy(&*Use, MRI, SubReg) || isTransformable(&*Use))
236 ++NumRemovableCopies;
237 // If the use is an INSERT_SUBREG, that's still something that can
238 // directly use the FPR64, so we don't invalidate AllUsesAreCopies. It's
239 // preferable to have it use the FPR64 in most cases, as if the source
240 // vector is an IMPLICIT_DEF, the INSERT_SUBREG just goes away entirely.
241 // Ditto for a lane insert.
242 else if (Use->getOpcode() == AArch64::INSERT_SUBREG ||
243 Use->getOpcode() == AArch64::INSvi64gpr)
244 ;
245 else
246 AllUsesAreCopies = false;
247 }
248 // If all of the uses of the original destination register are copies to
249 // FPR64, then we won't end up having a new copy back to GPR64 either.
250 if (AllUsesAreCopies)
251 --NumNewCopies;
252
253 // If a transform will not increase the number of cross-class copies required,
254 // return true.
255 if (NumNewCopies <= NumRemovableCopies)
256 return true;
257
258 // Finally, even if we otherwise wouldn't transform, check if we're forcing
259 // transformation of everything.
260 return TransformAll;
261 }
262
263 static MachineInstr *insertCopy(const AArch64InstrInfo *TII, MachineInstr *MI,
264 unsigned Dst, unsigned Src, bool IsKill) {
265 MachineInstrBuilder MIB =
266 BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(AArch64::COPY),
267 Dst)
268 .addReg(Src, getKillRegState(IsKill));
269 DEBUG(dbgs() << " adding copy: " << *MIB);
270 ++NumCopiesInserted;
271 return MIB;
272 }
273
274 // transformInstruction - Perform the transformation of an instruction
275 // to its equivalent AdvSIMD scalar instruction. Update inputs and outputs
276 // to be the correct register class, minimizing cross-class copies.
277 void AArch64AdvSIMDScalar::transformInstruction(MachineInstr *MI) {
278 DEBUG(dbgs() << "Scalar transform: " << *MI);
279
280 MachineBasicBlock *MBB = MI->getParent();
281 int OldOpc = MI->getOpcode();
282 int NewOpc = getTransformOpcode(OldOpc);
283 assert(OldOpc != NewOpc && "transform an instruction to itself?!");
284
285 // Check if we need a copy for the source registers.
286 unsigned OrigSrc0 = MI->getOperand(1).getReg();
287 unsigned OrigSrc1 = MI->getOperand(2).getReg();
288 unsigned Src0 = 0, SubReg0;
289 unsigned Src1 = 0, SubReg1;
290 if (!MRI->def_empty(OrigSrc0)) {
291 MachineRegisterInfo::def_instr_iterator Def =
292 MRI->def_instr_begin(OrigSrc0);
293 assert(std::next(Def) == MRI->def_instr_end() && "Multiple def in SSA!");
294 Src0 = getSrcFromCopy(&*Def, MRI, SubReg0);
295 // If there are no other users of the original source, we can delete
296 // that instruction.
297 if (Src0 && MRI->hasOneNonDBGUse(OrigSrc0)) {
298 assert(Src0 && "Can't delete copy w/o a valid original source!");
299 Def->eraseFromParent();
300 ++NumCopiesDeleted;
301 }
302 }
303 if (!MRI->def_empty(OrigSrc1)) {
304 MachineRegisterInfo::def_instr_iterator Def =
305 MRI->def_instr_begin(OrigSrc1);
306 assert(std::next(Def) == MRI->def_instr_end() && "Multiple def in SSA!");
307 Src1 = getSrcFromCopy(&*Def, MRI, SubReg1);
308 // If there are no other users of the original source, we can delete
309 // that instruction.
310 if (Src1 && MRI->hasOneNonDBGUse(OrigSrc1)) {
311 assert(Src1 && "Can't delete copy w/o a valid original source!");
312 Def->eraseFromParent();
313 ++NumCopiesDeleted;
314 }
315 }
316 // If we weren't able to reference the original source directly, create a
317 // copy.
318 if (!Src0) {
319 SubReg0 = 0;
320 Src0 = MRI->createVirtualRegister(&AArch64::FPR64RegClass);
321 insertCopy(TII, MI, Src0, OrigSrc0, true);
322 }
323 if (!Src1) {
324 SubReg1 = 0;
325 Src1 = MRI->createVirtualRegister(&AArch64::FPR64RegClass);
326 insertCopy(TII, MI, Src1, OrigSrc1, true);
327 }
328
329 // Create a vreg for the destination.
330 // FIXME: No need to do this if the ultimate user expects an FPR64.
331 // Check for that and avoid the copy if possible.
332 unsigned Dst = MRI->createVirtualRegister(&AArch64::FPR64RegClass);
333
334 // For now, all of the new instructions have the same simple three-register
335 // form, so no need to special case based on what instruction we're
336 // building.
337 BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(NewOpc), Dst)
338 .addReg(Src0, getKillRegState(true), SubReg0)
339 .addReg(Src1, getKillRegState(true), SubReg1);
340
341 // Now copy the result back out to a GPR.
342 // FIXME: Try to avoid this if all uses could actually just use the FPR64
343 // directly.
344 insertCopy(TII, MI, MI->getOperand(0).getReg(), Dst, true);
345
346 // Erase the old instruction.
347 MI->eraseFromParent();
348
349 ++NumScalarInsnsUsed;
350 }
351
352 // processMachineBasicBlock - Main optimization loop.
353 bool AArch64AdvSIMDScalar::processMachineBasicBlock(MachineBasicBlock *MBB) {
354 bool Changed = false;
355 for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;) {
356 MachineInstr *MI = I;
357 ++I;
358 if (isProfitableToTransform(MI)) {
359 transformInstruction(MI);
360 Changed = true;
361 }
362 }
363 return Changed;
364 }
365
366 // runOnMachineFunction - Pass entry point from PassManager.
367 bool AArch64AdvSIMDScalar::runOnMachineFunction(MachineFunction &mf) {
368 bool Changed = false;
369 DEBUG(dbgs() << "***** AArch64AdvSIMDScalar *****\n");
370
371 const TargetMachine &TM = mf.getTarget();
372 MRI = &mf.getRegInfo();
373 TII = static_cast<const AArch64InstrInfo *>(TM.getInstrInfo());
374
375 // Just check things on a one-block-at-a-time basis.
376 for (MachineFunction::iterator I = mf.begin(), E = mf.end(); I != E; ++I)
377 if (processMachineBasicBlock(I))
378 Changed = true;
379 return Changed;
380 }
381
382 // createAArch64AdvSIMDScalar - Factory function used by AArch64TargetMachine
383 // to add the pass to the PassManager.
384 FunctionPass *llvm::createAArch64AdvSIMDScalar() {
385 return new AArch64AdvSIMDScalar();
386 }
0 //===-- AArch64AsmPrinter.cpp - AArch64 LLVM assembly writer --------------===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains a printer that converts from our internal representation
10 // of machine-dependent LLVM code to the AArch64 assembly language.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "AArch64.h"
15 #include "AArch64MachineFunctionInfo.h"
16 #include "AArch64MCInstLower.h"
17 #include "AArch64RegisterInfo.h"
18 #include "AArch64Subtarget.h"
19 #include "InstPrinter/AArch64InstPrinter.h"
20 #include "llvm/ADT/SmallString.h"
21 #include "llvm/ADT/StringSwitch.h"
22 #include "llvm/ADT/Twine.h"
23 #include "llvm/CodeGen/AsmPrinter.h"
24 #include "llvm/CodeGen/MachineInstr.h"
25 #include "llvm/CodeGen/StackMaps.h"
26 #include "llvm/CodeGen/MachineModuleInfoImpls.h"
27 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
28 #include "llvm/IR/DataLayout.h"
29 #include "llvm/IR/DebugInfo.h"
30 #include "llvm/MC/MCAsmInfo.h"
31 #include "llvm/MC/MCContext.h"
32 #include "llvm/MC/MCInst.h"
33 #include "llvm/MC/MCInstBuilder.h"
34 #include "llvm/MC/MCLinkerOptimizationHint.h"
35 #include "llvm/MC/MCStreamer.h"
36 #include "llvm/Support/Debug.h"
37 #include "llvm/Support/TargetRegistry.h"
38 using namespace llvm;
39
40 #define DEBUG_TYPE "asm-printer"
41
42 namespace {
43
44 class AArch64AsmPrinter : public AsmPrinter {
45 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
46 /// make the right decision when printing asm code for different targets.
47 const AArch64Subtarget *Subtarget;
48
49 AArch64MCInstLower MCInstLowering;
50 StackMaps SM;
51
52 public:
53 AArch64AsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
54 : AsmPrinter(TM, Streamer),
55 Subtarget(&TM.getSubtarget<AArch64Subtarget>()),
56 MCInstLowering(OutContext, *Mang, *this), SM(*this), AArch64FI(nullptr),
57 LOHLabelCounter(0) {}
58
59 const char *getPassName() const override {
60 return "AArch64 Assembly Printer";
61 }
62
63 /// \brief Wrapper for MCInstLowering.lowerOperand() for the
64 /// tblgen'erated pseudo lowering.
65 bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const {
66 return MCInstLowering.lowerOperand(MO, MCOp);
67 }
68
69 void LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
70 const MachineInstr &MI);
71 void LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
72 const MachineInstr &MI);
73 /// \brief tblgen'erated driver function for lowering simple MI->MC
74 /// pseudo instructions.
75 bool emitPseudoExpansionLowering(MCStreamer &OutStreamer,
76 const MachineInstr *MI);
77
78 void EmitInstruction(const MachineInstr *MI) override;
79
80 void getAnalysisUsage(AnalysisUsage &AU) const override {
81 AsmPrinter::getAnalysisUsage(AU);
82 AU.setPreservesAll();
83 }
84
85 bool runOnMachineFunction(MachineFunction &F) override {
86 AArch64FI = F.getInfo<AArch64FunctionInfo>();
87 return AsmPrinter::runOnMachineFunction(F);
88 }
89
90 private:
91 MachineLocation getDebugValueLocation(const MachineInstr *MI) const;
92 void printOperand(const MachineInstr *MI, unsigned OpNum, raw_ostream &O);
93 bool printAsmMRegister(const MachineOperand &MO, char Mode, raw_ostream &O);
94 bool printAsmRegInClass(const MachineOperand &MO,
95 const TargetRegisterClass *RC, bool isVector,
96 raw_ostream &O);
97
98 bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
99 unsigned AsmVariant, const char *ExtraCode,
100 raw_ostream &O) override;
101 bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
102 unsigned AsmVariant, const char *ExtraCode,
103 raw_ostream &O) override;
104
105 void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
106
107 void EmitFunctionBodyEnd() override;
108
109 MCSymbol *GetCPISymbol(unsigned CPID) const override;
110 void EmitEndOfAsmFile(Module &M) override;
111 AArch64FunctionInfo *AArch64FI;
112
113 /// \brief Emit the LOHs contained in AArch64FI.
114 void EmitLOHs();
115
116 typedef std::map<const MachineInstr *, MCSymbol *> MInstToMCSymbol;
117 MInstToMCSymbol LOHInstToLabel;
118 unsigned LOHLabelCounter;
119 };
120
121 } // end of anonymous namespace
122
123 //===----------------------------------------------------------------------===//
124
125 void AArch64AsmPrinter::EmitEndOfAsmFile(Module &M) {
126 if (Subtarget->isTargetMachO()) {
127 // Funny Darwin hack: This flag tells the linker that no global symbols
128 // contain code that falls through to other global symbols (e.g. the obvious
129 // implementation of multiple entry points). If this doesn't occur, the
130 // linker can safely perform dead code stripping. Since LLVM never
131 // generates code that does this, it is always safe to set.
132 OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
133 SM.serializeToStackMapSection();
134 }
135
136 // Emit a .data.rel section containing any stubs that were created.
137 if (Subtarget->isTargetELF()) {
138 const TargetLoweringObjectFileELF &TLOFELF =
139 static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering());
140
141 MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>();
142
143 // Output stubs for external and common global variables.
144 MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
145 if (!Stubs.empty()) {
146 OutStreamer.SwitchSection(TLOFELF.getDataRelSection());
147 const DataLayout *TD = TM.getDataLayout();
148
149 for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
150 OutStreamer.EmitLabel(Stubs[i].first);
151 OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(),
152 TD->getPointerSize(0));
153 }
154 Stubs.clear();
155 }
156 }
157
158 }
159
160 MachineLocation
161 AArch64AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const {
162 MachineLocation Location;
163 assert(MI->getNumOperands() == 4 && "Invalid no. of machine operands!");
164 // Frame address. Currently handles register +- offset only.
165 if (MI->getOperand(0).isReg() && MI->getOperand(1).isImm())
166 Location.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm());
167 else {
168 DEBUG(dbgs() << "DBG_VALUE instruction ignored! " << *MI << "\n");
169 }
170 return Location;
171 }
172
173 void AArch64AsmPrinter::EmitLOHs() {
174 SmallVector<MCSymbol *, 3> MCArgs;
175
176 for (const auto &D : AArch64FI->getLOHContainer()) {
177 for (const MachineInstr *MI : D.getArgs()) {
178 MInstToMCSymbol::iterator LabelIt = LOHInstToLabel.find(MI);
179 assert(LabelIt != LOHInstToLabel.end() &&
180 "Label hasn't been inserted for LOH related instruction");
181 MCArgs.push_back(LabelIt->second);
182 }
183 OutStreamer.EmitLOHDirective(D.getKind(), MCArgs);
184 MCArgs.clear();
185 }
186 }
187
188 void AArch64AsmPrinter::EmitFunctionBodyEnd() {
189 if (!AArch64FI->getLOHRelated().empty())
190 EmitLOHs();
191 }
192
193 /// GetCPISymbol - Return the symbol for the specified constant pool entry.
194 MCSymbol *AArch64AsmPrinter::GetCPISymbol(unsigned CPID) const {
195 // Darwin uses a linker-private symbol name for constant-pools (to
196 // avoid addends on the relocation?), ELF has no such concept and
197 // uses a normal private symbol.
198 if (getDataLayout().getLinkerPrivateGlobalPrefix()[0])
199 return OutContext.GetOrCreateSymbol(
200 Twine(getDataLayout().getLinkerPrivateGlobalPrefix()) + "CPI" +
201 Twine(getFunctionNumber()) + "_" + Twine(CPID));
202
203 return OutContext.GetOrCreateSymbol(
204 Twine(getDataLayout().getPrivateGlobalPrefix()) + "CPI" +
205 Twine(getFunctionNumber()) + "_" + Twine(CPID));
206 }
207
208 void AArch64AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNum,
209 raw_ostream &O) {
210 const MachineOperand &MO = MI->getOperand(OpNum);
211 switch (MO.getType()) {
212 default:
213 assert(0 && "<unknown operand type>");
214 case MachineOperand::MO_Register: {
215 unsigned Reg = MO.getReg();
216 assert(TargetRegisterInfo::isPhysicalRegister(Reg));
217 assert(!MO.getSubReg() && "Subregs should be eliminated!");
218 O << AArch64InstPrinter::getRegisterName(Reg);
219 break;
220 }
221 case MachineOperand::MO_Immediate: {
222 int64_t Imm = MO.getImm();
223 O << '#' << Imm;
224 break;
225 }
226 }
227 }
228
229 bool AArch64AsmPrinter::printAsmMRegister(const MachineOperand &MO, char Mode,
230 raw_ostream &O) {
231 unsigned Reg = MO.getReg();
232 switch (Mode) {
233 default:
234 return true; // Unknown mode.
235 case 'w':
236 Reg = getWRegFromXReg(Reg);
237 break;
238 case 'x':
239 Reg = getXRegFromWReg(Reg);
240 break;
241 }
242
243 O << AArch64InstPrinter::getRegisterName(Reg);
244 return false;
245 }
246
247 // Prints the register in MO using class RC using the offset in the
248 // new register class. This should not be used for cross class
249 // printing.
250 bool AArch64AsmPrinter::printAsmRegInClass(const MachineOperand &MO,
251 const TargetRegisterClass *RC,
252 bool isVector, raw_ostream &O) {
253 assert(MO.isReg() && "Should only get here with a register!");
254 const AArch64RegisterInfo *RI =
255 static_cast(TM.getRegisterInfo());
256 unsigned Reg = MO.getReg();
257 unsigned RegToPrint = RC->getRegister(RI->getEncodingValue(Reg));
258 assert(RI->regsOverlap(RegToPrint, Reg));
259 O << AArch64InstPrinter::getRegisterName(
260 RegToPrint, isVector ? AArch64::vreg : AArch64::NoRegAltName);
261 return false;
262 }
263
264 bool AArch64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
265 unsigned AsmVariant,
266 const char *ExtraCode, raw_ostream &O) {
267 const MachineOperand &MO = MI->getOperand(OpNum);
268 // Does this asm operand have a single letter operand modifier?
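// The modifier comes from the operand template in the inline asm string,
// e.g. (illustrative) "%w0" selects the 32-bit W form and "%q1" the 128-bit
// Q form of the corresponding operand.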
269 if (ExtraCode && ExtraCode[0]) {
270 if (ExtraCode[1] != 0)
271 return true; // Unknown modifier.
272
273 switch (ExtraCode[0]) {
274 default:
275 return true; // Unknown modifier.
276 case 'w': // Print W register
277 case 'x': // Print X register
278 if (MO.isReg())
279 return printAsmMRegister(MO, ExtraCode[0], O);
280 if (MO.isImm() && MO.getImm() == 0) {
281 unsigned Reg = ExtraCode[0] == 'w' ? AArch64::WZR : AArch64::XZR;
282 O << AArch64InstPrinter::getRegisterName(Reg);
283 return false;
284 }
285 printOperand(MI, OpNum, O);
286 return false;
287 case 'b': // Print B register.
288 case 'h': // Print H register.
289 case 's': // Print S register.
290 case 'd': // Print D register.
291 case 'q': // Print Q register.
292 if (MO.isReg()) {
293 const TargetRegisterClass *RC;
294 switch (ExtraCode[0]) {
295 case 'b':
296 RC = &AArch64::FPR8RegClass;
297 break;
298 case 'h':
299 RC = &AArch64::FPR16RegClass;
300 break;
301 case 's':
302 RC = &AArch64::FPR32RegClass;
303 break;
304 case 'd':
305 RC = &AArch64::FPR64RegClass;
306 break;
307 case 'q':
308 RC = &AArch64::FPR128RegClass;
309 break;
310 default:
311 return true;
312 }
313 return printAsmRegInClass(MO, RC, false /* vector */, O);
314 }
315 printOperand(MI, OpNum, O);
316 return false;
317 }
318 }
319
320 // According to ARM, we should emit x and v registers unless we have a
321 // modifier.
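// E.g. an unmodified GPR operand is printed as "x0" rather than "w0", and an
// unmodified FP/SIMD operand is printed as "v0" rather than "d0" or "s0".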
322 if (MO.isReg()) {
323 unsigned Reg = MO.getReg();
324
325 // If this is a w or x register, print an x register.
326 if (AArch64::GPR32allRegClass.contains(Reg) ||
327 AArch64::GPR64allRegClass.contains(Reg))
328 return printAsmMRegister(MO, 'x', O);
329
330 // If this is a b, h, s, d, or q register, print it as a v register.
331 return printAsmRegInClass(MO, &AArch64::FPR128RegClass, true /* vector */,
332 O);
333 }
334
335 printOperand(MI, OpNum, O);
336 return false;
337 }
338
339 bool AArch64AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
340 unsigned OpNum,
341 unsigned AsmVariant,
342 const char *ExtraCode,
343 raw_ostream &O) {
344 if (ExtraCode && ExtraCode[0])
345 return true; // Unknown modifier.
346
347 const MachineOperand &MO = MI->getOperand(OpNum);
348 assert(MO.isReg() && "unexpected inline asm memory operand");
349 O << "[" << AArch64InstPrinter::getRegisterName(MO.getReg()) << "]";
350 return false;
351 }
352
353 void AArch64AsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
354 raw_ostream &OS) {
355 unsigned NOps = MI->getNumOperands();
356 assert(NOps == 4);
357 OS << '\t' << MAI->getCommentString() << "DEBUG_VALUE: ";
358 // cast away const; DIetc do not take const operands for some reason.
359 DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps - 1).getMetadata()));
360 OS << V.getName();
361 OS << " <- ";
362 // Frame address. Currently handles register +- offset only.
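// The resulting comment looks roughly like
//   DEBUG_VALUE: myvar <- [x29+#16]+#0
// (variable name, frame register plus offset, then the trailing offset
// operand); "myvar" and the values are purely illustrative.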
363 assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm());
364 OS << '[';
365 printOperand(MI, 0, OS);
366 OS << '+';
367 printOperand(MI, 1, OS);
368 OS << ']';
369 OS << "+";
370 printOperand(MI, NOps - 2, OS);
371 }
372
373 void AArch64AsmPrinter::LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
374 const MachineInstr &MI) {
375 unsigned NumNOPBytes = MI.getOperand(1).getImm();
376
377 SM.recordStackMap(MI);
378 // Emit padding.
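// HINT #0 is the architectural encoding of NOP, so each iteration below
// emits one 4-byte NOP.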
379 assert(NumNOPBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
380 for (unsigned i = 0; i < NumNOPBytes; i += 4)
381 EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::HINT).addImm(0));
382 }
383
384 // Lower a patchpoint of the form:
385 // [<def>], <id>, <numBytes>, <target>, <numArgs>
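// When <target> is non-zero, the call address is materialized into a scratch
// register with one MOVZ and two MOVK instructions followed by a BLR (16
// bytes of code); the remainder of <numBytes> is filled with NOPs.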
386 void AArch64AsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
387 const MachineInstr &MI) {
388 SM.recordPatchPoint(MI);
389
390 PatchPointOpers Opers(&MI);
391
392 int64_t CallTarget = Opers.getMetaOper(PatchPointOpers::TargetPos).getImm();
393 unsigned EncodedBytes = 0;
394 if (CallTarget) {
395 assert((CallTarget & 0xFFFFFFFFFFFF) == CallTarget &&
396 "High 16 bits of call target should be zero.");
397 unsigned ScratchReg = MI.getOperand(Opers.getNextScratchIdx()).getReg();
398 EncodedBytes = 16;
399 // Materialize the jump address:
400 EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::MOVZWi)
401 .addReg(ScratchReg)
402 .addImm((CallTarget >> 32) & 0xFFFF)
403 .addImm(32));
404 EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::MOVKWi)
405 .addReg(ScratchReg)
406 .addReg(ScratchReg)
407 .addImm((CallTarget >> 16) & 0xFFFF)
408 .addImm(16));
409 EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::MOVKWi)
410 .addReg(ScratchReg)
411 .addReg(ScratchReg)
412 .addImm(CallTarget & 0xFFFF)
413 .addImm(0));
414 EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::BLR).addReg(ScratchReg));
415 }
416 // Emit padding.
417 unsigned NumBytes = Opers.getMetaOper(PatchPointOpers::NBytesPos).getImm();
418 assert(NumBytes >= EncodedBytes &&
419 "Patchpoint can't request size less than the length of a call.");
420 assert((NumBytes - EncodedBytes) % 4 == 0 &&
421 "Invalid number of NOP bytes requested!");
422 for (unsigned i = EncodedBytes; i < NumBytes; i += 4)
423 EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::HINT).addImm(0));
424 }
425
426 // Simple pseudo-instructions have their lowering (with expansion to real
427 // instructions) auto-generated.
428 #include "AArch64GenMCPseudoLowering.inc"
429
430 void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
431 // Do any auto-generated pseudo lowerings.
432 if (emitPseudoExpansionLowering(OutStreamer, MI))
433 return;
434
435 if (AArch64FI->getLOHRelated().count(MI)) {
436 // Generate a label for LOH related instruction
437 MCSymbol *LOHLabel = GetTempSymbol("loh", LOHLabelCounter++);
438 // Associate the instruction with the label
439 LOHInstToLabel[MI] = LOHLabel;
440 OutStreamer.EmitLabel(LOHLabel);
441 }
442
443 // Do any manual lowerings.
444 switch (MI->getOpcode()) {
445 default:
446 break;
447 case AArch64::DBG_VALUE: {
448 if (isVerbose() && OutStreamer.hasRawTextSupport()) {
449 SmallString<128> TmpStr;
450 raw_svector_ostream OS(TmpStr);
451 PrintDebugValueComment(MI, OS);
452 OutStreamer.EmitRawText(StringRef(OS.str()));
453 }
454 return;
455 }
456
457 // Tail calls use pseudo instructions so they have the proper code-gen
458 // attributes (isCall, isReturn, etc.). We lower them to the real
459 // instruction here.
460 case AArch64::TCRETURNri: {
461 MCInst TmpInst;
462 TmpInst.setOpcode(AArch64::BR);
463 TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
464 EmitToStreamer(OutStreamer, TmpInst);
465 return;
466 }
467 case AArch64::TCRETURNdi: {
468 MCOperand Dest;
469 MCInstLowering.lowerOperand(MI->getOperand(0), Dest);
470 MCInst TmpInst;
471 TmpInst.setOpcode(AArch64::B);
472 TmpInst.addOperand(Dest);
473 EmitToStreamer(OutStreamer, TmpInst);
474 return;
475 }
476 case AArch64::TLSDESC_BLR: {
477 MCOperand Callee, Sym;
478 MCInstLowering.lowerOperand(MI->getOperand(0), Callee);
479 MCInstLowering.lowerOperand(MI->getOperand(1), Sym);
480
481 // First emit a relocation-annotation. This expands to no code, but requests
482 // that the following instruction get an R_AARCH64_TLSDESC_CALL relocation.
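// (In the emitted assembly this appears as a ".tlsdesccall <sym>" directive,
// which produces no bytes, immediately before the blr.)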
483 MCInst TLSDescCall;
484 TLSDescCall.setOpcode(AArch64::TLSDESCCALL);
485 TLSDescCall.addOperand(Sym);
486 EmitToStreamer(OutStreamer, TLSDescCall);
487
488 // Other than that it's just a normal indirect call to the function loaded
489 // from the descriptor.
490 MCInst BLR;
491 BLR.setOpcode(AArch64::BLR);
492 BLR.addOperand(Callee);
493 EmitToStreamer(OutStreamer, BLR);
494
495 return;
496 }
497
498 case TargetOpcode::STACKMAP:
499 return LowerSTACKMAP(OutStreamer, SM, *MI);
500
501 case TargetOpcode::PATCHPOINT:
502 return LowerPATCHPOINT(OutStreamer, SM, *MI);
503 }
504
505 // Finally, do the automated lowerings for everything else.
506 MCInst TmpInst;
507 MCInstLowering.Lower(MI, TmpInst);
508 EmitToStreamer(OutStreamer, TmpInst);
509 }
510
511 // Force static initialization.
512 extern "C" void LLVMInitializeAArch64AsmPrinter() {
513 RegisterAsmPrinter<AArch64AsmPrinter> X(TheAArch64leTarget);
514 RegisterAsmPrinter<AArch64AsmPrinter> Y(TheAArch64beTarget);
515
516 RegisterAsmPrinter<AArch64AsmPrinter> Z(TheARM64leTarget);
517 RegisterAsmPrinter<AArch64AsmPrinter> W(TheARM64beTarget);
518 }
0 //===-- AArch64BranchRelaxation.cpp - AArch64 branch relaxation -----------===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //===----------------------------------------------------------------------===//
10
11 #include "AArch64.h"
12 #include "AArch64InstrInfo.h"
13 #include "AArch64MachineFunctionInfo.h"
14 #include "llvm/ADT/SmallVector.h"
15 #include "llvm/CodeGen/MachineFunctionPass.h"
16 #include "llvm/CodeGen/MachineInstrBuilder.h"
17 #include "llvm/Support/Debug.h"
18 #include "llvm/Support/ErrorHandling.h"
19 #include "llvm/Support/Format.h"
20 #include "llvm/Support/raw_ostream.h"
21 #include "llvm/ADT/Statistic.h"
22 #include "llvm/Support/CommandLine.h"
23 using namespace llvm;
24
25 #define DEBUG_TYPE "aarch64-branch-relax"
26
27 static cl::opt<bool>
28 BranchRelaxation("aarch64-branch-relax", cl::Hidden, cl::init(true),
29 cl::desc("Relax out of range conditional branches"));
30
31 static cl::opt<unsigned>
32 TBZDisplacementBits("aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
33 cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
34
35 static cl::opt<unsigned>
36 CBZDisplacementBits("aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
37 cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
38
39 static cl::opt<unsigned>
40 BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
41 cl::desc("Restrict range of Bcc instructions (DEBUG)"));
42
43 STATISTIC(NumSplit, "Number of basic blocks split");
44 STATISTIC(NumRelaxed, "Number of conditional branches relaxed");
45
46 namespace {
47 class AArch64BranchRelaxation : public MachineFunctionPass {
48 /// BasicBlockInfo - Information about the offset and size of a single
49 /// basic block.
50 struct BasicBlockInfo {
51 /// Offset - Distance from the beginning of the function to the beginning
52 /// of this basic block.
53 ///
54 /// The offset is always aligned as required by the basic block.
55 unsigned Offset;
56
57 /// Size - Size of the basic block in bytes. If the block contains
58 /// inline assembly, this is a worst case estimate.
59 ///
60 /// The size does not include any alignment padding whether from the
61 /// beginning of the block, or from an aligned jump table at the end.
62 unsigned Size;
63
64 BasicBlockInfo() : Offset(0), Size(0) {}
65
66 /// Compute the offset immediately following this block. If LogAlign is
67 /// specified, return the offset the successor block will get if it has
68 /// this alignment.
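/// For example, with Offset = 0x104, Size = 0x6 and LogAlign = 3, the raw
/// end of the block is 0x10a and the returned 8-byte-aligned offset is 0x110.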
69 unsigned postOffset(unsigned LogAlign = 0) const {
70 unsigned PO = Offset + Size;
71 unsigned Align = 1 << LogAlign;
72 return (PO + Align - 1) / Align * Align;
73 }
74 };
75
76 SmallVector<BasicBlockInfo, 16> BlockInfo;
77
78 MachineFunction *MF;
79 const AArch64InstrInfo *TII;
80
81 bool relaxBranchInstructions();
82 void scanFunction();
83 MachineBasicBlock *splitBlockBeforeInstr(MachineInstr *MI);
84 void adjustBlockOffsets(MachineBasicBlock &MBB);
85 bool isBlockInRange(MachineInstr *MI, MachineBasicBlock *BB, unsigned Disp);
86 bool fixupConditionalBranch(MachineInstr *MI);
87 void computeBlockSize(const MachineBasicBlock &MBB);
88 unsigned getInstrOffset(MachineInstr *MI) const;
89 void dumpBBs();
90 void verify();
91
92 public:
93 static char ID;
94 AArch64BranchRelaxation() : MachineFunctionPass(ID) {}
95
96 bool runOnMachineFunction(MachineFunction &MF) override;
97
98 const char *getPassName() const override {
99 return "AArch64 branch relaxation pass";
100 }
101 };
102 char AArch64BranchRelaxation::ID = 0;
103 }
104
105 /// verify - check BBOffsets, BBSizes, alignment of islands
106 void AArch64BranchRelaxation::verify() {
107 #ifndef NDEBUG
108 unsigned PrevNum = MF->begin()->getNumber();
109 for (MachineBasicBlock &MBB : *MF) {
110 unsigned Align = MBB.getAlignment();
111 unsigned Num = MBB.getNumber();
112 assert(BlockInfo[Num].Offset % (1u << Align) == 0);
113 assert(!Num || BlockInfo[PrevNum].postOffset() <= BlockInfo[Num].Offset);
114 PrevNum = Num;
115 }
116 #endif
117 }
118
119 /// print block size and offset information - debugging
120 void AArch64BranchRelaxation::dumpBBs() {
121 for (auto &MBB : *MF) {
122 const BasicBlockInfo &BBI = BlockInfo[MBB.getNumber()];
123 dbgs() << format("BB#%u\toffset=%08x\t", MBB.getNumber(), BBI.Offset)
124 << format("size=%#x\n", BBI.Size);
125 }
126 }
127
128 /// BBHasFallthrough - Return true if the specified basic block can fallthrough
129 /// into the block immediately after it.
130 static bool BBHasFallthrough(MachineBasicBlock *MBB) {
131 // Get the next machine basic block in the function.
132 MachineFunction::iterator MBBI = MBB;
133 // Can't fall off end of function.
134 MachineBasicBlock *NextBB = std::next(MBBI);
135 if (NextBB == MBB->getParent()->end())
136 return false;
137
138 for (MachineBasicBlock *S : MBB->successors())
139 if (S == NextBB)
140 return true;
141
142 return false;
143 }
144
145 /// scanFunction - Do the initial scan of the function, building up
146 /// information about each block.
147 void AArch64BranchRelaxation::scanFunction() {
148 BlockInfo.clear();
149 BlockInfo.resize(MF->getNumBlockIDs());
150
151 // First thing, compute the size of all basic blocks, and see if the function
152 // has any inline assembly in it. If so, we have to be conservative about
153 // alignment assumptions, as we don't know for sure the size of any
154 // instructions in the inline assembly.
155 for (MachineBasicBlock &MBB : *MF)
156 computeBlockSize(MBB);
157
158 // Compute block offsets and known bits.
159 adjustBlockOffsets(*MF->begin());
160 }
161
162 /// computeBlockSize - Compute the size for MBB.
163 /// This function updates BlockInfo directly.
164 void AArch64BranchRelaxation::computeBlockSize(const MachineBasicBlock &MBB) {
165 unsigned Size = 0;
166 for (const MachineInstr &MI : MBB)
167 Size += TII->GetInstSizeInBytes(&MI);
168 BlockInfo[MBB.getNumber()].Size = Size;
169 }
170
171 /// getInstrOffset - Return the current offset of the specified machine
172 /// instruction from the start of the function. This offset changes as stuff is
173 /// moved around inside the function.
174 unsigned AArch64BranchRelaxation::getInstrOffset(MachineInstr *MI) const {
175 MachineBasicBlock *MBB = MI->getParent();
176
177 // The offset is composed of two things: the sum of the sizes of all MBB's
178 // before this instruction's block, and the offset from the start of the block
179 // it is in.
180 unsigned Offset = BlockInfo[MBB->getNumber()].Offset;
181
182 // Sum instructions before MI in MBB.
183 for (MachineBasicBlock::iterator I = MBB->begin(); &*I != MI; ++I) {
184 assert(I != MBB->end() && "Didn't find MI in its own basic block?");
185 Offset += TII->GetInstSizeInBytes(I);
186 }
187 return Offset;
188 }
189
190 void AArch64BranchRelaxation::adjustBlockOffsets(MachineBasicBlock &Start) {
191 unsigned PrevNum = Start.getNumber();
192 for (auto &MBB : make_range(MachineFunction::iterator(Start), MF->end())) {
193 unsigned Num = MBB.getNumber();
194 if (!Num) // block zero is never changed from offset zero.
195 continue;
196 // Get the offset and known bits at the end of the layout predecessor.
197 // Include the alignment of the current block.
198 unsigned LogAlign = MBB.getAlignment();
199 BlockInfo[Num].Offset = BlockInfo[PrevNum].postOffset(LogAlign);
200 PrevNum = Num;
201 }
202 }
203
204 /// Split the basic block containing MI into two blocks, which are joined by
205 /// an unconditional branch. Update data structures and renumber blocks to
206 /// account for this change and returns the newly created block.
207 /// NOTE: Successor list of the original BB is out of date after this function,
208 /// and must be updated by the caller! Other transforms follow using this
209 /// utility function, so no point updating now rather than waiting.
210 MachineBasicBlock *
211 AArch64BranchRelaxation::splitBlockBeforeInstr(MachineInstr *MI) {
212 MachineBasicBlock *OrigBB = MI->getParent();
213
214 // Create a new MBB for the code after the OrigBB.
215 MachineBasicBlock *NewBB =
216 MF->CreateMachineBasicBlock(OrigBB->getBasicBlock());
217 MachineFunction::iterator MBBI = OrigBB;
218 ++MBBI;
219 MF->insert(MBBI, NewBB);
220
221 // Splice the instructions starting with MI over to NewBB.
222 NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end());
223
224 // Add an unconditional branch from OrigBB to NewBB.
225 // Note the new unconditional branch is not being recorded.
226 // There doesn't seem to be meaningful DebugInfo available; this doesn't
227 // correspond to anything in the source.
228 BuildMI(OrigBB, DebugLoc(), TII->get(AArch64::B)).addMBB(NewBB);
229
230 // Insert an entry into BlockInfo to align it properly with the block numbers.
231 BlockInfo.insert(BlockInfo.begin() + NewBB->getNumber(), BasicBlockInfo());
232
233 // Figure out how large the OrigBB is. As the first half of the original
234 // block, it cannot contain a tablejump. The size includes
235 // the new jump we added. (It should be possible to do this without
236 // recounting everything, but it's very confusing, and this is rarely
237 // executed.)
238 computeBlockSize(*OrigBB);
239
240 // Figure out how large the NewMBB is. As the second half of the original
241 // block, it may contain a tablejump.
242 computeBlockSize(*NewBB);
243
244 // All BBOffsets following these blocks must be modified.
245 adjustBlockOffsets(*OrigBB);
246
247 ++NumSplit;
248
249 return NewBB;
250 }
251
252 /// isBlockInRange - Returns true if the distance between specific MI and
253 /// specific BB can fit in MI's displacement field.
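/// With the default displacement widths above, the byte range is
/// ((1 << (Bits - 1)) - 1) << 2: 0xffffc (roughly +/-1MiB) for Bcc/CB[N]Z
/// (19 bits) and 0x7ffc (roughly +/-32KiB) for TB[N]Z (14 bits).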
254 bool AArch64BranchRelaxation::isBlockInRange(MachineInstr *MI,
255 MachineBasicBlock *DestBB,
256 unsigned Bits) {
257 unsigned MaxOffs = ((1 << (Bits - 1)) - 1) << 2;
258 unsigned BrOffset = getInstrOffset(MI);
259 unsigned DestOffset = BlockInfo[DestBB->getNumber()].Offset;
260
261 DEBUG(dbgs() << "Branch of destination BB#" << DestBB->getNumber()
262 << " from BB#" << MI->getParent()->getNumber()
263 << " max delta=" << MaxOffs << " from " << getInstrOffset(MI)
264 << " to " << DestOffset << " offset "
265 << int(DestOffset - BrOffset) << "\t" << *MI);
266
267 // Branch before the Dest.
268 if (BrOffset <= DestOffset)
269 return (DestOffset - BrOffset <= MaxOffs);
270 return (BrOffset - DestOffset <= MaxOffs);
271 }
272
273 static bool isConditionalBranch(unsigned Opc) {
274 switch (Opc) {
275 default:
276 return false;
277 case AArch64::TBZW:
278 case AArch64::TBNZW:
279 case AArch64::TBZX:
280 case AArch64::TBNZX:
281 case AArch64::CBZW:
282 case AArch64::CBNZW:
283 case AArch64::CBZX:
284 case AArch64::CBNZX:
285 case AArch64::Bcc:
286 return true;
287 }
288 }
289
290 static MachineBasicBlock *getDestBlock(MachineInstr *MI) {
291 switch (MI->getOpcode()) {
292 default:
293 assert(0 && "unexpected opcode!");
294 case AArch64::TBZW:
295 case AArch64::TBNZW:
296 case AArch64::TBZX:
297 case AArch64::TBNZX:
298 return MI->getOperand(2).getMBB();
299 case AArch64::CBZW:
300 case AArch64::CBNZW:
301 case AArch64::CBZX:
302 case AArch64::CBNZX:
303 case AArch64::Bcc:
304 return MI->getOperand(1).getMBB();
305 }
306 }
307
308 static unsigned getOppositeConditionOpcode(unsigned Opc) {
309 switch (Opc) {
310 default:
311 assert(0 && "unexpected opcode!");
312 case AArch64::TBNZW: return AArch64::TBZW;
313 case AArch64::TBNZX: return AArch64::TBZX;
314 case AArch64::TBZW: return AArch64::TBNZW;
315 case AArch64::TBZX: return AArch64::TBNZX;
316 case AArch64::CBNZW: return AArch64::CBZW;
317 case AArch64::CBNZX: return AArch64::CBZX;
318 case AArch64::CBZW: return AArch64::CBNZW;
319 case AArch64::CBZX: return AArch64::CBNZX;
320 case AArch64::Bcc: return AArch64::Bcc; // Condition is an operand for Bcc.
321 }
322 }
323
324 static unsigned getBranchDisplacementBits(unsigned Opc) {
325 switch (Opc) {
326