llvm.org GIT mirror llvm / testing
Creating branches/google/testing and tags/google/testing/2017-11-14 from r317716 git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/google/testing@318248 91177308-0d34-0410-b5e6-96231b3b80d8 David L. Jones 1 year, 10 months ago
866 changed file(s) with 36997 addition(s) and 14516 deletion(s). Raw diff Collapse all Expand all
11
22 cmake_minimum_required(VERSION 3.4.3)
33
4 if(POLICY CMP0022)
5 cmake_policy(SET CMP0022 NEW) # automatic when 2.8.12 is required
6 endif()
7
8 if (POLICY CMP0051)
9 # CMake 3.1 and higher include generator expressions of the form
10 # $ in the SOURCES property. These need to be
11 # stripped everywhere that access the SOURCES property, so we just
12 # defer to the OLD behavior of not including generator expressions
13 # in the output for now.
14 cmake_policy(SET CMP0051 OLD)
15 endif()
16
17 if(POLICY CMP0056)
18 cmake_policy(SET CMP0056 NEW)
19 endif()
20
21 if(POLICY CMP0057)
22 cmake_policy(SET CMP0057 NEW)
23 endif()
4 cmake_policy(SET CMP0022 NEW)
5
6 cmake_policy(SET CMP0048 NEW)
7
8 # CMake 3.1 and higher include generator expressions of the form
9 # $ in the SOURCES property. These need to be
10 # stripped everywhere that access the SOURCES property, so we just
11 # defer to the OLD behavior of not including generator expressions
12 # in the output for now.
13 cmake_policy(SET CMP0051 OLD)
14
15 cmake_policy(SET CMP0056 NEW)
16
17 cmake_policy(SET CMP0057 NEW)
2418
2519 if(NOT DEFINED LLVM_VERSION_MAJOR)
2620 set(LLVM_VERSION_MAJOR 6)
3327 endif()
3428 if(NOT DEFINED LLVM_VERSION_SUFFIX)
3529 set(LLVM_VERSION_SUFFIX svn)
36 endif()
37
38 if (POLICY CMP0048)
39 cmake_policy(SET CMP0048 NEW)
40 set(cmake_3_0_PROJ_VERSION
41 VERSION ${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH})
42 set(cmake_3_0_LANGUAGES LANGUAGES)
4330 endif()
4431
4532 if (NOT PACKAGE_VERSION)
5542 endif()
5643
5744 project(LLVM
58 ${cmake_3_0_PROJ_VERSION}
59 ${cmake_3_0_LANGUAGES}
60 C CXX ASM)
45 VERSION ${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}
46 LANGUAGES C CXX ASM)
6147
6248 if (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
6349 message(STATUS "No build type selected, default to Debug")
191177 # Generate a CompilationDatabase (compile_commands.json file) for our build,
192178 # for use by clang_complete, YouCompleteMe, etc.
193179 set(CMAKE_EXPORT_COMPILE_COMMANDS 1)
180
181 option(LLVM_INSTALL_BINUTILS_SYMLINKS
182 "Install symlinks from the binutils tool names to the corresponding LLVM tools." OFF)
194183
195184 option(LLVM_INSTALL_UTILS "Include utility binaries in the 'install' target." OFF)
196185
765754 add_custom_target(srpm
766755 COMMAND cpack -G TGZ --config CPackSourceConfig.cmake -B ${LLVM_SRPM_DIR}/SOURCES
767756 COMMAND rpmbuild -bs --define '_topdir ${LLVM_SRPM_DIR}' ${LLVM_SRPM_BINARY_SPECFILE})
757 set_target_properties(srpm PROPERTIES FOLDER "Misc")
768758
769759
770760 # They are not referenced. See set_output_directory().
977967 # Installing the headers needs to depend on generating any public
978968 # tablegen'd headers.
979969 add_custom_target(llvm-headers DEPENDS intrinsics_gen)
970 set_target_properties(llvm-headers PROPERTIES FOLDER "Misc")
980971
981972 if (NOT CMAKE_CONFIGURATION_TYPES)
982973 add_custom_target(install-llvm-headers
156156 set(HAVE_TERMINFO 0)
157157 endif()
158158
159 find_library(ICONV_LIBRARY_PATH NAMES iconv libiconv libiconv-2)
159 find_library(ICONV_LIBRARY_PATH NAMES iconv libiconv libiconv-2 c)
160160 set(LLVM_LIBXML2_ENABLED 0)
161161 set(LIBXML2_FOUND 0)
162162 if((LLVM_ENABLE_LIBXML2) AND ((CMAKE_SYSTEM_NAME MATCHES "Linux") AND (ICONV_LIBRARY_PATH) OR APPLE))
220220 COMMAND "${CMAKE_COMMAND}" "-E" "make_directory" "${LLVM_LIBRARY_DIR}/ocaml/llvm")
221221 add_custom_target("ocaml_all")
222222 set_target_properties(ocaml_all PROPERTIES FOLDER "Misc")
223 set_target_properties(ocaml_make_directory PROPERTIES FOLDER "Misc")
1515 COMMAND ${CMAKE_COMMAND} -E make_directory ${LLVM_${target_name}_BUILD}
1616 COMMENT "Creating ${LLVM_${target_name}_BUILD}...")
1717
18 add_custom_target(CREATE_LLVM_${target_name}
19 DEPENDS ${LLVM_${target_name}_BUILD})
20
1821 add_custom_command(OUTPUT ${LLVM_${target_name}_BUILD}/CMakeCache.txt
1922 COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}"
2023 ${CROSS_TOOLCHAIN_FLAGS_${target_name}} ${CMAKE_SOURCE_DIR}
2124 -DLLVM_TARGET_IS_CROSSCOMPILE_HOST=TRUE
2225 WORKING_DIRECTORY ${LLVM_${target_name}_BUILD}
23 DEPENDS ${LLVM_${target_name}_BUILD}
26 DEPENDS CREATE_LLVM_${target_name}
2427 COMMENT "Configuring ${target_name} LLVM...")
2528
2629 add_custom_target(CONFIGURE_LLVM_${target_name}
231231 set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -m32")
232232 set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -m32")
233233 set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} -m32")
234
235 # FIXME: CMAKE_SIZEOF_VOID_P is still 8
236 add_definitions(-D_LARGEFILE_SOURCE)
237 add_definitions(-D_FILE_OFFSET_BITS=64)
234238 endif( LLVM_BUILD_32_BITS )
235239 endif( CMAKE_SIZEOF_VOID_P EQUAL 8 AND NOT WIN32 )
236240
241245 set(LLVM_FORCE_SMALLFILE_FOR_ANDROID TRUE)
242246 endif()
243247 if( CMAKE_SIZEOF_VOID_P EQUAL 4 AND NOT LLVM_FORCE_SMALLFILE_FOR_ANDROID)
248 # FIXME: It isn't handled in LLVM_BUILD_32_BITS.
244249 add_definitions( -D_LARGEFILE_SOURCE )
245250 add_definitions( -D_FILE_OFFSET_BITS=64 )
246251 endif()
109109 set(LLVM_COMMON_DEPENDS ${LLVM_COMMON_DEPENDS} ${target} PARENT_SCOPE)
110110 endfunction()
111111
112 if(LLVM_USE_HOST_TOOLS AND NOT TARGET NATIVE_LIB_LLVMTABLEGEN)
113 llvm_ExternalProject_BuildCmd(tblgen_build_cmd LLVMSupport
114 ${LLVM_NATIVE_BUILD}
115 CONFIGURATION Release)
116 add_custom_command(OUTPUT LIB_LLVMTABLEGEN
117 COMMAND ${tblgen_build_cmd}
118 DEPENDS CONFIGURE_LLVM_NATIVE
119 WORKING_DIRECTORY ${LLVM_NATIVE_BUILD}
120 COMMENT "Building libLLVMTableGen for native TableGen..."
121 USES_TERMINAL)
122 add_custom_target(NATIVE_LIB_LLVMTABLEGEN DEPENDS LIB_LLVMTABLEGEN)
123 endif()
124
125112 macro(add_tablegen target project)
126113 set(${target}_OLD_LLVM_LINK_COMPONENTS ${LLVM_LINK_COMPONENTS})
127114 set(LLVM_LINK_COMPONENTS ${LLVM_LINK_COMPONENTS} TableGen)
165152 CONFIGURATION Release)
166153 add_custom_command(OUTPUT ${${project}_TABLEGEN_EXE}
167154 COMMAND ${tblgen_build_cmd}
168 DEPENDS ${target} NATIVE_LIB_LLVMTABLEGEN
155 DEPENDS CONFIGURE_LLVM_NATIVE ${target}
169156 WORKING_DIRECTORY ${LLVM_NATIVE_BUILD}
170157 COMMENT "Building native TableGen..."
171158 USES_TERMINAL)
222222 **LLVM_INCLUDE_TOOLS**:BOOL
223223 Generate build targets for the LLVM tools. Defaults to ON. You can use this
224224 option to disable the generation of build targets for the LLVM tools.
225
226 **LLVM_INSTALL_BINUTILS_SYMLINKS**:BOOL
227 Install symlinks from the binutils tool names to the corresponding LLVM tools.
228 For example, ar will be symlinked to llvm-ar.
225229
226230 **LLVM_BUILD_EXAMPLES**:BOOL
227231 Build LLVM examples. Defaults to OFF. Targets for building each example are
22 if (LLVM_ENABLE_DOXYGEN)
33 set(abs_top_srcdir ${CMAKE_CURRENT_SOURCE_DIR})
44 set(abs_top_builddir ${CMAKE_CURRENT_BINARY_DIR})
5
5
66 if (HAVE_DOT)
77 set(DOT ${LLVM_PATH_DOT})
88 endif()
2020 set(enable_external_search "NO")
2121 set(extra_search_mappings "")
2222 endif()
23
23
2424 # If asked, configure doxygen for the creation of a Qt Compressed Help file.
2525 option(LLVM_ENABLE_DOXYGEN_QT_HELP
2626 "Generate a Qt Compressed Help file." OFF)
2727 if (LLVM_ENABLE_DOXYGEN_QT_HELP)
2828 set(LLVM_DOXYGEN_QCH_FILENAME "org.llvm.qch" CACHE STRING
2929 "Filename of the Qt Compressed help file")
30 set(LLVM_DOXYGEN_QHP_NAMESPACE "org.llvm" CACHE STRING
30 set(LLVM_DOXYGEN_QHP_NAMESPACE "org.llvm" CACHE STRING
3131 "Namespace under which the intermediate Qt Help Project file lives")
3232 set(LLVM_DOXYGEN_QHP_CUST_FILTER_NAME "${PACKAGE_STRING}" CACHE STRING
3333 "See http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom-filters")
3434 set(LLVM_DOXYGEN_QHP_CUST_FILTER_ATTRS "${PACKAGE_NAME},${PACKAGE_VERSION}" CACHE STRING
3535 "See http://qt-project.org/doc/qt-4.8/qthelpproject.html#filter-attributes")
36 find_program(LLVM_DOXYGEN_QHELPGENERATOR_PATH qhelpgenerator
36 find_program(LLVM_DOXYGEN_QHELPGENERATOR_PATH qhelpgenerator
3737 DOC "Path to the qhelpgenerator binary")
3838 if (NOT LLVM_DOXYGEN_QHELPGENERATOR_PATH)
3939 message(FATAL_ERROR "Failed to find qhelpgenerator binary")
5454 set(llvm_doxygen_qhp_cust_filter_name "")
5555 set(llvm_doxygen_qhp_cust_filter_attrs "")
5656 endif()
57
57
5858 option(LLVM_DOXYGEN_SVG
5959 "Use svg instead of png files for doxygen graphs." OFF)
6060 if (LLVM_DOXYGEN_SVG)
112112 if (${SPHINX_OUTPUT_MAN})
113113 add_sphinx_target(man llvm)
114114 add_sphinx_target(man llvm-dwarfdump)
115 add_sphinx_target(man dsymutil)
115116 endif()
116117
117118 endif()
8484 remain set throughout the file.
8585
8686 All other variables get undefined after each encountered ``CHECK-LABEL``.
87
88 .. option:: -D
89
90 Sets a filecheck variable ``VAR`` with value ``VALUE`` that can be used in
91 ``CHECK:`` lines.
8792
8893 .. option:: -version
8994
0 dsymutil - manipulate archived DWARF debug symbol files
1 =======================================================
2
3 SYNOPSIS
4 --------
5
6 | :program:`dsymutil` [*options*] *executable*
7
8 DESCRIPTION
9 -----------
10
11 :program:`dsymutil` links the DWARF debug information found in the object files
12 for an executable *executable* by using debug symbols information contained in
13 its symbol table. By default, the linked debug information is placed in a
14 ``.dSYM`` bundle with the same name as the executable.
15
16 OPTIONS
17 -------
18 .. option:: --arch=
19
20 Link DWARF debug information only for specified CPU architecture types.
21 Architectures may be specified by name. When using this option, an error will
22 be returned if any architectures can not be properly linked. This option can
23 be specified multiple times, once for each desired architecture. All CPU
24 architectures will be linked by default and any architectures that can't be
25 properly linked will cause :program:`dsymutil` to return an error.
26
27 .. option:: --dump-debug-map
28
29 Dump the *executable*'s debug-map (the list of the object files containing the
30 debug information) in YAML format and exit. No DWARF link will take place.
31
32 .. option:: -f, --flat
33
34 Produce a flat dSYM file. A ``.dwarf`` extension will be appended to the
35 executable name unless the output file is specified using the -o option.
36
37 .. option:: --no-odr
38
39 Do not use ODR (One Definition Rule) for uniquing C++ types.
40
41 .. option:: --no-output
42
43 Do the link in memory, but do not emit the result file.
44
45 .. option:: --no-swiftmodule-timestamp
46
47 Don't check the timestamp for swiftmodule files.
48
49 .. option:: -j , --num-threads=
50
51 Specifies the maximum number (``n``) of simultaneous threads to use when
52 linking multiple architectures.
53
54 .. option:: -o
55
56 Specifies an alternate ``path`` to place the dSYM bundle. The default dSYM
57 bundle path is created by appending ``.dSYM`` to the executable name.
58
59 .. option:: --oso-prepend-path=
60
61 Specifies a ``path`` to prepend to all debug symbol object file paths.
62
63 .. option:: -s, --symtab
64
65 Dumps the symbol table found in *executable* or object file(s) and exits.
66
67 .. option:: -v, --verbose
68
69 Display verbose information when linking.
70
71 .. option:: --version
72
73 Display the version of the tool.
74
75 .. option:: -y
76
77 Treat *executable* as a YAML debug-map rather than an executable.
78
79 EXIT STATUS
80 -----------
81
82 :program:`dsymutil` returns 0 if the DWARF debug information was linked
83 successfully. Otherwise, it returns 1.
84
85 SEE ALSO
86 --------
87
88 :manpage:`llvm-dwarfdump(1)`
2929 llvm-stress
3030 llvm-symbolizer
3131 llvm-dwarfdump
32 dsymutil
3233
3334 Debugging Tools
3435 ~~~~~~~~~~~~~~~
121121
122122 Choose the code model from:
123123
124 .. code-block:: perl
124 .. code-block:: text
125125
126126 default: Target default code model
127127 small: Small code model
153153
154154 Instruction schedulers available (before register allocation):
155155
156 .. code-block:: perl
156 .. code-block:: text
157157
158158 =default: Best scheduler for the target
159159 =none: No scheduling: breadth first sequencing
167167
168168 Register allocator to use (default=linearscan)
169169
170 .. code-block:: perl
170 .. code-block:: text
171171
172172 =bigblock: Big-block register allocator
173173 =linearscan: linear scan register allocator =local - local register allocator
177177
178178 Choose relocation model from:
179179
180 .. code-block:: perl
180 .. code-block:: text
181181
182182 =default: Target default relocation model
183183 =static: Non-relocatable code =pic - Fully relocatable, position independent code
187187
188188 Spiller to use (default=local)
189189
190 .. code-block:: perl
190 .. code-block:: text
191191
192192 =simple: simple spiller
193193 =local: local spiller
196196
197197 Choose style of code to emit from X86 backend:
198198
199 .. code-block:: perl
199 .. code-block:: text
200200
201201 =att: Emit AT&T-style assembly
202202 =intel: Emit Intel-style assembly
141141
142142 Displays class definitions in the specified format.
143143
144 .. code-block:: perl
144 .. code-block:: text
145145
146146 =all - Display all class members including data, constants, typedefs, functions, etc (default)
147147 =layout - Only display members that contribute to class size.
151151
152152 Displays classes in the specified order.
153153
154 .. code-block:: perl
154 .. code-block:: text
155155
156156 =none - Undefined / no particular sort order (default)
157157 =name - Sort classes by name
199199 Type of symbols to dump when -globals, -externals, or -module-syms is
200200 specified. (default all)
201201
202 .. code-block:: perl
202 .. code-block:: text
203203
204204 =thunks - Display thunk symbols
205205 =data - Display data symbols
211211 For symbols dumped via the -module-syms, -globals, or -externals options, sort
212212 the results in specified order.
213213
214 .. code-block:: perl
214 .. code-block:: text
215215
216216 =none - Undefined / no particular sort order
217217 =name - Sort symbols by name
195195 In order to access the 18th integer in the array, you would need to do the
196196 following:
197197
198 .. code-block:: llvm
198 .. code-block:: text
199199
200200 %idx = getelementptr { [40 x i32]* }, { [40 x i32]* }* %, i64 0, i32 0
201201 %arr = load [40 x i32]** %idx
0 ===================================================================
1 How to Cross Compile Compiler-rt Builtins For Arm
2 ===================================================================
3
4 Introduction
5 ============
6
7 This document contains information about building and testing the builtins part
8 of compiler-rt for an Arm target, from an x86_64 Linux machine.
9
10 While this document concentrates on Arm and Linux the general principles should
11 apply to other targets supported by compiler-rt. Further contributions for other
12 targets are welcome.
13
14 The instructions in this document depend on libraries and programs external to
15 LLVM, there are many ways to install and configure these dependencies so you
16 may need to adapt the instructions here to fit your own local situation.
17
18 Prerequisites
19 =============
20
21 In this use case we'll be using CMake on a Debian-based Linux system,
22 cross-compiling from an x86_64 host to a hard-float Armv7-A target. We'll be
23 using as many of the LLVM tools as we can, but it is possible to use GNU
24 equivalents.
25
26 * ``A build of LLVM/clang for the llvm-tools and llvm-config``
27 * ``The qemu-arm user mode emulator``
28 * ``An arm-linux-gnueabihf sysroot``
29
30 See https://compiler-rt.llvm.org/ for more information about the dependencies
31 on clang and LLVM.
32
33 ``qemu-arm`` should be available as a package for your Linux distribution.
34
35 The most complicated of the prerequisites to satisfy is the arm-linux-gnueabihf
36 sysroot. The :doc:`HowToCrossCompileLLVM` has information about how to use the
37 Linux distribution's multiarch support to fulfill the dependencies for building
38 LLVM. Alternatively, as building and testing just the compiler-rt builtins
39 requires fewer dependencies than LLVM, it is possible to use the Linaro
40 arm-linux-gnueabihf gcc installation as our sysroot.
41
42 Building compiler-rt builtins for Arm
43 =====================================
44 We will be doing a standalone build of compiler-rt using the following cmake
45 options.
46
47 * ``path/to/llvm/projects/compiler-rt``
48 * ``-DCOMPILER_RT_BUILD_BUILTINS=ON``
49 * ``-DCOMPILER_RT_BUILD_SANITIZERS=OFF``
50 * ``-DCOMPILER_RT_BUILD_XRAY=OFF``
51 * ``-DCOMPILER_RT_BUILD_LIBFUZZER=OFF``
52 * ``-DCOMPILER_RT_BUILD_PROFILE=OFF``
53 * ``-DCMAKE_C_COMPILER=/path/to/clang``
54 * ``-DCMAKE_AR=/path/to/llvm-ar``
55 * ``-DCMAKE_NM=/path/to/llvm-nm``
56 * ``-DCMAKE_RANLIB=/path/to/llvm-ranlib``
57 * ``-DCMAKE_EXE_LINKER_FLAGS="-fuse-ld=lld"``
58 * ``-DCMAKE_C_COMPILER_TARGET="arm-linux-gnueabihf"``
59 * ``-DCOMPILER_RT_DEFAULT_TARGET_ONLY=ON``
60 * ``-DLLVM_CONFIG_PATH=/path/to/llvm-config``
61 * ``-DCMAKE_C_FLAGS="build-c-flags"``
62
63 The build-c-flags need to be sufficient to pass the CMake compiler check and
64 to compile compiler-rt. When using a GCC 7 Linaro arm-linux-gnueabihf
65 installation the following flags are needed:
66
67 * ``--target=arm-linux-gnueabihf``
68 * ``--march=armv7a``
69 * ``--gcc-toolchain=/path/to/dir/toolchain``
70 * ``--sysroot=/path/to/toolchain/arm-linux-gnueabihf/libc``
71
72 Depending on how your sysroot is laid out, you may not need ``--gcc-toolchain``.
73 For example if you have added armhf as an architecture using your Linux
74 distribution's multiarch support then you should be able to use ``--sysroot=/``.
75
76 Once cmake has completed the builtins can be built with ``ninja builtins``
77
78 Testing compiler-rt builtins using qemu-arm
79 ===========================================
80 To test the builtins library we need to add a few more cmake flags to enable
81 testing and set up the compiler and flags for the test cases. We must also tell
82 cmake that we wish to run the tests on ``qemu-arm``.
83
84 * ``-DCOMPILER_RT_EMULATOR="qemu-arm -L /path/to/armhf/sysroot"``
85 * ``-DCOMPILER_RT_INCLUDE_TESTS=ON``
86 * ``-DCOMPILER_RT_TEST_COMPILER="/path/to/clang"``
87 * ``-DCOMPILER_RT_TEST_COMPILER_CFLAGS="test-c-flags"``
88
89 The ``/path/to/armhf/sysroot`` should be the same as the one passed to
90 ``--sysroot`` in the "build-c-flags".
91
92 The "test-c-flags" can be the same as the "build-c-flags", with the addition
93 of ``-fuse-ld=lld`` if you wish to use lld to link the tests.
94
95 Once cmake has completed the tests can be built and run using
96 ``ninja check-builtins``
97
98 Modifications for other Targets
99 ===============================
100
101 Arm Soft-Float Target
102 ---------------------
103 The instructions for the Arm hard-float target can be used for the soft-float
104 target by substituting soft-float equivalents for the sysroot and target. The
105 target to use is:
106
107 * ``-DCMAKE_C_COMPILER_TARGET=arm-linux-gnueabi``
108
109 Depending on whether you want to use floating point instructions or not you
110 may need extra c-flags such as ``-mfloat-abi=softfp`` for use of floating-point
111 instructions, and ``-mfloat-abi=soft -mfpu=none`` for software floating-point
112 emulation.
113
114 AArch64 Target
115 --------------
116 The instructions for Arm can be used for AArch64 by substituting AArch64
117 equivalents for the sysroot, emulator and target.
118
119 * ``-DCMAKE_C_COMPILER_TARGET=aarch64-linux-gnu``
120 * ``-DCOMPILER_RT_EMULATOR="qemu-aarch64 -L /path/to/aarch64/sysroot"``
121
122 The CMAKE_C_FLAGS and COMPILER_RT_TEST_COMPILER_CFLAGS may also need:
123 ``"--sysroot=/path/to/aarch64/sysroot --gcc-toolchain=/path/to/gcc-toolchain"``
124
125 Armv6-m, Armv7-m and Armv7E-M targets
126 -------------------------------------
127 If you wish to build, but not test compiler-rt for Armv6-M, Armv7-M or Armv7E-M
128 then the easiest way is to use the BaremetalARM.cmake recipe in
129 clang/cmake/caches.
130
131 You will need a bare metal sysroot such as that provided by the GNU ARM
132 Embedded toolchain.
133
134 The libraries can be built with the cmake options:
135
136 * ``-DBAREMETAL_ARMV6M_SYSROOT=/path/to/bare/metal/sysroot``
137 * ``-DBAREMETAL_ARMV7M_SYSROOT=/path/to/bare/metal/sysroot``
138 * ``-DBAREMETAL_ARMV7EM_SYSROOT=/path/to/bare/metal/sysroot``
139 * ``-C /path/to/llvm/source/tools/clang/cmake/caches/BaremetalARM.cmake``
140
141 **Note** that for the recipe to work the compiler-rt source must be checked out
142 into the directory llvm/runtimes and not llvm/projects.
143
144 To build and test the libraries using a similar method to Armv7-A is possible
145 but more difficult. The main problems are:
146
147 * There isn't a ``qemu-arm`` user-mode emulator for bare-metal systems. The ``qemu-system-arm`` can be used but this is significantly more difficult to setup.
148 * The targets to compile compiler-rt for have the suffix -none-eabi. This uses the BareMetal driver in clang and by default won't find the libraries needed to pass the cmake compiler check.
149
150 As the Armv6-M, Armv7-M and Armv7E-M builds of compiler-rt only use instructions
151 that are supported on Armv7-A we can still get most of the value of running the
152 tests using the same ``qemu-arm`` that we used for Armv7-A by building and
153 running the test cases for Armv7-A but using the builtins compiled for
154 Armv6-M, Armv7-M or Armv7E-M. This will not catch instructions that are
155 supported on Armv7-A but not Armv6-M, Armv7-M and Armv7E-M.
156
157 To get the cmake compile test to pass the libraries needed to successfully link
158 the test application will need to be manually added to ``CMAKE_CFLAGS``.
159 Alternatively if you are using version 3.6 or above of cmake you can use
160 ``CMAKE_TRY_COMPILE_TARGET_TYPE=STATIC_LIBRARY`` to skip the link step.
161
162 * ``-DCMAKE_TRY_COMPILE_TARGET_TYPE=STATIC_LIBRARY``
163 * ``-DCOMPILER_RT_OS_DIR="baremetal"``
164 * ``-DCOMPILER_RT_BUILD_BUILTINS=ON``
165 * ``-DCOMPILER_RT_BUILD_SANITIZERS=OFF``
166 * ``-DCOMPILER_RT_BUILD_XRAY=OFF``
167 * ``-DCOMPILER_RT_BUILD_LIBFUZZER=OFF``
168 * ``-DCOMPILER_RT_BUILD_PROFILE=OFF``
169 * ``-DCMAKE_C_COMPILER=${host_install_dir}/bin/clang``
170 * ``-DCMAKE_C_COMPILER_TARGET="your *-none-eabi target"``
171 * ``-DCMAKE_AR=/path/to/llvm-ar``
172 * ``-DCMAKE_NM=/path/to/llvm-nm``
173 * ``-DCMAKE_RANLIB=/path/to/llvm-ranlib``
174 * ``-DCOMPILER_RT_BAREMETAL_BUILD=ON``
175 * ``-DCOMPILER_RT_DEFAULT_TARGET_ONLY=ON``
176 * ``-DLLVM_CONFIG_PATH=/path/to/llvm-config``
177 * ``-DCMAKE_C_FLAGS="build-c-flags"``
178 * ``-DCMAKE_ASM_FLAGS="${arm_cflags}"``
179 * ``-DCOMPILER_RT_EMULATOR="qemu-arm -L /path/to/armv7-A/sysroot"``
180 * ``-DCOMPILER_RT_INCLUDE_TESTS=ON``
181 * ``-DCOMPILER_RT_TEST_COMPILER="/path/to/clang"``
182 * ``-DCOMPILER_RT_TEST_COMPILER_CFLAGS="test-c-flags"``
183
184 The Armv6-M builtins will use the soft-float ABI. When compiling the tests for
185 Armv7-A we must include ``"-mthumb -mfloat-abi=soft -mfpu=none"`` in the
186 test-c-flags. We must use an Armv7-A soft-float abi sysroot for ``qemu-arm``.
187
188 Unfortunately at time of writing the Armv7-M and Armv7E-M builds of
189 compiler-rt will always include assembler files including floating point
190 instructions. This means that building for a cpu without a floating point unit
191 requires something like removing the arm_Thumb1_VFPv2_SOURCES from the
192 arm_Thumb1_SOURCES in builtins/CMakeLists.txt. The float-abi of the compiler-rt
193 library must be matched by the float abi of the Armv7-A sysroot used by
194 qemu-arm.
195
196 Depending on the linker used for the test cases you may encounter BuildAttribute
197 mismatches between the M-profile objects from compiler-rt and the A-profile
198 objects from the test. The lld linker does not check the BuildAttributes so it
199 can be used to link the tests by adding -fuse-ld=lld to the
200 ``COMPILER_RT_TEST_COMPILER_CFLAGS``.
541541
542542 ``dso_local``
543543 The compiler may assume that a function or variable marked as ``dso_local``
544 will resolve to a symbol within the same linkage unit. Direct access will
544 will resolve to a symbol within the same linkage unit. Direct access will
545545 be generated even if the definition is not within this compilation unit.
546546
547547 .. _namedtypes:
596596 case they don't have an initializer.
597597
598598 Either global variable definitions or declarations may have an explicit section
599 to be placed in and may have an optional explicit alignment specified. If there
600 is a mismatch between the explicit or inferred section information for the
601 variable declaration and its definition the resulting behavior is undefined.
599 to be placed in and may have an optional explicit alignment specified. If there
600 is a mismatch between the explicit or inferred section information for the
601 variable declaration and its definition the resulting behavior is undefined.
602602
603603 A variable may be defined as a global ``constant``, which indicates that
604604 the contents of the variable will **never** be modified (enabling better
641641 Additionally, the global can placed in a comdat if the target has the necessary
642642 support.
643643
644 External declarations may have an explicit section specified. Section
645 information is retained in LLVM IR for targets that make use of this
646 information. Attaching section information to an external declaration is an
647 assertion that its definition is located in the specified section. If the
648 definition is located in a different section, the behavior is undefined.
644 External declarations may have an explicit section specified. Section
645 information is retained in LLVM IR for targets that make use of this
646 information. Attaching section information to an external declaration is an
647 assertion that its definition is located in the specified section. If the
648 definition is located in a different section, the behavior is undefined.
649649
650650 By default, global initializers are optimized by assuming that global
651651 variables defined within the module are not modified from their
22712271 Fast-Math Flags
22722272 ---------------
22732273
2274 LLVM IR floating-point binary ops (:ref:`fadd `,
2274 LLVM IR floating-point operations (:ref:`fadd `,
22752275 :ref:`fsub `, :ref:`fmul `, :ref:`fdiv `,
22762276 :ref:`frem `, :ref:`fcmp `) and :ref:`call `
2277 instructions have the following flags that can be set to enable
2278 otherwise unsafe floating point transformations.
2277 may use the following flags to enable otherwise unsafe
2278 floating-point transformations.
22792279
22802280 ``nnan``
22812281 No NaNs - Allow optimizations to assume the arguments and result are not
22992299 Allow floating-point contraction (e.g. fusing a multiply followed by an
23002300 addition into a fused multiply-and-add).
23012301
2302 ``afn``
2303 Approximate functions - Allow substitution of approximate calculations for
2304 functions (sin, log, sqrt, etc). See floating-point intrinsic definitions
2305 for places where this can apply to LLVM's intrinsic math functions.
2306
2307 ``reassoc``
2308 Allow reassociation transformations for floating-point instructions.
2309 This may dramatically change results in floating point.
2310
23022311 ``fast``
2303 Fast - Allow algebraically equivalent transformations that may
2304 dramatically change results in floating point (e.g. reassociate). This
2305 flag implies all the others.
2312 This flag implies all of the others.
23062313
23072314 .. _uselistorder:
23082315
44984505 doesn't have a trailing ``DW_OP_stack_value`` will describe an *address* when
44994506 combined with a concrete location.
45004507
4501 .. code-block:: llvm
4508 .. code-block:: text
45024509
45034510 !0 = !DIExpression(DW_OP_deref)
45044511 !1 = !DIExpression(DW_OP_plus_uconst, 3)
46384645 int i; // offset 0
46394646 float f; // offset 4
46404647 };
4641
4648
46424649 struct Outer {
46434650 float f; // offset 0
46444651 double d; // offset 4
46454652 struct Inner inner_a; // offset 12
46464653 };
4647
4654
46484655 void f(struct Outer* outer, struct Inner* inner, float* f, int* i, char* c) {
46494656 outer->f = 0; // tag0: (OuterStructTy, FloatScalarTy, 0)
46504657 outer->inner_a.i = 0; // tag1: (OuterStructTy, IntScalarTy, 12)
51935200 !1 = !{!1} ; an identifier for the inner loop
51945201 !2 = !{!2} ; an identifier for the outer loop
51955202
5203 '``irr_loop``' Metadata
5204 ^^^^^^^^^^^^^^^^^^^^^^^
5205
5206 ``irr_loop`` metadata may be attached to the terminator instruction of a basic
5207 block that's an irreducible loop header (note that an irreducible loop has more
5208 than one header basic block.) If ``irr_loop`` metadata is attached to the
5209 terminator instruction of a basic block that is not really an irreducible loop
5210 header, the behavior is undefined. The intent of this metadata is to improve the
5211 accuracy of the block frequency propagation. For example, in the code below, the
5212 block ``header0`` may have a loop header weight (relative to the other headers of
5213 the irreducible loop) of 100:
5214
5215 .. code-block:: llvm
5216
5217 header0:
5218 ...
5219 br i1 %cmp, label %t1, label %t2, !irr_loop !0
5220
5221 ...
5222 !0 = !{"loop_header_weight", i64 100}
5223
5224 Irreducible loop header weights are typically based on profile data.
5225
51965226 '``invariant.group``' Metadata
51975227 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
51985228
51995229 The ``invariant.group`` metadata may be attached to ``load``/``store`` instructions.
5200 The existence of the ``invariant.group`` metadata on the instruction tells
5201 the optimizer that every ``load`` and ``store`` to the same pointer operand
5202 within the same invariant group can be assumed to load or store the same
5203 value (but see the ``llvm.invariant.group.barrier`` intrinsic which affects
5230 The existence of the ``invariant.group`` metadata on the instruction tells
5231 the optimizer that every ``load`` and ``store`` to the same pointer operand
5232 within the same invariant group can be assumed to load or store the same
5233 value (but see the ``llvm.invariant.group.barrier`` intrinsic which affects
52045234 when two pointers are considered the same). Pointers returned by bitcast or
52055235 getelementptr with only zero indices are considered the same.
52065236
52135243 %ptr = alloca i8
52145244 store i8 42, i8* %ptr, !invariant.group !0
52155245 call void @foo(i8* %ptr)
5216
5246
52175247 %a = load i8, i8* %ptr, !invariant.group !0 ; Can assume that value under %ptr didn't change
52185248 call void @foo(i8* %ptr)
52195249 %b = load i8, i8* %ptr, !invariant.group !1 ; Can't assume anything, because group changed
5220
5221 %newPtr = call i8* @getPointer(i8* %ptr)
5250
5251 %newPtr = call i8* @getPointer(i8* %ptr)
52225252 %c = load i8, i8* %newPtr, !invariant.group !0 ; Can't assume anything, because we only have information about %ptr
5223
5253
52245254 %unknownValue = load i8, i8* @unknownPtr
52255255 store i8 %unknownValue, i8* %ptr, !invariant.group !0 ; Can assume that %unknownValue == 42
5226
5256
52275257 call void @foo(i8* %ptr)
52285258 %newPtr2 = call i8* @llvm.invariant.group.barrier(i8* %ptr)
52295259 %d = load i8, i8* %newPtr2, !invariant.group !0 ; Can't step through invariant.group.barrier to get value of %ptr
5230
5260
52315261 ...
52325262 declare void @foo(i8*)
52335263 declare i8* @getPointer(i8*)
52345264 declare i8* @llvm.invariant.group.barrier(i8*)
5235
5265
52365266 !0 = !{!"magic ptr"}
52375267 !1 = !{!"other ptr"}
52385268
52415271 to the SSA value of the pointer operand.
52425272
52435273 .. code-block:: llvm
5244
5274
52455275 %v = load i8, i8* %x, !invariant.group !0
52465276 ; if %x mustalias %y then we can replace the above instruction with
52475277 %v = load i8, i8* %y
52715301
52725302 Example:
52735303
5274 .. code-block:: llvm
5304 .. code-block:: text
52755305
52765306 $a = comdat any
52775307 @a = global i32 1, comdat $a
66996729
67006730 Note that unsigned integer remainder and signed integer remainder are
67016731 distinct operations; for signed integer remainder, use '``srem``'.
6702
6732
67036733 Taking the remainder of a division by zero is undefined behavior.
6704 For vectors, if any element of the divisor is zero, the operation has
6734 For vectors, if any element of the divisor is zero, the operation has
67056735 undefined behavior.
67066736
67076737 Example:
67536783 distinct operations; for unsigned integer remainder, use '``urem``'.
67546784
67556785 Taking the remainder of a division by zero is undefined behavior.
6756 For vectors, if any element of the divisor is zero, the operation has
6786 For vectors, if any element of the divisor is zero, the operation has
67576787 undefined behavior.
67586788 Overflow also leads to undefined behavior; this is a rare case, but can
67596789 occur, for example, by taking the remainder of a 32-bit division of
76267656 instructions to save cache bandwidth, such as the ``MOVNT`` instruction on
76277657 x86.
76287658
7629 The optional ``!invariant.group`` metadata must reference a
7659 The optional ``!invariant.group`` metadata must reference a
76307660 single metadata name ````. See ``invariant.group`` metadata.
76317661
76327662 Semantics:
77007730 Example:
77017731 """"""""
77027732
7703 .. code-block:: llvm
7733 .. code-block:: text
77047734
77057735 fence acquire ; yields void
77067736 fence syncscope("singlethread") seq_cst ; yields void
77327762 to operate on, a value to compare to the value currently be at that
77337763 address, and a new value to place at that address if the compared values
77347764 are equal. The type of '' must be an integer or pointer type whose
7735 bit width is a power of two greater than or equal to eight and less
7765 bit width is a power of two greater than or equal to eight and less
77367766 than or equal to a target-specific size limit. '' and '' must
7737 have the same type, and the type of '' must be a pointer to
7738 that type. If the ``cmpxchg`` is marked as ``volatile``, then the
7767 have the same type, and the type of '' must be a pointer to
7768 that type. If the ``cmpxchg`` is marked as ``volatile``, then the
77397769 optimizer is not allowed to modify the number or order of execution of
77407770 this ``cmpxchg`` with other :ref:`volatile operations `.
77417771
90299059 ``tail`` or ``musttail`` markers to the call. It is used to prevent tail
90309060 call optimization from being performed on the call.
90319061
9032 #. The optional ``fast-math flags`` marker indicates that the call has one or more
9062 #. The optional ``fast-math flags`` marker indicates that the call has one or more
90339063 :ref:`fast-math flags `, which are optimization hints to enable
90349064 otherwise unsafe floating-point optimizations. Fast-math flags are only valid
90359065 for calls that return a floating-point scalar or vector type.
1045910489 """""""
1046010490
1046110491 This is an overloaded intrinsic. You can use ``llvm.sqrt`` on any
10462 floating point or vector of floating point type. Not all targets support
10492 floating-point or vector of floating-point type. Not all targets support
1046310493 all types however.
1046410494
1046510495 ::
1047310503 Overview:
1047410504 """""""""
1047510505
10476 The '``llvm.sqrt``' intrinsics return the square root of the specified value,
10477 returning the same value as the libm '``sqrt``' functions would, but without
10478 trapping or setting ``errno``.
10479
10480 Arguments:
10481 """"""""""
10482
10483 The argument and return value are floating point numbers of the same type.
10484
10485 Semantics:
10486 """"""""""
10487
10488 This function returns the square root of the operand if it is a nonnegative
10489 floating point number.
10506 The '``llvm.sqrt``' intrinsics return the square root of the specified value.
10507
10508 Arguments:
10509 """"""""""
10510
10511 The argument and return value are floating-point numbers of the same type.
10512
10513 Semantics:
10514 """"""""""
10515
10516 Return the same value as a corresponding libm '``sqrt``' function but without
10517 trapping or setting ``errno``. For types specified by IEEE-754, the result
10518 matches a conforming libm implementation.
10519
10520 When specified with the fast-math-flag 'afn', the result may be approximated
10521 using a less accurate calculation.
1049010522
1049110523 '``llvm.powi.*``' Intrinsic
1049210524 ^^^^^^^^^^^^^^^^^^^^^^^^^^^
1053310565 """""""
1053410566
1053510567 This is an overloaded intrinsic. You can use ``llvm.sin`` on any
10536 floating point or vector of floating point type. Not all targets support
10568 floating-point or vector of floating-point type. Not all targets support
1053710569 all types however.
1053810570
1053910571 ::
1055210584 Arguments:
1055310585 """"""""""
1055410586
10555 The argument and return value are floating point numbers of the same type.
10556
10557 Semantics:
10558 """"""""""
10559
10560 This function returns the sine of the specified operand, returning the
10561 same values as the libm ``sin`` functions would, and handles error
10562 conditions in the same way.
10587 The argument and return value are floating-point numbers of the same type.
10588
10589 Semantics:
10590 """"""""""
10591
10592 Return the same value as a corresponding libm '``sin``' function but without
10593 trapping or setting ``errno``.
10594
10595 When specified with the fast-math-flag 'afn', the result may be approximated
10596 using a less accurate calculation.
1056310597
1056410598 '``llvm.cos.*``' Intrinsic
1056510599 ^^^^^^^^^^^^^^^^^^^^^^^^^^
1056810602 """""""
1056910603
1057010604 This is an overloaded intrinsic. You can use ``llvm.cos`` on any
10571 floating point or vector of floating point type. Not all targets support
10605 floating-point or vector of floating-point type. Not all targets support
1057210606 all types however.
1057310607
1057410608 ::
1058710621 Arguments:
1058810622 """"""""""
1058910623
10590 The argument and return value are floating point numbers of the same type.
10591
10592 Semantics:
10593 """"""""""
10594
10595 This function returns the cosine of the specified operand, returning the
10596 same values as the libm ``cos`` functions would, and handles error
10597 conditions in the same way.
10624 The argument and return value are floating-point numbers of the same type.
10625
10626 Semantics:
10627 """"""""""
10628
10629 Return the same value as a corresponding libm '``cos``' function but without
10630 trapping or setting ``errno``.
10631
10632 When specified with the fast-math-flag 'afn', the result may be approximated
10633 using a less accurate calculation.
1059810634
1059910635 '``llvm.pow.*``' Intrinsic
1060010636 ^^^^^^^^^^^^^^^^^^^^^^^^^^
1060310639 """""""
1060410640
1060510641 This is an overloaded intrinsic. You can use ``llvm.pow`` on any
10606 floating point or vector of floating point type. Not all targets support
10642 floating-point or vector of floating-point type. Not all targets support
1060710643 all types however.
1060810644
1060910645 ::
1062310659 Arguments:
1062410660 """"""""""
1062510661
10626 The second argument is a floating point power, and the first is a value
10627 to raise to that power.
10628
10629 Semantics:
10630 """"""""""
10631
10632 This function returns the first value raised to the second power,
10633 returning the same values as the libm ``pow`` functions would, and
10634 handles error conditions in the same way.
10662 The arguments and return value are floating-point numbers of the same type.
10663
10664 Semantics:
10665 """"""""""
10666
10667 Return the same value as a corresponding libm '``pow``' function but without
10668 trapping or setting ``errno``.
10669
10670 When specified with the fast-math-flag 'afn', the result may be approximated
10671 using a less accurate calculation.
1063510672
1063610673 '``llvm.exp.*``' Intrinsic
1063710674 ^^^^^^^^^^^^^^^^^^^^^^^^^^
1064010677 """""""
1064110678
1064210679 This is an overloaded intrinsic. You can use ``llvm.exp`` on any
10643 floating point or vector of floating point type. Not all targets support
10680 floating-point or vector of floating-point type. Not all targets support
1064410681 all types however.
1064510682
1064610683 ::
1066010697 Arguments:
1066110698 """"""""""
1066210699
10663 The argument and return value are floating point numbers of the same type.
10664
10665 Semantics:
10666 """"""""""
10667
10668 This function returns the same values as the libm ``exp`` functions
10669 would, and handles error conditions in the same way.
10700 The argument and return value are floating-point numbers of the same type.
10701
10702 Semantics:
10703 """"""""""
10704
10705 Return the same value as a corresponding libm '``exp``' function but without
10706 trapping or setting ``errno``.
10707
10708 When specified with the fast-math-flag 'afn', the result may be approximated
10709 using a less accurate calculation.
1067010710
1067110711 '``llvm.exp2.*``' Intrinsic
1067210712 ^^^^^^^^^^^^^^^^^^^^^^^^^^^
1067510715 """""""
1067610716
1067710717 This is an overloaded intrinsic. You can use ``llvm.exp2`` on any
10678 floating point or vector of floating point type. Not all targets support
10718 floating-point or vector of floating-point type. Not all targets support
1067910719 all types however.
1068010720
1068110721 ::
1069510735 Arguments:
1069610736 """"""""""
1069710737
10698 The argument and return value are floating point numbers of the same type.
10699
10700 Semantics:
10701 """"""""""
10702
10703 This function returns the same values as the libm ``exp2`` functions
10704 would, and handles error conditions in the same way.
10738 The argument and return value are floating-point numbers of the same type.
10739
10740 Semantics:
10741 """"""""""
10742
10743 Return the same value as a corresponding libm '``exp2``' function but without
10744 trapping or setting ``errno``.
10745
10746 When specified with the fast-math-flag 'afn', the result may be approximated
10747 using a less accurate calculation.
1070510748
1070610749 '``llvm.log.*``' Intrinsic
1070710750 ^^^^^^^^^^^^^^^^^^^^^^^^^^
1071010753 """""""
1071110754
1071210755 This is an overloaded intrinsic. You can use ``llvm.log`` on any
10713 floating point or vector of floating point type. Not all targets support
10756 floating-point or vector of floating-point type. Not all targets support
1071410757 all types however.
1071510758
1071610759 ::
1073010773 Arguments:
1073110774 """"""""""
1073210775
10733 The argument and return value are floating point numbers of the same type.
10734
10735 Semantics:
10736 """"""""""
10737
10738 This function returns the same values as the libm ``log`` functions
10739 would, and handles error conditions in the same way.
10776 The argument and return value are floating-point numbers of the same type.
10777
10778 Semantics:
10779 """"""""""
10780
10781 Return the same value as a corresponding libm '``log``' function but without
10782 trapping or setting ``errno``.
10783
10784 When specified with the fast-math-flag 'afn', the result may be approximated
10785 using a less accurate calculation.
1074010786
1074110787 '``llvm.log10.*``' Intrinsic
1074210788 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1074510791 """""""
1074610792
1074710793 This is an overloaded intrinsic. You can use ``llvm.log10`` on any
10748 floating point or vector of floating point type. Not all targets support
10794 floating-point or vector of floating-point type. Not all targets support
1074910795 all types however.
1075010796
1075110797 ::
1076510811 Arguments:
1076610812 """"""""""
1076710813
10768 The argument and return value are floating point numbers of the same type.
10769
10770 Semantics:
10771 """"""""""
10772
10773 This function returns the same values as the libm ``log10`` functions
10774 would, and handles error conditions in the same way.
10814 The argument and return value are floating-point numbers of the same type.
10815
10816 Semantics:
10817 """"""""""
10818
10819 Return the same value as a corresponding libm '``log10``' function but without
10820 trapping or setting ``errno``.
10821
10822 When specified with the fast-math-flag 'afn', the result may be approximated
10823 using a less accurate calculation.
1077510824
1077610825 '``llvm.log2.*``' Intrinsic
1077710826 ^^^^^^^^^^^^^^^^^^^^^^^^^^^
1078010829 """""""
1078110830
1078210831 This is an overloaded intrinsic. You can use ``llvm.log2`` on any
10783 floating point or vector of floating point type. Not all targets support
10832 floating-point or vector of floating-point type. Not all targets support
1078410833 all types however.
1078510834
1078610835 ::
1080010849 Arguments:
1080110850 """"""""""
1080210851
10803 The argument and return value are floating point numbers of the same type.
10804
10805 Semantics:
10806 """"""""""
10807
10808 This function returns the same values as the libm ``log2`` functions
10809 would, and handles error conditions in the same way.
10852 The argument and return value are floating-point numbers of the same type.
10853
10854 Semantics:
10855 """"""""""
10856
10857 Return the same value as a corresponding libm '``log2``' function but without
10858 trapping or setting ``errno``.
10859
10860 When specified with the fast-math-flag 'afn', the result may be approximated
10861 using a less accurate calculation.
1081010862
1081110863 '``llvm.fma.*``' Intrinsic
1081210864 ^^^^^^^^^^^^^^^^^^^^^^^^^^
1081510867 """""""
1081610868
1081710869 This is an overloaded intrinsic. You can use ``llvm.fma`` on any
10818 floating point or vector of floating point type. Not all targets support
10870 floating-point or vector of floating-point type. Not all targets support
1081910871 all types however.
1082010872
1082110873 ::
1082910881 Overview:
1083010882 """""""""
1083110883
10832 The '``llvm.fma.*``' intrinsics perform the fused multiply-add
10833 operation.
10834
10835 Arguments:
10836 """"""""""
10837
10838 The argument and return value are floating point numbers of the same
10839 type.
10840
10841 Semantics:
10842 """"""""""
10843
10844 This function returns the same values as the libm ``fma`` functions
10845 would, and does not set errno.
10884 The '``llvm.fma.*``' intrinsics perform the fused multiply-add operation.
10885
10886 Arguments:
10887 """"""""""
10888
10889 The arguments and return value are floating-point numbers of the same type.
10890
10891 Semantics:
10892 """"""""""
10893
10894 Return the same value as a corresponding libm '``fma``' function but without
10895 trapping or setting ``errno``.
10896
10897 When specified with the fast-math-flag 'afn', the result may be approximated
10898 using a less accurate calculation.
1084610899
1084710900 '``llvm.fabs.*``' Intrinsic
1084810901 ^^^^^^^^^^^^^^^^^^^^^^^^^^^
1277112824 Overview:
1277212825 """""""""
1277312826
12774 The '``llvm.invariant.group.barrier``' intrinsic can be used when an invariant
12827 The '``llvm.invariant.group.barrier``' intrinsic can be used when an invariant
1277512828 established by invariant.group metadata no longer holds, to obtain a new pointer
1277612829 value that does not carry the invariant information.
1277712830
1278512838 Semantics:
1278612839 """"""""""
1278712840
12788 Returns another pointer that aliases its argument but which is considered different
12841 Returns another pointer that aliases its argument but which is considered different
1278912842 for the purposes of ``load``/``store`` ``invariant.group`` metadata.
1279012843
1279112844 Constrained Floating Point Intrinsics
1286312916 Any FP exception that would have been raised by the original code must be raised
1286412917 by the transformed code, and the transformed code must not raise any FP
1286512918 exceptions that would not have been raised by the original code. This is the
12866 exception behavior argument that will be used if the code being compiled reads
12919 exception behavior argument that will be used if the code being compiled reads
1286712920 the FP exception status flags, but this mode can also be used with code that
1286812921 unmasks FP exceptions.
1286912922
1288112934
1288212935 ::
1288312936
12884 declare
12937 declare
1288512938 @llvm.experimental.constrained.fadd( , ,
1288612939 metadata ,
1288712940 metadata )
1291812971
1291912972 ::
1292012973
12921 declare
12974 declare
1292212975 @llvm.experimental.constrained.fsub( , ,
1292312976 metadata ,
1292412977 metadata )
1295513008
1295613009 ::
1295713010
12958 declare
13011 declare
1295913012 @llvm.experimental.constrained.fmul( , ,
1296013013 metadata ,
1296113014 metadata )
1299213045
1299313046 ::
1299413047
12995 declare
13048 declare
1299613049 @llvm.experimental.constrained.fdiv( , ,
1299713050 metadata ,
1299813051 metadata )
1302913082
1303013083 ::
1303113084
13032 declare
13085 declare
1303313086 @llvm.experimental.constrained.frem( , ,
1303413087 metadata ,
1303513088 metadata )
1305813111
1305913112 The value produced is the floating point remainder from the division of the two
1306013113 value operands and has the same type as the operands. The remainder has the
13061 same sign as the dividend.
13114 same sign as the dividend.
1306213115
1306313116 '``llvm.experimental.constrained.fma``' Intrinsic
1306413117 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1311813171
1311913172 ::
1312013173
13121 declare
13174 declare
1312213175 @llvm.experimental.constrained.sqrt( ,
1312313176 metadata ,
1312413177 metadata )
1315513208
1315613209 ::
1315713210
13158 declare
13211 declare
1315913212 @llvm.experimental.constrained.pow( , ,
1316013213 metadata ,
1316113214 metadata )
1319213245
1319313246 ::
1319413247
13195 declare
13248 declare
1319613249 @llvm.experimental.constrained.powi( , i32 ,
1319713250 metadata ,
1319813251 metadata )
1323113284
1323213285 ::
1323313286
13234 declare
13287 declare
1323513288 @llvm.experimental.constrained.sin( ,
1323613289 metadata ,
1323713290 metadata )
1326713320
1326813321 ::
1326913322
13270 declare
13323 declare
1327113324 @llvm.experimental.constrained.cos( ,
1327213325 metadata ,
1327313326 metadata )
1330313356
1330413357 ::
1330513358
13306 declare
13359 declare
1330713360 @llvm.experimental.constrained.exp( ,
1330813361 metadata ,
1330913362 metadata )
1333813391
1333913392 ::
1334013393
13341 declare
13394 declare
1334213395 @llvm.experimental.constrained.exp2( ,
1334313396 metadata ,
1334413397 metadata )
1337413427
1337513428 ::
1337613429
13377 declare
13430 declare
1337813431 @llvm.experimental.constrained.log( ,
1337913432 metadata ,
1338013433 metadata )
1341013463
1341113464 ::
1341213465
13413 declare
13466 declare
1341413467 @llvm.experimental.constrained.log10( ,
1341513468 metadata ,
1341613469 metadata )
1344513498
1344613499 ::
1344713500
13448 declare
13501 declare
1344913502 @llvm.experimental.constrained.log2( ,
1345013503 metadata ,
1345113504 metadata )
1348013533
1348113534 ::
1348213535
13483 declare
13536 declare
1348413537 @llvm.experimental.constrained.rint( ,
1348513538 metadata ,
1348613539 metadata )
1351913572
1352013573 ::
1352113574
13522 declare
13575 declare
1352313576 @llvm.experimental.constrained.nearbyint( ,
1352413577 metadata ,
1352513578 metadata )
1428014333 memory from the source location to the destination location. These locations are not
1428114334 allowed to overlap. The memory copy is performed as a sequence of load/store operations
1428214335 where each access is guaranteed to be a multiple of ``element_size`` bytes wide and
14283 aligned at an ``element_size`` boundary.
14336 aligned at an ``element_size`` boundary.
1428414337
1428514338 The order of the copy is unspecified. The same value may be read from the source
1428614339 buffer many times, but only one write is issued to the destination buffer per
1435514408 of memory from the source location to the destination location. These locations
1435614409 are allowed to overlap. The memory copy is performed as a sequence of load/store
1435714410 operations where each access is guaranteed to be a multiple of ``element_size``
14358 bytes wide and aligned at an ``element_size`` boundary.
14411 bytes wide and aligned at an ``element_size`` boundary.
1435914412
1436014413 The order of the copy is unspecified. The same value may be read from the source
1436114414 buffer many times, but only one write is issued to the destination buffer per
1443014483 The '``llvm.memset.element.unordered.atomic.*``' intrinsic sets the ``len`` bytes of
1443114484 memory starting at the destination location to the given ``value``. The memory is
1443214485 set with a sequence of store operations where each access is guaranteed to be a
14433 multiple of ``element_size`` bytes wide and aligned at an ``element_size`` boundary.
14486 multiple of ``element_size`` bytes wide and aligned at an ``element_size`` boundary.
1443414487
1443514488 The order of the assignment is unspecified. Only one write is issued to the
1443614489 destination buffer per element. It is well defined to have concurrent reads and
187187 `_. An `llvm.dbg.addr` intrinsic describes the
188188 *address* of a source variable.
189189
190 .. code-block:: llvm
190 .. code-block:: text
191191
192192 %i.addr = alloca i32, align 4
193193 call void @llvm.dbg.addr(metadata i32* %i.addr, metadata !1,
10311031 Pass Statistics
10321032 ===============
10331033
1034 The `Statistic -source.html>`_ class is
1034 The `Statistic _source.html>`_ class is
10351035 designed to be an easy way to expose various success metrics from passes.
10361036 These statistics are printed at the end of a run, when the :option:`-stats`
10371037 command line option is enabled on the command line. See the :ref:`Statistics
10421042 What PassManager does
10431043 ---------------------
10441044
1045 The `PassManager -source.html>`_ `class
1045 The `PassManager _source.html>`_ `class
10461046 `_ takes a list of
10471047 passes, ensures their :ref:`prerequisites `
10481048 are set up correctly, and then schedules passes to run efficiently. All of the
6767 CMakePrimer
6868 AdvancedBuilds
6969 HowToBuildOnARM
70 HowToCrossCompileBuiltinsOnArm
7071 HowToCrossCompileLLVM
7172 CommandGuide/index
7273 GettingStarted
103104
104105 :doc:`HowToBuildOnARM`
105106 Notes on building and testing LLVM/Clang on ARM.
107
108 :doc:`HowToCrossCompileBuiltinsOnArm`
109 Notes on cross-building and testing the compiler-rt builtins for Arm.
106110
107111 :doc:`HowToCrossCompileLLVM`
108112 Notes on cross-building and testing LLVM/Clang.
5555
5656 size_type size() const { return Vector.size(); }
5757
58 /// Grow the MapVector so that it can contain at least \p NumEntries items
59 /// before resizing again.
60 void reserve(size_type NumEntries) {
61 Map.reserve(NumEntries);
62 Vector.reserve(NumEntries);
63 }
64
5865 iterator begin() { return Vector.begin(); }
5966 const_iterator begin() const { return Vector.begin(); }
6067 iterator end() { return Vector.end(); }
812812 C.clear();
813813 }
814814
815 /// Provide wrappers to std::for_each which take ranges instead of having to
816 /// pass begin/end explicitly.
817 template
818 UnaryPredicate for_each(R &&Range, UnaryPredicate P) {
819 return std::for_each(std::begin(Range), std::end(Range), P);
820 }
821
815822 /// Provide wrappers to std::all_of which take ranges instead of having to pass
816823 /// begin/end explicitly.
817824 template
7474 /// the enclosing function's count (if available) and returns the value.
7575 Optional getProfileCountFromFreq(uint64_t Freq) const;
7676
77 /// \brief Returns true if \p BB is an irreducible loop header
78 /// block. Otherwise false.
79 bool isIrrLoopHeader(const BasicBlock *BB);
80
7781 // Set the frequency of the given basic block.
7882 void setBlockFreq(const BasicBlock *BB, uint64_t Freq);
7983
1919 #include "llvm/ADT/Optional.h"
2020 #include "llvm/ADT/PostOrderIterator.h"
2121 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/ADT/SparseBitVector.h"
2223 #include "llvm/ADT/Twine.h"
2324 #include "llvm/ADT/iterator_range.h"
2425 #include "llvm/IR/BasicBlock.h"
413414 /// \brief Data about each block. This is used downstream.
414415 std::vector Freqs;
415416
417 /// \brief Whether each block is an irreducible loop header.
418 /// This is used downstream.
419 SparseBitVector<> IsIrrLoopHeader;
420
416421 /// \brief Loop data: see initializeLoops().
417422 std::vector Working;
418423
491496 /// the backedges going into each of the loop headers.
492497 void adjustLoopHeaderMass(LoopData &Loop);
493498
499 void distributeIrrLoopHeaderMass(Distribution &Dist);
500
494501 /// \brief Package up a loop.
495502 void packageLoop(LoopData &Loop);
496503
519526 const BlockNode &Node) const;
520527 Optional getProfileCountFromFreq(const Function &F,
521528 uint64_t Freq) const;
529 bool isIrrLoopHeader(const BlockNode &Node);
522530
523531 void setBlockFreq(const BlockNode &Node, uint64_t Freq);
524532
972980 return BlockFrequencyInfoImplBase::getProfileCountFromFreq(F, Freq);
973981 }
974982
983 bool isIrrLoopHeader(const BlockT *BB) {
984 return BlockFrequencyInfoImplBase::isIrrLoopHeader(getNode(BB));
985 }
986
975987 void setBlockFreq(const BlockT *BB, uint64_t Freq);
976988
977989 Scaled64 getFloatingBlockFreq(const BlockT *BB) const {
11391151 DEBUG(dbgs() << "compute-mass-in-loop: " << getLoopName(Loop) << "\n");
11401152
11411153 if (Loop.isIrreducible()) {
1142 BlockMass Remaining = BlockMass::getFull();
1154 DEBUG(dbgs() << "isIrreducible = true\n");
1155 Distribution Dist;
1156 unsigned NumHeadersWithWeight = 0;
11431157 for (uint32_t H = 0; H < Loop.NumHeaders; ++H) {
1144 auto &Mass = Working[Loop.Nodes[H].Index].getMass();
1145 Mass = Remaining * BranchProbability(1, Loop.NumHeaders - H);
1146 Remaining -= Mass;
1147 }
1158 auto &HeaderNode = Loop.Nodes[H];
1159 const BlockT *Block = getBlock(HeaderNode);
1160 IsIrrLoopHeader.set(Loop.Nodes[H].Index);
1161 Optional HeaderWeight = Block->getIrrLoopHeaderWeight();
1162 if (!HeaderWeight)
1163 continue;
1164 DEBUG(dbgs() << getBlockName(HeaderNode)
1165 << " has irr loop header weight " << HeaderWeight.getValue()
1166 << "\n");
1167 NumHeadersWithWeight++;
1168 uint64_t HeaderWeightValue = HeaderWeight.getValue();
1169 if (HeaderWeightValue)
1170 Dist.addLocal(HeaderNode, HeaderWeightValue);
1171 }
1172 if (NumHeadersWithWeight != Loop.NumHeaders) {
1173 // Not all headers have a weight metadata. Distribute weight evenly.
1174 Dist = Distribution();
1175 for (uint32_t H = 0; H < Loop.NumHeaders; ++H) {
1176 auto &HeaderNode = Loop.Nodes[H];
1177 Dist.addLocal(HeaderNode, 1);
1178 }
1179 }
1180 distributeIrrLoopHeaderMass(Dist);
11481181 for (const BlockNode &M : Loop.Nodes)
11491182 if (!propagateMassToSuccessors(&Loop, M))
11501183 llvm_unreachable("unhandled irreducible control flow");
1151
1152 adjustLoopHeaderMass(Loop);
1184 if (NumHeadersWithWeight != Loop.NumHeaders)
1185 // Not all headers have a weight metadata. Adjust header mass.
1186 adjustLoopHeaderMass(Loop);
11531187 } else {
11541188 Working[Loop.getHeader().Index].getMass() = BlockMass::getFull();
11551189 if (!propagateMassToSuccessors(&Loop, Loop.getHeader()))
12841318 BlockFrequencyInfoImplBase::getBlockProfileCount(
12851319 *F->getFunction(), getNode(&BB)))
12861320 OS << ", count = " << ProfileCount.getValue();
1321 if (Optional IrrLoopHeaderWeight =
1322 BB.getIrrLoopHeaderWeight())
1323 OS << ", irr_loop_header_weight = " << IrrLoopHeaderWeight.getValue();
12871324 OS << "\n";
12881325 }
12891326
2121 #include "llvm/CodeGen/MachineInstrBuilder.h"
2222 #include "llvm/CodeGen/MachineOperand.h"
2323 #include "llvm/CodeGen/MachineRegisterInfo.h"
24 #include "llvm/CodeGen/TargetInstrInfo.h"
2425 #include "llvm/IR/Constants.h"
2526 #include "llvm/Support/Debug.h"
2627 #include "llvm/Support/ErrorHandling.h"
2728 #include "llvm/Support/raw_ostream.h"
28 #include "llvm/Target/TargetInstrInfo.h"
2929 #include "llvm/Target/TargetOpcodes.h"
3030 #include "llvm/Target/TargetRegisterInfo.h"
3131 #include
2525 #include
2626 #include
2727 #include
28 #include
2829
2930 namespace llvm {
3031
119120 }
120121 }
121122
123 typedef std::pair SizeAndAction;
124 typedef std::vector SizeAndActionsVec;
125 using SizeChangeStrategy =
126 std::function;
127
122128 /// More friendly way to set an action for common types that have an LLT
123129 /// representation.
130 /// The LegalizeAction must be one for which NeedsLegalizingToDifferentSize
131 /// returns false.
124132 void setAction(const InstrAspect &Aspect, LegalizeAction Action) {
133 assert(!needsLegalizingToDifferentSize(Action));
125134 TablesInitialized = false;
126 unsigned Opcode = Aspect.Opcode - FirstOp;
127 if (Actions[Opcode].size() <= Aspect.Idx)
128 Actions[Opcode].resize(Aspect.Idx + 1);
129 Actions[Aspect.Opcode - FirstOp][Aspect.Idx][Aspect.Type] = Action;
130 }
131
132 /// If an operation on a given vector type (say ) isn't explicitly
133 /// specified, we proceed in 2 stages. First we legalize the underlying scalar
134 /// (so that there's at least one legal vector with that scalar), then we
135 /// adjust the number of elements in the vector so that it is legal. The
136 /// desired action in the first step is controlled by this function.
137 void setScalarInVectorAction(unsigned Opcode, LLT ScalarTy,
138 LegalizeAction Action) {
139 assert(!ScalarTy.isVector());
140 ScalarInVectorActions[std::make_pair(Opcode, ScalarTy)] = Action;
141 }
135 const unsigned OpcodeIdx = Aspect.Opcode - FirstOp;
136 if (SpecifiedActions[OpcodeIdx].size() <= Aspect.Idx)
137 SpecifiedActions[OpcodeIdx].resize(Aspect.Idx + 1);
138 SpecifiedActions[OpcodeIdx][Aspect.Idx][Aspect.Type] = Action;
139 }
140
141 /// The setAction calls record the non-size-changing legalization actions
142 /// to take on specificly-sized types. The SizeChangeStrategy defines what
143 /// to do when the size of the type needs to be changed to reach a legally
144 /// sized type (i.e., one that was defined through a setAction call).
145 /// e.g.
146 /// setAction ({G_ADD, 0, LLT::scalar(32)}, Legal);
147 /// setLegalizeScalarToDifferentSizeStrategy(
148 /// G_ADD, 0, widenToLargerTypesAndNarrowToLargest);
149 /// will end up defining getAction({G_ADD, 0, T}) to return the following
150 /// actions for different scalar types T:
151 /// LLT::scalar(1)..LLT::scalar(31): {WidenScalar, 0, LLT::scalar(32)}
152 /// LLT::scalar(32): {Legal, 0, LLT::scalar(32)}
153 /// LLT::scalar(33)..: {NarrowScalar, 0, LLT::scalar(32)}
154 ///
155 /// If no SizeChangeAction gets defined, through this function,
156 /// the default is unsupportedForDifferentSizes.
157 void setLegalizeScalarToDifferentSizeStrategy(const unsigned Opcode,
158 const unsigned TypeIdx,
159 SizeChangeStrategy S) {
160 const unsigned OpcodeIdx = Opcode - FirstOp;
161 if (ScalarSizeChangeStrategies[OpcodeIdx].size() <= TypeIdx)
162 ScalarSizeChangeStrategies[OpcodeIdx].resize(TypeIdx + 1);
163 ScalarSizeChangeStrategies[OpcodeIdx][TypeIdx] = S;
164 }
165
166 /// See also setLegalizeScalarToDifferentSizeStrategy.
167 /// This function allows to set the SizeChangeStrategy for vector elements.
168 void setLegalizeVectorElementToDifferentSizeStrategy(const unsigned Opcode,
169 const unsigned TypeIdx,
170 SizeChangeStrategy S) {
171 const unsigned OpcodeIdx = Opcode - FirstOp;
172 if (VectorElementSizeChangeStrategies[OpcodeIdx].size() <= TypeIdx)
173 VectorElementSizeChangeStrategies[OpcodeIdx].resize(TypeIdx + 1);
174 VectorElementSizeChangeStrategies[OpcodeIdx][TypeIdx] = S;
175 }
176
177 /// A SizeChangeStrategy for the common case where legalization for a
178 /// particular operation consists of only supporting a specific set of type
179 /// sizes. E.g.
180 /// setAction ({G_DIV, 0, LLT::scalar(32)}, Legal);
181 /// setAction ({G_DIV, 0, LLT::scalar(64)}, Legal);
182 /// setLegalizeScalarToDifferentSizeStrategy(
183 /// G_DIV, 0, unsupportedForDifferentSizes);
184 /// will result in getAction({G_DIV, 0, T}) to return Legal for s32 and s64,
185 /// and Unsupported for all other scalar types T.
186 static SizeAndActionsVec
187 unsupportedForDifferentSizes(const SizeAndActionsVec &v) {
188 return increaseToLargerTypesAndDecreaseToLargest(v, Unsupported,
189 Unsupported);
190 }
191
192 /// A SizeChangeStrategy for the common case where legalization for a
193 /// particular operation consists of widening the type to a large legal type,
194 /// unless there is no such type and then instead it should be narrowed to the
195 /// largest legal type.
196 static SizeAndActionsVec
197 widenToLargerTypesAndNarrowToLargest(const SizeAndActionsVec &v) {
198 assert(v.size() > 0 &&
199 "At least one size that can be legalized towards is needed"
200 " for this SizeChangeStrategy");
201 return increaseToLargerTypesAndDecreaseToLargest(v, WidenScalar,
202 NarrowScalar);
203 }
204
205 static SizeAndActionsVec
206 widenToLargerTypesUnsupportedOtherwise(const SizeAndActionsVec &v) {
207 return increaseToLargerTypesAndDecreaseToLargest(v, WidenScalar,
208 Unsupported);
209 }
210
211 static SizeAndActionsVec
212 narrowToSmallerAndUnsupportedIfTooSmall(const SizeAndActionsVec &v) {
213 return decreaseToSmallerTypesAndIncreaseToSmallest(v, NarrowScalar,
214 Unsupported);
215 }
216
217 static SizeAndActionsVec
218 narrowToSmallerAndWidenToSmallest(const SizeAndActionsVec &v) {
219 assert(v.size() > 0 &&
220 "At least one size that can be legalized towards is needed"
221 " for this SizeChangeStrategy");
222 return decreaseToSmallerTypesAndIncreaseToSmallest(v, NarrowScalar,
223 WidenScalar);
224 }
225
226 /// A SizeChangeStrategy for the common case where legalization for a
227 /// particular vector operation consists of having more elements in the
228 /// vector, to a type that is legal. Unless there is no such type and then
229 /// instead it should be legalized towards the widest vector that's still
230 /// legal. E.g.
231 /// setAction({G_ADD, LLT::vector(8, 8)}, Legal);
232 /// setAction({G_ADD, LLT::vector(16, 8)}, Legal);
233 /// setAction({G_ADD, LLT::vector(2, 32)}, Legal);
234 /// setAction({G_ADD, LLT::vector(4, 32)}, Legal);
235 /// setLegalizeVectorElementToDifferentSizeStrategy(
236 /// G_ADD, 0, moreToWiderTypesAndLessToWidest);
237 /// will result in the following getAction results:
238 /// * getAction({G_ADD, LLT::vector(8,8)}) returns
239 /// (Legal, vector(8,8)).
240 /// * getAction({G_ADD, LLT::vector(9,8)}) returns
241 /// (MoreElements, vector(16,8)).
242 /// * getAction({G_ADD, LLT::vector(8,32)}) returns
243 /// (FewerElements, vector(4,32)).
244 static SizeAndActionsVec
245 moreToWiderTypesAndLessToWidest(const SizeAndActionsVec &v) {
246 return increaseToLargerTypesAndDecreaseToLargest(v, MoreElements,
247 FewerElements);
248 }
249
250 /// Helper function to implement many typical SizeChangeStrategy functions.
251 static SizeAndActionsVec
252 increaseToLargerTypesAndDecreaseToLargest(const SizeAndActionsVec &v,
253 LegalizeAction IncreaseAction,
254 LegalizeAction DecreaseAction);
255 /// Helper function to implement many typical SizeChangeStrategy functions.
256 static SizeAndActionsVec
257 decreaseToSmallerTypesAndIncreaseToSmallest(const SizeAndActionsVec &v,
258 LegalizeAction DecreaseAction,
259 LegalizeAction IncreaseAction);
142260
143261 /// Determine what action should be taken to legalize the given generic
144262 /// instruction opcode, type-index and type. Requires computeTables to have
157275 std::tuple
158276 getAction(const MachineInstr &MI, const MachineRegisterInfo &MRI) const;
159277
160 /// Iterate the given function (typically something like doubling the width)
161 /// on Ty until we find a legal type for this operation.
162 Optional findLegalizableSize(const InstrAspect &Aspect,
163 function_ref NextType) const {
164 if (Aspect.Idx >= Actions[Aspect.Opcode - FirstOp].size())
165 return None;
166
167 LegalizeAction Action;
168 const TypeMap &Map = Actions[Aspect.Opcode - FirstOp][Aspect.Idx];
169 LLT Ty = Aspect.Type;
170 do {
171 Ty = NextType(Ty);
172 auto ActionIt = Map.find(Ty);
173 if (ActionIt == Map.end()) {
174 auto DefaultIt = DefaultActions.find(Aspect.Opcode);
175 if (DefaultIt == DefaultActions.end())
176 return None;
177 Action = DefaultIt->second;
178 } else
179 Action = ActionIt->second;
180 } while (needsLegalizingToDifferentSize(Action));
181 return Ty;
182 }
183
184 /// Find what type it's actually OK to perform the given operation on, given
185 /// the general approach we've decided to take.
186 Optional findLegalType(const InstrAspect &Aspect, LegalizeAction Action) const;
187
188 std::pair findLegalAction(const InstrAspect &Aspect,
189 LegalizeAction Action) const {
190 auto LegalType = findLegalType(Aspect, Action);
191 if (!LegalType)
192 return std::make_pair(LegalizeAction::Unsupported, LLT());
193 return std::make_pair(Action, *LegalType);
194 }
195
196 /// Find the specified \p Aspect in the primary (explicitly set) Actions
197 /// table. Returns either the action the target requested or NotFound if there
198 /// was no setAction call.
199 LegalizeAction findInActions(const InstrAspect &Aspect) const {
200 if (Aspect.Opcode < FirstOp || Aspect.Opcode > LastOp)
201 return NotFound;
202 if (Aspect.Idx >= Actions[Aspect.Opcode - FirstOp].size())
203 return NotFound;
204 const TypeMap &Map = Actions[Aspect.Opcode - FirstOp][Aspect.Idx];
205 auto ActionIt = Map.find(Aspect.Type);
206 if (ActionIt == Map.end())
207 return NotFound;
208
209 return ActionIt->second;
210 }
211
212278 bool isLegal(const MachineInstr &MI, const MachineRegisterInfo &MRI) const;
213279
214280 virtual bool legalizeCustom(MachineInstr &MI,
216282 MachineIRBuilder &MIRBuilder) const;
217283
218284 private:
285 /// The SizeAndActionsVec is a representation mapping between all natural
286 /// numbers and an Action. The natural number represents the bit size of
287 /// the InstrAspect. For example, for a target with native support for 32-bit
288 /// and 64-bit additions, you'd express that as:
289 /// setScalarAction(G_ADD, 0,
290 /// {{1, WidenScalar}, // bit sizes [ 1, 31[
291 /// {32, Legal}, // bit sizes [32, 33[
292 /// {33, WidenScalar}, // bit sizes [33, 64[
293 /// {64, Legal}, // bit sizes [64, 65[
294 /// {65, NarrowScalar} // bit sizes [65, +inf[
295 /// });
296 /// It may be that only 64-bit pointers are supported on your target:
297 /// setPointerAction(G_GEP, 0, LLT:pointer(1),
298 /// {{1, Unsupported}, // bit sizes [ 1, 63[
299 /// {64, Legal}, // bit sizes [64, 65[
300 /// {65, Unsupported}, // bit sizes [65, +inf[
301 /// });
302 void setScalarAction(const unsigned Opcode, const unsigned TypeIndex,
303 const SizeAndActionsVec &SizeAndActions) {
304 const unsigned OpcodeIdx = Opcode - FirstOp;
305 SmallVector &Actions = ScalarActions[OpcodeIdx];
306 setActions(TypeIndex, Actions, SizeAndActions);
307 }
308 void setPointerAction(const unsigned Opcode, const unsigned TypeIndex,
309 const unsigned AddressSpace,
310 const SizeAndActionsVec &SizeAndActions) {
311 const unsigned OpcodeIdx = Opcode - FirstOp;
312 if (AddrSpace2PointerActions[OpcodeIdx].find(AddressSpace) ==
313 AddrSpace2PointerActions[OpcodeIdx].end())
314 AddrSpace2PointerActions[OpcodeIdx][AddressSpace] = {{}};
315 SmallVector &Actions =
316 AddrSpace2PointerActions[OpcodeIdx].find(AddressSpace)->second;
317 setActions(TypeIndex, Actions, SizeAndActions);
318 }
319
320 /// If an operation on a given vector type (say ) isn't explicitly
321 /// specified, we proceed in 2 stages. First we legalize the underlying scalar
322 /// (so that there's at least one legal vector with that scalar), then we
323 /// adjust the number of elements in the vector so that it is legal. The
324 /// desired action in the first step is controlled by this function.
325 void setScalarInVectorAction(const unsigned Opcode, const unsigned TypeIndex,
326 const SizeAndActionsVec &SizeAndActions) {
327 unsigned OpcodeIdx = Opcode - FirstOp;
328 SmallVector &Actions =
329 ScalarInVectorActions[OpcodeIdx];
330 setActions(TypeIndex, Actions, SizeAndActions);
331 }
332
333 /// See also setScalarInVectorAction.
334 /// This function let's you specify the number of elements in a vector that
335 /// are legal for a legal element size.
336 void setVectorNumElementAction(const unsigned Opcode,
337 const unsigned TypeIndex,
338 const unsigned ElementSize,
339 const SizeAndActionsVec &SizeAndActions) {
340 const unsigned OpcodeIdx = Opcode - FirstOp;
341 if (NumElements2Actions[OpcodeIdx].find(ElementSize) ==
342 NumElements2Actions[OpcodeIdx].end())
343 NumElements2Actions[OpcodeIdx][ElementSize] = {{}};
344 SmallVector &Actions =
345 NumElements2Actions[OpcodeIdx].find(ElementSize)->second;
346 setActions(TypeIndex, Actions, SizeAndActions);
347 }
348
349 /// A partial SizeAndActionsVec potentially doesn't cover all bit sizes,
350 /// i.e. it's OK if it doesn't start from size 1.
351 static void checkPartialSizeAndActionsVector(const SizeAndActionsVec& v) {
352 #ifndef NDEBUG
353 // The sizes should be in increasing order
354 int prev_size = -1;
355 for(auto SizeAndAction: v) {
356 assert(SizeAndAction.first > prev_size);
357 prev_size = SizeAndAction.first;
358 }
359 // - for every Widen action, there should be a larger bitsize that
360 // can be legalized towards (e.g. Legal, Lower, Libcall or Custom
361 // action).
362 // - for every Narrow action, there should be a smaller bitsize that
363 // can be legalized towards.
364 int SmallestNarrowIdx = -1;
365 int LargestWidenIdx = -1;
366 int SmallestLegalizableToSameSizeIdx = -1;
367 int LargestLegalizableToSameSizeIdx = -1;
368 for(size_t i=0; i
369 switch (v[i].second) {
370 case FewerElements:
371 case NarrowScalar:
372 if (SmallestNarrowIdx == -1)
373 SmallestNarrowIdx = i;
374 break;
375 case WidenScalar:
376 case MoreElements:
377 LargestWidenIdx = i;
378 break;
379 case Unsupported:
380 break;
381 default:
382 if (SmallestLegalizableToSameSizeIdx == -1)
383 SmallestLegalizableToSameSizeIdx = i;
384 LargestLegalizableToSameSizeIdx = i;
385 }
386 }
387 if (SmallestNarrowIdx != -1) {
388 assert(SmallestLegalizableToSameSizeIdx != -1);
389 assert(SmallestNarrowIdx > SmallestLegalizableToSameSizeIdx);
390 }
391 if (LargestWidenIdx != -1)
392 assert(LargestWidenIdx < LargestLegalizableToSameSizeIdx);
393 #endif
394 }
395
396 /// A full SizeAndActionsVec must cover all bit sizes, i.e. must start with
397 /// from size 1.
398 static void checkFullSizeAndActionsVector(const SizeAndActionsVec& v) {
399 #ifndef NDEBUG
400 // Data structure invariant: The first bit size must be size 1.
401 assert(v.size() >= 1);
402 assert(v[0].first == 1);
403 checkPartialSizeAndActionsVector(v);
404 #endif
405 }
406
407 /// Sets actions for all bit sizes on a particular generic opcode, type
408 /// index and scalar or pointer type.
409 void setActions(unsigned TypeIndex,
410 SmallVector &Actions,
411 const SizeAndActionsVec &SizeAndActions) {
412 checkFullSizeAndActionsVector(SizeAndActions);
413 if (Actions.size() <= TypeIndex)
414 Actions.resize(TypeIndex + 1);
415 Actions[TypeIndex] = SizeAndActions;
416 }
417
418 static SizeAndAction findAction(const SizeAndActionsVec &Vec,
419 const uint32_t Size);
420
421 /// Returns the next action needed to get the scalar or pointer type closer
422 /// to being legal
423 /// E.g. findLegalAction({G_REM, 13}) should return
424 /// (WidenScalar, 32). After that, findLegalAction({G_REM, 32}) will
425 /// probably be called, which should return (Lower, 32).
426 /// This is assuming the setScalarAction on G_REM was something like:
427 /// setScalarAction(G_REM, 0,
428 /// {{1, WidenScalar}, // bit sizes [ 1, 31[
429 /// {32, Lower}, // bit sizes [32, 33[
430 /// {33, NarrowScalar} // bit sizes [65, +inf[
431 /// });
432 std::pair
433 findScalarLegalAction(const InstrAspect &Aspect) const;
434
435 /// Returns the next action needed towards legalizing the vector type.
436 std::pair
437 findVectorLegalAction(const InstrAspect &Aspect) const;
438
219439 static const int FirstOp = TargetOpcode::PRE_ISEL_GENERIC_OPCODE_START;
220440 static const int LastOp = TargetOpcode::PRE_ISEL_GENERIC_OPCODE_END;
221441
222 using TypeMap = DenseMap;
223 using SIVActionMap = DenseMap, LegalizeAction>;
224
225 SmallVector Actions[LastOp - FirstOp + 1];
226 SIVActionMap ScalarInVectorActions;
227 DenseMap, uint16_t> MaxLegalVectorElts;
228 DenseMap DefaultActions;
229
230 bool TablesInitialized = false;
442 // Data structures used temporarily during construction of legality data:
443 typedef DenseMap TypeMap;
444 SmallVector SpecifiedActions[LastOp - FirstOp + 1];
445 SmallVector
446 ScalarSizeChangeStrategies[LastOp - FirstOp + 1];
447 SmallVector
448 VectorElementSizeChangeStrategies[LastOp - FirstOp + 1];
449 bool TablesInitialized;
450
451 // Data structures used by getAction:
452 SmallVector ScalarActions[LastOp - FirstOp + 1];
453 SmallVector ScalarInVectorActions[LastOp - FirstOp + 1];
454 std::unordered_map>
455 AddrSpace2PointerActions[LastOp - FirstOp + 1];
456 std::unordered_map>
457 NumElements2Actions[LastOp - FirstOp + 1];
231458 };
232459
233 } // end namespace llvm
460 } // end namespace llvm.
234461
235462 #endif // LLVM_CODEGEN_GLOBALISEL_LEGALIZERINFO_H
9595 using probability_iterator = std::vector::iterator;
9696 using const_probability_iterator =
9797 std::vector::const_iterator;
98
99 Optional IrrLoopHeaderWeight;
98100
99101 /// Keep track of the physical registers that are livein of the basicblock.
100102 using LiveInVector = std::vector;
728730 /// Return the MCSymbol for this basic block.
729731 MCSymbol *getSymbol() const;
730732
733 Optional getIrrLoopHeaderWeight() const {
734 return IrrLoopHeaderWeight;
735 }
736
737 void setIrrLoopHeaderWeight(uint64_t Weight) {
738 IrrLoopHeaderWeight = Weight;
739 }
740
731741 private:
732742 /// Return probability iterator corresponding to the I successor iterator.
733743 probability_iterator getProbabilityIterator(succ_iterator I);
6161 Optional getBlockProfileCount(const MachineBasicBlock *MBB) const;
6262 Optional getProfileCountFromFreq(uint64_t Freq) const;
6363
64 bool isIrrLoopHeader(const MachineBasicBlock *MBB);
65
6466 const MachineFunction *getFunction() const;
6567 const MachineBranchProbabilityInfo *getMBPI() const;
6668 void view(const Twine &Name, bool isSimple = true) const;
300300 return Operands[i];
301301 }
302302
303 /// Return true if operand \p OpIdx is a subregister index.
304 bool isOperandSubregIdx(unsigned OpIdx) const {
305 assert(getOperand(OpIdx).getType() == MachineOperand::MO_Immediate &&
306 "Expected MO_Immediate operand type.");
307 if (isExtractSubreg() && OpIdx == 2)
308 return true;
309 if (isInsertSubreg() && OpIdx == 3)
310 return true;
311 if (isRegSequence() && OpIdx > 1 && (OpIdx % 2) == 0)
312 return true;
313 if (isSubregToReg() && OpIdx == 3)
314 return true;
315 return false;
316 }
317
303318 /// Returns the number of non-implicit operands.
304319 unsigned getNumExplicitOperands() const;
305320
416416 /// shuffles.
417417 FunctionPass *createExpandReductionsPass();
418418
419 // This pass expands memcmp() to load/stores.
420 FunctionPass *createExpandMemCmpPass();
421
422 /// Creates CFI Instruction Inserter pass. \see CFIInstrInserter.cpp
423 FunctionPass *createCFIInstrInserter();
424
419425 } // End llvm namespace
420426
421427 #endif
1919 #include "llvm/CodeGen/DFAPacketizer.h"
2020 #include "llvm/CodeGen/ScheduleDAG.h"
2121 #include "llvm/CodeGen/SelectionDAGISel.h"
22 #include "llvm/CodeGen/TargetInstrInfo.h"
2223 #include "llvm/MC/MCInstrItineraries.h"
23 #include "llvm/Target/TargetInstrInfo.h"
2424 #include "llvm/Target/TargetRegisterInfo.h"
2525
2626 namespace llvm {
1313 #include "llvm/ADT/SmallVector.h"
1414 #include "llvm/CodeGen/MachineInstr.h"
1515 #include "llvm/IR/CallingConv.h"
16 #include "llvm/MC/MCSymbol.h"
1617 #include "llvm/Support/Debug.h"
1718 #include
1819 #include
2425 class AsmPrinter;
2526 class MCExpr;
2627 class MCStreamer;
27 class MCSymbol;
2828 class raw_ostream;
2929 class TargetRegisterInfo;
3030
1616
1717 #include "llvm/ADT/DenseMap.h"
1818 #include "llvm/ADT/DenseSet.h"
19 #include "llvm/ADT/STLExtras.h"
1920 #include "llvm/ADT/SetVector.h"
2021 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/ADT/STLExtras.h"
2222 #include "llvm/CodeGen/RegisterScavenging.h"
23 #include "llvm/CodeGen/TargetInstrInfo.h"
2324 #include "llvm/Support/CommandLine.h"
24 #include "llvm/Target/TargetInstrInfo.h"
2525 #include "llvm/Target/TargetSubtargetInfo.h"
2626 #include
2727 #include
0 //===-- llvm/CodeGen/TargetFrameLowering.h ---------------------------*- C++ -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Interface to describe the layout of a stack frame on the target machine.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #ifndef LLVM_CODEGEN_TARGETFRAMELOWERING_H
14 #define LLVM_CODEGEN_TARGETFRAMELOWERING_H
15
16 #include "llvm/CodeGen/MachineBasicBlock.h"
17 #include
18 #include
19
20 namespace llvm {
21 class BitVector;
22 class CalleeSavedInfo;
23 class MachineFunction;
24 class RegScavenger;
25
26 /// Information about stack frame layout on the target. It holds the direction
27 /// of stack growth, the known stack alignment on entry to each function, and
28 /// the offset to the locals area.
29 ///
30 /// The offset to the local area is the offset from the stack pointer on
31 /// function entry to the first location where function data (local variables,
32 /// spill locations) can be stored.
33 class TargetFrameLowering {
34 public:
35 enum StackDirection {
36 StackGrowsUp, // Adding to the stack increases the stack address
37 StackGrowsDown // Adding to the stack decreases the stack address
38 };
39
40 // Maps a callee saved register to a stack slot with a fixed offset.
41 struct SpillSlot {
42 unsigned Reg;
43 int Offset; // Offset relative to stack pointer on function entry.
44 };
45 private:
46 StackDirection StackDir;
47 unsigned StackAlignment;
48 unsigned TransientStackAlignment;
49 int LocalAreaOffset;
50 bool StackRealignable;
51 public:
52 TargetFrameLowering(StackDirection D, unsigned StackAl, int LAO,
53 unsigned TransAl = 1, bool StackReal = true)
54 : StackDir(D), StackAlignment(StackAl), TransientStackAlignment(TransAl),
55 LocalAreaOffset(LAO), StackRealignable(StackReal) {}
56
57 virtual ~TargetFrameLowering();
58
59 // These methods return information that describes the abstract stack layout
60 // of the target machine.
61
62 /// getStackGrowthDirection - Return the direction the stack grows
63 ///
64 StackDirection getStackGrowthDirection() const { return StackDir; }
65
66 /// getStackAlignment - This method returns the number of bytes to which the
67 /// stack pointer must be aligned on entry to a function. Typically, this
68 /// is the largest alignment for any data object in the target.
69 ///
70 unsigned getStackAlignment() const { return StackAlignment; }
71
72 /// alignSPAdjust - This method aligns the stack adjustment to the correct
73 /// alignment.
74 ///
75 int alignSPAdjust(int SPAdj) const {
76 if (SPAdj < 0) {
77 SPAdj = -alignTo(-SPAdj, StackAlignment);
78 } else {
79 SPAdj = alignTo(SPAdj, StackAlignment);
80 }
81 return SPAdj;
82 }
83
84 /// getTransientStackAlignment - This method returns the number of bytes to
85 /// which the stack pointer must be aligned at all times, even between
86 /// calls.
87 ///
88 unsigned getTransientStackAlignment() const {
89 return TransientStackAlignment;
90 }
91
92 /// isStackRealignable - This method returns whether the stack can be
93 /// realigned.
94 bool isStackRealignable() const {
95 return StackRealignable;
96 }
97
98 /// Return the skew that has to be applied to stack alignment under
99 /// certain conditions (e.g. stack was adjusted before function \p MF
100 /// was called).
101 virtual unsigned getStackAlignmentSkew(const MachineFunction &MF) const;
102
103 /// getOffsetOfLocalArea - This method returns the offset of the local area
104 /// from the stack pointer on entrance to a function.
105 ///
106 int getOffsetOfLocalArea() const { return LocalAreaOffset; }
107
108 /// isFPCloseToIncomingSP - Return true if the frame pointer is close to
109 /// the incoming stack pointer, false if it is close to the post-prologue
110 /// stack pointer.
111 virtual bool isFPCloseToIncomingSP() const { return true; }
112
113 /// assignCalleeSavedSpillSlots - Allows target to override spill slot
114 /// assignment logic. If implemented, assignCalleeSavedSpillSlots() should
115 /// assign frame slots to all CSI entries and return true. If this method
116 /// returns false, spill slots will be assigned using generic implementation.
117 /// assignCalleeSavedSpillSlots() may add, delete or rearrange elements of
118 /// CSI.
119 virtual bool
120 assignCalleeSavedSpillSlots(MachineFunction &MF,
121 const TargetRegisterInfo *TRI,
122 std::vector &CSI) const {
123 return false;
124 }
125
126 /// getCalleeSavedSpillSlots - This method returns a pointer to an array of
127 /// pairs, that contains an entry for each callee saved register that must be
128 /// spilled to a particular stack location if it is spilled.
129 ///
130 /// Each entry in this array contains a pair, indicating the
131 /// fixed offset from the incoming stack pointer that each register should be
132 /// spilled at. If a register is not listed here, the code generator is
133 /// allowed to spill it anywhere it chooses.
134 ///
135 virtual const SpillSlot *
136 getCalleeSavedSpillSlots(unsigned &NumEntries) const {
137 NumEntries = 0;
138 return nullptr;
139 }
140
141 /// targetHandlesStackFrameRounding - Returns true if the target is
142 /// responsible for rounding up the stack frame (probably at emitPrologue
143 /// time).
144 virtual bool targetHandlesStackFrameRounding() const {
145 return false;
146 }
147
148 /// Returns true if the target will correctly handle shrink wrapping.
149 virtual bool enableShrinkWrapping(const MachineFunction &MF) const {
150 return false;
151 }
152
153 /// Returns true if the stack slot holes in the fixed and callee-save stack
154 /// area should be used when allocating other stack locations to reduce stack
155 /// size.
156 virtual bool enableStackSlotScavenging(const MachineFunction &MF) const {
157 return false;
158 }
159
160 /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
161 /// the function.
162 virtual void emitPrologue(MachineFunction &MF,
163 MachineBasicBlock &MBB) const = 0;
164 virtual void emitEpilogue(MachineFunction &MF,
165 MachineBasicBlock &MBB) const = 0;
166
167 /// Replace a StackProbe stub (if any) with the actual probe code inline
168 virtual void inlineStackProbe(MachineFunction &MF,
169 MachineBasicBlock &PrologueMBB) const {}
170
171 /// Adjust the prologue to have the function use segmented stacks. This works
172 /// by adding a check even before the "normal" function prologue.
173 virtual void adjustForSegmentedStacks(MachineFunction &MF,
174 MachineBasicBlock &PrologueMBB) const {}
175
176 /// Adjust the prologue to add Erlang Run-Time System (ERTS) specific code in
177 /// the assembly prologue to explicitly handle the stack.
178 virtual void adjustForHiPEPrologue(MachineFunction &MF,
179 MachineBasicBlock &PrologueMBB) const {}
180
181 /// spillCalleeSavedRegisters - Issues instruction(s) to spill all callee
182 /// saved registers and returns true if it isn't possible / profitable to do
183 /// so by issuing a series of store instructions via
184 /// storeRegToStackSlot(). Returns false otherwise.
185 virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
186 MachineBasicBlock::iterator MI,
187 const std::vector &CSI,
188 const TargetRegisterInfo *TRI) const {
189 return false;
190 }
191
192 /// restoreCalleeSavedRegisters - Issues instruction(s) to restore all callee
193 /// saved registers and returns true if it isn't possible / profitable to do
194 /// so by issuing a series of load instructions via loadRegToStackSlot().
195 /// If it returns true, and any of the registers in CSI is not restored,
196 /// it sets the corresponding Restored flag in CSI to false.
197 /// Returns false otherwise.
198 virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
199 MachineBasicBlock::iterator MI,
200 std::vector &CSI,
201 const TargetRegisterInfo *TRI) const {
202 return false;
203 }
204
205 /// Return true if the target needs to disable frame pointer elimination.
206 virtual bool noFramePointerElim(const MachineFunction &MF) const;
207
208 /// hasFP - Return true if the specified function should have a dedicated
209 /// frame pointer register. For most targets this is true only if the function
210 /// has variable sized allocas or if frame pointer elimination is disabled.
211 virtual bool hasFP(const MachineFunction &MF) const = 0;
212
213 /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
214 /// not required, we reserve argument space for call sites in the function
215 /// immediately on entry to the current function. This eliminates the need for
216 /// add/sub sp brackets around call sites. Returns true if the call frame is
217 /// included as part of the stack frame.
218 virtual bool hasReservedCallFrame(const MachineFunction &MF) const {
219 return !hasFP(MF);
220 }
221
222 /// canSimplifyCallFramePseudos - When possible, it's best to simplify the
223 /// call frame pseudo ops before doing frame index elimination. This is
224 /// possible only when frame index references between the pseudos won't
225 /// need adjusting for the call frame adjustments. Normally, that's true
226 /// if the function has a reserved call frame or a frame pointer. Some
227 /// targets (Thumb2, for example) may have more complicated criteria,
228 /// however, and can override this behavior.
229 virtual bool canSimplifyCallFramePseudos(const MachineFunction &MF) const {
230 return hasReservedCallFrame(MF) || hasFP(MF);
231 }
232
233 // needsFrameIndexResolution - Do we need to perform FI resolution for
234 // this function. Normally, this is required only when the function
235 // has any stack objects. However, targets may want to override this.
236 virtual bool needsFrameIndexResolution(const MachineFunction &MF) const;
237
238 /// getFrameIndexReference - This method should return the base register
239 /// and offset used to reference a frame index location. The offset is
240 /// returned directly, and the base register is returned via FrameReg.
241 virtual int getFrameIndexReference(const MachineFunction &MF, int FI,
242 unsigned &FrameReg) const;
243
244 /// Same as \c getFrameIndexReference, except that the stack pointer (as
245 /// opposed to the frame pointer) will be the preferred value for \p
246 /// FrameReg. This is generally used for emitting statepoint or EH tables that
247 /// use offsets from RSP. If \p IgnoreSPUpdates is true, the returned
248 /// offset is only guaranteed to be valid with respect to the value of SP at
249 /// the end of the prologue.
250 virtual int getFrameIndexReferencePreferSP(const MachineFunction &MF, int FI,
251 unsigned &FrameReg,
252 bool IgnoreSPUpdates) const {
253 // Always safe to dispatch to getFrameIndexReference.
254 return getFrameIndexReference(MF, FI, FrameReg);
255 }
256
257 /// This method determines which of the registers reported by
258 /// TargetRegisterInfo::getCalleeSavedRegs() should actually get saved.
259 /// The default implementation checks populates the \p SavedRegs bitset with
260 /// all registers which are modified in the function, targets may override
261 /// this function to save additional registers.
262 /// This method also sets up the register scavenger ensuring there is a free
263 /// register or a frameindex available.
264 virtual void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
265 RegScavenger *RS = nullptr) const;
266
267 /// processFunctionBeforeFrameFinalized - This method is called immediately
268 /// before the specified function's frame layout (MF.getFrameInfo()) is
269 /// finalized. Once the frame is finalized, MO_FrameIndex operands are
270 /// replaced with direct constants. This method is optional.
271 ///
272 virtual void processFunctionBeforeFrameFinalized(MachineFunction &MF,
273 RegScavenger *RS = nullptr) const {
274 }
275
276 virtual unsigned getWinEHParentFrameOffset(const MachineFunction &MF) const {
277 report_fatal_error("WinEH not implemented for this target");
278 }
279
280 /// This method is called during prolog/epilog code insertion to eliminate
281 /// call frame setup and destroy pseudo instructions (but only if the Target
282 /// is using them). It is responsible for eliminating these instructions,
283 /// replacing them with concrete instructions. This method need only be
284 /// implemented if using call frame setup/destroy pseudo instructions.
285 /// Returns an iterator pointing to the instruction after the replaced one.
286 virtual MachineBasicBlock::iterator
287 eliminateCallFramePseudoInstr(MachineFunction &MF,
288 MachineBasicBlock &MBB,
289 MachineBasicBlock::iterator MI) const {
290 llvm_unreachable("Call Frame Pseudo Instructions do not exist on this "
291 "target!");
292 }
293
294
295 /// Order the symbols in the local stack frame.
296 /// The list of objects that we want to order is in \p objectsToAllocate as
297 /// indices into the MachineFrameInfo. The array can be reordered in any way
298 /// upon return. The contents of the array, however, may not be modified (i.e.
299 /// only their order may be changed).
300 /// By default, just maintain the original order.
301 virtual void
302 orderFrameObjects(const MachineFunction &MF,
303 SmallVectorImpl &objectsToAllocate) const {
304 }
305
306 /// Check whether or not the given \p MBB can be used as a prologue
307 /// for the target.
308 /// The prologue will be inserted first in this basic block.
309 /// This method is used by the shrink-wrapping pass to decide if
310 /// \p MBB will be correctly handled by the target.
311 /// As soon as the target enable shrink-wrapping without overriding
312 /// this method, we assume that each basic block is a valid
313 /// prologue.
314 virtual bool canUseAsPrologue(const MachineBasicBlock &MBB) const {
315 return true;
316 }
317
318 /// Check whether or not the given \p MBB can be used as a epilogue
319 /// for the target.
320 /// The epilogue will be inserted before the first terminator of that block.
321 /// This method is used by the shrink-wrapping pass to decide if
322 /// \p MBB will be correctly handled by the target.
323 /// As soon as the target enable shrink-wrapping without overriding
324 /// this method, we assume that each basic block is a valid
325 /// epilogue.
326 virtual bool canUseAsEpilogue(const MachineBasicBlock &MBB) const {
327 return true;
328 }
329
330 /// Check if given function is safe for not having callee saved registers.
331 /// This is used when interprocedural register allocation is enabled.
332 static bool isSafeForNoCSROpt(const Function *F) {
333 if (!F->hasLocalLinkage() || F->hasAddressTaken() ||
334 !F->hasFnAttribute(Attribute::NoRecurse))
335 return false;
336 // Function should not be optimized as tail call.
337 for (const User *U : F->users())
338 if (auto CS = ImmutableCallSite(U))
339 if (CS.isTailCall())
340 return false;
341 return true;
342 }
343
344 /// Return initial CFA offset value i.e. the one valid at the beginning of the
345 /// function (before any stack operations).
346 virtual int getInitialCFAOffset(const MachineFunction &MF) const;
347
348 /// Return initial CFA register value i.e. the one valid at the beginning of
349 /// the function (before any stack operations).
350 virtual unsigned getInitialCFARegister(const MachineFunction &MF) const;
351 };
352
353 } // End llvm namespace
354
355 #endif
0 //===- llvm/CodeGen/TargetInstrInfo.h - Instruction Info --------*- C++ -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file describes the target machine instruction set to the code generator.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #ifndef LLVM_TARGET_TARGETINSTRINFO_H
14 #define LLVM_TARGET_TARGETINSTRINFO_H
15
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/DenseMap.h"
18 #include "llvm/ADT/DenseMapInfo.h"
19 #include "llvm/ADT/None.h"
20 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
21 #include "llvm/CodeGen/MachineBasicBlock.h"
22 #include "llvm/CodeGen/MachineCombinerPattern.h"
23 #include "llvm/CodeGen/MachineFunction.h"
24 #include "llvm/CodeGen/MachineInstr.h"
25 #include "llvm/CodeGen/MachineLoopInfo.h"
26 #include "llvm/CodeGen/MachineOperand.h"
27 #include "llvm/CodeGen/PseudoSourceValue.h"
28 #include "llvm/MC/MCInstrInfo.h"
29 #include "llvm/Support/BranchProbability.h"
30 #include "llvm/Support/ErrorHandling.h"
31 #include
32 #include
33 #include
34 #include
35 #include
36
37 namespace llvm {
38
39 class DFAPacketizer;
40 class InstrItineraryData;
41 class LiveVariables;
42 class MachineMemOperand;
43 class MachineRegisterInfo;
44 class MCAsmInfo;
45 class MCInst;
46 struct MCSchedModel;
47 class Module;
48 class ScheduleDAG;
49 class ScheduleHazardRecognizer;
50 class SDNode;
51 class SelectionDAG;
52 class RegScavenger;
53 class TargetRegisterClass;
54 class TargetRegisterInfo;
55 class TargetSchedModel;
56 class TargetSubtargetInfo;
57
58 template class SmallVectorImpl;
59
60 //---------------------------------------------------------------------------
61 ///
62 /// TargetInstrInfo - Interface to description of machine instruction set
63 ///
64 class TargetInstrInfo : public MCInstrInfo {
65 public:
66 TargetInstrInfo(unsigned CFSetupOpcode = ~0u, unsigned CFDestroyOpcode = ~0u,
67 unsigned CatchRetOpcode = ~0u, unsigned ReturnOpcode = ~0u)
68 : CallFrameSetupOpcode(CFSetupOpcode),
69 CallFrameDestroyOpcode(CFDestroyOpcode), CatchRetOpcode(CatchRetOpcode),
70 ReturnOpcode(ReturnOpcode) {}
71 TargetInstrInfo(const TargetInstrInfo &) = delete;
72 TargetInstrInfo &operator=(const TargetInstrInfo &) = delete;
73 virtual ~TargetInstrInfo();
74
75 static bool isGenericOpcode(unsigned Opc) {
76 return Opc <= TargetOpcode::GENERIC_OP_END;
77 }
78
79 /// Given a machine instruction descriptor, returns the register
80 /// class constraint for OpNum, or NULL.
81 const TargetRegisterClass *getRegClass(const MCInstrDesc &TID, unsigned OpNum,
82 const TargetRegisterInfo *TRI,
83 const MachineFunction &MF) const;
84
85 /// Return true if the instruction is trivially rematerializable, meaning it
86 /// has no side effects and requires no operands that aren't always available.
87 /// This means the only allowed uses are constants and unallocatable physical
88 /// registers so that the instructions result is independent of the place
89 /// in the function.
90 bool isTriviallyReMaterializable(const MachineInstr &MI,
91 AliasAnalysis *AA = nullptr) const {
92 return MI.getOpcode() == TargetOpcode::IMPLICIT_DEF ||
93 (MI.getDesc().isRematerializable() &&
94 (isReallyTriviallyReMaterializable(MI, AA) ||
95 isReallyTriviallyReMaterializableGeneric(MI, AA)));
96 }
97
98 protected:
99 /// For instructions with opcodes for which the M_REMATERIALIZABLE flag is
100 /// set, this hook lets the target specify whether the instruction is actually
101 /// trivially rematerializable, taking into consideration its operands. This
102 /// predicate must return false if the instruction has any side effects other
103 /// than producing a value, or if it requres any address registers that are
104 /// not always available.
105 /// Requirements must be check as stated in isTriviallyReMaterializable() .
106 virtual bool isReallyTriviallyReMaterializable(const MachineInstr &MI,
107 AliasAnalysis *AA) const {
108 return false;
109 }
110
111 /// This method commutes the operands of the given machine instruction MI.
112 /// The operands to be commuted are specified by their indices OpIdx1 and
113 /// OpIdx2.
114 ///
115 /// If a target has any instructions that are commutable but require
116 /// converting to different instructions or making non-trivial changes
117 /// to commute them, this method can be overloaded to do that.
118 /// The default implementation simply swaps the commutable operands.
119 ///
120 /// If NewMI is false, MI is modified in place and returned; otherwise, a
121 /// new machine instruction is created and returned.
122 ///
123 /// Do not call this method for a non-commutable instruction.
124 /// Even though the instruction is commutable, the method may still
125 /// fail to commute the operands, null pointer is returned in such cases.
126 virtual MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI,
127 unsigned OpIdx1,
128 unsigned OpIdx2) const;
129
130 /// Assigns the (CommutableOpIdx1, CommutableOpIdx2) pair of commutable
131 /// operand indices to (ResultIdx1, ResultIdx2).
132 /// One or both input values of the pair: (ResultIdx1, ResultIdx2) may be
133 /// predefined to some indices or be undefined (designated by the special
134 /// value 'CommuteAnyOperandIndex').
135 /// The predefined result indices cannot be re-defined.
136 /// The function returns true iff after the result pair redefinition
137 /// the fixed result pair is equal to or equivalent to the source pair of
138 /// indices: (CommutableOpIdx1, CommutableOpIdx2). It is assumed here that
139 /// the pairs (x,y) and (y,x) are equivalent.
140 static bool fixCommutedOpIndices(unsigned &ResultIdx1, unsigned &ResultIdx2,
141 unsigned CommutableOpIdx1,
142 unsigned CommutableOpIdx2);
143
144 private:
145 /// For instructions with opcodes for which the M_REMATERIALIZABLE flag is
146 /// set and the target hook isReallyTriviallyReMaterializable returns false,
147 /// this function does target-independent tests to determine if the
148 /// instruction is really trivially rematerializable.
149 bool isReallyTriviallyReMaterializableGeneric(const MachineInstr &MI,
150 AliasAnalysis *AA) const;
151
152 public:
153 /// These methods return the opcode of the frame setup/destroy instructions
154 /// if they exist (-1 otherwise). Some targets use pseudo instructions in
155 /// order to abstract away the difference between operating with a frame
156 /// pointer and operating without, through the use of these two instructions.
157 ///
158 unsigned getCallFrameSetupOpcode() const { return CallFrameSetupOpcode; }
159 unsigned getCallFrameDestroyOpcode() const { return CallFrameDestroyOpcode; }
160
161 /// Returns true if the argument is a frame pseudo instruction.
162 bool isFrameInstr(const MachineInstr &I) const {
163 return I.getOpcode() == getCallFrameSetupOpcode() ||
164 I.getOpcode() == getCallFrameDestroyOpcode();
165 }
166
167 /// Returns true if the argument is a frame setup pseudo instruction.
168 bool isFrameSetup(const MachineInstr &I) const {
169 return I.getOpcode() == getCallFrameSetupOpcode();
170 }
171
172 /// Returns size of the frame associated with the given frame instruction.
173 /// For frame setup instruction this is frame that is set up space set up
174 /// after the instruction. For frame destroy instruction this is the frame
175 /// freed by the caller.
176 /// Note, in some cases a call frame (or a part of it) may be prepared prior
177 /// to the frame setup instruction. It occurs in the calls that involve
178 /// inalloca arguments. This function reports only the size of the frame part
179 /// that is set up between the frame setup and destroy pseudo instructions.
180 int64_t getFrameSize(const MachineInstr &I) const {
181 assert(isFrameInstr(I) && "Not a frame instruction");
182 assert(I.getOperand(0).getImm() >= 0);
183 return I.getOperand(0).getImm();
184 }
185
186 /// Returns the total frame size, which is made up of the space set up inside
187 /// the pair of frame start-stop instructions and the space that is set up
188 /// prior to the pair.
189 int64_t getFrameTotalSize(const MachineInstr &I) const {
190 if (isFrameSetup(I)) {
191 assert(I.getOperand(1).getImm() >= 0 &&
192 "Frame size must not be negative");
193 return getFrameSize(I) + I.getOperand(1).getImm();
194 }
195 return getFrameSize(I);
196 }
197
198 unsigned getCatchReturnOpcode() const { return CatchRetOpcode; }
199 unsigned getReturnOpcode() const { return ReturnOpcode; }
200
201 /// Returns the actual stack pointer adjustment made by an instruction
202 /// as part of a call sequence. By default, only call frame setup/destroy
203 /// instructions adjust the stack, but targets may want to override this
204 /// to enable more fine-grained adjustment, or adjust by a different value.
205 virtual int getSPAdjust(const MachineInstr &MI) const;
206
207 /// Return true if the instruction is a "coalescable" extension instruction.
208 /// That is, it's like a copy where it's legal for the source to overlap the
209 /// destination. e.g. X86::MOVSX64rr32. If this returns true, then it's
210 /// expected the pre-extension value is available as a subreg of the result
211 /// register. This also returns the sub-register index in SubIdx.
212 virtual bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg,
213 unsigned &DstReg, unsigned &SubIdx) const {
214 return false;
215 }
216
217 /// If the specified machine instruction is a direct
218 /// load from a stack slot, return the virtual or physical register number of
219 /// the destination along with the FrameIndex of the loaded stack slot. If
220 /// not, return 0. This predicate must return 0 if the instruction has
221 /// any side effects other than loading from the stack slot.
222 virtual unsigned isLoadFromStackSlot(const MachineInstr &MI,
223 int &FrameIndex) const {
224 return 0;
225 }
226
227 /// Check for post-frame ptr elimination stack locations as well.
228 /// This uses a heuristic so it isn't reliable for correctness.
229 virtual unsigned isLoadFromStackSlotPostFE(const MachineInstr &MI,
230 int &FrameIndex) const {
231 return 0;
232 }
233
234 /// If the specified machine instruction has a load from a stack slot,
235 /// return true along with the FrameIndex of the loaded stack slot and the
236 /// machine mem operand containing the reference.
237 /// If not, return false. Unlike isLoadFromStackSlot, this returns true for
238 /// any instructions that loads from the stack. This is just a hint, as some
239 /// cases may be missed.
240 virtual bool hasLoadFromStackSlot(const MachineInstr &MI,
241 const MachineMemOperand *&MMO,
242 int &FrameIndex) const;
243
244 /// If the specified machine instruction is a direct
245 /// store to a stack slot, return the virtual or physical register number of
246 /// the source reg along with the FrameIndex of the loaded stack slot. If
247 /// not, return 0. This predicate must return 0 if the instruction has
248 /// any side effects other than storing to the stack slot.
249 virtual unsigned isStoreToStackSlot(const MachineInstr &MI,
250 int &FrameIndex) const {
251 return 0;
252 }
253
254 /// Check for post-frame ptr elimination stack locations as well.
255 /// This uses a heuristic, so it isn't reliable for correctness.
256 virtual unsigned isStoreToStackSlotPostFE(const MachineInstr &MI,
257 int &FrameIndex) const {
258 return 0;
259 }
260
261 /// If the specified machine instruction has a store to a stack slot,
262 /// return true along with the FrameIndex of the loaded stack slot and the
263 /// machine mem operand containing the reference.
264 /// If not, return false. Unlike isStoreToStackSlot,
265 /// this returns true for any instructions that stores to the
266 /// stack. This is just a hint, as some cases may be missed.
267 virtual bool hasStoreToStackSlot(const MachineInstr &MI,
268 const MachineMemOperand *&MMO,
269 int &FrameIndex) const;
270
271 /// Return true if the specified machine instruction
272 /// is a copy of one stack slot to another and has no other effect.
273 /// Provide the identity of the two frame indices.
274 virtual bool isStackSlotCopy(const MachineInstr &MI, int &DestFrameIndex,
275 int &SrcFrameIndex) const {
276 return false;
277 }
278
279 /// Compute the size in bytes and offset within a stack slot of a spilled
280 /// register or subregister.
281 ///
282 /// \param [out] Size in bytes of the spilled value.
283 /// \param [out] Offset in bytes within the stack slot.
284 /// \returns true if both Size and Offset are successfully computed.
285 ///
286 /// Not all subregisters have computable spill slots. For example,
287 /// subregisters registers may not be byte-sized, and a pair of discontiguous
288 /// subregisters has no single offset.
289 ///
290 /// Targets with nontrivial bigendian implementations may need to override
291 /// this, particularly to support spilled vector registers.
292 virtual bool getStackSlotRange(const TargetRegisterClass *RC, unsigned SubIdx,
293 unsigned &Size, unsigned &Offset,
294 const MachineFunction &MF) const;
295
296 /// Returns the size in bytes of the specified MachineInstr, or ~0U
297 /// when this function is not implemented by a target.
298 virtual unsigned getInstSizeInBytes(const MachineInstr &MI) const {
299 return ~0U;
300 }
301
302 /// Return true if the instruction is as cheap as a move instruction.
303 ///
304 /// Targets for different archs need to override this, and different
305 /// micro-architectures can also be finely tuned inside.
306 virtual bool isAsCheapAsAMove(const MachineInstr &MI) const {
307 return MI.isAsCheapAsAMove();
308 }
309
310 /// Return true if the instruction should be sunk by MachineSink.
311 ///
312 /// MachineSink determines on its own whether the instruction is safe to sink;
313 /// this gives the target a hook to override the default behavior with regards
314 /// to which instructions should be sunk.
315 virtual bool shouldSink(const MachineInstr &MI) const { return true; }
316
317 /// Re-issue the specified 'original' instruction at the
318 /// specific location targeting a new destination register.
319 /// The register in Orig->getOperand(0).getReg() will be substituted by
320 /// DestReg:SubIdx. Any existing subreg index is preserved or composed with
321 /// SubIdx.
322 virtual void reMaterialize(MachineBasicBlock &MBB,
323 MachineBasicBlock::iterator MI, unsigned DestReg,
324 unsigned SubIdx, const MachineInstr &Orig,
325 const TargetRegisterInfo &TRI) const;
326
327 /// \brief Clones instruction or the whole instruction bundle \p Orig and
328 /// insert into \p MBB before \p InsertBefore. The target may update operands
329 /// that are required to be unique.
330 ///
331 /// \p Orig must not return true for MachineInstr::isNotDuplicable().
332 virtual MachineInstr &duplicate(MachineBasicBlock &MBB,
333 MachineBasicBlock::iterator InsertBefore,
334 const MachineInstr &Orig) const;
335
336 /// This method must be implemented by targets that
337 /// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target
338 /// may be able to convert a two-address instruction into one or more true
339 /// three-address instructions on demand. This allows the X86 target (for
340 /// example) to convert ADD and SHL instructions into LEA instructions if they
341 /// would require register copies due to two-addressness.
342 ///
343 /// This method returns a null pointer if the transformation cannot be
344 /// performed, otherwise it returns the last new instruction.
345 ///
346 virtual MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI,
347 MachineInstr &MI,
348 LiveVariables *LV) const {
349 return nullptr;
350 }
351
352 // This constant can be used as an input value of operand index passed to
353 // the method findCommutedOpIndices() to tell the method that the
354 // corresponding operand index is not pre-defined and that the method
355 // can pick any commutable operand.
356 static const unsigned CommuteAnyOperandIndex = ~0U;
357
358 /// This method commutes the operands of the given machine instruction MI.
359 ///
360 /// The operands to be commuted are specified by their indices OpIdx1 and
361 /// OpIdx2. OpIdx1 and OpIdx2 arguments may be set to a special value
362 /// 'CommuteAnyOperandIndex', which means that the method is free to choose
363 /// any arbitrarily chosen commutable operand. If both arguments are set to
364 /// 'CommuteAnyOperandIndex' then the method looks for 2 different commutable
365 /// operands; then commutes them if such operands could be found.
366 ///
367 /// If NewMI is false, MI is modified in place and returned; otherwise, a
368 /// new machine instruction is created and returned.
369 ///
370 /// Do not call this method for a non-commutable instruction or
371 /// for non-commutable operands.
372 /// Even though the instruction is commutable, the method may still
373 /// fail to commute the operands, null pointer is returned in such cases.
374 MachineInstr *
375 commuteInstruction(MachineInstr &MI, bool NewMI = false,
376 unsigned OpIdx1 = CommuteAnyOperandIndex,
377 unsigned OpIdx2 = CommuteAnyOperandIndex) const;
378
379 /// Returns true iff the routine could find two commutable operands in the
380 /// given machine instruction.
381 /// The 'SrcOpIdx1' and 'SrcOpIdx2' are INPUT and OUTPUT arguments.
382 /// If any of the INPUT values is set to the special value
383 /// 'CommuteAnyOperandIndex' then the method arbitrarily picks a commutable
384 /// operand, then returns its index in the corresponding argument.
385 /// If both of INPUT values are set to 'CommuteAnyOperandIndex' then method
386 /// looks for 2 commutable operands.
387 /// If INPUT values refer to some operands of MI, then the method simply
388 /// returns true if the corresponding operands are commutable and returns
389 /// false otherwise.
390 ///
391 /// For example, calling this method this way:
392 /// unsigned Op1 = 1, Op2 = CommuteAnyOperandIndex;
393 /// findCommutedOpIndices(MI, Op1, Op2);
394 /// can be interpreted as a query asking to find an operand that would be
395 /// commutable with the operand#1.
396 virtual bool findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1,
397 unsigned &SrcOpIdx2) const;
398
399 /// A pair composed of a register and a sub-register index.
400 /// Used to give some type checking when modeling Reg:SubReg.
401 struct RegSubRegPair {
402 unsigned Reg;
403 unsigned SubReg;
404
405 RegSubRegPair(unsigned Reg = 0, unsigned SubReg = 0)
406 : Reg(Reg), SubReg(SubReg) {}
407 };
408
409 /// A pair composed of a pair of a register and a sub-register index,
410 /// and another sub-register index.
411 /// Used to give some type checking when modeling Reg:SubReg1, SubReg2.
412 struct RegSubRegPairAndIdx : RegSubRegPair {
413 unsigned SubIdx;
414
415 RegSubRegPairAndIdx(unsigned Reg = 0, unsigned SubReg = 0,
416 unsigned SubIdx = 0)
417 : RegSubRegPair(Reg, SubReg), SubIdx(SubIdx) {}
418 };
419
420 /// Build the equivalent inputs of a REG_SEQUENCE for the given \p MI
421 /// and \p DefIdx.
422 /// \p [out] InputRegs of the equivalent REG_SEQUENCE. Each element of
423 /// the list is modeled as <Reg:SubReg, SubIdx>.
424 /// E.g., REG_SEQUENCE vreg1:sub1, sub0, vreg2, sub1 would produce
425 /// two elements:
426 /// - vreg1:sub1, sub0
427 /// - vreg2<:0>, sub1
428 ///
429 /// \returns true if it is possible to build such an input sequence
430 /// with the pair \p MI, \p DefIdx. False otherwise.
431 ///
432 /// \pre MI.isRegSequence() or MI.isRegSequenceLike().
433 ///
434 /// \note The generic implementation does not provide any support for
435 /// MI.isRegSequenceLike(). In other words, one has to override
436 /// getRegSequenceLikeInputs for target specific instructions.
437 bool
438 getRegSequenceInputs(const MachineInstr &MI, unsigned DefIdx,
439 SmallVectorImpl &InputRegs) const;
440
441 /// Build the equivalent inputs of a EXTRACT_SUBREG for the given \p MI
442 /// and \p DefIdx.
443 /// \p [out] InputReg of the equivalent EXTRACT_SUBREG.
444 /// E.g., EXTRACT_SUBREG vreg1:sub1, sub0, sub1 would produce:
445 /// - vreg1:sub1, sub0
446 ///
447 /// \returns true if it is possible to build such an input sequence
448 /// with the pair \p MI, \p DefIdx. False otherwise.
449 ///
450 /// \pre MI.isExtractSubreg() or MI.isExtractSubregLike().
451 ///
452 /// \note The generic implementation does not provide any support for
453 /// MI.isExtractSubregLike(). In other words, one has to override
454 /// getExtractSubregLikeInputs for target specific instructions.
455 bool getExtractSubregInputs(const MachineInstr &MI, unsigned DefIdx,
456 RegSubRegPairAndIdx &InputReg) const;
457
458 /// Build the equivalent inputs of a INSERT_SUBREG for the given \p MI
459 /// and \p DefIdx.
460 /// \p [out] BaseReg and \p [out] InsertedReg contain
461 /// the equivalent inputs of INSERT_SUBREG.
462 /// E.g., INSERT_SUBREG vreg0:sub0, vreg1:sub1, sub3 would produce:
463 /// - BaseReg: vreg0:sub0
464 /// - InsertedReg: vreg1:sub1, sub3
465 ///
466 /// \returns true if it is possible to build such an input sequence
467 /// with the pair \p MI, \p DefIdx. False otherwise.
468 ///
469 /// \pre MI.isInsertSubreg() or MI.isInsertSubregLike().
470 ///
471 /// \note The generic implementation does not provide any support for
472 /// MI.isInsertSubregLike(). In other words, one has to override
473 /// getInsertSubregLikeInputs for target specific instructions.
474 bool getInsertSubregInputs(const MachineInstr &MI, unsigned DefIdx,
475 RegSubRegPair &BaseReg,
476 RegSubRegPairAndIdx &InsertedReg) const;
477
478 /// Return true if two machine instructions would produce identical values.
479 /// By default, this is only true when the two instructions
480 /// are deemed identical except for defs. If this function is called when the
481 /// IR is still in SSA form, the caller can pass the MachineRegisterInfo for
482 /// aggressive checks.
483 virtual bool produceSameValue(const MachineInstr &MI0,
484 const MachineInstr &MI1,
485 const MachineRegisterInfo *MRI = nullptr) const;
486
487 /// \returns true if a branch from an instruction with opcode \p BranchOpc
488 /// bytes is capable of jumping to a position \p BrOffset bytes away.
489 virtual bool isBranchOffsetInRange(unsigned BranchOpc,
490 int64_t BrOffset) const {
491 llvm_unreachable("target did not implement");
492 }
493
494 /// \returns The block that branch instruction \p MI jumps to.
495 virtual MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const {
496 llvm_unreachable("target did not implement");
497 }
498
499 /// Insert an unconditional indirect branch at the end of \p MBB to \p
500 /// NewDestBB. \p BrOffset indicates the offset of \p NewDestBB relative to
501 /// the offset of the position to insert the new branch.
502 ///
503 /// \returns The number of bytes added to the block.
504 virtual unsigned insertIndirectBranch(MachineBasicBlock &MBB,
505 MachineBasicBlock &NewDestBB,
506 const DebugLoc &DL,
507 int64_t BrOffset = 0,
508 RegScavenger *RS = nullptr) const {
509 llvm_unreachable("target did not implement");
510 }
511
512 /// Analyze the branching code at the end of MBB, returning
513 /// true if it cannot be understood (e.g. it's a switch dispatch or isn't
514 /// implemented for a target). Upon success, this returns false and returns
515 /// with the following information in various cases:
516 ///
517 /// 1. If this block ends with no branches (it just falls through to its succ)
518 /// just return false, leaving TBB/FBB null.
519 /// 2. If this block ends with only an unconditional branch, it sets TBB to be
520 /// the destination block.
521 /// 3. If this block ends with a conditional branch and it falls through to a
522 /// successor block, it sets TBB to be the branch destination block and a
523 /// list of operands that evaluate the condition. These operands can be
524 /// passed to other TargetInstrInfo methods to create new branches.
525 /// 4. If this block ends with a conditional branch followed by an
526 /// unconditional branch, it returns the 'true' destination in TBB, the
527 /// 'false' destination in FBB, and a list of operands that evaluate the
528 /// condition. These operands can be passed to other TargetInstrInfo
529 /// methods to create new branches.
530 ///
531 /// Note that removeBranch and insertBranch must be implemented to support
532 /// cases where this method returns success.
533 ///
534 /// If AllowModify is true, then this routine is allowed to modify the basic
535 /// block (e.g. delete instructions after the unconditional branch).
536 ///
537 /// The CFG information in MBB.Predecessors and MBB.Successors must be valid
538 /// before calling this function.
539 virtual bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
540 MachineBasicBlock *&FBB,
541 SmallVectorImpl &Cond,
542 bool AllowModify = false) const {
543 return true;
544 }
545
546 /// Represents a predicate at the MachineFunction level. The control flow a
547 /// MachineBranchPredicate represents is:
548 ///
549 /// Reg = LHS `Predicate` RHS == ConditionDef
550 /// if Reg then goto TrueDest else goto FalseDest
551 ///
552 struct MachineBranchPredicate {
553 enum ComparePredicate {
554 PRED_EQ, // True if two values are equal
555 PRED_NE, // True if two values are not equal
556 PRED_INVALID // Sentinel value
557 };
558
559 ComparePredicate Predicate = PRED_INVALID;
560 MachineOperand LHS = MachineOperand::CreateImm(0);
561 MachineOperand RHS = MachineOperand::CreateImm(0);
562 MachineBasicBlock *TrueDest = nullptr;
563 MachineBasicBlock *FalseDest = nullptr;
564 MachineInstr *ConditionDef = nullptr;
565
566 /// SingleUseCondition is true if ConditionDef is dead except for the
567 /// branch(es) at the end of the basic block.
568 ///
569 bool SingleUseCondition = false;
570
571 explicit MachineBranchPredicate() = default;
572 };
573
574 /// Analyze the branching code at the end of MBB and parse it into the
575 /// MachineBranchPredicate structure if possible. Returns false on success
576 /// and true on failure.
577 ///
578 /// If AllowModify is true, then this routine is allowed to modify the basic
579 /// block (e.g. delete instructions after the unconditional branch).
580 ///
581 virtual bool analyzeBranchPredicate(MachineBasicBlock &MBB,
582 MachineBranchPredicate &MBP,
583 bool AllowModify = false) const {
584 return true;
585 }
586
587 /// Remove the branching code at the end of the specific MBB.
588 /// This is only invoked in cases where AnalyzeBranch returns success. It
589 /// returns the number of instructions that were removed.
590 /// If \p BytesRemoved is non-null, report the change in code size from the
591 /// removed instructions.
592 virtual unsigned removeBranch(MachineBasicBlock &MBB,
593 int *BytesRemoved = nullptr) const {
594 llvm_unreachable("Target didn't implement TargetInstrInfo::removeBranch!");
595 }
596
597 /// Insert branch code into the end of the specified MachineBasicBlock. The
598 /// operands to this method are the same as those returned by AnalyzeBranch.
599 /// This is only invoked in cases where AnalyzeBranch returns success. It
600 /// returns the number of instructions inserted. If \p BytesAdded is non-null,
601 /// report the change in code size from the added instructions.
602 ///
603 /// It is also invoked by tail merging to add unconditional branches in
604 /// cases where AnalyzeBranch doesn't apply because there was no original
605 /// branch to analyze. At least this much must be implemented, else tail
606 /// merging needs to be disabled.
607 ///
608 /// The CFG information in MBB.Predecessors and MBB.Successors must be valid
609 /// before calling this function.
610 virtual unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
611 MachineBasicBlock *FBB,
612 ArrayRef Cond,
613 const DebugLoc &DL,
614 int *BytesAdded = nullptr) const {
615 llvm_unreachable("Target didn't implement TargetInstrInfo::insertBranch!");
616 }
617
618 unsigned insertUnconditionalBranch(MachineBasicBlock &MBB,
619 MachineBasicBlock *DestBB,
620 const DebugLoc &DL,
621 int *BytesAdded = nullptr) const {
622 return insertBranch(MBB, DestBB, nullptr, ArrayRef(), DL,
623 BytesAdded);
624 }
625
626 /// Analyze the loop code, return true if it cannot be understoo. Upon
627 /// success, this function returns false and returns information about the
628 /// induction variable and compare instruction used at the end.
629 virtual bool analyzeLoop(MachineLoop &L, MachineInstr *&IndVarInst,
630 MachineInstr *&CmpInst) const {
631 return true;
632 }
633
634 /// Generate code to reduce the loop iteration by one and check if the loop is
635 /// finished. Return the value/register of the new loop count. We need
636 /// this function when peeling off one or more iterations of a loop. This
637 /// function assumes the nth iteration is peeled first.
638 virtual unsigned reduceLoopCount(MachineBasicBlock &MBB, MachineInstr *IndVar,
639 MachineInstr &Cmp,
640 SmallVectorImpl &Cond,
641 SmallVectorImpl &PrevInsts,
642 unsigned Iter, unsigned MaxIter) const {
643 llvm_unreachable("Target didn't implement ReduceLoopCount");
644 }
645
646 /// Delete the instruction OldInst and everything after it, replacing it with
647 /// an unconditional branch to NewDest. This is used by the tail merging pass.
648 virtual void ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
649 MachineBasicBlock *NewDest) const;
650
651 /// Return true if it's legal to split the given basic
652 /// block at the specified instruction (i.e. instruction would be the start
653 /// of a new basic block).
654 virtual bool isLegalToSplitMBBAt(MachineBasicBlock &MBB,
655 MachineBasicBlock::iterator MBBI) const {
656 return true;
657 }
658
659 /// Return true if it's profitable to predicate
660 /// instructions with accumulated instruction latency of "NumCycles"
661 /// of the specified basic block, where the probability of the instructions
662 /// being executed is given by Probability, and Confidence is a measure
663 /// of our confidence that it will be properly predicted.
664 virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
665 unsigned ExtraPredCycles,
666 BranchProbability Probability) const {
667 return false;
668 }
669
670 /// Second variant of isProfitableToIfCvt. This one
671 /// checks for the case where two basic blocks from true and false path
672 /// of an if-then-else (diamond) are predicated on mutually exclusive
673 /// predicates, where the probability of the true path being taken is given
674 /// by Probability, and Confidence is a measure of our confidence that it
675 /// will be properly predicted.
676 virtual bool isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumTCycles,
677 unsigned ExtraTCycles,
678 MachineBasicBlock &FMBB, unsigned NumFCycles,
679 unsigned ExtraFCycles,
680 BranchProbability Probability) const {
681 return false;
682 }
683
684 /// Return true if it's profitable for if-converter to duplicate instructions
685 /// of specified accumulated instruction latencies in the specified MBB to
686 /// enable if-conversion.
687 /// The probability of the instructions being executed is given by
688 /// Probability, and Confidence is a measure of our confidence that it
689 /// will be properly predicted.
690 virtual bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
691 unsigned NumCycles,
692 BranchProbability Probability) const {
693 return false;
694 }
695
/// Return true if it's profitable to unpredicate
/// one side of a 'diamond', i.e. two sides of if-else predicated on mutually
/// exclusive predicates.
/// e.g.
///   subeq  r0, r1, #1
///   addne  r0, r1, #1
/// =>
///   sub    r0, r1, #1
///   addne  r0, r1, #1
///
/// This may be profitable if conditional instructions are always executed.
virtual bool isProfitableToUnpredicate(MachineBasicBlock &TMBB,
                                       MachineBasicBlock &FMBB) const {
  // Default: never unpredicate; targets where predicated instructions always
  // execute (and cost the same) can override and return true.
  return false;
}
711
712 /// Return true if it is possible to insert a select
713 /// instruction that chooses between TrueReg and FalseReg based on the
714 /// condition code in Cond.
715 ///
716 /// When successful, also return the latency in cycles from TrueReg,
717 /// FalseReg, and Cond to the destination register. In most cases, a select
718 /// instruction will be 1 cycle, so CondCycles = TrueCycles = FalseCycles = 1
719 ///
720 /// Some x86 implementations have 2-cycle cmov instructions.
721 ///
722 /// @param MBB Block where select instruction would be inserted.
723 /// @param Cond Condition returned by AnalyzeBranch.
724 /// @param TrueReg Virtual register to select when Cond is true.
725 /// @param FalseReg Virtual register to select when Cond is false.
726 /// @param CondCycles Latency from Cond+Branch to select output.
727 /// @param TrueCycles Latency from TrueReg to select output.
728 /// @param FalseCycles Latency from FalseReg to select output.
729 virtual bool canInsertSelect(const MachineBasicBlock &MBB,
730 ArrayRef Cond, unsigned TrueReg,
731 unsigned FalseReg, int &CondCycles,
732 int &TrueCycles, int &FalseCycles) const {
733 return false;
734 }
735
736 /// Insert a select instruction into MBB before I that will copy TrueReg to
737 /// DstReg when Cond is true, and FalseReg to DstReg when Cond is false.
738 ///
739 /// This function can only be called after canInsertSelect() returned true.
740 /// The condition in Cond comes from AnalyzeBranch, and it can be assumed
741 /// that the same flags or registers required by Cond are available at the
742 /// insertion point.
743 ///
744 /// @param MBB Block where select instruction should be inserted.
745 /// @param I Insertion point.
746 /// @param DL Source location for debugging.
747 /// @param DstReg Virtual register to be defined by select instruction.
748 /// @param Cond Condition as computed by AnalyzeBranch.
749 /// @param TrueReg Virtual register to copy when Cond is true.
750 /// @param FalseReg Virtual register to copy when Cons is false.
751 virtual void insertSelect(MachineBasicBlock &MBB,
752 MachineBasicBlock::iterator I, const DebugLoc &DL,
753 unsigned DstReg, ArrayRef Cond,
754 unsigned TrueReg, unsigned FalseReg) const {
755 llvm_unreachable("Target didn't implement TargetInstrInfo::insertSelect!");
756 }
757
758 /// Analyze the given select instruction, returning true if
759 /// it cannot be understood. It is assumed that MI->isSelect() is true.
760 ///
761 /// When successful, return the controlling condition and the operands that
762 /// determine the true and false result values.
763 ///
764 /// Result = SELECT Cond, TrueOp, FalseOp
765 ///
766 /// Some targets can optimize select instructions, for example by predicating
767 /// the instruction defining one of the operands. Such targets should set
768 /// Optimizable.
769 ///
770 /// @param MI Select instruction to analyze.
771