llvm.org GIT mirror llvm / 49683f3
This patch adds a new NVPTX back-end to LLVM which supports code generation for NVIDIA PTX 3.0. This back-end will (eventually) replace the current PTX back-end, while maintaining compatibility with it. The new target machines are: nvptx (old ptx32) => 32-bit PTX; nvptx64 (old ptx64) => 64-bit PTX. The sources are based on the internal NVIDIA NVPTX back-end, and contain more functionality than the current PTX back-end provides. NV_CONTRIB git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@156196 91177308-0d34-0410-b5e6-96231b3b80d8 Justin Holewinski 8 years ago
86 changed file(s) with 25901 addition(s) and 11 deletion(s). Raw diff Collapse all Expand all
7777 Mips
7878 MBlaze
7979 MSP430
80 NVPTX
8081 PowerPC
8182 PTX
8283 Sparc
369369 hexagon-*) llvm_cv_target_arch="Hexagon" ;;
370370 mblaze-*) llvm_cv_target_arch="MBlaze" ;;
371371 ptx-*) llvm_cv_target_arch="PTX" ;;
372 nvptx-*) llvm_cv_target_arch="NVPTX" ;;
372373 *) llvm_cv_target_arch="Unknown" ;;
373374 esac])
374375
516517 Hexagon) AC_SUBST(TARGET_HAS_JIT,0) ;;
517518 MBlaze) AC_SUBST(TARGET_HAS_JIT,0) ;;
518519 PTX) AC_SUBST(TARGET_HAS_JIT,0) ;;
520 NVPTX) AC_SUBST(TARGET_HAS_JIT,0) ;;
519521 *) AC_SUBST(TARGET_HAS_JIT,0) ;;
520522 esac
521523 fi
627629 AC_ARG_ENABLE([targets],AS_HELP_STRING([--enable-targets],
628630 [Build specific host targets: all or target1,target2,... Valid targets are:
629631 host, x86, x86_64, sparc, powerpc, arm, mips, spu, hexagon,
630 xcore, msp430, ptx, and cpp (default=all)]),,
632 xcore, msp430, ptx, nvptx, and cpp (default=all)]),,
631633 enableval=all)
632634 if test "$enableval" = host-only ; then
633635 enableval=host
634636 fi
635637 case "$enableval" in
636 all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 CppBackend MBlaze PTX Hexagon" ;;
638 all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 CppBackend MBlaze PTX NVPTX Hexagon" ;;
637639 *)for a_target in `echo $enableval|sed -e 's/,/ /g' ` ; do
638640 case "$a_target" in
639641 x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
650652 hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
651653 mblaze) TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;;
652654 ptx) TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;;
655 nvptx) TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;;
653656 host) case "$llvm_cv_target_arch" in
654657 x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
655658 x86_64) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
663666 MSP430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
664667 Hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
665668 PTX) TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;;
669 NVPTX) TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;;
666670 *) AC_MSG_ERROR([Can not set target to build]) ;;
667671 esac ;;
668672 *) AC_MSG_ERROR([Unrecognized target $a_target]) ;;
14191419 --enable-targets Build specific host targets: all or
14201420 target1,target2,... Valid targets are: host, x86,
14211421 x86_64, sparc, powerpc, arm, mips, spu, hexagon,
1422 xcore, msp430, ptx, and cpp (default=all)
1422 xcore, msp430, ptx, nvptx, and cpp (default=all)
14231423 --enable-bindings Build specific language bindings:
14241424 all,auto,none,{binding-name} (default=auto)
14251425 --enable-libffi Check for the presence of libffi (default is NO)
39023902 hexagon-*) llvm_cv_target_arch="Hexagon" ;;
39033903 mblaze-*) llvm_cv_target_arch="MBlaze" ;;
39043904 ptx-*) llvm_cv_target_arch="PTX" ;;
3905 nvptx-*) llvm_cv_target_arch="NVPTX" ;;
39053906 *) llvm_cv_target_arch="Unknown" ;;
39063907 esac
39073908 fi
51255126 ;;
51265127 PTX) TARGET_HAS_JIT=0
51275128 ;;
5129 NVPTX) TARGET_HAS_JIT=0
5130 ;;
51285131 *) TARGET_HAS_JIT=0
51295132 ;;
51305133 esac
53095312 enableval=host
53105313 fi
53115314 case "$enableval" in
5312 all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 CppBackend MBlaze PTX Hexagon" ;;
5315 all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 CppBackend MBlaze PTX NVPTX Hexagon" ;;
53135316 *)for a_target in `echo $enableval|sed -e 's/,/ /g' ` ; do
53145317 case "$a_target" in
53155318 x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
53265329 hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
53275330 mblaze) TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;;
53285331 ptx) TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;;
5332 nvptx) TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;;
53295333 host) case "$llvm_cv_target_arch" in
53305334 x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
53315335 x86_64) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
53395343 MSP430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
53405344 Hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
53415345 PTX) TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;;
5346 NVPTX) TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;;
53425347 *) { { echo "$as_me:$LINENO: error: Can not set target to build" >&5
53435348 echo "$as_me: error: Can not set target to build" >&2;}
53445349 { (exit 1); exit 1; }; } ;;
1040010405 lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
1040110406 lt_status=$lt_dlunknown
1040210407 cat > conftest.$ac_ext <
10403 #line 10404 "configure"
10408 #line 10409 "configure"
1040410409 #include "confdefs.h"
1040510410
1040610411 #if HAVE_DLFCN_H
6363 mblaze, // MBlaze: mblaze
6464 ptx32, // PTX: ptx (32-bit)
6565 ptx64, // PTX: ptx (64-bit)
66 nvptx, // NVPTX: 32-bit
67 nvptx64, // NVPTX: 64-bit
6668 le32, // le32: generic little-endian 32-bit CPU (PNaCl / Emscripten)
6769 amdil // amdil: amd IL
6870 };
440440 include "llvm/IntrinsicsXCore.td"
441441 include "llvm/IntrinsicsPTX.td"
442442 include "llvm/IntrinsicsHexagon.td"
443 include "llvm/IntrinsicsNVVM.td"
0 //===- IntrinsicsNVVM.td - Defines NVVM intrinsics ---------*- tablegen -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines all of the NVVM-specific intrinsics for use with NVPTX.
10 //
11 //===----------------------------------------------------------------------===//
12
13 def llvm_anyi64ptr_ty : LLVMAnyPointerType<llvm_i64_ty>; // overloaded pointer type with i64 pointee: (space)i64*
14
15 //
16 // MISC: clz and popc (i32 / i64 operand, i32 result) and the three-operand prmt.
17 //
18
19 def int_nvvm_clz_i : GCCBuiltin<"__nvvm_clz_i">,
20 Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
21 def int_nvvm_clz_ll : GCCBuiltin<"__nvvm_clz_ll">,
22 Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem]>;
23
24 def int_nvvm_popc_i : GCCBuiltin<"__nvvm_popc_i">,
25 Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
26 def int_nvvm_popc_ll : GCCBuiltin<"__nvvm_popc_ll">,
27 Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem]>;
28 // prmt picks bytes out of its first two operands under control of the third; swapping the first two changes the result, so it must not be Commutative.
29 def int_nvvm_prmt : GCCBuiltin<"__nvvm_prmt">,
30 Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
31 [IntrNoMem]>;
32
33 //
34 // Min Max: two-operand min/max over i32, i64, float (plain and ftz) and double; every record is IntrNoMem and Commutative.
35 //
36
37 def int_nvvm_min_i : GCCBuiltin<"__nvvm_min_i">,
38 Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
39 [IntrNoMem, Commutative]>;
40 def int_nvvm_min_ui : GCCBuiltin<"__nvvm_min_ui">,
41 Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
42 [IntrNoMem, Commutative]>;
43
44 def int_nvvm_min_ll : GCCBuiltin<"__nvvm_min_ll">,
45 Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
46 [IntrNoMem, Commutative]>;
47 def int_nvvm_min_ull : GCCBuiltin<"__nvvm_min_ull">,
48 Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
49 [IntrNoMem, Commutative]>;
50
51 def int_nvvm_max_i : GCCBuiltin<"__nvvm_max_i">,
52 Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
53 [IntrNoMem, Commutative]>;
54 def int_nvvm_max_ui : GCCBuiltin<"__nvvm_max_ui">,
55 Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
56 [IntrNoMem, Commutative]>;
57
58 def int_nvvm_max_ll : GCCBuiltin<"__nvvm_max_ll">,
59 Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
60 [IntrNoMem, Commutative]>;
61 def int_nvvm_max_ull : GCCBuiltin<"__nvvm_max_ull">,
62 Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
63 [IntrNoMem, Commutative]>;
64
65 def int_nvvm_fmin_f : GCCBuiltin<"__nvvm_fmin_f">,
66 Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
67 [IntrNoMem, Commutative]>;
68 def int_nvvm_fmin_ftz_f : GCCBuiltin<"__nvvm_fmin_ftz_f">,
69 Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
70 [IntrNoMem, Commutative]>;
71
72 def int_nvvm_fmax_f : GCCBuiltin<"__nvvm_fmax_f">,
73 Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty]
74 , [IntrNoMem, Commutative]>;
75 def int_nvvm_fmax_ftz_f : GCCBuiltin<"__nvvm_fmax_ftz_f">,
76 Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
77 [IntrNoMem, Commutative]>;
78
79 def int_nvvm_fmin_d : GCCBuiltin<"__nvvm_fmin_d">,
80 Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
81 [IntrNoMem, Commutative]>;
82 def int_nvvm_fmax_d : GCCBuiltin<"__nvvm_fmax_d">,
83 Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
84 [IntrNoMem, Commutative]>;
85
86 //
87 // Multiplication: two-operand mulhi (i32/i64), mul (float plain/ftz and double, one record per rn/rz/rm/rp suffix) and mul24 (i32); all IntrNoMem and Commutative.
88 //
89
90 def int_nvvm_mulhi_i : GCCBuiltin<"__nvvm_mulhi_i">,
91 Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
92 [IntrNoMem, Commutative]>;
93 def int_nvvm_mulhi_ui : GCCBuiltin<"__nvvm_mulhi_ui">,
94 Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
95 [IntrNoMem, Commutative]>;
96
97 def int_nvvm_mulhi_ll : GCCBuiltin<"__nvvm_mulhi_ll">,
98 Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
99 [IntrNoMem, Commutative]>;
100 def int_nvvm_mulhi_ull : GCCBuiltin<"__nvvm_mulhi_ull">,
101 Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
102 [IntrNoMem, Commutative]>;
103
104 def int_nvvm_mul_rn_ftz_f : GCCBuiltin<"__nvvm_mul_rn_ftz_f">,
105 Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
106 [IntrNoMem, Commutative]>;
107 def int_nvvm_mul_rn_f : GCCBuiltin<"__nvvm_mul_rn_f">,
108 Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
109 [IntrNoMem, Commutative]>;
110 def int_nvvm_mul_rz_ftz_f : GCCBuiltin<"__nvvm_mul_rz_ftz_f">,
111 Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
112 [IntrNoMem, Commutative]>;
113 def int_nvvm_mul_rz_f : GCCBuiltin<"__nvvm_mul_rz_f">,
114 Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
115 [IntrNoMem, Commutative]>;
116 def int_nvvm_mul_rm_ftz_f : GCCBuiltin<"__nvvm_mul_rm_ftz_f">,
117 Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
118 [IntrNoMem, Commutative]>;
119 def int_nvvm_mul_rm_f : GCCBuiltin<"__nvvm_mul_rm_f">,
120 Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
121 [IntrNoMem, Commutative]>;
122 def int_nvvm_mul_rp_ftz_f : GCCBuiltin<"__nvvm_mul_rp_ftz_f">,
123 Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
124 [IntrNoMem, Commutative]>;
125 def int_nvvm_mul_rp_f : GCCBuiltin<"__nvvm_mul_rp_f">,
126 Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
127 [IntrNoMem, Commutative]>;
128
129 def int_nvvm_mul_rn_d : GCCBuiltin<"__nvvm_mul_rn_d">,
130 Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
131 [IntrNoMem, Commutative]>;
132 def int_nvvm_mul_rz_d : GCCBuiltin<"__nvvm_mul_rz_d">,
133 Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
134 [IntrNoMem, Commutative]>;
135 def int_nvvm_mul_rm_d : GCCBuiltin<"__nvvm_mul_rm_d">,
136 Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
137 [IntrNoMem, Commutative]>;
138 def int_nvvm_mul_rp_d : GCCBuiltin<"__nvvm_mul_rp_d">,
139 Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
140 [IntrNoMem, Commutative]>;
141
142 def int_nvvm_mul24_i : GCCBuiltin<"__nvvm_mul24_i">,
143 Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
144 [IntrNoMem, Commutative]>;
145 def int_nvvm_mul24_ui : GCCBuiltin<"__nvvm_mul24_ui">,
146 Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
147 [IntrNoMem, Commutative]>;
148
149 //
150 // Div: two-operand float (plain/ftz) and double division. x / y != y / x, so these are IntrNoMem only and must NOT be marked Commutative.
151 //
152
153 def int_nvvm_div_approx_ftz_f : GCCBuiltin<"__nvvm_div_approx_ftz_f">,
154 Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
155 [IntrNoMem]>;
156 def int_nvvm_div_approx_f : GCCBuiltin<"__nvvm_div_approx_f">,
157 Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
158 [IntrNoMem]>;
159
160 def int_nvvm_div_rn_ftz_f : GCCBuiltin<"__nvvm_div_rn_ftz_f">,
161 Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
162 [IntrNoMem]>;
163 def int_nvvm_div_rn_f : GCCBuiltin<"__nvvm_div_rn_f">,
164 Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
165 [IntrNoMem]>;
166
167 def int_nvvm_div_rz_ftz_f : GCCBuiltin<"__nvvm_div_rz_ftz_f">,
168 Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
169 [IntrNoMem]>;
170 def int_nvvm_div_rz_f : GCCBuiltin<"__nvvm_div_rz_f">,
171 Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
172 [IntrNoMem]>;
173
174 def int_nvvm_div_rm_ftz_f : GCCBuiltin<"__nvvm_div_rm_ftz_f">,
175 Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
176 [IntrNoMem]>;
177 def int_nvvm_div_rm_f : GCCBuiltin<"__nvvm_div_rm_f">,
178 Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
179 [IntrNoMem]>;
180
181 def int_nvvm_div_rp_ftz_f : GCCBuiltin<"__nvvm_div_rp_ftz_f">,
182 Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
183 [IntrNoMem]>;
184 def int_nvvm_div_rp_f : GCCBuiltin<"__nvvm_div_rp_f">,
185 Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
186 [IntrNoMem]>;
187
188 def int_nvvm_div_rn_d : GCCBuiltin<"__nvvm_div_rn_d">,
189 Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
190 [IntrNoMem]>;
191 def int_nvvm_div_rz_d : GCCBuiltin<"__nvvm_div_rz_d">,
192 Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
193 [IntrNoMem]>;
194 def int_nvvm_div_rm_d : GCCBuiltin<"__nvvm_div_rm_d">,
195 Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
196 [IntrNoMem]>;
197 def int_nvvm_div_rp_d : GCCBuiltin<"__nvvm_div_rp_d">,
198 Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
199 [IntrNoMem]>;
200
201 //
202 // Brev: one i32 and one i64 unary intrinsic, each returning its operand type; IntrNoMem.
203 //
204
205 def int_nvvm_brev32 : GCCBuiltin<"__nvvm_brev32">,
206 Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
207 def int_nvvm_brev64 : GCCBuiltin<"__nvvm_brev64">,
208 Intrinsic<[llvm_i64_ty], [llvm_i64_ty], [IntrNoMem]>;
209
210 //
211 // Sad: three i32 operands, i32 result. NOTE(review): Commutative presumably applies to the first two operands only -- confirm.
212 //
213
214 def int_nvvm_sad_i : GCCBuiltin<"__nvvm_sad_i">,
215 Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
216 [IntrNoMem, Commutative]>;
217 def int_nvvm_sad_ui : GCCBuiltin<"__nvvm_sad_ui">,
218 Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
219 [IntrNoMem, Commutative]>;
220
221 //
222 // Floor Ceil: unary floor/ceil on float (plain and ftz) and double; IntrNoMem.
223 //
224
225 def int_nvvm_floor_ftz_f : GCCBuiltin<"__nvvm_floor_ftz_f">,
226 Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
227 def int_nvvm_floor_f : GCCBuiltin<"__nvvm_floor_f">,
228 Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
229 def int_nvvm_floor_d : GCCBuiltin<"__nvvm_floor_d">,
230 Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
231
232 def int_nvvm_ceil_ftz_f : GCCBuiltin<"__nvvm_ceil_ftz_f">,
233 Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
234 def int_nvvm_ceil_f : GCCBuiltin<"__nvvm_ceil_f">,
235 Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
236 def int_nvvm_ceil_d : GCCBuiltin<"__nvvm_ceil_d">,
237 Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
238
239 //
240 // Abs: unary abs on i32/i64 and fabs on float (plain and ftz) and double; IntrNoMem.
241 //
242
243 def int_nvvm_abs_i : GCCBuiltin<"__nvvm_abs_i">,
244 Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
245 def int_nvvm_abs_ll : GCCBuiltin<"__nvvm_abs_ll">,
246 Intrinsic<[llvm_i64_ty], [llvm_i64_ty], [IntrNoMem]>;
247
248 def int_nvvm_fabs_ftz_f : GCCBuiltin<"__nvvm_fabs_ftz_f">,
249 Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
250 def int_nvvm_fabs_f : GCCBuiltin<"__nvvm_fabs_f">,
251 Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
252
253 def int_nvvm_fabs_d : GCCBuiltin<"__nvvm_fabs_d">,
254 Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
255
256 //
257 // Round: unary round on float (plain and ftz) and double; IntrNoMem.
258 //
259
260 def int_nvvm_round_ftz_f : GCCBuiltin<"__nvvm_round_ftz_f">,
261 Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
262 def int_nvvm_round_f : GCCBuiltin<"__nvvm_round_f">,
263 Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
264
265 def int_nvvm_round_d : GCCBuiltin<"__nvvm_round_d">,
266 Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
267
268 //
269 // Trunc: unary trunc on float (plain and ftz) and double; IntrNoMem.
270 //
271
272 def int_nvvm_trunc_ftz_f : GCCBuiltin<"__nvvm_trunc_ftz_f">,
273 Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
274 def int_nvvm_trunc_f : GCCBuiltin<"__nvvm_trunc_f">,
275 Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
276
277 def int_nvvm_trunc_d : GCCBuiltin<"__nvvm_trunc_d">,
278 Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
279
280 //
281 // Saturate: unary saturate on float (plain and ftz) and double; IntrNoMem.
282 //
283
284 def int_nvvm_saturate_ftz_f : GCCBuiltin<"__nvvm_saturate_ftz_f">,
285 Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
286 def int_nvvm_saturate_f : GCCBuiltin<"__nvvm_saturate_f">,
287 Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
288
289 def int_nvvm_saturate_d : GCCBuiltin<"__nvvm_saturate_d">,
290 Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
291
292 //
293 // Exp2 Log2: unary ex2/lg2 "approx" variants on float (plain and ftz) and double; IntrNoMem.
294 //
295
296 def int_nvvm_ex2_approx_ftz_f : GCCBuiltin<"__nvvm_ex2_approx_ftz_f">,
297 Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
298 def int_nvvm_ex2_approx_f : GCCBuiltin<"__nvvm_ex2_approx_f">,
299 Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
300 def int_nvvm_ex2_approx_d : GCCBuiltin<"__nvvm_ex2_approx_d">,
301 Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
302
303 def int_nvvm_lg2_approx_ftz_f : GCCBuiltin<"__nvvm_lg2_approx_ftz_f">,
304 Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
305 def int_nvvm_lg2_approx_f : GCCBuiltin<"__nvvm_lg2_approx_f">,
306 Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
307 def int_nvvm_lg2_approx_d : GCCBuiltin<"__nvvm_lg2_approx_d">,
308 Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
309
310 //
311 // Sin Cos: unary sin/cos "approx" variants on float only (plain and ftz); IntrNoMem.
312 //
313
314 def int_nvvm_sin_approx_ftz_f : GCCBuiltin<"__nvvm_sin_approx_ftz_f">,
315 Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
316 def int_nvvm_sin_approx_f : GCCBuiltin<"__nvvm_sin_approx_f">,
317 Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
318
319 def int_nvvm_cos_approx_ftz_f : GCCBuiltin<"__nvvm_cos_approx_ftz_f">,
320 Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
321 def int_nvvm_cos_approx_f : GCCBuiltin<"__nvvm_cos_approx_f">,
322 Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
323
324 //
325 // Fma: three-operand float (plain/ftz) and double records, one per rn/rz/rm/rp suffix. NOTE(review): Commutative presumably covers only the first two operands -- confirm.
326 //
327
328 def int_nvvm_fma_rn_ftz_f : GCCBuiltin<"__nvvm_fma_rn_ftz_f">,
329 Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
330 [IntrNoMem, Commutative]>;
331 def int_nvvm_fma_rn_f : GCCBuiltin<"__nvvm_fma_rn_f">,
332 Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
333 [IntrNoMem, Commutative]>;
334 def int_nvvm_fma_rz_ftz_f : GCCBuiltin<"__nvvm_fma_rz_ftz_f">,
335 Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
336 [IntrNoMem, Commutative]>;
337 def int_nvvm_fma_rz_f : GCCBuiltin<"__nvvm_fma_rz_f">,
338 Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
339 [IntrNoMem, Commutative]>;
340 def int_nvvm_fma_rm_ftz_f : GCCBuiltin<"__nvvm_fma_rm_ftz_f">,
341 Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
342 [IntrNoMem, Commutative]>;
343 def int_nvvm_fma_rm_f : GCCBuiltin<"__nvvm_fma_rm_f">,
344 Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
345 [IntrNoMem, Commutative]>;
346 def int_nvvm_fma_rp_ftz_f : GCCBuiltin<"__nvvm_fma_rp_ftz_f">,
347 Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
348 [IntrNoMem, Commutative]>;
349 def int_nvvm_fma_rp_f : GCCBuiltin<"__nvvm_fma_rp_f">,
350 Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
351 [IntrNoMem, Commutative]>;
352
353 def int_nvvm_fma_rn_d : GCCBuiltin<"__nvvm_fma_rn_d">,
354 Intrinsic<[llvm_double_ty],
355 [llvm_double_ty, llvm_double_ty, llvm_double_ty],
356 [IntrNoMem, Commutative]>;
357 def int_nvvm_fma_rz_d : GCCBuiltin<"__nvvm_fma_rz_d">,
358 Intrinsic<[llvm_double_ty],
359 [llvm_double_ty, llvm_double_ty, llvm_double_ty],
360 [IntrNoMem, Commutative]>;
361 def int_nvvm_fma_rm_d : GCCBuiltin<"__nvvm_fma_rm_d">,
362 Intrinsic<[llvm_double_ty],
363 [llvm_double_ty, llvm_double_ty, llvm_double_ty],
364 [IntrNoMem, Commutative]>;
365 def int_nvvm_fma_rp_d : GCCBuiltin<"__nvvm_fma_rp_d">,
366 Intrinsic<[llvm_double_ty],
367 [llvm_double_ty, llvm_double_ty, llvm_double_ty],
368 [IntrNoMem, Commutative]>;
369
370 //
371 // Rcp: unary float (plain/ftz) and double records, one per rn/rz/rm/rp suffix, plus a single double-only approx_ftz record; IntrNoMem.
372 //
373
374 def int_nvvm_rcp_rn_ftz_f : GCCBuiltin<"__nvvm_rcp_rn_ftz_f">,
375 Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
376 def int_nvvm_rcp_rn_f : GCCBuiltin<"__nvvm_rcp_rn_f">,
377 Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
378 def int_nvvm_rcp_rz_ftz_f : GCCBuiltin<"__nvvm_rcp_rz_ftz_f">,
379 Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
380 def int_nvvm_rcp_rz_f : GCCBuiltin<"__nvvm_rcp_rz_f">,
381 Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
382 def int_nvvm_rcp_rm_ftz_f : GCCBuiltin<"__nvvm_rcp_rm_ftz_f">,
383 Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
384 def int_nvvm_rcp_rm_f : GCCBuiltin<"__nvvm_rcp_rm_f">,
385 Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
386 def int_nvvm_rcp_rp_ftz_f : GCCBuiltin<"__nvvm_rcp_rp_ftz_f">,
387 Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
388 def int_nvvm_rcp_rp_f : GCCBuiltin<"__nvvm_rcp_rp_f">,
389 Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
390
391 def int_nvvm_rcp_rn_d : GCCBuiltin<"__nvvm_rcp_rn_d">,
392 Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
393 def int_nvvm_rcp_rz_d : GCCBuiltin<"__nvvm_rcp_rz_d">,
394 Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
395 def int_nvvm_rcp_rm_d : GCCBuiltin<"__nvvm_rcp_rm_d">,
396 Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
397 def int_nvvm_rcp_rp_d : GCCBuiltin<"__nvvm_rcp_rp_d">,
398 Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
399
400 def int_nvvm_rcp_approx_ftz_d : GCCBuiltin<"__nvvm_rcp_approx_ftz_d">,
401 Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
402
403 //
404 // Sqrt: unary float (plain/ftz, rn/rz/rm/rp plus approx) and double (rn/rz/rm/rp) records; IntrNoMem.
405 //
406
407 def int_nvvm_sqrt_rn_ftz_f : GCCBuiltin<"__nvvm_sqrt_rn_ftz_f">,
408 Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
409 def int_nvvm_sqrt_rn_f : GCCBuiltin<"__nvvm_sqrt_rn_f">,
410 Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
411 def int_nvvm_sqrt_rz_ftz_f : GCCBuiltin<"__nvvm_sqrt_rz_ftz_f">,
412 Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
413 def int_nvvm_sqrt_rz_f : GCCBuiltin<"__nvvm_sqrt_rz_f">,
414 Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
415 def int_nvvm_sqrt_rm_ftz_f : GCCBuiltin<"__nvvm_sqrt_rm_ftz_f">,
416 Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
417 def int_nvvm_sqrt_rm_f : GCCBuiltin<"__nvvm_sqrt_rm_f">,
418 Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
419 def int_nvvm_sqrt_rp_ftz_f : GCCBuiltin<"__nvvm_sqrt_rp_ftz_f">,
420 Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
421 def int_nvvm_sqrt_rp_f : GCCBuiltin<"__nvvm_sqrt_rp_f">,
422 Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
423 def int_nvvm_sqrt_approx_ftz_f : GCCBuiltin<"__nvvm_sqrt_approx_ftz_f">,
424 Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
425 def int_nvvm_sqrt_approx_f : GCCBuiltin<"__nvvm_sqrt_approx_f">,
426 Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
427
428 def int_nvvm_sqrt_rn_d : GCCBuiltin<"__nvvm_sqrt_rn_d">,
429 Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
430 def int_nvvm_sqrt_rz_d : GCCBuiltin<"__nvvm_sqrt_rz_d">,
431 Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
432 def int_nvvm_sqrt_rm_d : GCCBuiltin<"__nvvm_sqrt_rm_d">,
433 Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
434 def int_nvvm_sqrt_rp_d : GCCBuiltin<"__nvvm_sqrt_rp_d">,
435 Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
436
437 //
438 // Rsqrt: unary "approx" records on float (plain and ftz) and double; IntrNoMem.
439 //
440
441 def int_nvvm_rsqrt_approx_ftz_f : GCCBuiltin<"__nvvm_rsqrt_approx_ftz_f">,
442 Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
443 def int_nvvm_rsqrt_approx_f : GCCBuiltin<"__nvvm_rsqrt_approx_f">,
444 Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
445 def int_nvvm_rsqrt_approx_d : GCCBuiltin<"__nvvm_rsqrt_approx_d">,
446 Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
447
448 //
449 // Add: two-operand float (plain/ftz) and double records, one per rn/rz/rm/rp suffix; all IntrNoMem and Commutative.
450 //
451
452 def int_nvvm_add_rn_ftz_f : GCCBuiltin<"__nvvm_add_rn_ftz_f">,
453 Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
454 [IntrNoMem, Commutative]>;
455 def int_nvvm_add_rn_f : GCCBuiltin<"__nvvm_add_rn_f">,
456 Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
457 [IntrNoMem, Commutative]>;
458 def int_nvvm_add_rz_ftz_f : GCCBuiltin<"__nvvm_add_rz_ftz_f">,
459 Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
460 [IntrNoMem, Commutative]>;
461 def int_nvvm_add_rz_f : GCCBuiltin<"__nvvm_add_rz_f">,
462 Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
463 [IntrNoMem, Commutative]>;
464 def int_nvvm_add_rm_ftz_f : GCCBuiltin<"__nvvm_add_rm_ftz_f">,
465 Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
466 [IntrNoMem, Commutative]>;
467 def int_nvvm_add_rm_f : GCCBuiltin<"__nvvm_add_rm_f">,
468 Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
469 [IntrNoMem, Commutative]>;
470 def int_nvvm_add_rp_ftz_f : GCCBuiltin<"__nvvm_add_rp_ftz_f">,
471 Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
472 [IntrNoMem, Commutative]>;
473 def int_nvvm_add_rp_f : GCCBuiltin<"__nvvm_add_rp_f">,
474 Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
475 [IntrNoMem, Commutative]>;
476
477 def int_nvvm_add_rn_d : GCCBuiltin<"__nvvm_add_rn_d">,
478 Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
479 [IntrNoMem, Commutative]>;
480 def int_nvvm_add_rz_d : GCCBuiltin<"__nvvm_add_rz_d">,
481 Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
482 [IntrNoMem, Commutative]>;
483 def int_nvvm_add_rm_d : GCCBuiltin<"__nvvm_add_rm_d">,
484 Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
485 [IntrNoMem, Commutative]>;
486 def int_nvvm_add_rp_d : GCCBuiltin<"__nvvm_add_rp_d">,
487 Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
488 [IntrNoMem, Commutative]>;
489
490 //
491 // Convert: unary conversions between double, float and i32, one record per rn/rz/rm/rp suffix; all IntrNoMem. The *i and *ui records share a signature (presumably signed vs. unsigned -- confirm).
492 //
493 // double -> float (d2f, plain and ftz)
494 def int_nvvm_d2f_rn_ftz : GCCBuiltin<"__nvvm_d2f_rn_ftz">,
495 Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
496 def int_nvvm_d2f_rn : GCCBuiltin<"__nvvm_d2f_rn">,
497 Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
498 def int_nvvm_d2f_rz_ftz : GCCBuiltin<"__nvvm_d2f_rz_ftz">,
499 Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
500 def int_nvvm_d2f_rz : GCCBuiltin<"__nvvm_d2f_rz">,
501 Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
502 def int_nvvm_d2f_rm_ftz : GCCBuiltin<"__nvvm_d2f_rm_ftz">,
503 Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
504 def int_nvvm_d2f_rm : GCCBuiltin<"__nvvm_d2f_rm">,
505 Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
506 def int_nvvm_d2f_rp_ftz : GCCBuiltin<"__nvvm_d2f_rp_ftz">,
507 Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
508 def int_nvvm_d2f_rp : GCCBuiltin<"__nvvm_d2f_rp">,
509 Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
510 // double -> i32 (d2i)
511 def int_nvvm_d2i_rn : GCCBuiltin<"__nvvm_d2i_rn">,
512 Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
513 def int_nvvm_d2i_rz : GCCBuiltin<"__nvvm_d2i_rz">,
514 Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
515 def int_nvvm_d2i_rm : GCCBuiltin<"__nvvm_d2i_rm">,
516 Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
517 def int_nvvm_d2i_rp : GCCBuiltin<"__nvvm_d2i_rp">,
518 Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
519 // double -> i32 (d2ui)
520 def int_nvvm_d2ui_rn : GCCBuiltin<"__nvvm_d2ui_rn">,
521 Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
522 def int_nvvm_d2ui_rz : GCCBuiltin<"__nvvm_d2ui_rz">,
523 Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
524 def int_nvvm_d2ui_rm : GCCBuiltin<"__nvvm_d2ui_rm">,
525 Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
526 def int_nvvm_d2ui_rp : GCCBuiltin<"__nvvm_d2ui_rp">,
527 Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
528 // i32 -> double (i2d)
529 def int_nvvm_i2d_rn : GCCBuiltin<"__nvvm_i2d_rn">,
530 Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
531 def int_nvvm_i2d_rz : GCCBuiltin<"__nvvm_i2d_rz">,
532 Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
533 def int_nvvm_i2d_rm : GCCBuiltin<"__nvvm_i2d_rm">,
534 Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
535 def int_nvvm_i2d_rp : GCCBuiltin<"__nvvm_i2d_rp">,
536 Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
537 // i32 -> double (ui2d)
538 def int_nvvm_ui2d_rn : GCCBuiltin<"__nvvm_ui2d_rn">,
539 Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
540 def int_nvvm_ui2d_rz : GCCBuiltin<"__nvvm_ui2d_rz">,
541 Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
542 def int_nvvm_ui2d_rm : GCCBuiltin<"__nvvm_ui2d_rm">,
543 Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
544 def int_nvvm_ui2d_rp : GCCBuiltin<"__nvvm_ui2d_rp">,
545 Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
546 // float -> i32 (f2i, plain and ftz)
547 def int_nvvm_f2i_rn_ftz : GCCBuiltin<"__nvvm_f2i_rn_ftz">,
548 Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
549 def int_nvvm_f2i_rn : GCCBuiltin<"__nvvm_f2i_rn">,
550 Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
551 def int_nvvm_f2i_rz_ftz : GCCBuiltin<"__nvvm_f2i_rz_ftz">,
552 Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
553 def int_nvvm_f2i_rz : GCCBuiltin<"__nvvm_f2i_rz">,
554 Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
555 def int_nvvm_f2i_rm_ftz : GCCBuiltin<"__nvvm_f2i_rm_ftz">,
556 Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
557 def int_nvvm_f2i_rm : GCCBuiltin<"__nvvm_f2i_rm">,
558 Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
559 def int_nvvm_f2i_rp_ftz : GCCBuiltin<"__nvvm_f2i_rp_ftz">,
560 Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
561 def int_nvvm_f2i_rp : GCCBuiltin<"__nvvm_f2i_rp">,
562 Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
563 // float -> i32 (f2ui, plain and ftz)
564 def int_nvvm_f2ui_rn_ftz : GCCBuiltin<"__nvvm_f2ui_rn_ftz">,
565 Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
566 def int_nvvm_f2ui_rn : GCCBuiltin<"__nvvm_f2ui_rn">,
567 Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
568 def int_nvvm_f2ui_rz_ftz : GCCBuiltin<"__nvvm_f2ui_rz_ftz">,
569 Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
570 def int_nvvm_f2ui_rz : GCCBuiltin<"__nvvm_f2ui_rz">,
571 Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
572 def int_nvvm_f2ui_rm_ftz : GCCBuiltin<"__nvvm_f2ui_rm_ftz">,
573 Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
574 def int_nvvm_f2ui_rm : GCCBuiltin<"__nvvm_f2ui_rm">,
575 Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
576 def int_nvvm_f2ui_rp_ftz : GCCBuiltin<"__nvvm_f2ui_rp_ftz">,
577 Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
578 def int_nvvm_f2ui_rp : GCCBuiltin<"__nvvm_f2ui_rp">,
579 Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
580 // i32 -> float (i2f)
581 def int_nvvm_i2f_rn : GCCBuiltin<"__nvvm_i2f_rn">,
582 Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
583 def int_nvvm_i2f_rz : GCCBuiltin<"__nvvm_i2f_rz">,
584 Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
585 def int_nvvm_i2f_rm : GCCBuiltin<"__nvvm_i2f_rm">,
586 Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
587 def int_nvvm_i2f_rp : GCCBuiltin<"__nvvm_i2f_rp">,
588 Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
589 // i32 -> float (ui2f)
590 def int_nvvm_ui2f_rn : GCCBuiltin<"__nvvm_ui2f_rn">,
591 Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
592 def int_nvvm_ui2f_rz : GCCBuiltin<"__nvvm_ui2f_rz">,
593 Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
594 def int_nvvm_ui2f_rm : GCCBuiltin<"__nvvm_ui2f_rm">,
595 Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
596 def int_nvvm_ui2f_rp : GCCBuiltin<"__nvvm_ui2f_rp">,
597 Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
598
599 def int_nvvm_lohi_i2d : GCCBuiltin<"__nvvm_lohi_i2d">, // (i32, i32) -> double
600 Intrinsic<[llvm_double_ty], [llvm_i32_ty, llvm_i32_ty],
601 [IntrNoMem]>; // not Commutative: the "lo" and "hi" operands are positional
602
603 def int_nvvm_d2i_lo : GCCBuiltin<"__nvvm_d2i_lo">,
604 Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
605 def int_nvvm_d2i_hi : GCCBuiltin<"__nvvm_d2i_hi">,
606 Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
607
608 def int_nvvm_f2ll_rn_ftz : GCCBuiltin<"__nvvm_f2ll_rn_ftz">,
609 Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
610 def int_nvvm_f2ll_rn : GCCBuiltin<"__nvvm_f2ll_rn">,
611 Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
612 def int_nvvm_f2ll_rz_ftz : GCCBuiltin<"__nvvm_f2ll_rz_ftz">,
613 Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
614 def int_nvvm_f2ll_rz : GCCBuiltin<"__nvvm_f2ll_rz">,
615 Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
616 def int_nvvm_f2ll_rm_ftz : GCCBuiltin<"__nvvm_f2ll_rm_ftz">,
617 Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
618 def int_nvvm_f2ll_rm : GCCBuiltin<"__nvvm_f2ll_rm">,
619 Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
620 def int_nvvm_f2ll_rp_ftz : GCCBuiltin<"__nvvm_f2ll_rp_ftz">,
621 Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
622 def int_nvvm_f2ll_rp : GCCBuiltin<"__nvvm_f2ll_rp">,
623 Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
624
625 def int_nvvm_f2ull_rn_ftz : GCCBuiltin<"__nvvm_f2ull_rn_ftz">,
626 Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
627 def int_nvvm_f2ull_rn : GCCBuiltin<"__nvvm_f2ull_rn">,
628 Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
629 def int_nvvm_f2ull_rz_ftz : GCCBuiltin<"__nvvm_f2ull_rz_ftz">,
630 Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
631 def int_nvvm_f2ull_rz : GCCBuiltin<"__nvvm_f2ull_rz">,
632 Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
633 def int_nvvm_f2ull_rm_ftz : GCCBuiltin<"__nvvm_f2ull_rm_ftz">,
634 Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
635 def int_nvvm_f2ull_rm : GCCBuiltin<"__nvvm_f2ull_rm">,
636 Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
637 def int_nvvm_f2ull_rp_ftz : GCCBuiltin<"__nvvm_f2ull_rp_ftz">,
638 Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
639 def int_nvvm_f2ull_rp : GCCBuiltin<"__nvvm_f2ull_rp">,
640 Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
641
642 def int_nvvm_d2ll_rn : GCCBuiltin<"__nvvm_d2ll_rn">,
643 Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
644 def int_nvvm_d2ll_rz : GCCBuiltin<"__nvvm_d2ll_rz">,
645 Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
646 def int_nvvm_d2ll_rm : GCCBuiltin<"__nvvm_d2ll_rm">,
647 Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
648 def int_nvvm_d2ll_rp : GCCBuiltin<"__nvvm_d2ll_rp">,
649 Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
650
651 def int_nvvm_d2ull_rn : GCCBuiltin<"__nvvm_d2ull_rn">,
652 Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
653 def int_nvvm_d2ull_rz : GCCBuiltin<"__nvvm_d2ull_rz">,
654 Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
655 def int_nvvm_d2ull_rm : GCCBuiltin<"__nvvm_d2ull_rm">,
656 Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
657 def int_nvvm_d2ull_rp : GCCBuiltin<"__nvvm_d2ull_rp">,
658 Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
659
660 def int_nvvm_ll2f_rn : GCCBuiltin<"__nvvm_ll2f_rn">,
661 Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
662 def int_nvvm_ll2f_rz : GCCBuiltin<"__nvvm_ll2f_rz">,
663 Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
664 def int_nvvm_ll2f_rm : GCCBuiltin<"__nvvm_ll2f_rm">,
665 Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
666 def int_nvvm_ll2f_rp : GCCBuiltin<"__nvvm_ll2f_rp">,
667 Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
668 def int_nvvm_ull2f_rn : GCCBuiltin<"__nvvm_ull2f_rn">,
669 Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
670 def int_nvvm_ull2f_rz : GCCBuiltin<"__nvvm_ull2f_rz">,
671 Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
672 def int_nvvm_ull2f_rm : GCCBuiltin<"__nvvm_ull2f_rm">,
673 Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
674 def int_nvvm_ull2f_rp : GCCBuiltin<"__nvvm_ull2f_rp">,
675 Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
676
677 def int_nvvm_ll2d_rn : GCCBuiltin<"__nvvm_ll2d_rn">,
678 Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
679 def int_nvvm_ll2d_rz : GCCBuiltin<"__nvvm_ll2d_rz">,
680 Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
681 def int_nvvm_ll2d_rm : GCCBuiltin<"__nvvm_ll2d_rm">,
682 Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
683 def int_nvvm_ll2d_rp : GCCBuiltin<"__nvvm_ll2d_rp">,
684 Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
685 def int_nvvm_ull2d_rn : GCCBuiltin<"__nvvm_ull2d_rn">,
686 Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
687 def int_nvvm_ull2d_rz : GCCBuiltin<"__nvvm_ull2d_rz">,
688 Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
689 def int_nvvm_ull2d_rm : GCCBuiltin<"__nvvm_ull2d_rm">,
690 Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
691 def int_nvvm_ull2d_rp : GCCBuiltin<"__nvvm_ull2d_rp">,
692 Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
693
694 def int_nvvm_f2h_rn_ftz : GCCBuiltin<"__nvvm_f2h_rn_ftz">,
695 Intrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem]>;
696 def int_nvvm_f2h_rn : GCCBuiltin<"__nvvm_f2h_rn">,
697 Intrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem]>;
698
699 def int_nvvm_h2f : GCCBuiltin<"__nvvm_h2f">,
700 Intrinsic<[llvm_float_ty], [llvm_i16_ty], [IntrNoMem]>;
701
702 //
703 // Bitcast
704 //
705
706 def int_nvvm_bitcast_f2i : GCCBuiltin<"__nvvm_bitcast_f2i">,
707 Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
708 def int_nvvm_bitcast_i2f : GCCBuiltin<"__nvvm_bitcast_i2f">,
709 Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
710
711 def int_nvvm_bitcast_ll2d : GCCBuiltin<"__nvvm_bitcast_ll2d">,
712 Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
713 def int_nvvm_bitcast_d2ll : GCCBuiltin<"__nvvm_bitcast_d2ll">,
714 Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
715
716
717 // Atomic not available as an llvm intrinsic.
718 def int_nvvm_atomic_load_add_f32 : Intrinsic<[llvm_float_ty],
719 [LLVMAnyPointerType<llvm_float_ty>, llvm_float_ty], // pointer-to-float operand, then the float operand
720 [IntrReadWriteArgMem, NoCapture<0>]>;
721 def int_nvvm_atomic_load_inc_32 : Intrinsic<[llvm_i32_ty],
722 [LLVMAnyPointerType<llvm_i32_ty>, llvm_i32_ty], // pointer-to-i32 operand, then the i32 operand
723 [IntrReadWriteArgMem, NoCapture<0>]>;
724 def int_nvvm_atomic_load_dec_32 : Intrinsic<[llvm_i32_ty],
725 [LLVMAnyPointerType<llvm_i32_ty>, llvm_i32_ty], // pointer-to-i32 operand, then the i32 operand
726 [IntrReadWriteArgMem, NoCapture<0>]>;
727
728 // Bar.Sync
729 def int_cuda_syncthreads : GCCBuiltin<"__syncthreads">,
730 Intrinsic<[], [], []>;
731 def int_nvvm_barrier0 : GCCBuiltin<"__nvvm_bar0">,
732 Intrinsic<[], [], []>;
733 def int_nvvm_barrier0_popc : GCCBuiltin<"__nvvm_bar0_popc">,
734 Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>;
735 def int_nvvm_barrier0_and : GCCBuiltin<"__nvvm_bar0_and">,
736 Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>;
737 def int_nvvm_barrier0_or : GCCBuiltin<"__nvvm_bar0_or">,
738 Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>;
739
740 // Membar
741 def int_nvvm_membar_cta : GCCBuiltin<"__nvvm_membar_cta">,
742 Intrinsic<[], [], []>;
743 def int_nvvm_membar_gl : GCCBuiltin<"__nvvm_membar_gl">,
744 Intrinsic<[], [], []>;
745 def int_nvvm_membar_sys : GCCBuiltin<"__nvvm_membar_sys">,
746 Intrinsic<[], [], []>;
747
748
749 // Accessing special registers
750 def int_nvvm_read_ptx_sreg_tid_x :
751 Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
752 GCCBuiltin<"__nvvm_read_ptx_sreg_tid_x">;
753 def int_nvvm_read_ptx_sreg_tid_y :
754 Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
755 GCCBuiltin<"__nvvm_read_ptx_sreg_tid_y">;
756 def int_nvvm_read_ptx_sreg_tid_z :
757 Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
758 GCCBuiltin<"__nvvm_read_ptx_sreg_tid_z">;
759
760 def int_nvvm_read_ptx_sreg_ntid_x :
761 Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
762 GCCBuiltin<"__nvvm_read_ptx_sreg_ntid_x">;
763 def int_nvvm_read_ptx_sreg_ntid_y :
764 Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
765 GCCBuiltin<"__nvvm_read_ptx_sreg_ntid_y">;
766 def int_nvvm_read_ptx_sreg_ntid_z :
767 Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
768 GCCBuiltin<"__nvvm_read_ptx_sreg_ntid_z">;
769
770 def int_nvvm_read_ptx_sreg_ctaid_x :
771 Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
772 GCCBuiltin<"__nvvm_read_ptx_sreg_ctaid_x">;
773 def int_nvvm_read_ptx_sreg_ctaid_y :
774 Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
775 GCCBuiltin<"__nvvm_read_ptx_sreg_ctaid_y">;
776 def int_nvvm_read_ptx_sreg_ctaid_z :
777 Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
778 GCCBuiltin<"__nvvm_read_ptx_sreg_ctaid_z">;
779
780 def int_nvvm_read_ptx_sreg_nctaid_x :
781 Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
782 GCCBuiltin<"__nvvm_read_ptx_sreg_nctaid_x">;
783 def int_nvvm_read_ptx_sreg_nctaid_y :
784 Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
785 GCCBuiltin<"__nvvm_read_ptx_sreg_nctaid_y">;
786 def int_nvvm_read_ptx_sreg_nctaid_z :
787 Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
788 GCCBuiltin<"__nvvm_read_ptx_sreg_nctaid_z">;
789
790 def int_nvvm_read_ptx_sreg_warpsize :
791 Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
792 GCCBuiltin<"__nvvm_read_ptx_sreg_warpsize">;
793
794
795 // Generated within nvvm. Use for ldu on sm_20 or later
796 // @TODO: Revisit this, Changed LLVMAnyPointerType to LLVMPointerType
797 def int_nvvm_ldu_global_i : Intrinsic<[llvm_anyint_ty],
798 [LLVMPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>], // operand: pointer to the overloaded result type
799 "llvm.nvvm.ldu.global.i">;
800 def int_nvvm_ldu_global_f : Intrinsic<[llvm_anyfloat_ty],
801 [LLVMPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>], // operand: pointer to the overloaded result type
802 "llvm.nvvm.ldu.global.f">;
803 def int_nvvm_ldu_global_p : Intrinsic<[llvm_anyptr_ty],
804 [LLVMPointerType<LLVMMatchType<0>>], [IntrReadMem, NoCapture<0>], // operand: pointer to the overloaded result type
805 "llvm.nvvm.ldu.global.p">;
806
807
808 // Use for generic pointers
809 // - These intrinsics are used to convert address spaces.
810 // - The input pointer and output pointer must have the same type, except for
811 // the address-space. (This restriction is not enforced here as there is
812 // currently no way to describe it).
813 // - This complements the llvm bitcast, which can be used to cast one type
814 // of pointer to another type of pointer, while the address space remains
815 // the same.
816 def int_nvvm_ptr_local_to_gen: Intrinsic<[llvm_anyptr_ty],
817 [llvm_anyptr_ty], [IntrNoMem, NoCapture<0>],
818 "llvm.nvvm.ptr.local.to.gen">;
819 def int_nvvm_ptr_shared_to_gen: Intrinsic<[llvm_anyptr_ty],
820 [llvm_anyptr_ty], [IntrNoMem, NoCapture<0>],
821 "llvm.nvvm.ptr.shared.to.gen">;
822 def int_nvvm_ptr_global_to_gen: Intrinsic<[llvm_anyptr_ty],
823 [llvm_anyptr_ty], [IntrNoMem, NoCapture<0>],
824 "llvm.nvvm.ptr.global.to.gen">;
825 def int_nvvm_ptr_constant_to_gen: Intrinsic<[llvm_anyptr_ty],
826 [llvm_anyptr_ty], [IntrNoMem, NoCapture<0>],
827 "llvm.nvvm.ptr.constant.to.gen">;
828
829 def int_nvvm_ptr_gen_to_global: Intrinsic<[llvm_anyptr_ty],
830 [llvm_anyptr_ty], [IntrNoMem, NoCapture<0>],
831 "llvm.nvvm.ptr.gen.to.global">;
832 def int_nvvm_ptr_gen_to_shared: Intrinsic<[llvm_anyptr_ty],
833 [llvm_anyptr_ty], [IntrNoMem, NoCapture<0>],
834 "llvm.nvvm.ptr.gen.to.shared">;
835 def int_nvvm_ptr_gen_to_local: Intrinsic<[llvm_anyptr_ty],
836 [llvm_anyptr_ty], [IntrNoMem, NoCapture<0>],
837 "llvm.nvvm.ptr.gen.to.local">;
838 def int_nvvm_ptr_gen_to_constant: Intrinsic<[llvm_anyptr_ty],
839 [llvm_anyptr_ty], [IntrNoMem, NoCapture<0>],
840 "llvm.nvvm.ptr.gen.to.constant">;
841
842 // Used in nvvm internally to help address space opt and ptx code generation
843 // This is for params that are passed to kernel functions by pointer by-val.
844 def int_nvvm_ptr_gen_to_param: Intrinsic<[llvm_anyptr_ty],
845 [llvm_anyptr_ty],
846 [IntrNoMem, NoCapture<0>],
847 "llvm.nvvm.ptr.gen.to.param">;
848
849 // Move intrinsics, used in nvvm internally
850
851 def int_nvvm_move_i8 : Intrinsic<[llvm_i8_ty], [llvm_i8_ty], [IntrNoMem],
852 "llvm.nvvm.move.i8">;
853 def int_nvvm_move_i16 : Intrinsic<[llvm_i16_ty], [llvm_i16_ty], [IntrNoMem],
854 "llvm.nvvm.move.i16">;
855 def int_nvvm_move_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem],
856 "llvm.nvvm.move.i32">;
857 def int_nvvm_move_i64 : Intrinsic<[llvm_i64_ty], [llvm_i64_ty], [IntrNoMem],
858 "llvm.nvvm.move.i64">;
859 def int_nvvm_move_float : Intrinsic<[llvm_float_ty], [llvm_float_ty],
860 [IntrNoMem], "llvm.nvvm.move.float">;
861 def int_nvvm_move_double : Intrinsic<[llvm_double_ty], [llvm_double_ty],
862 [IntrNoMem], "llvm.nvvm.move.double">;
863 def int_nvvm_move_ptr : Intrinsic<[llvm_anyptr_ty], [llvm_anyptr_ty],
864 [IntrNoMem, NoCapture<0>], "llvm.nvvm.move.ptr">;
865
866
867 /// Error / Warn
868 def int_nvvm_compiler_error :
869 Intrinsic<[], [llvm_anyptr_ty], [], "llvm.nvvm.compiler.error">;
870 def int_nvvm_compiler_warn :
871 Intrinsic<[], [llvm_anyptr_ty], [], "llvm.nvvm.compiler.warn">;
3939 case mblaze: return "mblaze";
4040 case ptx32: return "ptx32";
4141 case ptx64: return "ptx64";
42 case nvptx: return "nvptx";
43 case nvptx64: return "nvptx64";
4244 case le32: return "le32";
4345 case amdil: return "amdil";
4446 }
7577
7678 case ptx32: return "ptx";
7779 case ptx64: return "ptx";
80 case nvptx: return "nvptx";
81 case nvptx64: return "nvptx";
7882 case le32: return "le32";
7983 case amdil: return "amdil";
8084 }
161165 .Case("xcore", xcore)
162166 .Case("ptx32", ptx32)
163167 .Case("ptx64", ptx64)
168 .Case("nvptx", nvptx)
169 .Case("nvptx64", nvptx64)
164170 .Case("le32", le32)
165171 .Case("amdil", amdil)
166172 .Default(UnknownArch);
193199 .Case("r600", Triple::r600)
194200 .Case("ptx32", Triple::ptx32)
195201 .Case("ptx64", Triple::ptx64)
202 .Case("nvptx", Triple::nvptx)
203 .Case("nvptx64", Triple::nvptx64)
196204 .Case("amdil", Triple::amdil)
197205 .Default(Triple::UnknownArch);
198206 }
216224 .Case("r600", "r600")
217225 .Case("ptx32", "ptx32")
218226 .Case("ptx64", "ptx64")
227 .Case("nvptx", "nvptx")
228 .Case("nvptx64", "nvptx64")
219229 .Case("le32", "le32")
220230 .Case("amdil", "amdil")
221231 .Default(NULL);
250260 .Case("xcore", Triple::xcore)
251261 .Case("ptx32", Triple::ptx32)
252262 .Case("ptx64", Triple::ptx64)
263 .Case("nvptx", Triple::nvptx)
264 .Case("nvptx64", Triple::nvptx64)
253265 .Case("le32", Triple::le32)
254266 .Case("amdil", Triple::amdil)
255267 .Default(Triple::UnknownArch);
651663 case llvm::Triple::mblaze:
652664 case llvm::Triple::mips:
653665 case llvm::Triple::mipsel:
666 case llvm::Triple::nvptx:
654667 case llvm::Triple::ppc:
655668 case llvm::Triple::ptx32:
656669 case llvm::Triple::r600:
663676
664677 case llvm::Triple::mips64:
665678 case llvm::Triple::mips64el:
679 case llvm::Triple::nvptx64:
666680 case llvm::Triple::ppc64:
667681 case llvm::Triple::ptx64:
668682 case llvm::Triple::sparcv9:
700714 case Triple::mblaze:
701715 case Triple::mips:
702716 case Triple::mipsel:
717 case Triple::nvptx:
703718 case Triple::ppc:
704719 case Triple::ptx32:
705720 case Triple::r600:
713728
714729 case Triple::mips64: T.setArch(Triple::mips); break;
715730 case Triple::mips64el: T.setArch(Triple::mipsel); break;
731 case Triple::nvptx64: T.setArch(Triple::nvptx); break;
716732 case Triple::ppc64: T.setArch(Triple::ppc); break;
717733 case Triple::ptx64: T.setArch(Triple::ptx32); break;
718734 case Triple::sparcv9: T.setArch(Triple::sparc); break;
741757
742758 case Triple::mips64:
743759 case Triple::mips64el:
760 case Triple::nvptx64:
744761 case Triple::ppc64:
745762 case Triple::ptx64:
746763 case Triple::sparcv9:
750767
751768 case Triple::mips: T.setArch(Triple::mips64); break;
752769 case Triple::mipsel: T.setArch(Triple::mips64el); break;
770 case Triple::nvptx: T.setArch(Triple::nvptx64); break;
753771 case Triple::ppc: T.setArch(Triple::ppc64); break;
754772 case Triple::ptx32: T.setArch(Triple::ptx64); break;
755773 case Triple::sparc: T.setArch(Triple::sparcv9); break;
1515 ;===------------------------------------------------------------------------===;
1616
1717 [common]
18 subdirectories = ARM CellSPU CppBackend Hexagon MBlaze MSP430 Mips PTX PowerPC Sparc X86 XCore
18 subdirectories = ARM CellSPU CppBackend Hexagon MBlaze MSP430 NVPTX Mips PTX PowerPC Sparc X86 XCore
1919
2020 ; This is a special group whose required libraries are extended (by llvm-build)
2121 ; with the best execution engine (the native JIT, if available, or the
0 set(LLVM_TARGET_DEFINITIONS NVPTX.td)
1
2
3 tablegen(LLVM NVPTXGenRegisterInfo.inc -gen-register-info)
4 tablegen(LLVM NVPTXGenInstrInfo.inc -gen-instr-info)
5 tablegen(LLVM NVPTXGenAsmWriter.inc -gen-asm-writer)
6 tablegen(LLVM NVPTXGenDAGISel.inc -gen-dag-isel)
7 tablegen(LLVM NVPTXGenSubtargetInfo.inc -gen-subtarget)
8 add_public_tablegen_target(NVPTXCommonTableGen)
9
10 set(NVPTXCodeGen_sources
11 NVPTXFrameLowering.cpp
12 NVPTXInstrInfo.cpp
13 NVPTXISelDAGToDAG.cpp
14 NVPTXISelLowering.cpp
15 NVPTXRegisterInfo.cpp
16 NVPTXSubtarget.cpp
17 NVPTXTargetMachine.cpp
18 NVPTXSplitBBatBar.cpp
19 NVPTXLowerAggrCopies.cpp
20 NVPTXutil.cpp
21 NVPTXAllocaHoisting.cpp
22 NVPTXAsmPrinter.cpp
23 NVPTXUtilities.cpp
24 VectorElementize.cpp
25 )
26
27 add_llvm_target(NVPTXCodeGen ${NVPTXCodeGen_sources})
28
29
30 add_subdirectory(TargetInfo)
31 add_subdirectory(InstPrinter)
32 add_subdirectory(MCTargetDesc)
0 include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
1
2 add_llvm_library(LLVMNVPTXAsmPrinter
3 NVPTXInstPrinter.cpp
4 )
5
6 add_dependencies(LLVMNVPTXAsmPrinter NVPTXCommonTableGen)
0 ;===- ./lib/Target/NVPTX/InstPrinter/LLVMBuild.txt -------------*- Conf -*--===;
1 ;
2 ; The LLVM Compiler Infrastructure
3 ;
4 ; This file is distributed under the University of Illinois Open Source
5 ; License. See LICENSE.TXT for details.
6 ;
7 ;===------------------------------------------------------------------------===;
8 ;
9 ; This is an LLVMBuild description file for the components in this subdirectory.
10 ;
11 ; For more information on the LLVMBuild system, please see:
12 ;
13 ; http://llvm.org/docs/LLVMBuild.html
14 ;
15 ;===------------------------------------------------------------------------===;
16
17 [component_0]
18 type = Library
19 name = NVPTXAsmPrinter
20 parent = NVPTX
21 required_libraries = MC Support
22 add_to_library_groups = NVPTX
0 ##===- lib/Target/NVPTX/InstPrinter/Makefile ---------------*- Makefile -*-===##
1 #
2 # The LLVM Compiler Infrastructure
3 #
4 # This file is distributed under the University of Illinois Open Source
5 # License. See LICENSE.TXT for details.
6 #
7 ##===----------------------------------------------------------------------===##
8 LEVEL = ../../../..
9 LIBRARYNAME = LLVMNVPTXAsmPrinter
10
11 # Hack: we need to include 'main' NVPTX target directory to grab private headers
12 CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
13
14 include $(LEVEL)/Makefile.common
0 ;===- ./lib/Target/NVPTX/LLVMBuild.txt -------------------------*- Conf -*--===;
1 ;
2 ; The LLVM Compiler Infrastructure
3 ;
4 ; This file is distributed under the University of Illinois Open Source
5 ; License. See LICENSE.TXT for details.
6 ;
7 ;===------------------------------------------------------------------------===;
8 ;
9 ; This is an LLVMBuild description file for the components in this subdirectory.
10 ;
11 ; For more information on the LLVMBuild system, please see:
12 ;
13 ; http://llvm.org/docs/LLVMBuild.html
14 ;
15 ;===------------------------------------------------------------------------===;
16
17 [common]
18 subdirectories = InstPrinter MCTargetDesc TargetInfo
19
20 [component_0]
21 type = TargetGroup
22 name = NVPTX
23 parent = Target
24 has_asmprinter = 1
25
26 [component_1]
27 type = Library
28 name = NVPTXCodeGen
29 parent = NVPTX
30 required_libraries = Analysis AsmPrinter CodeGen Core MC NVPTXDesc NVPTXInfo SelectionDAG Support Target TransformUtils
31 add_to_library_groups = NVPTX
0 add_llvm_library(LLVMNVPTXDesc
1 NVPTXMCAsmInfo.cpp
2 NVPTXMCTargetDesc.cpp
3 )
4
5 add_dependencies(LLVMNVPTXDesc NVPTXCommonTableGen)
6
7 # Hack: we need to include 'main' target directory to grab private headers
8 #include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..)
0 ;===- ./lib/Target/NVPTX/MCTargetDesc/LLVMBuild.txt ------------*- Conf -*--===;
1 ;
2 ; The LLVM Compiler Infrastructure
3 ;
4 ; This file is distributed under the University of Illinois Open Source
5 ; License. See LICENSE.TXT for details.
6 ;
7 ;===------------------------------------------------------------------------===;
8 ;
9 ; This is an LLVMBuild description file for the components in this subdirectory.
10 ;
11 ; For more information on the LLVMBuild system, please see:
12 ;
13 ; http://llvm.org/docs/LLVMBuild.html
14 ;
15 ;===------------------------------------------------------------------------===;
16
17 [component_0]
18 type = Library
19 name = NVPTXDesc
20 parent = NVPTX
21 required_libraries = MC NVPTXAsmPrinter NVPTXInfo Support
22 add_to_library_groups = NVPTX
0 ##===- lib/Target/NVPTX/MCTargetDesc/Makefile --------------*- Makefile -*-===##
1 #
2 # The LLVM Compiler Infrastructure
3 #
4 # This file is distributed under the University of Illinois Open Source
5 # License. See LICENSE.TXT for details.
6 #
7 ##===----------------------------------------------------------------------===##
8
9 LEVEL = ../../../..
10 LIBRARYNAME = LLVMNVPTXDesc
11
12 # Hack: we need to include 'main' target directory to grab private headers
13 CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
14
15 include $(LEVEL)/Makefile.common
0 //===-- NVPTXBaseInfo.h - Top-level definitions for NVPTX -------*- C++ -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains small standalone helper functions and enum definitions for
10 // the NVPTX target useful for the compiler back-end and the MC libraries.
11 // As such, it deliberately does not include references to LLVM core
12 // code gen types, passes, etc..
13 //
14 //===----------------------------------------------------------------------===//
15
16 #ifndef NVPTXBASEINFO_H
17 #define NVPTXBASEINFO_H
18
19 namespace llvm {
20
21 enum AddressSpace { // Numeric address-space identifiers; every value is fixed explicitly.
22 ADDRESS_SPACE_GENERIC = 0,
23 ADDRESS_SPACE_GLOBAL = 1,
24 ADDRESS_SPACE_CONST_NOT_GEN = 2, // Not part of generic space
25 ADDRESS_SPACE_SHARED = 3,
26 ADDRESS_SPACE_CONST = 4,
27 ADDRESS_SPACE_LOCAL = 5,
28
29 // NVVM Internal
30 ADDRESS_SPACE_PARAM = 101 // deliberately far from the 0-5 range used above
31 };
32
33 enum PropertyAnnotation { // Values are consecutive from 0 and index PropertyAnnotationNames.
34 PROPERTY_MAXNTID_X = 0,
35 PROPERTY_MAXNTID_Y,
36 PROPERTY_MAXNTID_Z,
37 PROPERTY_REQNTID_X,
38 PROPERTY_REQNTID_Y,
39 PROPERTY_REQNTID_Z,
40 PROPERTY_MINNCTAPERSM,
41 PROPERTY_ISTEXTURE,
42 PROPERTY_ISSURFACE,
43 PROPERTY_ISSAMPLER,
44 PROPERTY_ISREADONLY_IMAGE_PARAM,
45 PROPERTY_ISWRITEONLY_IMAGE_PARAM,
46 PROPERTY_ISKERNEL_FUNCTION,
47 PROPERTY_ALIGN,
48
49 // last property; new enumerators must be added above this one
50 PROPERTY_LAST
51 };
52
53 const unsigned AnnotationNameLen = 8; // maximum length of an annotation name, excluding the terminating NUL
54 const char
55 PropertyAnnotationNames[PROPERTY_LAST + 1][AnnotationNameLen + 1] = { // one row per PropertyAnnotation value, in enum order
56 "maxntidx", // PROPERTY_MAXNTID_X
57 "maxntidy", // PROPERTY_MAXNTID_Y
58 "maxntidz", // PROPERTY_MAXNTID_Z
59 "reqntidx", // PROPERTY_REQNTID_X
60 "reqntidy", // PROPERTY_REQNTID_Y
61 "reqntidz", // PROPERTY_REQNTID_Z
62 "minctasm", // PROPERTY_MINNCTAPERSM
63 "texture", // PROPERTY_ISTEXTURE
64 "surface", // PROPERTY_ISSURFACE
65 "sampler", // PROPERTY_ISSAMPLER
66 "rdoimage", // PROPERTY_ISREADONLY_IMAGE_PARAM
67 "wroimage", // PROPERTY_ISWRITEONLY_IMAGE_PARAM
68 "kernel", // PROPERTY_ISKERNEL_FUNCTION
69 "align", // PROPERTY_ALIGN
70
71 // last property
72 "proplast", // PROPERTY_LAST
73 };
74
75 // Name of the named metadata node used for global annotations.
76 #if defined(__GNUC__)
77 // This pointer is declared static in a header, but some of the .cpp files
78 // that include the header never use it; gcc warns about that when compiling
79 // those .cpp files, hence __attribute__((unused)).
80 __attribute__((unused))
81 #endif
82 static const char* NamedMDForAnnotations = "nvvm.annotations";
83
84 }
85
86
87 #endif
0 //===-- NVPTXMCAsmInfo.cpp - NVPTX asm properties -------------------------===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the definitions of the NVPTXMCAsmInfo properties.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "NVPTXMCAsmInfo.h"
14 #include "llvm/ADT/Triple.h"
15 #include "llvm/Support/CommandLine.h"
16
17 using namespace llvm;
18
19 bool CompileForDebugging; // external storage bound to -debug-compile through cl::location below
20
21 // -debug-compile - Command line option to inform opt and llc passes to
22 // compile for debugging
23 static cl::opt<bool, true>
24 Debug("debug-compile", cl::desc("Compile for debugging"), cl::Hidden,
25 cl::location(CompileForDebugging),
26 cl::init(false));
27
28 void NVPTXMCAsmInfo::anchor() { } // empty out-of-line definition of the class's virtual anchor()
29
30 NVPTXMCAsmInfo::NVPTXMCAsmInfo(const Target &T, const StringRef &TT) { // sets the PTX assembly-syntax fields; T is not read here
31 Triple TheTriple(TT); // TT is parsed as a target triple string
32 if (TheTriple.getArch() == Triple::nvptx64)
33 PointerSize = 8; // only nvptx64 overrides PointerSize; otherwise it keeps whatever value it already has
34
35 CommentString = "//";
36
37 PrivateGlobalPrefix = "$L__"; // NOTE(review): overwritten with "" further down — confirm which value is intended
38
39 AllowPeriodsInName = false;
40
41 HasSetDirective = false;
42
43 HasSingleParameterDotFile = false;
44
45 InlineAsmStart = " inline asm";
46 InlineAsmEnd = " inline asm"; // NOTE(review): identical to InlineAsmStart — confirm this is intended
47
48 SupportsDebugInformation = CompileForDebugging; // value of the -debug-compile option at construction time
49 HasDotTypeDotSizeDirective = false;
50
51 Data8bitsDirective = " .b8 ";
52 Data16bitsDirective = " .b16 ";
53 Data32bitsDirective = " .b32 ";
54 Data64bitsDirective = " .b64 ";
55 PrivateGlobalPrefix = ""; // this later assignment is the one that takes effect
56 ZeroDirective = " .b8";
57 AsciiDirective = " .b8";
58 AscizDirective = " .b8";
59
60 // @TODO: Can we just disable this?
61 GlobalDirective = "\t// .globl\t"; // the directive text starts with the comment string set above
62 }
0 //===-- NVPTXMCAsmInfo.h - NVPTX asm properties ----------------*- C++ -*--===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the declaration of the NVPTXMCAsmInfo class.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #ifndef NVPTX_MCASM_INFO_H
14 #define NVPTX_MCASM_INFO_H
15
16 #include "llvm/MC/MCAsmInfo.h"
17
18 namespace llvm {
19 class Target;
20 class StringRef;
21
22 class NVPTXMCAsmInfo : public MCAsmInfo { // MCAsmInfo specialization for the NVPTX target
23 virtual void anchor(); // private; defined out of line in NVPTXMCAsmInfo.cpp
24 public:
25 explicit NVPTXMCAsmInfo(const Target &T, const StringRef &TT); // TT: target triple string
26 };
27 } // namespace llvm
28
29 #endif // NVPTX_MCASM_INFO_H
0 //===-- NVPTXMCTargetDesc.cpp - NVPTX Target Descriptions -------*- C++ -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file provides NVPTX specific target descriptions.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "NVPTXMCTargetDesc.h"
14 #include "NVPTXMCAsmInfo.h"
15 #include "llvm/MC/MCCodeGenInfo.h"
16 #include "llvm/MC/MCInstrInfo.h"
17 #include "llvm/MC/MCRegisterInfo.h"
18 #include "llvm/MC/MCSubtargetInfo.h"
19 #include "llvm/Support/TargetRegistry.h"
20
21 #define GET_INSTRINFO_MC_DESC
22 #include "NVPTXGenInstrInfo.inc"
23
24 #define GET_SUBTARGETINFO_MC_DESC
25 #include "NVPTXGenSubtargetInfo.inc"
26
27 #define GET_REGINFO_MC_DESC
28 #include "NVPTXGenRegisterInfo.inc"
29
30
31 using namespace llvm;
32
33 static MCInstrInfo *createNVPTXMCInstrInfo() { // heap-allocates an MCInstrInfo, initializes it, returns the raw pointer
34 MCInstrInfo *X = new MCInstrInfo();
35 InitNVPTXMCInstrInfo(X); // presumably supplied by the NVPTXGenInstrInfo.inc include above — confirm
36 return X;
37 }
38
39 static MCRegisterInfo *createNVPTXMCRegisterInfo(StringRef TT) { // TT is accepted but not read in this body
40 MCRegisterInfo *X = new MCRegisterInfo();
41 // PTX does not have a return address register.
42 InitNVPTXMCRegisterInfo(X, 0); // 0 is passed in place of a return-address register (see comment above)
43 return X;
44 }
45
46 static MCSubtargetInfo *createNVPTXMCSubtargetInfo(StringRef TT, StringRef CPU,
47 StringRef FS) { // heap-allocates an MCSubtargetInfo and returns the raw pointer
48 MCSubtargetInfo *X = new MCSubtargetInfo();
49 InitNVPTXMCSubtargetInfo(X, TT, CPU, FS); // all three strings are forwarded unchanged
50 return X;
51 }
52
53 static MCCodeGenInfo *createNVPTXMCCodeGenInfo(StringRef TT, Reloc::Model RM,
54 CodeModel::Model CM,
55 CodeGenOpt::Level OL) { // TT is accepted but not read in this body
56 MCCodeGenInfo *X = new MCCodeGenInfo();
57 X->InitMCCodeGenInfo(RM, CM, OL); // relocation model, code model and opt level are forwarded unchanged
58 return X;
59 }
60
61
62 // Force static initialization.
63 extern "C" void LLVMInitializeNVPTXTargetMC() { // registers the MC-layer factories for both NVPTX targets
64 // Register the MC asm info.
65 RegisterMCAsmInfo<NVPTXMCAsmInfo> X(TheNVPTXTarget32);
66 RegisterMCAsmInfo<NVPTXMCAsmInfo> Y(TheNVPTXTarget64);
67
68 // Register the MC codegen info.
69 TargetRegistry::RegisterMCCodeGenInfo(TheNVPTXTarget32,
70 createNVPTXMCCodeGenInfo);
71 TargetRegistry::RegisterMCCodeGenInfo(TheNVPTXTarget64,
72 createNVPTXMCCodeGenInfo);
73
74 // Register the MC instruction info.
75 TargetRegistry::RegisterMCInstrInfo(TheNVPTXTarget32, createNVPTXMCInstrInfo);
76 TargetRegistry::RegisterMCInstrInfo(TheNVPTXTarget64, createNVPTXMCInstrInfo);
77
78 // Register the MC register info.
79 TargetRegistry::RegisterMCRegInfo(TheNVPTXTarget32,
80 createNVPTXMCRegisterInfo);
81 TargetRegistry::RegisterMCRegInfo(TheNVPTXTarget64,
82 createNVPTXMCRegisterInfo);
83
84 // Register the MC subtarget info.
85 TargetRegistry::RegisterMCSubtargetInfo(TheNVPTXTarget32,
86 createNVPTXMCSubtargetInfo);
87 TargetRegistry::RegisterMCSubtargetInfo(TheNVPTXTarget64,
88 createNVPTXMCSubtargetInfo);
89
90 }
0 //===-- NVPTXMCTargetDesc.h - NVPTX Target Descriptions ---------*- C++ -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file provides NVPTX specific target descriptions.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #ifndef NVPTXMCTARGETDESC_H
14 #define NVPTXMCTARGETDESC_H
15
16 namespace llvm {
17 class Target;
18
19 extern Target TheNVPTXTarget32;
20 extern Target TheNVPTXTarget64;
21
22 } // End llvm namespace
23
24 // Defines symbolic names for PTX registers.
25 #define GET_REGINFO_ENUM
26 #include "NVPTXGenRegisterInfo.inc"
27
28 // Defines symbolic names for the PTX instructions.
29 #define GET_INSTRINFO_ENUM
30 #include "NVPTXGenInstrInfo.inc"
31
32 #define GET_SUBTARGETINFO_ENUM
33 #include "NVPTXGenSubtargetInfo.inc"
34
35 #endif
0 ##===- lib/Target/NVPTX/Makefile ---------------------------*- Makefile -*-===##
1 #
2 # The LLVM Compiler Infrastructure
3 #
4 # This file is distributed under the University of Illinois Open Source
5 # License. See LICENSE.TXT for details.
6 #
7 ##===----------------------------------------------------------------------===##
8
9 LEVEL = ../../..
10 LIBRARYNAME = LLVMNVPTXCodeGen
11 TARGET = NVPTX
12
13 # Make sure that tblgen is run, first thing.
14 BUILT_SOURCES = NVPTXGenAsmWriter.inc \
15 NVPTXGenDAGISel.inc \
16 NVPTXGenInstrInfo.inc \
17 NVPTXGenRegisterInfo.inc \
18 NVPTXGenSubtargetInfo.inc
19
20 DIRS = InstPrinter TargetInfo MCTargetDesc
21
22 include $(LEVEL)/Makefile.common
0 //===-- ManagedStringPool.h - Managed String Pool ---------------*- C++ -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // The strings allocated from a managed string pool are owned by the string
10 // pool and will be deleted together with the managed string pool.
11 //
12 //===----------------------------------------------------------------------===//
13
14
15 #ifndef LLVM_SUPPORT_MANAGED_STRING_H
16 #define LLVM_SUPPORT_MANAGED_STRING_H
17
18 #include "llvm/ADT/SmallVector.h"
19 #include
20
21 namespace llvm {
22
23 /// ManagedStringPool - The strings allocated from a managed string pool are
24 /// owned by the string pool and will be deleted together with the managed
25 /// string pool.
26 class ManagedStringPool {
27 SmallVector Pool;
28
29 public:
30 ManagedStringPool() {}
31 ~ManagedStringPool() {
32 SmallVector::iterator Current = Pool.begin();
33 while (Current != Pool.end()) {
34 delete *Current;
35 Current++;
36 }
37 }
38
39 std::string *getManagedString(const char *S) {
40 std::string *Str = new std::string(S);
41 Pool.push_back(Str);
42 return Str;
43 }
44 };
45
46 }
47
48 #endif
0 //===-- NVPTX.h - Top-level interface for NVPTX representation --*- C++ -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the entry points for global functions defined in
10 // the LLVM NVPTX back-end.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #ifndef LLVM_TARGET_NVPTX_H
15 #define LLVM_TARGET_NVPTX_H
16
17 #include
18 #include
19 #include "llvm/Value.h"
20 #include "llvm/Module.h"
21 #include "llvm/Support/ErrorHandling.h"
22 #include "llvm/Target/TargetMachine.h"
23 #include "MCTargetDesc/NVPTXBaseInfo.h"
24
25 namespace llvm {
26 class NVPTXTargetMachine;
27 class FunctionPass;
28 class formatted_raw_ostream;
29
namespace NVPTXCC {
/// Comparison predicates understood by NVPTXCondCodeToString().
enum CondCodes {
  EQ,
  NE,
  LT,
  LE,
  GT,
  GE
};
}

/// Return the two-letter lower-case mnemonic for \p CC ("eq", "ne", ...).
///
/// An out-of-range value trips the assertion in asserts-enabled builds and
/// yields "ne" otherwise.
inline static const char *NVPTXCondCodeToString(NVPTXCC::CondCodes CC) {
  // Indexed by the enumerator value, so the order must match CondCodes.
  static const char *const Mnemonics[] = {
    "eq", "ne", "lt", "le", "gt", "ge"
  };
  const unsigned NumCodes = sizeof(Mnemonics) / sizeof(Mnemonics[0]);

  const unsigned Index = static_cast<unsigned>(CC);
  if (Index < NumCodes)
    return Mnemonics[Index];

  assert(0 && "Unknown condition code");
  return "ne";
}
52
// Factory functions for NVPTX-specific function passes. Only the declarations
// live here; each returns a newly created FunctionPass.
// NOTE(review): ownership presumably passes to the pass manager the pass is
// added to -- confirm at the call sites.
53 FunctionPass *createNVPTXISelDag(NVPTXTargetMachine &TM,
54 llvm::CodeGenOpt::Level OptLevel);
55 FunctionPass *createVectorElementizePass(NVPTXTargetMachine &);
56 FunctionPass *createLowerStructArgsPass(NVPTXTargetMachine &);
57 FunctionPass *createNVPTXReMatPass(NVPTXTargetMachine &);
58 FunctionPass *createNVPTXReMatBlockPass(NVPTXTargetMachine &);
59
// Predicate on a value within a module; defined elsewhere.
60 bool isImageOrSamplerVal(const Value *, const Module *);
61
// Target objects for the two NVPTX variants (also declared in
// NVPTXMCTargetDesc.h).
62 extern Target TheNVPTXTarget32;
63 extern Target TheNVPTXTarget64;
64
65 namespace NVPTX
66 {
// Driver interface reported by the subtarget. NVPTXAsmPrinter emits .loc
// line-number directives only when it is CUDA.
67 enum DrvInterface {
68 NVCL,
69 CUDA,
70 TEST
71 };
72
73 // A field inside TSFlags needs a shift and a mask. The usage is
74 // always as follows :
75 // ((TSFlags & fieldMask) >> fieldShift)
76 // The enum keeps the mask, the shift, and all valid values of the
77 // field in one place.
// VecInstType occupies the low four bits of TSFlags (mask 0xF, shift 0).
78 enum VecInstType {
79 VecInstTypeShift = 0,
80 VecInstTypeMask = 0xF,
81
82 VecNOP = 0,
83 VecLoad = 1,
84 VecStore = 2,
85 VecBuild = 3,
86 VecShuffle = 4,
87 VecExtract = 5,
88 VecInsert = 6,
89 VecDest = 7,
90 VecOther = 15
91 };
92
// Single-bit TSFlags field at bit 4.
93 enum SimpleMove {
94 SimpleMoveMask = 0x10,
95 SimpleMoveShift = 4
96 };
// Single-bit TSFlags fields: isLoad at bit 5, isStore at bit 6.
97 enum LoadStore {
98 isLoadMask = 0x20,
99 isLoadShift = 5,
100 isStoreMask = 0x40,
101 isStoreShift = 6
102 };
103
// Immediate-operand encodings that NVPTXAsmPrinter::printLdStCode turns into
// PTX load/store instruction suffixes.
104 namespace PTXLdStInstCode {
// Printed as .global/.const/.shared/.param/.local; GENERIC prints .global
// only when the subtarget lacks generic ld/st.
105 enum AddressSpace{
106 GENERIC = 0,
107 GLOBAL = 1,
108 CONSTANT = 2,
109 SHARED = 3,
110 PARAM = 4,
111 LOCAL = 5
112 };
// Printed as the type letter "u", "s" or "f".
113 enum FromType {
114 Unsigned = 0,
115 Signed,
116 Float
117 };
// Printed as ".v2" / ".v4"; Scalar prints nothing.
118 enum VecType {
119 Scalar = 1,
120 V2 = 2,
121 V4 = 4
122 };
123 }
124 }
125 } // end namespace llvm;
126
127 // Defines symbolic names for NVPTX registers. This defines a mapping from
128 // register name to register number.
129 #define GET_REGINFO_ENUM
130 #include "NVPTXGenRegisterInfo.inc"
131
132 // Defines symbolic names for the NVPTX instructions.
133 #define GET_INSTRINFO_ENUM
134 #include "NVPTXGenInstrInfo.inc"
135
136 #endif
0 //===- NVPTX.td - Describe the NVPTX Target Machine -----------*- tblgen -*-==//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 // This is the top level entry point for the NVPTX target.
9 //===----------------------------------------------------------------------===//
10
11 //===----------------------------------------------------------------------===//
12 // Target-independent interfaces
13 //===----------------------------------------------------------------------===//
14
15 include "llvm/Target/Target.td"
16
17 include "NVPTXRegisterInfo.td"
18 include "NVPTXInstrInfo.td"
19
20 //===----------------------------------------------------------------------===//
21 // Subtarget Features.
22 // - We use the SM version number instead of explicit feature table.
23 // - Need at least one feature to avoid generating zero sized array by
24 // TableGen in NVPTXGenSubtarget.inc.
25 //===----------------------------------------------------------------------===//
def FeatureDummy : SubtargetFeature<"dummy", "dummy", "true", "">;

//===----------------------------------------------------------------------===//
// NVPTX supported processors.
//===----------------------------------------------------------------------===//

// Proc - A processor definition with no instruction itineraries: just a name
// and the list of subtarget features it enables.
class Proc<string Name, list<SubtargetFeature> Features>
 : Processor<Name, NoItineraries, Features>;

def : Proc<"sm_10", [FeatureDummy]>;


// Instruction-set description referenced by the target definition below.
def NVPTXInstrInfo : InstrInfo {
}

// Top-level target definition.
def NVPTX : Target {
  let InstructionSet = NVPTXInstrInfo;
}
0 //===-- AllocaHoisting.cpp - Hoist allocas to the entry block ---*- C++ -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Hoist the alloca instructions in the non-entry blocks to the entry blocks.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "llvm/Function.h"
14 #include "llvm/Instructions.h"
15 #include "llvm/Constants.h"
16 #include "NVPTXAllocaHoisting.h"
17
18 namespace llvm {
19
20 bool NVPTXAllocaHoisting::runOnFunction(Function &function) {
21 bool functionModified = false;
22 Function::iterator I = function.begin();
23 TerminatorInst *firstTerminatorInst = (I++)->getTerminator();
24
25 for (Function::iterator E = function.end(); I != E; ++I) {
26 for (BasicBlock::iterator BI = I->begin(), BE = I->end(); BI != BE;) {
27 AllocaInst *allocaInst = dyn_cast(BI++);
28 if (allocaInst && isa(allocaInst->getArraySize())) {
29 allocaInst->moveBefore(firstTerminatorInst);
30 functionModified = true;
31 }
32 }
33 }
34
35 return functionModified;
36 }
37
38 char NVPTXAllocaHoisting::ID = 1;
39 RegisterPass X("alloca-hoisting",
40 "Hoisting alloca instructsion in non-entry "
41 "blocks to the entry block");
42
43 FunctionPass *createAllocaHoisting() {
44 return new NVPTXAllocaHoisting();
45 }
46
47 } // end namespace llvm
0 //===-- AllocaHoisting.h - Hoist allocas to the entry block -----*- C++ -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Hoist the alloca instructions in the non-entry blocks to the entry blocks.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #ifndef NVPTX_ALLOCA_HOISTING_H_
14 #define NVPTX_ALLOCA_HOISTING_H_
15
16 #include "llvm/CodeGen/MachineFunctionAnalysis.h"
17 #include "llvm/Pass.h"
18 #include "llvm/Target/TargetData.h"
19
20 namespace llvm {
21
22 class FunctionPass;
23 class Function;
24
25 // Hoisting the alloca instructions in the non-entry blocks to the entry
26 // block.
27 class NVPTXAllocaHoisting : public FunctionPass {
28 public:
29 static char ID; // Pass ID
30 NVPTXAllocaHoisting() : FunctionPass(ID) {}
31
32 void getAnalysisUsage(AnalysisUsage &AU) const {
33 AU.addRequired();
34 AU.addPreserved();
35 }
36
37 virtual const char *getPassName() const {
38 return "NVPTX specific alloca hoisting";
39 }
40
41 virtual bool runOnFunction(Function &function);
42 };
43
44 extern FunctionPass *createAllocaHoisting();
45
46 } // end namespace llvm
47
48 #endif // NVPTX_ALLOCA_HOISTING_H_
0 //===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains a printer that converts from our internal representation
10 // of machine-dependent LLVM code to NVPTX assembly language.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "NVPTX.h"
15 #include "NVPTXInstrInfo.h"
16 #include "NVPTXTargetMachine.h"
17 #include "NVPTXRegisterInfo.h"
18 #include "NVPTXAsmPrinter.h"
19 #include "MCTargetDesc/NVPTXMCAsmInfo.h"
20 #include "NVPTXNumRegisters.h"
21 #include "../lib/CodeGen/AsmPrinter/DwarfDebug.h"
22 #include "llvm/ADT/StringExtras.h"
23 #include "llvm/GlobalVariable.h"
24 #include "llvm/Function.h"
25 #include "llvm/Module.h"
26 #include "llvm/CodeGen/Analysis.h"
27 #include "llvm/CodeGen/MachineRegisterInfo.h"
28 #include "llvm/CodeGen/MachineFrameInfo.h"
29 #include "llvm/CodeGen/MachineModuleInfo.h"
30 #include "llvm/MC/MCStreamer.h"
31 #include "llvm/MC/MCSymbol.h"
32 #include "llvm/Target/Mangler.h"
33 #include "llvm/Target/TargetLoweringObjectFile.h"
34 #include "llvm/Support/TargetRegistry.h"
35 #include "llvm/Support/ErrorHandling.h"
36 #include "llvm/Support/FormattedStream.h"
37 #include "llvm/DerivedTypes.h"
38 #include "NVPTXUtilities.h"
39 #include "llvm/Support/TimeValue.h"
40 #include
41 #include "llvm/Support/CommandLine.h"
42 #include "llvm/Analysis/DebugInfo.h"
43 #include "llvm/Analysis/ConstantFolding.h"
44 #include "llvm/Support/Path.h"
45 #include "llvm/Assembly/Writer.h"
46 #include "cl_common_defines.h"
47
48
49 using namespace llvm;
50
51
52 #include "NVPTXGenAsmWriter.inc"
53
54 bool RegAllocNilUsed = true;
55
56 #define DEPOTNAME "__local_depot"
57
58 static cl::opt
59 EmitLineNumbers("nvptx-emit-line-numbers",
60 cl::desc("NVPTX Specific: Emit Line numbers even without -G"),
61 cl::init(true));
62
63 namespace llvm {
64 bool InterleaveSrcInPtx = false;
65 }
66
67 static cl::optInterleaveSrc("nvptx-emit-src",
68 cl::ZeroOrMore,
69 cl::desc("NVPTX Specific: Emit source line in ptx file"),
70 cl::location(llvm::InterleaveSrcInPtx));
71
72
73
74
75 // @TODO: This is a copy from AsmPrinter.cpp. The function is static, so we
76 // cannot just link to the existing version.
77 /// LowerConstant - Lower the specified LLVM Constant to an MCExpr.
78 ///
79 using namespace nvptx;
80 const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) {
81 MCContext &Ctx = AP.OutContext;
82
83 if (CV->isNullValue() || isa(CV))
84 return MCConstantExpr::Create(0, Ctx);
85
86 if (const ConstantInt *CI = dyn_cast(CV))
87 return MCConstantExpr::Create(CI->getZExtValue(), Ctx);
88
89 if (const GlobalValue *GV = dyn_cast(CV))
90 return MCSymbolRefExpr::Create(AP.Mang->getSymbol(GV), Ctx);
91
92 if (const BlockAddress *BA = dyn_cast(CV))
93 return MCSymbolRefExpr::Create(AP.GetBlockAddressSymbol(BA), Ctx);
94
95 const ConstantExpr *CE = dyn_cast(CV);
96 if (CE == 0)
97 llvm_unreachable("Unknown constant value to lower!");
98
99
100 switch (CE->getOpcode()) {
101 default:
102 // If the code isn't optimized, there may be outstanding folding
103 // opportunities. Attempt to fold the expression using TargetData as a
104 // last resort before giving up.
105 if (Constant *C =
106 ConstantFoldConstantExpression(CE, AP.TM.getTargetData()))
107 if (C != CE)
108 return LowerConstant(C, AP);
109
110 // Otherwise report the problem to the user.
111 {
112 std::string S;
113 raw_string_ostream OS(S);
114 OS << "Unsupported expression in static initializer: ";
115 WriteAsOperand(OS, CE, /*PrintType=*/false,
116 !AP.MF ? 0 : AP.MF->getFunction()->getParent());
117 report_fatal_error(OS.str());
118 }
119 case Instruction::GetElementPtr: {
120 const TargetData &TD = *AP.TM.getTargetData();
121 // Generate a symbolic expression for the byte address
122 const Constant *PtrVal = CE->getOperand(0);
123 SmallVector IdxVec(CE->op_begin()+1, CE->op_end());
124 int64_t Offset = TD.getIndexedOffset(PtrVal->getType(), IdxVec);
125
126 const MCExpr *Base = LowerConstant(CE->getOperand(0), AP);
127 if (Offset == 0)
128 return Base;
129
130 // Truncate/sext the offset to the pointer size.
131 if (TD.getPointerSizeInBits() != 64) {
132 int SExtAmount = 64-TD.getPointerSizeInBits();
133 Offset = (Offset << SExtAmount) >> SExtAmount;
134 }
135
136 return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx),
137 Ctx);
138 }
139
140 case Instruction::Trunc:
141 // We emit the value and depend on the assembler to truncate the generated
142 // expression properly. This is important for differences between
143 // blockaddress labels. Since the two labels are in the same function, it
144 // is reasonable to treat their delta as a 32-bit value.
145 // FALL THROUGH.
146 case Instruction::BitCast:
147 return LowerConstant(CE->getOperand(0), AP);
148
149 case Instruction::IntToPtr: {
150 const TargetData &TD = *AP.TM.getTargetData();
151 // Handle casts to pointers by changing them into casts to the appropriate
152 // integer type. This promotes constant folding and simplifies this code.
153 Constant *Op = CE->getOperand(0);
154 Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CV->getContext()),
155 false/*ZExt*/);
156 return LowerConstant(Op, AP);
157 }
158
159 case Instruction::PtrToInt: {
160 const TargetData &TD = *AP.TM.getTargetData();
161 // Support only foldable casts to/from pointers that can be eliminated by
162 // changing the pointer to the appropriately sized integer type.
163 Constant *Op = CE->getOperand(0);
164 Type *Ty = CE->getType();
165
166 const MCExpr *OpExpr = LowerConstant(Op, AP);
167
168 // We can emit the pointer value into this slot if the slot is an
169 // integer slot equal to the size of the pointer.
170 if (TD.getTypeAllocSize(Ty) == TD.getTypeAllocSize(Op->getType()))
171 return OpExpr;
172
173 // Otherwise the pointer is smaller than the resultant integer, mask off
174 // the high bits so we are sure to get a proper truncation if the input is
175 // a constant expr.
176 unsigned InBits = TD.getTypeAllocSizeInBits(Op->getType());
177 const MCExpr *MaskExpr = MCConstantExpr::Create(~0ULL >> (64-InBits), Ctx);
178 return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx);
179 }
180
181 // The MC library also has a right-shift operator, but it isn't consistently
182 // signed or unsigned between different targets.
183 case Instruction::Add:
184 case Instruction::Sub:
185 case Instruction::Mul:
186 case Instruction::SDiv:
187 case Instruction::SRem:
188 case Instruction::Shl:
189 case Instruction::And:
190 case Instruction::Or:
191 case Instruction::Xor: {
192 const MCExpr *LHS = LowerConstant(CE->getOperand(0), AP);
193 const MCExpr *RHS = LowerConstant(CE->getOperand(1), AP);
194 switch (CE->getOpcode()) {
195 default: llvm_unreachable("Unknown binary operator constant cast expr");
196 case Instruction::Add: return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx);
197 case Instruction::Sub: return MCBinaryExpr::CreateSub(LHS, RHS, Ctx);
198 case Instruction::Mul: return MCBinaryExpr::CreateMul(LHS, RHS, Ctx);
199 case Instruction::SDiv: return MCBinaryExpr::CreateDiv(LHS, RHS, Ctx);
200 case Instruction::SRem: return MCBinaryExpr::CreateMod(LHS, RHS, Ctx);
201 case Instruction::Shl: return MCBinaryExpr::CreateShl(LHS, RHS, Ctx);
202 case Instruction::And: return MCBinaryExpr::CreateAnd(LHS, RHS, Ctx);
203 case Instruction::Or: return MCBinaryExpr::CreateOr (LHS, RHS, Ctx);
204 case Instruction::Xor: return MCBinaryExpr::CreateXor(LHS, RHS, Ctx);
205 }
206 }
207 }
208 }
209
210
211 void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI)
212 {
213 if (!EmitLineNumbers)
214 return;
215 if (ignoreLoc(MI))
216 return;
217
218 DebugLoc curLoc = MI.getDebugLoc();
219
220 if (prevDebugLoc.isUnknown() && curLoc.isUnknown())
221 return;
222
223 if (prevDebugLoc == curLoc)
224 return;
225
226 prevDebugLoc = curLoc;
227
228 if (curLoc.isUnknown())
229 return;
230
231
232 const MachineFunction *MF = MI.getParent()->getParent();
233 //const TargetMachine &TM = MF->getTarget();
234
235 const LLVMContext &ctx = MF->getFunction()->getContext();
236 DIScope Scope(curLoc.getScope(ctx));
237
238 if (!Scope.Verify())
239 return;
240
241 StringRef fileName(Scope.getFilename());
242 StringRef dirName(Scope.getDirectory());
243 SmallString<128> FullPathName = dirName;
244 if (!dirName.empty() && !sys::path::is_absolute(fileName)) {
245 sys::path::append(FullPathName, fileName);
246 fileName = FullPathName.str();
247 }
248
249 if (filenameMap.find(fileName.str()) == filenameMap.end())
250 return;
251
252
253 // Emit the line from the source file.
254 if (llvm::InterleaveSrcInPtx)
255 this->emitSrcInText(fileName.str(), curLoc.getLine());
256
257 std::stringstream temp;
258 temp << "\t.loc " << filenameMap[fileName.str()]
259 << " " << curLoc.getLine() << " " << curLoc.getCol();
260 OutStreamer.EmitRawText(Twine(temp.str().c_str()));
261 }
262
263 void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
264 SmallString<128> Str;
265 raw_svector_ostream OS(Str);
266 if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)
267 emitLineNumberAsDotLoc(*MI);
268 printInstruction(MI, OS);
269 OutStreamer.EmitRawText(OS.str());
270 }
271
272 void NVPTXAsmPrinter::printReturnValStr(const Function *F,
273 raw_ostream &O)
274 {
275 const TargetData *TD = TM.getTargetData();
276 const TargetLowering *TLI = TM.getTargetLowering();
277
278 Type *Ty = F->getReturnType();
279
280 bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
281
282 if (Ty->getTypeID() == Type::VoidTyID)
283 return;
284
285 O << " (";
286
287 if (isABI) {
288 if (Ty->isPrimitiveType() || Ty->isIntegerTy()) {
289 unsigned size = 0;
290 if (const IntegerType *ITy = dyn_cast(Ty)) {
291 size = ITy->getBitWidth();
292 if (size < 32) size = 32;
293 } else {
294 assert(Ty->isFloatingPointTy() &&
295 "Floating point type expected here");
296 size = Ty->getPrimitiveSizeInBits();
297 }
298
299 O << ".param .b" << size << " func_retval0";
300 }
301 else if (isa(Ty)) {
302 O << ".param .b" << TLI->getPointerTy().getSizeInBits()
303 << " func_retval0";
304 } else {
305 if ((Ty->getTypeID() == Type::StructTyID) ||
306 isa(Ty)) {
307 SmallVector vtparts;
308 ComputeValueVTs(*TLI, Ty, vtparts);
309 unsigned totalsz = 0;
310 for (unsigned i=0,e=vtparts.size(); i!=e; ++i) {
311 unsigned elems = 1;
312 EVT elemtype = vtparts[i];
313 if (vtparts[i].isVector()) {
314 elems = vtparts[i].getVectorNumElements();
315 elemtype = vtparts[i].getVectorElementType();
316 }
317 for (unsigned j=0, je=elems; j!=je; ++j) {
318 unsigned sz = elemtype.getSizeInBits();
319 if (elemtype.isInteger() && (sz < 8)) sz = 8;
320 totalsz += sz/8;
321 }
322 }
323 unsigned retAlignment = 0;
324 if (!llvm::getAlign(*F, 0, retAlignment))
325 retAlignment = TD->getABITypeAlignment(Ty);
326 O << ".param .align "
327 << retAlignment
328 << " .b8 func_retval0["
329 << totalsz << "]";
330 } else
331 assert(false &&
332 "Unknown return type");
333 }
334 } else {
335 SmallVector vtparts;
336 ComputeValueVTs(*TLI, Ty, vtparts);
337 unsigned idx = 0;
338 for (unsigned i=0,e=vtparts.size(); i!=e; ++i) {
339 unsigned elems = 1;
340 EVT elemtype = vtparts[i];
341 if (vtparts[i].isVector()) {
342 elems = vtparts[i].getVectorNumElements();
343 elemtype = vtparts[i].getVectorElementType();
344 }
345
346 for (unsigned j=0, je=elems; j!=je; ++j) {
347 unsigned sz = elemtype.getSizeInBits();
348 if (elemtype.isInteger() && (sz < 32)) sz = 32;
349 O << ".reg .b" << sz << " func_retval" << idx;
350 if (j
351 ++idx;
352 }
353 if (i < e-1)
354 O << ", ";
355 }
356 }
357 O << ") ";
358 return;
359 }
360
361 void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF,
362 raw_ostream &O) {
363 const Function *F = MF.getFunction();
364 printReturnValStr(F, O);
365 }
366
367 void NVPTXAsmPrinter::EmitFunctionEntryLabel() {
368 SmallString<128> Str;
369 raw_svector_ostream O(Str);
370
371 // Set up
372 MRI = &MF->getRegInfo();
373 F = MF->getFunction();
374 emitLinkageDirective(F,O);
375 if (llvm::isKernelFunction(*F))
376 O << ".entry ";
377 else {
378 O << ".func ";
379 printReturnValStr(*MF, O);
380 }
381
382 O << *CurrentFnSym;
383
384 emitFunctionParamList(*MF, O);
385
386 if (llvm::isKernelFunction(*F))
387 emitKernelFunctionDirectives(*F, O);
388
389 OutStreamer.EmitRawText(O.str());
390
391 prevDebugLoc = DebugLoc();
392 }
393
394 void NVPTXAsmPrinter::EmitFunctionBodyStart() {
395 const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
396 unsigned numRegClasses = TRI.getNumRegClasses();
397 VRidGlobal2LocalMap = new std::map[numRegClasses+1];
398 OutStreamer.EmitRawText(StringRef("{\n"));
399 setAndEmitFunctionVirtualRegisters(*MF);
400
401 SmallString<128> Str;
402 raw_svector_ostream O(Str);
403 emitDemotedVars(MF->getFunction(), O);
404 OutStreamer.EmitRawText(O.str());
405 }
406
407 void NVPTXAsmPrinter::EmitFunctionBodyEnd() {
408 OutStreamer.EmitRawText(StringRef("}\n"));
409 delete []VRidGlobal2LocalMap;
410 }
411
412
413 void
414 NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function& F,
415 raw_ostream &O) const {
416 // If the NVVM IR has some of reqntid* specified, then output
417 // the reqntid directive, and set the unspecified ones to 1.
418 // If none of reqntid* is specified, don't output reqntid directive.
419 unsigned reqntidx, reqntidy, reqntidz;
420 bool specified = false;
421 if (llvm::getReqNTIDx(F, reqntidx) == false) reqntidx = 1;
422 else specified = true;
423 if (llvm::getReqNTIDy(F, reqntidy) == false) reqntidy = 1;
424 else specified = true;
425 if (llvm::getReqNTIDz(F, reqntidz) == false) reqntidz = 1;
426 else specified = true;
427
428 if (specified)
429 O << ".reqntid " << reqntidx << ", "
430 << reqntidy << ", " << reqntidz << "\n";
431
432 // If the NVVM IR has some of maxntid* specified, then output
433 // the maxntid directive, and set the unspecified ones to 1.
434 // If none of maxntid* is specified, don't output maxntid directive.
435 unsigned maxntidx, maxntidy, maxntidz;
436 specified = false;
437 if (llvm::getMaxNTIDx(F, maxntidx) == false) maxntidx = 1;
438 else specified = true;
439 if (llvm::getMaxNTIDy(F, maxntidy) == false) maxntidy = 1;
440 else specified = true;
441 if (llvm::getMaxNTIDz(F, maxntidz) == false) maxntidz = 1;
442 else specified = true;
443
444 if (specified)
445 O << ".maxntid " << maxntidx << ", "
446 << maxntidy << ", " << maxntidz << "\n";
447
448 unsigned mincta;
449 if (llvm::getMinCTASm(F, mincta))
450 O << ".minnctapersm " << mincta << "\n";
451 }
452
453 void
454 NVPTXAsmPrinter::getVirtualRegisterName(unsigned vr, bool isVec,
455 raw_ostream &O) {
456 const TargetRegisterClass * RC = MRI->getRegClass(vr);
457 unsigned id = RC->getID();
458
459 std::map ®map = VRidGlobal2LocalMap[id];
460 unsigned mapped_vr = regmap[vr];
461
462 if (!isVec) {
463 O << getNVPTXRegClassStr(RC) << mapped_vr;
464 return;
465 }
466 // Vector virtual register
467 if (getNVPTXVectorSize(RC) == 4)
468 O << "{"
469 << getNVPTXRegClassStr(RC) << mapped_vr << "_0, "
470 << getNVPTXRegClassStr(RC) << mapped_vr << "_1, "
471 << getNVPTXRegClassStr(RC) << mapped_vr << "_2, "
472 << getNVPTXRegClassStr(RC) << mapped_vr << "_3"
473 << "}";
474 else if (getNVPTXVectorSize(RC) == 2)
475 O << "{"
476 << getNVPTXRegClassStr(RC) << mapped_vr << "_0, "
477 << getNVPTXRegClassStr(RC) << mapped_vr << "_1"
478 << "}";
479 else
480 assert(0 && "Unsupported vector size");
481 }
482
483 void
484 NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr, bool isVec,
485 raw_ostream &O) {
486 getVirtualRegisterName(vr, isVec, O);
487 }
488
489 void NVPTXAsmPrinter::printVecModifiedImmediate(const MachineOperand &MO,
490 const char *Modifier,
491 raw_ostream &O) {
492 char vecelem[] = {'0', '1', '2', '3', '0', '1', '2', '3'};
493 int Imm = (int)MO.getImm();
494 if(0 == strcmp(Modifier, "vecelem"))
495 O << "_" << vecelem[Imm];
496 else if(0 == strcmp(Modifier, "vecv4comm1")) {
497 if((Imm < 0) || (Imm > 3))
498 O << "//";
499 }
500 else if(0 == strcmp(Modifier, "vecv4comm2")) {
501 if((Imm < 4) || (Imm > 7))
502 O << "//";
503 }
504 else if(0 == strcmp(Modifier, "vecv4pos")) {
505 if(Imm < 0) Imm = 0;
506 O << "_" << vecelem[Imm%4];
507 }
508 else if(0 == strcmp(Modifier, "vecv2comm1")) {
509 if((Imm < 0) || (Imm > 1))
510 O << "//";
511 }
512 else if(0 == strcmp(Modifier, "vecv2comm2")) {
513 if((Imm < 2) || (Imm > 3))
514 O << "//";
515 }
516 else if(0 == strcmp(Modifier, "vecv2pos")) {
517 if(Imm < 0) Imm = 0;
518 O << "_" << vecelem[Imm%2];
519 }
520 else
521 assert(0 && "Unknown Modifier on immediate operand");
522 }
523
524 void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
525 raw_ostream &O, const char *Modifier) {
526 const MachineOperand &MO = MI->getOperand(opNum);
527 switch (MO.getType()) {
528 case MachineOperand::MO_Register:
529 if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
530 if (MO.getReg() == NVPTX::VRDepot)
531 O << DEPOTNAME << getFunctionNumber();
532 else
533 O << getRegisterName(MO.getReg());
534 } else {
535 if (!Modifier)
536 emitVirtualRegister(MO.getReg(), false, O);
537 else {
538 if (strcmp(Modifier, "vecfull") == 0)
539 emitVirtualRegister(MO.getReg(), true, O);
540 else
541 assert(0 &&
542 "Don't know how to handle the modifier on virtual register.");
543 }
544 }
545 return;
546
547 case MachineOperand::MO_Immediate:
548 if (!Modifier)
549 O << MO.getImm();
550 else if (strstr(Modifier, "vec") == Modifier)
551 printVecModifiedImmediate(MO, Modifier, O);
552 else
553 assert(0 && "Don't know how to handle modifier on immediate operand");
554 return;
555
556 case MachineOperand::MO_FPImmediate:
557 printFPConstant(MO.getFPImm(), O);
558 break;
559
560 case MachineOperand::MO_GlobalAddress:
561 O << *Mang->getSymbol(MO.getGlobal());
562 break;
563
564 case MachineOperand::MO_ExternalSymbol: {
565 const char * symbname = MO.getSymbolName();
566 if (strstr(symbname, ".PARAM") == symbname) {
567 unsigned index;
568 sscanf(symbname+6, "%u[];", &index);
569 printParamName(index, O);
570 }
571 else if (strstr(symbname, ".HLPPARAM") == symbname) {
572 unsigned index;
573 sscanf(symbname+9, "%u[];", &index);
574 O << *CurrentFnSym << "_param_" << index << "_offset";
575 }
576 else
577 O << symbname;
578 break;
579 }
580
581 case MachineOperand::MO_MachineBasicBlock:
582 O << *MO.getMBB()->getSymbol();
583 return;
584
585 default:
586 assert(0 && " Operand type not supported.");
587 }
588 }
589
590 void NVPTXAsmPrinter::
591 printImplicitDef(const MachineInstr *MI, raw_ostream &O) const {
592 #ifndef __OPTIMIZE__
593 O << "\t// Implicit def :";
594 //printOperand(MI, 0);
595 O << "\n";
596 #endif
597 }
598
599 void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
600 raw_ostream &O, const char *Modifier) {
601 printOperand(MI, opNum, O);
602
603 if (Modifier && !strcmp(Modifier, "add")) {
604 O << ", ";
605 printOperand(MI, opNum+1, O);
606 } else {
607 if (MI->getOperand(opNum+1).isImm() &&
608 MI->getOperand(opNum+1).getImm() == 0)
609 return; // don't print ',0' or '+0'
610 O << "+";
611 printOperand(MI, opNum+1, O);
612 }
613 }
614
615 void NVPTXAsmPrinter::printLdStCode(const MachineInstr *MI, int opNum,
616 raw_ostream &O, const char *Modifier)
617 {
618 if (Modifier) {
619 const MachineOperand &MO = MI->getOperand(opNum);
620 int Imm = (int)MO.getImm();
621 if (!strcmp(Modifier, "volatile")) {
622 if (Imm)
623 O << ".volatile";
624 } else if (!strcmp(Modifier, "addsp")) {
625 switch (Imm) {
626 case NVPTX::PTXLdStInstCode::GLOBAL: O << ".global"; break;
627 case NVPTX::PTXLdStInstCode::SHARED: O << ".shared"; break;
628 case NVPTX::PTXLdStInstCode::LOCAL: O << ".local"; break;
629 case NVPTX::PTXLdStInstCode::PARAM: O << ".param"; break;
630 case NVPTX::PTXLdStInstCode::CONSTANT: O << ".const"; break;
631 case NVPTX::PTXLdStInstCode::GENERIC:
632 if (!nvptxSubtarget.hasGenericLdSt())
633 O << ".global";
634 break;
635 default:
636 assert("wrong value");
637 }
638 }
639 else if (!strcmp(Modifier, "sign")) {
640 if (Imm==NVPTX::PTXLdStInstCode::Signed)
641 O << "s";
642 else if (Imm==NVPTX::PTXLdStInstCode::Unsigned)
643 O << "u";
644 else
645 O << "f";
646 }
647 else if (!strcmp(Modifier, "vec")) {
648 if (Imm==NVPTX::PTXLdStInstCode::V2)
649 O << ".v2";
650 else if (Imm==NVPTX::PTXLdStInstCode::V4)
651 O << ".v4";
652 }
653 else
654 assert("unknown modifier");
655 }
656 else
657 assert("unknown modifier");
658 }
659
660 void NVPTXAsmPrinter::emitDeclaration (const Function *F, raw_ostream &O) {
661
662 emitLinkageDirective(F,O);
663 if (llvm::isKernelFunction(*F))
664 O << ".entry ";
665 else
666 O << ".func ";
667 printReturnValStr(F, O);
668 O << *CurrentFnSym << "\n";
669 emitFunctionParamList(F, O);
670 O << ";\n";
671 }
672
673 static bool usedInGlobalVarDef(const Constant *C)
674 {
675 if (!C)
676 return false;
677
678 if (const GlobalVariable *GV = dyn_cast(C)) {
679 if (GV->getName().str() == "llvm.used")
680 return false;
681 return true;
682 }
683
684 for (Value::const_use_iterator ui=C->use_begin(), ue=C->use_end();
685 ui!=ue; ++ui) {
686 const Constant *C = dyn_cast(*ui);
687 if (usedInGlobalVarDef(C))
688 return true;
689 }
690 return false;
691 }
692
693 static bool usedInOneFunc(const User *U, Function const *&oneFunc)
694 {
695 if (const GlobalVariable *othergv = dyn_cast(U)) {
696 if (othergv->getName().str() == "llvm.used")
697 return true;
698 }
699
700 if (const Instruction *instr = dyn_cast(U)) {
701 if (instr->getParent() && instr->getParent()->getParent()) {
702 const Function *curFunc = instr->getParent()->getParent();
703 if (oneFunc && (curFunc != oneFunc))
704 return false;
705 oneFunc = curFunc;
706 return true;
707 }
708 else
709 return false;
710 }
711
712 if (const MDNode *md = dyn_cast(U))
713 if (md->hasName() && ((md->getName().str() == "llvm.dbg.gv") ||
714 (md->getName().str() == "llvm.dbg.sp")))
715 return true;
716
717
718 for (User::const_use_iterator ui=U->use_begin(), ue=U->use_end();
719 ui!=ue; ++ui) {
720 if (usedInOneFunc(*ui, oneFunc) == false)
721 return false;
722 }
723 return true;
724 }
725
726 /* Find out if a global variable can be demoted to local scope.
727 * Currently, this is valid for CUDA shared variables, which have local
728 * scope and global lifetime. So the conditions to check are :
729 * 1. Is the global variable in shared address space?
730 * 2. Does it have internal linkage?
731 * 3. Is the global variable referenced only in one function?
732 */
733 static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) {
734 if (gv->hasInternalLinkage() == false)
735 return false;
736 const PointerType *Pty = gv->getType();
737 if (Pty->getAddressSpace() != llvm::ADDRESS_SPACE_SHARED)
738 return false;
739
740 const Function *oneFunc = 0;
741
742 bool flag = usedInOneFunc(gv, oneFunc);
743 if (flag == false)
744 return false;
745 if (!oneFunc)
746 return false;
747 f = oneFunc;
748 return true;
749 }
750
751 static bool useFuncSeen(const Constant *C,
752 llvm::DenseMap &seenMap) {
753 for (Value::const_use_iterator ui=C->use_begin(), ue=C->use_end();
754 ui!=ue; ++ui) {
755 if (const Constant *cu = dyn_cast(*ui)) {
756 if (useFuncSeen(cu, seenMap))
757 return true;
758 } else if (const Instruction *I = dyn_cast(*ui)) {
759 const BasicBlock *bb = I->getParent();
760 if (!bb) continue;
761 const Function *caller = bb->getParent();
762 if (!caller) continue;
763 if (seenMap.find(caller) != seenMap.end())
764 return true;
765 }
766 }
767 return false;
768 }
769
770 void NVPTXAsmPrinter::emitDeclarations (Module &M, raw_ostream &O) {
771 llvm::DenseMap seenMap;
772 for (Module::const_iterator FI=M.begin(), FE=M.end();
773 FI!=FE; ++FI) {
774 const Function *F = FI;
775
776 if (F->isDeclaration()) {
777 if (F->use_empty())
778 continue;
779 if (F->getIntrinsicID())
780 continue;
781 CurrentFnSym = Mang->getSymbol(F);
782 emitDeclaration(F, O);
783 continue;
784 }
785 for (Value::const_use_iterator iter=F->use_begin(),
786 iterEnd=F->use_end(); iter!=iterEnd; ++iter) {
787 if (const Constant *C = dyn_cast(*iter)) {
788 if (usedInGlobalVarDef(C)) {
789 // The use is in the initialization of a global variable
790 // that is a function pointer, so print a declaration
791 // for the original function
792 CurrentFnSym = Mang->getSymbol(F);
793 emitDeclaration(F, O);
794 break;
795 }
796 // Emit a declaration of this function if the function that
797 // uses this constant expr has already been seen.
798 if (useFuncSeen(C, seenMap)) {
799 CurrentFnSym = Mang->getSymbol(F);
800 emitDeclaration(F, O);
801 break;
802 }
803 }
804
805 if (!isa(*iter)) continue;
806 const Instruction *instr = cast(*iter);
807 const BasicBlock *bb = instr->getParent();
808 if (!bb) continue;
809 const Function *caller = bb->getParent();
810 if (!caller) continue;
811
812 // If a caller has already been seen, then the caller is
813 // appearing in the module before the callee. so print out
814 // a declaration for the callee.
815 if (seenMap.find(caller) != seenMap.end()) {
816 CurrentFnSym = Mang->getSymbol(F);
817 emitDeclaration(F, O);
818 break;
819 }
820 }
821 seenMap[F] = true;
822 }
823 }
824
825 void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) {
826 DebugInfoFinder DbgFinder;
827 DbgFinder.processModule(M);
828
829 unsigned i=1;
830 for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(),
831 E = DbgFinder.compile_unit_end(); I != E; ++I) {
832 DICompileUnit DIUnit(*I);
833 StringRef Filename(DIUnit.getFilename());
834 StringRef Dirname(DIUnit.getDirectory());
835 SmallString<128> FullPathName = Dirname;
836 if (!Dirname.empty() && !sys::path::is_absolute(Filename)) {
837 sys::path::append(FullPathName, Filename);
838 Filename = FullPathName.str();
839 }
840 if (filenameMap.find(Filename.str()) != filenameMap.end())
841 continue;
842 filenameMap[Filename.str()] = i;
843 OutStreamer.EmitDwarfFileDirective(i, "", Filename.str());
844 ++i;
845 }
846
847 for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(),
848 E = DbgFinder.subprogram_end(); I != E; ++I) {
849 DISubprogram SP(*I);
850 StringRef Filename(SP.getFilename());
851 StringRef Dirname(SP.getDirectory());
852 SmallString<128> FullPathName = Dirname;
853 if (!Dirname.empty() && !sys::path::is_absolute(Filename)) {
854 sys::path::append(FullPathName, Filename);
855 Filename = FullPathName.str();
856 }
857 if (filenameMap.find(Filename.str()) != filenameMap.end())
858 continue;
859 filenameMap[Filename.str()] = i;
860 ++i;
861 }
862 }
863
864 bool NVPTXAsmPrinter::doInitialization (Module &M) {
865
866 SmallString<128> Str1;
867 raw_svector_ostream OS1(Str1);
868
869 MMI = getAnalysisIfAvailable();
870 MMI->AnalyzeModule(M);
871
872 // We need to call the parent's one explicitly.
873 //bool Result = AsmPrinter::doInitialization(M);
874
875 // Initialize TargetLoweringObjectFile.
876 const_cast(getObjFileLowering())
877 .Initialize(OutContext, TM);
878
879 Mang = new Mangler(OutContext, *TM.getTargetData());
880
881 // Emit header before any dwarf directives are emitted below.
882 emitHeader(M, OS1);
883 OutStreamer.EmitRawText(OS1.str());
884
885
886 // Already commented out
887 //bool Result = AsmPrinter::doInitialization(M);
888
889
890 if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)
891 recordAndEmitFilenames(M);
892
893 SmallString<128> Str2;
894 raw_svector_ostream OS2(Str2);
895
896 emitDeclarations(M, OS2);
897
898 // Print out module-level global variables here.
899 for (Module::global_iterator I = M.global_begin(), E = M.global_end();
900 I != E; ++I)
901 printModuleLevelGV(I, OS2);
902
903 OS2 << '\n';
904
905 OutStreamer.EmitRawText(OS2.str());
906 return false; // success
907 }
908
909 void NVPTXAsmPrinter::emitHeader (Module &M, raw_ostream &O) {
910 O << "//\n";
911 O << "// Generated by LLVM NVPTX Back-End\n";
912 O << "//\n";
913 O << "\n";
914
915 O << ".version 3.0\n";
916
917 O << ".target ";
918 O << nvptxSubtarget.getTargetName();
919
920 if (nvptxSubtarget.getDrvInterface() == NVPTX::NVCL)
921 O << ", texmode_independent";
922 if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) {
923 if (!nvptxSubtarget.hasDouble())
924 O << ", map_f64_to_f32";
925 }
926
927 if (MAI->doesSupportDebugInformation())
928 O << ", debug";
929
930 O << "\n";
931
932 O << ".address_size ";
933 if (nvptxSubtarget.is64Bit())
934 O << "64";
935 else
936 O << "32";
937 O << "\n";
938
939 O << "\n";
940 }
941
942 bool NVPTXAsmPrinter::doFinalization(Module &M) {
943 // XXX Temproarily remove global variables so that doFinalization() will not
944 // emit them again (global variables are emitted at beginning).
945
946 Module::GlobalListType &global_list = M.getGlobalList();
947 int i, n = global_list.size();
948 GlobalVariable **gv_array = new GlobalVariable* [n];
949
950 // first, back-up GlobalVariable in gv_array
951 i = 0;
952 for (Module::global_iterator I = global_list.begin(), E = global_list.end();
953 I != E; ++I)
954 gv_array[i++] = &*I;
955
956 // second, empty global_list
957 while (!global_list.empty())
958 global_list.remove(global_list.begin());
959
960 // call doFinalization
961 bool ret = AsmPrinter::doFinalization(M);
962
963 // now we restore global variables
964 for (i = 0; i < n; i ++)
965 global_list.insert(global_list.end(), gv_array[i]);
966
967 delete[] gv_array;
968 return ret;
969
970
971 //bool Result = AsmPrinter::doFinalization(M);
972 // Instead of calling the parents doFinalization, we may
973 // clone parents doFinalization and customize here.
974 // Currently, we if NVISA out the EmitGlobals() in
975 // parent's doFinalization, which is too intrusive.
976 //
977 // Same for the doInitialization.
978 //return Result;
979 }
980
981 // This function emits appropriate linkage directives for
982 // functions and global variables.
983 //
984 // extern function declaration -> .extern
985 // extern function definition -> .visible
986 // external global variable with init -> .visible
987 // external without init -> .extern
988 // appending -> not allowed, assert.
989
990 void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue* V, raw_ostream &O)
991 {
992 if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) {
993 if (V->hasExternalLinkage()) {
994 if (isa(V)) {
995 const GlobalVariable *GVar = cast(V);
996 if (GVar) {
997 if (GVar->hasInitializer())
998 O << ".visible ";
999 else
1000 O << ".extern ";
1001 }
1002 } else if (V->isDeclaration())
1003 O << ".extern ";
1004 else
1005 O << ".visible ";
1006 } else if (V->hasAppendingLinkage()) {
1007 std::string msg;
1008 msg.append("Error: ");
1009 msg.append("Symbol ");
1010 if (V->hasName())
1011 msg.append(V->getName().str());
1012 msg.append("has unsupported appending linkage type");
1013 llvm_unreachable(msg.c_str());
1014 }
1015 }
1016 }
1017
1018
1019 void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O,
1020 bool processDemoted) {
1021
1022 // Skip meta data
1023 if (GVar->hasSection()) {
1024 if (GVar->getSection() == "llvm.metadata")
1025 return;
1026 }
1027
1028 const TargetData *TD = TM.getTargetData();
1029
1030 // GlobalVariables are always constant pointers themselves.
1031 const PointerType *PTy = GVar->getType();
1032 Type *ETy = PTy->getElementType();
1033
1034 if (GVar->hasExternalLinkage()) {
1035 if (GVar->hasInitializer())
1036 O << ".visible ";
1037 else
1038 O << ".extern ";
1039 }
1040
1041 if (llvm::isTexture(*GVar)) {
1042 O << ".global .texref " << llvm::getTextureName(*GVar) << ";\n";
1043 return;
1044 }
1045
1046 if (llvm::isSurface(*GVar)) {
1047 O << ".global .surfref " << llvm::getSurfaceName(*GVar) << ";\n";
1048 return;
1049 }
1050
1051 if (GVar->isDeclaration()) {
1052 // (extern) declarations, no definition or initializer
1053 // Currently the only known declaration is for an automatic __local
1054 // (.shared) promoted to global.
1055 emitPTXGlobalVariable(GVar, O);
1056 O << ";\n";
1057 return;
1058 }
1059
1060 if (llvm::isSampler(*GVar)) {
1061 O << ".global .samplerref " << llvm::getSamplerName(*GVar);
1062
1063 Constant *Initializer = NULL;
1064 if (GVar->hasInitializer())
1065 Initializer = GVar->getInitializer();
1066 ConstantInt *CI = NULL;
1067 if (Initializer)
1068 CI = dyn_cast(Initializer);
1069 if (CI) {
1070 unsigned sample=CI->getZExtValue();
1071
1072 O << " = { ";
1073
1074 for (int i =0, addr=((sample & __CLK_ADDRESS_MASK ) >>
1075 __CLK_ADDRESS_BASE) ; i < 3 ; i++) {
1076 O << "addr_mode_" << i << " = ";
1077 switch (addr) {
1078 case 0: O << "wrap"; break;
1079 case 1: O << "clamp_to_border"; break;
1080 case 2: O << "clamp_to_edge"; break;
1081 case 3: O << "wrap"; break;
1082 case 4: O << "mirror"; break;
1083 }
1084 O <<", ";
1085 }
1086 O << "filter_mode = ";
1087 switch (( sample & __CLK_FILTER_MASK ) >> __CLK_FILTER_BASE ) {
1088 case 0: O << "nearest"; break;
1089 case 1: O << "linear"; break;
1090 case 2: assert ( 0 && "Anisotropic filtering is not supported");
1091 default: O << "nearest"; break;
1092 }
1093 if (!(( sample &__CLK_NORMALIZED_MASK ) >> __CLK_NORMALIZED_BASE)) {
1094 O << ", force_unnormalized_coords = 1";
1095 }
1096 O << " }";
1097 }
1098
1099 O << ";\n";
1100 return;
1101 }
1102
1103 if (GVar->hasPrivateLinkage()) {
1104
1105 if (!strncmp(GVar->getName().data(), "unrollpragma", 12))
1106 return;
1107
1108 // FIXME - need better way (e.g. Metadata) to avoid generating this global
1109 if (!strncmp(GVar->getName().data(), "filename", 8))
1110 return;
1111 if (GVar->use_empty())
1112 return;
1113 }
1114
1115 const Function *demotedFunc = 0;
1116 if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) {
1117 O << "// " << GVar->getName().str() << " has been demoted\n";
1118 if (localDecls.find(demotedFunc) != localDecls.end())
1119 localDecls[demotedFunc].push_back(GVar);
1120 else {
1121 std::vector temp;
1122 temp.push_back(GVar);
1123 localDecls[demotedFunc] = temp;
1124 }
1125 return;
1126 }
1127
1128 O << ".";
1129 emitPTXAddressSpace(PTy->getAddressSpace(), O);
1130 if (GVar->getAlignment() == 0)
1131 O << " .align " << (int) TD->getPrefTypeAlignment(ETy);
1132 else
1133 O << " .align " << GVar->getAlignment();
1134
1135
1136 if (ETy->isPrimitiveType() || ETy->isIntegerTy() || isa(ETy)) {
1137 O << " .";
1138 O << getPTXFundamentalTypeStr(ETy, false);
1139 O << " ";
1140 O << *Mang->getSymbol(GVar);
1141
1142 // Ptx allows variable initilization only for constant and global state
1143 // spaces.
1144 if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) ||
1145 (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) ||
1146 (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST))
1147 && GVar->hasInitializer()) {
1148 Constant *Initializer = GVar->getInitializer();
1149 if (!Initializer->isNullValue()) {
1150 O << " = " ;
1151 printScalarConstant(Initializer, O);
1152 }
1153 }
1154 } else {
1155 unsigned int ElementSize =0;
1156
1157 // Although PTX has direct support for struct type and array type and
1158 // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for
1159 // targets that support these high level field accesses. Structs, arrays
1160 // and vectors are lowered into arrays of bytes.
1161 switch (ETy->getTypeID()) {
1162 case Type::StructTyID:
1163 case Type::ArrayTyID:
1164 case Type::VectorTyID:
1165 ElementSize = TD->getTypeStoreSize(ETy);
1166 // Ptx allows variable initilization only for constant and
1167 // global state spaces.
1168 if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) ||
1169 (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) ||
1170 (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST))
1171 && GVar->hasInitializer()) {
1172 Constant *Initializer = GVar->getInitializer();
1173 if (!isa(Initializer) &&
1174 !Initializer->isNullValue()) {
1175 AggBuffer aggBuffer(ElementSize, O, *this);
1176 bufferAggregateConstant(Initializer, &aggBuffer);
1177 if (aggBuffer.numSymbols) {
1178 if (nvptxSubtarget.is64Bit()) {
1179 O << " .u64 " << *Mang->getSymbol(GVar) <<"[" ;
1180 O << ElementSize/8;
1181 }
1182 else {
1183 O << " .u32 " << *Mang->getSymbol(GVar) <<"[" ;
1184 O << ElementSize/4;
1185 }
1186 O << "]";
1187 }
1188 else {
1189 O << " .b8 " << *Mang->getSymbol(GVar) <<"[" ;
1190 O << ElementSize;
1191 O << "]";
1192 }
1193 O << " = {" ;
1194 aggBuffer.print();
1195 O << "}";
1196 }
1197 else {
1198 O << " .b8 " << *Mang->getSymbol(GVar) ;
1199 if (ElementSize) {
1200 O <<"[" ;
1201 O << ElementSize;
1202 O << "]";
1203 }
1204 }
1205 }
1206 else {
1207 O << " .b8 " << *Mang->getSymbol(GVar);
1208 if (ElementSize) {
1209 O <<"[" ;
1210 O << ElementSize;
1211 O << "]";
1212 }
1213 }
1214 break;
1215 default:
1216 assert( 0 && "type not supported yet");
1217 }
1218
1219 }
1220 O << ";\n";
1221 }
1222
1223 void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) {
1224 if (localDecls.find(f) == localDecls.end())
1225 return;
1226
1227 std::vector &gvars = localDecls[f];
1228
1229 for (unsigned i=0, e=gvars.size(); i!=e; ++i) {
1230 O << "\t// demoted variable\n\t";
1231 printModuleLevelGV(gvars[i], O, true);
1232 }
1233 }
1234
1235 void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace,
1236 raw_ostream &O) const {
1237 switch (AddressSpace) {
1238 case llvm::ADDRESS_SPACE_LOCAL:
1239 O << "local" ;
1240 break;
1241 case llvm::ADDRESS_SPACE_GLOBAL:
1242 O << "global" ;
1243 break;
1244 case llvm::ADDRESS_SPACE_CONST:
1245 // This logic should be consistent with that in
1246 // getCodeAddrSpace() (NVPTXISelDATToDAT.cpp)
1247 if (nvptxSubtarget.hasGenericLdSt())
1248 O << "global" ;
1249 else
1250 O << "const" ;
1251 break;
1252 case llvm::ADDRESS_SPACE_CONST_NOT_GEN:
1253 O << "const" ;
1254 break;
1255 case llvm::ADDRESS_SPACE_SHARED:
1256 O << "shared" ;
1257 break;
1258 default:
1259 assert(0 && "unexpected address space");
1260 }
1261 }
1262
1263 std::string NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty,
1264 bool useB4PTR) const {
1265 switch (Ty->getTypeID()) {
1266 default:
1267 llvm_unreachable("unexpected type");
1268 break;
1269 case Type::IntegerTyID: {
1270 unsigned NumBits = cast(Ty)->getBitWidth();
1271 if (NumBits == 1)
1272 return "pred";
1273 else if (NumBits <= 64) {
1274 std::string name = "u";
1275 return name + utostr(NumBits);
1276 } else {
1277 llvm_unreachable("Integer too large");
1278 break;
1279 }
1280 break;
1281 }
1282 case Type::FloatTyID:
1283 return "f32";
1284 case Type::DoubleTyID:
1285 return "f64";
1286 case Type::PointerTyID:
1287 if (nvptxSubtarget.is64Bit())
1288 if (useB4PTR) return "b64";
1289 else return "u64";
1290 else
1291 if (useB4PTR) return "b32";
1292 else return "u32";
1293 }
1294 llvm_unreachable("unexpected type");
1295 return NULL;
1296 }
1297
1298 void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable* GVar,
1299 raw_ostream &O) {
1300
1301 const TargetData *TD = TM.getTargetData();
1302
1303 // GlobalVariables are always constant pointers themselves.
1304 const PointerType *PTy = GVar->getType();
1305 Type *ETy = PTy->getElementType();
1306
1307 O << ".";
1308 emitPTXAddressSpace(PTy->getAddressSpace(), O);
1309 if (GVar->getAlignment() == 0)
1310 O << " .align " << (int) TD->getPrefTypeAlignment(ETy);
1311 else
1312 O << " .align " << GVar->getAlignment();
1313
1314 if (ETy->isPrimitiveType() || ETy->isIntegerTy() || isa(ETy)) {
1315 O << " .";
1316 O << getPTXFundamentalTypeStr(ETy);
1317 O << " ";
1318 O << *Mang->getSymbol(GVar);
1319 return;
1320 }
1321
1322 int64_t ElementSize =0;
1323
1324 // Although PTX has direct support for struct type and array type and LLVM IR
1325 // is very similar to PTX, the LLVM CodeGen does not support for targets that
1326 // support these high level field accesses. Structs and arrays are lowered
1327 // into arrays of bytes.
1328 switch (ETy->getTypeID()) {
1329 case Type::StructTyID:
1330 case Type::ArrayTyID:
1331 case Type::VectorTyID:
1332 ElementSize = TD->getTypeStoreSize(ETy);
1333 O << " .b8 " << *Mang->getSymbol(GVar) <<"[" ;
1334 if (ElementSize) {
1335 O << itostr(ElementSize) ;
1336 }
1337 O << "]";
1338 break;
1339 default:
1340 assert( 0 && "type not supported yet");
1341 }
1342 return ;
1343 }
1344
1345
1346 static unsigned int
1347 getOpenCLAlignment(const TargetData *TD,
1348 Type *Ty) {
1349 if (Ty->isPrimitiveType() || Ty->isIntegerTy() || isa(Ty))
1350 return TD->getPrefTypeAlignment(Ty);
1351
1352 const ArrayType *ATy = dyn_cast(Ty);
1353 if (ATy)
1354 return getOpenCLAlignment(TD, ATy->getElementType());
1355
1356 const VectorType *VTy = dyn_cast(Ty);
1357 if (VTy) {
1358 Type *ETy = VTy->getElementType();
1359 unsigned int numE = VTy->getNumElements();
1360 unsigned int alignE = TD->getPrefTypeAlignment(ETy);
1361 if (numE == 3)
1362 return 4*alignE;
1363 else
1364 return numE*alignE;
1365 }
1366
1367 const StructType *STy = dyn_cast(Ty);
1368 if (STy) {
1369 unsigned int alignStruct = 1;
1370 // Go through each element of the struct and find the
1371 // largest alignment.
1372 for (unsigned i=0, e=STy->getNumElements(); i != e; i++) {
1373 Type *ETy = STy->getElementType(i);
1374 unsigned int align = getOpenCLAlignment(TD, ETy);
1375 if (align > alignStruct)
1376 alignStruct = align;
1377 }
1378 return alignStruct;
1379 }
1380
1381 const FunctionType *FTy = dyn_cast(Ty);
1382 if (FTy)
1383 return TD->getPointerPrefAlignment();
1384 return TD->getPrefTypeAlignment(Ty);
1385 }
1386
1387 void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I,
1388 int paramIndex, raw_ostream &O) {
1389 if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) ||
1390 (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA))
1391 O << *CurrentFnSym << "_param_" << paramIndex;
1392 else {
1393 std::string argName = I->getName();
1394 const char *p = argName.c_str();
1395 while (*p) {
1396 if (*p == '.')
1397 O << "_";
1398 else
1399 O << *p;
1400 p++;
1401 }
1402 }
1403 }
1404
1405 void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) {
1406 Function::const_arg_iterator I, E;
1407 int i = 0;
1408
1409 if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) ||
1410 (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)) {
1411 O << *CurrentFnSym << "_param_" << paramIndex;
1412 return;
1413 }
1414
1415 for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, i++) {
1416 if (i==paramIndex) {
1417 printParamName(I, paramIndex, O);
1418 return;
1419 }
1420 }
1421 llvm_unreachable("paramIndex out of bound");
1422 }
1423
1424 void NVPTXAsmPrinter::emitFunctionParamList(const Function *F,
1425 raw_ostream &O) {
1426 const TargetData *TD = TM.getTargetData();
1427 const AttrListPtr &PAL = F->getAttributes();
1428 const TargetLowering *TLI = TM.getTargetLowering();
1429 Function::const_arg_iterator I, E;
1430 unsigned paramIndex = 0;
1431 bool first = true;
1432 bool isKernelFunc = llvm::isKernelFunction(*F);
1433 bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
1434 MVT thePointerTy = TLI->getPointerTy();
1435
1436 O << "(\n";
1437
1438 for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, paramIndex++) {
1439 const Type *Ty = I->getType();
1440
1441 if (!first)
1442 O << ",\n";
1443
1444 first = false;
1445
1446 // Handle image/sampler parameters
1447 if (llvm::isSampler(*I) || llvm::isImage(*I)) {
1448 if (llvm::isImage(*I)) {
1449 std::string sname = I->getName();
1450 if (llvm::isImageWriteOnly(*I))
1451 O << "\t.param .surfref " << *CurrentFnSym << "_param_" << paramIndex;
1452 else // Default image is read_only
1453 O << "\t.param .texref " << *CurrentFnSym << "_param_" << paramIndex;
1454 }
1455 else // Should be llvm::isSampler(*I)
1456 O << "\t.param .samplerref " << *CurrentFnSym << "_param_"
1457 << paramIndex;
1458 continue;
1459 }
1460
1461 if (PAL.paramHasAttr(paramIndex+1, Attribute::ByVal) == false) {
1462 // Just a scalar
1463 const PointerType *PTy = dyn_cast(Ty);
1464 if (isKernelFunc) {
1465 if (PTy) {
1466 // Special handling for pointer arguments to kernel
1467 O << "\t.param .u" << thePointerTy.getSizeInBits() << " ";
1468
1469 if (nvptxSubtarget.getDrvInterface() != NVPTX::CUDA) {
1470 Type *ETy = PTy->getElementType();
1471 int addrSpace = PTy->getAddressSpace();
1472 switch(addrSpace) {
1473 default:
1474 O << ".ptr ";
1475 break;
1476 case llvm::ADDRESS_SPACE_CONST_NOT_GEN:
1477 O << ".ptr .const ";
1478 break;
1479 case llvm::ADDRESS_SPACE_SHARED:
1480 O << ".ptr .shared ";
1481 break;
1482 case llvm::ADDRESS_SPACE_GLOBAL:
1483 case llvm::ADDRESS_SPACE_CONST:
1484 O << ".ptr .global ";
1485 break;
1486 }
1487 O << ".align " << (int)getOpenCLAlignment(TD, ETy) << " ";
1488 }
1489 printParamName(I, paramIndex, O);
1490 continue;
1491 }
1492
1493 // non-pointer scalar to kernel func
1494 O << "\t.param ."
1495 << getPTXFundamentalTypeStr(Ty) << " ";
1496 printParamName(I, paramIndex, O);
1497 continue;
1498 }
1499 // Non-kernel function, just print .param .b for ABI
1500 // and .reg .b for non ABY
1501 unsigned sz = 0;
1502 if (is