llvm.org GIT mirror llvm / e1fee48
PTX: Add intrinsics to list of built-in intrinsics, which allows them to be used by Clang. To help Clang integration, the PTX target has been split into two targets, ptx32 and ptx64, depending on the desired pointer size. Changes: (1) add GCCBuiltin class to all intrinsics; (2) split PTX target into ptx32 and ptx64. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@129851 91177308-0d34-0410-b5e6-96231b3b80d8. Author: Justin Holewinski, 9 years ago.
31 changed file(s) with 166 addition(s) and 91 deletion(s). Raw diff Collapse all Expand all
6363 x86_64, // X86-64: amd64, x86_64
6464 xcore, // XCore: xcore
6565 mblaze, // MBlaze: mblaze
66 ptx, // PTX: ptx
66 ptx32, // PTX: ptx (32-bit)
67 ptx64, // PTX: ptx (64-bit)
6768
6869 InvalidArch
6970 };
1111 //===----------------------------------------------------------------------===//
1212
1313 let TargetPrefix = "ptx" in {
14 multiclass PTXReadSpecialRegisterIntrinsic_v4i32 {
14 multiclass PTXReadSpecialRegisterIntrinsic_v4i32<string prefix> {
1515 // FIXME: Do we need the 128-bit integer type version?
1616 // def _r64 : Intrinsic<[llvm_i128_ty], [], [IntrNoMem]>;
1717
1818 // FIXME: Enable this once v4i32 support is enabled in back-end.
1919 // def _v4i16 : Intrinsic<[llvm_v4i32_ty], [], [IntrNoMem]>;
2020
21 def _x : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>;
22 def _y : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>;
23 def _z : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>;
24 def _w : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>;
21 def _x : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
22 GCCBuiltin<!strconcat(prefix, "_x")>;
23 def _y : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
24 GCCBuiltin<!strconcat(prefix, "_y")>;
25 def _z : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
26 GCCBuiltin<!strconcat(prefix, "_z")>;
27 def _w : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
28 GCCBuiltin<!strconcat(prefix, "_w")>;
2529 }
2630
27 class PTXReadSpecialRegisterIntrinsic_r32
28 : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>;
31 class PTXReadSpecialRegisterIntrinsic_r32<string name>
32 : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
33 GCCBuiltin<name>;
2934
30 class PTXReadSpecialRegisterIntrinsic_r64
31 : Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>;
35 class PTXReadSpecialRegisterIntrinsic_r64<string name>
36 : Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>,
37 GCCBuiltin<name>;
3238 }
3339
34 defm int_ptx_read_tid : PTXReadSpecialRegisterIntrinsic_v4i32;
35 defm int_ptx_read_ntid : PTXReadSpecialRegisterIntrinsic_v4i32;
40 defm int_ptx_read_tid : PTXReadSpecialRegisterIntrinsic_v4i32
41 <"__builtin_ptx_read_tid">;
42 defm int_ptx_read_ntid : PTXReadSpecialRegisterIntrinsic_v4i32
43 <"__builtin_ptx_read_ntid">;
3644
37 def int_ptx_read_laneid : PTXReadSpecialRegisterIntrinsic_r32;
38 def int_ptx_read_warpid : PTXReadSpecialRegisterIntrinsic_r32;
39 def int_ptx_read_nwarpid : PTXReadSpecialRegisterIntrinsic_r32;
45 def int_ptx_read_laneid : PTXReadSpecialRegisterIntrinsic_r32
46 <"__builtin_ptx_read_laneid">;
47 def int_ptx_read_warpid : PTXReadSpecialRegisterIntrinsic_r32
48 <"__builtin_ptx_read_warpid">;
49 def int_ptx_read_nwarpid : PTXReadSpecialRegisterIntrinsic_r32
50 <"__builtin_ptx_read_nwarpid">;
4051
41 defm int_ptx_read_ctaid : PTXReadSpecialRegisterIntrinsic_v4i32;
42 defm int_ptx_read_nctaid : PTXReadSpecialRegisterIntrinsic_v4i32;
52 defm int_ptx_read_ctaid : PTXReadSpecialRegisterIntrinsic_v4i32
53 <"__builtin_ptx_read_ctaid">;
54 defm int_ptx_read_nctaid : PTXReadSpecialRegisterIntrinsic_v4i32
55 <"__builtin_ptx_read_nctaid">;
4356
44 def int_ptx_read_smid : PTXReadSpecialRegisterIntrinsic_r32;
45 def int_ptx_read_nsmid : PTXReadSpecialRegisterIntrinsic_r32;
46 def int_ptx_read_gridid : PTXReadSpecialRegisterIntrinsic_r32;
57 def int_ptx_read_smid : PTXReadSpecialRegisterIntrinsic_r32
58 <"__builtin_ptx_read_smid">;
59 def int_ptx_read_nsmid : PTXReadSpecialRegisterIntrinsic_r32
60 <"__builtin_ptx_read_nsmid">;
61 def int_ptx_read_gridid : PTXReadSpecialRegisterIntrinsic_r32
62 <"__builtin_ptx_read_gridid">;
4763
48 def int_ptx_read_lanemask_eq : PTXReadSpecialRegisterIntrinsic_r32;
49 def int_ptx_read_lanemask_le : PTXReadSpecialRegisterIntrinsic_r32;
50 def int_ptx_read_lanemask_lt : PTXReadSpecialRegisterIntrinsic_r32;
51 def int_ptx_read_lanemask_ge : PTXReadSpecialRegisterIntrinsic_r32;
52 def int_ptx_read_lanemask_gt : PTXReadSpecialRegisterIntrinsic_r32;
64 def int_ptx_read_lanemask_eq : PTXReadSpecialRegisterIntrinsic_r32
65 <"__builtin_ptx_read_lanemask_eq">;
66 def int_ptx_read_lanemask_le : PTXReadSpecialRegisterIntrinsic_r32
67 <"__builtin_ptx_read_lanemask_le">;
68 def int_ptx_read_lanemask_lt : PTXReadSpecialRegisterIntrinsic_r32
69 <"__builtin_ptx_read_lanemask_lt">;
70 def int_ptx_read_lanemask_ge : PTXReadSpecialRegisterIntrinsic_r32
71 <"__builtin_ptx_read_lanemask_ge">;
72 def int_ptx_read_lanemask_gt : PTXReadSpecialRegisterIntrinsic_r32
73 <"__builtin_ptx_read_lanemask_gt">;
5374
54 def int_ptx_read_clock : PTXReadSpecialRegisterIntrinsic_r32;
55 def int_ptx_read_clock64 : PTXReadSpecialRegisterIntrinsic_r64;
75 def int_ptx_read_clock : PTXReadSpecialRegisterIntrinsic_r32
76 <"__builtin_ptx_read_clock">;
77 def int_ptx_read_clock64 : PTXReadSpecialRegisterIntrinsic_r64
78 <"__builtin_ptx_read_clock64">;
5679
57 def int_ptx_read_pm0 : PTXReadSpecialRegisterIntrinsic_r32;
58 def int_ptx_read_pm1 : PTXReadSpecialRegisterIntrinsic_r32;
59 def int_ptx_read_pm2 : PTXReadSpecialRegisterIntrinsic_r32;
60 def int_ptx_read_pm3 : PTXReadSpecialRegisterIntrinsic_r32;
80 def int_ptx_read_pm0 : PTXReadSpecialRegisterIntrinsic_r32
81 <"__builtin_ptx_read_pm0">;
82 def int_ptx_read_pm1 : PTXReadSpecialRegisterIntrinsic_r32
83 <"__builtin_ptx_read_pm1">;
84 def int_ptx_read_pm2 : PTXReadSpecialRegisterIntrinsic_r32
85 <"__builtin_ptx_read_pm2">;
86 def int_ptx_read_pm3 : PTXReadSpecialRegisterIntrinsic_r32
87 <"__builtin_ptx_read_pm3">;
6188
6289 let TargetPrefix = "ptx" in
63 def int_ptx_bar_sync : Intrinsic<[], [llvm_i32_ty], []>;
90 def int_ptx_bar_sync : Intrinsic<[], [llvm_i32_ty], []>,
91 GCCBuiltin<"__builtin_ptx_bar_sync">;
4040 case x86_64: return "x86_64";
4141 case xcore: return "xcore";
4242 case mblaze: return "mblaze";
43 case ptx: return "ptx";
43 case ptx32: return "ptx32";
44 case ptx64: return "ptx64";
4445 }
4546
4647 return "";
7374
7475 case xcore: return "xcore";
7576
76 case ptx: return "ptx";
77 case ptx32: return "ptx";
78 case ptx64: return "ptx";
7779 }
7880 }
7981
164166 return x86_64;
165167 if (Name == "xcore")
166168 return xcore;
167 if (Name == "ptx")
168 return ptx;
169 if (Name == "ptx32")
170 return ptx32;
171 if (Name == "ptx64")
172 return ptx64;
169173
170174 return UnknownArch;
171175 }
204208 Str == "armv6" || Str == "armv7")
205209 return Triple::arm;
206210
207 if (Str == "ptx")
208 return Triple::ptx;
211 if (Str == "ptx32")
212 return Triple::ptx32;
213 if (Str == "ptx64")
214 return Triple::ptx64;
209215
210216 return Triple::UnknownArch;
211217 }
237243 return "armv6";
238244 if (Str == "armv7" || Str == "thumbv7")
239245 return "armv7";
240 if (Str == "ptx")
241 return "ptx";
246 if (Str == "ptx32")
247 return "ptx32";
248 if (Str == "ptx64")
249 return "ptx64";
242250 return NULL;
243251 }
244252
287295 return tce;
288296 else if (ArchName == "xcore")
289297 return xcore;
290 else if (ArchName == "ptx")
291 return ptx;
298 else if (ArchName == "ptx32")
299 return ptx32;
300 else if (ArchName == "ptx64")
301 return ptx64;
292302 else
293303 return UnknownArch;
294304 }
4141 FunctionPass *createPTXMFInfoExtract(PTXTargetMachine &TM,
4242 CodeGenOpt::Level OptLevel);
4343
44 extern Target ThePTXTarget;
44 extern Target ThePTX32Target;
45 extern Target ThePTX64Target;
4546 } // namespace llvm;
4647
4748 // Defines symbolic names for PTX registers.
2222
2323 def FeatureDouble : SubtargetFeature<"double", "SupportsDouble", "true",
2424 "Do not demote .f64 to .f32">;
25
26 def Feature64Bit : SubtargetFeature<"64bit", "Use64BitAddresses", "true",
27 "Use 64-bit integer types for addresses.">;
2825
2926 //===- PTX Version --------------------------------------------------------===//
3027
446446
447447 // Force static initialization.
448448 extern "C" void LLVMInitializePTXAsmPrinter() {
449 RegisterAsmPrinter<PTXAsmPrinter> X(ThePTXTarget);
450 }
449 RegisterAsmPrinter<PTXAsmPrinter> X(ThePTX32Target);
450 RegisterAsmPrinter<PTXAsmPrinter> Y(ThePTX64Target);
451 }
2121 //===----------------------------------------------------------------------===//
2222
2323 // Addressing
24 def Use32BitAddresses : Predicate<"!getSubtarget().use64BitAddresses()">;
25 def Use64BitAddresses : Predicate<"getSubtarget().use64BitAddresses()">;
24 def Use32BitAddresses : Predicate<"!getSubtarget().is64Bit()">;
25 def Use64BitAddresses : Predicate<"getSubtarget().is64Bit()">;
2626
2727 // Shader Model Support
2828 def SupportsSM13 : Predicate<"getSubtarget().supportsSM13()">;
1515
1616 using namespace llvm;
1717
18 PTXSubtarget::PTXSubtarget(const std::string &TT, const std::string &FS)
18 PTXSubtarget::PTXSubtarget(const std::string &TT, const std::string &FS,
19 bool is64Bit)
1920 : PTXShaderModel(PTX_SM_1_0),
2021 PTXVersion(PTX_VERSION_2_0),
2122 SupportsDouble(false),
22 Use64BitAddresses(false) {
23 Is64Bit(is64Bit) {
2324 std::string TARGET = "generic";
2425 ParseSubtargetFeatures(FS, TARGET);
2526 }
4949 bool SupportsDouble;
5050
5151 // Use .u64 instead of .u32 for addresses.
52 bool Use64BitAddresses;
52 bool Is64Bit;
5353
5454 public:
55 PTXSubtarget(const std::string &TT, const std::string &FS);
55 PTXSubtarget(const std::string &TT, const std::string &FS, bool is64Bit);
5656
5757 std::string getTargetString() const;
5858
6060
6161 bool supportsDouble() const { return SupportsDouble; }
6262
63 bool use64BitAddresses() const { return Use64BitAddresses; }
63 bool is64Bit() const { return Is64Bit; }
6464
6565 bool supportsSM13() const { return PTXShaderModel >= PTX_SM_1_3; }
6666
2929 }
3030
3131 extern "C" void LLVMInitializePTXTarget() {
32 RegisterTargetMachine<PTXTargetMachine> X(ThePTXTarget);
33 RegisterAsmInfo<PTXMCAsmInfo> Y(ThePTXTarget);
34 TargetRegistry::RegisterAsmStreamer(ThePTXTarget, createPTXAsmStreamer);
32
33 RegisterTargetMachine<PTX32TargetMachine> X(ThePTX32Target);
34 RegisterTargetMachine<PTX64TargetMachine> Y(ThePTX64Target);
35
36 RegisterAsmInfo<PTXMCAsmInfo> Z(ThePTX32Target);
37 RegisterAsmInfo<PTXMCAsmInfo> W(ThePTX64Target);
38
39 TargetRegistry::RegisterAsmStreamer(ThePTX32Target, createPTXAsmStreamer);
40 TargetRegistry::RegisterAsmStreamer(ThePTX64Target, createPTXAsmStreamer);
3541 }
3642
3743 namespace {
4450 // DataLayout and FrameLowering are filled with dummy data
4551 PTXTargetMachine::PTXTargetMachine(const Target &T,
4652 const std::string &TT,
47 const std::string &FS)
53 const std::string &FS,
54 bool is64Bit)
4855 : LLVMTargetMachine(T, TT),
49 // FIXME: This feels like a dirty hack, but Subtarget does not appear to be
50 // initialized at this point, and we need to finish initialization of
51 // DataLayout.
52 DataLayout((FS.find("64bit") != FS.npos) ? DataLayout64 : DataLayout32),
53 Subtarget(TT, FS),
56 DataLayout(is64Bit ? DataLayout64 : DataLayout32),
57 Subtarget(TT, FS, is64Bit),
5458 FrameLowering(Subtarget),
5559 InstrInfo(*this),
5660 TLInfo(*this) {
61 }
62
63 PTX32TargetMachine::PTX32TargetMachine(const Target &T,
64 const std::string& TT,
65 const std::string& FS)
66 : PTXTargetMachine(T, TT, FS, false) {
67 }
68
69 PTX64TargetMachine::PTX64TargetMachine(const Target &T,
70 const std::string& TT,
71 const std::string& FS)
72 : PTXTargetMachine(T, TT, FS, true) {
5773 }
5874
5975 bool PTXTargetMachine::addInstSelector(PassManagerBase &PM,
3232
3333 public:
3434 PTXTargetMachine(const Target &T, const std::string &TT,
35 const std::string &FS);
35 const std::string &FS, bool is64Bit);
3636
3737 virtual const TargetData *getTargetData() const { return &DataLayout; }
3838
5454 virtual bool addPostRegAlloc(PassManagerBase &PM,
5555 CodeGenOpt::Level OptLevel);
5656 }; // class PTXTargetMachine
57
58
59 class PTX32TargetMachine : public PTXTargetMachine {
60 public:
61
62 PTX32TargetMachine(const Target &T, const std::string &TT,
63 const std::string& FS);
64 }; // class PTX32TargetMachine
65
66 class PTX64TargetMachine : public PTXTargetMachine {
67 public:
68
69 PTX64TargetMachine(const Target &T, const std::string &TT,
70 const std::string& FS);
71 }; // class PTX64TargetMachine
72
5773 } // namespace llvm
5874
5975 #endif // PTX_TARGET_MACHINE_H
1212
1313 using namespace llvm;
1414
15 Target llvm::ThePTXTarget;
15 Target llvm::ThePTX32Target;
16 Target llvm::ThePTX64Target;
1617
1718 extern "C" void LLVMInitializePTXTargetInfo() {
1819 // see llvm/ADT/Triple.h
19 RegisterTarget<Triple::ptx> X(ThePTXTarget, "ptx", "PTX");
20 RegisterTarget<Triple::ptx32> X32(ThePTX32Target, "ptx32",
21 "PTX (32-bit) [Experimental]");
22 RegisterTarget<Triple::ptx64> X64(ThePTX64Target, "ptx64",
23 "PTX (64-bit) [Experimental]");
2024 }
None ; RUN: llc < %s -march=ptx | FileCheck %s
0 ; RUN: llc < %s -march=ptx32 | FileCheck %s
11
22 define ptx_device i16 @t1_u16(i16 %x, i16 %y) {
33 ; CHECK: add.u16 rh0, rh1, rh2;
None ; RUN: llc < %s -march=ptx | FileCheck %s
0 ; RUN: llc < %s -march=ptx32 | FileCheck %s
11
22 define ptx_device void @test_bra_direct() {
33 ; CHECK: bra $L__BB0_1;
None ; RUN: llc < %s -march=ptx | FileCheck %s
0 ; RUN: llc < %s -march=ptx32 | FileCheck %s
11
22 define ptx_kernel void @t1() {
33 ; CHECK: exit;
None ; RUN: llc < %s -march=ptx -mattr=+sm10 | FileCheck %s
0 ; RUN: llc < %s -march=ptx32 -mattr=+sm10 | FileCheck %s
11
22 define ptx_device float @t1_f32(float %x, float %y) {
33 ; CHECK: div.approx.f32 f0, f1, f2;
None ; RUN: llc < %s -march=ptx -mattr=+sm13 | FileCheck %s
0 ; RUN: llc < %s -march=ptx32 -mattr=+sm13 | FileCheck %s
11
22 define ptx_device float @t1_f32(float %x, float %y) {
33 ; CHECK: div.approx.f32 f0, f1, f2;
None ; RUN: llc < %s -march=ptx -mattr=+ptx20,+sm20 | FileCheck %s
0 ; RUN: llc < %s -march=ptx32 -mattr=+ptx20,+sm20 | FileCheck %s
11
22 define ptx_device i32 @test_tid_x() {
33 ; CHECK: mov.u32 r0, %tid.x;
None ; RUN: llc < %s -march=ptx | FileCheck %s
0 ; RUN: llc < %s -march=ptx32 | FileCheck %s
11
22 ;CHECK: .extern .global .b8 array_i16[20];
33 @array_i16 = external global [10 x i16]
None ; RUN: llc < %s -march=ptx -mattr=+ptx20,+sm20 | FileCheck %s
0 ; RUN: llc < %s -march=ptx32 -mattr=+ptx20,+sm20 | FileCheck %s
11
22 define ptx_device float @test_sqrt_f32(float %x) {
33 entry:
None ; RUN: llc < %s -march=ptx -mattr=+sm13 | FileCheck %s
0 ; RUN: llc < %s -march=ptx32 -mattr=+sm13 | FileCheck %s
11
22 define ptx_device float @t1_f32(float %x, float %y, float %z) {
33 ; CHECK: mad.rn.f32 f0, f1, f2, f3;
None ; RUN: llc < %s -march=ptx | FileCheck %s
0 ; RUN: llc < %s -march=ptx32 | FileCheck %s
11
22 define ptx_device i16 @t1_u16() {
33 ; CHECK: mov.u16 rh0, 0;
None ; RUN: llc < %s -march=ptx | FileCheck %s
0 ; RUN: llc < %s -march=ptx32 | FileCheck %s
11
22 ;define ptx_device i32 @t1(i32 %x, i32 %y) {
33 ; %z = mul i32 %x, %y
None ; RUN: llc < %s -march=ptx -mattr=ptx20 | grep ".version 2.0"
1 ; RUN: llc < %s -march=ptx -mattr=ptx21 | grep ".version 2.1"
2 ; RUN: llc < %s -march=ptx -mattr=ptx22 | grep ".version 2.2"
3 ; RUN: llc < %s -march=ptx -mattr=sm10 | grep ".target sm_10"
4 ; RUN: llc < %s -march=ptx -mattr=sm13 | grep ".target sm_13"
5 ; RUN: llc < %s -march=ptx -mattr=sm20 | grep ".target sm_20"
0 ; RUN: llc < %s -march=ptx32 -mattr=ptx20 | grep ".version 2.0"
1 ; RUN: llc < %s -march=ptx32 -mattr=ptx21 | grep ".version 2.1"
2 ; RUN: llc < %s -march=ptx32 -mattr=ptx22 | grep ".version 2.2"
3 ; RUN: llc < %s -march=ptx32 -mattr=sm10 | grep ".target sm_10"
4 ; RUN: llc < %s -march=ptx32 -mattr=sm13 | grep ".target sm_13"
5 ; RUN: llc < %s -march=ptx32 -mattr=sm20 | grep ".target sm_20"
66
77 define ptx_device void @t1() {
88 ret void
None ; RUN: llc < %s -march=ptx | FileCheck %s
0 ; RUN: llc < %s -march=ptx32 | FileCheck %s
11
22 ; CHECK: .func (.reg .u32 r0) test_parameter_order (.reg .u32 r1, .reg .u32 r2)
33 define ptx_device i32 @test_parameter_order(i32 %x, i32 %y) {
None ; RUN: llc < %s -march=ptx | FileCheck %s
0 ; RUN: llc < %s -march=ptx32 | FileCheck %s
11
22 define ptx_device void @t1() {
33 ; CHECK: ret;
None ; RUN: llc < %s -march=ptx | FileCheck %s
0 ; RUN: llc < %s -march=ptx32 | FileCheck %s
11
22 define ptx_device i32 @test_setp_eq_u32_rr(i32 %x, i32 %y) {
33 ; CHECK: setp.eq.u32 p0, r1, r2;
None ; RUN: llc < %s -march=ptx | FileCheck %s
0 ; RUN: llc < %s -march=ptx32 | FileCheck %s
11
22 define ptx_device i32 @t1(i32 %x, i32 %y) {
33 ; CHECK: shl.b32 r0, r1, r2
None ; RUN: llc < %s -march=ptx | FileCheck %s
0 ; RUN: llc < %s -march=ptx32 | FileCheck %s
11
22 define ptx_device i32 @t1(i32 %x, i32 %y) {
33 ; CHECK: shr.u32 r0, r1, r2
None ; RUN: llc < %s -march=ptx | FileCheck %s
0 ; RUN: llc < %s -march=ptx32 | FileCheck %s
11
22 ;CHECK: .extern .global .b8 array_i16[20];
33 @array_i16 = external global [10 x i16]
None ; RUN: llc < %s -march=ptx | FileCheck %s
0 ; RUN: llc < %s -march=ptx32 | FileCheck %s
11
22 define ptx_device i16 @t1_u16(i16 %x, i16 %y) {
33 ; CHECK: sub.u16 rh0, rh1, rh2;