llvm.org GIT mirror llvm / 331ba27
[NVPTX] Add support for cttz/ctlz/ctpop git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185176 91177308-0d34-0410-b5e6-96231b3b80d8 Justin Holewinski 7 years ago
5 changed file(s) with 188 addition(s) and 0 deletion(s). Raw diff Collapse all Expand all
214214
215215 // Custom handling for i8 intrinsics
216216 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
217
218 setOperationAction(ISD::CTLZ, MVT::i16, Legal);
219 setOperationAction(ISD::CTLZ, MVT::i32, Legal);
220 setOperationAction(ISD::CTLZ, MVT::i64, Legal);
221 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Legal);
222 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Legal);
223 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Legal);
224 setOperationAction(ISD::CTTZ, MVT::i16, Expand);
225 setOperationAction(ISD::CTTZ, MVT::i32, Expand);
226 setOperationAction(ISD::CTTZ, MVT::i64, Expand);
227 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16, Expand);
228 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
229 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
230 setOperationAction(ISD::CTPOP, MVT::i16, Legal);
231 setOperationAction(ISD::CTPOP, MVT::i32, Legal);
232 setOperationAction(ISD::CTPOP, MVT::i64, Legal);
217233
218234 // Now deduce the information based on the above mentioned
219235 // actions
24052405 "mov.b64\t{{$d1, $d2}}, $s;",
24062406 []>;
24072407
2408 // Count leading zeros
2409 def CLZr32 : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a),
2410 "clz.b32\t$d, $a;",
2411 []>;
2412 def CLZr64 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
2413 "clz.b64\t$d, $a;",
2414 []>;
2415
2416 // 32-bit has a direct PTX instruction
2417 def : Pat<(ctlz Int32Regs:$a),
2418 (CLZr32 Int32Regs:$a)>;
2419 def : Pat<(ctlz_zero_undef Int32Regs:$a),
2420 (CLZr32 Int32Regs:$a)>;
2421
2422 // For 64-bit, clz.b64 produces a 32-bit result in PTX, so we zero-extend
2423 // it to 64-bit to match the LLVM semantics.
2424 def : Pat<(ctlz Int64Regs:$a),
2425 (CVT_u64_u32 (CLZr64 Int64Regs:$a), CvtNONE)>;
2426 def : Pat<(ctlz_zero_undef Int64Regs:$a),
2427 (CVT_u64_u32 (CLZr64 Int64Regs:$a), CvtNONE)>;
2428
2429 // For 16-bit, we zero-extend to 32-bit, then truncate the result back
2430 // to 16 bits (ctlz of a 16-bit value is guaranteed to require fewer
2431 // than 16 bits to store). We also need to subtract 16 because the
2432 // 16 high-order zeros introduced by the zero-extension were counted.
2433 def : Pat<(ctlz Int16Regs:$a),
2434 (SUBi16ri (CVT_u16_u32 (CLZr32
2435 (CVT_u32_u16 Int16Regs:$a, CvtNONE)),
2436 CvtNONE), 16)>;
2437 def : Pat<(ctlz_zero_undef Int16Regs:$a),
2438 (SUBi16ri (CVT_u16_u32 (CLZr32
2439 (CVT_u32_u16 Int16Regs:$a, CvtNONE)),
2440 CvtNONE), 16)>;
2441
2442 // Population count
2443 def POPCr32 : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a),
2444 "popc.b32\t$d, $a;",
2445 []>;
2446 def POPCr64 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
2447 "popc.b64\t$d, $a;",
2448 []>;
2449
2450 // 32-bit has a direct PTX instruction
2451 def : Pat<(ctpop Int32Regs:$a),
2452 (POPCr32 Int32Regs:$a)>;
2453
2454 // For 64-bit, popc.b64 produces a 32-bit result in PTX, so we zero-extend
2455 // it to 64-bit to match the LLVM semantics.
2456 def : Pat<(ctpop Int64Regs:$a),
2457 (CVT_u64_u32 (POPCr64 Int64Regs:$a), CvtNONE)>;
2458
2459 // For 16-bit, we zero-extend to 32-bit, then truncate the result back
2460 // to 16 bits (ctpop of a 16-bit value is guaranteed to require fewer
2461 // than 16 bits to store).
2462 def : Pat<(ctpop Int16Regs:$a),
2463 (CVT_u16_u32 (POPCr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)),
2464 CvtNONE)>;
2465
24082466 // fround f64 -> f32
24092467 def : Pat<(f32 (fround Float64Regs:$a)),
24102468 (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>, Requires<[doF32FTZ]>;
0 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
1
2 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
3
4 declare i16 @llvm.ctlz.i16(i16, i1) readnone
5 declare i32 @llvm.ctlz.i32(i32, i1) readnone
6 declare i64 @llvm.ctlz.i64(i64, i1) readnone
7
8 define i32 @myctpop(i32 %a) {
9 ; CHECK: clz.b32
10 %val = call i32 @llvm.ctlz.i32(i32 %a, i1 false) readnone
11 ret i32 %val
12 }
13
14 define i16 @myctpop16(i16 %a) {
15 ; CHECK: clz.b32
16 %val = call i16 @llvm.ctlz.i16(i16 %a, i1 false) readnone
17 ret i16 %val
18 }
19
20 define i64 @myctpop64(i64 %a) {
21 ; CHECK: clz.b64
22 %val = call i64 @llvm.ctlz.i64(i64 %a, i1 false) readnone
23 ret i64 %val
24 }
25
26
27 define i32 @myctpop_2(i32 %a) {
28 ; CHECK: clz.b32
29 %val = call i32 @llvm.ctlz.i32(i32 %a, i1 true) readnone
30 ret i32 %val
31 }
32
33 define i16 @myctpop16_2(i16 %a) {
34 ; CHECK: clz.b32
35 %val = call i16 @llvm.ctlz.i16(i16 %a, i1 true) readnone
36 ret i16 %val
37 }
38
39 define i64 @myctpop64_2(i64 %a) {
40 ; CHECK: clz.b64
41 %val = call i64 @llvm.ctlz.i64(i64 %a, i1 true) readnone
42 ret i64 %val
43 }
0 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
1
2 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
3
4 define i32 @myctpop(i32 %a) {
5 ; CHECK: popc.b32
6 %val = tail call i32 @llvm.ctpop.i32(i32 %a)
7 ret i32 %val
8 }
9
10 define i16 @myctpop16(i16 %a) {
11 ; CHECK: popc.b32
12 %val = tail call i16 @llvm.ctpop.i16(i16 %a)
13 ret i16 %val
14 }
15
16 define i64 @myctpop64(i64 %a) {
17 ; CHECK: popc.b64
18 %val = tail call i64 @llvm.ctpop.i64(i64 %a)
19 ret i64 %val
20 }
21
22 declare i16 @llvm.ctpop.i16(i16)
23 declare i32 @llvm.ctpop.i32(i32)
24 declare i64 @llvm.ctpop.i64(i64)
0 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
1
2
3 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
4
5 declare i16 @llvm.cttz.i16(i16, i1) readnone
6 declare i32 @llvm.cttz.i32(i32, i1) readnone
7 declare i64 @llvm.cttz.i64(i64, i1) readnone
8
9 define i32 @myctpop(i32 %a) {
10 ; CHECK: popc.b32
11 %val = call i32 @llvm.cttz.i32(i32 %a, i1 false) readnone
12 ret i32 %val
13 }
14
15 define i16 @myctpop16(i16 %a) {
16 ; CHECK: popc.b32
17 %val = call i16 @llvm.cttz.i16(i16 %a, i1 false) readnone
18 ret i16 %val
19 }
20
21 define i64 @myctpop64(i64 %a) {
22 ; CHECK: popc.b64
23 %val = call i64 @llvm.cttz.i64(i64 %a, i1 false) readnone
24 ret i64 %val
25 }
26
27
28 define i32 @myctpop_2(i32 %a) {
29 ; CHECK: popc.b32
30 %val = call i32 @llvm.cttz.i32(i32 %a, i1 true) readnone
31 ret i32 %val
32 }
33
34 define i16 @myctpop16_2(i16 %a) {
35 ; CHECK: popc.b32
36 %val = call i16 @llvm.cttz.i16(i16 %a, i1 true) readnone
37 ret i16 %val
38 }
39
40 define i64 @myctpop64_2(i64 %a) {
41 ; CHECK: popc.b64
42 %val = call i64 @llvm.cttz.i64(i64 %a, i1 true) readnone
43 ret i64 %val
44 }