llvm.org GIT mirror llvm / 7e65651
[NVPTX] Rename registers %fl -> %fd and %rl -> %rd

This matches the internal behavior of NVIDIA tools like libnvvm.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@213168 91177308-0d34-0410-b5e6-96231b3b80d8

Justin Holewinski, 6 years ago
21 changed file(s) with 145 addition(s) and 145 deletion(s). Raw diff Collapse all Expand all
5656 OS << "%r";
5757 break;
5858 case 4:
59 OS << "%rl";
59 OS << "%rd";
6060 break;
6161 case 5:
6262 OS << "%f";
6363 break;
6464 case 6:
65 OS << "%fl";
65 OS << "%fd";
6666 break;
6767 }
6868
20092009 // O << "\t.reg .s16 %rc<" << NVPTXNumRegisters << ">;\n";
20102010 // O << "\t.reg .s16 %rs<" << NVPTXNumRegisters << ">;\n";
20112011 // O << "\t.reg .s32 %r<" << NVPTXNumRegisters << ">;\n";
2012 // O << "\t.reg .s64 %rl<" << NVPTXNumRegisters << ">;\n";
2012 // O << "\t.reg .s64 %rd<" << NVPTXNumRegisters << ">;\n";
20132013 // O << "\t.reg .f32 %f<" << NVPTXNumRegisters << ">;\n";
2014 // O << "\t.reg .f64 %fl<" << NVPTXNumRegisters << ">;\n";
2014 // O << "\t.reg .f64 %fd<" << NVPTXNumRegisters << ">;\n";
20152015
20162016 // Emit declaration of the virtual registers or 'physical' registers for
20172017 // each register class
5252 return "%f";
5353 }
5454 if (RC == &NVPTX::Float64RegsRegClass) {
55 return "%fl";
55 return "%fd";
5656 } else if (RC == &NVPTX::Int64RegsRegClass) {
57 return "%rl";
57 return "%rd";
5858 } else if (RC == &NVPTX::Int32RegsRegClass) {
5959 return "%r";
6060 } else if (RC == &NVPTX::Int16RegsRegClass) {
3434 def P#i : NVPTXReg<"%p"#i>; // Predicate
3535 def RS#i : NVPTXReg<"%rs"#i>; // 16-bit
3636 def R#i : NVPTXReg<"%r"#i>; // 32-bit
37 def RL#i : NVPTXReg<"%rl"#i>; // 64-bit
37 def RL#i : NVPTXReg<"%rd"#i>; // 64-bit
3838 def F#i : NVPTXReg<"%f"#i>; // 32-bit float
39 def FL#i : NVPTXReg<"%fl"#i>; // 64-bit float
39 def FL#i : NVPTXReg<"%fd"#i>; // 64-bit float
4040
4141 // Arguments
4242 def ia#i : NVPTXReg<"%ia"#i>;
88 ;;; f64
99
1010 define double @fadd_f64(double %a, double %b) {
11 ; CHECK: add.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}
11 ; CHECK: add.f64 %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}
1212 ; CHECK: ret
1313 %ret = fadd double %a, %b
1414 ret double %ret
1515 }
1616
1717 define double @fsub_f64(double %a, double %b) {
18 ; CHECK: sub.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}
18 ; CHECK: sub.f64 %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}
1919 ; CHECK: ret
2020 %ret = fsub double %a, %b
2121 ret double %ret
2222 }
2323
2424 define double @fmul_f64(double %a, double %b) {
25 ; CHECK: mul.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}
25 ; CHECK: mul.f64 %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}
2626 ; CHECK: ret
2727 %ret = fmul double %a, %b
2828 ret double %ret
2929 }
3030
3131 define double @fdiv_f64(double %a, double %b) {
32 ; CHECK: div.rn.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}
32 ; CHECK: div.rn.f64 %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}
3333 ; CHECK: ret
3434 %ret = fdiv double %a, %b
3535 ret double %ret
88 ;;; i64
99
1010 define i64 @add_i64(i64 %a, i64 %b) {
11 ; CHECK: add.s64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
11 ; CHECK: add.s64 %rd{{[0-9]+}}, %rd{{[0-9]+}}, %rd{{[0-9]+}}
1212 ; CHECK: ret
1313 %ret = add i64 %a, %b
1414 ret i64 %ret
1515 }
1616
1717 define i64 @sub_i64(i64 %a, i64 %b) {
18 ; CHECK: sub.s64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
18 ; CHECK: sub.s64 %rd{{[0-9]+}}, %rd{{[0-9]+}}, %rd{{[0-9]+}}
1919 ; CHECK: ret
2020 %ret = sub i64 %a, %b
2121 ret i64 %ret
2222 }
2323
2424 define i64 @mul_i64(i64 %a, i64 %b) {
25 ; CHECK: mul.lo.s64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
25 ; CHECK: mul.lo.s64 %rd{{[0-9]+}}, %rd{{[0-9]+}}, %rd{{[0-9]+}}
2626 ; CHECK: ret
2727 %ret = mul i64 %a, %b
2828 ret i64 %ret
2929 }
3030
3131 define i64 @sdiv_i64(i64 %a, i64 %b) {
32 ; CHECK: div.s64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
32 ; CHECK: div.s64 %rd{{[0-9]+}}, %rd{{[0-9]+}}, %rd{{[0-9]+}}
3333 ; CHECK: ret
3434 %ret = sdiv i64 %a, %b
3535 ret i64 %ret
3636 }
3737
3838 define i64 @udiv_i64(i64 %a, i64 %b) {
39 ; CHECK: div.u64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
39 ; CHECK: div.u64 %rd{{[0-9]+}}, %rd{{[0-9]+}}, %rd{{[0-9]+}}
4040 ; CHECK: ret
4141 %ret = udiv i64 %a, %b
4242 ret i64 %ret
4343 }
4444
4545 define i64 @srem_i64(i64 %a, i64 %b) {
46 ; CHECK: rem.s64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
46 ; CHECK: rem.s64 %rd{{[0-9]+}}, %rd{{[0-9]+}}, %rd{{[0-9]+}}
4747 ; CHECK: ret
4848 %ret = srem i64 %a, %b
4949 ret i64 %ret
5050 }
5151
5252 define i64 @urem_i64(i64 %a, i64 %b) {
53 ; CHECK: rem.u64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
53 ; CHECK: rem.u64 %rd{{[0-9]+}}, %rd{{[0-9]+}}, %rd{{[0-9]+}}
5454 ; CHECK: ret
5555 %ret = urem i64 %a, %b
5656 ret i64 %ret
5757 }
5858
5959 define i64 @and_i64(i64 %a, i64 %b) {
60 ; CHECK: and.b64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
60 ; CHECK: and.b64 %rd{{[0-9]+}}, %rd{{[0-9]+}}, %rd{{[0-9]+}}
6161 ; CHECK: ret
6262 %ret = and i64 %a, %b
6363 ret i64 %ret
6464 }
6565
6666 define i64 @or_i64(i64 %a, i64 %b) {
67 ; CHECK: or.b64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
67 ; CHECK: or.b64 %rd{{[0-9]+}}, %rd{{[0-9]+}}, %rd{{[0-9]+}}
6868 ; CHECK: ret
6969 %ret = or i64 %a, %b
7070 ret i64 %ret
7171 }
7272
7373 define i64 @xor_i64(i64 %a, i64 %b) {
74 ; CHECK: xor.b64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %rl{{[0-9]+}}
74 ; CHECK: xor.b64 %rd{{[0-9]+}}, %rd{{[0-9]+}}, %rd{{[0-9]+}}
7575 ; CHECK: ret
7676 %ret = xor i64 %a, %b
7777 ret i64 %ret
7979
8080 define i64 @shl_i64(i64 %a, i64 %b) {
8181 ; PTX requires 32-bit shift amount
82 ; CHECK: shl.b64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %r{{[0-9]+}}
82 ; CHECK: shl.b64 %rd{{[0-9]+}}, %rd{{[0-9]+}}, %r{{[0-9]+}}
8383 ; CHECK: ret
8484 %ret = shl i64 %a, %b
8585 ret i64 %ret
8787
8888 define i64 @ashr_i64(i64 %a, i64 %b) {
8989 ; PTX requires 32-bit shift amount
90 ; CHECK: shr.s64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %r{{[0-9]+}}
90 ; CHECK: shr.s64 %rd{{[0-9]+}}, %rd{{[0-9]+}}, %r{{[0-9]+}}
9191 ; CHECK: ret
9292 %ret = ashr i64 %a, %b
9393 ret i64 %ret
9595
9696 define i64 @lshr_i64(i64 %a, i64 %b) {
9797 ; PTX requires 32-bit shift amount
98 ; CHECK: shr.u64 %rl{{[0-9]+}}, %rl{{[0-9]+}}, %r{{[0-9]+}}
98 ; CHECK: shr.u64 %rd{{[0-9]+}}, %rd{{[0-9]+}}, %r{{[0-9]+}}
9999 ; CHECK: ret
100100 %ret = lshr i64 %a, %b
101101 ret i64 %ret
1919 %buf = alloca [16 x i8], align 4
2020
2121 ; CHECK: .local .align 4 .b8 __local_depot0[16]
22 ; CHECK: mov.u64 %rl[[BUF_REG:[0-9]+]]
23 ; CHECK: cvta.local.u64 %SP, %rl[[BUF_REG]]
22 ; CHECK: mov.u64 %rd[[BUF_REG:[0-9]+]]
23 ; CHECK: cvta.local.u64 %SP, %rd[[BUF_REG]]
2424
25 ; CHECK: ld.param.u64 %rl[[A_REG:[0-9]+]], [kernel_func_param_0]
26 ; CHECK: ld.f32 %f[[A0_REG:[0-9]+]], [%rl[[A_REG]]]
25 ; CHECK: ld.param.u64 %rd[[A_REG:[0-9]+]], [kernel_func_param_0]
26 ; CHECK: ld.f32 %f[[A0_REG:[0-9]+]], [%rd[[A_REG]]]
2727 ; CHECK: st.f32 [%SP+0], %f[[A0_REG]]
2828
2929 %0 = load float* %a, align 4
4545 %7 = bitcast i8* %arrayidx7 to float*
4646 store float %6, float* %7, align 4
4747
48 ; CHECK: add.u64 %rl[[SP_REG:[0-9]+]], %SP, 0
48 ; CHECK: add.u64 %rd[[SP_REG:[0-9]+]], %SP, 0
4949 ; CHECK: .param .b64 param0;
50 ; CHECK-NEXT: st.param.b64 [param0+0], %rl[[A_REG]]
50 ; CHECK-NEXT: st.param.b64 [param0+0], %rd[[A_REG]]
5151 ; CHECK-NEXT: .param .b64 param1;
52 ; CHECK-NEXT: st.param.b64 [param1+0], %rl[[SP_REG]]
52 ; CHECK-NEXT: st.param.b64 [param1+0], %rd[[SP_REG]]
5353 ; CHECK-NEXT: call.uni
5454 ; CHECK-NEXT: callee,
5555
88 ;;; i64
99
1010 define i64 @icmp_eq_i64(i64 %a, i64 %b) {
11 ; CHECK: setp.eq.s64 %p[[P0:[0-9]+]], %rl{{[0-9]+}}, %rl{{[0-9]+}}
12 ; CHECK: selp.u64 %rl{{[0-9]+}}, 1, 0, %p[[P0]]
11 ; CHECK: setp.eq.s64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}}
12 ; CHECK: selp.u64 %rd{{[0-9]+}}, 1, 0, %p[[P0]]
1313 ; CHECK: ret
1414 %cmp = icmp eq i64 %a, %b
1515 %ret = zext i1 %cmp to i64
1717 }
1818
1919 define i64 @icmp_ne_i64(i64 %a, i64 %b) {
20 ; CHECK: setp.ne.s64 %p[[P0:[0-9]+]], %rl{{[0-9]+}}, %rl{{[0-9]+}}
21 ; CHECK: selp.u64 %rl{{[0-9]+}}, 1, 0, %p[[P0]]
20 ; CHECK: setp.ne.s64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}}
21 ; CHECK: selp.u64 %rd{{[0-9]+}}, 1, 0, %p[[P0]]
2222 ; CHECK: ret
2323 %cmp = icmp ne i64 %a, %b
2424 %ret = zext i1 %cmp to i64
2626 }
2727
2828 define i64 @icmp_ugt_i64(i64 %a, i64 %b) {
29 ; CHECK: setp.gt.u64 %p[[P0:[0-9]+]], %rl{{[0-9]+}}, %rl{{[0-9]+}}
30 ; CHECK: selp.u64 %rl{{[0-9]+}}, 1, 0, %p[[P0]]
29 ; CHECK: setp.gt.u64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}}
30 ; CHECK: selp.u64 %rd{{[0-9]+}}, 1, 0, %p[[P0]]
3131 ; CHECK: ret
3232 %cmp = icmp ugt i64 %a, %b
3333 %ret = zext i1 %cmp to i64
3535 }
3636
3737 define i64 @icmp_uge_i64(i64 %a, i64 %b) {
38 ; CHECK: setp.ge.u64 %p[[P0:[0-9]+]], %rl{{[0-9]+}}, %rl{{[0-9]+}}
39 ; CHECK: selp.u64 %rl{{[0-9]+}}, 1, 0, %p[[P0]]
38 ; CHECK: setp.ge.u64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}}
39 ; CHECK: selp.u64 %rd{{[0-9]+}}, 1, 0, %p[[P0]]
4040 ; CHECK: ret
4141 %cmp = icmp uge i64 %a, %b
4242 %ret = zext i1 %cmp to i64
4444 }
4545
4646 define i64 @icmp_ult_i64(i64 %a, i64 %b) {
47 ; CHECK: setp.lt.u64 %p[[P0:[0-9]+]], %rl{{[0-9]+}}, %rl{{[0-9]+}}
48 ; CHECK: selp.u64 %rl{{[0-9]+}}, 1, 0, %p[[P0]]
47 ; CHECK: setp.lt.u64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}}
48 ; CHECK: selp.u64 %rd{{[0-9]+}}, 1, 0, %p[[P0]]
4949 ; CHECK: ret
5050 %cmp = icmp ult i64 %a, %b
5151 %ret = zext i1 %cmp to i64
5353 }
5454
5555 define i64 @icmp_ule_i64(i64 %a, i64 %b) {
56 ; CHECK: setp.le.u64 %p[[P0:[0-9]+]], %rl{{[0-9]+}}, %rl{{[0-9]+}}
57 ; CHECK: selp.u64 %rl{{[0-9]+}}, 1, 0, %p[[P0]]
56 ; CHECK: setp.le.u64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}}
57 ; CHECK: selp.u64 %rd{{[0-9]+}}, 1, 0, %p[[P0]]
5858 ; CHECK: ret
5959 %cmp = icmp ule i64 %a, %b
6060 %ret = zext i1 %cmp to i64
6262 }
6363
6464 define i64 @icmp_sgt_i64(i64 %a, i64 %b) {
65 ; CHECK: setp.gt.s64 %p[[P0:[0-9]+]], %rl{{[0-9]+}}, %rl{{[0-9]+}}
66 ; CHECK: selp.u64 %rl{{[0-9]+}}, 1, 0, %p[[P0]]
65 ; CHECK: setp.gt.s64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}}
66 ; CHECK: selp.u64 %rd{{[0-9]+}}, 1, 0, %p[[P0]]
6767 ; CHECK: ret
6868 %cmp = icmp sgt i64 %a, %b
6969 %ret = zext i1 %cmp to i64
7171 }
7272
7373 define i64 @icmp_sge_i64(i64 %a, i64 %b) {
74 ; CHECK: setp.ge.s64 %p[[P0:[0-9]+]], %rl{{[0-9]+}}, %rl{{[0-9]+}}
75 ; CHECK: selp.u64 %rl{{[0-9]+}}, 1, 0, %p[[P0]]
74 ; CHECK: setp.ge.s64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}}
75 ; CHECK: selp.u64 %rd{{[0-9]+}}, 1, 0, %p[[P0]]
7676 ; CHECK: ret
7777 %cmp = icmp sge i64 %a, %b
7878 %ret = zext i1 %cmp to i64
8080 }
8181
8282 define i64 @icmp_slt_i64(i64 %a, i64 %b) {
83 ; CHECK: setp.lt.s64 %p[[P0:[0-9]+]], %rl{{[0-9]+}}, %rl{{[0-9]+}}
84 ; CHECK: selp.u64 %rl{{[0-9]+}}, 1, 0, %p[[P0]]
83 ; CHECK: setp.lt.s64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}}
84 ; CHECK: selp.u64 %rd{{[0-9]+}}, 1, 0, %p[[P0]]
8585 ; CHECK: ret
8686 %cmp = icmp slt i64 %a, %b
8787 %ret = zext i1 %cmp to i64
8989 }
9090
9191 define i64 @icmp_sle_i64(i64 %a, i64 %b) {
92 ; CHECK: setp.le.s64 %p[[P0:[0-9]+]], %rl{{[0-9]+}}, %rl{{[0-9]+}}
93 ; CHECK: selp.u64 %rl{{[0-9]+}}, 1, 0, %p[[P0]]
92 ; CHECK: setp.le.s64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}}
93 ; CHECK: selp.u64 %rd{{[0-9]+}}, 1, 0, %p[[P0]]
9494 ; CHECK: ret
9595 %cmp = icmp sle i64 %a, %b
9696 %ret = zext i1 %cmp to i64
99 }
1010
1111 define i16 @cvt_i16_f64(double %x) {
12 ; CHECK: cvt.rzi.u16.f64 %rs{{[0-9]+}}, %fl{{[0-9]+}};
12 ; CHECK: cvt.rzi.u16.f64 %rs{{[0-9]+}}, %fd{{[0-9]+}};
1313 ; CHECK: ret;
1414 %a = fptoui double %x to i16
1515 ret i16 %a
2323 }
2424
2525 define i32 @cvt_i32_f64(double %x) {
26 ; CHECK: cvt.rzi.u32.f64 %r{{[0-9]+}}, %fl{{[0-9]+}};
26 ; CHECK: cvt.rzi.u32.f64 %r{{[0-9]+}}, %fd{{[0-9]+}};
2727 ; CHECK: ret;
2828 %a = fptoui double %x to i32
2929 ret i32 %a
3131
3232
3333 define i64 @cvt_i64_f32(float %x) {
34 ; CHECK: cvt.rzi.u64.f32 %rl{{[0-9]+}}, %f{{[0-9]+}};
34 ; CHECK: cvt.rzi.u64.f32 %rd{{[0-9]+}}, %f{{[0-9]+}};
3535 ; CHECK: ret;
3636 %a = fptoui float %x to i64
3737 ret i64 %a
3838 }
3939
4040 define i64 @cvt_i64_f64(double %x) {
41 ; CHECK: cvt.rzi.u64.f64 %rl{{[0-9]+}}, %fl{{[0-9]+}};
41 ; CHECK: cvt.rzi.u64.f64 %rd{{[0-9]+}}, %fd{{[0-9]+}};
4242 ; CHECK: ret;
4343 %a = fptoui double %x to i64
4444 ret i64 %a
5959 }
6060
6161 define float @cvt_f32_i64(i64 %x) {
62 ; CHECK: cvt.rn.f32.u64 %f{{[0-9]+}}, %rl{{[0-9]+}};
62 ; CHECK: cvt.rn.f32.u64 %f{{[0-9]+}}, %rd{{[0-9]+}};
6363 ; CHECK: ret;
6464 %a = uitofp i64 %x to float
6565 ret float %a
6666 }
6767
6868 define float @cvt_f32_f64(double %x) {
69 ; CHECK: cvt.rn.f32.f64 %f{{[0-9]+}}, %fl{{[0-9]+}};
69 ; CHECK: cvt.rn.f32.f64 %f{{[0-9]+}}, %fd{{[0-9]+}};
7070 ; CHECK: ret;
7171 %a = fptrunc double %x to float
7272 ret float %a
8787 }
8888
8989 define float @cvt_f32_s64(i64 %x) {
90 ; CHECK: cvt.rn.f32.s64 %f{{[0-9]+}}, %rl{{[0-9]+}}
90 ; CHECK: cvt.rn.f32.s64 %f{{[0-9]+}}, %rd{{[0-9]+}}
9191 ; CHECK: ret
9292 %a = sitofp i64 %x to float
9393 ret float %a
9494 }
9595
9696 define double @cvt_f64_i16(i16 %x) {
97 ; CHECK: cvt.rn.f64.u16 %fl{{[0-9]+}}, %rs{{[0-9]+}};
97 ; CHECK: cvt.rn.f64.u16 %fd{{[0-9]+}}, %rs{{[0-9]+}};
9898 ; CHECK: ret;
9999 %a = uitofp i16 %x to double
100100 ret double %a
101101 }
102102
103103 define double @cvt_f64_i32(i32 %x) {
104 ; CHECK: cvt.rn.f64.u32 %fl{{[0-9]+}}, %r{{[0-9]+}};
104 ; CHECK: cvt.rn.f64.u32 %fd{{[0-9]+}}, %r{{[0-9]+}};
105105 ; CHECK: ret;
106106 %a = uitofp i32 %x to double
107107 ret double %a
108108 }
109109
110110 define double @cvt_f64_i64(i64 %x) {
111 ; CHECK: cvt.rn.f64.u64 %fl{{[0-9]+}}, %rl{{[0-9]+}};
111 ; CHECK: cvt.rn.f64.u64 %fd{{[0-9]+}}, %rd{{[0-9]+}};
112112 ; CHECK: ret;
113113 %a = uitofp i64 %x to double
114114 ret double %a
115115 }
116116
117117 define double @cvt_f64_f32(float %x) {
118 ; CHECK: cvt.f64.f32 %fl{{[0-9]+}}, %f{{[0-9]+}};
118 ; CHECK: cvt.f64.f32 %fd{{[0-9]+}}, %f{{[0-9]+}};
119119 ; CHECK: ret;
120120 %a = fpext float %x to double
121121 ret double %a
122122 }
123123
124124 define double @cvt_f64_s16(i16 %x) {
125 ; CHECK: cvt.rn.f64.s16 %fl{{[0-9]+}}, %rs{{[0-9]+}}
125 ; CHECK: cvt.rn.f64.s16 %fd{{[0-9]+}}, %rs{{[0-9]+}}
126126 ; CHECK: ret
127127 %a = sitofp i16 %x to double
128128 ret double %a
129129 }
130130
131131 define double @cvt_f64_s32(i32 %x) {
132 ; CHECK: cvt.rn.f64.s32 %fl{{[0-9]+}}, %r{{[0-9]+}}
132 ; CHECK: cvt.rn.f64.s32 %fd{{[0-9]+}}, %r{{[0-9]+}}
133133 ; CHECK: ret
134134 %a = sitofp i32 %x to double
135135 ret double %a
136136 }
137137
138138 define double @cvt_f64_s64(i64 %x) {
139 ; CHECK: cvt.rn.f64.s64 %fl{{[0-9]+}}, %rl{{[0-9]+}}
139 ; CHECK: cvt.rn.f64.s64 %fd{{[0-9]+}}, %rd{{[0-9]+}}
140140 ; CHECK: ret
141141 %a = sitofp i64 %x to double
142142 ret double %a
4747 ; i64
4848
4949 define i64 @cvt_i64_i16(i16 %x) {
50 ; CHECK: ld.param.u16 %rl[[R0:[0-9]+]], [cvt_i64_i16_param_{{[0-9]+}}]
51 ; CHECK: st.param.b64 [func_retval{{[0-9]+}}+0], %rl[[R0]]
50 ; CHECK: ld.param.u16 %rd[[R0:[0-9]+]], [cvt_i64_i16_param_{{[0-9]+}}]
51 ; CHECK: st.param.b64 [func_retval{{[0-9]+}}+0], %rd[[R0]]
5252 ; CHECK: ret
5353 %a = zext i16 %x to i64
5454 ret i64 %a
5555 }
5656
5757 define i64 @cvt_i64_i32(i32 %x) {
58 ; CHECK: ld.param.u32 %rl[[R0:[0-9]+]], [cvt_i64_i32_param_{{[0-9]+}}]
59 ; CHECK: st.param.b64 [func_retval{{[0-9]+}}+0], %rl[[R0]]
58 ; CHECK: ld.param.u32 %rd[[R0:[0-9]+]], [cvt_i64_i32_param_{{[0-9]+}}]
59 ; CHECK: st.param.b64 [func_retval{{[0-9]+}}+0], %rd[[R0]]
6060 ; CHECK: ret
6161 %a = zext i32 %x to i64
6262 ret i64 %a
88 }
99
1010 define ptx_device double @t1_f64(double %x, double %y, double %z) {
11 ; CHECK: fma.rn.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}};
11 ; CHECK: fma.rn.f64 %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}};
1212 ; CHECK: ret;
1313 %a = fmul double %x, %y
1414 %b = fadd double %a, %z
1010 }
1111
1212 ; CHECK: myaddd
13 ; CHECK: add.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, 0d3FF0000000000000
13 ; CHECK: add.f64 %fd{{[0-9]+}}, %fd{{[0-9]+}}, 0d3FF0000000000000
1414 define double @myaddd(double %a) {
1515 %ret = fadd double %a, 1.0
1616 ret double %ret
197197 }
198198
199199 define ptx_device i64 @test_clock64() {
200 ; CHECK: mov.u64 %rl{{[0-9]+}}, %clock64;
200 ; CHECK: mov.u64 %rd{{[0-9]+}}, %clock64;
201201 ; CHECK: ret;
202202 %x = call i64 @llvm.ptx.read.clock64()
203203 ret i64 %x
88 }
99
1010 define ptx_device double @test_fabs(double %d) {
11 ; CHECK: abs.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}};
11 ; CHECK: abs.f64 %fd{{[0-9]+}}, %fd{{[0-9]+}};
1212 ; CHECK: ret;
1313 %x = call double @llvm.fabs.f64(double %d)
1414 ret double %x
55 define i8 @ld_global_i8(i8 addrspace(1)* %ptr) {
66 ; PTX32: ld.global.u8 %r{{[0-9]+}}, [%r{{[0-9]+}}]
77 ; PTX32: ret
8 ; PTX64: ld.global.u8 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
8 ; PTX64: ld.global.u8 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
99 ; PTX64: ret
1010 %a = load i8 addrspace(1)* %ptr
1111 ret i8 %a
1414 define i8 @ld_shared_i8(i8 addrspace(3)* %ptr) {
1515 ; PTX32: ld.shared.u8 %r{{[0-9]+}}, [%r{{[0-9]+}}]
1616 ; PTX32: ret
17 ; PTX64: ld.shared.u8 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
17 ; PTX64: ld.shared.u8 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
1818 ; PTX64: ret
1919 %a = load i8 addrspace(3)* %ptr
2020 ret i8 %a
2323 define i8 @ld_local_i8(i8 addrspace(5)* %ptr) {
2424 ; PTX32: ld.local.u8 %r{{[0-9]+}}, [%r{{[0-9]+}}]
2525 ; PTX32: ret
26 ; PTX64: ld.local.u8 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
26 ; PTX64: ld.local.u8 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
2727 ; PTX64: ret
2828 %a = load i8 addrspace(5)* %ptr
2929 ret i8 %a
3333 define i16 @ld_global_i16(i16 addrspace(1)* %ptr) {
3434 ; PTX32: ld.global.u16 %r{{[0-9]+}}, [%r{{[0-9]+}}]
3535 ; PTX32: ret
36 ; PTX64: ld.global.u16 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
36 ; PTX64: ld.global.u16 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
3737 ; PTX64: ret
3838 %a = load i16 addrspace(1)* %ptr
3939 ret i16 %a
4242 define i16 @ld_shared_i16(i16 addrspace(3)* %ptr) {
4343 ; PTX32: ld.shared.u16 %r{{[0-9]+}}, [%r{{[0-9]+}}]
4444 ; PTX32: ret
45 ; PTX64: ld.shared.u16 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
45 ; PTX64: ld.shared.u16 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
4646 ; PTX64: ret
4747 %a = load i16 addrspace(3)* %ptr
4848 ret i16 %a
5151 define i16 @ld_local_i16(i16 addrspace(5)* %ptr) {
5252 ; PTX32: ld.local.u16 %r{{[0-9]+}}, [%r{{[0-9]+}}]
5353 ; PTX32: ret
54 ; PTX64: ld.local.u16 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
54 ; PTX64: ld.local.u16 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
5555 ; PTX64: ret
5656 %a = load i16 addrspace(5)* %ptr
5757 ret i16 %a
6161 define i32 @ld_global_i32(i32 addrspace(1)* %ptr) {
6262 ; PTX32: ld.global.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}]
6363 ; PTX32: ret
64 ; PTX64: ld.global.u32 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
64 ; PTX64: ld.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
6565 ; PTX64: ret
6666 %a = load i32 addrspace(1)* %ptr
6767 ret i32 %a
7070 define i32 @ld_shared_i32(i32 addrspace(3)* %ptr) {
7171 ; PTX32: ld.shared.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}]
7272 ; PTX32: ret
73 ; PTX64: ld.shared.u32 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
73 ; PTX64: ld.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
7474 ; PTX64: ret
7575 %a = load i32 addrspace(3)* %ptr
7676 ret i32 %a
7979 define i32 @ld_local_i32(i32 addrspace(5)* %ptr) {
8080 ; PTX32: ld.local.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}]
8181 ; PTX32: ret
82 ; PTX64: ld.local.u32 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
82 ; PTX64: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
8383 ; PTX64: ret
8484 %a = load i32 addrspace(5)* %ptr
8585 ret i32 %a
8787
8888 ;; i64
8989 define i64 @ld_global_i64(i64 addrspace(1)* %ptr) {
90 ; PTX32: ld.global.u64 %rl{{[0-9]+}}, [%r{{[0-9]+}}]
90 ; PTX32: ld.global.u64 %rd{{[0-9]+}}, [%r{{[0-9]+}}]
9191 ; PTX32: ret
92 ; PTX64: ld.global.u64 %rl{{[0-9]+}}, [%rl{{[0-9]+}}]
92 ; PTX64: ld.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
9393 ; PTX64: ret
9494 %a = load i64 addrspace(1)* %ptr
9595 ret i64 %a
9696 }
9797
9898 define i64 @ld_shared_i64(i64 addrspace(3)* %ptr) {
99 ; PTX32: ld.shared.u64 %rl{{[0-9]+}}, [%r{{[0-9]+}}]
99 ; PTX32: ld.shared.u64 %rd{{[0-9]+}}, [%r{{[0-9]+}}]
100100 ; PTX32: ret
101 ; PTX64: ld.shared.u64 %rl{{[0-9]+}}, [%rl{{[0-9]+}}]
101 ; PTX64: ld.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
102102 ; PTX64: ret
103103 %a = load i64 addrspace(3)* %ptr
104104 ret i64 %a
105105 }
106106
107107 define i64 @ld_local_i64(i64 addrspace(5)* %ptr) {
108 ; PTX32: ld.local.u64 %rl{{[0-9]+}}, [%r{{[0-9]+}}]
108 ; PTX32: ld.local.u64 %rd{{[0-9]+}}, [%r{{[0-9]+}}]
109109 ; PTX32: ret
110 ; PTX64: ld.local.u64 %rl{{[0-9]+}}, [%rl{{[0-9]+}}]
110 ; PTX64: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
111111 ; PTX64: ret
112112 %a = load i64 addrspace(5)* %ptr
113113 ret i64 %a
117117 define float @ld_global_f32(float addrspace(1)* %ptr) {
118118 ; PTX32: ld.global.f32 %f{{[0-9]+}}, [%r{{[0-9]+}}]
119119 ; PTX32: ret
120 ; PTX64: ld.global.f32 %f{{[0-9]+}}, [%rl{{[0-9]+}}]
120 ; PTX64: ld.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
121121 ; PTX64: ret
122122 %a = load float addrspace(1)* %ptr
123123 ret float %a
126126 define float @ld_shared_f32(float addrspace(3)* %ptr) {
127127 ; PTX32: ld.shared.f32 %f{{[0-9]+}}, [%r{{[0-9]+}}]
128128 ; PTX32: ret
129 ; PTX64: ld.shared.f32 %f{{[0-9]+}}, [%rl{{[0-9]+}}]
129 ; PTX64: ld.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
130130 ; PTX64: ret
131131 %a = load float addrspace(3)* %ptr
132132 ret float %a
135135 define float @ld_local_f32(float addrspace(5)* %ptr) {
136136 ; PTX32: ld.local.f32 %f{{[0-9]+}}, [%r{{[0-9]+}}]
137137 ; PTX32: ret
138 ; PTX64: ld.local.f32 %f{{[0-9]+}}, [%rl{{[0-9]+}}]
138 ; PTX64: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
139139 ; PTX64: ret
140140 %a = load float addrspace(5)* %ptr
141141 ret float %a
143143
144144 ;; f64
145145 define double @ld_global_f64(double addrspace(1)* %ptr) {
146 ; PTX32: ld.global.f64 %fl{{[0-9]+}}, [%r{{[0-9]+}}]
146 ; PTX32: ld.global.f64 %fd{{[0-9]+}}, [%r{{[0-9]+}}]
147147 ; PTX32: ret
148 ; PTX64: ld.global.f64 %fl{{[0-9]+}}, [%rl{{[0-9]+}}]
148 ; PTX64: ld.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
149149 ; PTX64: ret
150150 %a = load double addrspace(1)* %ptr
151151 ret double %a
152152 }
153153
154154 define double @ld_shared_f64(double addrspace(3)* %ptr) {
155 ; PTX32: ld.shared.f64 %fl{{[0-9]+}}, [%r{{[0-9]+}}]
155 ; PTX32: ld.shared.f64 %fd{{[0-9]+}}, [%r{{[0-9]+}}]
156156 ; PTX32: ret
157 ; PTX64: ld.shared.f64 %fl{{[0-9]+}}, [%rl{{[0-9]+}}]
157 ; PTX64: ld.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
158158 ; PTX64: ret
159159 %a = load double addrspace(3)* %ptr
160160 ret double %a
161161 }
162162
163163 define double @ld_local_f64(double addrspace(5)* %ptr) {
164 ; PTX32: ld.local.f64 %fl{{[0-9]+}}, [%r{{[0-9]+}}]
164 ; PTX32: ld.local.f64 %fd{{[0-9]+}}, [%r{{[0-9]+}}]
165165 ; PTX32: ret
166 ; PTX64: ld.local.f64 %fl{{[0-9]+}}, [%rl{{[0-9]+}}]
166 ; PTX64: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
167167 ; PTX64: ret
168168 %a = load double addrspace(5)* %ptr
169169 ret double %a
55 define i8 @ld_global_i8(i8 addrspace(0)* %ptr) {
66 ; PTX32: ld.u8 %r{{[0-9]+}}, [%r{{[0-9]+}}]
77 ; PTX32: ret
8 ; PTX64: ld.u8 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
8 ; PTX64: ld.u8 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
99 ; PTX64: ret
1010 %a = load i8 addrspace(0)* %ptr
1111 ret i8 %a
1515 define i16 @ld_global_i16(i16 addrspace(0)* %ptr) {
1616 ; PTX32: ld.u16 %r{{[0-9]+}}, [%r{{[0-9]+}}]
1717 ; PTX32: ret
18 ; PTX64: ld.u16 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
18 ; PTX64: ld.u16 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
1919 ; PTX64: ret
2020 %a = load i16 addrspace(0)* %ptr
2121 ret i16 %a
2525 define i32 @ld_global_i32(i32 addrspace(0)* %ptr) {
2626 ; PTX32: ld.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}]
2727 ; PTX32: ret
28 ; PTX64: ld.u32 %r{{[0-9]+}}, [%rl{{[0-9]+}}]
28 ; PTX64: ld.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
2929 ; PTX64: ret
3030 %a = load i32 addrspace(0)* %ptr
3131 ret i32 %a
3333
3434 ;; i64
3535 define i64 @ld_global_i64(i64 addrspace(0)* %ptr) {
36 ; PTX32: ld.u64 %rl{{[0-9]+}}, [%r{{[0-9]+}}]
36 ; PTX32: ld.u64 %rd{{[0-9]+}}, [%r{{[0-9]+}}]
3737 ; PTX32: ret
38 ; PTX64: ld.u64 %rl{{[0-9]+}}, [%rl{{[0-9]+}}]
38 ; PTX64: ld.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
3939 ; PTX64: ret
4040 %a = load i64 addrspace(0)* %ptr
4141 ret i64 %a
4545 define float @ld_global_f32(float addrspace(0)* %ptr) {
4646 ; PTX32: ld.f32 %f{{[0-9]+}}, [%r{{[0-9]+}}]
4747 ; PTX32: ret
48 ; PTX64: ld.f32 %f{{[0-9]+}}, [%rl{{[0-9]+}}]
48 ; PTX64: ld.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
4949 ; PTX64: ret
5050 %a = load float addrspace(0)* %ptr
5151 ret float %a
5353
5454 ;; f64
5555 define double @ld_global_f64(double addrspace(0)* %ptr) {
56 ; PTX32: ld.f64 %fl{{[0-9]+}}, [%r{{[0-9]+}}]
56 ; PTX32: ld.f64 %fd{{[0-9]+}}, [%r{{[0-9]+}}]
5757 ; PTX32: ret
58 ; PTX64: ld.f64 %fl{{[0-9]+}}, [%rl{{[0-9]+}}]
58 ; PTX64: ld.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
5959 ; PTX64: ret
6060 %a = load double addrspace(0)* %ptr
6161 ret double %a
66 ; PTX32: cvta.local.u32 %SP, %r{{[0-9]+}};
77 ; PTX32: ld.param.u32 %r{{[0-9]+}}, [foo_param_0];
88 ; PTX32: st.volatile.u32 [%SP+0], %r{{[0-9]+}};
9 ; PTX64: mov.u64 %rl{{[0-9]+}}, __local_depot{{[0-9]+}};
10 ; PTX64: cvta.local.u64 %SP, %rl{{[0-9]+}};
9 ; PTX64: mov.u64 %rd{{[0-9]+}}, __local_depot{{[0-9]+}};
10 ; PTX64: cvta.local.u64 %SP, %rd{{[0-9]+}};
1111 ; PTX64: ld.param.u32 %r{{[0-9]+}}, [foo_param_0];
1212 ; PTX64: st.volatile.u32 [%SP+0], %r{{[0-9]+}};
1313 define void @foo(i32 %a) {
44 ; PTX32: mov.u16 %rs{{[0-9]+}}, 0;
55 ; PTX32-NEXT: st.u8 [%r{{[0-9]+}}], %rs{{[0-9]+}};
66 ; PTX64: mov.u16 %rs{{[0-9]+}}, 0;
7 ; PTX64-NEXT: st.u8 [%rl{{[0-9]+}}], %rs{{[0-9]+}};
7 ; PTX64-NEXT: st.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}};
88 store i1 false, i1* %a
99 ret void
1010 }
1414 ; PTX32: ld.u8 %rs{{[0-9]+}}, [%r{{[0-9]+}}]
1515 ; PTX32: and.b16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, 1;
1616 ; PTX32: setp.eq.b16 %p{{[0-9]+}}, %rs{{[0-9]+}}, 1;
17 ; PTX64: ld.u8 %rs{{[0-9]+}}, [%rl{{[0-9]+}}]
17 ; PTX64: ld.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}]
1818 ; PTX64: and.b16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, 1;
1919 ; PTX64: setp.eq.b16 %p{{[0-9]+}}, %rs{{[0-9]+}}, 1;
2020
66 define void @st_global_i8(i8 addrspace(1)* %ptr, i8 %a) {
77 ; PTX32: st.global.u8 [%r{{[0-9]+}}], %rs{{[0-9]+}}
88 ; PTX32: ret
9 ; PTX64: st.global.u8 [%rl{{[0-9]+}}], %rs{{[0-9]+}}
9 ; PTX64: st.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
1010 ; PTX64: ret
1111 store i8 %a, i8 addrspace(1)* %ptr
1212 ret void
1515 define void @st_shared_i8(i8 addrspace(3)* %ptr, i8 %a) {
1616 ; PTX32: st.shared.u8 [%r{{[0-9]+}}], %rs{{[0-9]+}}
1717 ; PTX32: ret
18 ; PTX64: st.shared.u8 [%rl{{[0-9]+}}], %rs{{[0-9]+}}
18 ; PTX64: st.shared.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
1919 ; PTX64: ret
2020 store i8 %a, i8 addrspace(3)* %ptr
2121 ret void
2424 define void @st_local_i8(i8 addrspace(5)* %ptr, i8 %a) {
2525 ; PTX32: st.local.u8 [%r{{[0-9]+}}], %rs{{[0-9]+}}
2626 ; PTX32: ret
27 ; PTX64: st.local.u8 [%rl{{[0-9]+}}], %rs{{[0-9]+}}
27 ; PTX64: st.local.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
2828 ; PTX64: ret
2929 store i8 %a, i8 addrspace(5)* %ptr
3030 ret void
3535 define void @st_global_i16(i16 addrspace(1)* %ptr, i16 %a) {
3636 ; PTX32: st.global.u16 [%r{{[0-9]+}}], %rs{{[0-9]+}}
3737 ; PTX32: ret
38 ; PTX64: st.global.u16 [%rl{{[0-9]+}}], %rs{{[0-9]+}}
38 ; PTX64: st.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
3939 ; PTX64: ret
4040 store i16 %a, i16 addrspace(1)* %ptr
4141 ret void
4444 define void @st_shared_i16(i16 addrspace(3)* %ptr, i16 %a) {
4545 ; PTX32: st.shared.u16 [%r{{[0-9]+}}], %rs{{[0-9]+}}
4646 ; PTX32: ret
47 ; PTX64: st.shared.u16 [%rl{{[0-9]+}}], %rs{{[0-9]+}}
47 ; PTX64: st.shared.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
4848 ; PTX64: ret
4949 store i16 %a, i16 addrspace(3)* %ptr
5050 ret void
5353 define void @st_local_i16(i16 addrspace(5)* %ptr, i16 %a) {
5454 ; PTX32: st.local.u16 [%r{{[0-9]+}}], %rs{{[0-9]+}}
5555 ; PTX32: ret
56 ; PTX64: st.local.u16 [%rl{{[0-9]+}}], %rs{{[0-9]+}}
56 ; PTX64: st.local.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
5757 ; PTX64: ret
5858 store i16 %a, i16 addrspace(5)* %ptr
5959 ret void
6464 define void @st_global_i32(i32 addrspace(1)* %ptr, i32 %a) {
6565 ; PTX32: st.global.u32 [%r{{[0-9]+}}], %r{{[0-9]+}}
6666 ; PTX32: ret
67 ; PTX64: st.global.u32 [%rl{{[0-9]+}}], %r{{[0-9]+}}
67 ; PTX64: st.global.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
6868 ; PTX64: ret
6969 store i32 %a, i32 addrspace(1)* %ptr
7070 ret void
7373 define void @st_shared_i32(i32 addrspace(3)* %ptr, i32 %a) {
7474 ; PTX32: st.shared.u32 [%r{{[0-9]+}}], %r{{[0-9]+}}
7575 ; PTX32: ret
76 ; PTX64: st.shared.u32 [%rl{{[0-9]+}}], %r{{[0-9]+}}
76 ; PTX64: st.shared.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
7777 ; PTX64: ret
7878 store i32 %a, i32 addrspace(3)* %ptr
7979 ret void
8282 define void @st_local_i32(i32 addrspace(5)* %ptr, i32 %a) {
8383 ; PTX32: st.local.u32 [%r{{[0-9]+}}], %r{{[0-9]+}}
8484 ; PTX32: ret
85 ; PTX64: st.local.u32 [%rl{{[0-9]+}}], %r{{[0-9]+}}
85 ; PTX64: st.local.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
8686 ; PTX64: ret
8787 store i32 %a, i32 addrspace(5)* %ptr
8888 ret void
9191 ;; i64
9292
9393 define void @st_global_i64(i64 addrspace(1)* %ptr, i64 %a) {
94 ; PTX32: st.global.u64 [%r{{[0-9]+}}], %rl{{[0-9]+}}
94 ; PTX32: st.global.u64 [%r{{[0-9]+}}], %rd{{[0-9]+}}
9595 ; PTX32: ret
96 ; PTX64: st.global.u64 [%rl{{[0-9]+}}], %rl{{[0-9]+}}
96 ; PTX64: st.global.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
9797 ; PTX64: ret
9898 store i64 %a, i64 addrspace(1)* %ptr
9999 ret void
100100 }
101101
102102 define void @st_shared_i64(i64 addrspace(3)* %ptr, i64 %a) {
103 ; PTX32: st.shared.u64 [%r{{[0-9]+}}], %rl{{[0-9]+}}
103 ; PTX32: st.shared.u64 [%r{{[0-9]+}}], %rd{{[0-9]+}}
104104 ; PTX32: ret
105 ; PTX64: st.shared.u64 [%rl{{[0-9]+}}], %rl{{[0-9]+}}
105 ; PTX64: st.shared.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
106106 ; PTX64: ret
107107 store i64 %a, i64 addrspace(3)* %ptr
108108 ret void
109109 }
110110
111111 define void @st_local_i64(i64 addrspace(5)* %ptr, i64 %a) {
112 ; PTX32: st.local.u64 [%r{{[0-9]+}}], %rl{{[0-9]+}}
112 ; PTX32: st.local.u64 [%r{{[0-9]+}}], %rd{{[0-9]+}}
113113 ; PTX32: ret
114 ; PTX64: st.local.u64 [%rl{{[0-9]+}}], %rl{{[0-9]+}}
114 ; PTX64: st.local.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
115115 ; PTX64: ret
116116 store i64 %a, i64 addrspace(5)* %ptr
117117 ret void
122122 define void @st_global_f32(float addrspace(1)* %ptr, float %a) {
123123 ; PTX32: st.global.f32 [%r{{[0-9]+}}], %f{{[0-9]+}}
124124 ; PTX32: ret
125 ; PTX64: st.global.f32 [%rl{{[0-9]+}}], %f{{[0-9]+}}
125 ; PTX64: st.global.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
126126 ; PTX64: ret
127127 store float %a, float addrspace(1)* %ptr
128128 ret void
131131 define void @st_shared_f32(float addrspace(3)* %ptr, float %a) {
132132 ; PTX32: st.shared.f32 [%r{{[0-9]+}}], %f{{[0-9]+}}
133133 ; PTX32: ret
134 ; PTX64: st.shared.f32 [%rl{{[0-9]+}}], %f{{[0-9]+}}
134 ; PTX64: st.shared.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
135135 ; PTX64: ret
136136 store float %a, float addrspace(3)* %ptr
137137 ret void
140140 define void @st_local_f32(float addrspace(5)* %ptr, float %a) {
141141 ; PTX32: st.local.f32 [%r{{[0-9]+}}], %f{{[0-9]+}}
142142 ; PTX32: ret
143 ; PTX64: st.local.f32 [%rl{{[0-9]+}}], %f{{[0-9]+}}
143 ; PTX64: st.local.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
144144 ; PTX64: ret
145145 store float %a, float addrspace(5)* %ptr
146146 ret void
149149 ;; f64
150150
151151 define void @st_global_f64(double addrspace(1)* %ptr, double %a) {
152 ; PTX32: st.global.f64 [%r{{[0-9]+}}], %fl{{[0-9]+}}
152 ; PTX32: st.global.f64 [%r{{[0-9]+}}], %fd{{[0-9]+}}
153153 ; PTX32: ret
154 ; PTX64: st.global.f64 [%rl{{[0-9]+}}], %fl{{[0-9]+}}
154 ; PTX64: st.global.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
155155 ; PTX64: ret
156156 store double %a, double addrspace(1)* %ptr
157157 ret void
158158 }
159159
160160 define void @st_shared_f64(double addrspace(3)* %ptr, double %a) {
161 ; PTX32: st.shared.f64 [%r{{[0-9]+}}], %fl{{[0-9]+}}
161 ; PTX32: st.shared.f64 [%r{{[0-9]+}}], %fd{{[0-9]+}}
162162 ; PTX32: ret
163 ; PTX64: st.shared.f64 [%rl{{[0-9]+}}], %fl{{[0-9]+}}
163 ; PTX64: st.shared.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
164164 ; PTX64: ret
165165 store double %a, double addrspace(3)* %ptr
166166 ret void
167167 }
168168
169169 define void @st_local_f64(double addrspace(5)* %ptr, double %a) {
170 ; PTX32: st.local.f64 [%r{{[0-9]+}}], %fl{{[0-9]+}}
170 ; PTX32: st.local.f64 [%r{{[0-9]+}}], %fd{{[0-9]+}}
171171 ; PTX32: ret
172 ; PTX64: st.local.f64 [%rl{{[0-9]+}}], %fl{{[0-9]+}}
172 ; PTX64: st.local.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
173173 ; PTX64: ret
174174 store double %a, double addrspace(5)* %ptr
175175 ret void
66 define void @st_global_i8(i8 addrspace(0)* %ptr, i8 %a) {
77 ; PTX32: st.u8 [%r{{[0-9]+}}], %rs{{[0-9]+}}
88 ; PTX32: ret
9 ; PTX64: st.u8 [%rl{{[0-9]+}}], %rs{{[0-9]+}}
9 ; PTX64: st.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
1010 ; PTX64: ret
1111 store i8 %a, i8 addrspace(0)* %ptr
1212 ret void
1717 define void @st_global_i16(i16 addrspace(0)* %ptr, i16 %a) {
1818 ; PTX32: st.u16 [%r{{[0-9]+}}], %rs{{[0-9]+}}
1919 ; PTX32: ret
20 ; PTX64: st.u16 [%rl{{[0-9]+}}], %rs{{[0-9]+}}
20 ; PTX64: st.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
2121 ; PTX64: ret
2222 store i16 %a, i16 addrspace(0)* %ptr
2323 ret void
2828 define void @st_global_i32(i32 addrspace(0)* %ptr, i32 %a) {
2929 ; PTX32: st.u32 [%r{{[0-9]+}}], %r{{[0-9]+}}
3030 ; PTX32: ret
31 ; PTX64: st.u32 [%rl{{[0-9]+}}], %r{{[0-9]+}}
31 ; PTX64: st.u32 [%rd{{[0-9]+}}], %r{{[0-9]+}}
3232 ; PTX64: ret
3333 store i32 %a, i32 addrspace(0)* %ptr
3434 ret void
3737 ;; i64
3838
3939 define void @st_global_i64(i64 addrspace(0)* %ptr, i64 %a) {
40 ; PTX32: st.u64 [%r{{[0-9]+}}], %rl{{[0-9]+}}
40 ; PTX32: st.u64 [%r{{[0-9]+}}], %rd{{[0-9]+}}
4141 ; PTX32: ret
42 ; PTX64: st.u64 [%rl{{[0-9]+}}], %rl{{[0-9]+}}
42 ; PTX64: st.u64 [%rd{{[0-9]+}}], %rd{{[0-9]+}}
4343 ; PTX64: ret
4444 store i64 %a, i64 addrspace(0)* %ptr
4545 ret void
5050 define void @st_global_f32(float addrspace(0)* %ptr, float %a) {
5151 ; PTX32: st.f32 [%r{{[0-9]+}}], %f{{[0-9]+}}
5252 ; PTX32: ret
53 ; PTX64: st.f32 [%rl{{[0-9]+}}], %f{{[0-9]+}}
53 ; PTX64: st.f32 [%rd{{[0-9]+}}], %f{{[0-9]+}}
5454 ; PTX64: ret
5555 store float %a, float addrspace(0)* %ptr
5656 ret void
5959 ;; f64
6060
6161 define void @st_global_f64(double addrspace(0)* %ptr, double %a) {
62 ; PTX32: st.f64 [%r{{[0-9]+}}], %fl{{[0-9]+}}
62 ; PTX32: st.f64 [%r{{[0-9]+}}], %fd{{[0-9]+}}
6363 ; PTX32: ret
64 ; PTX64: st.f64 [%rl{{[0-9]+}}], %fl{{[0-9]+}}
64 ; PTX64: st.f64 [%rd{{[0-9]+}}], %fd{{[0-9]+}}
6565 ; PTX64: ret
6666 store double %a, double addrspace(0)* %ptr
6767 ret void
4444 ret void
4545 }
4646 ; PTX-LABEL: sum_of_array(
47 ; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG:%(rl|r)[0-9]+]]{{\]}}
47 ; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG:%(rd|r)[0-9]+]]{{\]}}
4848 ; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+4{{\]}}
4949 ; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+128{{\]}}
5050 ; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+132{{\]}}
8787 ret void
8888 }
8989 ; PTX-LABEL: sum_of_array2(
90 ; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG:%(rl|r)[0-9]+]]{{\]}}
90 ; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG:%(rd|r)[0-9]+]]{{\]}}
9191 ; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+4{{\]}}
9292 ; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+128{{\]}}
9393 ; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+132{{\]}}
128128 ret void
129129 }
130130 ; PTX-LABEL: sum_of_array3(
131 ; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG:%(rl|r)[0-9]+]]{{\]}}
131 ; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG:%(rd|r)[0-9]+]]{{\]}}
132132 ; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+4{{\]}}
133133 ; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+128{{\]}}
134134 ; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+132{{\]}}