llvm.org GIT mirror llvm / 5556d84 test / CodeGen / AMDGPU / add.ll
5556d84

Tree @5556d84 (Download .tar.gz)

add.ll @5556d84

d707747
d019e86
d707747
f98f2ce
56077f5
90eef42
b19b474
f9e9586
 
 
d706d03
198d8ba
7c9c6ed
 
b19b474
 
 
 
 
56077f5
90eef42
 
f98f2ce
f9e9586
 
fe91c51
d706d03
198d8ba
7c9c6ed
 
fe91c51
 
 
 
 
56077f5
90eef42
 
 
 
fe91c51
f9e9586
 
 
 
fe91c51
d706d03
198d8ba
7c9c6ed
 
f98f2ce
 
 
 
0f2e653
56077f5
90eef42
 
 
 
 
 
 
 
e3d17d7
90eef42
 
 
 
 
 
 
 
d706d03
0f2e653
 
 
 
 
d8c3104
56077f5
90eef42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e3d17d7
90eef42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d706d03
9f0d68f
 
 
 
 
 
56077f5
90eef42
 
e3d17d7
df58717
 
e3d17d7
 
df58717
e3d17d7
d706d03
d8c3104
 
 
 
 
6cadd40
8eaed0f
6cadd40
 
 
 
56077f5
90eef42
e3d17d7
df58717
 
e3d17d7
 
df58717
e3d17d7
d706d03
6cadd40
7c9c6ed
6cadd40
 
 
 
c1de569
81c6c96
56077f5
90eef42
 
e3d17d7
df58717
 
e3d17d7
 
df58717
e3d17d7
d706d03
c1de569
 
 
 
 
7c9c6ed
c1de569
 
 
 
 
 
 
 
 
 
 
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

;FUNC-LABEL: {{^}}test1:
;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}

;SI: s_add_i32 s[[REG:[0-9]+]], {{s[0-9]+, s[0-9]+}}
;SI: v_mov_b32_e32 v[[REG]], s[[REG]]
;SI: buffer_store_dword v[[REG]],
define amdgpu_kernel void @test1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %a = load i32, i32 addrspace(1)* %in
  %b = load i32, i32 addrspace(1)* %b_ptr
  %result = add i32 %a, %b
  store i32 %result, i32 addrspace(1)* %out
  ret void
}

;FUNC-LABEL: {{^}}test2:
;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}

;SI: s_add_i32 s{{[0-9]+, s[0-9]+, s[0-9]+}}
;SI: s_add_i32 s{{[0-9]+, s[0-9]+, s[0-9]+}}

define amdgpu_kernel void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
  %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
  %a = load <2 x i32>, <2 x i32> addrspace(1)* %in
  %b = load <2 x i32>, <2 x i32> addrspace(1)* %b_ptr
  %result = add <2 x i32> %a, %b
  store <2 x i32> %result, <2 x i32> addrspace(1)* %out
  ret void
}

;FUNC-LABEL: {{^}}test4:
;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}

;SI: s_add_i32 s{{[0-9]+, s[0-9]+, s[0-9]+}}
;SI: s_add_i32 s{{[0-9]+, s[0-9]+, s[0-9]+}}
;SI: s_add_i32 s{{[0-9]+, s[0-9]+, s[0-9]+}}
;SI: s_add_i32 s{{[0-9]+, s[0-9]+, s[0-9]+}}

define amdgpu_kernel void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
  %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
  %a = load <4 x i32>, <4 x i32> addrspace(1)* %in
  %b = load <4 x i32>, <4 x i32> addrspace(1)* %b_ptr
  %result = add <4 x i32> %a, %b
  store <4 x i32> %result, <4 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}test8:
; EG: ADD_INT
; EG: ADD_INT
; EG: ADD_INT
; EG: ADD_INT
; EG: ADD_INT
; EG: ADD_INT
; EG: ADD_INT
; EG: ADD_INT

; SI: s_add_i32
; SI: s_add_i32
; SI: s_add_i32
; SI: s_add_i32
; SI: s_add_i32
; SI: s_add_i32
; SI: s_add_i32
; SI: s_add_i32
define amdgpu_kernel void @test8(<8 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32> %b) {
entry:
  %0 = add <8 x i32> %a, %b
  store <8 x i32> %0, <8 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}test16:
; EG: ADD_INT
; EG: ADD_INT
; EG: ADD_INT
; EG: ADD_INT
; EG: ADD_INT
; EG: ADD_INT
; EG: ADD_INT
; EG: ADD_INT
; EG: ADD_INT
; EG: ADD_INT
; EG: ADD_INT
; EG: ADD_INT
; EG: ADD_INT
; EG: ADD_INT
; EG: ADD_INT
; EG: ADD_INT

; SI: s_add_i32
; SI: s_add_i32
; SI: s_add_i32
; SI: s_add_i32
; SI: s_add_i32
; SI: s_add_i32
; SI: s_add_i32
; SI: s_add_i32
; SI: s_add_i32
; SI: s_add_i32
; SI: s_add_i32
; SI: s_add_i32
; SI: s_add_i32
; SI: s_add_i32
; SI: s_add_i32
; SI: s_add_i32
define amdgpu_kernel void @test16(<16 x i32> addrspace(1)* %out, <16 x i32> %a, <16 x i32> %b) {
entry:
  %0 = add <16 x i32> %a, %b
  store <16 x i32> %0, <16 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}add64:
; SI: s_add_u32
; SI: s_addc_u32

; EG: MEM_RAT_CACHELESS STORE_RAW [[LO:T[0-9]+\.XY]]
; EG-DAG: ADD_INT {{[* ]*}}
; EG-DAG: ADDC_UINT
; EG-DAG: ADD_INT
; EG-DAG: ADD_INT {{[* ]*}}
; EG-NOT: SUB
define amdgpu_kernel void @add64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
entry:
  %0 = add i64 %a, %b
  store i64 %0, i64 addrspace(1)* %out
  ret void
}

; The v_addc_u32 and v_add_i32 instruction can't read SGPRs, because they
; use VCC.  The test is designed so that %a will be stored in an SGPR and
; %0 will be stored in a VGPR, so the comiler will be forced to copy %a
; to a VGPR before doing the add.

; FUNC-LABEL: {{^}}add64_sgpr_vgpr:
; SI-NOT: v_addc_u32_e32 s

; EG: MEM_RAT_CACHELESS STORE_RAW [[LO:T[0-9]+\.XY]]
; EG-DAG: ADD_INT {{[* ]*}}
; EG-DAG: ADDC_UINT
; EG-DAG: ADD_INT
; EG-DAG: ADD_INT {{[* ]*}}
; EG-NOT: SUB
define amdgpu_kernel void @add64_sgpr_vgpr(i64 addrspace(1)* %out, i64 %a, i64 addrspace(1)* %in) {
entry:
  %0 = load i64, i64 addrspace(1)* %in
  %1 = add i64 %a, %0
  store i64 %1, i64 addrspace(1)* %out
  ret void
}

; Test i64 add inside a branch.
; FUNC-LABEL: {{^}}add64_in_branch:
; SI: s_add_u32
; SI: s_addc_u32

; EG: MEM_RAT_CACHELESS STORE_RAW [[LO:T[0-9]+\.XY]]
; EG-DAG: ADD_INT {{[* ]*}}
; EG-DAG: ADDC_UINT
; EG-DAG: ADD_INT
; EG-DAG: ADD_INT {{[* ]*}}
; EG-NOT: SUB
define amdgpu_kernel void @add64_in_branch(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %a, i64 %b, i64 %c) {
entry:
  %0 = icmp eq i64 %a, 0
  br i1 %0, label %if, label %else

if:
  %1 = load i64, i64 addrspace(1)* %in
  br label %endif

else:
  %2 = add i64 %a, %b
  br label %endif

endif:
  %3 = phi i64 [%1, %if], [%2, %else]
  store i64 %3, i64 addrspace(1)* %out
  ret void
}