; Origin: llvm.org Git mirror, llvm @ c95a737, test/CodeGen/AMDGPU/fsub.ll
; (Page navigation text and the per-line blame-hash column left over from the
; web viewer export were dropped here; they are not part of the test input.)

; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s

; Scalar f32 subtraction with both operands read from memory.
; Loads element 0 and element 1 through %in, computes in[0] - in[1], and
; stores the difference through %out. The SI check line below expects one
; v_subrev_f32_e32 whose three operands are all v-registers.
; FUNC-LABEL: {{^}}v_fsub_f32:
; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
define void @v_fsub_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
  ; Address of the second float (index 1) behind %in.
  %in.next = getelementptr float, float addrspace(1)* %in, i32 1
  %lhs = load float, float addrspace(1)* %in, align 4
  %rhs = load float, float addrspace(1)* %in.next, align 4
  %diff = fsub float %lhs, %rhs
  store float %diff, float addrspace(1)* %out, align 4
  ret void
}

; Scalar f32 subtraction with both operands passed directly as function
; arguments (%a - %b); the difference is stored through %out.
; The R600 check line expects an ADD whose second source is the negated
; KC0[2].W; the SI check line expects v_sub_f32_e32 with an s-register as
; its first source operand.
; FUNC-LABEL: {{^}}s_fsub_f32:
; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, -KC0[2].W

; SI: v_sub_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
define void @s_fsub_f32(float addrspace(1)* %out, float %a, float %b) {
  %diff = fsub float %a, %b
  store float %diff, float addrspace(1)* %out, align 4
  ret void
}

; Intrinsic declarations.
; NOTE(review): neither intrinsic is referenced by any function visible in this
; portion of the file; confirm against the complete file before treating them
; as dead declarations.
declare float @llvm.r600.load.input(i32) readnone

declare void @llvm.AMDGPU.store.output(float, i32)

; <2 x float> subtraction with both vectors passed directly as function
; arguments (%a - %b); the 8-byte-aligned result is stored through %out.
; The R600 check lines expect two ADDs with a negated second source, matched
; in any order; the SI check lines expect two v_subrev_f32_e32 instructions,
; each taking an s-register as its first source operand.
; FUNC-LABEL: {{^}}fsub_v2f32:
; R600-DAG: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, -KC0[3].Z
; R600-DAG: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, -KC0[3].Y

; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
define void @fsub_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) {
  %diff = fsub <2 x float> %a, %b
  store <2 x float> %diff, <2 x float> addrspace(1)* %out, align 8
  ret void
}

; <4 x float> subtraction with both vectors read from memory.
; Loads vector 0 and vector 1 through %in, computes in[0] - in[1] lane-wise,
; and stores the result through %out. The check lines below expect four
; subtract instructions per target: ADD with a negated T-register source on
; R600, and v_subrev_f32_e32 with all v-register operands on SI.
; FUNC-LABEL: {{^}}v_fsub_v4f32:
; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}}
; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}}
; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}}
; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}}

; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
define void @v_fsub_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
  ; Address of the second <4 x float> (index 1) behind %in.
  %in.next = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1
  %lhs = load <4 x float>, <4 x float> addrspace(1)* %in, align 16
  %rhs = load <4 x float>, <4 x float> addrspace(1)* %in.next, align 16
  %diff = fsub <4 x float> %lhs, %rhs
  store <4 x float> %diff, <4 x float> addrspace(1)* %out, align 16
  ret void
}

; <4 x float> subtraction with both vectors passed directly as function
; arguments (%a - %b); the 16-byte-aligned result is stored through %out.
; The SI check lines expect four v_subrev_f32_e32 instructions, each taking an
; s-register as its first source operand, followed by s_endpgm.
; FUNC-LABEL: {{^}}s_fsub_v4f32:
; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; SI: s_endpgm
define void @s_fsub_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) {
  %diff = fsub <4 x float> %a, %b
  store <4 x float> %diff, <4 x float> addrspace(1)* %out, align 16
  ret void
}