llvm.org GIT mirror llvm / testing test / CodeGen / AMDGPU / inline-asm.ll
testing

Tree @testing (Download .tar.gz)

inline-asm.ll @testing

61691ce
d019e86
a4cb890
61691ce
a4cb890
 
d706d03
a4cb890
 
 
 
 
bd0b681
61691ce
bd0b681
 
ea7a0c0
bd0b681
 
 
 
 
98ef447
 
 
 
 
d706d03
98ef447
 
 
 
 
 
 
 
 
 
 
0262953
61691ce
0262953
7eba65d
0262953
 
 
d706d03
7eba65d
0262953
 
 
61691ce
 
 
d706d03
61691ce
 
 
 
 
 
 
 
d706d03
61691ce
 
 
 
 
 
 
d706d03
61691ce
 
 
 
 
 
 
4a37139
 
 
d706d03
4a37139
 
 
 
 
 
 
 
 
 
 
d706d03
4a37139
 
 
 
 
 
 
d706d03
4a37139
 
 
 
 
 
 
d706d03
4a37139
 
 
 
 
 
 
 
d706d03
4a37139
 
 
 
 
 
 
 
d706d03
4a37139
 
 
 
 
 
 
d706d03
4a37139
 
 
 
 
 
 
d706d03
4a37139
 
 
 
 
 
 
 
 
 
 
 
 
 
d706d03
4a37139
 
 
 
 
 
 
 
 
 
 
 
 
d706d03
4a37139
 
 
 
 
 
 
 
 
 
 
651ac56
 
 
 
 
 
 
 
a0540d3
651ac56
271bf6e
651ac56
 
ab28f3b
 
 
 
 
 
271bf6e
ab28f3b
 
 
 
 
 
 
 
 
 
 
 
271bf6e
ab28f3b
 
 
 
 
 
 
8763b3a
 
ab28f3b
 
 
 
271bf6e
ab28f3b
 
23450e5
 
 
 
 
 
 
 
271bf6e
 
23450e5
 
 
 
e695a23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck %s

; CHECK-LABEL: {{^}}inline_asm:
; CHECK: s_endpgm
; CHECK: s_endpgm
define amdgpu_kernel void @inline_asm(i32 addrspace(1)* %out) {
entry:
  store i32 5, i32 addrspace(1)* %out
  call void asm sideeffect "s_endpgm", ""()
  ret void
}

; CHECK-LABEL: {{^}}inline_asm_shader:
; CHECK: s_endpgm
; CHECK: s_endpgm
define amdgpu_ps void @inline_asm_shader() {
entry:
  call void asm sideeffect "s_endpgm", ""()
  ret void
}


; CHECK: {{^}}branch_on_asm:
; Make sure inline assembly is treted as divergent.
; CHECK: s_mov_b32 s{{[0-9]+}}, 0
; CHECK: s_and_saveexec_b64
define amdgpu_kernel void @branch_on_asm(i32 addrspace(1)* %out) {
	%zero = call i32 asm "s_mov_b32 $0, 0", "=s"()
	%cmp = icmp eq i32 %zero, 0
	br i1 %cmp, label %if, label %endif

if:
	store i32 0, i32 addrspace(1)* %out
	br label %endif

endif:
  ret void
}

; CHECK-LABEL: {{^}}v_cmp_asm:
; CHECK: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
; CHECK: v_cmp_ne_u32_e64 s{{\[}}[[MASK_LO:[0-9]+]]:[[MASK_HI:[0-9]+]]{{\]}}, 0, [[SRC]]
; CHECK-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[MASK_LO]]
; CHECK-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[MASK_HI]]
; CHECK: buffer_store_dwordx2 v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
define amdgpu_kernel void @v_cmp_asm(i64 addrspace(1)* %out, i32 %in) {
  %sgpr = tail call i64 asm "v_cmp_ne_u32_e64 $0, 0, $1", "=s,v"(i32 %in)
  store i64 %sgpr, i64 addrspace(1)* %out
  ret void
}

; CHECK-LABEL: {{^}}code_size_inline_asm:
; CHECK: codeLenInByte = 12
define amdgpu_kernel void @code_size_inline_asm(i32 addrspace(1)* %out) {
entry:
  call void asm sideeffect "v_nop_e64", ""()
  ret void
}

; All inlineasm instructions are assumed to be the maximum size
; CHECK-LABEL: {{^}}code_size_inline_asm_small_inst:
; CHECK: codeLenInByte = 12
define amdgpu_kernel void @code_size_inline_asm_small_inst(i32 addrspace(1)* %out) {
entry:
  call void asm sideeffect "v_nop_e32", ""()
  ret void
}

; CHECK-LABEL: {{^}}code_size_inline_asm_2_inst:
; CHECK: codeLenInByte = 20
define amdgpu_kernel void @code_size_inline_asm_2_inst(i32 addrspace(1)* %out) {
entry:
  call void asm sideeffect "
    v_nop_e64
    v_nop_e64
   ", ""()
  ret void
}

; CHECK-LABEL: {{^}}code_size_inline_asm_2_inst_extra_newline:
; CHECK: codeLenInByte = 20
define amdgpu_kernel void @code_size_inline_asm_2_inst_extra_newline(i32 addrspace(1)* %out) {
entry:
  call void asm sideeffect "
    v_nop_e64

    v_nop_e64
   ", ""()
  ret void
}

; CHECK-LABEL: {{^}}code_size_inline_asm_0_inst:
; CHECK: codeLenInByte = 4
define amdgpu_kernel void @code_size_inline_asm_0_inst(i32 addrspace(1)* %out) {
entry:
  call void asm sideeffect "", ""()
  ret void
}

; CHECK-LABEL: {{^}}code_size_inline_asm_1_comment:
; CHECK: codeLenInByte = 4
define amdgpu_kernel void @code_size_inline_asm_1_comment(i32 addrspace(1)* %out) {
entry:
  call void asm sideeffect "; comment", ""()
  ret void
}

; CHECK-LABEL: {{^}}code_size_inline_asm_newline_1_comment:
; CHECK: codeLenInByte = 4
define amdgpu_kernel void @code_size_inline_asm_newline_1_comment(i32 addrspace(1)* %out) {
entry:
  call void asm sideeffect "
; comment", ""()
  ret void
}

; CHECK-LABEL: {{^}}code_size_inline_asm_1_comment_newline:
; CHECK: codeLenInByte = 4
define amdgpu_kernel void @code_size_inline_asm_1_comment_newline(i32 addrspace(1)* %out) {
entry:
  call void asm sideeffect "; comment
", ""()
  ret void
}

; CHECK-LABEL: {{^}}code_size_inline_asm_2_comments_line:
; CHECK: codeLenInByte = 4
define amdgpu_kernel void @code_size_inline_asm_2_comments_line(i32 addrspace(1)* %out) {
entry:
  call void asm sideeffect "; first comment ; second comment", ""()
  ret void
}

; CHECK-LABEL: {{^}}code_size_inline_asm_2_comments_line_nospace:
; CHECK: codeLenInByte = 4
define amdgpu_kernel void @code_size_inline_asm_2_comments_line_nospace(i32 addrspace(1)* %out) {
entry:
  call void asm sideeffect "; first comment;second comment", ""()
  ret void
}

; CHECK-LABEL: {{^}}code_size_inline_asm_mixed_comments0:
; CHECK: codeLenInByte = 20
define amdgpu_kernel void @code_size_inline_asm_mixed_comments0(i32 addrspace(1)* %out) {
entry:
  call void asm sideeffect "; comment
    v_nop_e64 ; inline comment
; separate comment
    v_nop_e64

    ; trailing comment
    ; extra comment
  ", ""()
  ret void
}

; CHECK-LABEL: {{^}}code_size_inline_asm_mixed_comments1:
; CHECK: codeLenInByte = 20
define amdgpu_kernel void @code_size_inline_asm_mixed_comments1(i32 addrspace(1)* %out) {
entry:
  call void asm sideeffect "v_nop_e64 ; inline comment
; separate comment
    v_nop_e64

    ; trailing comment
    ; extra comment
  ", ""()
  ret void
}

; CHECK-LABEL: {{^}}code_size_inline_asm_mixed_comments_operands:
; CHECK: codeLenInByte = 20
define amdgpu_kernel void @code_size_inline_asm_mixed_comments_operands(i32 addrspace(1)* %out) {
entry:
  call void asm sideeffect "; comment
    v_add_i32_e32 v0, vcc, v1, v2 ; inline comment
; separate comment
    v_bfrev_b32_e32 v0, 1

    ; trailing comment
    ; extra comment
  ", ""()
  ret void
}

; FIXME: Should not have intermediate sgprs
; CHECK-LABEL: {{^}}i64_imm_input_phys_vgpr:
; CHECK: s_mov_b32 s1, 0
; CHECK: s_mov_b32 s0, 0x1e240
; CHECK: v_mov_b32_e32 v0, s0
; CHECK: v_mov_b32_e32 v1, s1
; CHECK: use v[0:1]
define amdgpu_kernel void @i64_imm_input_phys_vgpr() {
entry:
  call void asm sideeffect "; use $0 ", "{v[0:1]}"(i64 123456)
  ret void
}

; CHECK-LABEL: {{^}}i1_imm_input_phys_vgpr:
; CHECK: v_mov_b32_e32 v0, -1{{$}}
; CHECK: ; use v0
define amdgpu_kernel void @i1_imm_input_phys_vgpr() {
entry:
  call void asm sideeffect "; use $0 ", "{v0}"(i1 true)
  ret void
}

; CHECK-LABEL: {{^}}i1_input_phys_vgpr:
; CHECK: {{buffer|flat}}_load_ubyte [[LOAD:v[0-9]+]]
; CHECK: v_and_b32_e32 [[LOAD]], 1, [[LOAD]]
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 1, [[LOAD]]
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; CHECK: ; use v0
define amdgpu_kernel void @i1_input_phys_vgpr() {
entry:
  %val = load i1, i1 addrspace(1)* undef
  call void asm sideeffect "; use $0 ", "{v0}"(i1 %val)
  ret void
}

; FIXME: Should be scheduled to shrink vcc
; CHECK-LABEL: {{^}}i1_input_phys_vgpr_x2:
; CHECK: v_cmp_eq_u32_e32 vcc, 1, v0
; CHECK: v_cndmask_b32_e64 v0, 0, -1, vcc
; CHECK: v_cmp_eq_u32_e32 vcc, 1, v1
; CHECK: v_cndmask_b32_e64 v1, 0, -1, vcc
define amdgpu_kernel void @i1_input_phys_vgpr_x2() {
entry:
  %val0 = load volatile i1, i1 addrspace(1)* undef
  %val1 = load volatile i1, i1 addrspace(1)* undef
  call void asm sideeffect "; use $0 $1 ", "{v0}, {v1}"(i1 %val0, i1 %val1)
  ret void
}

; CHECK-LABEL: {{^}}muliple_def_phys_vgpr:
; CHECK: ; def v0
; CHECK: v_mov_b32_e32 v1, v0
; CHECK: ; def v0
; CHECK: v_lshlrev_b32_e32 v{{[0-9]+}}, v0, v1
define amdgpu_kernel void @muliple_def_phys_vgpr() {
entry:
  %def0 = call i32 asm sideeffect "; def $0 ", "={v0}"()
  %def1 = call i32 asm sideeffect "; def $0 ", "={v0}"()
  %add = shl i32 %def0, %def1
  store i32 %add, i32 addrspace(1)* undef
  ret void
}

; CHECK-LABEL: {{^}}asm_constraint_c_n:
; CHECK: s_trap 10{{$}}
define amdgpu_kernel void @asm_constraint_c_n()  {
entry:
  tail call void asm sideeffect "s_trap ${0:c}", "n"(i32 10) #1
  ret void
}

; CHECK-LABEL: {{^}}asm_constraint_n_n:
; CHECK: s_trap -10{{$}}
define amdgpu_kernel void @asm_constraint_n_n()  {
entry:
  tail call void asm sideeffect "s_trap ${0:n}", "n"(i32 10) #1
  ret void
}