llvm.org GIT mirror llvm / testing test / CodeGen / AMDGPU / cgp-bitfield-extract.ll
testing

Tree @testing (Download .tar.gz)

cgp-bitfield-extract.ll @testing

12cb9f0
d019e86
12cb9f0
d019e86
12cb9f0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0b698fe
12cb9f0
 
 
 
 
 
 
 
d706d03
12cb9f0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d706d03
12cb9f0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0deee39
12cb9f0
 
 
3081264
 
0b698fe
12cb9f0
0deee39
3081264
0deee39
12cb9f0
0deee39
3081264
12cb9f0
 
 
 
d706d03
12cb9f0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8b38a13
 
 
12cb9f0
 
8b38a13
12cb9f0
 
 
d706d03
12cb9f0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0b698fe
12cb9f0
88c88b2
12cb9f0
 
88c88b2
12cb9f0
 
 
d706d03
12cb9f0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0b698fe
12cb9f0
 
 
 
 
 
 
d706d03
12cb9f0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
; RUN: opt -S -mtriple=amdgcn-- -codegenprepare < %s | FileCheck -check-prefix=OPT %s
; RUN: opt -S -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -codegenprepare < %s | FileCheck -check-prefix=OPT %s
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

; This particular case will actually be worse in terms of code size
; from sinking into both.

; OPT-LABEL: @sink_ubfe_i32(
; OPT: entry:
; OPT-NEXT: br i1

; OPT: bb0:
; OPT: %0 = lshr i32 %arg1, 8
; OPT-NEXT: %val0 = and i32 %0, 255
; OPT: br label

; OPT: bb1:
; OPT: %1 = lshr i32 %arg1, 8
; OPT-NEXT: %val1 = and i32 %1, 127
; OPT: br label

; OPT: ret:
; OPT: store
; OPT: ret


; GCN-LABEL: {{^}}sink_ubfe_i32:
; GCN-NOT: lshr
; GCN: s_cbranch_scc1

; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80008
; GCN: BB0_2:
; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x70008

; GCN: BB0_3:
; GCN: buffer_store_dword
; GCN: s_endpgm
define amdgpu_kernel void @sink_ubfe_i32(i32 addrspace(1)* %out, i32 %arg1) #0 {
entry:
  %shr = lshr i32 %arg1, 8
  br i1 undef, label %bb0, label %bb1

bb0:
  %val0 = and i32 %shr, 255
  store volatile i32 0, i32 addrspace(1)* undef
  br label %ret

bb1:
  %val1 = and i32 %shr, 127
  store volatile i32 0, i32 addrspace(1)* undef
  br label %ret

ret:
  %phi = phi i32 [ %val0, %bb0 ], [ %val1, %bb1 ]
  store i32 %phi, i32 addrspace(1)* %out
  ret void
}

; OPT-LABEL: @sink_sbfe_i32(
; OPT: entry:
; OPT-NEXT: br i1

; OPT: bb0:
; OPT: %0 = ashr i32 %arg1, 8
; OPT-NEXT: %val0 = and i32 %0, 255
; OPT: br label

; OPT: bb1:
; OPT: %1 = ashr i32 %arg1, 8
; OPT-NEXT: %val1 = and i32 %1, 127
; OPT: br label

; OPT: ret:
; OPT: store
; OPT: ret

; GCN-LABEL: {{^}}sink_sbfe_i32:
define amdgpu_kernel void @sink_sbfe_i32(i32 addrspace(1)* %out, i32 %arg1) #0 {
entry:
  %shr = ashr i32 %arg1, 8
  br i1 undef, label %bb0, label %bb1

bb0:
  %val0 = and i32 %shr, 255
  store volatile i32 0, i32 addrspace(1)* undef
  br label %ret

bb1:
  %val1 = and i32 %shr, 127
  store volatile i32 0, i32 addrspace(1)* undef
  br label %ret

ret:
  %phi = phi i32 [ %val0, %bb0 ], [ %val1, %bb1 ]
  store i32 %phi, i32 addrspace(1)* %out
  ret void
}


; OPT-LABEL: @sink_ubfe_i16(
; OPT: entry:
; OPT-NEXT: br i1

; OPT: bb0:
; OPT: %0 = lshr i16 %arg1, 4
; OPT-NEXT: %val0 = and i16 %0, 255
; OPT: br label

; OPT: bb1:
; OPT: %1 = lshr i16 %arg1, 4
; OPT-NEXT: %val1 = and i16 %1, 127
; OPT: br label

; OPT: ret:
; OPT: store
; OPT: ret

; For GFX8: since i16 is legal type, we cannot sink lshr into BBs.

; GCN-LABEL: {{^}}sink_ubfe_i16:
; GCN-NOT: lshr
; VI: s_load_dword [[ARG:s[0-9]+]], s[0:1], 0x2c
; VI: s_bfe_u32 [[BFE:s[0-9]+]], [[ARG]], 0xc0004
; GCN: s_cbranch_scc1

; SI: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80004
; VI: s_and_b32 s{{[0-9]+}}, [[BFE]], 0xff

; GCN: BB2_2:
; SI: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x70004
; VI: s_and_b32 s{{[0-9]+}}, [[BFE]], 0x7f

; GCN: BB2_3:
; GCN: buffer_store_short
; GCN: s_endpgm
define amdgpu_kernel void @sink_ubfe_i16(i16 addrspace(1)* %out, i16 %arg1) #0 {
entry:
  %shr = lshr i16 %arg1, 4
  br i1 undef, label %bb0, label %bb1

bb0:
  %val0 = and i16 %shr, 255
  store volatile i16 0, i16 addrspace(1)* undef
  br label %ret

bb1:
  %val1 = and i16 %shr, 127
  store volatile i16 0, i16 addrspace(1)* undef
  br label %ret

ret:
  %phi = phi i16 [ %val0, %bb0 ], [ %val1, %bb1 ]
  store i16 %phi, i16 addrspace(1)* %out
  ret void
}

; We don't really want to sink this one since it isn't reducible to a
; 32-bit BFE on one half of the integer.

; OPT-LABEL: @sink_ubfe_i64_span_midpoint(
; OPT: entry:
; OPT-NOT: lshr
; OPT: br i1

; OPT: bb0:
; OPT: %0 = lshr i64 %arg1, 30
; OPT-NEXT: %val0 = and i64 %0, 255

; OPT: bb1:
; OPT: %1 = lshr i64 %arg1, 30
; OPT-NEXT: %val1 = and i64 %1, 127

; OPT: ret:
; OPT: store
; OPT: ret

; GCN-LABEL: {{^}}sink_ubfe_i64_span_midpoint:

; GCN: v_alignbit_b32 v[[LO:[0-9]+]], s{{[0-9]+}}, v{{[0-9]+}}, 30
; GCN: s_cbranch_scc1 BB3_2
; GCN: v_and_b32_e32 v{{[0-9]+}}, 0xff, v[[LO]]

; GCN: BB3_2:
; GCN: v_and_b32_e32 v{{[0-9]+}}, 0x7f, v[[LO]]

; GCN: BB3_3:
; GCN: buffer_store_dwordx2
define amdgpu_kernel void @sink_ubfe_i64_span_midpoint(i64 addrspace(1)* %out, i64 %arg1) #0 {
entry:
  %shr = lshr i64 %arg1, 30
  br i1 undef, label %bb0, label %bb1

bb0:
  %val0 = and i64 %shr, 255
  store volatile i32 0, i32 addrspace(1)* undef
  br label %ret

bb1:
  %val1 = and i64 %shr, 127
  store volatile i32 0, i32 addrspace(1)* undef
  br label %ret

ret:
  %phi = phi i64 [ %val0, %bb0 ], [ %val1, %bb1 ]
  store i64 %phi, i64 addrspace(1)* %out
  ret void
}

; OPT-LABEL: @sink_ubfe_i64_low32(
; OPT: entry:
; OPT-NOT: lshr
; OPT: br i1

; OPT: bb0:
; OPT: %0 = lshr i64 %arg1, 15
; OPT-NEXT: %val0 = and i64 %0, 255

; OPT: bb1:
; OPT: %1 = lshr i64 %arg1, 15
; OPT-NEXT: %val1 = and i64 %1, 127

; OPT: ret:
; OPT: store
; OPT: ret

; GCN-LABEL: {{^}}sink_ubfe_i64_low32:

; GCN: s_cbranch_scc1 BB4_2

; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x8000f

; GCN: BB4_2:
; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x7000f

; GCN: BB4_3:
; GCN: buffer_store_dwordx2
define amdgpu_kernel void @sink_ubfe_i64_low32(i64 addrspace(1)* %out, i64 %arg1) #0 {
entry:
  %shr = lshr i64 %arg1, 15
  br i1 undef, label %bb0, label %bb1

bb0:
  %val0 = and i64 %shr, 255
  store volatile i32 0, i32 addrspace(1)* undef
  br label %ret

bb1:
  %val1 = and i64 %shr, 127
  store volatile i32 0, i32 addrspace(1)* undef
  br label %ret

ret:
  %phi = phi i64 [ %val0, %bb0 ], [ %val1, %bb1 ]
  store i64 %phi, i64 addrspace(1)* %out
  ret void
}

; OPT-LABEL: @sink_ubfe_i64_high32(
; OPT: entry:
; OPT-NOT: lshr
; OPT: br i1

; OPT: bb0:
; OPT: %0 = lshr i64 %arg1, 35
; OPT-NEXT: %val0 = and i64 %0, 255

; OPT: bb1:
; OPT: %1 = lshr i64 %arg1, 35
; OPT-NEXT: %val1 = and i64 %1, 127

; OPT: ret:
; OPT: store
; OPT: ret

; GCN-LABEL: {{^}}sink_ubfe_i64_high32:
; GCN: s_cbranch_scc1 BB5_2
; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80003

; GCN: BB5_2:
; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x70003

; GCN: BB5_3:
; GCN: buffer_store_dwordx2
define amdgpu_kernel void @sink_ubfe_i64_high32(i64 addrspace(1)* %out, i64 %arg1) #0 {
entry:
  %shr = lshr i64 %arg1, 35
  br i1 undef, label %bb0, label %bb1

bb0:
  %val0 = and i64 %shr, 255
  store volatile i32 0, i32 addrspace(1)* undef
  br label %ret

bb1:
  %val1 = and i64 %shr, 127
  store volatile i32 0, i32 addrspace(1)* undef
  br label %ret

ret:
  %phi = phi i64 [ %val0, %bb0 ], [ %val1, %bb1 ]
  store i64 %phi, i64 addrspace(1)* %out
  ret void
}

attributes #0 = { nounwind }