Tree @release_38 (Download .tar.gz)
- ..
- 32-bit-local-address-space.ll
- add-debug.ll
- add.ll
- add_i64.ll
- address-space.ll
- addrspacecast.ll
- and.ll
- annotate-kernel-features.ll
- anyext.ll
- array-ptr-calc-i32.ll
- array-ptr-calc-i64.ll
- atomic_cmp_swap_local.ll
- atomic_load_add.ll
- atomic_load_sub.ll
- basic-branch.ll
- basic-loop.ll
- bfe_uint.ll
- bfi_int.ll
- big_alu.ll
- bitcast.ll
- bitreverse.ll
- bswap.ll
- bug-vopc-commute.ll
- build_vector.ll
- call.ll
- call_fs.ll
- calling-conventions.ll
- cayman-loop-bug.ll
- cf-stack-bug.ll
- cf_end.ll
- cgp-addressing-modes-flat.ll
- cgp-addressing-modes.ll
- ci-use-flat-for-global.ll
- coalescer_remat.ll
- codegen-prepare-addrmode-sext.ll
- combine_vloads.ll
- commute-compares.ll
- commute-shifts.ll
- commute_modifiers.ll
- complex-folding.ll
- concat_vectors.ll
- copy-illegal-type.ll
- copy-to-reg.ll
- ctlz.ll
- ctlz_zero_undef.ll
- ctpop.ll
- ctpop64.ll
- cttz_zero_undef.ll
- cvt_f32_ubyte.ll
- cvt_flr_i32_f32.ll
- cvt_rpi_i32_f32.ll
- dagcombiner-bug-illegal-vec4-int-to-fp.ll
- debug.ll
- default-fp-mode.ll
- disconnected-predset-break-bug.ll
- dot4-folding.ll
- drop-mem-operand-move-smrd.ll
- ds-negative-offset-addressing-mode-loop.ll
- ds-sub-offset.ll
- ds_read2.ll
- ds_read2_offset_order.ll
- ds_read2_superreg.ll
- ds_read2st64.ll
- ds_write2.ll
- ds_write2st64.ll
- dynamic_stackalloc.ll
- elf.ll
- elf.r600.ll
- empty-function.ll
- endcf-loop-header.ll
- extload-private.ll
- extload.ll
- extract-vector-elt-i64.ll
- extract_vector_elt_i16.ll
- fabs.f64.ll
- fabs.ll
- fadd.ll
- fadd64.ll
- fceil.ll
- fceil64.ll
- fcmp-cnd.ll
- fcmp-cnde-int-args.ll
- fcmp.ll
- fcmp64.ll
- fconst64.ll
- fcopysign.f32.ll
- fcopysign.f64.ll
- fdiv.f64.ll
- fdiv.ll
- fetch-limits.r600.ll
- fetch-limits.r700+.ll
- ffloor.f64.ll
- ffloor.ll
- flat-address-space.ll
- flat-scratch-reg.ll
- floor.ll
- fma-combine.ll
- fma.f64.ll
- fma.ll
- fmad.ll
- fmax.ll
- fmax3.f64.ll
- fmax3.ll
- fmax_legacy.f64.ll
- fmax_legacy.ll
- fmaxnum.f64.ll
- fmaxnum.ll
- fmin.ll
- fmin3.ll
- fmin_legacy.f64.ll
- fmin_legacy.ll
- fminnum.f64.ll
- fminnum.ll
- fmul-2-combine-multi-use.ll
- fmul.ll
- fmul64.ll
- fmuladd.ll
- fnearbyint.ll
- fneg-fabs.f64.ll
- fneg-fabs.ll
- fneg.f64.ll
- fneg.ll
- fp-classify.ll
- fp16_to_fp.ll
- fp32_to_fp16.ll
- fp_to_sint.f64.ll
- fp_to_sint.ll
- fp_to_uint.f64.ll
- fp_to_uint.ll
- fpext.ll
- fptrunc.ll
- frem.ll
- fsqrt.ll
- fsub.ll
- fsub64.ll
- ftrunc.f64.ll
- ftrunc.ll
- gep-address-space.ll
- global-constant.ll
- global-directive.ll
- global-extload-i1.ll
- global-extload-i16.ll
- global-extload-i32.ll
- global-extload-i8.ll
- global-zero-initializer.ll
- global_atomics.ll
- gv-const-addrspace-fail.ll
- gv-const-addrspace.ll
- half.ll
- hsa-default-device.ll
- hsa-globals.ll
- hsa-group-segment.ll
- hsa-note-no-func.ll
- hsa.ll
- i1-copy-implicit-def.ll
- i1-copy-phi.ll
- i8-to-double-to-float.ll
- icmp-select-sete-reverse-args.ll
- icmp64.ll
- image-attributes.ll
- image-resource-id.ll
- imm.ll
- indirect-addressing-si.ll
- indirect-private-64.ll
- infinite-loop-evergreen.ll
- infinite-loop.ll
- inline-asm.ll
- inline-calls.ll
- inline-constraints.ll
- input-mods.ll
- insert_subreg.ll
- insert_vector_elt.ll
- invariant-load-no-alias-store.ll
- jump-address.ll
- kcache-fold.ll
- kernel-args.ll
- large-alloca-compute.ll
- large-alloca-graphics.ll
- large-constant-initializer.ll
- lds-initializer.ll
- lds-oqap-crash.ll
- lds-output-queue.ll
- lds-size.ll
- lds-zero-initializer.ll
- legalizedag-bug-expand-setcc.ll
- lit.local.cfg
- literals.ll
- llvm.amdgcn.buffer.wbinvl1.ll
- llvm.amdgcn.buffer.wbinvl1.sc.ll
- llvm.amdgcn.buffer.wbinvl1.vol.ll
- llvm.amdgcn.dispatch.ptr.ll
- llvm.amdgcn.interp.ll
- llvm.amdgcn.mbcnt.ll
- llvm.amdgcn.s.dcache.inv.ll
- llvm.amdgcn.s.dcache.inv.vol.ll
- llvm.amdgcn.s.dcache.wb.ll
- llvm.amdgcn.s.dcache.wb.vol.ll
- llvm.AMDGPU.abs.ll
- llvm.AMDGPU.barrier.global.ll
- llvm.AMDGPU.barrier.local.ll
- llvm.AMDGPU.bfe.i32.ll
- llvm.AMDGPU.bfe.u32.ll
- llvm.AMDGPU.bfi.ll
- llvm.AMDGPU.bfm.ll
- llvm.AMDGPU.clamp.ll
- llvm.AMDGPU.class.ll
- llvm.AMDGPU.cube.ll
- llvm.AMDGPU.cvt_f32_ubyte.ll
- llvm.AMDGPU.div_fixup.ll
- llvm.AMDGPU.div_fmas.ll
- llvm.AMDGPU.div_scale.ll
- llvm.amdgpu.dp4.ll
- llvm.AMDGPU.flbit.i32.ll
- llvm.AMDGPU.fract.f64.ll
- llvm.AMDGPU.fract.ll
- llvm.AMDGPU.imad24.ll
- llvm.AMDGPU.imax.ll
- llvm.AMDGPU.imin.ll
- llvm.AMDGPU.imul24.ll
- llvm.AMDGPU.kill.ll
- llvm.amdgpu.kilp.ll
- llvm.AMDGPU.ldexp.ll
- llvm.AMDGPU.legacy.rsq.ll
- llvm.amdgpu.lrp.ll
- llvm.AMDGPU.mul.ll
- llvm.AMDGPU.rcp.f64.ll
- llvm.AMDGPU.rcp.ll
- llvm.AMDGPU.read.workdim.ll
- llvm.AMDGPU.rsq.clamped.f64.ll
- llvm.AMDGPU.rsq.clamped.ll
- llvm.AMDGPU.rsq.ll
- llvm.AMDGPU.tex.ll
- llvm.AMDGPU.trig_preop.ll
- llvm.AMDGPU.trunc.ll
- llvm.AMDGPU.umad24.ll
- llvm.AMDGPU.umax.ll
- llvm.AMDGPU.umin.ll
- llvm.AMDGPU.umul24.ll
- llvm.cos.ll
- llvm.dbg.value.ll
- llvm.exp2.ll
- llvm.log2.ll
- llvm.memcpy.ll
- llvm.pow.ll
- llvm.r600.read.local.size.ll
- llvm.rint.f64.ll
- llvm.rint.ll
- llvm.round.f64.ll
- llvm.round.ll
- llvm.SI.fs.interp.ll
- llvm.SI.gather4.ll
- llvm.SI.getlod.ll
- llvm.SI.image.ll
- llvm.SI.image.sample.ll
- llvm.SI.image.sample.o.ll
- llvm.SI.imageload.ll
- llvm.SI.load.dword.ll
- llvm.SI.packf16.ll
- llvm.SI.resinfo.ll
- llvm.SI.sample-masked.ll
- llvm.SI.sample.ll
- llvm.SI.sampled.ll
- llvm.SI.sendmsg-m0.ll
- llvm.SI.sendmsg.ll
- llvm.SI.tbuffer.store.ll
- llvm.SI.tid.ll
- llvm.sin.ll
- llvm.sqrt.ll
- load-i1.ll
- load-input-fold.ll
- load.ll
- load.vec.ll
- load64.ll
- local-64.ll
- local-atomics.ll
- local-atomics64.ll
- local-memory-two-objects.ll
- local-memory.ll
- loop-address.ll
- loop-idiom.ll
- lshl.ll
- lshr.ll
- m0-spill.ll
- mad-combine.ll
- mad-sub.ll
- mad_int24.ll
- mad_uint24.ll
- madak.ll
- madmk.ll
- max-literals.ll
- max.ll
- max3.ll
- merge-stores.ll
- min.ll
- min3.ll
- missing-store.ll
- move-addr64-rsrc-dead-subreg-writes.ll
- move-to-valu-atomicrmw.ll
- mubuf.ll
- mul.ll
- mul_int24.ll
- mul_uint24.ll
- mulhu.ll
- multilevel-break.ll
- no-hsa-graphics-shaders.ll
- no-initializer-constant-addrspace.ll
- no-shrink-extloads.ll
- opencl-image-metadata.ll
- operand-folding.ll
- operand-spacing.ll
- or.ll
- packetizer.ll
- parallelandifcollapse.ll
- parallelorifcollapse.ll
- partially-dead-super-register-immediate.ll
- predicate-dp4.ll
- predicates.ll
- private-memory-atomics.ll
- private-memory-broken.ll
- private-memory.ll
- promote-alloca-array-allocation.ll
- promote-alloca-bitcast-function.ll
- promote-alloca-invariant-markers.ll
- promote-alloca-mem-intrinsics.ll
- promote-alloca-stored-pointer-value.ll
- promote-alloca-unhandled-intrinsic.ll
- promote-alloca-volatile.ll
- pv-packing.ll
- pv.ll
- r600-encoding.ll
- r600-export-fix.ll
- r600-infinite-loop-bug-while-reorganizing-vector.ll
- r600cfg.ll
- README
- reciprocal.ll
- register-count-comments.ll
- reorder-stores.ll
- ret.ll
- ret_jump.ll
- rotl.i64.ll
- rotl.ll
- rotr.i64.ll
- rotr.ll
- rsq.ll
- rv7x0_count3.ll
- s_movk_i32.ll
- saddo.ll
- salu-to-valu.ll
- sampler-resource-id.ll
- scalar_to_vector.ll
- schedule-fs-loop-nested-if.ll
- schedule-fs-loop-nested.ll
- schedule-fs-loop.ll
- schedule-global-loads.ll
- schedule-if-2.ll
- schedule-if.ll
- schedule-kernel-arg-loads.ll
- schedule-vs-if-nested-loop-failure.ll
- schedule-vs-if-nested-loop.ll
- scratch-buffer.ll
- sdiv.ll
- sdivrem24.ll
- sdivrem64.ll
- select-i1.ll
- select-vectors.ll
- select.ll
- select64.ll
- selectcc-cnd.ll
- selectcc-cnde-int.ll
- selectcc-icmp-select-float.ll
- selectcc-opt.ll
- selectcc.ll
- set-dx10.ll
- setcc-equivalent.ll
- setcc-opt.ll
- setcc.ll
- setcc64.ll
- seto.ll
- setuo.ll
- sext-eliminate.ll
- sext-in-reg.ll
- sgpr-control-flow.ll
- sgpr-copy-duplicate-operand.ll
- sgpr-copy.ll
- shared-op-cycle.ll
- shl.ll
- shl_add_constant.ll
- shl_add_ptr.ll
- si-annotate-cf-assertion.ll
- si-annotate-cf.ll
- si-annotate-cfg-loop-assert.ll
- si-instr-info-correct-implicit-operands.ll
- si-literal-folding.ll
- si-lod-bias.ll
- si-scheduler.ll
- si-sgpr-spill.ll
- si-spill-cf.ll
- si-spill-sgpr-stack.ll
- si-triv-disjoint-mem-access.ll
- si-vector-hang.ll
- sign_extend.ll
- simplify-demanded-bits-build-pair.ll
- sint_to_fp.f64.ll
- sint_to_fp.i64.ll
- sint_to_fp.ll
- sminmax.ll
- smrd.ll
- spill-alloc-sgpr-init-bug.ll
- spill-scavenge-offset.ll
- split-scalar-i64-add.ll
- split-smrd.ll
- split-vector-memoperand-offsets.ll
- sra.ll
- srem.ll
- srl.ll
- ssubo.ll
- store-barrier.ll
- store-v3i32.ll
- store-v3i64.ll
- store-vector-ptrs.ll
- store.ll
- store.r600.ll
- store_typed.ll
- structurize.ll
- structurize1.ll
- sub.ll
- subreg-coalescer-crash.ll
- subreg-coalescer-undef-use.ll
- subreg-eliminate-dead.ll
- swizzle-export.ll
- tex-clause-antidep.ll
- texture-input-merge.ll
- trunc-cmp-constant.ll
- trunc-store-f64-to-f16.ll
- trunc-store-i1.ll
- trunc-store.ll
- trunc-vector-store-assertion-failure.ll
- trunc.ll
- tti-unroll-prefs.ll
- uaddo.ll
- udiv.ll
- udivrem.ll
- udivrem24.ll
- udivrem64.ll
- uint_to_fp.f64.ll
- uint_to_fp.i64.ll
- uint_to_fp.ll
- unaligned-load-store.ll
- unhandled-loop-condition-assertion.ll
- unroll.ll
- unsupported-cc.ll
- urecip.ll
- urem.ll
- use-sgpr-multiple-times.ll
- usubo.ll
- v1i64-kernel-arg.ll
- v_cndmask.ll
- v_mac.ll
- valu-i1.ll
- vector-alloca.ll
- vertex-fetch-encoding.ll
- vgpr-spill-emergency-stack-slot-compute.ll
- vgpr-spill-emergency-stack-slot.ll
- vop-shrink.ll
- vselect.ll
- vselect64.ll
- vtx-fetch-branch.ll
- vtx-schedule.ll
- wait.ll
- work-item-intrinsics.ll
- wrong-transalu-pos-fix.ll
- xor.ll
- zero_extend.ll
bitreverse.ll @release_38 — raw · history · blame
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; Intrinsic declarations referenced by the test functions below.
; All of them carry attribute group #1.

; Generic bit-reverse intrinsic: scalar integer overloads.
declare i16 @llvm.bitreverse.i16(i16) #1
declare i32 @llvm.bitreverse.i32(i32) #1
declare i64 @llvm.bitreverse.i64(i64) #1

; Generic bit-reverse intrinsic: vector overloads.
; The v4i32 and v4i64 overloads are declared but are not called by any of the
; functions visible here.
declare <2 x i32> @llvm.bitreverse.v2i32(<2 x i32>) #1
declare <2 x i64> @llvm.bitreverse.v2i64(<2 x i64>) #1
declare <4 x i32> @llvm.bitreverse.v4i32(<4 x i32>) #1
declare <4 x i64> @llvm.bitreverse.v4i64(<4 x i64>) #1

; Target-specific 32-bit intrinsic, called only by legacy_s_brev_i32.
declare i32 @llvm.AMDGPU.brev(i32) #1
; FUNC-LABEL: {{^}}s_brev_i16:
; SI: s_brev_b32
; i16 bit-reverse of a value passed by value as a function argument; the
; result is stored through %out. The check above expects an s_brev_b32.
define void @s_brev_i16(i16 addrspace(1)* noalias %out, i16 %val) #0 {
  %reversed = call i16 @llvm.bitreverse.i16(i16 %val) #1
  store i16 %reversed, i16 addrspace(1)* %out
  ret void
}
; FUNC-LABEL: {{^}}v_brev_i16:
; SI: v_bfrev_b32_e32
; i16 bit-reverse of a value loaded from addrspace(1) memory via %valptr; the
; result is stored through %out. The check above expects a v_bfrev_b32_e32.
define void @v_brev_i16(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %valptr) #0 {
  %loaded = load i16, i16 addrspace(1)* %valptr
  %reversed = call i16 @llvm.bitreverse.i16(i16 %loaded) #1
  store i16 %reversed, i16 addrspace(1)* %out
  ret void
}
; FUNC-LABEL: {{^}}s_brev_i32:
; SI: s_load_dword [[VAL:s[0-9]+]],
; SI: s_brev_b32 [[SRESULT:s[0-9]+]], [[VAL]]
; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
; SI: buffer_store_dword [[VRESULT]],
; SI: s_endpgm
; i32 bit-reverse of a by-value argument. The checks above pin the whole
; sequence: s_load_dword of the value, s_brev_b32 on it, a v_mov_b32_e32 copy
; of the result into a v register, and a buffer_store_dword of that register.
define void @s_brev_i32(i32 addrspace(1)* noalias %out, i32 %val) #0 {
  %reversed = call i32 @llvm.bitreverse.i32(i32 %val) #1
  store i32 %reversed, i32 addrspace(1)* %out
  ret void
}
; FUNC-LABEL: {{^}}v_brev_i32:
; SI: buffer_load_dword [[VAL:v[0-9]+]],
; SI: v_bfrev_b32_e32 [[RESULT:v[0-9]+]], [[VAL]]
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
; i32 bit-reverse of a value loaded from addrspace(1) memory. The checks above
; pin the sequence: buffer_load_dword, v_bfrev_b32_e32 on the loaded register,
; then buffer_store_dword of the reversed register.
define void @v_brev_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) #0 {
  %loaded = load i32, i32 addrspace(1)* %valptr
  %reversed = call i32 @llvm.bitreverse.i32(i32 %loaded) #1
  store i32 %reversed, i32 addrspace(1)* %out
  ret void
}
; FUNC-LABEL: {{^}}s_brev_v2i32:
; SI: s_brev_b32
; SI: s_brev_b32
; <2 x i32> bit-reverse of a by-value argument. The checks above expect two
; s_brev_b32 instructions.
define void @s_brev_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> %val) #0 {
  %reversed = call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> %val) #1
  store <2 x i32> %reversed, <2 x i32> addrspace(1)* %out
  ret void
}
; FUNC-LABEL: {{^}}v_brev_v2i32:
; SI: v_bfrev_b32_e32
; SI: v_bfrev_b32_e32
; <2 x i32> bit-reverse of a vector loaded from addrspace(1) memory. The
; checks above expect two v_bfrev_b32_e32 instructions.
define void @v_brev_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %valptr) #0 {
  %loaded = load <2 x i32>, <2 x i32> addrspace(1)* %valptr
  %reversed = call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> %loaded) #1
  store <2 x i32> %reversed, <2 x i32> addrspace(1)* %out
  ret void
}
; FUNC-LABEL: {{^}}s_brev_i64:
; i64 bit-reverse of a by-value argument. Only the function label is checked;
; no instruction patterns are asserted for this case.
define void @s_brev_i64(i64 addrspace(1)* noalias %out, i64 %val) #0 {
  %reversed = call i64 @llvm.bitreverse.i64(i64 %val) #1
  store i64 %reversed, i64 addrspace(1)* %out
  ret void
}
; FUNC-LABEL: {{^}}v_brev_i64:
; i64 bit-reverse of a value loaded from addrspace(1) memory. Only the
; function label is checked; no instruction patterns are asserted.
define void @v_brev_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %valptr) #0 {
  %loaded = load i64, i64 addrspace(1)* %valptr
  %reversed = call i64 @llvm.bitreverse.i64(i64 %loaded) #1
  store i64 %reversed, i64 addrspace(1)* %out
  ret void
}
; FUNC-LABEL: {{^}}s_brev_v2i64:
; <2 x i64> bit-reverse of a by-value argument. Only the function label is
; checked; no instruction patterns are asserted.
define void @s_brev_v2i64(<2 x i64> addrspace(1)* noalias %out, <2 x i64> %val) #0 {
  %reversed = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> %val) #1
  store <2 x i64> %reversed, <2 x i64> addrspace(1)* %out
  ret void
}
; FUNC-LABEL: {{^}}v_brev_v2i64:
; <2 x i64> bit-reverse of a vector loaded from addrspace(1) memory. Only the
; function label is checked; no instruction patterns are asserted.
define void @v_brev_v2i64(<2 x i64> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %valptr) #0 {
  %loaded = load <2 x i64>, <2 x i64> addrspace(1)* %valptr
  %reversed = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> %loaded) #1
  store <2 x i64> %reversed, <2 x i64> addrspace(1)* %out
  ret void
}
; FUNC-LABEL: {{^}}legacy_s_brev_i32:
; SI: s_brev_b32
; i32 bit-reverse of a by-value argument through the target-specific
; @llvm.AMDGPU.brev intrinsic rather than the generic @llvm.bitreverse.i32.
; The check above expects the same s_brev_b32 instruction.
; The definition uses attribute group #0 ({ nounwind }) like the other test
; functions, in place of an inline nounwind keyword.
define void @legacy_s_brev_i32(i32 addrspace(1)* noalias %out, i32 %val) #0 {
  %reversed = call i32 @llvm.AMDGPU.brev(i32 %val) #1
  store i32 %reversed, i32 addrspace(1)* %out
  ret void
}
; Attribute group referenced as #0 on function definitions.
attributes #0 = { nounwind }
; Attribute group referenced as #1 on the intrinsic declarations and on their
; call sites.
attributes #1 = { nounwind readnone }
|