Tree @release_70 (Download .tar.gz)
- ..
- GlobalISel
- 32-bit-local-address-space.ll
- add-debug.ll
- add.i16.ll
- add.ll
- add.v2i16.ll
- add_i128.ll
- add_i64.ll
- addrspacecast-captured.ll
- addrspacecast-constantexpr.ll
- addrspacecast.ll
- adjust-writemask-invalid-copy.ll
- alignbit-pat.ll
- alloca.ll
- always-uniform.ll
- amdgcn.bitcast.ll
- amdgcn.private-memory.ll
- amdgpu-alias-analysis.ll
- amdgpu-codegenprepare-fdiv.ll
- amdgpu-codegenprepare-i16-to-i32.ll
- amdgpu-codegenprepare-idiv.ll
- amdgpu-inline.ll
- amdgpu-shader-calling-convention.ll
- amdgpu.private-memory.ll
- amdgpu.work-item-intrinsics.deprecated.ll
- amdhsa-trap-num-sgprs.ll
- amdpal-cs.ll
- amdpal-es.ll
- amdpal-gs.ll
- amdpal-hs.ll
- amdpal-ls.ll
- amdpal-ps.ll
- amdpal-psenable.ll
- amdpal-vs.ll
- amdpal.ll
- amdpal_scratch_mergedshader.ll
- and-gcn.ll
- and.ll
- annotate-kernel-features-hsa-call.ll
- annotate-kernel-features-hsa.ll
- annotate-kernel-features.ll
- anonymous-gv.ll
- any_extend_vector_inreg.ll
- anyext.ll
- array-ptr-calc-i32.ll
- array-ptr-calc-i64.ll
- ashr.v2i16.ll
- atomic_cmp_swap_local.ll
- atomic_load_add.ll
- atomic_load_local.ll
- atomic_load_sub.ll
- atomic_store_local.ll
- attr-amdgpu-flat-work-group-size.ll
- attr-amdgpu-num-sgpr-spill-to-smem.ll
- attr-amdgpu-num-sgpr.ll
- attr-amdgpu-num-vgpr.ll
- attr-amdgpu-waves-per-eu.ll
- attr-unparseable.ll
- barrier-elimination.ll
- basic-branch.ll
- basic-call-return.ll
- basic-loop.ll
- bfe-combine.ll
- bfe-patterns.ll
- bfe_uint.ll
- bfi_int.ll
- bfm.ll
- big_alu.ll
- bitcast-vector-extract.ll
- bitreverse-inline-immediates.ll
- bitreverse.ll
- br_cc.f16.ll
- branch-condition-and.ll
- branch-relax-bundle.ll
- branch-relax-spill.ll
- branch-relaxation.ll
- branch-uniformity.ll
- break-smem-soft-clauses.mir
- break-vmem-soft-clauses.mir
- bswap.ll
- buffer-schedule.ll
- bug-vopc-commute.ll
- build_vector.ll
- byval-frame-setup.ll
- call-argument-types.ll
- call-encoding.ll
- call-graph-register-usage.ll
- call-preserved-registers.ll
- call-return-types.ll
- call_fs.ll
- callee-frame-setup.ll
- callee-special-input-sgprs.ll
- callee-special-input-vgprs.ll
- calling-conventions.ll
- captured-frame-index.ll
- cayman-loop-bug.ll
- cf-loop-on-constant.ll
- cf-stack-bug.ll
- cf_end.ll
- cgp-addressing-modes-flat.ll
- cgp-addressing-modes.ll
- cgp-bitfield-extract.ll
- clamp-modifier.ll
- clamp-omod-special-case.mir
- clamp.ll
- cluster-flat-loads-postra.mir
- cluster-flat-loads.mir
- cndmask-no-def-vcc.ll
- coalescer-extend-pruned-subrange.mir
- coalescer-identical-values-undef.mir
- coalescer-subranges-another-copymi-not-live.mir
- coalescer-subranges-another-prune-error.mir
- coalescer-subreg-join.mir
- coalescer-subregjoin-fullcopy.mir
- coalescer-with-subregs-bad-identical.mir
- coalescer_distribute.ll
- coalescer_remat.ll
- coalescing-with-subregs-in-loop-bug.mir
- code-object-v3.ll
- codegen-prepare-addrmode-sext.ll
- collapse-endcf.ll
- combine-and-sext-bool.ll
- combine-cond-add-sub.ll
- combine-ftrunc.ll
- combine_vloads.ll
- comdat.ll
- commute-compares.ll
- commute-shifts.ll
- commute_modifiers.ll
- complex-folding.ll
- concat_vectors.ll
- constant-address-space-32bit.ll
- constant-fold-imm-immreg.mir
- constant-fold-mi-operands.ll
- control-flow-fastregalloc.ll
- control-flow-optnone.ll
- convergent-inlineasm.ll
- copy-illegal-type.ll
- copy-to-reg.ll
- couldnt-join-subrange-3.mir
- ctlz.ll
- ctlz_zero_undef.ll
- ctpop.ll
- ctpop16.ll
- ctpop64.ll
- cttz_zero_undef.ll
- cube.ll
- cvt_f32_ubyte.ll
- cvt_flr_i32_f32.ll
- cvt_rpi_i32_f32.ll
- dagcomb-shuffle-vecextend-non2.ll
- dagcombine-reassociate-bug.ll
- dagcombine-select.ll
- dagcombine-setcc-select.ll
- dagcombiner-bug-illegal-vec4-int-to-fp.ll
- dead_copy.mir
- debug-value.ll
- debug-value2.ll
- debug.ll
- debugger-emit-prologue.ll
- debugger-insert-nops.ll
- default-fp-mode.ll
- detect-dead-lanes.mir
- disconnected-predset-break-bug.ll
- diverge-extra-formal-args.ll
- diverge-interp-mov-lower.ll
- divrem24-assume.ll
- drop-mem-operand-move-smrd.ll
- ds-combine-large-stride.ll
- ds-negative-offset-addressing-mode-loop.ll
- ds-sub-offset.ll
- ds_read2.ll
- ds_read2_offset_order.ll
- ds_read2_superreg.ll
- ds_read2st64.ll
- ds_write2.ll
- ds_write2st64.ll
- dynamic_stackalloc.ll
- early-if-convert-cost.ll
- early-if-convert.ll
- early-inline-alias.ll
- early-inline.ll
- elf-header-flags-mach.ll
- elf-header-flags-xnack.ll
- elf-header-osabi.ll
- elf-notes.ll
- elf.ll
- elf.r600.ll
- else.ll
- empty-function.ll
- enable-no-signed-zeros-fp-math.ll
- endcf-loop-header.ll
- endpgm-dce.mir
- enqueue-kernel.ll
- env-amdgiz.ll
- env-amdgizcl.ll
- exceed-max-sgprs.ll
- extend-bit-ops-i16.ll
- extload-align.ll
- extload-private.ll
- extload.ll
- extract-lowbits.ll
- extract-subvector-equal-length.ll
- extract-vector-elt-build-vector-combine.ll
- extract_vector_elt-f16.ll
- extract_vector_elt-f64.ll
- extract_vector_elt-i16.ll
- extract_vector_elt-i64.ll
- extract_vector_elt-i8.ll
- extractelt-to-trunc.ll
- fabs.f16.ll
- fabs.f64.ll
- fabs.ll
- fadd-fma-fmul-combine.ll
- fadd.f16.ll
- fadd.ll
- fadd64.ll
- fcanonicalize-elimination.ll
- fcanonicalize.f16.ll
- fcanonicalize.ll
- fceil.ll
- fceil64.ll
- fcmp-cnd.ll
- fcmp-cnde-int-args.ll
- fcmp.f16.ll
- fcmp.ll
- fcmp64.ll
- fconst64.ll
- fcopysign.f16.ll
- fcopysign.f32.ll
- fcopysign.f64.ll
- fdiv.f16.ll
- fdiv.f64.ll
- fdiv.ll
- fdiv32-to-rcp-folding.ll
- fdot2.ll
- fence-amdgiz.ll
- fence-barrier.ll
- fetch-limits.r600.ll
- fetch-limits.r700+.ll
- ffloor.f64.ll
- ffloor.ll
- fix-vgpr-copies.mir
- fix-wwm-liveness.mir
- flat-address-space.ll
- flat-for-global-subtarget-feature.ll
- flat-load-clustering.mir
- flat-scratch-reg.ll
- flat_atomics.ll
- flat_atomics_i64.ll
- floor.ll
- fma-combine.ll
- fma.f64.ll
- fma.ll
- fmad.ll
- fmax.ll
- fmax3.f64.ll
- fmax3.ll
- fmax_legacy.f64.ll
- fmax_legacy.ll
- fmaxnum.f64.ll
- fmaxnum.ll
- fmaxnum.r600.ll
- fmed3.ll
- fmin.ll
- fmin3.ll
- fmin_fmax_legacy.amdgcn.ll
- fmin_legacy.f64.ll
- fmin_legacy.ll
- fminnum.f64.ll
- fminnum.ll
- fminnum.r600.ll
- fmul-2-combine-multi-use.ll
- fmul.f16.ll
- fmul.ll
- fmul64.ll
- fmuladd.f16.ll
- fmuladd.f32.ll
- fmuladd.f64.ll
- fmuladd.v2f16.ll
- fnearbyint.ll
- fneg-combines.ll
- fneg-fabs.f16.ll
- fneg-fabs.f64.ll
- fneg-fabs.ll
- fneg.f16.ll
- fneg.f64.ll
- fneg.ll
- fold-cndmask.mir
- fold-fmul-to-neg-abs.ll
- fold-imm-f16-f32.mir
- fold-immediate-output-mods.mir
- fold-implicit-operand.mir
- fold-multiple.mir
- fold-operands-order.mir
- force-alwaysinline-lds-global-address.ll
- fp-classify.ll
- fp16_to_fp32.ll
- fp16_to_fp64.ll
- fp32_to_fp16.ll
- fp_to_sint.f64.ll
- fp_to_sint.ll
- fp_to_uint.f64.ll
- fp_to_uint.ll
- fpext-free.ll
- fpext.f16.ll
- fpext.ll
- fptosi.f16.ll
- fptoui.f16.ll
- fptrunc.f16.ll
- fptrunc.ll
- fract.f64.ll
- fract.ll
- frame-index-amdgiz.ll
- frame-index-elimination.ll
- frem.ll
- fsqrt.f64.ll
- fsqrt.ll
- fsub.f16.ll
- fsub.ll
- fsub64.ll
- ftrunc.f64.ll
- ftrunc.ll
- function-args.ll
- function-returns.ll
- gep-address-space.ll
- global-constant.ll
- global-directive.ll
- global-extload-i16.ll
- global-smrd-unknown.ll
- global-variable-relocs.ll
- global_atomics.ll
- global_atomics_i64.ll
- global_smrd.ll
- global_smrd_cfg.ll
- gv-const-addrspace.ll
- gv-offset-folding.ll
- half.ll
- hazard-buffer-store-v-interp.mir
- hazard-inlineasm.mir
- hazard.mir
- hoist-cond.ll
- hsa-default-device.ll
- hsa-fp-mode.ll
- hsa-func-align.ll
- hsa-func.ll
- hsa-globals.ll
- hsa-group-segment.ll
- hsa-metadata-deduce-ro-arg.ll
- hsa-metadata-enqueue-kernel.ll
- hsa-metadata-from-llvm-ir-full.ll
- hsa-metadata-hidden-args.ll
- hsa-metadata-images.ll
- hsa-metadata-invalid-ocl-version-1.ll
- hsa-metadata-invalid-ocl-version-2.ll
- hsa-metadata-invalid-ocl-version-3.ll
- hsa-metadata-kernel-code-props.ll
- hsa-metadata-kernel-debug-props.ll
- hsa-note-no-func.ll
- hsa.ll
- huge-private-buffer.ll
- i1-copy-from-loop.ll
- i1-copy-implicit-def.ll
- i1-copy-phi.ll
- i8-to-double-to-float.ll
- icmp-select-sete-reverse-args.ll
- icmp.i16.ll
- icmp64.ll
- idiv-licm.ll
- illegal-sgpr-to-vgpr-copy.ll
- image-attributes.ll
- image-resource-id.ll
- image-schedule.ll
- imm.ll
- imm16.ll
- immv216.ll
- indirect-addressing-si-noopt.ll
- indirect-addressing-si.ll
- indirect-private-64.ll
- infer-addrpace-pipeline.ll
- infinite-loop-evergreen.ll
- infinite-loop.ll
- inline-asm.ll
- inline-attr.ll
- inline-calls.ll
- inline-constraints.ll
- inlineasm-16.ll
- inlineasm-illegal-type.ll
- inlineasm-packed.ll
- InlineAsmCrash.ll
- input-mods.ll
- insert-skips-kill-uncond.mir
- insert-waitcnts-callee.mir
- insert-waitcnts-exp.mir
- insert_subreg.ll
- insert_vector_elt.ll
- insert_vector_elt.v2i16.ll
- inserted-wait-states.mir
- internalize.ll
- invalid-addrspacecast.ll
- invalid-alloca.ll
- invariant-load-no-alias-store.ll
- invert-br-undef-vcc.mir
- ipra.ll
- jump-address.ll
- kcache-fold.ll
- kernarg-stack-alignment.ll
- kernel-args.ll
- kernel-argument-dag-lowering.ll
- knownbits-recursion.ll
- large-alloca-compute.ll
- large-alloca-graphics.ll
- large-constant-initializer.ll
- large-work-group-promote-alloca.ll
- lds-alignment.ll
- lds-global-non-entry-func.ll
- lds-initializer.ll
- lds-m0-init-in-loop.ll
- lds-oqap-crash.ll
- lds-output-queue.ll
- lds-size.ll
- lds-zero-initializer.ll
- lds_atomic_f32.ll
- legalize-fp-load-invariant.ll
- legalizedag-bug-expand-setcc.ll
- limit-coalesce.mir
- lit.local.cfg
- literals.ll
- liveness.mir
- llvm.amdgcn.alignb.ll
- llvm.amdgcn.atomic.dec.ll
- llvm.amdgcn.atomic.inc.ll
- llvm.amdgcn.buffer.atomic.ll
- llvm.amdgcn.buffer.load.format.d16.ll
- llvm.amdgcn.buffer.load.format.ll
- llvm.amdgcn.buffer.load.ll
- llvm.amdgcn.buffer.store.format.d16.ll
- llvm.amdgcn.buffer.store.format.ll
- llvm.amdgcn.buffer.store.ll
- llvm.amdgcn.buffer.wbinvl1.ll
- llvm.amdgcn.buffer.wbinvl1.sc.ll
- llvm.amdgcn.buffer.wbinvl1.vol.ll
- llvm.amdgcn.class.f16.ll
- llvm.amdgcn.class.ll
- llvm.amdgcn.cos.f16.ll
- llvm.amdgcn.cos.ll
- llvm.amdgcn.cubeid.ll
- llvm.amdgcn.cubema.ll
- llvm.amdgcn.cubesc.ll
- llvm.amdgcn.cubetc.ll
- llvm.amdgcn.cvt.pk.i16.ll
- llvm.amdgcn.cvt.pk.u16.ll
- llvm.amdgcn.cvt.pknorm.i16.ll
- llvm.amdgcn.cvt.pknorm.u16.ll
- llvm.amdgcn.cvt.pkrtz.ll
- llvm.amdgcn.dispatch.id.ll
- llvm.amdgcn.dispatch.ptr.ll
- llvm.amdgcn.div.fixup.f16.ll
- llvm.amdgcn.div.fixup.ll
- llvm.amdgcn.div.fmas.ll
- llvm.amdgcn.div.scale.ll
- llvm.amdgcn.ds.bpermute.ll
- llvm.amdgcn.ds.permute.ll
- llvm.amdgcn.ds.swizzle.ll
- llvm.amdgcn.exp.compr.ll
- llvm.amdgcn.exp.ll
- llvm.amdgcn.fcmp.ll
- llvm.amdgcn.fdiv.fast.ll
- llvm.amdgcn.fdot2.ll
- llvm.amdgcn.fmad.ftz.f16.ll
- llvm.amdgcn.fmad.ftz.ll
- llvm.amdgcn.fmed3.f16.ll
- llvm.amdgcn.fmed3.ll
- llvm.amdgcn.fmul.legacy.ll
- llvm.amdgcn.fract.f16.ll
- llvm.amdgcn.fract.ll
- llvm.amdgcn.frexp.exp.f16.ll
- llvm.amdgcn.frexp.exp.ll
- llvm.amdgcn.frexp.mant.f16.ll
- llvm.amdgcn.frexp.mant.ll
- llvm.amdgcn.groupstaticsize.ll
- llvm.amdgcn.icmp.ll
- llvm.amdgcn.image.atomic.dim.ll
- llvm.amdgcn.image.d16.dim.ll
- llvm.amdgcn.image.dim.ll
- llvm.amdgcn.image.gather4.d16.dim.ll
- llvm.amdgcn.image.gather4.dim.ll
- llvm.amdgcn.image.gather4.o.dim.ll
- llvm.amdgcn.image.getlod.dim.ll
- llvm.amdgcn.image.sample.d16.dim.ll
- llvm.amdgcn.image.sample.dim.ll
- llvm.amdgcn.image.sample.ltolz.ll
- llvm.amdgcn.image.sample.o.dim.ll
- llvm.amdgcn.implicit.buffer.ptr.hsa.ll
- llvm.amdgcn.implicit.buffer.ptr.ll
- llvm.amdgcn.implicitarg.ptr.ll
- llvm.amdgcn.init.exec.ll
- llvm.amdgcn.interp.ll
- llvm.amdgcn.kernarg.segment.ptr.ll
- llvm.amdgcn.kill.ll
- llvm.amdgcn.ldexp.f16.ll
- llvm.amdgcn.ldexp.ll
- llvm.amdgcn.lerp.ll
- llvm.amdgcn.log.clamp.ll
- llvm.amdgcn.mbcnt.ll
- llvm.amdgcn.mov.dpp.ll
- llvm.amdgcn.mqsad.pk.u16.u8.ll
- llvm.amdgcn.mqsad.u32.u8.ll
- llvm.amdgcn.msad.u8.ll
- llvm.amdgcn.ps.live.ll
- llvm.amdgcn.qsad.pk.u16.u8.ll
- llvm.amdgcn.queue.ptr.ll
- llvm.amdgcn.rcp.f16.ll
- llvm.amdgcn.rcp.legacy.ll
- llvm.amdgcn.rcp.ll
- llvm.amdgcn.readfirstlane.ll
- llvm.amdgcn.readlane.ll
- llvm.amdgcn.rsq.clamp.ll
- llvm.amdgcn.rsq.f16.ll
- llvm.amdgcn.rsq.legacy.ll
- llvm.amdgcn.rsq.ll
- llvm.amdgcn.s.barrier.ll
- llvm.amdgcn.s.dcache.inv.ll
- llvm.amdgcn.s.dcache.inv.vol.ll
- llvm.amdgcn.s.dcache.wb.ll
- llvm.amdgcn.s.dcache.wb.vol.ll
- llvm.amdgcn.s.decperflevel.ll
- llvm.amdgcn.s.getpc.ll
- llvm.amdgcn.s.getreg.ll
- llvm.amdgcn.s.incperflevel.ll
- llvm.amdgcn.s.memrealtime.ll
- llvm.amdgcn.s.memtime.ll
- llvm.amdgcn.s.sleep.ll
- llvm.amdgcn.s.waitcnt.ll
- llvm.amdgcn.sad.hi.u8.ll
- llvm.amdgcn.sad.u16.ll
- llvm.amdgcn.sad.u8.ll
- llvm.amdgcn.sbfe.ll
- llvm.amdgcn.sdot2.ll
- llvm.amdgcn.sdot4.ll
- llvm.amdgcn.sdot8.ll
- llvm.amdgcn.sendmsg.ll
- llvm.amdgcn.set.inactive.ll
- llvm.amdgcn.sffbh.ll
- llvm.amdgcn.sin.f16.ll
- llvm.amdgcn.sin.ll
- llvm.amdgcn.tbuffer.load.d16.ll
- llvm.amdgcn.tbuffer.load.ll
- llvm.amdgcn.tbuffer.store.d16.ll
- llvm.amdgcn.tbuffer.store.ll
- llvm.amdgcn.trig.preop.ll
- llvm.amdgcn.ubfe.ll
- llvm.amdgcn.udot2.ll
- llvm.amdgcn.udot4.ll
- llvm.amdgcn.udot8.ll
- llvm.amdgcn.unreachable.ll
- llvm.amdgcn.update.dpp.ll
- llvm.amdgcn.wave.barrier.ll
- llvm.amdgcn.workgroup.id.ll
- llvm.amdgcn.workitem.id.ll
- llvm.amdgcn.wqm.vote.ll
- llvm.amdgcn.writelane.ll
- llvm.AMDGPU.kill.ll
- llvm.ceil.f16.ll
- llvm.cos.f16.ll
- llvm.cos.ll
- llvm.dbg.value.ll
- llvm.exp2.f16.ll
- llvm.exp2.ll
- llvm.floor.f16.ll
- llvm.fma.f16.ll
- llvm.fmuladd.f16.ll
- llvm.log.f16.ll
- llvm.log.ll
- llvm.log10.f16.ll
- llvm.log10.ll
- llvm.log2.f16.ll
- llvm.log2.ll
- llvm.maxnum.f16.ll
- llvm.memcpy.ll
- llvm.minnum.f16.ll
- llvm.pow.ll
- llvm.r600.cube.ll
- llvm.r600.dot4.ll
- llvm.r600.group.barrier.ll
- llvm.r600.read.local.size.ll
- llvm.r600.recipsqrt.clamped.ll
- llvm.r600.recipsqrt.ieee.ll
- llvm.r600.tex.ll
- llvm.rint.f16.ll
- llvm.rint.f64.ll
- llvm.rint.ll
- llvm.round.f64.ll
- llvm.round.ll
- llvm.SI.load.dword.ll
- llvm.SI.tbuffer.store.ll
- llvm.sin.f16.ll
- llvm.sin.ll
- llvm.sqrt.f16.ll
- llvm.trunc.f16.ll
- load-constant-f32.ll
- load-constant-f64.ll
- load-constant-i1.ll
- load-constant-i16.ll
- load-constant-i32.ll
- load-constant-i64.ll
- load-constant-i8.ll
- load-global-f32.ll
- load-global-f64.ll
- load-global-i1.ll
- load-global-i16.ll
- load-global-i32.ll
- load-global-i64.ll
- load-global-i8.ll
- load-hi16.ll
- load-input-fold.ll
- load-lo16.ll
- load-local-f32-no-ds128.ll
- load-local-f32.ll
- load-local-f64.ll
- load-local-i1.ll
- load-local-i16.ll
- load-local-i32.ll
- load-local-i64.ll
- load-local-i8.ll
- load-private-double16-amdgiz.ll
- load-select-ptr.ll
- load-weird-sizes.ll
- local-64.ll
- local-atomics.ll
- local-atomics64.ll
- local-memory.amdgcn.ll
- local-memory.ll
- local-memory.r600.ll
- local-stack-slot-offset.ll
- loop-address.ll
- loop-idiom.ll
- loop_break.ll
- loop_exit_with_xor.ll
- lower-kernargs.ll
- lower-mem-intrinsics.ll
- lower-range-metadata-intrinsic-call.ll
- lshl64-to-32.ll
- lshr.v2i16.ll
- macro-fusion-cluster-vcc-uses.mir
- mad-combine.ll
- mad-mix-hi.ll
- mad-mix-lo.ll
- mad-mix.ll
- mad24-get-global-id.ll
- mad_64_32.ll
- mad_int24.ll
- mad_uint24.ll
- madak.ll
- madmk.ll
- max-literals.ll
- max.i16.ll
- max.ll
- max3.ll
- mem-builtins.ll
- memory-legalizer-atomic-cmpxchg.ll
- memory-legalizer-atomic-fence.ll
- memory-legalizer-atomic-insert-end.mir
- memory-legalizer-atomic-rmw.ll
- memory-legalizer-invalid-addrspace.mir
- memory-legalizer-invalid-syncscope.ll
- memory-legalizer-load.ll
- memory-legalizer-local.mir
- memory-legalizer-multiple-mem-operands-atomics.mir
- memory-legalizer-multiple-mem-operands-nontemporal-1.mir
- memory-legalizer-multiple-mem-operands-nontemporal-2.mir
- memory-legalizer-region.mir
- memory-legalizer-store-infinite-loop.ll
- memory-legalizer-store.ll
- memory_clause.ll
- memory_clause.mir
- merge-load-store-physreg.mir
- merge-load-store-vreg.mir
- merge-load-store.mir
- merge-m0.mir
- merge-store-crash.ll
- merge-store-usedef.ll
- merge-stores.ll
- mesa_regression.ll
- min.ll
- min3.ll
- misched-killflags.mir
- missing-store.ll
- move-addr64-rsrc-dead-subreg-writes.ll
- move-to-valu-atomicrmw.ll
- move-to-valu-worklist.ll
- movreld-bug.ll
- movrels-bug.mir
- mubuf-offset-private.ll
- mubuf-shader-vgpr.ll
- mubuf.ll
- mul.i16.ll
- mul.ll
- mul_int24.ll
- mul_uint24-amdgcn.ll
- mul_uint24-r600.ll
- multi-divergent-exit-region.ll
- multilevel-break.ll
- nested-calls.ll
- nested-loop-conditions.ll
- no-hsa-graphics-shaders.ll
- no-initializer-constant-addrspace.ll
- no-shrink-extloads.ll
- nop-data.ll
- not-scalarize-volatile-load.ll
- nullptr.ll
- omod.ll
- opencl-image-metadata.ll
- operand-folding.ll
- operand-spacing.ll
- opt-sgpr-to-vgpr-copy.mir
- optimize-if-exec-masking.mir
- or.ll
- over-max-lds-size.ll
- pack.v2f16.ll
- pack.v2i16.ll
- packed-op-sel.ll
- packetizer.ll
- parallelandifcollapse.ll
- parallelorifcollapse.ll
- partial-sgpr-to-vgpr-spills.ll
- partial-shift-shrink.ll
- partially-dead-super-register-immediate.ll
- perfhint.ll
- permute.ll
- pk_max_f16_literal.ll
- postra-norename.mir
- predicate-dp4.ll
- predicates.ll
- print-mir-custom-pseudo.ll
- private-access-no-objects.ll
- private-element-size.ll
- private-memory-atomics.ll
- private-memory-r600.ll
- promote-alloca-addrspacecast.ll
- promote-alloca-array-aggregate.ll
- promote-alloca-array-allocation.ll
- promote-alloca-bitcast-function.ll
- promote-alloca-calling-conv.ll
- promote-alloca-globals.ll
- promote-alloca-invariant-markers.ll
- promote-alloca-lifetime.ll
- promote-alloca-mem-intrinsics.ll
- promote-alloca-no-opts.ll
- promote-alloca-padding-size-estimate.ll
- promote-alloca-stored-pointer-value.ll
- promote-alloca-to-lds-icmp.ll
- promote-alloca-to-lds-phi.ll
- promote-alloca-to-lds-select.ll
- promote-alloca-unhandled-intrinsic.ll
- promote-alloca-volatile.ll
- pv-packing.ll
- pv.ll
- r600-constant-array-fixup.ll
- r600-encoding.ll
- r600-export-fix.ll
- r600-infinite-loop-bug-while-reorganizing-vector.ll
- r600-legalize-umax-bug.ll
- r600.alu-limits.ll
- r600.amdgpu-alias-analysis.ll
- r600.bitcast.ll
- r600.extract-lowbits.ll
- r600.func-alignment.ll
- r600.global_atomics.ll
- r600.private-memory.ll
- r600.work-item-intrinsics.ll
- r600cfg.ll
- rcp-pattern.ll
- rcp_iflag.ll
- read-register-invalid-subtarget.ll
- read-register-invalid-type-i32.ll
- read-register-invalid-type-i64.ll
- read_register.ll
- readcyclecounter.ll
- readlane_exec0.mir
- README
- reduce-build-vec-ext-to-ext-build-vec.ll
- reduce-load-width-alignment.ll
- reduce-saveexec.mir
- reduce-store-width-alignment.ll
- reduction.ll
- reg-coalescer-sched-crash.ll
- regcoal-subrange-join-seg.mir
- regcoal-subrange-join.mir
- regcoalesce-dbg.mir
- regcoalesce-prune.mir
- register-count-comments.ll
- rename-disconnected-bug.ll
- rename-independent-subregs-mac-operands.mir
- rename-independent-subregs.mir
- reorder-stores.ll
- reqd-work-group-size.ll
- ret.ll
- ret_jump.ll
- rewrite-out-arguments-address-space.ll
- rewrite-out-arguments.ll
- rotl.i64.ll
- rotl.ll
- rotr.i64.ll
- rotr.ll
- rsq.ll
- rv7x0_count3.ll
- s_addk_i32.ll
- s_movk_i32.ll
- s_mulk_i32.ll
- sad.ll
- saddo.ll
- salu-to-valu.ll
- sampler-resource-id.ll
- scalar-branch-missing-and-exec.ll
- scalar-store-cache-flush.mir
- scalar_to_vector.ll
- sched-crash-dbg-value.mir
- schedule-fs-loop-nested-if.ll
- schedule-fs-loop-nested.ll
- schedule-fs-loop.ll
- schedule-global-loads.ll
- schedule-if-2.ll
- schedule-if.ll
- schedule-ilp.ll
- schedule-kernel-arg-loads.ll
- schedule-regpressure-limit.ll
- schedule-regpressure-limit2.ll
- schedule-regpressure-limit3.ll
- schedule-regpressure.mir
- schedule-vs-if-nested-loop-failure.ll
- schedule-vs-if-nested-loop.ll
- scheduler-subrange-crash.ll
- scratch-buffer.ll
- scratch-simple.ll
- sdiv.ll
- sdivrem24.ll
- sdivrem64.ll
- sdwa-gfx9.mir
- sdwa-peephole-instr.mir
- sdwa-peephole.ll
- sdwa-preserve.mir
- sdwa-scalar-ops.mir
- sdwa-vop2-64bit.mir
- select-fabs-fneg-extract-legacy.ll
- select-fabs-fneg-extract.ll
- select-i1.ll
- select-opt.ll
- select-vectors.ll
- select.f16.ll
- select.ll
- select64.ll
- selectcc-cnd.ll
- selectcc-cnde-int.ll
- selectcc-icmp-select-float.ll
- selectcc-opt.ll
- selectcc.ll
- selected-stack-object.ll
- sendmsg-m0-hazard.mir
- set-dx10.ll
- setcc-equivalent.ll
- setcc-fneg-constant.ll
- setcc-opt.ll
- setcc-sext.ll
- setcc.ll
- setcc64.ll
- seto.ll
- setuo.ll
- sext-eliminate.ll
- sext-in-reg-failure-r600.ll
- sext-in-reg.ll
- sgpr-control-flow.ll
- sgpr-copy-duplicate-operand.ll
- sgpr-copy.ll
- sgpr-spill-wrong-stack-id.mir
- sgprcopies.ll
- shared-op-cycle.ll
- shift-and-i128-ubfe.ll
- shift-and-i64-ubfe.ll
- shift-i64-opts.ll
- shl-add-to-add-shl.ll
- shl.ll
- shl.v2i16.ll
- shl_add_constant.ll
- shl_add_ptr.ll
- shrink-add-sub-constant.ll
- shrink-carry.mir
- shrink-vop3-carry-out.mir
- si-annotate-cf-noloop.ll
- si-annotate-cf-unreachable.ll
- si-annotate-cf.ll
- si-annotate-cfg-loop-assert.ll
- si-fix-sgpr-copies.mir
- si-instr-info-correct-implicit-operands.ll
- si-lower-control-flow-kill.ll
- si-lower-control-flow-unreachable-block.ll
- si-lower-control-flow.mir
- si-scheduler.ll
- si-sgpr-spill.ll
- si-spill-cf.ll
- si-spill-sgpr-stack.ll
- si-triv-disjoint-mem-access.ll
- si-vector-hang.ll
- sibling-call.ll
- sign_extend.ll
- simplify-libcalls.ll
- simplifydemandedbits-recursion.ll
- sint_to_fp.f64.ll
- sint_to_fp.i64.ll
- sint_to_fp.ll
- sitofp.f16.ll
- skip-if-dead.ll
- smed3.ll
- sminmax.ll
- sminmax.v2i16.ll
- smrd-vccz-bug.ll
- smrd.ll
- sopk-compares.ll
- spill-alloc-sgpr-init-bug.ll
- spill-cfg-position.ll
- spill-csr-frame-ptr-reg-copy.ll
- spill-empty-live-interval.mir
- spill-m0.ll
- spill-offset-calculation.ll
- spill-scavenge-offset.ll
- spill-to-smem-m0.ll
- spill-wide-sgpr.ll
- split-scalar-i64-add.ll
- split-smrd.ll
- split-vector-memoperand-offsets.ll
- splitkit.mir
- sra.ll
- srem.ll
- srl.ll
- ssubo.ll
- stack-realign.ll
- stack-size-overflow.ll
- stack-slot-color-sgpr-vgpr-spills.mir
- store-barrier.ll
- store-global.ll
- store-hi16.ll
- store-local.ll
- store-private.ll
- store-v3i64.ll
- store-vector-ptrs.ll
- store-weird-sizes.ll
- store_typed.ll
- stress-calls.ll
- structurize.ll
- structurize1.ll
- sub.i16.ll
- sub.ll
- sub.v2i16.ll
- subreg-coalescer-crash.ll
- subreg-coalescer-undef-use.ll
- subreg-eliminate-dead.ll
- subreg-intervals.mir
- subreg-split-live-in-error.mir
- subreg_interference.mir
- swizzle-export.ll
- syncscopes.ll
- tail-call-cgp.ll
- target-cpu.ll
- tex-clause-antidep.ll
- texture-input-merge.ll
- trap.ll
- trunc-bitcast-vector.ll
- trunc-cmp-constant.ll
- trunc-combine.ll
- trunc-store-f64-to-f16.ll
- trunc-store-i1.ll
- trunc-store.ll
- trunc-vector-store-assertion-failure.ll
- trunc.ll
- tti-unroll-prefs.ll
- twoaddr-mad.mir
- uaddo.ll
- udiv.ll
- udivrem.ll
- udivrem24.ll
- udivrem64.ll
- uint_to_fp.f64.ll
- uint_to_fp.i64.ll
- uint_to_fp.ll
- uitofp.f16.ll
- umed3.ll
- unaligned-load-store.ll
- undefined-physreg-sgpr-spill.mir
- undefined-subreg-liverange.ll
- unhandled-loop-condition-assertion.ll
- uniform-branch-intrinsic-cond.ll
- uniform-cfg.ll
- uniform-crash.ll
- uniform-loop-inside-nonuniform.ll
- unify-metadata.ll
- unigine-liveness-crash.ll
- unknown-processor.ll
- unpack-half.ll
- unroll.ll
- unsupported-calls.ll
- unsupported-cc.ll
- urem.ll
- use-sgpr-multiple-times.ll
- usubo.ll
- v1i64-kernel-arg.ll
- v_cndmask.ll
- v_cvt_pk_u8_f32.ll
- v_mac.ll
- v_mac_f16.ll
- v_madak_f16.ll
- valu-i1.ll
- vccz-corrupt-bug-workaround.mir
- vector-alloca-addrspacecast.ll
- vector-alloca-atomic.ll
- vector-alloca.ll
- vector-extract-insert.ll
- vector-legalizer-divergence.ll
- vectorize-global-local.ll
- vertex-fetch-encoding.ll
- vgpr-spill-emergency-stack-slot-compute.ll
- vgpr-spill-emergency-stack-slot.ll
- vi-removed-intrinsics.ll
- vop-shrink-frame-index.mir
- vop-shrink-non-ssa.mir
- vop-shrink.ll
- vselect.ll
- vselect64.ll
- vtx-fetch-branch.ll
- vtx-schedule.ll
- wait.ll
- waitcnt-back-edge-loop.mir
- waitcnt-debug.mir
- waitcnt-flat.ll
- waitcnt-loop-single-basic-block.mir
- waitcnt-looptest.ll
- waitcnt-no-redundant.mir
- waitcnt-permute.mir
- waitcnt.mir
- wave_dispatch_regs.ll
- widen-smrd-loads.ll
- widen-vselect-and-mask.ll
- widen_extending_scalar_loads.ll
- wqm.ll
- wqm.mir
- write-register-vgpr-into-sgpr.ll
- write_register.ll
- wrong-transalu-pos-fix.ll
- xfail.r600.bitcast.ll
- xnor.ll
- xor.ll
- zero_extend.ll
- zext-i64-bit-operand.ll
- zext-lid.ll
inserted-wait-states.mir @release_70 — raw · history · blame
# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN
# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN,CIVI
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN,CIVI,VI
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN,CIVI,VI,GFX9
--- |
; Embedded LLVM IR module for this MIR test.
;
; The first ten kernels are empty stubs (each body is just `ret void`).
; Presumably each one exists only so that the MIR function of the same name
; further down has an IR function to attach to -- confirm against the MIR
; documents in the rest of the file.
define amdgpu_kernel void @div_fmas() { ret void }
define amdgpu_kernel void @s_getreg() { ret void }
define amdgpu_kernel void @s_setreg() { ret void }
define amdgpu_kernel void @vmem_gt_8dw_store() { ret void }
define amdgpu_kernel void @readwrite_lane() { ret void }
define amdgpu_kernel void @rfe() { ret void }
define amdgpu_kernel void @s_mov_fed_b32() { ret void }
define amdgpu_kernel void @s_movrel() { ret void }
define amdgpu_kernel void @v_interp() { ret void }
define amdgpu_kernel void @dpp() { ret void }
; The only kernel with a real body: it spills its pointer argument to an
; alloca in addrspace(5) and describes that slot with llvm.dbg.declare, so
; this function carries debug info (variable !5, expression !11, location !12).
define amdgpu_kernel void @mov_fed_hazard_crash_on_dbg_value(i32 addrspace(1)* %A) {
entry:
%A.addr = alloca i32 addrspace(1)*, align 4, addrspace(5)
store i32 addrspace(1)* %A, i32 addrspace(1)* addrspace(5)* %A.addr, align 4
call void @llvm.dbg.declare(metadata i32 addrspace(1)* addrspace(5)* %A.addr, metadata !5, metadata !11), !dbg !12
ret void
}
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
; Debug-info metadata referenced by the kernel above: compile unit (!0),
; file (!1), the local variable "A" (!5), its enclosing subprogram (!6) and
; the pointer-to-int type chain (!9 -> !10).
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4}
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.9.0 (trunk 268929)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
!1 = !DIFile(filename: "test01.cl", directory: "/dev/null")
!2 = !{}
!3 = !{i32 2, !"Dwarf Version", i32 2}
!4 = !{i32 2, !"Debug Info Version", i32 3}
!5 = !DILocalVariable(name: "A", arg: 1, scope: !6, file: !1, line: 1, type: !9)
!6 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 1, type: !7, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2)
!7 = !DISubroutineType(types: !8)
!8 = !{null, !9}
!9 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !10, size: 64, align: 32)
!10 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!11 = !DIExpression()
!12 = !DILocation(line: 1, column: 30, scope: !6)
...
---
# Test: $vcc written shortly before V_DIV_FMAS_F32 reads it.
#
# The RUN lines execute only the post-RA-hazard-rec pass, so every S_NOP the
# checks below look for must have been inserted by that pass. All four
# targets share the same expectations here (only the common prefix is used).
#
#   bb.0  $vcc is written by S_MOV_B64      -> no S_NOP expected before the
#                                              V_DIV_FMAS
#   bb.1  $vcc is an implicit def of V_CMP_EQ_I32_e32 -> four S_NOP expected
#   bb.2  $vcc is the explicit def of V_CMP_EQ_I32_e64 -> four S_NOP expected
#   bb.3  $vcc is the second def of V_DIV_SCALE_F32    -> four S_NOP expected
#
# NOTE(review): the S_NOP checks in bb.1-bb.3 are plain (not -NEXT) matches,
# so they establish "at least four S_NOP lines", not "exactly four".
# GCN-LABEL: name: div_fmas
# GCN-LABEL: bb.0:
# GCN: S_MOV_B64
# GCN-NOT: S_NOP
# GCN: V_DIV_FMAS
# GCN-LABEL: bb.1:
# GCN: V_CMP_EQ_I32
# GCN: S_NOP
# GCN: S_NOP
# GCN: S_NOP
# GCN: S_NOP
# GCN: V_DIV_FMAS_F32
# GCN-LABEL: bb.2:
# GCN: V_CMP_EQ_I32
# GCN: S_NOP
# GCN: S_NOP
# GCN: S_NOP
# GCN: S_NOP
# GCN: V_DIV_FMAS_F32
# GCN-LABEL: bb.3:
# GCN: V_DIV_SCALE_F32
# GCN: S_NOP
# GCN: S_NOP
# GCN: S_NOP
# GCN: S_NOP
# GCN: V_DIV_FMAS_F32
name: div_fmas
body: |
bb.0:
$vcc = S_MOV_B64 0
$vgpr0 = V_DIV_FMAS_F32 0, $vgpr1, 0, $vgpr2, 0, $vgpr3, 0, 0, implicit $vcc, implicit $exec
S_BRANCH %bb.1
bb.1:
implicit $vcc = V_CMP_EQ_I32_e32 $vgpr1, $vgpr2, implicit $exec
$vgpr0 = V_DIV_FMAS_F32 0, $vgpr1, 0, $vgpr2, 0, $vgpr3, 0, 0, implicit $vcc, implicit $exec
S_BRANCH %bb.2
bb.2:
$vcc = V_CMP_EQ_I32_e64 $vgpr1, $vgpr2, implicit $exec
$vgpr0 = V_DIV_FMAS_F32 0, $vgpr1, 0, $vgpr2, 0, $vgpr3, 0, 0, implicit $vcc, implicit $exec
S_BRANCH %bb.3
bb.3:
$vgpr4, $vcc = V_DIV_SCALE_F32 $vgpr1, $vgpr1, $vgpr3, implicit $exec
$vgpr0 = V_DIV_FMAS_F32 0, $vgpr1, 0, $vgpr2, 0, $vgpr3, 0, 0, implicit $vcc, implicit $exec
S_ENDPGM
...
...
---
# Test: S_GETREG_B32 issued shortly after an S_SETREG.
#
# The trailing immediate on S_SETREG_B32 / S_SETREG_IMM32_B32 / S_GETREG_B32
# is presumably the encoded hardware-register selector -- TODO confirm
# against the instruction definitions.
#
#   bb.0  S_SETREG_B32 1 directly followed by S_GETREG_B32 1
#         -> two "S_NOP 0" expected in between
#   bb.1  same, but the writer is S_SETREG_IMM32_B32
#         -> two "S_NOP 0" expected
#   bb.2  one unrelated S_MOV_B32 already separates writer and reader
#         -> only one "S_NOP 0" is checked for
#   bb.3  writer uses immediate 0, reader uses immediate 1
#         -> S_GETREG must be the very next line (no S_NOP at all)
# GCN-LABEL: name: s_getreg
# GCN-LABEL: bb.0:
# GCN: S_SETREG
# GCN: S_NOP 0
# GCN: S_NOP 0
# GCN: S_GETREG
# GCN-LABEL: bb.1:
# GCN: S_SETREG_IMM32
# GCN: S_NOP 0
# GCN: S_NOP 0
# GCN: S_GETREG
# GCN-LABEL: bb.2:
# GCN: S_SETREG
# GCN: S_NOP 0
# GCN: S_GETREG
# GCN-LABEL: bb.3:
# GCN: S_SETREG
# GCN-NEXT: S_GETREG
name: s_getreg
body: |
bb.0:
S_SETREG_B32 $sgpr0, 1
$sgpr1 = S_GETREG_B32 1
S_BRANCH %bb.1
bb.1:
S_SETREG_IMM32_B32 0, 1
$sgpr1 = S_GETREG_B32 1
S_BRANCH %bb.2
bb.2:
S_SETREG_B32 $sgpr0, 1
$sgpr1 = S_MOV_B32 0
$sgpr2 = S_GETREG_B32 1
S_BRANCH %bb.3
bb.3:
S_SETREG_B32 $sgpr0, 0
$sgpr1 = S_GETREG_B32 1
S_ENDPGM
...
...
---
# Test: two back-to-back S_SETREG_B32 instructions.
#
# Per the RUN lines, tahiti and hawaii do not enable the VI prefix while fiji
# and gfx900 do. In bb.0 and bb.1 every target must therefore show one
# "S_NOP 0", fiji/gfx900 must show a second one, and the second S_SETREG has
# to be the line right after the last matched S_NOP.
#
#   bb.0  both writes use immediate 1            -> S_NOP expected
#   bb.1  the writes use immediates 64 and 128   -> S_NOP still expected
#         NOTE(review): presumably only part of the immediate selects the
#         hardware register, so 64 and 128 alias the same one -- confirm.
#   bb.2  the writes use immediates 1 and 0      -> second S_SETREG must
#         follow immediately (no S_NOP)
# GCN-LABEL: name: s_setreg
# GCN-LABEL: bb.0:
# GCN: S_SETREG
# GCN: S_NOP 0
# VI: S_NOP 0
# GCN-NEXT: S_SETREG
# GCN-LABEL: bb.1:
# GCN: S_SETREG
# GCN: S_NOP 0
# VI: S_NOP 0
# GCN-NEXT: S_SETREG
# GCN-LABEL: bb.2:
# GCN: S_SETREG
# GCN-NEXT: S_SETREG
name: s_setreg
body: |
bb.0:
S_SETREG_B32 $sgpr0, 1
S_SETREG_B32 $sgpr1, 1
S_BRANCH %bb.1
bb.1:
S_SETREG_B32 $sgpr0, 64
S_SETREG_B32 $sgpr1, 128
S_BRANCH %bb.2
bb.2:
S_SETREG_B32 $sgpr0, 1
S_SETREG_B32 $sgpr1, 0
S_ENDPGM
...
...
---
# Test: a VMEM/FLAT store (or cmpswap atomic) whose data register tuple
# contains $vgpr3, immediately followed by a V_MOV_B32 that overwrites $vgpr3.
#
# Per the RUN lines, the CIVI prefix is enabled for hawaii, fiji and gfx900
# but not for tahiti. Where a CIVI S_NOP line is present below, those targets
# must show an S_NOP before the V_MOV_B32; tahiti must show the V_MOV_B32 on
# the very next line after the store.
#
# bb.0 (buffer instructions):
#   DWORD store                              -> no S_NOP on any target
#   DWORDX3 store, third operand 0           -> S_NOP on CIVI targets
#   DWORDX4 store, third operand $sgpr4      -> no S_NOP on any target
#   DWORDX4 store, third operand 0           -> S_NOP on CIVI targets
#   FORMAT_XYZ / FORMAT_XYZW, third op. 0    -> S_NOP on CIVI targets
#   ATOMIC_CMPSWAP_X2, third operand 0       -> S_NOP on CIVI targets
#   (the third operand is presumably soffset -- TODO confirm against the
#   MUBUF operand list)
#
# bb.1 (flat instructions):
#   DWORDX2 store                            -> no S_NOP on any target
#   DWORDX3 / DWORDX4 store                  -> S_NOP on CIVI targets
#   ATOMIC_CMPSWAP_X2 / ATOMIC_FCMPSWAP_X2   -> S_NOP on CIVI targets
#
# NOTE(review): the BUFFER_ATOMIC_CMPSWAP_X2_OFFSET checks were missing even
# though the instruction is part of the input; they were added to mirror the
# FLAT_ATOMIC_CMPSWAP_X2 case. Run the test on all four RUN lines to confirm.
# GCN-LABEL: name: vmem_gt_8dw_store
# GCN-LABEL: bb.0:
# GCN: BUFFER_STORE_DWORD_OFFSET
# GCN-NEXT: V_MOV_B32
# GCN: BUFFER_STORE_DWORDX3_OFFSET
# CIVI: S_NOP
# GCN-NEXT: V_MOV_B32
# GCN: BUFFER_STORE_DWORDX4_OFFSET
# GCN-NEXT: V_MOV_B32
# GCN: BUFFER_STORE_DWORDX4_OFFSET
# CIVI: S_NOP
# GCN-NEXT: V_MOV_B32
# GCN: BUFFER_STORE_FORMAT_XYZ_OFFSET
# CIVI: S_NOP
# GCN-NEXT: V_MOV_B32
# GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET
# CIVI: S_NOP
# GCN-NEXT: V_MOV_B32
# GCN: BUFFER_ATOMIC_CMPSWAP_X2_OFFSET
# CIVI: S_NOP
# GCN-NEXT: V_MOV_B32
# GCN-LABEL: bb.1:
# GCN: FLAT_STORE_DWORDX2
# GCN-NEXT: V_MOV_B32
# GCN: FLAT_STORE_DWORDX3
# CIVI: S_NOP
# GCN-NEXT: V_MOV_B32
# GCN: FLAT_STORE_DWORDX4
# CIVI: S_NOP
# GCN-NEXT: V_MOV_B32
# GCN: FLAT_ATOMIC_CMPSWAP_X2
# CIVI: S_NOP
# GCN-NEXT: V_MOV_B32
# GCN: FLAT_ATOMIC_FCMPSWAP_X2
# CIVI: S_NOP
# GCN: V_MOV_B32
name: vmem_gt_8dw_store
body: |
  bb.0:
    BUFFER_STORE_DWORD_OFFSET $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
    $vgpr3 = V_MOV_B32_e32 0, implicit $exec
    BUFFER_STORE_DWORDX3_OFFSET $vgpr2_vgpr3_vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
    $vgpr3 = V_MOV_B32_e32 0, implicit $exec
    BUFFER_STORE_DWORDX4_OFFSET $vgpr2_vgpr3_vgpr4_vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
    $vgpr3 = V_MOV_B32_e32 0, implicit $exec
    BUFFER_STORE_DWORDX4_OFFSET $vgpr2_vgpr3_vgpr4_vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
    $vgpr3 = V_MOV_B32_e32 0, implicit $exec
    BUFFER_STORE_FORMAT_XYZ_OFFSET $vgpr2_vgpr3_vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
    $vgpr3 = V_MOV_B32_e32 0, implicit $exec
    BUFFER_STORE_FORMAT_XYZW_OFFSET $vgpr2_vgpr3_vgpr4_vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec
    $vgpr3 = V_MOV_B32_e32 0, implicit $exec
    BUFFER_ATOMIC_CMPSWAP_X2_OFFSET $vgpr2_vgpr3_vgpr4_vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, implicit $exec
    $vgpr3 = V_MOV_B32_e32 0, implicit $exec
    S_BRANCH %bb.1
  bb.1:
    FLAT_STORE_DWORDX2 $vgpr0_vgpr1, $vgpr2_vgpr3, 0, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr3 = V_MOV_B32_e32 0, implicit $exec
    FLAT_STORE_DWORDX3 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, 0, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr3 = V_MOV_B32_e32 0, implicit $exec
    FLAT_STORE_DWORDX4 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr3 = V_MOV_B32_e32 0, implicit $exec
    FLAT_ATOMIC_CMPSWAP_X2 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr3 = V_MOV_B32_e32 0, implicit $exec
    FLAT_ATOMIC_FCMPSWAP_X2 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr3 = V_MOV_B32_e32 0, implicit $exec
    S_ENDPGM
...
...
---
# readwrite_lane: a V_READLANE_B32 / V_WRITELANE_B32 that reads a scalar
# register defined by the immediately preceding V_ADD_I32. In every block
# the checks expect four S_NOP instructions between the add and the lane
# instruction.
#   bb.0  V_ADD_I32_e64 defines $sgpr0_sgpr1; V_READLANE_B32 reads $sgpr0.
#   bb.1  V_ADD_I32_e64 defines $sgpr0_sgpr1; V_WRITELANE_B32 reads $sgpr0.
#   bb.2  V_ADD_I32_e32 defines $vcc implicitly; V_READLANE_B32 reads
#         $vcc_lo.
#   bb.3  V_ADD_I32_e32 defines $vcc implicitly; V_WRITELANE_B32 reads
#         $vcc_lo.
# GCN-LABEL: name: readwrite_lane
# GCN-LABEL: bb.0:
# GCN: V_ADD_I32
# GCN: S_NOP
# GCN: S_NOP
# GCN: S_NOP
# GCN: S_NOP
# GCN: V_READLANE_B32
# GCN-LABEL: bb.1:
# GCN: V_ADD_I32
# GCN: S_NOP
# GCN: S_NOP
# GCN: S_NOP
# GCN: S_NOP
# GCN: V_WRITELANE_B32
# GCN-LABEL: bb.2:
# GCN: V_ADD_I32
# GCN: S_NOP
# GCN: S_NOP
# GCN: S_NOP
# GCN: S_NOP
# GCN: V_READLANE_B32
# GCN-LABEL: bb.3:
# GCN: V_ADD_I32
# GCN: S_NOP
# GCN: S_NOP
# GCN: S_NOP
# GCN: S_NOP
# GCN: V_WRITELANE_B32
name: readwrite_lane
body: |
bb.0:
$vgpr0,$sgpr0_sgpr1 = V_ADD_I32_e64 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
$sgpr4 = V_READLANE_B32 $vgpr4, $sgpr0
S_BRANCH %bb.1
bb.1:
$vgpr0,$sgpr0_sgpr1 = V_ADD_I32_e64 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
$vgpr4 = V_WRITELANE_B32 $sgpr0, $sgpr0, $vgpr4
S_BRANCH %bb.2
bb.2:
$vgpr0,implicit $vcc = V_ADD_I32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
$sgpr4 = V_READLANE_B32 $vgpr4, $vcc_lo
S_BRANCH %bb.3
bb.3:
$vgpr0,implicit $vcc = V_ADD_I32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
$vgpr4 = V_WRITELANE_B32 $sgpr4, $vcc_lo, $vgpr4
S_ENDPGM
...
...
---
# rfe: an S_RFE_B64 directly after an S_SETREG_B32.
#   bb.0  S_SETREG_B32 with immediate 3; runs that enable the VI prefix
#         expect an S_NOP before the S_RFE_B64, all other runs expect the
#         S_RFE_B64 immediately.
#   bb.1  S_SETREG_B32 with immediate 0; the S_RFE_B64 must follow
#         immediately on every run (no nop).
#         NOTE(review): presumably immediate 3 selects the hardware
#         register that S_RFE_B64 depends on and 0 does not - confirm.
# GCN-LABEL: name: rfe
# GCN-LABEL: bb.0:
# GCN: S_SETREG
# VI: S_NOP
# GCN-NEXT: S_RFE_B64
# GCN-LABEL: bb.1:
# GCN: S_SETREG
# GCN-NEXT: S_RFE_B64
name: rfe
body: |
bb.0:
S_SETREG_B32 $sgpr0, 3
S_RFE_B64 $sgpr2_sgpr3
S_BRANCH %bb.1
bb.1:
S_SETREG_B32 $sgpr0, 0
S_RFE_B64 $sgpr2_sgpr3
S_ENDPGM
...
...
---
# s_mov_fed_b32: S_MOV_FED_B32 writes $sgpr0 and the next instruction reads
# $sgpr0. Runs that enable the GFX9 prefix expect an S_NOP in between; all
# other runs expect the reader immediately after the S_MOV_FED_B32.
#   bb.0  the reader is a scalar move (S_MOV_B32).
#   bb.1  the reader is a vector move (V_MOV_B32_e32).
# GCN-LABEL: name: s_mov_fed_b32
# GCN-LABEL: bb.0:
# GCN: S_MOV_FED_B32
# GFX9: S_NOP
# GCN-NEXT: S_MOV_B32
# GCN-LABEL: bb.1:
# GCN: S_MOV_FED_B32
# GFX9: S_NOP
# GCN-NEXT: V_MOV_B32
name: s_mov_fed_b32
body: |
bb.0:
$sgpr0 = S_MOV_FED_B32 $sgpr0
$sgpr0 = S_MOV_B32 $sgpr0
S_BRANCH %bb.1
bb.1:
$sgpr0 = S_MOV_FED_B32 $sgpr0
$vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec
S_ENDPGM
...
...
---
# s_movrel: $m0 is written by S_MOV_B32 and the next instruction is an
# S_MOVREL* that takes $m0 as an implicit use. Runs that enable the GFX9
# prefix expect an S_NOP in between; all other runs expect the S_MOVREL*
# immediately after the write.
#   bb.0  S_MOVRELS_B32
#   bb.1  S_MOVRELS_B64
#   bb.2  S_MOVRELD_B32
#   bb.3  S_MOVRELD_B64
# GCN-LABEL: name: s_movrel
# GCN-LABEL: bb.0:
# GCN: S_MOV_B32
# GFX9: S_NOP
# GCN-NEXT: S_MOVRELS_B32
# GCN-LABEL: bb.1:
# GCN: S_MOV_B32
# GFX9: S_NOP
# GCN-NEXT: S_MOVRELS_B64
# GCN-LABEL: bb.2:
# GCN: S_MOV_B32
# GFX9: S_NOP
# GCN-NEXT: S_MOVRELD_B32
# GCN-LABEL: bb.3:
# GCN: S_MOV_B32
# GFX9: S_NOP
# GCN-NEXT: S_MOVRELD_B64
name: s_movrel
body: |
bb.0:
$m0 = S_MOV_B32 0
$sgpr0 = S_MOVRELS_B32 $sgpr0, implicit $m0
S_BRANCH %bb.1
bb.1:
$m0 = S_MOV_B32 0
$sgpr0_sgpr1 = S_MOVRELS_B64 $sgpr0_sgpr1, implicit $m0
S_BRANCH %bb.2
bb.2:
$m0 = S_MOV_B32 0
$sgpr0 = S_MOVRELD_B32 $sgpr0, implicit $m0
S_BRANCH %bb.3
bb.3:
$m0 = S_MOV_B32 0
$sgpr0_sgpr1 = S_MOVRELD_B64 $sgpr0_sgpr1, implicit $m0
S_ENDPGM
...
...
---
# v_interp: $m0 is written by S_MOV_B32 and the next instruction is a
# V_INTERP_* that takes $m0 as an implicit use. Runs that enable the GFX9
# prefix expect exactly S_MOV_B32, S_NOP, V_INTERP_* on consecutive lines;
# all other runs expect the V_INTERP_* right after the S_MOV_B32.
#   bb.0  V_INTERP_P1_F32
#   bb.1  V_INTERP_P2_F32
#   bb.2  V_INTERP_P1_F32_16bank
#   bb.3  V_INTERP_MOV_F32
# GCN-LABEL: name: v_interp
# GCN-LABEL: bb.0:
# GCN: S_MOV_B32
# GFX9-NEXT: S_NOP
# GCN-NEXT: V_INTERP_P1_F32
# GCN-LABEL: bb.1:
# GCN: S_MOV_B32
# GFX9-NEXT: S_NOP
# GCN-NEXT: V_INTERP_P2_F32
# GCN-LABEL: bb.2:
# GCN: S_MOV_B32
# GFX9-NEXT: S_NOP
# GCN-NEXT: V_INTERP_P1_F32_16bank
# GCN-LABEL: bb.3:
# GCN: S_MOV_B32
# GFX9-NEXT: S_NOP
# GCN-NEXT: V_INTERP_MOV_F32
name: v_interp
body: |
bb.0:
$m0 = S_MOV_B32 0
$vgpr0 = V_INTERP_P1_F32 $vgpr0, 0, 0, implicit $m0, implicit $exec
S_BRANCH %bb.1
bb.1:
$m0 = S_MOV_B32 0
$vgpr0 = V_INTERP_P2_F32 $vgpr0, $vgpr1, 0, 0, implicit $m0, implicit $exec
S_BRANCH %bb.2
bb.2:
$m0 = S_MOV_B32 0
$vgpr0 = V_INTERP_P1_F32_16bank $vgpr0, 0, 0, implicit $m0, implicit $exec
S_BRANCH %bb.3
bb.3:
$m0 = S_MOV_B32 0
$vgpr0 = V_INTERP_MOV_F32 0, 0, 0, implicit $m0, implicit $exec
S_ENDPGM
...
...
---
# dpp: a V_MOV_B32_dpp placed right after an instruction it depends on.
# Apart from the function-name label, all expectations here use the VI
# prefix, so only runs that enable that prefix verify the block contents.
#   bb.0  V_MOV_B32_e32 writes $vgpr0, which the DPP move then reads;
#         exactly two S_NOP 0 are expected in between.
#   bb.1  V_CMPX_EQ_I32_e32 implicitly defines $exec (and $vcc) before the
#         DPP move; exactly five S_NOP 0 are expected in between.
# GCN-LABEL: name: dpp
# VI-LABEL: bb.0:
# VI: V_MOV_B32_e32
# VI-NEXT: S_NOP 0
# VI-NEXT: S_NOP 0
# VI-NEXT: V_MOV_B32_dpp
# VI-LABEL: bb.1:
# VI: V_CMPX_EQ_I32_e32
# VI-NEXT: S_NOP 0
# VI-NEXT: S_NOP 0
# VI-NEXT: S_NOP 0
# VI-NEXT: S_NOP 0
# VI-NEXT: S_NOP 0
# VI-NEXT: V_MOV_B32_dpp
name: dpp
body: |
bb.0:
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
$vgpr1 = V_MOV_B32_dpp $vgpr1, $vgpr0, 0, 15, 15, 0, implicit $exec
S_BRANCH %bb.1
bb.1:
implicit $exec, implicit $vcc = V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit $exec
$vgpr3 = V_MOV_B32_dpp $vgpr3, $vgpr0, 0, 15, 15, 0, implicit $exec
S_ENDPGM
...
---
# mov_fed_hazard_crash_on_dbg_value: a single-block function whose body
# contains a DBG_VALUE with a $noreg operand in the middle of ordinary
# scalar/vector moves, a scalar load and two buffer stores.
# NOTE(review): this document carries no check lines, and the body contains
# no S_MOV_FED_B32 despite the name. Presumably it is a regression test
# that only needs the pass to get through the DBG_VALUE without crashing -
# confirm against the commit that added it.
name: mov_fed_hazard_crash_on_dbg_value
alignment: 0
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
liveins:
- { reg: '$sgpr4_sgpr5' }
- { reg: '$sgpr6_sgpr7' }
- { reg: '$sgpr9' }
- { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 16
offsetAdjustment: 0
maxAlignment: 8
adjustsStack: false
hasCalls: false
maxCallFrameSize: 0
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
stack:
- { id: 0, name: A.addr, offset: 0, size: 8, alignment: 8, local-offset: 0 }
- { id: 1, offset: 8, size: 4, alignment: 4 }
body: |
bb.0.entry:
liveins: $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr9, $sgpr0_sgpr1_sgpr2_sgpr3
$flat_scr_lo = S_ADD_U32 $sgpr6, $sgpr9, implicit-def $scc
$flat_scr_hi = S_ADDC_U32 $sgpr7, 0, implicit-def $scc, implicit $scc
DBG_VALUE $noreg, 2, !5, !11, debug-location !12
$sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed $sgpr4_sgpr5, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
dead $sgpr6_sgpr7 = KILL $sgpr4_sgpr5
$sgpr8 = S_MOV_B32 $sgpr5
$vgpr0 = V_MOV_B32_e32 killed $sgpr8, implicit $exec
BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr9, 4, 0, 0, 0, implicit $exec :: (store 4 into %ir.A.addr + 4)
$sgpr8 = S_MOV_B32 $sgpr4, implicit killed $sgpr4_sgpr5
$vgpr0 = V_MOV_B32_e32 killed $sgpr8, implicit $exec
BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr9, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.A.addr)
S_ENDPGM
...
|