llvm.org GIT mirror llvm / ffac88a
AMDGPU: Fix converting unanalyzable global loads to SMRD. Not all memory dependence queries succeed, so this needs to be conservative if a query fails. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@307861 91177308-0d34-0410-b5e6-96231b3b80d8 Matt Arsenault 3 years ago
2 changed file(s) with 25 addition(s) and 4 deletion(s). Raw diff Collapse all Expand all
107107 DFS(Start, Checklist);
108108 for (auto &BB : Checklist) {
109109 BasicBlock::iterator StartIt = (!L && (BB == Load->getParent())) ?
110 BasicBlock::iterator(Load) : BB->end();
111 if (MDR->getPointerDependencyFrom(MemoryLocation(Ptr),
112 true, StartIt, BB, Load).isClobber())
113 return true;
110 BasicBlock::iterator(Load) : BB->end();
111 auto Q = MDR->getPointerDependencyFrom(MemoryLocation(Ptr), true,
112 StartIt, BB, Load);
113 if (Q.isClobber() || Q.isUnknown())
114 return true;
114115 }
115116 return false;
116117 }
0 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -memdep-block-scan-limit=1 -amdgpu-scalarize-global-loads -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
1
2 ; GCN-LABEL: {{^}}unknown_memdep_analysis:
3 ; GCN: flat_load_dword
4 ; GCN: flat_load_dword
5 ; GCN: flat_store_dword
6 define amdgpu_kernel void @unknown_memdep_analysis(float addrspace(1)* nocapture readonly %arg) #0 {
7 bb:
8 %tmp53 = load float, float addrspace(1)* undef, align 4
9 %tmp54 = getelementptr inbounds float, float addrspace(1)* %arg, i32 31
10 %tmp55 = load float, float addrspace(1)* %tmp54, align 4
11 %tmp56 = tail call float @llvm.fmuladd.f32(float undef, float %tmp53, float %tmp55)
12 store float %tmp56, float addrspace(1)* undef, align 4
13 ret void
14 }
15
16 declare float @llvm.fmuladd.f32(float, float, float) #1
17
18 attributes #0 = { nounwind }
19 attributes #1 = { nounwind readnone speculatable }