llvm.org GIT mirror llvm / 13240bd
Merging r323907 and r323913:

------------------------------------------------------------------------
r323907 | mareko | 2018-01-31 21:17:52 +0100 (Wed, 31 Jan 2018) | 11 lines

[SeparateConstOffsetFromGEP] Preserve metadata when splitting GEPs

Summary:
!amdgpu.uniform needs to be preserved for AMDGPU, otherwise bad things happen.

Reviewers: arsenm, nhaehnle, jingyue, broune, majnemer, bjarke.roune, dblaikie

Subscribers: wdng, tpr, llvm-commits

Differential Revision: https://reviews.llvm.org/D42744
------------------------------------------------------------------------

------------------------------------------------------------------------
r323913 | mareko | 2018-01-31 21:49:19 +0100 (Wed, 31 Jan 2018) | 1 line

[SeparateConstOffsetFromGEP] Fix up addrspace in the AMDGPU test
------------------------------------------------------------------------

git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_60@324088 91177308-0d34-0410-b5e6-96231b3b80d8

Hans Wennborg 1 year, 7 months ago
2 changed file(s) with 47 addition(s) and 0 deletion(s). Raw diff Collapse all Expand all
10701070 NewGEP = GetElementPtrInst::Create(GEP->getResultElementType(), NewGEP,
10711071 ConstantInt::get(IntPtrTy, Index, true),
10721072 GEP->getName(), GEP);
1073 NewGEP->copyMetadata(*GEP);
10731074 // Inherit the inbounds attribute of the original GEP.
10741075 cast<GetElementPtrInst>(NewGEP)->setIsInBounds(GEPWasInBounds);
10751076 } else {
10941095 Type::getInt8Ty(GEP->getContext()), NewGEP,
10951096 ConstantInt::get(IntPtrTy, AccumulativeByteOffset, true), "uglygep",
10961097 GEP);
1098 NewGEP->copyMetadata(*GEP);
10971099 // Inherit the inbounds attribute of the original GEP.
10981100 cast<GetElementPtrInst>(NewGEP)->setIsInBounds(GEPWasInBounds);
10991101 if (GEP->getType() != I8PtrTy)
9191 store float %tmp21, float addrspace(1)* %output, align 4
9292 ret void
9393 }
94
95 ; IR-LABEL: @keep_metadata(
96 ; IR: getelementptr {{.*}} !amdgpu.uniform
97 ; IR: getelementptr {{.*}} !amdgpu.uniform
98 ; IR: getelementptr {{.*}} !amdgpu.uniform
99 define amdgpu_ps <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @keep_metadata([0 x <4 x i32>] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), [0 x <4 x i32>] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #5 {
100 main_body:
101 %22 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 %5) #8
102 %23 = bitcast float %22 to i32
103 %24 = shl i32 %23, 1
104 %25 = getelementptr [0 x <8 x i32>], [0 x <8 x i32>] addrspace(2)* %1, i32 0, i32 %24, !amdgpu.uniform !0
105 %26 = load <8 x i32>, <8 x i32> addrspace(2)* %25, align 32, !invariant.load !0
106 %27 = shl i32 %23, 2
107 %28 = or i32 %27, 3
108 %29 = bitcast [0 x <8 x i32>] addrspace(2)* %1 to [0 x <4 x i32>] addrspace(2)*
109 %30 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %29, i32 0, i32 %28, !amdgpu.uniform !0
110 %31 = load <4 x i32>, <4 x i32> addrspace(2)* %30, align 16, !invariant.load !0
111 %32 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> zeroinitializer, <8 x i32> %26, <4 x i32> %31, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #8
112 %33 = extractelement <4 x float> %32, i32 0
113 %34 = extractelement <4 x float> %32, i32 1
114 %35 = extractelement <4 x float> %32, i32 2
115 %36 = extractelement <4 x float> %32, i32 3
116 %37 = bitcast float %4 to i32
117 %38 = insertvalue <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %37, 4
118 %39 = insertvalue <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %38, float %33, 5
119 %40 = insertvalue <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %39, float %34, 6
120 %41 = insertvalue <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %40, float %35, 7
121 %42 = insertvalue <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %41, float %36, 8
122 %43 = insertvalue <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %42, float %20, 19
123 ret <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %43
124 }
125
126 ; Function Attrs: nounwind readnone speculatable
127 declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #6
128
129 ; Function Attrs: nounwind readonly
130 declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #7
131
132
133 !0 = !{}
134
135 attributes #5 = { "InitialPSInputAddr"="45175" }
136 attributes #6 = { nounwind readnone speculatable }
137 attributes #7 = { nounwind readonly }
138 attributes #8 = { nounwind readnone }