llvm.org GIT mirror llvm / 82fbd87
[X86] Don't consider functions ABI compatible for ArgumentPromotion pass if they view 512-bit vectors differently. The use of the -mprefer-vector-width=256 command line option mixed with functions using vector intrinsics can create situations where one function thinks 512-bit vectors are legal, but another function does not. If a 512 bit vector is passed between them via a pointer, it's possible ArgumentPromotion might try to pass by value instead. This will result in type legalization for the two functions handling the 512 bit vector differently leading to runtime failures. Had the 512 bit vector been passed by value from clang codegen, both functions would have been tagged with a min-legal-vector-width=512 function attribute. That would make them be legalized the same way. I observed this issue in 32-bit mode where a union containing a 512 bit vector was being passed by a function that used intrinsics to one that did not. The caller ended up passing in zmm0 and the callee tried to read it from ymm0 and ymm1. The fix implemented here is just to consider it a mismatch if two functions would handle 512 bit vectors differently without looking at the types that are being considered. This is the easiest and safest fix, but it can be improved in the future. Differential Revision: https://reviews.llvm.org/D58390 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@354376 91177308-0d34-0410-b5e6-96231b3b80d8 Craig Topper 6 months ago
3 changed file(s) with 203 addition(s) and 0 deletion(s). Raw diff Collapse all Expand all
30693069 return (RealCallerBits & RealCalleeBits) == RealCalleeBits;
30703070 }
30713071
3072 bool X86TTIImpl::areFunctionArgsABICompatible(
3073 const Function *Caller, const Function *Callee,
3074 SmallPtrSetImpl &Args) const {
3075 if (!BaseT::areFunctionArgsABICompatible(Caller, Callee, Args))
3076 return false;
3077
3078 // If we get here, we know the target features match. If one function
3079 // considers 512-bit vectors legal and the other does not, consider them
3080 // incompatible.
3081 // FIXME Look at the arguments and only consider 512 bit or larger vectors?
3082 const TargetMachine &TM = getTLI()->getTargetMachine();
3083
3084 return TM.getSubtarget(*Caller).useAVX512Regs() ==
3085 TM.getSubtarget(*Callee).useAVX512Regs();
3086 }
3087
30723088 const X86TTIImpl::TTI::MemCmpExpansionOptions *
30733089 X86TTIImpl::enableMemCmpExpansion(bool IsZeroCmp) const {
30743090 // Only enable vector loads for equality comparison.
188188 bool isFCmpOrdCheaperThanFCmpZero(Type *Ty);
189189 bool areInlineCompatible(const Function *Caller,
190190 const Function *Callee) const;
191 bool areFunctionArgsABICompatible(const Function *Caller,
192 const Function *Callee,
193 SmallPtrSetImpl &Args) const;
191194 const TTI::MemCmpExpansionOptions *enableMemCmpExpansion(
192195 bool IsZeroCmp) const;
193196 bool enableInterleavedAccessVectorization();
0 ; RUN: opt -S -argpromotion < %s | FileCheck %s
1 ; RUN: opt -S -passes=argpromotion < %s | FileCheck %s
2 ; Test that we only promote arguments when the caller/callee have compatible
3 ; function attributes.
4
5 target triple = "x86_64-unknown-linux-gnu"
6
7 ; This should promote
8 ; CHECK-LABEL: @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* %arg, <8 x i64> %arg1.val)
; Callee copies a 512-bit vector from %arg1 to %arg. Both caller and callee
; use attribute set #0 (min-legal-vector-width=512, prefer-vector-width=512),
; so both treat 512-bit vectors the same way and %arg1 is promoted to a
; by-value <8 x i64> (checked by the CHECK-LABEL above).
9 define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #0 {
10 bb:
11 %tmp = load <8 x i64>, <8 x i64>* %arg1
12 store <8 x i64> %tmp, <8 x i64>* %arg
13 ret void
14 }
15
; Caller: zero-initializes a stack vector and passes it to the callee by
; pointer; ArgumentPromotion rewrites this call site to pass the loaded value.
16 define void @avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* %arg) #0 {
17 bb:
18 %tmp = alloca <8 x i64>, align 32
19 %tmp2 = alloca <8 x i64>, align 32
20 %tmp3 = bitcast <8 x i64>* %tmp to i8*
21 call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
22 call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* %tmp2, <8 x i64>* %tmp)
23 %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32
24 store <8 x i64> %tmp4, <8 x i64>* %arg, align 2
25 ret void
26 }
27
28 ; This should promote
29 ; CHECK-LABEL: @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64> %arg1.val)
; Both functions use attribute set #1 (min-legal-vector-width=512,
; prefer-vector-width=256). Identical attributes mean identical 512-bit
; legalization, so promotion is safe and expected.
30 define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #1 {
31 bb:
32 %tmp = load <8 x i64>, <8 x i64>* %arg1
33 store <8 x i64> %tmp, <8 x i64>* %arg
34 ret void
35 }
36
37 define void @avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg) #1 {
38 bb:
39 %tmp = alloca <8 x i64>, align 32
40 %tmp2 = alloca <8 x i64>, align 32
41 %tmp3 = bitcast <8 x i64>* %tmp to i8*
42 call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
43 call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp)
44 %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32
45 store <8 x i64> %tmp4, <8 x i64>* %arg, align 2
46 ret void
47 }
48
49 ; This should promote
50 ; CHECK-LABEL: @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64> %arg1.val)
; Callee uses #1 (prefer 256) and caller uses #0 (prefer 512), but both have
; min-legal-vector-width=512, so both consider 512-bit vectors legal.
; Differing *preference* alone does not block promotion.
51 define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #1 {
52 bb:
53 %tmp = load <8 x i64>, <8 x i64>* %arg1
54 store <8 x i64> %tmp, <8 x i64>* %arg
55 ret void
56 }
57
58 define void @avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* %arg) #0 {
59 bb:
60 %tmp = alloca <8 x i64>, align 32
61 %tmp2 = alloca <8 x i64>, align 32
62 %tmp3 = bitcast <8 x i64>* %tmp to i8*
63 call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
64 call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp)
65 %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32
66 store <8 x i64> %tmp4, <8 x i64>* %arg, align 2
67 ret void
68 }
69
70 ; This should promote
71 ; CHECK-LABEL: @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* %arg, <8 x i64> %arg1.val)
; Mirror of the previous case: callee #0 (prefer 512), caller #1 (prefer 256).
; Both sides have min-legal-vector-width=512, so 512-bit handling matches and
; promotion proceeds.
72 define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #0 {
73 bb:
74 %tmp = load <8 x i64>, <8 x i64>* %arg1
75 store <8 x i64> %tmp, <8 x i64>* %arg
76 ret void
77 }
78
79 define void @avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* %arg) #1 {
80 bb:
81 %tmp = alloca <8 x i64>, align 32
82 %tmp2 = alloca <8 x i64>, align 32
83 %tmp3 = bitcast <8 x i64>* %tmp to i8*
84 call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
85 call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* %tmp2, <8 x i64>* %tmp)
86 %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32
87 store <8 x i64> %tmp4, <8 x i64>* %arg, align 2
88 ret void
89 }
90
91 ; This should not promote
92 ; CHECK-LABEL: @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1)
; Negative test: callee uses #1 (min-legal-vector-width=512) while the caller
; uses #2 (min-legal-vector-width=256), so one side considers 512-bit vectors
; legal and the other does not. The new areFunctionArgsABICompatible hook
; reports a mismatch and %arg1 stays a pointer (per the CHECK-LABEL).
; NOTE(review): the function names suggest the callee is the legal256 side;
; the attribute groups visible here put #1 on the callee — verify against the
; upstream test if editing.
93 define internal fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #1 {
94 bb:
95 %tmp = load <8 x i64>, <8 x i64>* %arg1
96 store <8 x i64> %tmp, <8 x i64>* %arg
97 ret void
98 }
99
100 define void @avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg) #2 {
101 bb:
102 %tmp = alloca <8 x i64>, align 32
103 %tmp2 = alloca <8 x i64>, align 32
104 %tmp3 = bitcast <8 x i64>* %tmp to i8*
105 call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
106 call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp)
107 %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32
108 store <8 x i64> %tmp4, <8 x i64>* %arg, align 2
109 ret void
110 }
111
112 ; This should not promote
113 ; CHECK-LABEL: @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1)
; Negative test, mirrored: callee #2 (min-legal 256) vs caller #1
; (min-legal 512). The 512-bit legality mismatch blocks promotion; the
; CHECK-LABEL confirms %arg1 remains a pointer argument.
114 define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #2 {
115 bb:
116 %tmp = load <8 x i64>, <8 x i64>* %arg1
117 store <8 x i64> %tmp, <8 x i64>* %arg
118 ret void
119 }
120
121 define void @avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* %arg) #1 {
122 bb:
123 %tmp = alloca <8 x i64>, align 32
124 %tmp2 = alloca <8 x i64>, align 32
125 %tmp3 = bitcast <8 x i64>* %tmp to i8*
126 call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
127 call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp)
128 %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32
129 store <8 x i64> %tmp4, <8 x i64>* %arg, align 2
130 ret void
131 }
132
133 ; This should promote
134 ; CHECK-LABEL: @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %arg, <8 x i64> %arg1.val)
; AVX2-only pair: callee #3 and caller #4 differ in min-legal-vector-width
; (512 vs 256), but with only +avx2 neither function can use 512-bit
; registers, so presumably both sides legalize identically and promotion is
; still allowed (consistent with the CHECK-LABEL).
135 define internal fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #3 {
136 bb:
137 %tmp = load <8 x i64>, <8 x i64>* %arg1
138 store <8 x i64> %tmp, <8 x i64>* %arg
139 ret void
140 }
141
142 define void @avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %arg) #4 {
143 bb:
144 %tmp = alloca <8 x i64>, align 32
145 %tmp2 = alloca <8 x i64>, align 32
146 %tmp3 = bitcast <8 x i64>* %tmp to i8*
147 call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
148 call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp)
149 %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32
150 store <8 x i64> %tmp4, <8 x i64>* %arg, align 2
151 ret void
152 }
153
154 ; This should promote
155 ; CHECK-LABEL: @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %arg, <8 x i64> %arg1.val)
; Mirror of the previous AVX2 case: callee #4 (min-legal 256), caller #3
; (min-legal 512). Without AVX-512 features, 512-bit registers are off for
; both, so the mismatch in min-legal-vector-width is presumably harmless and
; promotion proceeds (per the CHECK-LABEL).
156 define internal fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #4 {
157 bb:
158 %tmp = load <8 x i64>, <8 x i64>* %arg1
159 store <8 x i64> %tmp, <8 x i64>* %arg
160 ret void
161 }
162
163 define void @avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %arg) #3 {
164 bb:
165 %tmp = alloca <8 x i64>, align 32
166 %tmp2 = alloca <8 x i64>, align 32
167 %tmp3 = bitcast <8 x i64>* %tmp to i8*
168 call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
169 call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp)
170 %tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32
171 store <8 x i64> %tmp4, <8 x i64>* %arg, align 2
172 ret void
173 }
174
175 ; Function Attrs: argmemonly nounwind
176 declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) #5
177
; Attribute sets used by the tests above. The pass compares the per-function
; subtargets these imply; promotion is blocked only when one side considers
; 512-bit vectors legal and the other does not.
; #0: AVX-512 features, 512-bit legal, prefers 512-bit vectors.
178 attributes #0 = { inlinehint norecurse nounwind uwtable "target-features"="+avx512vl" "min-legal-vector-width"="512" "prefer-vector-width"="512" }
; #1: AVX-512 features, 512-bit legal, but prefers 256-bit vectors.
179 attributes #1 = { inlinehint norecurse nounwind uwtable "target-features"="+avx512vl" "min-legal-vector-width"="512" "prefer-vector-width"="256" }
; #2: AVX-512 features, but only 256-bit vectors legal.
180 attributes #2 = { inlinehint norecurse nounwind uwtable "target-features"="+avx512vl" "min-legal-vector-width"="256" "prefer-vector-width"="256" }
; #3/#4: AVX2 only — no 512-bit registers regardless of min-legal-vector-width.
181 attributes #3 = { inlinehint norecurse nounwind uwtable "target-features"="+avx2" "min-legal-vector-width"="512" "prefer-vector-width"="256" }
182 attributes #4 = { inlinehint norecurse nounwind uwtable "target-features"="+avx2" "min-legal-vector-width"="256" "prefer-vector-width"="256" }
183 attributes #5 = { argmemonly nounwind }