llvm.org GIT mirror llvm / 8ffc96a
Allow vectorization of intrinsics such as powi, cttz and ctlz in the Loop and SLP Vectorizers. This patch adds support for vectorizing intrinsics such as powi, cttz and ctlz in the vectorizers. These intrinsics differ from other intrinsics in that the second argument to these functions must be the same in all calls in order to vectorize them, and it should be represented as a scalar. Review: http://reviews.llvm.org/D3851#inline-32769 and http://reviews.llvm.org/D3937#inline-32857 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209873 91177308-0d34-0410-b5e6-96231b3b80d8 Karthik Bhat 5 years ago
5 changed file(s) with 426 addition(s) and 2 deletion(s). Raw diff Collapse all Expand all
4747 case Intrinsic::pow:
4848 case Intrinsic::fma:
4949 case Intrinsic::fmuladd:
50 case Intrinsic::ctlz:
51 case Intrinsic::cttz:
52 case Intrinsic::powi:
5053 return true;
5154 default:
5255 return false;
56 }
57 }
58
// Reports whether operand number ScalarOpdIdx of the intrinsic ID is one of
// the operands singled out by this helper.
//
// Returns true only when ID is ctlz, cttz or powi AND ScalarOpdIdx is 1
// (i.e. the second call argument); returns false for every other intrinsic
// and for every other operand index.
//
// NOTE(review): "Instrinsic" in the name is a misspelling of "Intrinsic";
// the spelling is part of the function's name and is left untouched here.
59 static bool hasVectorInstrinsicScalarOpd(Intrinsic::ID ID,
60 unsigned ScalarOpdIdx) {
61 switch (ID) {
62 case Intrinsic::ctlz:
63 case Intrinsic::cttz:
64 case Intrinsic::powi:
// For these three intrinsics only operand index 1 qualifies.
65 return (ScalarOpdIdx == 1);
66 default:
67 return false;
5368 }
5469 }
5570
31223122 scalarizeInstruction(it);
31233123 break;
31243124 default:
3125 bool HasScalarOpd = hasVectorInstrinsicScalarOpd(ID, 1);
31253126 for (unsigned Part = 0; Part < UF; ++Part) {
31263127 SmallVector Args;
31273128 for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) {
3129 if (HasScalarOpd && i == 1) {
3130 Args.push_back(CI->getArgOperand(i));
3131 continue;
3132 }
31283133 VectorParts &Arg = getVectorValue(CI->getArgOperand(i));
31293134 Args.push_back(Arg[Part]);
31303135 }
34733478 return false;
34743479 }
34753480
3481 // Intrinsics such as powi,cttz and ctlz are legal to vectorize if the
3482 // second argument is the same (i.e. loop invariant)
3483 if (CI &&
3484 hasVectorInstrinsicScalarOpd(getIntrinsicIDForCall(CI, TLI), 1)) {
3485 if (!SE->isLoopInvariant(SE->getSCEV(CI->getOperand(1)), TheLoop)) {
3486 DEBUG(dbgs() << "LV: Found unvectorizable intrinsic " << *CI << "\n");
3487 return false;
3488 }
3489 }
3490
34763491 // Check that the instruction return type is vectorizable.
34773492 // Also, we can't vectorize extractelement instructions.
34783493 if ((!VectorType::isValidElementType(it->getType()) &&
960960 DEBUG(dbgs() << "SLP: Non-vectorizable call.\n");
961961 return;
962962 }
963
964963 Function *Int = CI->getCalledFunction();
965
964 Value *A1I = nullptr;
965 if (hasVectorInstrinsicScalarOpd(ID, 1))
966 A1I = CI->getArgOperand(1);
966967 for (unsigned i = 1, e = VL.size(); i != e; ++i) {
967968 CallInst *CI2 = dyn_cast(VL[i]);
968969 if (!CI2 || CI2->getCalledFunction() != Int ||
971972 DEBUG(dbgs() << "SLP: mismatched calls:" << *CI << "!=" << *VL[i]
972973 << "\n");
973974 return;
975 }
976 // ctlz,cttz and powi are special intrinsics whose second argument
977 // should be same in order for them to be vectorized.
978 if (hasVectorInstrinsicScalarOpd(ID, 1)) {
979 Value *A1J = CI2->getArgOperand(1);
980 if (A1I != A1J) {
981 newTreeEntry(VL, false);
982 DEBUG(dbgs() << "SLP: mismatched arguments in call:" << *CI
983 << " argument "<< A1I<<"!=" << A1J
984 << "\n");
985 return;
986 }
974987 }
975988 }
976989
16511664 case Instruction::Call: {
16521665 CallInst *CI = cast(VL0);
16531666 setInsertPointAfterBundle(E->Scalars);
1667 Function *FI;
1668 Intrinsic::ID IID = Intrinsic::not_intrinsic;
1669 if (CI && (FI = CI->getCalledFunction())) {
1670 IID = (Intrinsic::ID) FI->getIntrinsicID();
1671 }
16541672 std::vector OpVecs;
16551673 for (int j = 0, e = CI->getNumArgOperands(); j < e; ++j) {
16561674 ValueList OpVL;
1675 // ctlz,cttz and powi are special intrinsics whose second argument is
1676 // a scalar. This argument should not be vectorized.
1677 if (hasVectorInstrinsicScalarOpd(IID, 1) && j == 1) {
1678 CallInst *CEI = cast(E->Scalars[0]);
1679 OpVecs.push_back(CEI->getArgOperand(j));
1680 continue;
1681 }
16571682 for (int i = 0, e = E->Scalars.size(); i < e; ++i) {
16581683 CallInst *CEI = cast(E->Scalars[i]);
16591684 OpVL.push_back(CEI->getArgOperand(j));
10891089 ret void
10901090 }
10911091
1092 declare double @llvm.powi.f64(double %Val, i32 %power) nounwind readnone
1093
; Positive loop test for powi: each iteration loads y[i], calls llvm.powi.f64
; with the exponent %P and stores the result to x[i]. %P is a function
; argument, so the exponent is identical in every iteration. The expectations
; below require a llvm.powi.v4f64 call to appear in the output.
1094 ;CHECK-LABEL: @powi_f64(
1095 ;CHECK: llvm.powi.v4f64
1096 ;CHECK: ret void
1097 define void @powi_f64(i32 %n, double* noalias %y, double* noalias %x, i32 %P) nounwind uwtable {
1098 entry:
1099 %cmp9 = icmp sgt i32 %n, 0
1100 br i1 %cmp9, label %for.body, label %for.end
1101
1102 for.body: ; preds = %entry, %for.body
1103 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
1104 %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
1105 %0 = load double* %arrayidx, align 8
; The second operand (%P) does not change between iterations.
1106 %call = tail call double @llvm.powi.f64(double %0, i32 %P) nounwind readnone
1107 %arrayidx4 = getelementptr inbounds double* %x, i64 %indvars.iv
1108 store double %call, double* %arrayidx4, align 8
1109 %indvars.iv.next = add i64 %indvars.iv, 1
1110 %lftr.wideiv = trunc i64 %indvars.iv.next to i32
1111 %exitcond = icmp eq i32 %lftr.wideiv, %n
1112 br i1 %exitcond, label %for.end, label %for.body
1113
1114 for.end: ; preds = %for.body, %entry
1115 ret void
1116 }
1117
; Negative loop test for powi: same loop shape as @powi_f64, but the exponent
; is %1, the induction variable truncated to i32, so it differs on every
; iteration. The expectations below require that no llvm.powi.v4f64 call
; appears before the final "ret void".
1118 ;CHECK-LABEL: @powi_f64_neg(
1119 ;CHECK-NOT: llvm.powi.v4f64
1120 ;CHECK: ret void
1121 define void @powi_f64_neg(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
1122 entry:
1123 %cmp9 = icmp sgt i32 %n, 0
1124 br i1 %cmp9, label %for.body, label %for.end
1125
1126 for.body: ; preds = %entry, %for.body
1127 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
1128 %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
1129 %0 = load double* %arrayidx, align 8
; Exponent derived from the induction variable: varies per iteration.
1130 %1 = trunc i64 %indvars.iv to i32
1131 %call = tail call double @llvm.powi.f64(double %0, i32 %1) nounwind readnone
1132 %arrayidx4 = getelementptr inbounds double* %x, i64 %indvars.iv
1133 store double %call, double* %arrayidx4, align 8
1134 %indvars.iv.next = add i64 %indvars.iv, 1
1135 %lftr.wideiv = trunc i64 %indvars.iv.next to i32
1136 %exitcond = icmp eq i32 %lftr.wideiv, %n
1137 br i1 %exitcond, label %for.end, label %for.body
1138
1139 for.end: ; preds = %for.body, %entry
1140 ret void
1141 }
1142
1143 declare i64 @llvm.cttz.i64 (i64, i1) nounwind readnone
1144
; Positive loop test for cttz: each iteration loads y[i], calls llvm.cttz.i64
; with the constant second operand "i1 true" and stores the result to x[i].
; The expectations below require a llvm.cttz.v4i64 call in the output.
; NOTE(review): despite the "_f64" suffix in the name, every value in this
; function is an i64; the name is kept because the label line refers to it.
1145 ;CHECK-LABEL: @cttz_f64(
1146 ;CHECK: llvm.cttz.v4i64
1147 ;CHECK: ret void
1148 define void @cttz_f64(i32 %n, i64* noalias %y, i64* noalias %x) nounwind uwtable {
1149 entry:
1150 %cmp9 = icmp sgt i32 %n, 0
1151 br i1 %cmp9, label %for.body, label %for.end
1152
1153 for.body: ; preds = %entry, %for.body
1154 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
1155 %arrayidx = getelementptr inbounds i64* %y, i64 %indvars.iv
1156 %0 = load i64* %arrayidx, align 8
1157 %call = tail call i64 @llvm.cttz.i64(i64 %0, i1 true) nounwind readnone
1158 %arrayidx4 = getelementptr inbounds i64* %x, i64 %indvars.iv
1159 store i64 %call, i64* %arrayidx4, align 8
1160 %indvars.iv.next = add i64 %indvars.iv, 1
1161 %lftr.wideiv = trunc i64 %indvars.iv.next to i32
1162 %exitcond = icmp eq i32 %lftr.wideiv, %n
1163 br i1 %exitcond, label %for.end, label %for.body
1164
1165 for.end: ; preds = %for.body, %entry
1166 ret void
1167 }
1168
1169 declare i64 @llvm.ctlz.i64 (i64, i1) nounwind readnone
1170
; Positive loop test for ctlz: each iteration loads y[i], calls llvm.ctlz.i64
; with the constant second operand "i1 true" and stores the result to x[i].
; The expectations below require a llvm.ctlz.v4i64 call in the output.
; NOTE(review): despite the "_f64" suffix in the name, every value in this
; function is an i64; the name is kept because the label line refers to it.
1171 ;CHECK-LABEL: @ctlz_f64(
1172 ;CHECK: llvm.ctlz.v4i64
1173 ;CHECK: ret void
1174 define void @ctlz_f64(i32 %n, i64* noalias %y, i64* noalias %x) nounwind uwtable {
1175 entry:
1176 %cmp9 = icmp sgt i32 %n, 0
1177 br i1 %cmp9, label %for.body, label %for.end
1178
1179 for.body: ; preds = %entry, %for.body
1180 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
1181 %arrayidx = getelementptr inbounds i64* %y, i64 %indvars.iv
1182 %0 = load i64* %arrayidx, align 8
1183 %call = tail call i64 @llvm.ctlz.i64(i64 %0, i1 true) nounwind readnone
1184 %arrayidx4 = getelementptr inbounds i64* %x, i64 %indvars.iv
1185 store i64 %call, i64* %arrayidx4, align 8
1186 %indvars.iv.next = add i64 %indvars.iv, 1
1187 %lftr.wideiv = trunc i64 %indvars.iv.next to i32
1188 %exitcond = icmp eq i32 %lftr.wideiv, %n
1189 br i1 %exitcond, label %for.end, label %for.body
1190
1191 for.end: ; preds = %for.body, %entry
1192 ret void
1193 }
116116 ; CHECK: store <4 x i32>
117117 ; CHECK: ret
118118 }
119
120 declare i32 @llvm.ctlz.i32(i32,i1) nounwind readnone
121
; Positive straight-line test for ctlz: for each of the four lanes 0..3 it
; loads a[k] and b[k], adds them, and calls llvm.ctlz.i32 on the sum; the four
; results are then stored to c[0..3]. All four calls pass "i1 true" as the
; second operand. The expectations at the end of the body require two
; <4 x i32> loads, one llvm.ctlz.v4i32 call and one <4 x i32> store.
; NOTE(review): the trailing unnamed i1 parameter is never referenced in the
; body.
122 define void @vec_ctlz_i32(i32* %a, i32* %b, i32* %c, i1) {
123 entry:
124 %i0 = load i32* %a, align 4
125 %i1 = load i32* %b, align 4
126 %add1 = add i32 %i0, %i1
127 %call1 = tail call i32 @llvm.ctlz.i32(i32 %add1,i1 true) nounwind readnone
128
129 %arrayidx2 = getelementptr inbounds i32* %a, i32 1
130 %i2 = load i32* %arrayidx2, align 4
131 %arrayidx3 = getelementptr inbounds i32* %b, i32 1
132 %i3 = load i32* %arrayidx3, align 4
133 %add2 = add i32 %i2, %i3
134 %call2 = tail call i32 @llvm.ctlz.i32(i32 %add2,i1 true) nounwind readnone
135
136 %arrayidx4 = getelementptr inbounds i32* %a, i32 2
137 %i4 = load i32* %arrayidx4, align 4
138 %arrayidx5 = getelementptr inbounds i32* %b, i32 2
139 %i5 = load i32* %arrayidx5, align 4
140 %add3 = add i32 %i4, %i5
141 %call3 = tail call i32 @llvm.ctlz.i32(i32 %add3,i1 true) nounwind readnone
142
143 %arrayidx6 = getelementptr inbounds i32* %a, i32 3
144 %i6 = load i32* %arrayidx6, align 4
145 %arrayidx7 = getelementptr inbounds i32* %b, i32 3
146 %i7 = load i32* %arrayidx7, align 4
147 %add4 = add i32 %i6, %i7
148 %call4 = tail call i32 @llvm.ctlz.i32(i32 %add4,i1 true) nounwind readnone
149
150 store i32 %call1, i32* %c, align 4
151 %arrayidx8 = getelementptr inbounds i32* %c, i32 1
152 store i32 %call2, i32* %arrayidx8, align 4
153 %arrayidx9 = getelementptr inbounds i32* %c, i32 2
154 store i32 %call3, i32* %arrayidx9, align 4
155 %arrayidx10 = getelementptr inbounds i32* %c, i32 3
156 store i32 %call4, i32* %arrayidx10, align 4
157 ret void
158
159 ; CHECK-LABEL: @vec_ctlz_i32(
160 ; CHECK: load <4 x i32>
161 ; CHECK: load <4 x i32>
162 ; CHECK: call <4 x i32> @llvm.ctlz.v4i32
163 ; CHECK: store <4 x i32>
164 ; CHECK: ret
165 }
166
; Negative straight-line test for ctlz: same four-lane load/add/ctlz/store
; shape as @vec_ctlz_i32, but the second operand of the calls alternates
; (true, false, true, false), so the four calls do not share the same second
; argument. The expectation at the end of the body requires that no
; llvm.ctlz.v4i32 call is produced.
; NOTE(review): the trailing unnamed i1 parameter is never referenced in the
; body.
167 define void @vec_ctlz_i32_neg(i32* %a, i32* %b, i32* %c, i1) {
168 entry:
169 %i0 = load i32* %a, align 4
170 %i1 = load i32* %b, align 4
171 %add1 = add i32 %i0, %i1
172 %call1 = tail call i32 @llvm.ctlz.i32(i32 %add1,i1 true) nounwind readnone
173
174 %arrayidx2 = getelementptr inbounds i32* %a, i32 1
175 %i2 = load i32* %arrayidx2, align 4
176 %arrayidx3 = getelementptr inbounds i32* %b, i32 1
177 %i3 = load i32* %arrayidx3, align 4
178 %add2 = add i32 %i2, %i3
179 %call2 = tail call i32 @llvm.ctlz.i32(i32 %add2,i1 false) nounwind readnone
180
181 %arrayidx4 = getelementptr inbounds i32* %a, i32 2
182 %i4 = load i32* %arrayidx4, align 4
183 %arrayidx5 = getelementptr inbounds i32* %b, i32 2
184 %i5 = load i32* %arrayidx5, align 4
185 %add3 = add i32 %i4, %i5
186 %call3 = tail call i32 @llvm.ctlz.i32(i32 %add3,i1 true) nounwind readnone
187
188 %arrayidx6 = getelementptr inbounds i32* %a, i32 3
189 %i6 = load i32* %arrayidx6, align 4
190 %arrayidx7 = getelementptr inbounds i32* %b, i32 3
191 %i7 = load i32* %arrayidx7, align 4
192 %add4 = add i32 %i6, %i7
193 %call4 = tail call i32 @llvm.ctlz.i32(i32 %add4,i1 false) nounwind readnone
194
195 store i32 %call1, i32* %c, align 4
196 %arrayidx8 = getelementptr inbounds i32* %c, i32 1
197 store i32 %call2, i32* %arrayidx8, align 4
198 %arrayidx9 = getelementptr inbounds i32* %c, i32 2
199 store i32 %call3, i32* %arrayidx9, align 4
200 %arrayidx10 = getelementptr inbounds i32* %c, i32 3
201 store i32 %call4, i32* %arrayidx10, align 4
202 ret void
203
204 ; CHECK-LABEL: @vec_ctlz_i32_neg(
205 ; CHECK-NOT: call <4 x i32> @llvm.ctlz.v4i32
206
207 }
208
209
210 declare i32 @llvm.cttz.i32(i32,i1) nounwind readnone
211
; Positive straight-line test for cttz: for each of the four lanes 0..3 it
; loads a[k] and b[k], adds them, and calls llvm.cttz.i32 on the sum; the four
; results are then stored to c[0..3]. All four calls pass "i1 true" as the
; second operand. The expectations at the end of the body require two
; <4 x i32> loads, one llvm.cttz.v4i32 call and one <4 x i32> store.
; NOTE(review): the trailing unnamed i1 parameter is never referenced in the
; body.
212 define void @vec_cttz_i32(i32* %a, i32* %b, i32* %c, i1) {
213 entry:
214 %i0 = load i32* %a, align 4
215 %i1 = load i32* %b, align 4
216 %add1 = add i32 %i0, %i1
217 %call1 = tail call i32 @llvm.cttz.i32(i32 %add1,i1 true) nounwind readnone
218
219 %arrayidx2 = getelementptr inbounds i32* %a, i32 1
220 %i2 = load i32* %arrayidx2, align 4
221 %arrayidx3 = getelementptr inbounds i32* %b, i32 1
222 %i3 = load i32* %arrayidx3, align 4
223 %add2 = add i32 %i2, %i3
224 %call2 = tail call i32 @llvm.cttz.i32(i32 %add2,i1 true) nounwind readnone
225
226 %arrayidx4 = getelementptr inbounds i32* %a, i32 2
227 %i4 = load i32* %arrayidx4, align 4
228 %arrayidx5 = getelementptr inbounds i32* %b, i32 2
229 %i5 = load i32* %arrayidx5, align 4
230 %add3 = add i32 %i4, %i5
231 %call3 = tail call i32 @llvm.cttz.i32(i32 %add3,i1 true) nounwind readnone
232
233 %arrayidx6 = getelementptr inbounds i32* %a, i32 3
234 %i6 = load i32* %arrayidx6, align 4
235 %arrayidx7 = getelementptr inbounds i32* %b, i32 3
236 %i7 = load i32* %arrayidx7, align 4
237 %add4 = add i32 %i6, %i7
238 %call4 = tail call i32 @llvm.cttz.i32(i32 %add4,i1 true) nounwind readnone
239
240 store i32 %call1, i32* %c, align 4
241 %arrayidx8 = getelementptr inbounds i32* %c, i32 1
242 store i32 %call2, i32* %arrayidx8, align 4
243 %arrayidx9 = getelementptr inbounds i32* %c, i32 2
244 store i32 %call3, i32* %arrayidx9, align 4
245 %arrayidx10 = getelementptr inbounds i32* %c, i32 3
246 store i32 %call4, i32* %arrayidx10, align 4
247 ret void
248
249 ; CHECK-LABEL: @vec_cttz_i32(
250 ; CHECK: load <4 x i32>
251 ; CHECK: load <4 x i32>
252 ; CHECK: call <4 x i32> @llvm.cttz.v4i32
253 ; CHECK: store <4 x i32>
254 ; CHECK: ret
255 }
256
; Negative straight-line test for cttz: same four-lane load/add/cttz/store
; shape as @vec_cttz_i32, but the second operand of the calls alternates
; (true, false, true, false), so the four calls do not share the same second
; argument. The expectation at the end of the body requires that no
; llvm.cttz.v4i32 call is produced.
; NOTE(review): the trailing unnamed i1 parameter is never referenced in the
; body.
257 define void @vec_cttz_i32_neg(i32* %a, i32* %b, i32* %c, i1) {
258 entry:
259 %i0 = load i32* %a, align 4
260 %i1 = load i32* %b, align 4
261 %add1 = add i32 %i0, %i1
262 %call1 = tail call i32 @llvm.cttz.i32(i32 %add1,i1 true) nounwind readnone
263
264 %arrayidx2 = getelementptr inbounds i32* %a, i32 1
265 %i2 = load i32* %arrayidx2, align 4
266 %arrayidx3 = getelementptr inbounds i32* %b, i32 1
267 %i3 = load i32* %arrayidx3, align 4
268 %add2 = add i32 %i2, %i3
269 %call2 = tail call i32 @llvm.cttz.i32(i32 %add2,i1 false) nounwind readnone
270
271 %arrayidx4 = getelementptr inbounds i32* %a, i32 2
272 %i4 = load i32* %arrayidx4, align 4
273 %arrayidx5 = getelementptr inbounds i32* %b, i32 2
274 %i5 = load i32* %arrayidx5, align 4
275 %add3 = add i32 %i4, %i5
276 %call3 = tail call i32 @llvm.cttz.i32(i32 %add3,i1 true) nounwind readnone
277
278 %arrayidx6 = getelementptr inbounds i32* %a, i32 3
279 %i6 = load i32* %arrayidx6, align 4
280 %arrayidx7 = getelementptr inbounds i32* %b, i32 3
281 %i7 = load i32* %arrayidx7, align 4
282 %add4 = add i32 %i6, %i7
283 %call4 = tail call i32 @llvm.cttz.i32(i32 %add4,i1 false) nounwind readnone
284
285 store i32 %call1, i32* %c, align 4
286 %arrayidx8 = getelementptr inbounds i32* %c, i32 1
287 store i32 %call2, i32* %arrayidx8, align 4
288 %arrayidx9 = getelementptr inbounds i32* %c, i32 2
289 store i32 %call3, i32* %arrayidx9, align 4
290 %arrayidx10 = getelementptr inbounds i32* %c, i32 3
291 store i32 %call4, i32* %arrayidx10, align 4
292 ret void
293
294 ; CHECK-LABEL: @vec_cttz_i32_neg(
295 ; CHECK-NOT: call <4 x i32> @llvm.cttz.v4i32
296 }
297
298
299 declare float @llvm.powi.f32(float, i32)
; Positive straight-line test for powi: for each of the four lanes 0..3 it
; loads a[k] and b[k], adds them with fadd, and calls llvm.powi.f32 on the
; sum; the four results are then stored to c[0..3]. All four calls use the
; same exponent %P (a function argument). The expectations at the end of the
; body require two <4 x float> loads, one llvm.powi.v4f32 call and one
; <4 x float> store.
300 define void @vec_powi_f32(float* %a, float* %b, float* %c, i32 %P) {
301 entry:
302 %i0 = load float* %a, align 4
303 %i1 = load float* %b, align 4
304 %add1 = fadd float %i0, %i1
305 %call1 = tail call float @llvm.powi.f32(float %add1,i32 %P) nounwind readnone
306
307 %arrayidx2 = getelementptr inbounds float* %a, i32 1
308 %i2 = load float* %arrayidx2, align 4
309 %arrayidx3 = getelementptr inbounds float* %b, i32 1
310 %i3 = load float* %arrayidx3, align 4
311 %add2 = fadd float %i2, %i3
312 %call2 = tail call float @llvm.powi.f32(float %add2,i32 %P) nounwind readnone
313
314 %arrayidx4 = getelementptr inbounds float* %a, i32 2
315 %i4 = load float* %arrayidx4, align 4
316 %arrayidx5 = getelementptr inbounds float* %b, i32 2
317 %i5 = load float* %arrayidx5, align 4
318 %add3 = fadd float %i4, %i5
319 %call3 = tail call float @llvm.powi.f32(float %add3,i32 %P) nounwind readnone
320
321 %arrayidx6 = getelementptr inbounds float* %a, i32 3
322 %i6 = load float* %arrayidx6, align 4
323 %arrayidx7 = getelementptr inbounds float* %b, i32 3
324 %i7 = load float* %arrayidx7, align 4
325 %add4 = fadd float %i6, %i7
326 %call4 = tail call float @llvm.powi.f32(float %add4,i32 %P) nounwind readnone
327
328 store float %call1, float* %c, align 4
329 %arrayidx8 = getelementptr inbounds float* %c, i32 1
330 store float %call2, float* %arrayidx8, align 4
331 %arrayidx9 = getelementptr inbounds float* %c, i32 2
332 store float %call3, float* %arrayidx9, align 4
333 %arrayidx10 = getelementptr inbounds float* %c, i32 3
334 store float %call4, float* %arrayidx10, align 4
335 ret void
336
337 ; CHECK-LABEL: @vec_powi_f32(
338 ; CHECK: load <4 x float>
339 ; CHECK: load <4 x float>
340 ; CHECK: call <4 x float> @llvm.powi.v4f32
341 ; CHECK: store <4 x float>
342 ; CHECK: ret
343 }
344
345
; Negative straight-line test for powi: same four-lane load/fadd/powi/store
; shape as @vec_powi_f32, but the exponent alternates between the two
; function arguments (%P, %Q, %P, %Q), so the four calls do not share the
; same second argument. The expectation at the end of the body requires that
; no llvm.powi.v4f32 call is produced.
346 define void @vec_powi_f32_neg(float* %a, float* %b, float* %c, i32 %P, i32 %Q) {
347 entry:
348 %i0 = load float* %a, align 4
349 %i1 = load float* %b, align 4
350 %add1 = fadd float %i0, %i1
351 %call1 = tail call float @llvm.powi.f32(float %add1,i32 %P) nounwind readnone
352
353 %arrayidx2 = getelementptr inbounds float* %a, i32 1
354 %i2 = load float* %arrayidx2, align 4
355 %arrayidx3 = getelementptr inbounds float* %b, i32 1
356 %i3 = load float* %arrayidx3, align 4
357 %add2 = fadd float %i2, %i3
358 %call2 = tail call float @llvm.powi.f32(float %add2,i32 %Q) nounwind readnone
359
360 %arrayidx4 = getelementptr inbounds float* %a, i32 2
361 %i4 = load float* %arrayidx4, align 4
362 %arrayidx5 = getelementptr inbounds float* %b, i32 2
363 %i5 = load float* %arrayidx5, align 4
364 %add3 = fadd float %i4, %i5
365 %call3 = tail call float @llvm.powi.f32(float %add3,i32 %P) nounwind readnone
366
367 %arrayidx6 = getelementptr inbounds float* %a, i32 3
368 %i6 = load float* %arrayidx6, align 4
369 %arrayidx7 = getelementptr inbounds float* %b, i32 3
370 %i7 = load float* %arrayidx7, align 4
371 %add4 = fadd float %i6, %i7
372 %call4 = tail call float @llvm.powi.f32(float %add4,i32 %Q) nounwind readnone
373
374 store float %call1, float* %c, align 4
375 %arrayidx8 = getelementptr inbounds float* %c, i32 1
376 store float %call2, float* %arrayidx8, align 4
377 %arrayidx9 = getelementptr inbounds float* %c, i32 2
378 store float %call3, float* %arrayidx9, align 4
379 %arrayidx10 = getelementptr inbounds float* %c, i32 3
380 store float %call4, float* %arrayidx10, align 4
381 ret void
382
383 ; CHECK-LABEL: @vec_powi_f32_neg(
384 ; CHECK-NOT: call <4 x float> @llvm.powi.v4f32
385 }