llvm.org GIT mirror llvm / b0ee237
SLPVectorizer: Only vectorize intrinsics whose operands are widened equally. The vectorizer only knows how to vectorize intrinsics by widening all operands by the same factor. Patch by Tyler Nowicki! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@205855 91177308-0d34-0410-b5e6-96231b3b80d8 Arnold Schwaighofer 5 years ago
4 changed file(s) with 100 addition(s) and 26 deletion(s). Raw diff Collapse all Expand all
0 //===- llvm/Transforms/Utils/VectorUtils.h - Vector utilities -*- C++ -*-=====//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines some vectorizer utilities.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #ifndef LLVM_TRANSFORMS_UTILS_VECTORUTILS_H
14 #define LLVM_TRANSFORMS_UTILS_VECTORUTILS_H
15
16 namespace llvm {
17
18 /// \brief Identify if the intrinsic is trivially vectorizable.
19 ///
20 /// This method returns true if the intrinsic's argument types are all
21 /// scalars for the scalar form of the intrinsic and all vectors for
22 /// the vector form of the intrinsic.
23 static inline bool isTriviallyVectorizable(Intrinsic::ID ID) {
24 switch (ID) {
25 case Intrinsic::sqrt:
26 case Intrinsic::sin:
27 case Intrinsic::cos:
28 case Intrinsic::exp:
29 case Intrinsic::exp2:
30 case Intrinsic::log:
31 case Intrinsic::log10:
32 case Intrinsic::log2:
33 case Intrinsic::fabs:
34 case Intrinsic::copysign:
35 case Intrinsic::floor:
36 case Intrinsic::ceil:
37 case Intrinsic::trunc:
38 case Intrinsic::rint:
39 case Intrinsic::nearbyint:
40 case Intrinsic::round:
41 case Intrinsic::ctpop:
42 case Intrinsic::pow:
43 case Intrinsic::fma:
44 case Intrinsic::fmuladd:
45 return true;
46 default:
47 return false;
48 }
49 }
50
51 } // llvm namespace
52
53 #endif
9090 #include "llvm/Transforms/Scalar.h"
9191 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
9292 #include "llvm/Transforms/Utils/Local.h"
93 #include "llvm/Transforms/Utils/VectorUtils.h"
9394 #include
9495 #include
9596
22652266 getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) {
22662267 // If we have an intrinsic call, check if it is trivially vectorizable.
22672268 if (IntrinsicInst *II = dyn_cast(CI)) {
2268 switch (II->getIntrinsicID()) {
2269 case Intrinsic::sqrt:
2270 case Intrinsic::sin:
2271 case Intrinsic::cos:
2272 case Intrinsic::exp:
2273 case Intrinsic::exp2:
2274 case Intrinsic::log:
2275 case Intrinsic::log10:
2276 case Intrinsic::log2:
2277 case Intrinsic::fabs:
2278 case Intrinsic::copysign:
2279 case Intrinsic::floor:
2280 case Intrinsic::ceil:
2281 case Intrinsic::trunc:
2282 case Intrinsic::rint:
2283 case Intrinsic::nearbyint:
2284 case Intrinsic::round:
2285 case Intrinsic::pow:
2286 case Intrinsic::fma:
2287 case Intrinsic::fmuladd:
2288 case Intrinsic::lifetime_start:
2289 case Intrinsic::lifetime_end:
2290 return II->getIntrinsicID();
2291 default:
2269 Intrinsic::ID ID = II->getIntrinsicID();
2270 if (isTriviallyVectorizable(ID) || ID == Intrinsic::lifetime_start ||
2271 ID == Intrinsic::lifetime_end)
2272 return ID;
2273 else
22922274 return Intrinsic::not_intrinsic;
2293 }
22942275 }
22952276
22962277 if (!TLI)
4040 #include "llvm/Support/CommandLine.h"
4141 #include "llvm/Support/Debug.h"
4242 #include "llvm/Support/raw_ostream.h"
43 #include "llvm/Transforms/Utils/VectorUtils.h"
4344 #include
4445 #include
4546
948949 case Instruction::Call: {
949950 // Check if the calls are all to the same vectorizable intrinsic.
950951 IntrinsicInst *II = dyn_cast(VL[0]);
951 if (II==NULL) {
952 Intrinsic::ID ID = II ? II->getIntrinsicID() : Intrinsic::not_intrinsic;
953
954 if (!isTriviallyVectorizable(ID)) {
952955 newTreeEntry(VL, false);
953956 DEBUG(dbgs() << "SLP: Non-vectorizable call.\n");
954957 return;
0 ; RUN: opt < %s -slp-vectorizer -o - -S -slp-threshold=-1000
1
2 target datalayout = "e-p:32:32-i64:64-v16:16-v32:32-n16:32:64"
3 target triple = "nvptx--nvidiacl"
4
5 ; CTLZ cannot be vectorized currently because the second argument is a scalar
6 ; for both the scalar and vector forms of the intrinsic. In the future it
7 ; should be possible to vectorize such functions.
8 ; Test causes an assert if LLVM tries to vectorize CTLZ.
9
; Applies llvm.ctlz.i8 to each of the two lanes of %x and rebuilds a <2 x i8>
; result. Each lane is handled in turn: extract, call, insert. The second
; ctlz operand is the scalar constant `i1 false` in both calls.
10 define <2 x i8> @cltz_test(<2 x i8> %x) #0 {
11 entry:
12 %0 = extractelement <2 x i8> %x, i32 0
13 %call.i = call i8 @llvm.ctlz.i8(i8 %0, i1 false)
14 %vecinit = insertelement <2 x i8> undef, i8 %call.i, i32 0
15 %1 = extractelement <2 x i8> %x, i32 1
16 %call.i4 = call i8 @llvm.ctlz.i8(i8 %1, i1 false)
17 %vecinit2 = insertelement <2 x i8> %vecinit, i8 %call.i4, i32 1
18 ret <2 x i8> %vecinit2
19 }
20
; Same per-lane llvm.ctlz.i8 computation as @cltz_test, but with the
; instructions grouped by kind: both extracts first, then the two ctlz calls
; back to back, then both inserts. Uses attribute group #1 instead of #0.
21 define <2 x i8> @cltz_test2(<2 x i8> %x) #1 {
22 entry:
23 %0 = extractelement <2 x i8> %x, i32 0
24 %1 = extractelement <2 x i8> %x, i32 1
25 %call.i = call i8 @llvm.ctlz.i8(i8 %0, i1 false)
26 %call.i4 = call i8 @llvm.ctlz.i8(i8 %1, i1 false)
27 %vecinit = insertelement <2 x i8> undef, i8 %call.i, i32 0
28 %vecinit2 = insertelement <2 x i8> %vecinit, i8 %call.i4, i32 1
29 ret <2 x i8> %vecinit2
30 }
31
32 declare i8 @llvm.ctlz.i8(i8, i1) #3
33
34 attributes #0 = { alwaysinline nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
35 attributes #1 = { nounwind readnone }