llvm.org GIT mirror llvm / afbe82d
[SLP] Enable vectorization of GEP expressions. The use cases look like the following: x->a = y->a + 10 x->b = y->b + 12 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@210342 91177308-0d34-0410-b5e6-96231b3b80d8 Michael Zolotukhin 5 years ago
2 changed file(s) with 128 addition(s) and 0 deletion(s). Raw diff Collapse all Expand all
940940 }
941941 return;
942942 }
943 case Instruction::GetElementPtr: {
944 // We don't combine GEPs with complicated (nested) indexing.
945 for (unsigned j = 0; j < VL.size(); ++j) {
946 if (cast(VL[j])->getNumOperands() != 2) {
947 DEBUG(dbgs() << "SLP: not-vectorizable GEP (nested indexes).\n");
948 newTreeEntry(VL, false);
949 return;
950 }
951 }
952
953 // We can't combine several GEPs into one vector if they operate on
954 // different types.
955 Type *Ty0 = cast(VL0)->getOperand(0)->getType();
956 for (unsigned j = 0; j < VL.size(); ++j) {
957 Type *CurTy = cast(VL[j])->getOperand(0)->getType();
958 if (Ty0 != CurTy) {
959 DEBUG(dbgs() << "SLP: not-vectorizable GEP (different types).\n");
960 newTreeEntry(VL, false);
961 return;
962 }
963 }
964
965 // We don't combine GEPs with non-constant indexes.
966 for (unsigned j = 0; j < VL.size(); ++j) {
967 auto Op = cast(VL[j])->getOperand(1);
968 if (!isa(Op)) {
969 DEBUG(
970 dbgs() << "SLP: not-vectorizable GEP (non-constant indexes).\n");
971 newTreeEntry(VL, false);
972 return;
973 }
974 }
975
976 newTreeEntry(VL, true);
977 DEBUG(dbgs() << "SLP: added a vector of GEPs.\n");
978 for (unsigned i = 0, e = 2; i < e; ++i) {
979 ValueList Operands;
980 // Prepare the operand vector.
981 for (unsigned j = 0; j < VL.size(); ++j)
982 Operands.push_back(cast(VL[j])->getOperand(i));
983
984 buildTree_rec(Operands, Depth + 1);
985 }
986 return;
987 }
943988 case Instruction::Store: {
944989 // Check if the stores are consecutive or of we need to swizzle them.
945990 for (unsigned i = 0, e = VL.size() - 1; i < e; ++i)
11451190 }
11461191 return VecCost - ScalarCost;
11471192 }
1193 case Instruction::GetElementPtr: {
1194 TargetTransformInfo::OperandValueKind Op1VK =
1195 TargetTransformInfo::OK_AnyValue;
1196 TargetTransformInfo::OperandValueKind Op2VK =
1197 TargetTransformInfo::OK_UniformConstantValue;
1198
1199 int ScalarCost =
1200 VecTy->getNumElements() *
1201 TTI->getArithmeticInstrCost(Instruction::Add, ScalarTy, Op1VK, Op2VK);
1202 int VecCost =
1203 TTI->getArithmeticInstrCost(Instruction::Add, VecTy, Op1VK, Op2VK);
1204
1205 return VecCost - ScalarCost;
1206 }
11481207 case Instruction::Load: {
11491208 // Cost of wide load - cost of scalar loads.
11501209 int ScalarLdCost = VecTy->getNumElements() *
16721731 S->setAlignment(Alignment);
16731732 E->VectorizedValue = S;
16741733 return propagateMetadata(S, E->Scalars);
1734 }
1735 case Instruction::GetElementPtr: {
1736 setInsertPointAfterBundle(E->Scalars);
1737
1738 ValueList Op0VL;
1739 for (int i = 0, e = E->Scalars.size(); i < e; ++i)
1740 Op0VL.push_back(cast(E->Scalars[i])->getOperand(0));
1741
1742 Value *Op0 = vectorizeTree(Op0VL);
1743
1744 std::vector OpVecs;
1745 for (int j = 1, e = cast(VL0)->getNumOperands(); j < e;
1746 ++j) {
1747 ValueList OpVL;
1748 for (int i = 0, e = E->Scalars.size(); i < e; ++i)
1749 OpVL.push_back(cast(E->Scalars[i])->getOperand(j));
1750
1751 Value *OpVec = vectorizeTree(OpVL);
1752 OpVecs.push_back(OpVec);
1753 }
1754
1755 Value *V = Builder.CreateGEP(Op0, OpVecs);
1756 E->VectorizedValue = V;
1757
1758 if (Instruction *I = dyn_cast(V))
1759 return propagateMetadata(I, E->Scalars);
1760
1761 return V;
16751762 }
16761763 case Instruction::Call: {
16771764 CallInst *CI = cast(VL0);
; RUN: opt < %s -basicaa -slp-vectorizer -S | FileCheck %s
1 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
2
3 ; Test if SLP can handle GEP expressions.
4 ; The test perform the following action:
5 ; x->first = y->first + 16
6 ; x->second = y->second + 16
7
8 ; CHECK-LABEL: foo1
9 ; CHECK: <2 x i32*>
; Both struct fields are advanced by the same constant offset, so the two
; scalar GEPs are expected to fuse into one <2 x i32*> vector GEP.
define void @foo1 ({ i32*, i32* }* noalias %x, { i32*, i32* }* noalias %y) {
  %1 = getelementptr inbounds { i32*, i32* }* %y, i64 0, i32 0
  %2 = load i32** %1, align 8
  %3 = getelementptr inbounds i32* %2, i64 16
  %4 = getelementptr inbounds { i32*, i32* }* %x, i64 0, i32 0
  store i32* %3, i32** %4, align 8
  %5 = getelementptr inbounds { i32*, i32* }* %y, i64 0, i32 1
  %6 = load i32** %5, align 8
  %7 = getelementptr inbounds i32* %6, i64 16
  %8 = getelementptr inbounds { i32*, i32* }* %x, i64 0, i32 1
  store i32* %7, i32** %8, align 8
  ret void
}
23
24 ; Test that we don't vectorize GEP expressions if indexes are not constants.
25 ; We can't produce an efficient code in that case.
26 ; CHECK-LABEL: foo2
27 ; CHECK-NOT: <2 x i32*>
; Same shape as @foo1, but the GEP index is the runtime value %i, so the
; non-constant-index check must reject vectorization (CHECK-NOT above).
define void @foo2 ({ i32*, i32* }* noalias %x, { i32*, i32* }* noalias %y, i32 %i) {
  %1 = getelementptr inbounds { i32*, i32* }* %y, i64 0, i32 0
  %2 = load i32** %1, align 8
  %3 = getelementptr inbounds i32* %2, i32 %i
  %4 = getelementptr inbounds { i32*, i32* }* %x, i64 0, i32 0
  store i32* %3, i32** %4, align 8
  %5 = getelementptr inbounds { i32*, i32* }* %y, i64 0, i32 1
  %6 = load i32** %5, align 8
  %7 = getelementptr inbounds i32* %6, i32 %i
  %8 = getelementptr inbounds { i32*, i32* }* %x, i64 0, i32 1
  store i32* %7, i32** %8, align 8
  ret void
}