llvm.org GIT mirror llvm / 1f93c85
The cost of splitting a large vector instruction was not being taken into account by the getUserCost function. This was leading to some loops being over-unrolled. The cost of a vector instruction is now multiplied by the cost of the type legalization, which returns a more accurate cost. Committing on behalf of Brad Nemanich (brad.nemanich@ibm.com) Differential Revision: https://reviews.llvm.org/D38961 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@316174 91177308-0d34-0410-b5e6-96231b3b80d8 Graham Yiu 1 year, 11 months ago
3 changed file(s) with 87 addition(s) and 0 deletion(s). Raw diff Collapse all Expand all
188188 return PPCTTIImpl::getIntImmCost(Imm, Ty);
189189 }
190190
191 unsigned PPCTTIImpl::getUserCost(const User *U,
192 ArrayRef<const Value *> Operands) {
193 if (U->getType()->isVectorTy()) {
194 // Instructions that need to be split should cost more.
195 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, U->getType());
196 return LT.first * BaseT::getUserCost(U, Operands);
197 }
198
199 return BaseT::getUserCost(U, Operands);
200 }
201
191202 void PPCTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
192203 TTI::UnrollingPreferences &UP) {
193204 if (ST->getDarwinDirective() == PPC::DIR_A2) {
5050 int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
5151 Type *Ty);
5252
53 unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands);
54
5355 TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
5456 void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
5557 TTI::UnrollingPreferences &UP);
0 ; RUN: opt < %s -S -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -loop-unroll | FileCheck %s
1 ; RUN: opt < %s -S -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 -loop-unroll | FileCheck %s
2
3 target datalayout = "e-m:e-i64:64-n32:64"
4 target triple = "powerpc64le-unknown-linux-gnu"
5
6 ; Function Attrs: norecurse nounwind
7 define i8* @f(i8* returned %s, i32 zeroext %x, i32 signext %k) local_unnamed_addr #0 {
8 entry:
9 %cmp10 = icmp sgt i32 %k, 0
10 br i1 %cmp10, label %for.body.lr.ph, label %for.end
11
12 for.body.lr.ph: ; preds = %entry
13 %wide.trip.count = zext i32 %k to i64
14 %min.iters.check = icmp ult i32 %k, 16
15 br i1 %min.iters.check, label %for.body.preheader, label %vector.ph
16
17 vector.ph: ; preds = %for.body.lr.ph
18 %n.vec = and i64 %wide.trip.count, 4294967280
19 %broadcast.splatinsert = insertelement <16 x i32> undef, i32 %x, i32 0
20 %broadcast.splat = shufflevector <16 x i32> %broadcast.splatinsert, <16 x i32> undef, <16 x i32> zeroinitializer
21 br label %vector.body
22
23 vector.body: ; preds = %vector.body, %vector.ph
24 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
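25 %vec.ind12 = phi <16 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, %vector.ph ], [ %vec.ind.next13, %vector.body ]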
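26 %0 = shl <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %vec.ind12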
27 %1 = and <16 x i32> %0, %broadcast.splat
28 %2 = icmp eq <16 x i32> %1, zeroinitializer
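29 %3 = select <16 x i1> %2, <16 x i8> <i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48>, <16 x i8> <i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49>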
30 %4 = getelementptr inbounds i8, i8* %s, i64 %index
31 %5 = bitcast i8* %4 to <16 x i8>*
32 store <16 x i8> %3, <16 x i8>* %5, align 1
33 %index.next = add i64 %index, 16
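34 %vec.ind.next13 = add <16 x i32> %vec.ind12, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>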
35 %6 = icmp eq i64 %index.next, %n.vec
36 br i1 %6, label %middle.block, label %vector.body
37
38 middle.block: ; preds = %vector.body
39 %cmp.n = icmp eq i64 %n.vec, %wide.trip.count
40 br i1 %cmp.n, label %for.end, label %for.body.preheader
41
42 for.body.preheader: ; preds = %middle.block, %for.body.lr.ph
43 %indvars.iv.ph = phi i64 [ 0, %for.body.lr.ph ], [ %n.vec, %middle.block ]
44 br label %for.body
45
46 for.body: ; preds = %for.body.preheader, %for.body
47 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
48 %7 = trunc i64 %indvars.iv to i32
49 %shl = shl i32 1, %7
50 %and = and i32 %shl, %x
51 %tobool = icmp eq i32 %and, 0
52 %conv = select i1 %tobool, i8 48, i8 49
53 %arrayidx = getelementptr inbounds i8, i8* %s, i64 %indvars.iv
54 store i8 %conv, i8* %arrayidx, align 1
55 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
56 %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
57 br i1 %exitcond, label %for.end, label %for.body
58
59 for.end: ; preds = %for.body, %middle.block, %entry
60 %idxprom1 = sext i32 %k to i64
61 %arrayidx2 = getelementptr inbounds i8, i8* %s, i64 %idxprom1
62 store i8 0, i8* %arrayidx2, align 1
63 ret i8* %s
64 }
65
66
67 ; CHECK-LABEL: vector.body
68 ; CHECK: shl
69 ; CHECK-NEXT: and
70 ; CHECK: shl
71 ; CHECK-NEXT: and
72 ; CHECK: label %vector.body
73