llvm.org GIT mirror llvm / 638cd03
[TTI] Let the cost model estimate ctpop costs based on legality.
PPC has a vector popcount; this lets the vectorizer use the correct cost for it. Tweak the X86 test to use an intrinsic that's actually scalarized (we have a somewhat efficient lowering for vector popcount using SSE, and the cost model finds that now).
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@265005 91177308-0d34-0410-b5e6-96231b3b80d8
Benjamin Kramer, 4 years ago
3 changed file(s) with 28 addition(s) and 9 deletion(s). Raw diff Collapse all Expand all
620620 unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
621621 ArrayRef<Type *> Tys) {
622622 unsigned ISD = 0;
623 unsigned SingleCallCost = 10; // Library call cost. Make it expensive.
623624 switch (IID) {
624625 default: {
625626 // Assume that we need to scalarize this intrinsic.
724725 case Intrinsic::masked_load:
725726 return static_cast<T *>(this)
726727 ->getMaskedMemoryOpCost(Instruction::Load, RetTy, 0, 0);
728 case Intrinsic::ctpop:
729 ISD = ISD::CTPOP;
730 // In case of legalization use TCC_Expensive. This is cheaper than a
731 // library call but still not a cheap instruction.
732 SingleCallCost = TargetTransformInfo::TCC_Expensive;
733 break;
734 // FIXME: ctlz, cttz, ...
727735 }
728736
729737 const TargetLoweringBase *TLI = getTLI();
784792 }
785793
786794 // This is going to be turned into a library call, make it expensive.
787 return 10;
795 return SingleCallCost;
788796 }
789797
790798 /// \brief Compute a cost of the given call instruction.
0 ; RUN: opt < %s -cost-model -analyze -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s
1 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
2 target triple = "powerpc64-unknown-linux-gnu"
3
4 define <4 x i32> @test1(<4 x i32> %arg) {
5 ; CHECK: cost of 1 {{.*}} call <4 x i32> @llvm.ctpop.v4i32
6 %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %arg)
7 ret <4 x i32> %ctpop
8 }
9
10 declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>)
1212 declare %i4 @llvm.bswap.v4i32(%i4)
1313 declare %i8 @llvm.bswap.v2i64(%i8)
1414
15 declare %i4 @llvm.ctpop.v4i32(%i4)
16 declare %i8 @llvm.ctpop.v2i64(%i8)
15 declare %i4 @llvm.cttz.v4i32(%i4)
16 declare %i8 @llvm.cttz.v2i64(%i8)
1717
1818 ; CHECK32-LABEL: test_scalarized_intrinsics
1919 ; CHECK64-LABEL: test_scalarized_intrinsics
2727 ; CHECK64: cost of 6 {{.*}}bswap.v2i64
2828 %r3 = call %i8 @llvm.bswap.v2i64(%i8 undef)
2929
30 ; CHECK32: cost of 12 {{.*}}ctpop.v4i32
31 ; CHECK64: cost of 12 {{.*}}ctpop.v4i32
32 %r4 = call %i4 @llvm.ctpop.v4i32(%i4 undef)
33 ; CHECK32: cost of 10 {{.*}}ctpop.v2i64
34 ; CHECK64: cost of 6 {{.*}}ctpop.v2i64
35 %r5 = call %i8 @llvm.ctpop.v2i64(%i8 undef)
30 ; CHECK32: cost of 12 {{.*}}cttz.v4i32
31 ; CHECK64: cost of 12 {{.*}}cttz.v4i32
32 %r4 = call %i4 @llvm.cttz.v4i32(%i4 undef)
33 ; CHECK32: cost of 10 {{.*}}cttz.v2i64
34 ; CHECK64: cost of 6 {{.*}}cttz.v2i64
35 %r5 = call %i8 @llvm.cttz.v2i64(%i8 undef)
3636
3737 ; CHECK32: ret
3838 ; CHECK64: ret