llvm.org GIT mirror llvm / 12ab473
[SLP] Pass in correct alignment when querying memory access cost. This patch fixes bug https://llvm.org/bugs/show_bug.cgi?id=27897. When querying the memory access cost, the current SLP vectorizer always passes in an alignment value of 1 (unaligned), so it gets a very high cost for scalar memory accesses and wrongly vectorizes the memory loads in the test case. It can be fixed by simply passing in the correct alignment. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@271333 91177308-0d34-0410-b5e6-96231b3b80d8 Guozhi Wei 3 years ago
3 changed file(s) with 39 addition(s) and 4 deletion(s). Raw diff Collapse all Expand all
17251725 }
17261726 case Instruction::Load: {
17271727 // Cost of wide load - cost of scalar loads.
1728 unsigned alignment = dyn_cast<LoadInst>(VL0)->getAlignment();
17281729 int ScalarLdCost = VecTy->getNumElements() *
1729 TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0);
1730 int VecLdCost = TTI->getMemoryOpCost(Instruction::Load, VecTy, 1, 0);
1730 TTI->getMemoryOpCost(Instruction::Load, ScalarTy, alignment, 0);
1731 int VecLdCost = TTI->getMemoryOpCost(Instruction::Load,
1732 VecTy, alignment, 0);
17311733 return VecLdCost - ScalarLdCost;
17321734 }
17331735 case Instruction::Store: {
17341736 // We know that we can merge the stores. Calculate the cost.
1737 unsigned alignment = dyn_cast<StoreInst>(VL0)->getAlignment();
17351738 int ScalarStCost = VecTy->getNumElements() *
1736 TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1, 0);
1737 int VecStCost = TTI->getMemoryOpCost(Instruction::Store, VecTy, 1, 0);
1739 TTI->getMemoryOpCost(Instruction::Store, ScalarTy, alignment, 0);
1740 int VecStCost = TTI->getMemoryOpCost(Instruction::Store,
1741 VecTy, alignment, 0);
17381742 return VecStCost - ScalarStCost;
17391743 }
17401744 case Instruction::Call: {
0 if not 'PowerPC' in config.root.targets:
1 config.unsupported = True
0 ; RUN: opt -S -mtriple=powerpc64-linux-gnu -mcpu=pwr8 -mattr=+vsx -slp-vectorizer < %s | FileCheck %s
1
2 %struct.A = type { i8*, i8* }
3
4 define i64 @foo(%struct.A* nocapture readonly %this) {
5 entry:
6 %end.i = getelementptr inbounds %struct.A, %struct.A* %this, i64 0, i32 1
7 %0 = bitcast i8** %end.i to i64*
8 %1 = load i64, i64* %0, align 8
9 %2 = bitcast %struct.A* %this to i64*
10 %3 = load i64, i64* %2, align 8
11 %sub.ptr.sub.i = sub i64 %1, %3
12 %cmp = icmp sgt i64 %sub.ptr.sub.i, 9
13 br i1 %cmp, label %return, label %lor.lhs.false
14
15 lor.lhs.false:
16 %4 = inttoptr i64 %3 to i8*
17 %5 = inttoptr i64 %1 to i8*
18 %cmp2 = icmp ugt i8* %5, %4
19 %. = select i1 %cmp2, i64 2, i64 -1
20 ret i64 %.
21
22 return:
23 ret i64 2
24 }
25
26 ; CHECK: load i64
27 ; CHECK-NOT: load <2 x i64>
28 ; CHECK-NOT: extractelement