llvm.org GIT mirror llvm / 8d88d91
[SystemZ] Bugfix in isFusableLoadOpStorePattern() This function is responsible for checking the legality of fusing an instance of load -> op -> store into a single operation. In the SystemZ backend the check was incomplete, and as a result a test case emerged with a cycle in the instruction selection DAG. Instead of using the NodeIds to determine node relationships, hasPredecessorHelper() is now used, just like in the X86 backend. This handled the failing tests and also gave a few additional transformations on benchmarks. The SystemZ isFusableLoadOpStorePattern() is now a very near copy of the X86 function, and it seems this could be made a utility function in common code instead. Review: Ulrich Weigand https://reviews.llvm.org/D60255 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@357688 91177308-0d34-0410-b5e6-96231b3b80d8 Jonas Paulsson 7 months ago
2 changed file(s) with 50 addition(s) and 15 deletion(s). Raw diff Collapse all Expand all
12741274 InputChain = LoadNode->getChain();
12751275 } else if (Chain.getOpcode() == ISD::TokenFactor) {
12761276 SmallVector ChainOps;
1277 SmallVector LoopWorklist;
1278 SmallPtrSet Visited;
1279 const unsigned int Max = 1024;
12771280 for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) {
12781281 SDValue Op = Chain.getOperand(i);
12791282 if (Op == Load.getValue(1)) {
12821285 ChainOps.push_back(Load.getOperand(0));
12831286 continue;
12841287 }
1285
1286 // Make sure using Op as part of the chain would not cause a cycle here.
1287 // In theory, we could check whether the chain node is a predecessor of
1288 // the load. But that can be very expensive. Instead visit the uses and
1289 // make sure they all have smaller node id than the load.
1290 int LoadId = LoadNode->getNodeId();
1291 for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
1292 UE = UI->use_end(); UI != UE; ++UI) {
1293 if (UI.getUse().getResNo() != 0)
1294 continue;
1295 if (UI->getNodeId() > LoadId)
1296 return false;
1297 }
1298
1288 LoopWorklist.push_back(Op.getNode());
12991289 ChainOps.push_back(Op);
13001290 }
13011291
1302 if (ChainCheck)
1292 if (ChainCheck) {
1293 // Add the other operand of StoredVal to worklist.
1294 for (SDValue Op : StoredVal->ops())
1295 if (Op.getNode() != LoadNode)
1296 LoopWorklist.push_back(Op.getNode());
1297
1298 // Check if Load is reachable from any of the nodes in the worklist.
1299 if (SDNode::hasPredecessorHelper(Load.getNode(), Visited, LoopWorklist, Max,
1300 true))
1301 return false;
1302
13031303 // Make a new TokenFactor with all the other input chains except
13041304 // for the load.
13051305 InputChain = CurDAG->getNode(ISD::TokenFactor, SDLoc(Chain),
13061306 MVT::Other, ChainOps);
1307 }
13071308 }
13081309 if (!ChainCheck)
13091310 return false;
0 ; Test that this test case does not abort after the folding of load -> add ->
1 ; store into an alsi. This folding is supposed to not happen as it would
2 ; introduce a loop in the DAG.
3 ;
4 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 -disable-basicaa -consthoist-gep | FileCheck %s
5
6 @g_295 = external dso_local unnamed_addr global i32, align 4
7 @g_672 = external dso_local unnamed_addr global i64, align 8
8 @g_1484 = external dso_local global <{ i8, i64, { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i32, i8, i8, [2 x i8], i8, i8, i8, i8, i8, i8, i8, i8, i32, i8, i8, i8 }, i32 }>, align 2
9
10 define void @fun() {
11 ; CHECK-LABEL: fun:
12
13 bb:
14 br label %bb1
15
16 bb1: ; preds = %bb1, %bb
17 store i32 2, i32* getelementptr inbounds (<{ i8, i64, { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i32, i8, i8, [2 x i8], i8, i8, i8, i8, i8, i8, i8, i8, i32, i8, i8, i8 }, i32 }>, <{ i8, i64, { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i32, i8, i8, [2 x i8], i8, i8, i8, i8, i8, i8, i8, i8, i32, i8, i8, i8 }, i32 }>* @g_1484, i64 0, i32 2, i32 16)
18 %tmp = icmp slt i32 undef, 3
19 br i1 %tmp, label %bb1, label %bb2
20
21 bb2: ; preds = %bb1
22 %tmp3 = load i32, i32* getelementptr inbounds (<{ i8, i64, { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i32, i8, i8, [2 x i8], i8, i8, i8, i8, i8, i8, i8, i8, i32, i8, i8, i8 }, i32 }>, <{ i8, i64, { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i32, i8, i8, [2 x i8], i8, i8, i8, i8, i8, i8, i8, i8, i32, i8, i8, i8 }, i32 }>* @g_1484, i64 0, i32 2, i32 28)
23 %tmp4 = load i64, i64* @g_672
24 %tmp5 = add i64 %tmp4, 1
25 store i64 %tmp5, i64* @g_672
26 %tmp6 = icmp eq i64 %tmp5, 0
27 %tmp7 = zext i1 %tmp6 to i32
28 %tmp8 = icmp ult i32 %tmp3, %tmp7
29 %tmp9 = zext i1 %tmp8 to i32
30 store i32 %tmp9, i32* @g_295
31 ret void
32 }
33