llvm.org GIT mirror llvm / 16cba69
FastISel needs to remove dead code when it bails out. When FastISel fails to translate an instruction it hands off code generation to SelectionDAG. Before it does so, it may have generated local value instructions to feed phi nodes in successor blocks. These instructions will then be generated again by SelectionDAG, causing duplication and less efficient code, including extra spill instructions. Patch by Wolfgang Pieb! Differential Revision: http://reviews.llvm.org/D11768 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@255520 91177308-0d34-0410-b5e6-96231b3b80d8 Paul Robinson 4 years ago
3 changed file(s) with 182 addition(s) and 2 deletion(s). Raw diff Collapse all Expand all
558558 /// across heavy instructions like calls.
559559 void flushLocalValueMap();
560560
561 /// \brief Removes dead local value instructions after SavedLastLocalValue.
562 void removeDeadLocalValueCode(MachineInstr *SavedLastLocalValue);
563
561564 /// \brief Insertion point before trying to select the current instruction.
562565 MachineBasicBlock::iterator SavedInsertPt;
563566
13211321 return true;
13221322 }
13231323
1324 // Remove the local value instructions from the one after SavedLastLocalValue
1325 // up to the current function insert point, and restore SavedLastLocalValue.
1326 void FastISel::removeDeadLocalValueCode(MachineInstr *SavedLastLocalValue)
1327 {
1328 MachineInstr *CurLastLocalValue = getLastLocalValue();
1329 if (CurLastLocalValue != SavedLastLocalValue) { // Unchanged: nothing to remove.
1330 // Find the first local value instruction to be deleted.
1331 // This is the instruction after SavedLastLocalValue if it is non-NULL.
1332 // Otherwise it's the first non-PHI instruction in the block.
1333 MachineBasicBlock::iterator FirstDeadInst(SavedLastLocalValue);
1334 if (SavedLastLocalValue)
1335 ++FirstDeadInst;
1336 else
1337 FirstDeadInst = FuncInfo.MBB->getFirstNonPHI();
1338 setLastLocalValue(SavedLastLocalValue); // Reset before deleting the range.
1339 removeDeadCode(FirstDeadInst, FuncInfo.InsertPt);
1340 }
1341 }
1342
13241343 bool FastISel::selectInstruction(const Instruction *I) {
1344 MachineInstr *SavedLastLocalValue = getLastLocalValue();
13251345 // Just before the terminator instruction, insert instructions to
13261346 // feed PHI nodes in successor blocks.
13271347 if (isa<TerminatorInst>(I))
1328 if (!handlePHINodesInSuccessorBlocks(I->getParent()))
1348 if (!handlePHINodesInSuccessorBlocks(I->getParent())) {
1349 // PHI node handling may have generated local value instructions,
1350 // even though it failed to handle all PHI nodes.
1351 // We remove these instructions because SelectionDAGISel will generate
1352 // them again.
1353 removeDeadLocalValueCode(SavedLastLocalValue);
13291354 return false;
1355 }
13301356
13311357 DbgLoc = I->getDebugLoc();
13321358
13751401
13761402 DbgLoc = DebugLoc();
13771403 // Undo phi node updates, because they will be added again by SelectionDAG.
1378 if (isa<TerminatorInst>(I))
1404 if (isa<TerminatorInst>(I)) {
1405 // PHI node handling may have generated local value instructions.
1406 // We remove them because SelectionDAGISel will generate them again.
1407 removeDeadLocalValueCode(SavedLastLocalValue);
13791408 FuncInfo.PHINodesToUpdate.resize(FuncInfo.OrigNumPHINodesToUpdate);
1409 }
13801410 return false;
13811411 }
13821412
0 ; RUN: llc < %s | FileCheck %s
1 ;
2 ; Generated with clang -O2 -S -emit-llvm
3 ;
4 ; /* Test 1 */
5 ; extern "C" bool bar (long double);
6 ; __attribute__((optnone))
7 ; extern "C" bool foo(long double x, long double y)
8 ; {
9 ; return (x == y) || (bar(x));
10 ; }
11 ;
12 ; /* Test 2 */
13 ; struct FVector {
14 ; float x, y, z;
15 ; inline __attribute__((always_inline)) FVector(float f): x(f), y(f), z(f) {}
16 ; inline __attribute__((always_inline)) FVector func(float p) const
17 ; {
18 ; if( x == 1.f ) {
19 ; return *this;
20 ; } else if( x < p ) {
21 ; return FVector(0.f);
22 ; }
23 ; return FVector(x);
24 ; }
25 ; };
26 ;
27 ; __attribute__((optnone))
28 ; int main()
29 ; {
30 ; FVector v(1.0);
31 ; v = v.func(1.e-8);
32 ; return 0;
33 ; }
34 ;
35 ; ModuleID = 'test.cpp'
36 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
37 target triple = "x86_64-unknown-linux-gnu"
38
39 %struct.FVector = type { float, float, float }
40
41 define zeroext i1 @foo(x86_fp80 %x, x86_fp80 %y) noinline optnone {
42 entry:
43 %x.addr = alloca x86_fp80, align 16
44 %y.addr = alloca x86_fp80, align 16
45 store x86_fp80 %x, x86_fp80* %x.addr, align 16
46 store x86_fp80 %y, x86_fp80* %y.addr, align 16
47 %0 = load x86_fp80, x86_fp80* %x.addr, align 16
48 %1 = load x86_fp80, x86_fp80* %y.addr, align 16
49 %cmp = fcmp oeq x86_fp80 %0, %1
50
51 ; Test 1
52 ; Make sure that there is no dead code generated
53 ; from Fast-ISel Phi-node handling. We should only
54 ; see one movb of the constant 1, feeding the PHI
55 ; node in lor.end. This covers the code path with
56 ; handlePHINodesInSuccessorBlocks() returning true.
57 ;
58 ; CHECK-LABEL: foo:
59 ; CHECK: movb $1,
60 ; CHECK-NOT: movb $1,
61 ; CHECK-LABEL: .LBB0_1:
62
63 br i1 %cmp, label %lor.end, label %lor.rhs
64
65 lor.rhs: ; preds = %entry
66 %2 = load x86_fp80, x86_fp80* %x.addr, align 16
67 %call = call zeroext i1 @bar(x86_fp80 %2)
68 br label %lor.end
69
70 lor.end: ; preds = %lor.rhs, %entry
71 %3 = phi i1 [ true, %entry ], [ %call, %lor.rhs ]
72 ret i1 %3
73 }
74
75 declare zeroext i1 @bar(x86_fp80)
76
77 define i32 @main() noinline optnone {
78 entry:
79 %retval = alloca i32, align 4
80 %v = alloca %struct.FVector, align 4
81 %ref.tmp = alloca %struct.FVector, align 4
82 %tmp = alloca { <2 x float>, float }, align 8
83 store i32 0, i32* %retval, align 4
84 %0 = bitcast %struct.FVector* %v to i8*
85 call void @llvm.lifetime.start(i64 12, i8* %0) nounwind
86 %x.i = getelementptr inbounds %struct.FVector, %struct.FVector* %v, i64 0, i32 0
87 store float 1.000000e+00, float* %x.i, align 4
88 %y.i = getelementptr inbounds %struct.FVector, %struct.FVector* %v, i64 0, i32 1
89 store float 1.000000e+00, float* %y.i, align 4
90 %z.i = getelementptr inbounds %struct.FVector, %struct.FVector* %v, i64 0, i32 2
91 store float 1.000000e+00, float* %z.i, align 4
92 %x.i.1 = getelementptr inbounds %struct.FVector, %struct.FVector* %v, i64 0, i32 0
93 %1 = load float, float* %x.i.1, align 4
94 %cmp.i = fcmp oeq float %1, 1.000000e+00
95 br i1 %cmp.i, label %if.then.i, label %if.else.i
96
97 if.then.i: ; preds = %entry
98 %retval.sroa.0.0..sroa_cast.i = bitcast %struct.FVector* %v to <2 x float>*
99 %retval.sroa.0.0.copyload.i = load <2 x float>, <2 x float>* %retval.sroa.0.0..sroa_cast.i, align 4
100 %retval.sroa.6.0..sroa_idx16.i = getelementptr inbounds %struct.FVector, %struct.FVector* %v, i64 0, i32 2
101 %retval.sroa.6.0.copyload.i = load float, float* %retval.sroa.6.0..sroa_idx16.i, align 4
102 br label %func.exit
103
104 if.else.i: ; preds = %entry
105
106 ; Test 2
107 ; In order to feed the first PHI node in func.exit handlePHINodesInSuccessorBlocks()
108 ; generates a local value instruction, but it cannot handle the second PHI node and
109 ; returns false to let SelectionDAGISel handle both cases. Make sure the generated
110 ; local value instruction is removed.
111 ; CHECK-LABEL: main:
112 ; CHECK-LABEL: .LBB1_2:
113 ; CHECK: xorps [[REG:%xmm[0-7]]], [[REG]]
114 ; CHECK-NOT: xorps [[REG]], [[REG]]
115 ; CHECK-LABEL: .LBB1_3:
116
117 %cmp3.i = fcmp olt float %1, 0x3E45798EE0000000
118 br i1 %cmp3.i, label %func.exit, label %if.end.5.i
119
120 if.end.5.i: ; preds = %if.else.i
121 %retval.sroa.0.0.vec.insert13.i = insertelement <2 x float> undef, float %1, i32 0
122 %retval.sroa.0.4.vec.insert15.i = insertelement <2 x float> %retval.sroa.0.0.vec.insert13.i, float %1, i32 1
123 br label %func.exit
124
125 func.exit: ; preds = %if.then.i, %if.else.i, %if.end.5.i
126 %retval.sroa.6.0.i = phi float [ %retval.sroa.6.0.copyload.i, %if.then.i ], [ %1, %if.end.5.i ], [ 0.000000e+00, %if.else.i ]
127 %retval.sroa.0.0.i = phi <2 x float> [ %retval.sroa.0.0.copyload.i, %if.then.i ], [ %retval.sroa.0.4.vec.insert15.i, %if.end.5.i ], [ zeroinitializer, %if.else.i ]
128 %.fca.0.insert.i = insertvalue { <2 x float>, float } undef, <2 x float> %retval.sroa.0.0.i, 0
129 %.fca.1.insert.i = insertvalue { <2 x float>, float } %.fca.0.insert.i, float %retval.sroa.6.0.i, 1
130 store { <2 x float>, float } %.fca.1.insert.i, { <2 x float>, float }* %tmp, align 8
131 %2 = bitcast { <2 x float>, float }* %tmp to i8*
132 %3 = bitcast %struct.FVector* %ref.tmp to i8*
133 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %3, i8* %2, i64 12, i32 4, i1 false)
134 %4 = bitcast %struct.FVector* %v to i8*
135 %5 = bitcast %struct.FVector* %ref.tmp to i8*
136 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %4, i8* %5, i64 12, i32 4, i1 false)
137 %6 = bitcast %struct.FVector* %v to i8*
138 call void @llvm.lifetime.end(i64 12, i8* %6) nounwind
139 ret i32 0
140 }
141
142 declare void @llvm.lifetime.start(i64, i8* nocapture) argmemonly nounwind
143
144 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) argmemonly nounwind
145
146 declare void @llvm.lifetime.end(i64, i8* nocapture) argmemonly nounwind