llvm.org GIT mirror llvm / 884c70c
On x86, favor folding short immediates into some arithmetic operations (e.g. add, and, xor, etc.) because materializing an immediate in a register is expensive in terms of code size. e.g. movl 4(%esp), %eax addl $4, %eax is 2 bytes shorter than movl $4, %eax addl 4(%esp), %eax git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@60139 91177308-0d34-0410-b5e6-96231b3b80d8 Evan Cheng 11 years ago
5 changed file(s) with 62 addition(s) and 11 deletion(s). Raw diff Collapse all Expand all
7979 return true;
8080 }
8181
82 /// CanBeFoldedBy - Returns true if the specific operand node N of U can be
83 /// folded during instruction selection that starts at Root?
84 virtual bool CanBeFoldedBy(SDNode *N, SDNode *U, SDNode *Root) const {
82 /// IsLegalAndProfitableToFold - Returns true if the specific operand node N of
83 /// U can be folded during instruction selection that starts at Root and
84 /// folding N is profitable.
85 virtual
86 bool IsLegalAndProfitableToFold(SDNode *N, SDNode *U, SDNode *Root) const {
8587 return true;
8688 }
8789
140140
141141 virtual void EmitFunctionEntryCode(Function &Fn, MachineFunction &MF);
142142
143 virtual bool CanBeFoldedBy(SDNode *N, SDNode *U, SDNode *Root) const;
143 virtual
144 bool IsLegalAndProfitableToFold(SDNode *N, SDNode *U, SDNode *Root) const;
144145
145146 // Include the pieces autogenerated from the target description.
146147 #include "X86GenDAGISel.inc"
279280 /// isNonImmUse - Start searching from Root up the DAG to check is Def can
280281 /// be reached. Return true if that's the case. However, ignore direct uses
281282 /// by ImmedUse (which would be U in the example illustrated in
282 /// CanBeFoldedBy) and by Root (which can happen in the store case).
283 /// IsLegalAndProfitableToFold) and by Root (which can happen in the store
284 /// case).
283285 /// FIXME: to be really generic, we should allow direct use by any node
284286 /// that is being folded. But realistically since we only fold loads which
285287 /// have one non-chain use, we only need to watch out for load/op/store
293295 }
294296
295297
296 bool X86DAGToDAGISel::CanBeFoldedBy(SDNode *N, SDNode *U, SDNode *Root) const {
298 bool X86DAGToDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U,
299 SDNode *Root) const {
297300 if (Fast) return false;
301
302 if (U == Root)
303 switch (U->getOpcode()) {
304 default: break;
305 case ISD::ADD:
306 case ISD::ADDC:
307 case ISD::ADDE:
308 case ISD::AND:
309 case ISD::OR:
310 case ISD::XOR: {
311 // If the other operand is an 8-bit immediate we should fold the immediate
312 // instead. This reduces code size.
313 // e.g.
314 // movl 4(%esp), %eax
315 // addl $4, %eax
316 // vs.
317 // movl $4, %eax
318 // addl 4(%esp), %eax
319 // The former is 2 bytes shorter. In case where the increment is 1, then
320 // the saving can be 4 bytes (by using incl %eax).
321 ConstantSDNode *Imm = dyn_cast(U->getOperand(1));
322 if (Imm) {
323 if (U->getValueType(0) == MVT::i64) {
324 if ((int32_t)Imm->getZExtValue() == (int64_t)Imm->getZExtValue())
325 return false;
326 } else {
327 if ((int8_t)Imm->getZExtValue() == (int64_t)Imm->getZExtValue())
328 return false;
329 }
330 }
331 }
332 }
298333
299334 // If Root use can somehow reach N through a path that doesn't contain
300335 // U then folding N would create a cycle. e.g. In the following
9981033 if (ISD::isNON_EXTLoad(InChain.getNode()) &&
9991034 InChain.getValue(0).hasOneUse() &&
10001035 N.hasOneUse() &&
1001 CanBeFoldedBy(N.getNode(), Pred.getNode(), Op.getNode())) {
1036 IsLegalAndProfitableToFold(N.getNode(), Pred.getNode(), Op.getNode())) {
10021037 LoadSDNode *LD = cast(InChain);
10031038 if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp))
10041039 return false;
10851120 SDValue &Index, SDValue &Disp) {
10861121 if (ISD::isNON_EXTLoad(N.getNode()) &&
10871122 N.hasOneUse() &&
1088 CanBeFoldedBy(N.getNode(), P.getNode(), P.getNode()))
1123 IsLegalAndProfitableToFold(N.getNode(), P.getNode(), P.getNode()))
10891124 return SelectAddr(P, N.getOperand(1), Base, Scale, Index, Disp);
10901125 return false;
10911126 }
0 ; RUN: llvm-as < %s | llc -march=x86 | grep inc
1 ; RUN: llvm-as < %s | llc -march=x86 | grep add | grep 4
2
3 define i32 @test(i32 %X) nounwind {
4 entry:
5 %0 = add i32 %X, 1
6 ret i32 %0
7 }
8
9 define i32 @test2(i32 %X) nounwind {
10 entry:
11 %0 = add i32 %X, 4
12 ret i32 %0
13 }
None ; RUN: llvm-as < %s | llc | grep {addl.(%eax), %ecx}
0 ; RUN: llvm-as < %s | llc | grep {addl.\$4, %ecx}
11 ; RUN: llvm-as < %s | llc | not grep leal
22 ; this should not sink %1 into bb1, that would increase reg pressure.
33
521521
522522 if (NeedCheck) {
523523 std::string ParentName(RootName.begin(), RootName.end()-1);
524 emitCheck("CanBeFoldedBy(" + RootName + ".getNode(), " + ParentName +
525 ".getNode(), N.getNode())");
524 emitCheck("IsLegalAndProfitableToFold(" + RootName +
525 ".getNode(), " + ParentName + ".getNode(), N.getNode())");
526526 }
527527 }
528528 }