llvm.org GIT mirror llvm / 8a7f742
Fold the ShrinkDemandedOps pass into the regular DAGCombiner pass, which is faster, simpler, and less surprising. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@106263 91177308-0d34-0410-b5e6-96231b3b80d8 Dan Gohman 9 years ago
9 changed file(s) with 62 addition(s) and 119 deletion(s). Raw diff Collapse all Expand all
291291 MachineBasicBlock *CodeGenAndEmitDAG(MachineBasicBlock *BB);
292292 void LowerArguments(const BasicBlock *BB);
293293
294 void ShrinkDemandedOps();
295294 void ComputeLiveOutVRegInfo();
296295
297296 /// Create the scheduler. If a specific scheduler was specified
765765 SelectionDAG &DAG;
766766 bool LegalTys;
767767 bool LegalOps;
768 bool ShrinkOps;
769768 SDValue Old;
770769 SDValue New;
771770
772771 explicit TargetLoweringOpt(SelectionDAG &InDAG,
773 bool LT, bool LO,
774 bool Shrink = false) :
775 DAG(InDAG), LegalTys(LT), LegalOps(LO), ShrinkOps(Shrink) {}
772 bool LT, bool LO) :
773 DAG(InDAG), LegalTys(LT), LegalOps(LO) {}
776774
777775 bool LegalTypes() const { return LegalTys; }
778776 bool LegalOperations() const { return LegalOps; }
20272027 // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
20282028 // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
20292029 // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
2030 // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y))
2030 // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
20312031 //
20322032 // do not sink logical op inside of a vector extend, since it may combine
20332033 // into a vsetcc.
20372037 // Avoid infinite looping with PromoteIntBinOp.
20382038 (N0.getOpcode() == ISD::ANY_EXTEND &&
20392039 (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
2040 (N0.getOpcode() == ISD::TRUNCATE && TLI.isTypeLegal(Op0VT))) &&
2040 (N0.getOpcode() == ISD::TRUNCATE &&
2041 (!TLI.isZExtFree(VT, Op0VT) ||
2042 !TLI.isTruncateFree(Op0VT, VT)) &&
2043 TLI.isTypeLegal(Op0VT))) &&
20412044 !VT.isVector() &&
20422045 Op0VT == N1.getOperand(0).getValueType() &&
20432046 (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
24232426 // See if this is some rotate idiom.
24242427 if (SDNode *Rot = MatchRotate(N0, N1, N->getDebugLoc()))
24252428 return SDValue(Rot, 0);
2429
2430 // Simplify the operands using demanded-bits information.
2431 if (!VT.isVector() &&
2432 SimplifyDemandedBits(SDValue(N, 0)))
2433 return SDValue(N, 0);
24262434
24272435 return SDValue();
24282436 }
31573165 return NewSRL;
31583166 }
31593167
3168 // Attempt to convert a srl of a load into a narrower zero-extending load.
3169 SDValue NarrowLoad = ReduceLoadWidth(N);
3170 if (NarrowLoad.getNode())
3171 return NarrowLoad;
3172
31603173 // Here is a common situation. We want to optimize:
31613174 //
31623175 // %a = ...
36343647
36353648 // fold (zext (truncate x)) -> (and x, mask)
36363649 if (N0.getOpcode() == ISD::TRUNCATE &&
3637 (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) &&
3638 (!TLI.isTruncateFree(N0.getOperand(0).getValueType(),
3639 N0.getValueType()) ||
3640 !TLI.isZExtFree(N0.getValueType(), VT))) {
3650 (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) {
36413651 SDValue Op = N0.getOperand(0);
36423652 if (Op.getValueType().bitsLT(VT)) {
36433653 Op = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, Op);
40234033 /// extended, also fold the extension to form an extending load.
40244034 SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
40254035 unsigned Opc = N->getOpcode();
4036
40264037 ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
40274038 SDValue N0 = N->getOperand(0);
40284039 EVT VT = N->getValueType(0);
40394050 ExtVT = cast(N->getOperand(1))->getVT();
40404051 if (LegalOperations && !TLI.isLoadExtLegal(ISD::SEXTLOAD, ExtVT))
40414052 return SDValue();
4053 } else if (Opc == ISD::SRL) {
4054 // Another special-case: SRL is basically zero-extending a narrower
4055 // value.
4056 ExtType = ISD::ZEXTLOAD;
4057 N0 = SDValue(N, 0);
4058 ConstantSDNode *N01 = dyn_cast(N0.getOperand(1));
4059 if (!N01) return SDValue();
4060 ExtVT = EVT::getIntegerVT(*DAG.getContext(),
4061 VT.getSizeInBits() - N01->getZExtValue());
40424062 }
40434063
40444064 unsigned EVTBits = ExtVT.getSizeInBits();
42424262
42434263 // fold (truncate (load x)) -> (smaller load x)
42444264 // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
4245 if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT))
4246 return ReduceLoadWidth(N);
4265 if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
4266 SDValue Reduced = ReduceLoadWidth(N);
4267 if (Reduced.getNode())
4268 return Reduced;
4269 }
4270
4271 // Simplify the operands using demanded-bits information.
4272 if (!VT.isVector() &&
4273 SimplifyDemandedBits(SDValue(N, 0)))
4274 return SDValue(N, 0);
4275
42474276 return SDValue();
42484277 }
42494278
24732473 VT.getVectorNumElements() ==
24742474 Operand.getValueType().getVectorNumElements()) &&
24752475 "Vector element count mismatch!");
2476
24762477 if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
24772478 OpOpcode == ISD::ANY_EXTEND)
24782479 // (ext (zext x)) -> (zext x) and (ext (sext x)) -> (sext x)
24792480 return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
2481
2482 // (ext (trunc x)) -> x
2483 if (OpOpcode == ISD::TRUNCATE) {
2484 SDValue OpOp = Operand.getNode()->getOperand(0);
2485 if (OpOp.getValueType() == VT)
2486 return OpOp;
2487 }
24802488 break;
24812489 case ISD::TRUNCATE:
24822490 assert(VT.isInteger() && Operand.getValueType().isInteger() &&
371371 };
372372 }
373373
374 /// TrivialTruncElim - Eliminate some trivial nops that can result from
375 /// ShrinkDemandedOps: (trunc (ext n)) -> n.
376 static bool TrivialTruncElim(SDValue Op,
377 TargetLowering::TargetLoweringOpt &TLO) {
378 SDValue N0 = Op.getOperand(0);
379 EVT VT = Op.getValueType();
380 if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
381 N0.getOpcode() == ISD::SIGN_EXTEND ||
382 N0.getOpcode() == ISD::ANY_EXTEND) &&
383 N0.getOperand(0).getValueType() == VT) {
384 return TLO.CombineTo(Op, N0.getOperand(0));
385 }
386 return false;
387 }
388
389 /// ShrinkDemandedOps - A late transformation pass that shrinks expressions
390 /// using TargetLowering::TargetLoweringOpt::ShrinkDemandedOp. It converts
391 /// x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
392 void SelectionDAGISel::ShrinkDemandedOps() {
393 SmallVector Worklist;
394 SmallPtrSet InWorklist;
395
396 // Add all the dag nodes to the worklist.
397 Worklist.reserve(CurDAG->allnodes_size());
398 for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
399 E = CurDAG->allnodes_end(); I != E; ++I) {
400 Worklist.push_back(I);
401 InWorklist.insert(I);
402 }
403
404 TargetLowering::TargetLoweringOpt TLO(*CurDAG, true, true, true);
405 while (!Worklist.empty()) {
406 SDNode *N = Worklist.pop_back_val();
407 InWorklist.erase(N);
408
409 if (N->use_empty() && N != CurDAG->getRoot().getNode()) {
410 // Deleting this node may make its operands dead, add them to the worklist
411 // if they aren't already there.
412 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
413 if (InWorklist.insert(N->getOperand(i).getNode()))
414 Worklist.push_back(N->getOperand(i).getNode());
415
416 CurDAG->DeleteNode(N);
417 continue;
418 }
419
420 // Run ShrinkDemandedOp on scalar binary operations.
421 if (N->getNumValues() != 1 ||
422 !N->getValueType(0).isSimple() || !N->getValueType(0).isInteger())
423 continue;
424
425 unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits();
426 APInt Demanded = APInt::getAllOnesValue(BitWidth);
427 APInt KnownZero, KnownOne;
428 if (!TLI.SimplifyDemandedBits(SDValue(N, 0), Demanded,
429 KnownZero, KnownOne, TLO) &&
430 (N->getOpcode() != ISD::TRUNCATE ||
431 !TrivialTruncElim(SDValue(N, 0), TLO)))
432 continue;
433
434 // Revisit the node.
435 assert(!InWorklist.count(N) && "Already in worklist");
436 Worklist.push_back(N);
437 InWorklist.insert(N);
438
439 // Replace the old value with the new one.
440 DEBUG(errs() << "\nShrinkDemandedOps replacing ";
441 TLO.Old.getNode()->dump(CurDAG);
442 errs() << "\nWith: ";
443 TLO.New.getNode()->dump(CurDAG);
444 errs() << '\n');
445
446 if (InWorklist.insert(TLO.New.getNode()))
447 Worklist.push_back(TLO.New.getNode());
448
449 SDOPsWorkListRemover DeadNodes(Worklist, InWorklist);
450 CurDAG->ReplaceAllUsesOfValueWith(TLO.Old, TLO.New, &DeadNodes);
451
452 if (!TLO.Old.getNode()->use_empty()) continue;
453
454 for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands();
455 i != e; ++i) {
456 SDNode *OpNode = TLO.Old.getNode()->getOperand(i).getNode();
457 if (OpNode->hasOneUse()) {
458 // Add OpNode to the end of the list to revisit.
459 DeadNodes.RemoveFromWorklist(OpNode);
460 Worklist.push_back(OpNode);
461 InWorklist.insert(OpNode);
462 }
463 }
464
465 DeadNodes.RemoveFromWorklist(TLO.Old.getNode());
466 CurDAG->DeleteNode(TLO.Old.getNode());
467 }
468 }
469
470374 void SelectionDAGISel::ComputeLiveOutVRegInfo() {
471375 SmallPtrSet VisitedNodes;
472376 SmallVector Worklist;
635539 DEBUG(dbgs() << "Optimized legalized selection DAG:\n");
636540 DEBUG(CurDAG->dump());
637541
638 if (OptLevel != CodeGenOpt::None) {
639 ShrinkDemandedOps();
542 if (OptLevel != CodeGenOpt::None)
640543 ComputeLiveOutVRegInfo();
641 }
642544
643545 if (ViewISelDAGs) CurDAG->viewGraph("isel input for " + BlockName);
644546
10411041 if (TLO.ShrinkDemandedConstant(Op, ~KnownZero2 & NewMask))
10421042 return true;
10431043 // If the operation can be done in a smaller type, do so.
1044 if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
1044 if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
10451045 return true;
10461046
10471047 // Output known-1 bits are only known if set in both the LHS & RHS.
10751075 if (TLO.ShrinkDemandedConstant(Op, NewMask))
10761076 return true;
10771077 // If the operation can be done in a smaller type, do so.
1078 if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
1078 if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
10791079 return true;
10801080
10811081 // Output known-0 bits are only known if clear in both the LHS & RHS.
11001100 if ((KnownZero2 & NewMask) == NewMask)
11011101 return TLO.CombineTo(Op, Op.getOperand(1));
11021102 // If the operation can be done in a smaller type, do so.
1103 if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
1103 if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
11041104 return true;
11051105
11061106 // If all of the unknown bits are known to be zero on one side or the other
15471547 KnownOne2, TLO, Depth+1))
15481548 return true;
15491549 // See if the operation should be performed at a smaller bit width.
1550 if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
1550 if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
15511551 return true;
15521552 }
15531553 // FALL THROUGH
96409640 if (ShAmt1.getOpcode() == ISD::SUB) {
96419641 SDValue Sum = ShAmt1.getOperand(0);
96429642 if (ConstantSDNode *SumC = dyn_cast(Sum)) {
9643 if (SumC->getSExtValue() == Bits &&
9644 ShAmt1.getOperand(1) == ShAmt0)
9643 SDValue ShAmt1Op1 = ShAmt1.getOperand(1);
9644 if (ShAmt1Op1.getNode()->getOpcode() == ISD::TRUNCATE)
9645 ShAmt1Op1 = ShAmt1Op1.getOperand(0);
9646 if (SumC->getSExtValue() == Bits && ShAmt1Op1 == ShAmt0)
96459647 return DAG.getNode(Opc, DL, VT,
96469648 Op0, Op1,
96479649 DAG.getNode(ISD::TRUNCATE, DL,
2020 ret i32* %P2
2121 }
2222
23 define fastcc i32 @test4(i32* %d) nounwind {
24 %tmp4 = load i32* %d
25 %tmp512 = lshr i32 %tmp4, 24
26 ret i32 %tmp512
27 }
6666 ; X64: movw %si, 2(%rdi)
6767
6868 ; X32: test4:
69 ; X32: movzwl 8(%esp), %eax
69 ; X32: movl 8(%esp), %eax
7070 ; X32: movw %ax, 2(%{{.*}})
7171 }
7272