llvm.org GIT mirror llvm / 15cbde3
Fix X86's isTruncateFree to not claim that truncate to i1 is free. This fixes Bill's testcase that failed for r48491. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@48542 91177308-0d34-0410-b5e6-96231b3b80d8 Christopher Lamb 11 years ago
4 changed file(s) with 27 addition(s) and 47 deletion(s). Raw diff Collapse all Expand all
23822382 DAG.getConstant(Sum, N1C->getValueType(0)));
23832383 }
23842384 }
2385
2386 // fold sra (shl X, m), result_size - n
2387 // -> (sign_extend (trunc (srl X, result_size - n - m))) for
2388 // result_size - n != m. If truncate is free for the target sext(shl) is
2389 // likely to result in better code.
2390 if (N0.getOpcode() == ISD::SHL) {
2391 // Get the two constants of the shifts, CN0 = m, CN = n.
2392 const ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
2393 if (N01C && N1C) {
2394 // Determine if the truncate type's bitsize would correspond to
2395 // an integer type for this target.
2396 unsigned VTValSize = MVT::getSizeInBits(VT);
2397 MVT::ValueType TruncVT = MVT::getIntegerType(VTValSize - N1C->getValue());
2398 unsigned ShiftAmt = N1C->getValue() - N01C->getValue();
2399
2400 // If the shift wouldn't be a noop, the truncated type is an actual type,
2401 // and the truncate is free, then proceed with the transform.
2402 if (ShiftAmt != 0 && TLI.isTruncateFree(VT, TruncVT)) {
2403 SDOperand Amt = DAG.getConstant(ShiftAmt, TLI.getShiftAmountTy());
2404 SDOperand Shift = DAG.getNode(ISD::SRL, VT, N0.getOperand(0), Amt);
2405 SDOperand Trunc = DAG.getNode(ISD::TRUNCATE, TruncVT, Shift);
2406 return DAG.getNode(ISD::SIGN_EXTEND, N->getValueType(0), Trunc);
2407 }
2408 }
2409 }
23852410
23862411 // Simplify, based on bits shifted out of the LHS.
23872412 if (N1C && SimplifyDemandedBits(SDOperand(N, 0)))
16461646 if it commuted the addl in LBB1_1.
16471647
16481648 //===---------------------------------------------------------------------===//
1649
1650 These two functions perform identical operations:
1651
1652 define i32 @test(i32 %f12) {
1653 %tmp7.25 = lshr i32 %f12, 16
1654 %tmp7.26 = trunc i32 %tmp7.25 to i8
1655 %tmp78.2 = sext i8 %tmp7.26 to i32
1656 ret i32 %tmp78.2
1657 }
1658
1659 define i32 @test2(i32 %f12) {
1660 %f11 = shl i32 %f12, 8
1661 %tmp7.25 = ashr i32 %f11, 24
1662 ret i32 %tmp7.25
1663 }
1664
1665 but the first compiles into significantly better code on x86-32:
1666
1667 _test:
1668 movsbl 6(%esp), %eax
1669 ret
1670 _test2:
1671 movl 4(%esp), %eax
1672 shll $8, %eax
1673 sarl $24, %eax
1674 ret
1675
1676 and on x86-64:
1677
1678 _test:
1679 shrl $16, %edi
1680 movsbl %dil, %eax
1681 ret
1682 _test2:
1683 shll $8, %edi
1684 movl %edi, %eax
1685 sarl $24, %eax
1686 ret
1687
1688 I would like instcombine to canonicalize the first into the second (since it is
1689 shorter and doesn't involve type width changes) but the x86 backend needs to do
1690 the right thing with the latter sequence first.
1691
1692 //===---------------------------------------------------------------------===//
56615661 return false;
56625662 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
56635663 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
5664 if (NumBits1 <= NumBits2)
5664 if (NumBits1 <= NumBits2 || NumBits2 < 8)
56655665 return false;
56665666 return Subtarget->is64Bit() || NumBits1 < 64;
56675667 }
56725672 return false;
56735673 unsigned NumBits1 = MVT::getSizeInBits(VT1);
56745674 unsigned NumBits2 = MVT::getSizeInBits(VT2);
5675 if (NumBits1 <= NumBits2)
5675 if (NumBits1 <= NumBits2 || NumBits2 < 8)
56765676 return false;
56775677 return Subtarget->is64Bit() || NumBits1 < 64;
56785678 }
0 ; RUN: llvm-as < %s | llc -march=x86 | grep sar | count 1
11 ; RUN: llvm-as < %s | llc -march=x86-64 | not grep sar
2 ; XFAIL: *
32
43 define i32 @test(i32 %f12) {
54 %tmp7.25 = lshr i32 %f12, 16