llvm.org GIT mirror llvm / b20a8fc
X86: Try to use a smaller encoding by transforming (X << C1) & C2 into (X & (C2 >> C1)) & C1. (Part of PR5039) This tends to happen a lot with bitfield code generated by clang. A simple example for x86_64 is uint64_t foo(uint64_t x) { return (x&1) << 42; } which used to compile into bloated code: shlq $42, %rdi ## encoding: [0x48,0xc1,0xe7,0x2a] movabsq $4398046511104, %rax ## encoding: [0x48,0xb8,0x00,0x00,0x00,0x00,0x00,0x04,0x00,0x00] andq %rdi, %rax ## encoding: [0x48,0x21,0xf8] ret ## encoding: [0xc3] with this patch we can fold the immediate into the and: andq $1, %rdi ## encoding: [0x48,0x83,0xe7,0x01] movq %rdi, %rax ## encoding: [0x48,0x89,0xf8] shlq $42, %rax ## encoding: [0x48,0xc1,0xe0,0x2a] ret ## encoding: [0xc3] It's possible to save another byte by using 'andl' instead of 'andq' but I currently see no way of doing that without making this code even more complicated. See the TODOs in the code. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@129990 91177308-0d34-0410-b5e6-96231b3b80d8 Benjamin Kramer 9 years ago
2 changed file(s) with 176 addition(s) and 0 deletion(s). Raw diff Collapse all Expand all
15791579 return RetVal;
15801580 break;
15811581 }
1582 case ISD::AND:
1583 case ISD::OR:
1584 case ISD::XOR: {
1585 // For operations of the form (x << C1) op C2, check if we can use a smaller
1586 // encoding for C2 by transforming it into (x op (C2>>C1)) << C1.
1587 SDValue N0 = Node->getOperand(0);
1588 SDValue N1 = Node->getOperand(1);
1589
1590 if (N0->getOpcode() != ISD::SHL || !N0->hasOneUse())
1591 break;
1592
1593 // i8 is unshrinkable, i16 should be promoted to i32.
1594 if (NVT != MVT::i32 && NVT != MVT::i64)
1595 break;
1596
1597 ConstantSDNode *Cst = dyn_cast(N1);
1598 ConstantSDNode *ShlCst = dyn_cast(N0->getOperand(1));
1599 if (!Cst || !ShlCst)
1600 break;
1601
1602 int64_t Val = Cst->getSExtValue();
1603 uint64_t ShlVal = ShlCst->getZExtValue();
1604
1605 // Make sure that we don't change the operation by removing bits.
1606 // This only matters for OR and XOR, AND is unaffected.
1607 if (Opcode != ISD::AND && ((Val >> ShlVal) << ShlVal) != Val)
1608 break;
1609
1610 unsigned ShlOp, Op;
1611 EVT CstVT = NVT;
1612
1613 // Check the minimum bitwidth for the new constant.
1614 // TODO: AND32ri is the same as AND64ri32 with zext imm.
1615 // TODO: MOV32ri+OR64r is cheaper than MOV64ri64+OR64rr
1616 // TODO: Using 16 and 8 bit operations is also possible for or32 & xor32.
1617 if (!isInt<8>(Val) && isInt<8>(Val >> ShlVal))
1618 CstVT = MVT::i8;
1619 else if (!isInt<32>(Val) && isInt<32>(Val >> ShlVal))
1620 CstVT = MVT::i32;
1621
1622 // Bail if there is no smaller encoding.
1623 if (NVT == CstVT)
1624 break;
1625
1626 switch (NVT.getSimpleVT().SimpleTy) {
1627 default: llvm_unreachable("Unsupported VT!");
1628 case MVT::i32:
1629 assert(CstVT == MVT::i8);
1630 ShlOp = X86::SHL32ri;
1631
1632 switch (Opcode) {
1633 case ISD::AND: Op = X86::AND32ri8; break;
1634 case ISD::OR: Op = X86::OR32ri8; break;
1635 case ISD::XOR: Op = X86::XOR32ri8; break;
1636 }
1637 break;
1638 case MVT::i64:
1639 assert(CstVT == MVT::i8 || CstVT == MVT::i32);
1640 ShlOp = X86::SHL64ri;
1641
1642 switch (Opcode) {
1643 case ISD::AND: Op = CstVT==MVT::i8? X86::AND64ri8 : X86::AND64ri32; break;
1644 case ISD::OR: Op = CstVT==MVT::i8? X86::OR64ri8 : X86::OR64ri32; break;
1645 case ISD::XOR: Op = CstVT==MVT::i8? X86::XOR64ri8 : X86::XOR64ri32; break;
1646 }
1647 break;
1648 }
1649
1650 // Emit the smaller op and the shift.
1651 SDValue NewCst = CurDAG->getTargetConstant(Val >> ShlVal, CstVT);
1652 SDNode *New = CurDAG->getMachineNode(Op, dl, NVT, N0->getOperand(0),NewCst);
1653 return CurDAG->SelectNodeTo(Node, ShlOp, NVT, SDValue(New, 0),
1654 getI8Imm(ShlVal));
1655 break;
1656 }
15821657 case X86ISD::UMUL: {
15831658 SDValue N0 = Node->getOperand(0);
15841659 SDValue N1 = Node->getOperand(1);
0 ; RUN: llc < %s -march=x86-64 | FileCheck %s
1 ; PR5039
2
; (x << 10) & 31744 should become (x & 31) << 10, since 31744 == 31 << 10;
; the mask then fits in a sign-extended imm8 (andl $31).
define i32 @test1(i32 %x) nounwind {
  %and = shl i32 %x, 10
  %shl = and i32 %and, 31744
  ret i32 %shl
; CHECK: test1:
; CHECK: andl $31
; CHECK: shll $10
}
11
; Same shrink for OR: 31744 == 31 << 10 loses no bits under the shift,
; so (x << 10) | 31744 may become (x | 31) << 10.
define i32 @test2(i32 %x) nounwind {
  %or = shl i32 %x, 10
  %shl = or i32 %or, 31744
  ret i32 %shl
; CHECK: test2:
; CHECK: orl $31
; CHECK: shll $10
}
20
; Same shrink for XOR: 31744 == 31 << 10, so the constant folds to imm8.
define i32 @test3(i32 %x) nounwind {
  %xor = shl i32 %x, 10
  %shl = xor i32 %xor, 31744
  ret i32 %shl
; CHECK: test3:
; CHECK: xorl $31
; CHECK: shll $10
}
29
; 64-bit case: 264982302294016 == 241 << 40. Shrinks a movabsq mask to an
; imm8 and (241 fits imm8 via the ri8 form after the shift is hoisted).
define i64 @test4(i64 %x) nounwind {
  %and = shl i64 %x, 40
  %shl = and i64 %and, 264982302294016
  ret i64 %shl
; CHECK: test4:
; CHECK: andq $241
; CHECK: shlq $40
}
38
; 34084860461056 == 31 << 40; the shifted-out mask becomes andq $31.
define i64 @test5(i64 %x) nounwind {
  %and = shl i64 %x, 40
  %shl = and i64 %and, 34084860461056
  ret i64 %shl
; CHECK: test5:
; CHECK: andq $31
; CHECK: shlq $40
}
47
; Negative mask: -281474976710656 == -65536 << 32, which after unshifting
; fits an imm32 (andq $-65536) instead of needing movabsq.
define i64 @test6(i64 %x) nounwind {
  %and = shl i64 %x, 32
  %shl = and i64 %and, -281474976710656
  ret i64 %shl
; CHECK: test6:
; CHECK: andq $-65536
; CHECK: shlq $32
}
56
; OR variant of test4: 264982302294016 == 241 << 40, no bits lost by the
; transform, so the constant shrinks to orq $241.
define i64 @test7(i64 %x) nounwind {
  %or = shl i64 %x, 40
  %shl = or i64 %or, 264982302294016
  ret i64 %shl
; CHECK: test7:
; CHECK: orq $241
; CHECK: shlq $40
}
65
; OR variant of test5: 34084860461056 == 31 << 40 shrinks to orq $31.
define i64 @test8(i64 %x) nounwind {
  %or = shl i64 %x, 40
  %shl = or i64 %or, 34084860461056
  ret i64 %shl
; CHECK: test8:
; CHECK: orq $31
; CHECK: shlq $40
}
74
; XOR variant of test4: 264982302294016 == 241 << 40 shrinks to xorq $241.
; FIX(review): the directive said "orq $241" for an xor function — it only
; passed because FileCheck substring-matches and "orq" occurs inside "xorq".
; Pin the actual instruction, matching test3/test10/test11.
define i64 @test9(i64 %x) nounwind {
  %xor = shl i64 %x, 40
  %shl = xor i64 %xor, 264982302294016
  ret i64 %shl
; CHECK: test9:
; CHECK: xorq $241
; CHECK: shlq $40
}
83
; XOR variant of test5: 34084860461056 == 31 << 40 shrinks to xorq $31.
define i64 @test10(i64 %x) nounwind {
  %xor = shl i64 %x, 40
  %shl = xor i64 %xor, 34084860461056
  ret i64 %shl
; CHECK: test10:
; CHECK: xorq $31
; CHECK: shlq $40
}
92
; XOR with a negative constant: -562949953421312 == -65536 << 33, which
; unshifts losslessly to an imm32 (xorq $-65536) instead of a movabsq.
define i64 @test11(i64 %x) nounwind {
  %xor = shl i64 %x, 33
  %shl = xor i64 %xor, -562949953421312
  ret i64 %shl
; CHECK: test11:
; CHECK: xorq $-65536
; CHECK: shlq $33
}