llvm.org GIT mirror llvm / 14cceb3
[Thumb] Reapply r272251 with a fix for PR28348 (mk 2)

The important thing I was missing was ensuring newly added constants were kept in topological order. Repositioning the node is correct if the constant is newly added (so it has no topological ordering) but wrong if it already existed - positioning it next in the worklist would break the topological ordering.

Original commit message:

[Thumb] Select a BIC instead of AND if the immediate can be encoded more optimally negated

If an immediate is only used in an AND node, it is possible that the immediate can be more optimally materialized when negated. If this is the case, we can negate the immediate and use a BIC instead;

    int i(int a) {
      return a & 0xfffffeec;
    }

Used to produce:

      ldr r1, [CONSTPOOL]
      ands r0, r1
    CONSTPOOL:
      0xfffffeec

And now produces:

      movs r1, #255
      adds r1, #20 ; Less costly immediate generation
      bics r0, r1

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@274543 91177308-0d34-0410-b5e6-96231b3b80d8

James Molloy 4 years ago
3 changed file(s) with 86 addition(s) and 1 deletion(s). Raw diff Collapse all Expand all
28192819 if (tryV6T2BitfieldExtractOp(N, false))
28202820 return;
28212821
2822 // If an immediate is used in an AND node, it is possible that the immediate
2823 // can be more optimally materialized when negated. If this is the case we
2824 // can negate the immediate and use a BIC instead.
2825 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
2826 if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {
2827 uint32_t Imm = (uint32_t) N1C->getZExtValue();
2828
2829 // In Thumb2 mode, an AND can take a 12-bit immediate. If this
2830 // immediate can be negated and fit in the immediate operand of
2831 // a t2BIC, don't do any manual transform here as this can be
2832 // handled by the generic ISel machinery.
2833 bool PreferImmediateEncoding =
2834 Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));
2835 if (!PreferImmediateEncoding &&
2836 ConstantMaterializationCost(Imm) >
2837 ConstantMaterializationCost(~Imm)) {
2838 // The current immediate costs more to materialize than a negated
2839 // immediate, so negate the immediate and use a BIC.
2840 SDValue NewImm =
2841 CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32);
2842 // If the new constant didn't exist before, reposition it in the topological
2843 // ordering so it is just before N. Otherwise, don't touch its location.
2844 if (NewImm->getNodeId() == -1)
2845 CurDAG->RepositionNode(N->getIterator(), NewImm.getNode());
2846
2847 if (!Subtarget->hasThumb2()) {
2848 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32),
2849 N->getOperand(0), NewImm, getAL(CurDAG, dl),
2850 CurDAG->getRegister(0, MVT::i32)};
2851 ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops));
2852 return;
2853 } else {
2854 SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl),
2855 CurDAG->getRegister(0, MVT::i32),
2856 CurDAG->getRegister(0, MVT::i32)};
2857 ReplaceNode(N,
2858 CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops));
2859 return;
2860 }
2861 }
2862 }
2863
28222864 // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
28232865 // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
28242866 // are entirely contributed by c2 and lower 16-bits are entirely contributed
28332875 if (!Opc)
28342876 break;
28352877 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
2836 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2878 N1C = dyn_cast<ConstantSDNode>(N1);
28372879 if (!N1C)
28382880 break;
28392881 if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
0 ; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi -mcpu=cortex-m0 -verify-machineinstrs | FileCheck --check-prefix CHECK-T1 %s
1 ; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi -mcpu=cortex-m3 -verify-machineinstrs | FileCheck --check-prefix CHECK-T2 %s
2
3 ; CHECK-T1-LABEL: @i
4 ; CHECK-T2-LABEL: @i
5 ; CHECK-T1: movs r1, #255
6 ; CHECK-T1: adds r1, #20
7 ; CHECK-T1: bics r0, r1
8 ; CHECK-T2: movw r1, #275
9 ; CHECK-T2: bics r0, r1
10 define i32 @i(i32 %a) {
11 entry:
12 %and = and i32 %a, -276
13 ret i32 %and
14 }
15
16 ; CHECK-T1-LABEL: @j
17 ; CHECK-T2-LABEL: @j
18 ; CHECK-T1: movs r1, #128
19 ; CHECK-T1: bics r0, r1
20 ; CHECK-T2: bic r0, r0, #128
21 define i32 @j(i32 %a) {
22 entry:
23 %and = and i32 %a, -129
24 ret i32 %and
25 }
0 ; RUN: llc < %s | FileCheck %s
1
2 target datalayout = "e-m:e-p:32:32-i1:8:32-i8:8:32-i16:16:32-i64:64-v128:64:128-a:0:32-n32-S64"
3 target triple = "thumbv7--linux-gnueabihf"
4
5 ; CHECK-LABEL: f:
6 ; CHECK: bic
7 define void @f(i32* nocapture %b, i32* nocapture %c, i32 %a) {
8 %1 = and i32 %a, -4096
9 store i32 %1, i32* %c, align 4
10 %2 = and i32 %a, 4095
11 %3 = or i32 %2, 4096
12 %4 = load i32, i32* %b, align 4
13 %5 = add nsw i32 %4, %3
14 store i32 %5, i32* %b, align 4
15 ret void
16 }