llvm.org GIT mirror llvm / 02e89ac
ARMLoadStoreOptimizer: Create LDRD/STRD on thumb2 Differential Revision: http://reviews.llvm.org/D10623 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@241926 91177308-0d34-0410-b5e6-96231b3b80d8 Matthias Braun 5 years ago
7 changed file(s) with 133 addition(s) and 46 deletion(s). Raw diff Collapse all Expand all
110110 /// Index into the basic block where the merged instruction will be
111111 /// inserted. (See MemOpQueueEntry.Position)
112112 unsigned InsertPos;
113 /// Whether the instructions can be merged into a ldm/stm instruction.
114 bool CanMergeToLSMulti;
115 /// Whether the instructions can be merged into a ldrd/strd instruction.
116 bool CanMergeToLSDouble;
113117 };
114118 BumpPtrAllocator Allocator;
115119 SmallVector Candidates;
121125 MachineBasicBlock::iterator MBBI,
122126 DebugLoc DL, unsigned Base, unsigned WordOffset,
123127 ARMCC::CondCodes Pred, unsigned PredReg);
124 MachineInstr *MergeOps(MachineBasicBlock &MBB,
125 MachineBasicBlock::iterator InsertBefore, int Offset,
126 unsigned Base, bool BaseKill, unsigned Opcode,
127 ARMCC::CondCodes Pred, unsigned PredReg, DebugLoc DL,
128 ArrayRef> Regs);
128 MachineInstr *CreateLoadStoreMulti(MachineBasicBlock &MBB,
129 MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
130 bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
131 DebugLoc DL, ArrayRef> Regs);
132 MachineInstr *CreateLoadStoreDouble(MachineBasicBlock &MBB,
133 MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
134 bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
135 DebugLoc DL, ArrayRef> Regs) const;
129136 void FormCandidates(const MemOpQueue &MemOps);
130137 MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand);
131138 bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
554561 /// Create and insert a LDM or STM with Base as base register and registers in
555562 /// Regs as the register operands that would be loaded / stored. It returns
556563 /// the newly created instruction if the transformation is done.
557 MachineInstr *
558 ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
559 MachineBasicBlock::iterator InsertBefore, int Offset,
560 unsigned Base, bool BaseKill, unsigned Opcode,
561 ARMCC::CondCodes Pred, unsigned PredReg, DebugLoc DL,
562 ArrayRef> Regs) {
564 MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(MachineBasicBlock &MBB,
565 MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
566 bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
567 DebugLoc DL, ArrayRef> Regs) {
563568 unsigned NumRegs = Regs.size();
564569 assert(NumRegs > 1);
565570
748753 return MIB.getInstr();
749754 }
750755
/// Create and insert a single Thumb2 load/store double (t2LDRDi8 / t2STRDi8)
/// before InsertBefore, transferring the two registers in Regs relative to
/// Base.
///
/// Opcode is the opcode of the instructions being merged; it must be an
/// integer load or store and only selects between the load and store form.
/// Each Regs entry pairs a register with a flag that becomes the kill state of
/// that register in the store form. Offset is added unchanged as the immediate
/// operand, followed by the predicate operands Pred and PredReg.
/// BaseKill is not used here: the base register is added without a kill flag.
///
/// Returns the newly built instruction.
756 MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(MachineBasicBlock &MBB,
757 MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
758 bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
759 DebugLoc DL, ArrayRef> Regs) const {
760 bool IsLoad = isi32Load(Opcode);
761 assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store");
762 unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8;
763
// A load/store double transfers exactly two registers.
764 assert(Regs.size() == 2);
765 MachineInstrBuilder MIB = BuildMI(MBB, InsertBefore, DL,
766 TII->get(LoadStoreOpcode));
// Loads define both registers; stores read them and carry over the kill flags
// recorded in Regs.
767 if (IsLoad) {
768 MIB.addReg(Regs[0].first, RegState::Define)
769 .addReg(Regs[1].first, RegState::Define);
770 } else {
771 MIB.addReg(Regs[0].first, getKillRegState(Regs[0].second))
772 .addReg(Regs[1].first, getKillRegState(Regs[1].second));
773 }
// Remaining operands in order: base register, immediate offset, predicate.
774 MIB.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
775 return MIB.getInstr();
776 }
777
751778 /// Call CreateLoadStoreMulti or CreateLoadStoreDouble and update MemOps and merges accordingly on success.
752779 MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
753780 const MachineInstr *First = Cand.Instrs.front();
796823 unsigned PredReg = 0;
797824 ARMCC::CondCodes Pred = getInstrPredicate(First, PredReg);
798825 DebugLoc DL = First->getDebugLoc();
799 MachineInstr *Merged = MergeOps(MBB, InsertBefore, Offset, Base, BaseKill,
826 MachineInstr *Merged = nullptr;
827 if (Cand.CanMergeToLSDouble)
828 Merged = CreateLoadStoreDouble(MBB, InsertBefore, Offset, Base, BaseKill,
829 Opcode, Pred, PredReg, DL, Regs);
830 if (!Merged && Cand.CanMergeToLSMulti)
831 Merged = CreateLoadStoreMulti(MBB, InsertBefore, Offset, Base, BaseKill,
800832 Opcode, Pred, PredReg, DL, Regs);
801833 if (!Merged)
802834 return nullptr;
856888 }
857889
858890 return Merged;
891 }
892
/// Return true if \p Offset can be used as the immediate offset of a
/// t2LDRDi8/t2STRDi8 instruction.
///
/// t2LDRDi8/t2STRDi8 supports an 8 bit immediate which is internally
/// multiplied by 4, so the offset must be a multiple of 4 whose magnitude is
/// below 1024. Negative offsets are accepted on the same terms as positive
/// ones.
static bool isValidLSDoubleOffset(int Offset) {
  // Check the signed range directly rather than going through abs():
  // abs(INT_MIN) is undefined behaviour, and this form accepts exactly the
  // same set of offsets without a signed-to-unsigned conversion.
  return Offset > -1024 && Offset < 1024 && (Offset % 4) == 0;
}
860899
861900 /// Find candidates for load/store multiple or load/store double merge in list of MemOpQueueEntries.
896935 unsigned Latest = SIndex;
897936 unsigned Earliest = SIndex;
898937 unsigned Count = 1;
899
900 // Merge additional instructions fulfilling LDM/STM constraints.
938 bool CanMergeToLSDouble =
939 STI->isThumb2() && isNotVFP && isValidLSDoubleOffset(Offset);
940 // ARM errata 602117: LDRD with base in list may result in incorrect base
941 // register when interrupted or faulted.
942 if (STI->isCortexM3() && isi32Load(Opcode) &&
943 PReg == getLoadStoreBaseOp(*MI).getReg())
944 CanMergeToLSDouble = false;
945
946 bool CanMergeToLSMulti = true;
947 // On Swift, avoid vldm/vstm starting with an odd register number, as that
948 // needs more uops than single vldrs.
949 if (STI->isSwift() && !isNotVFP && (PRegNum % 2) == 1)
950 CanMergeToLSMulti = false;
951
952 // Merge following instructions where possible.
901953 for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) {
902954 int NewOffset = MemOps[I].Offset;
903955 if (NewOffset != Offset + (int)Size)
904956 break;
905957 const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI);
906958 unsigned Reg = MO.getReg();
907 if (Reg == ARM::SP)
959 unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
960
961 // See if the current load/store may be part of a multi load/store.
962 bool PartOfLSMulti = CanMergeToLSMulti;
963 if (PartOfLSMulti) {
964 // Cannot load from SP
965 if (Reg == ARM::SP)
966 PartOfLSMulti = false;
967 // Register numbers must be in ascending order.
968 else if (RegNum <= PRegNum)
969 PartOfLSMulti = false;
970 // For VFP / NEON load/store multiples, the registers must be
971 // consecutive and within the limit on the number of registers per
972 // instruction.
973 else if (!isNotVFP && RegNum != PRegNum+1)
974 PartOfLSMulti = false;
975 }
976 // See if the current load/store may be part of a double load/store.
977 bool PartOfLSDouble = CanMergeToLSDouble && Count <= 1;
978
979 if (!PartOfLSMulti && !PartOfLSDouble)
908980 break;
909 unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
910 // Register numbers must be in ascending order.
911 if (RegNum <= PRegNum)
912 break;
913 // For VFP / NEON load/store multiples, the registers must be consecutive
914 // and within the limit on the number of registers per instruction.
915 if (!isNotVFP && RegNum != PRegNum+1)
916 break;
917 // On Swift we don't want vldm/vstm to start with a odd register num
918 // because Q register unaligned vldm/vstm need more uops.
919 if (!isNotVFP && STI->isSwift() && Count == 1 && (PRegNum % 2) == 1)
920 break;
921
981 CanMergeToLSMulti &= PartOfLSMulti;
982 CanMergeToLSDouble &= PartOfLSDouble;
922983 // Track MemOp with latest and earliest position (Positions are
923984 // counted in reverse).
924985 unsigned Position = MemOps[I].Position;
938999 Candidate->LatestMIIdx = Latest - SIndex;
9391000 Candidate->EarliestMIIdx = Earliest - SIndex;
9401001 Candidate->InsertPos = MemOps[Latest].Position;
1002 if (Count == 1)
1003 CanMergeToLSMulti = CanMergeToLSDouble = false;
1004 Candidate->CanMergeToLSMulti = CanMergeToLSMulti;
1005 Candidate->CanMergeToLSDouble = CanMergeToLSDouble;
9411006 Candidates.push_back(Candidate);
9421007 // Continue after the chain.
9431008 SIndex += Count;
16761741 // Go through list of candidates and merge.
16771742 bool Changed = false;
16781743 for (const MergeCandidate *Candidate : Candidates) {
1679 if (Candidate->Instrs.size() > 1) {
1744 if (Candidate->CanMergeToLSMulti || Candidate->CanMergeToLSDouble) {
16801745 MachineInstr *Merged = MergeOpsUpdate(*Candidate);
16811746 // Merge preceding/trailing base inc/dec into the merged op.
16821747 if (Merged) {
1683 MergeBaseUpdateLSMultiple(Merged);
16841748 Changed = true;
1749 unsigned Opcode = Merged->getOpcode();
1750 if (Opcode != ARM::t2STRDi8 && Opcode != ARM::t2LDRDi8)
1751 MergeBaseUpdateLSMultiple(Merged);
16851752 } else {
16861753 for (MachineInstr *MI : Candidate->Instrs) {
16871754 if (MergeBaseUpdateLoadStore(MI))
2424 ;CHECK: push {r7, lr}
2525 ;CHECK: sub sp, #4
2626 ;CHECK: add r0, sp, #12
27 ;CHECK: str r2, [sp, #16]
28 ;CHECK: str r1, [sp, #12]
27 ;CHECK: strd r1, r2, [sp, #12]
2928 ;CHECK: bl fooUseStruct
3029 call void @fooUseStruct(%st_t* %p1)
3130 ret void
2727 ; CHECK: push {r4, r7, lr}
2828 ; CHECK: add r7, sp, #4
2929
30 ; CHECK-DAG: str r2, [r7, #8]
31 ; CHECK-DAG: str r3, [r7, #12]
30 ; CHECK: strd r2, r3, [r7, #8]
3231
3332 ; CHECK: ldr r0, [r7, #8]
3433
22 ; rdar://6949835
33 ; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=basic | FileCheck %s -check-prefix=BASIC -check-prefix=CHECK
44 ; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=greedy | FileCheck %s -check-prefix=GREEDY -check-prefix=CHECK
5 ; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=swift | FileCheck %s -check-prefix=SWIFT -check-prefix=CHECK
56
67 ; Magic ARM pair hints work best with linearscan / fast.
78
109110 ret void
110111 }
111112
113 ; CHECK-LABEL: strd_spill_ldrd_reload:
114 ; A8: strd r1, r0, [sp]
115 ; M3: strd r1, r0, [sp]
116 ; BASIC: strd r1, r0, [sp]
117 ; GREEDY: strd r0, r1, [sp]
118 ; CHECK: @ InlineAsm Start
119 ; CHECK: @ InlineAsm End
120 ; A8: ldrd r2, r1, [sp]
121 ; M3: ldrd r2, r1, [sp]
122 ; BASIC: ldrd r2, r1, [sp]
123 ; GREEDY: ldrd r1, r2, [sp]
124 ; CHECK: bl{{x?}} _extfunc
125 define void @strd_spill_ldrd_reload(i32 %v0, i32 %v1) {
126 ; force %v0 and %v1 to be spilled
127 call void asm sideeffect "", "~{r0},~{r1},~{r2},~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{lr}"()
128 ; force the reloaded %v0, %v1 into different registers
129 call void @extfunc(i32 0, i32 %v0, i32 %v1, i32 7)
130 ret void
131 }
132
112133 declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind
113134 declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind
33 entry:
44 ; CHECK-LABEL: t1:
55 ; CHECK: movs r1, #0
6 ; CHECK: str r1, [r0]
7 ; CHECK: str r1, [r0, #4]
6 ; CHECK: strd r1, r1, [r0]
87 ; CHECK: str r1, [r0, #8]
98 call void @llvm.memset.p0i8.i64(i8* %c, i8 0, i64 12, i32 8, i1 false)
109 ret void
44
55 declare i8* @llvm.returnaddress(i32)
66
7 define i32* @wrong-t2stmia-size-reduction(i32* %addr, i32 %val0) minsize {
7 define i32* @wrong-t2stmia-size-reduction(i32* %addr, i32 %val0, i32 %val1) minsize {
88 store i32 %val0, i32* %addr
99 %addr1 = getelementptr i32, i32* %addr, i32 1
10 %addr2 = getelementptr i32, i32* %addr, i32 2
1011 %lr = call i8* @llvm.returnaddress(i32 0)
1112 %lr32 = ptrtoint i8* %lr to i32
12 store i32 %lr32, i32* %addr1
13 %addr2 = getelementptr i32, i32* %addr1, i32 1
14 ret i32* %addr2
13 store i32 %val1, i32* %addr1
14 store i32 %lr32, i32* %addr2
15
16 %addr3 = getelementptr i32, i32* %addr, i32 3
17 ret i32* %addr3
1518 }
1619
17 ; Check that stm writes two registers. The bug caused one of registers (LR,
20 ; Check that stm writes three registers. The bug caused one of the registers
1821 ; (LR, which is invalid for the Thumb1 form of the STMIA instruction) to be dropped.
19 ; CHECK: stm{{[^,]*}}, {{{.*,.*}}}
22 ; CHECK-LABEL: wrong-t2stmia-size-reduction:
23 ; CHECK: stm{{[^,]*}}, {{{.*,.*,.*}}}
3232
3333 define double @double_on_stack(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i) {
3434 ; CHECK-LABEL: double_on_stack:
35 ; SOFT: ldr r0, [sp, #48]
36 ; SOFT: ldr r1, [sp, #52]
35 ; SOFT: ldrd r0, r1, [sp, #48]
3736 ; HARD: vldr d0, [sp]
3837 ; CHECK-NEXT: bx lr
3938 ret double %i
4140
4241 define double @double_not_split(double %a, double %b, double %c, double %d, double %e, double %f, double %g, float %h, double %i) {
4342 ; CHECK-LABEL: double_not_split:
44 ; SOFT: ldr r0, [sp, #48]
45 ; SOFT: ldr r1, [sp, #52]
43 ; SOFT: ldrd r0, r1, [sp, #48]
4644 ; HARD: vldr d0, [sp]
4745 ; CHECK-NEXT: bx lr
4846 ret double %i