llvm.org GIT mirror llvm / 97c0826
ARM: use a pseudo-instruction for cmpxchg at -O0.

The fast register-allocator cannot cope with inter-block dependencies without spilling. This is fine for ldrex/strex loops coming from atomicrmw instructions, where any value produced within a block is dead by the end, but not for cmpxchg. So we lower a cmpxchg at -O0 via a pseudo-inst that gets expanded after regalloc.

Fortunately this is at -O0, so we don't have to care about performance.

This simplifies the various axes of expansion considerably: we assume a strong seq_cst operation and ensure ordering via the always-present DMB instructions rather than v8 acquire/release instructions.

Should fix the 32-bit part of PR25526.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@266679 91177308-0d34-0410-b5e6-96231b3b80d8

Tim Northover, 4 years ago
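To make the new lowering concrete, here is a minimal sketch of an input that now takes this path (the llc invocation matches the RUN lines of the test added below; the file and function names are illustrative, not part of the commit):

; Built with: llc -verify-machineinstrs -mtriple=armv7-linux-gnu -O0 example.ll -o -
; At -O0 the cmpxchg below is selected into the CMP_SWAP_32 pseudo and only
; expanded into an ldrex/strex loop after register allocation, with the
; always-present dmb barriers providing the seq_cst ordering.
define { i32, i1 } @example_cas(i32* %addr, i32 %desired, i32 %new) nounwind {
  %res = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst monotonic
  ret { i32, i1 } %res
}

The test added at the end of this diff checks exactly this shape for the 8-, 16-, 32- and 64-bit widths.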
9 changed files with 475 additions and 8 deletions.
1919 #include "ARMConstantPoolValue.h"
2020 #include "ARMMachineFunctionInfo.h"
2121 #include "MCTargetDesc/ARMAddressingModes.h"
22 #include "llvm/CodeGen/LivePhysRegs.h"
2223 #include "llvm/CodeGen/MachineFrameInfo.h"
2324 #include "llvm/CodeGen/MachineFunctionPass.h"
2425 #include "llvm/CodeGen/MachineInstrBuilder.h"
6263 void TransferImpOps(MachineInstr &OldMI,
6364 MachineInstrBuilder &UseMI, MachineInstrBuilder &DefMI);
6465 bool ExpandMI(MachineBasicBlock &MBB,
65 MachineBasicBlock::iterator MBBI);
66 MachineBasicBlock::iterator MBBI,
67 MachineBasicBlock::iterator &NextMBBI);
6668 bool ExpandMBB(MachineBasicBlock &MBB);
6769 void ExpandVLD(MachineBasicBlock::iterator &MBBI);
6870 void ExpandVST(MachineBasicBlock::iterator &MBBI);
7173 unsigned Opc, bool IsExt);
7274 void ExpandMOV32BitImm(MachineBasicBlock &MBB,
7375 MachineBasicBlock::iterator &MBBI);
76 bool ExpandCMP_SWAP(MachineBasicBlock &MBB,
77 MachineBasicBlock::iterator MBBI, unsigned LdrexOp,
78 unsigned StrexOp, unsigned UxtOp,
79 MachineBasicBlock::iterator &NextMBBI);
80
81 bool ExpandCMP_SWAP_64(MachineBasicBlock &MBB,
82 MachineBasicBlock::iterator MBBI,
83 MachineBasicBlock::iterator &NextMBBI);
7484 };
7585 char ARMExpandPseudo::ID = 0;
7686 }
741751 MI.eraseFromParent();
742752 }
743753
754 static void addPostLoopLiveIns(MachineBasicBlock *MBB, LivePhysRegs &LiveRegs) {
755 for (auto I = LiveRegs.begin(); I != LiveRegs.end(); ++I)
756 MBB->addLiveIn(*I);
757 }
758
759 /// Expand a CMP_SWAP pseudo-inst to an ldrex/strex loop as simply as
760 /// possible. This only gets used at -O0 so we don't care about efficiency of the
761 /// generated code.
762 bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB,
763 MachineBasicBlock::iterator MBBI,
764 unsigned LdrexOp, unsigned StrexOp,
765 unsigned UxtOp,
766 MachineBasicBlock::iterator &NextMBBI) {
767 bool IsThumb = STI->isThumb();
768 MachineInstr &MI = *MBBI;
769 DebugLoc DL = MI.getDebugLoc();
770 MachineOperand &Dest = MI.getOperand(0);
771 unsigned StatusReg = MI.getOperand(1).getReg();
772 MachineOperand &Addr = MI.getOperand(2);
773 MachineOperand &Desired = MI.getOperand(3);
774 MachineOperand &New = MI.getOperand(4);
775
776 LivePhysRegs LiveRegs(&TII->getRegisterInfo());
777 LiveRegs.addLiveOuts(&MBB);
778 for (auto I = std::prev(MBB.end()); I != MBBI; --I)
779 LiveRegs.stepBackward(*I);
780
781 MachineFunction *MF = MBB.getParent();
782 auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
783 auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
784 auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
785
786 MF->insert(++MBB.getIterator(), LoadCmpBB);
787 MF->insert(++LoadCmpBB->getIterator(), StoreBB);
788 MF->insert(++StoreBB->getIterator(), DoneBB);
789
790 if (UxtOp) {
791 MachineInstrBuilder MIB =
792 BuildMI(MBB, MBBI, DL, TII->get(UxtOp), Desired.getReg())
793 .addReg(Desired.getReg(), RegState::Kill);
794 if (!IsThumb)
795 MIB.addImm(0);
796 AddDefaultPred(MIB);
797 }
798
799 // .Lloadcmp:
800 // ldrex rDest, [rAddr]
801 // cmp rDest, rDesired
802 // bne .Ldone
803 MBB.addSuccessor(LoadCmpBB);
804 LoadCmpBB->addLiveIn(Addr.getReg());
805 LoadCmpBB->addLiveIn(Dest.getReg());
806 LoadCmpBB->addLiveIn(Desired.getReg());
807 addPostLoopLiveIns(LoadCmpBB, LiveRegs);
808
809 MachineInstrBuilder MIB;
810 MIB = BuildMI(LoadCmpBB, DL, TII->get(LdrexOp), Dest.getReg());
811 MIB.addReg(Addr.getReg());
812 if (LdrexOp == ARM::t2LDREX)
813 MIB.addImm(0); // a 32-bit Thumb ldrex (only) allows an offset.
814 AddDefaultPred(MIB);
815
816 unsigned CMPrr = IsThumb ? ARM::tCMPhir : ARM::CMPrr;
817 AddDefaultPred(BuildMI(LoadCmpBB, DL, TII->get(CMPrr))
818 .addReg(Dest.getReg(), getKillRegState(Dest.isDead()))
819 .addOperand(Desired));
820 unsigned Bcc = IsThumb ? ARM::tBcc : ARM::Bcc;
821 BuildMI(LoadCmpBB, DL, TII->get(Bcc))
822 .addMBB(DoneBB)
823 .addImm(ARMCC::NE)
824 .addReg(ARM::CPSR, RegState::Kill);
825 LoadCmpBB->addSuccessor(DoneBB);
826 LoadCmpBB->addSuccessor(StoreBB);
827
828 // .Lstore:
829 // strex rStatus, rNew, [rAddr]
830 // cmp rStatus, #0
831 // bne .Lloadcmp
832 StoreBB->addLiveIn(Addr.getReg());
833 StoreBB->addLiveIn(New.getReg());
834 addPostLoopLiveIns(StoreBB, LiveRegs);
835
836
837 MIB = BuildMI(StoreBB, DL, TII->get(StrexOp), StatusReg);
838 MIB.addOperand(New);
839 MIB.addOperand(Addr);
840 if (StrexOp == ARM::t2STREX)
841 MIB.addImm(0); // a 32-bit Thumb strex (only) allows an offset.
842 AddDefaultPred(MIB);
843
844 unsigned CMPri = IsThumb ? ARM::t2CMPri : ARM::CMPri;
845 AddDefaultPred(BuildMI(StoreBB, DL, TII->get(CMPri))
846 .addReg(StatusReg, RegState::Kill)
847 .addImm(0));
848 BuildMI(StoreBB, DL, TII->get(Bcc))
849 .addMBB(LoadCmpBB)
850 .addImm(ARMCC::NE)
851 .addReg(ARM::CPSR, RegState::Kill);
852 StoreBB->addSuccessor(LoadCmpBB);
853 StoreBB->addSuccessor(DoneBB);
854
855 DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
856 DoneBB->transferSuccessors(&MBB);
857 addPostLoopLiveIns(DoneBB, LiveRegs);
858
859 NextMBBI = MBB.end();
860 MI.eraseFromParent();
861 return true;
862 }
863
864 /// ARM's ldrexd/strexd take a consecutive register pair (represented as a
865 /// single GPRPair register), Thumb's take two separate registers so we need to
866 /// extract the subregs from the pair.
867 static void addExclusiveRegPair(MachineInstrBuilder &MIB, MachineOperand &Reg,
868 unsigned Flags, bool IsThumb,
869 const TargetRegisterInfo *TRI) {
870 if (IsThumb) {
871 unsigned RegLo = TRI->getSubReg(Reg.getReg(), ARM::gsub_0);
872 unsigned RegHi = TRI->getSubReg(Reg.getReg(), ARM::gsub_1);
873 MIB.addReg(RegLo, Flags | getKillRegState(Reg.isDead()));
874 MIB.addReg(RegHi, Flags | getKillRegState(Reg.isDead()));
875 } else
876 MIB.addReg(Reg.getReg(), Flags | getKillRegState(Reg.isDead()));
877 }
878
879 /// Expand a 64-bit CMP_SWAP to an ldrexd/strexd loop.
880 bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB,
881 MachineBasicBlock::iterator MBBI,
882 MachineBasicBlock::iterator &NextMBBI) {
883 bool IsThumb = STI->isThumb();
884 MachineInstr &MI = *MBBI;
885 DebugLoc DL = MI.getDebugLoc();
886 MachineOperand &Dest = MI.getOperand(0);
887 unsigned StatusReg = MI.getOperand(1).getReg();
888 MachineOperand &Addr = MI.getOperand(2);
889 MachineOperand &Desired = MI.getOperand(3);
890 MachineOperand &New = MI.getOperand(4);
891
892 unsigned DestLo = TRI->getSubReg(Dest.getReg(), ARM::gsub_0);
893 unsigned DestHi = TRI->getSubReg(Dest.getReg(), ARM::gsub_1);
894 unsigned DesiredLo = TRI->getSubReg(Desired.getReg(), ARM::gsub_0);
895 unsigned DesiredHi = TRI->getSubReg(Desired.getReg(), ARM::gsub_1);
896
897 LivePhysRegs LiveRegs(&TII->getRegisterInfo());
898 LiveRegs.addLiveOuts(&MBB);
899 for (auto I = std::prev(MBB.end()); I != MBBI; --I)
900 LiveRegs.stepBackward(*I);
901
902 MachineFunction *MF = MBB.getParent();
903 auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
904 auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
905 auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
906
907 MF->insert(++MBB.getIterator(), LoadCmpBB);
908 MF->insert(++LoadCmpBB->getIterator(), StoreBB);
909 MF->insert(++StoreBB->getIterator(), DoneBB);
910
911 // .Lloadcmp:
912 // ldrexd rDestLo, rDestHi, [rAddr]
913 // cmp rDestLo, rDesiredLo
914 // sbcs rStatus, rDestHi, rDesiredHi
915 // bne .Ldone
916 MBB.addSuccessor(LoadCmpBB);
917 LoadCmpBB->addLiveIn(Addr.getReg());
918 LoadCmpBB->addLiveIn(Dest.getReg());
919 LoadCmpBB->addLiveIn(Desired.getReg());
920 addPostLoopLiveIns(LoadCmpBB, LiveRegs);
921
922 unsigned LDREXD = IsThumb ? ARM::t2LDREXD : ARM::LDREXD;
923 MachineInstrBuilder MIB;
924 MIB = BuildMI(LoadCmpBB, DL, TII->get(LDREXD));
925 addExclusiveRegPair(MIB, Dest, RegState::Define, IsThumb, TRI);
926 MIB.addReg(Addr.getReg());
927 AddDefaultPred(MIB);
928
929 unsigned CMPrr = IsThumb ? ARM::tCMPhir : ARM::CMPrr;
930 AddDefaultPred(BuildMI(LoadCmpBB, DL, TII->get(CMPrr))
931 .addReg(DestLo, getKillRegState(Dest.isDead()))
932 .addReg(DesiredLo, getKillRegState(Desired.isDead())));
933
934 unsigned SBCrr = IsThumb ? ARM::t2SBCrr : ARM::SBCrr;
935 MIB = BuildMI(LoadCmpBB, DL, TII->get(SBCrr))
936 .addReg(StatusReg, RegState::Define | RegState::Dead)
937 .addReg(DestHi, getKillRegState(Dest.isDead()))
938 .addReg(DesiredHi, getKillRegState(Desired.isDead()));
939 AddDefaultPred(MIB);
940 MIB.addReg(ARM::CPSR, RegState::Kill);
941
942 unsigned Bcc = IsThumb ? ARM::tBcc : ARM::Bcc;
943 BuildMI(LoadCmpBB, DL, TII->get(Bcc))
944 .addMBB(DoneBB)
945 .addImm(ARMCC::NE)
946 .addReg(ARM::CPSR, RegState::Kill);
947 LoadCmpBB->addSuccessor(DoneBB);
948 LoadCmpBB->addSuccessor(StoreBB);
949
950 // .Lstore:
951 // strexd rStatus, rNewLo, rNewHi, [rAddr]
952 // cmp rStatus, #0
953 // bne .Lloadcmp
954 StoreBB->addLiveIn(Addr.getReg());
955 StoreBB->addLiveIn(New.getReg());
956 addPostLoopLiveIns(StoreBB, LiveRegs);
957
958 unsigned STREXD = IsThumb ? ARM::t2STREXD : ARM::STREXD;
959 MIB = BuildMI(StoreBB, DL, TII->get(STREXD), StatusReg);
960 addExclusiveRegPair(MIB, New, 0, IsThumb, TRI);
961 MIB.addOperand(Addr);
962 AddDefaultPred(MIB);
963
964 unsigned CMPri = IsThumb ? ARM::t2CMPri : ARM::CMPri;
965 AddDefaultPred(BuildMI(StoreBB, DL, TII->get(CMPri))
966 .addReg(StatusReg, RegState::Kill)
967 .addImm(0));
968 BuildMI(StoreBB, DL, TII->get(Bcc))
969 .addMBB(LoadCmpBB)
970 .addImm(ARMCC::NE)
971 .addReg(ARM::CPSR, RegState::Kill);
972 StoreBB->addSuccessor(LoadCmpBB);
973 StoreBB->addSuccessor(DoneBB);
974
975 DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
976 DoneBB->transferSuccessors(&MBB);
977 addPostLoopLiveIns(DoneBB, LiveRegs);
978
979 NextMBBI = MBB.end();
980 MI.eraseFromParent();
981 return true;
982 }
983
984
744985 bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
745 MachineBasicBlock::iterator MBBI) {
986 MachineBasicBlock::iterator MBBI,
987 MachineBasicBlock::iterator &NextMBBI) {
746988 MachineInstr &MI = *MBBI;
747989 unsigned Opcode = MI.getOpcode();
748990 switch (Opcode) {
13791621 case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false); return true;
13801622 case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true); return true;
13811623 case ARM::VTBX4Pseudo: ExpandVTBL(MBBI, ARM::VTBX4, true); return true;
1624
1625 case ARM::CMP_SWAP_8:
1626 if (STI->isThumb())
1627 return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREXB, ARM::t2STREXB,
1628 ARM::tUXTB, NextMBBI);
1629 else
1630 return ExpandCMP_SWAP(MBB, MBBI, ARM::LDREXB, ARM::STREXB,
1631 ARM::UXTB, NextMBBI);
1632 case ARM::CMP_SWAP_16:
1633 if (STI->isThumb())
1634 return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREXH, ARM::t2STREXH,
1635 ARM::tUXTH, NextMBBI);
1636 else
1637 return ExpandCMP_SWAP(MBB, MBBI, ARM::LDREXH, ARM::STREXH,
1638 ARM::UXTH, NextMBBI);
1639 case ARM::CMP_SWAP_32:
1640 if (STI->isThumb())
1641 return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREX, ARM::t2STREX, 0,
1642 NextMBBI);
1643 else
1644 return ExpandCMP_SWAP(MBB, MBBI, ARM::LDREX, ARM::STREX, 0, NextMBBI);
1645
1646 case ARM::CMP_SWAP_64:
1647 return ExpandCMP_SWAP_64(MBB, MBBI, NextMBBI);
13821648 }
13831649 }
13841650
13881654 MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
13891655 while (MBBI != E) {
13901656 MachineBasicBlock::iterator NMBBI = std::next(MBBI);
1391 Modified |= ExpandMI(MBB, MBBI);
1657 Modified |= ExpandMI(MBB, MBBI, NMBBI);
13921658 MBBI = NMBBI;
13931659 }
13941660
251251 SDNode *SelectConcatVector(SDNode *N);
252252
253253 SDNode *SelectSMLAWSMULW(SDNode *N);
254
255 SDNode *SelectCMP_SWAP(SDNode *N);
254256
255257 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
256258 /// inline asm expressions.
25932595 CurDAG->getRegister(0, MVT::i32)};
25942596 return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
25952597 }
2598 return nullptr;
2599 }
2600
2601 /// We've got special pseudo-instructions for these
2602 SDNode *ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
2603 unsigned Opcode;
2604 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
2605 if (MemTy == MVT::i8)
2606 Opcode = ARM::CMP_SWAP_8;
2607 else if (MemTy == MVT::i16)
2608 Opcode = ARM::CMP_SWAP_16;
2609 else if (MemTy == MVT::i32)
2610 Opcode = ARM::CMP_SWAP_32;
2611 else
2612 llvm_unreachable("Unknown AtomicCmpSwap type");
2613
2614 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
2615 N->getOperand(0)};
2616 SDNode *CmpSwap = CurDAG->getMachineNode(
2617 Opcode, SDLoc(N),
2618 CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);
2619
2620 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
2621 MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
2622 cast<MachineSDNode>(CmpSwap)->setMemRefs(MemOp, MemOp + 1);
2623
2624 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
2625 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
25962626 return nullptr;
25972627 }
25982628
34923522
34933523 case ISD::CONCAT_VECTORS:
34943524 return SelectConcatVector(N);
3525
3526 case ISD::ATOMIC_CMP_SWAP:
3527 return SelectCMP_SWAP(N);
34953528 }
34963529
34973530 return SelectCode(N);
849849 // ATOMIC_FENCE needs custom lowering; the others should have been expanded
850850 // to ldrex/strex loops already.
851851 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
852 if (!Subtarget->isThumb() || !Subtarget->isMClass())
853 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom);
852854
853855 // On v8, we have particularly efficient implementations of atomic fences
854856 // if they can be combined with nearby atomic loads and stores.
855 if (!Subtarget->hasV8Ops()) {
857 if (!Subtarget->hasV8Ops() || getTargetMachine().getOptLevel() == 0) {
856858 // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
857859 InsertFencesForAtomic = true;
858860 }
69666968 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Cycles32,
69676969 DAG.getConstant(0, DL, MVT::i32)));
69686970 Results.push_back(Cycles32.getValue(1));
6971 }
6972
6973 static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V0, SDValue V1) {
6974 SDLoc dl(V0.getNode());
6975 SDValue RegClass =
6976 DAG.getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
6977 SDValue SubReg0 = DAG.getTargetConstant(ARM::gsub_0, dl, MVT::i32);
6978 SDValue SubReg1 = DAG.getTargetConstant(ARM::gsub_1, dl, MVT::i32);
6979 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
6980 return SDValue(
6981 DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped, Ops), 0);
6982 }
6983
6984 static void ReplaceCMP_SWAP_64Results(SDNode *N,
6985 SmallVectorImpl<SDValue> &Results,
6986 SelectionDAG &DAG) {
6987 assert(N->getValueType(0) == MVT::i64 &&
6988 "AtomicCmpSwap on types less than 64 should be legal");
6989 SDValue Ops[] = {N->getOperand(1),
6990 createGPRPairNode(DAG, N->getOperand(2)->getOperand(0),
6991 N->getOperand(2)->getOperand(1)),
6992 createGPRPairNode(DAG, N->getOperand(3)->getOperand(0),
6993 N->getOperand(3)->getOperand(1)),
6994 N->getOperand(0)};
6995 SDNode *CmpSwap = DAG.getMachineNode(
6996 ARM::CMP_SWAP_64, SDLoc(N),
6997 DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other), Ops);
6998
6999 MachineFunction &MF = DAG.getMachineFunction();
7000 MachineSDNode::mmo_iterator MemOp = MF.allocateMemRefsArray(1);
7001 MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
7002 cast<MachineSDNode>(CmpSwap)->setMemRefs(MemOp, MemOp + 1);
7003
7004 Results.push_back(DAG.getTargetExtractSubreg(ARM::gsub_0, SDLoc(N), MVT::i32,
7005 SDValue(CmpSwap, 0)));
7006 Results.push_back(DAG.getTargetExtractSubreg(ARM::gsub_1, SDLoc(N), MVT::i32,
7007 SDValue(CmpSwap, 0)));
7008 Results.push_back(SDValue(CmpSwap, 2));
69697009 }
69707010
69717011 SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
70967136 assert(Subtarget->isTargetWindows() && "can only expand DIV on Windows");
70977137 return ExpandDIV_Windows(SDValue(N, 0), DAG, N->getOpcode() == ISD::SDIV,
70987138 Results);
7139 case ISD::ATOMIC_CMP_SWAP:
7140 ReplaceCMP_SWAP_64Results(N, Results, DAG);
7141 return;
70997142 }
71007143 if (Res.getNode())
71017144 Results.push_back(Res);
1215412197
1215512198 bool ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(
1215612199 AtomicCmpXchgInst *AI) const {
12157 return true;
12200 // At -O0, fast-regalloc cannot cope with the live vregs necessary to
12201 // implement cmpxchg without spilling. If the address being exchanged is also
12202 // on the stack and close enough to the spill slot, this can lead to a
12203 // situation where the monitor always gets cleared and the atomic operation
12204 // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
12205 return getTargetMachine().getOptLevel() != 0;
1215812206 }
1215912207
1216012208 bool ARMTargetLowering::shouldInsertFencesForAtomic(
57925792 def SPACE : PseudoInst<(outs GPR:$Rd), (ins i32imm:$size, GPR:$Rn),
57935793 NoItinerary,
57945794 [(set GPR:$Rd, (int_arm_space imm:$size, GPR:$Rn))]>;
5795
5796 //===----------------------------------
5797 // Atomic cmpxchg for -O0
5798 //===----------------------------------
5799
5800 // The fast register allocator used during -O0 inserts spills to cover any VRegs
5801 // live across basic block boundaries. When this happens between an LDREX and an
5802 // STREX it can clear the exclusive monitor, causing all cmpxchg attempts to
5803 // fail.
5804
5805 // Unfortunately, this means we have to have an alternative (expanded
5806 // post-regalloc) path for -O0 compilations. Fortunately this path can be
5807 // significantly more naive than the standard expansion: we conservatively
5808 // assume seq_cst, strong cmpxchg and omit clrex on failure.
5809
5810 let Constraints = "@earlyclobber $Rd,@earlyclobber $status",
5811 mayLoad = 1, mayStore = 1 in {
5812 def CMP_SWAP_8 : PseudoInst<(outs GPR:$Rd, GPR:$status),
5813 (ins GPR:$addr, GPR:$desired, GPR:$new),
5814 NoItinerary, []>, Sched<[]>;
5815
5816 def CMP_SWAP_16 : PseudoInst<(outs GPR:$Rd, GPR:$status),
5817 (ins GPR:$addr, GPR:$desired, GPR:$new),
5818 NoItinerary, []>, Sched<[]>;
5819
5820 def CMP_SWAP_32 : PseudoInst<(outs GPR:$Rd, GPR:$status),
5821 (ins GPR:$addr, GPR:$desired, GPR:$new),
5822 NoItinerary, []>, Sched<[]>;
5823
5824 def CMP_SWAP_64 : PseudoInst<(outs GPRPair:$Rd, GPR:$status),
5825 (ins GPR:$addr, GPRPair:$desired, GPRPair:$new),
5826 NoItinerary, []>, Sched<[]>;
5827 }
0 ; RUN: llc -verify-machineinstrs -mtriple=armv7-linux-gnu -O0 %s -o - | FileCheck %s
1 ; RUN: llc -verify-machineinstrs -mtriple=thumbv8-linux-gnu -O0 %s -o - | FileCheck %s
2 ; RUN: llc -verify-machineinstrs -mtriple=thumbv6m-none-eabi -O0 %s -o - | FileCheck %s --check-prefix=CHECK-T1
3
4 ; CHECK-T1-NOT: ldrex
5 ; CHECK-T1-NOT: strex
6
7 define { i8, i1 } @test_cmpxchg_8(i8* %addr, i8 %desired, i8 %new) nounwind {
8 ; CHECK-LABEL: test_cmpxchg_8:
9 ; CHECK: dmb ish
10 ; CHECK: uxtb [[DESIRED:r[0-9]+]], [[DESIRED]]
11 ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
12 ; CHECK: ldrexb [[OLD:r[0-9]+]], [r0]
13 ; CHECK: cmp [[OLD]], [[DESIRED]]
14 ; CHECK: bne [[DONE:.LBB[0-9]+_[0-9]+]]
15 ; CHECK: strexb [[STATUS:r[0-9]+]], r2, [r0]
16 ; CHECK: cmp{{(\.w)?}} [[STATUS]], #0
17 ; CHECK: bne [[RETRY]]
18 ; CHECK: [[DONE]]:
19 ; CHECK: cmp{{(\.w)?}} [[OLD]], [[DESIRED]]
20 ; CHECK: {{moveq.w|movweq}} {{r[0-9]+}}, #1
21 ; CHECK: dmb ish
22 %res = cmpxchg i8* %addr, i8 %desired, i8 %new seq_cst monotonic
23 ret { i8, i1 } %res
24 }
25
26 define { i16, i1 } @test_cmpxchg_16(i16* %addr, i16 %desired, i16 %new) nounwind {
27 ; CHECK-LABEL: test_cmpxchg_16:
28 ; CHECK: dmb ish
29 ; CHECK: uxth [[DESIRED:r[0-9]+]], [[DESIRED]]
30 ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
31 ; CHECK: ldrexh [[OLD:r[0-9]+]], [r0]
32 ; CHECK: cmp [[OLD]], [[DESIRED]]
33 ; CHECK: bne [[DONE:.LBB[0-9]+_[0-9]+]]
34 ; CHECK: strexh [[STATUS:r[0-9]+]], r2, [r0]
35 ; CHECK: cmp{{(\.w)?}} [[STATUS]], #0
36 ; CHECK: bne [[RETRY]]
37 ; CHECK: [[DONE]]:
38 ; CHECK: cmp{{(\.w)?}} [[OLD]], [[DESIRED]]
39 ; CHECK: {{moveq.w|movweq}} {{r[0-9]+}}, #1
40 ; CHECK: dmb ish
41 %res = cmpxchg i16* %addr, i16 %desired, i16 %new seq_cst monotonic
42 ret { i16, i1 } %res
43 }
44
45 define { i32, i1 } @test_cmpxchg_32(i32* %addr, i32 %desired, i32 %new) nounwind {
46 ; CHECK-LABEL: test_cmpxchg_32:
47 ; CHECK: dmb ish
48 ; CHECK-NOT: uxt
49 ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
50 ; CHECK: ldrex [[OLD:r[0-9]+]], [r0]
51 ; CHECK: cmp [[OLD]], [[DESIRED]]
52 ; CHECK: bne [[DONE:.LBB[0-9]+_[0-9]+]]
53 ; CHECK: strex [[STATUS:r[0-9]+]], r2, [r0]
54 ; CHECK: cmp{{(\.w)?}} [[STATUS]], #0
55 ; CHECK: bne [[RETRY]]
56 ; CHECK: [[DONE]]:
57 ; CHECK: cmp{{(\.w)?}} [[OLD]], [[DESIRED]]
58 ; CHECK: {{moveq.w|movweq}} {{r[0-9]+}}, #1
59 ; CHECK: dmb ish
60 %res = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst monotonic
61 ret { i32, i1 } %res
62 }
63
64 define { i64, i1 } @test_cmpxchg_64(i64* %addr, i64 %desired, i64 %new) nounwind {
65 ; CHECK-LABEL: test_cmpxchg_64:
66 ; CHECK: dmb ish
67 ; CHECK-NOT: uxt
68 ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
69 ; CHECK: ldrexd [[OLDLO:r[0-9]+]], [[OLDHI:r[0-9]+]], [r0]
70 ; CHECK: cmp [[OLDLO]], r6
71 ; CHECK: sbcs{{(\.w)?}} [[STATUS:r[0-9]+]], [[OLDHI]], r7
72 ; CHECK: bne [[DONE:.LBB[0-9]+_[0-9]+]]
73 ; CHECK: strexd [[STATUS]], r4, r5, [r0]
74 ; CHECK: cmp{{(\.w)?}} [[STATUS]], #0
75 ; CHECK: bne [[RETRY]]
76 ; CHECK: [[DONE]]:
77 ; CHECK: dmb ish
78 %res = cmpxchg i64* %addr, i64 %desired, i64 %new seq_cst monotonic
79 ret { i64, i1 } %res
80 }
None ; RUN: opt -S -o - -mtriple=armv7-apple-ios7.0 -atomic-expand %s | FileCheck %s
0 ; RUN: opt -S -o - -mtriple=armv7-apple-ios7.0 -atomic-expand -codegen-opt-level=1 %s | FileCheck %s
11
22 define i8 @test_atomic_xchg_i8(i8* %ptr, i8 %xchgend) {
33 ; CHECK-LABEL: @test_atomic_xchg_i8
None ; RUN: opt -S -o - -mtriple=armv8-linux-gnueabihf -atomic-expand %s | FileCheck %s
0 ; RUN: opt -S -o - -mtriple=armv8-linux-gnueabihf -atomic-expand %s -codegen-opt-level=1 | FileCheck %s
11
22 define i8 @test_atomic_xchg_i8(i8* %ptr, i8 %xchgend) {
33 ; CHECK-LABEL: @test_atomic_xchg_i8
None ; RUN: opt -atomic-expand -S -mtriple=thumbv7s-apple-ios7.0 %s | FileCheck %s
0 ; RUN: opt -atomic-expand -codegen-opt-level=1 -S -mtriple=thumbv7s-apple-ios7.0 %s | FileCheck %s
11
22 define i32 @test_cmpxchg_seq_cst(i32* %addr, i32 %desired, i32 %new) {
33 ; CHECK-LABEL: @test_cmpxchg_seq_cst
135135 OptLevelO3("O3",
136136 cl::desc("Optimization level 3. Similar to clang -O3"));
137137
138 static cl::opt<unsigned>
139 CodeGenOptLevel("codegen-opt-level",
140 cl::desc("Override optimization level for codegen hooks"));
141
138142 static cl::opt<std::string>
139143 TargetTriple("mtriple", cl::desc("Override target triple for module"));
140144
271275 //
272276
273277 static CodeGenOpt::Level GetCodeGenOptLevel() {
278 if (CodeGenOptLevel.getNumOccurrences())
279 return static_cast<CodeGenOpt::Level>(unsigned(CodeGenOptLevel));
274280 if (OptLevelO1)
275281 return CodeGenOpt::Less;
276282 if (OptLevelO2)