X86: expand atomics in IR instead of as MachineInstrs.

The logic for expanding atomics that aren't natively supported in terms of cmpxchg loops is much simpler to express at the IR level. It also allows the normal optimisations and CodeGen improvements to help out with atomics, instead of using a limited set of possible instructions.

rdar://problem/13496295

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212119 91177308-0d34-0410-b5e6-96231b3b80d8

Tim Northover
21 changed file(s) with 960 addition(s) and 1277 deletion(s).
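The cmpxchg loop the commit message refers to has the same shape as the following source-level sketch; it is not part of the patch, and the function name is illustrative. An operation with no native x86 read-modify-write form (here a fetch-max) becomes a load followed by a compare-exchange retry loop, which later passes are now free to optimise like any other IR.

#include <atomic>

int fetch_max(std::atomic<int> &addr, int incr) {
  int loaded = addr.load();                        // %init_loaded
  int desired;
  do {
    desired = loaded > incr ? loaded : incr;       // some_op %loaded, %incr
    // On failure, compare_exchange_weak reloads 'loaded' and we retry.
  } while (!addr.compare_exchange_weak(loaded, desired));
  return loaded;                                   // the previous value, as atomicrmw returns
}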
1313
1414 set(sources
1515 X86AsmPrinter.cpp
16 X86AtomicExpandPass.cpp
1617 X86CodeEmitter.cpp
1718 X86FastISel.cpp
1819 X86FloatingPoint.cpp
2222 class ImmutablePass;
2323 class JITCodeEmitter;
2424 class X86TargetMachine;
25
26 /// createX86AtomicExpandPass - This pass expands atomic operations that cannot
27 /// be handled natively, in terms of a loop using cmpxchg.
28 FunctionPass *createX86AtomicExpandPass(const X86TargetMachine *TM);
2529
2630 /// createX86ISelDag - This pass converts a legalized DAG into a
2731 /// X86-specific DAG, ready for instruction scheduling.
0 //===-- X86AtomicExpandPass.cpp - Expand illegal atomic instructions -----===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains a pass (at IR level) to replace atomic instructions which
10 // cannot be implemented as a single instruction with cmpxchg-based loops.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "X86.h"
15 #include "X86TargetMachine.h"
16 #include "llvm/CodeGen/Passes.h"
17 #include "llvm/IR/Function.h"
18 #include "llvm/IR/IRBuilder.h"
19 #include "llvm/IR/Instructions.h"
20 #include "llvm/IR/Intrinsics.h"
21 #include "llvm/IR/Module.h"
22 #include "llvm/Support/Debug.h"
23 #include "llvm/Target/TargetLowering.h"
24 #include "llvm/Target/TargetMachine.h"
25 using namespace llvm;
26
27 #define DEBUG_TYPE "x86-atomic-expand"
28
29 namespace {
30 class X86AtomicExpandPass : public FunctionPass {
31 const X86TargetMachine *TM;
32 public:
33 static char ID; // Pass identification, replacement for typeid
34 explicit X86AtomicExpandPass(const X86TargetMachine *TM)
35 : FunctionPass(ID), TM(TM) {}
36
37 bool runOnFunction(Function &F) override;
38 bool expandAtomicInsts(Function &F);
39
40 bool needsCmpXchgNb(Type *MemType);
41
42 /// There are four kinds of atomic operations. Two never need expanding:
43 /// cmpxchg is what we expand the others *to*, and loads are easily handled
44 /// by ISelLowering. Atomicrmw and store can need expanding in some
45 /// circumstances.
46 bool shouldExpand(Instruction *Inst);
47
48 /// 128-bit atomic stores (64-bit on i686) need to be implemented in terms
49 /// of trivial cmpxchg16b loops. A simple store isn't necessarily atomic.
50 bool shouldExpandStore(StoreInst *SI);
51
52 /// Only some atomicrmw instructions need expanding -- some operations
53 /// (e.g. max) have absolutely no architectural support; some (e.g. or) have
54 /// limited support but can't return the previous value; some (e.g. add)
55 /// have complete support in the instruction set.
56 ///
57 /// Also, naturally, 128-bit operations always need to be expanded.
58 bool shouldExpandAtomicRMW(AtomicRMWInst *AI);
59
60 bool expandAtomicRMW(AtomicRMWInst *AI);
61 bool expandAtomicStore(StoreInst *SI);
62 };
63 }
64
65 char X86AtomicExpandPass::ID = 0;
66
67 FunctionPass *llvm::createX86AtomicExpandPass(const X86TargetMachine *TM) {
68 return new X86AtomicExpandPass(TM);
69 }
70
71 bool X86AtomicExpandPass::runOnFunction(Function &F) {
72 SmallVector<Instruction *, 1> AtomicInsts;
73
74 // Changing control-flow while iterating through it is a bad idea, so gather a
75 // list of all atomic instructions before we start.
76 for (BasicBlock &BB : F)
77 for (Instruction &Inst : BB) {
78 if (isa<AtomicRMWInst>(&Inst) ||
79 (isa<StoreInst>(&Inst) && cast<StoreInst>(&Inst)->isAtomic()))
80 AtomicInsts.push_back(&Inst);
81 }
82
83 bool MadeChange = false;
84 for (Instruction *Inst : AtomicInsts) {
85 if (!shouldExpand(Inst))
86 continue;
87
88 if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Inst))
89 MadeChange |= expandAtomicRMW(AI);
90 if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
91 MadeChange |= expandAtomicStore(SI);
92 }
93
94 return MadeChange;
95 }
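The gather-then-rewrite structure above is the usual way to avoid invalidating the iteration while the expansion splits blocks and erases instructions. A generic sketch of the same idiom (purely illustrative, not from the patch):

#include <list>
#include <vector>

void erase_even(std::list<int> &values) {
  // Gather first: list iterators stay valid while we only read.
  std::vector<std::list<int>::iterator> worklist;
  for (auto it = values.begin(); it != values.end(); ++it)
    if (*it % 2 == 0)
      worklist.push_back(it);
  // Then mutate: each erase only invalidates the iterator being erased.
  for (auto it : worklist)
    values.erase(it);
}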
96
97 /// Returns true if operations on the given type will need to use either
98 /// cmpxchg8b or cmpxchg16b. This occurs if the type is 1 step up from the
99 /// native width, and the instructions are available (otherwise we leave them
100 /// alone to become __sync_fetch_and_... calls).
101 bool X86AtomicExpandPass::needsCmpXchgNb(llvm::Type *MemType) {
102 const X86Subtarget &Subtarget = TM->getSubtarget<X86Subtarget>();
103 if (!Subtarget.hasCmpxchg16b())
104 return false;
105
106 unsigned CmpXchgNbWidth = Subtarget.is64Bit() ? 128 : 64;
107
108 unsigned OpWidth = MemType->getPrimitiveSizeInBits();
109 if (OpWidth == CmpXchgNbWidth)
110 return true;
111
112 return false;
113 }
114
115
116 bool X86AtomicExpandPass::shouldExpandAtomicRMW(AtomicRMWInst *AI) {
117 const X86Subtarget &Subtarget = TM->getSubtarget<X86Subtarget>();
118 unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32;
119
120 if (needsCmpXchgNb(AI->getType()))
121 return true;
122
123 if (AI->getType()->getPrimitiveSizeInBits() > NativeWidth)
124 return false;
125
126 AtomicRMWInst::BinOp Op = AI->getOperation();
127 switch (Op) {
128 default:
129 llvm_unreachable("Unknown atomic operation");
130 case AtomicRMWInst::Xchg:
131 case AtomicRMWInst::Add:
132 case AtomicRMWInst::Sub:
133 // It's better to use xadd, xsub or xchg for these in all cases.
134 return false;
135 case AtomicRMWInst::Or:
136 case AtomicRMWInst::And:
137 case AtomicRMWInst::Xor:
138 // If the atomicrmw's result isn't actually used, we can just add a "lock"
139 // prefix to a normal instruction for these operations.
140 return !AI->use_empty();
141 case AtomicRMWInst::Nand:
142 case AtomicRMWInst::Max:
143 case AtomicRMWInst::Min:
144 case AtomicRMWInst::UMax:
145 case AtomicRMWInst::UMin:
146 // These always require a non-trivial set of data operations on x86. We must
147 // use a cmpxchg loop.
148 return true;
149 }
150 }
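The distinction drawn for Or/And/Xor above can be seen at the source level: when the old value is discarded a single lock-prefixed instruction is enough, but when it is used there is no plain x86 instruction that returns it, so a cmpxchg loop is required. A hedged sketch (function names are illustrative, not from the patch):

#include <atomic>

void set_flag(std::atomic<unsigned> &flags) {
  flags.fetch_or(1u);          // old value unused: a single "lock or" suffices
}

unsigned set_flag_old(std::atomic<unsigned> &flags) {
  return flags.fetch_or(1u);   // old value needed: no "or" variant returns it,
                               // so this becomes a cmpxchg loop after expansion
}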
151
152 bool X86AtomicExpandPass::shouldExpandStore(StoreInst *SI) {
153 if (needsCmpXchgNb(SI->getValueOperand()->getType()))
154 return true;
155
156 return false;
157 }
158
159 bool X86AtomicExpandPass::shouldExpand(Instruction *Inst) {
160 if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Inst))
161 return shouldExpandAtomicRMW(AI);
162 if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
163 return shouldExpandStore(SI);
164 return false;
165 }
166
167 /// Emit IR to implement the given atomicrmw operation on values in registers,
168 /// returning the new value.
169 static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder,
170 Value *Loaded, Value *Inc) {
171 Value *NewVal;
172 switch (Op) {
173 case AtomicRMWInst::Xchg:
174 return Inc;
175 case AtomicRMWInst::Add:
176 return Builder.CreateAdd(Loaded, Inc, "new");
177 case AtomicRMWInst::Sub:
178 return Builder.CreateSub(Loaded, Inc, "new");
179 case AtomicRMWInst::And:
180 return Builder.CreateAnd(Loaded, Inc, "new");
181 case AtomicRMWInst::Nand:
182 return Builder.CreateNot(Builder.CreateAnd(Loaded, Inc), "new");
183 case AtomicRMWInst::Or:
184 return Builder.CreateOr(Loaded, Inc, "new");
185 case AtomicRMWInst::Xor:
186 return Builder.CreateXor(Loaded, Inc, "new");
187 case AtomicRMWInst::Max:
188 NewVal = Builder.CreateICmpSGT(Loaded, Inc);
189 return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
190 case AtomicRMWInst::Min:
191 NewVal = Builder.CreateICmpSLE(Loaded, Inc);
192 return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
193 case AtomicRMWInst::UMax:
194 NewVal = Builder.CreateICmpUGT(Loaded, Inc);
195 return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
196 case AtomicRMWInst::UMin:
197 NewVal = Builder.CreateICmpULE(Loaded, Inc);
198 return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
199 default:
200 break;
201 }
202 llvm_unreachable("Unknown atomic op");
203 }
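Written out in plain C++, the scalar computations performAtomicOp builds as IR look like the following (illustrative only; the function names are not from the patch):

#include <cstdint>

int32_t  op_min(int32_t loaded, int32_t inc)    { return loaded <= inc ? loaded : inc; } // ICmpSLE + select
uint32_t op_umax(uint32_t loaded, uint32_t inc) { return loaded > inc ? loaded : inc; }  // ICmpUGT + select
int32_t  op_nand(int32_t loaded, int32_t inc)   { return ~(loaded & inc); }              // and, then not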
204
205 bool X86AtomicExpandPass::expandAtomicRMW(AtomicRMWInst *AI) {
206 AtomicOrdering Order =
207 AI->getOrdering() == Unordered ? Monotonic : AI->getOrdering();
208 Value *Addr = AI->getPointerOperand();
209 BasicBlock *BB = AI->getParent();
210 Function *F = BB->getParent();
211 LLVMContext &Ctx = F->getContext();
212
213 // Given: atomicrmw some_op iN* %addr, iN %incr ordering
214 //
215 // The standard expansion we produce is:
216 // [...]
217 // %init_loaded = load atomic iN* %addr
218 // br label %loop
219 // loop:
220 // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
221 // %new = some_op iN %loaded, %incr
222 // %pair = cmpxchg iN* %addr, iN %loaded, iN %new
223 // %new_loaded = extractvalue { iN, i1 } %pair, 0
224 // %success = extractvalue { iN, i1 } %pair, 1
225 // br i1 %success, label %atomicrmw.end, label %loop
226 // atomicrmw.end:
227 // [...]
228 BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end");
229 BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
230
231 // This grabs the DebugLoc from AI.
232 IRBuilder<> Builder(AI);
233
234 // The split call above "helpfully" added a branch at the end of BB (to the
235 // wrong place), but we want a load. It's easiest to just remove
236 // the branch entirely.
237 std::prev(BB->end())->eraseFromParent();
238 Builder.SetInsertPoint(BB);
239 LoadInst *InitLoaded = Builder.CreateLoad(Addr);
240 InitLoaded->setAlignment(AI->getType()->getPrimitiveSizeInBits());
241 Builder.CreateBr(LoopBB);
242
243 // Start the main loop block now that we've taken care of the preliminaries.
244 Builder.SetInsertPoint(LoopBB);
245 PHINode *Loaded = Builder.CreatePHI(AI->getType(), 2, "loaded");
246 Loaded->addIncoming(InitLoaded, BB);
247
248 Value *NewVal =
249 performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand());
250
251 Value *Pair = Builder.CreateAtomicCmpXchg(
252 Addr, Loaded, NewVal, Order,
253 AtomicCmpXchgInst::getStrongestFailureOrdering(Order));
254 Value *NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
255 Loaded->addIncoming(NewLoaded, LoopBB);
256
257 Value *Success = Builder.CreateExtractValue(Pair, 1, "success");
258 Builder.CreateCondBr(Success, ExitBB, LoopBB);
259
260 AI->replaceAllUsesWith(NewLoaded);
261 AI->eraseFromParent();
262
263 return true;
264 }
265
266 bool X86AtomicExpandPass::expandAtomicStore(StoreInst *SI) {
267 // An atomic store might need cmpxchg16b (or 8b on x86) to execute. Express
268 // this in terms of the usual expansion to "atomicrmw xchg".
269 IRBuilder<> Builder(SI);
270 AtomicRMWInst *AI =
271 Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(),
272 SI->getValueOperand(), SI->getOrdering());
273 SI->eraseFromParent();
274
275 // Now we have an appropriate swap instruction, lower it as usual.
276 if (shouldExpandAtomicRMW(AI))
277 return expandAtomicRMW(AI);
278
279 return AI;
280 }
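At the source level, the rewrite performed by expandAtomicStore is simply a store expressed as an exchange whose result is thrown away, as in this sketch (illustrative only, not from the patch):

#include <atomic>

void store_as_xchg(std::atomic<long long> &addr, long long val) {
  (void)addr.exchange(val);   // same memory effect as a plain atomic store;
                              // the swapped-out previous value is ignored
}

If the resulting xchg is itself too wide for the target (the cmpxchg16b/8b case), it is then expanded into a cmpxchg loop by expandAtomicRMW, exactly as the code above does.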
21252125 return getGlobalBaseReg();
21262126
21272127
2128 case X86ISD::ATOMOR64_DAG:
2129 case X86ISD::ATOMXOR64_DAG:
2130 case X86ISD::ATOMADD64_DAG:
2131 case X86ISD::ATOMSUB64_DAG:
2132 case X86ISD::ATOMNAND64_DAG:
2133 case X86ISD::ATOMAND64_DAG:
2134 case X86ISD::ATOMMAX64_DAG:
2135 case X86ISD::ATOMMIN64_DAG:
2136 case X86ISD::ATOMUMAX64_DAG:
2137 case X86ISD::ATOMUMIN64_DAG:
2138 case X86ISD::ATOMSWAP64_DAG: {
2139 unsigned Opc;
2140 switch (Opcode) {
2141 default: llvm_unreachable("Impossible opcode");
2142 case X86ISD::ATOMOR64_DAG: Opc = X86::ATOMOR6432; break;
2143 case X86ISD::ATOMXOR64_DAG: Opc = X86::ATOMXOR6432; break;
2144 case X86ISD::ATOMADD64_DAG: Opc = X86::ATOMADD6432; break;
2145 case X86ISD::ATOMSUB64_DAG: Opc = X86::ATOMSUB6432; break;
2146 case X86ISD::ATOMNAND64_DAG: Opc = X86::ATOMNAND6432; break;
2147 case X86ISD::ATOMAND64_DAG: Opc = X86::ATOMAND6432; break;
2148 case X86ISD::ATOMMAX64_DAG: Opc = X86::ATOMMAX6432; break;
2149 case X86ISD::ATOMMIN64_DAG: Opc = X86::ATOMMIN6432; break;
2150 case X86ISD::ATOMUMAX64_DAG: Opc = X86::ATOMUMAX6432; break;
2151 case X86ISD::ATOMUMIN64_DAG: Opc = X86::ATOMUMIN6432; break;
2152 case X86ISD::ATOMSWAP64_DAG: Opc = X86::ATOMSWAP6432; break;
2153 }
2154 SDNode *RetVal = SelectAtomic64(Node, Opc);
2155 if (RetVal)
2156 return RetVal;
2157 break;
2158 }
2159
21602128 case ISD::ATOMIC_LOAD_XOR:
21612129 case ISD::ATOMIC_LOAD_AND:
21622130 case ISD::ATOMIC_LOAD_OR:
589589 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
590590 setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
591591 setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
592 }
593
594 if (!Subtarget->is64Bit()) {
595 setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom);
596 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom);
597 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
598 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
599 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Custom);
600 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Custom);
601 setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Custom);
602 setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom);
603 setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i64, Custom);
604 setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i64, Custom);
605 setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i64, Custom);
606 setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i64, Custom);
607592 }
608593
609594 if (Subtarget->hasCmpxchg16b()) {
1622716212 Results.push_back(Swap.getValue(2));
1622816213 }
1622916214
16230 static void
16231 ReplaceATOMIC_BINARY_64(SDNode *Node, SmallVectorImpl<SDValue> &Results,
16232 SelectionDAG &DAG, unsigned NewOp) {
16233 SDLoc dl(Node);
16234 assert (Node->getValueType(0) == MVT::i64 &&
16235 "Only know how to expand i64 atomics");
16236
16237 SDValue Chain = Node->getOperand(0);
16238 SDValue In1 = Node->getOperand(1);
16239 SDValue In2L = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
16240 Node->getOperand(2), DAG.getIntPtrConstant(0));
16241 SDValue In2H = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
16242 Node->getOperand(2), DAG.getIntPtrConstant(1));
16243 SDValue Ops[] = { Chain, In1, In2L, In2H };
16244 SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
16245 SDValue Result =
16246 DAG.getMemIntrinsicNode(NewOp, dl, Tys, Ops, MVT::i64,
16247 cast<MemSDNode>(Node)->getMemOperand());
16248 SDValue OpsF[] = { Result.getValue(0), Result.getValue(1)};
16249 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF));
16250 Results.push_back(Result.getValue(2));
16251 }
16252
1625316215 /// ReplaceNodeResults - Replace a node with an illegal result type
1625416216 /// with a new node built out of custom code.
1625516217 void X86TargetLowering::ReplaceNodeResults(SDNode *N,
1639516357 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, T, OpsF));
1639616358 Results.push_back(Success);
1639716359 Results.push_back(EFLAGS.getValue(1));
16398 return;
16399 }
16400 case ISD::ATOMIC_LOAD_ADD:
16401 case ISD::ATOMIC_LOAD_AND:
16402 case ISD::ATOMIC_LOAD_NAND:
16403 case ISD::ATOMIC_LOAD_OR:
16404 case ISD::ATOMIC_LOAD_SUB:
16405 case ISD::ATOMIC_LOAD_XOR:
16406 case ISD::ATOMIC_LOAD_MAX:
16407 case ISD::ATOMIC_LOAD_MIN:
16408 case ISD::ATOMIC_LOAD_UMAX:
16409 case ISD::ATOMIC_LOAD_UMIN:
16410 case ISD::ATOMIC_SWAP: {
16411 unsigned Opc;
16412 switch (N->getOpcode()) {
16413 default: llvm_unreachable("Unexpected opcode");
16414 case ISD::ATOMIC_LOAD_ADD:
16415 Opc = X86ISD::ATOMADD64_DAG;
16416 break;
16417 case ISD::ATOMIC_LOAD_AND:
16418 Opc = X86ISD::ATOMAND64_DAG;
16419 break;
16420 case ISD::ATOMIC_LOAD_NAND:
16421 Opc = X86ISD::ATOMNAND64_DAG;
16422 break;
16423 case ISD::ATOMIC_LOAD_OR:
16424 Opc = X86ISD::ATOMOR64_DAG;
16425 break;
16426 case ISD::ATOMIC_LOAD_SUB:
16427 Opc = X86ISD::ATOMSUB64_DAG;
16428 break;
16429 case ISD::ATOMIC_LOAD_XOR:
16430 Opc = X86ISD::ATOMXOR64_DAG;
16431 break;
16432 case ISD::ATOMIC_LOAD_MAX:
16433 Opc = X86ISD::ATOMMAX64_DAG;
16434 break;
16435 case ISD::ATOMIC_LOAD_MIN:
16436 Opc = X86ISD::ATOMMIN64_DAG;
16437 break;
16438 case ISD::ATOMIC_LOAD_UMAX:
16439 Opc = X86ISD::ATOMUMAX64_DAG;
16440 break;
16441 case ISD::ATOMIC_LOAD_UMIN:
16442 Opc = X86ISD::ATOMUMIN64_DAG;
16443 break;
16444 case ISD::ATOMIC_SWAP:
16445 Opc = X86ISD::ATOMSWAP64_DAG;
16446 break;
16447 }
16448 ReplaceATOMIC_BINARY_64(N, Results, DAG, Opc);
1644916360 return;
1645016361 }
1645116362 case ISD::ATOMIC_LOAD: {
1655516466 case X86ISD::LCMPXCHG_DAG: return "X86ISD::LCMPXCHG_DAG";
1655616467 case X86ISD::LCMPXCHG8_DAG: return "X86ISD::LCMPXCHG8_DAG";
1655716468 case X86ISD::LCMPXCHG16_DAG: return "X86ISD::LCMPXCHG16_DAG";
16558 case X86ISD::ATOMADD64_DAG: return "X86ISD::ATOMADD64_DAG";
16559 case X86ISD::ATOMSUB64_DAG: return "X86ISD::ATOMSUB64_DAG";
16560 case X86ISD::ATOMOR64_DAG: return "X86ISD::ATOMOR64_DAG";
16561 case X86ISD::ATOMXOR64_DAG: return "X86ISD::ATOMXOR64_DAG";
16562 case X86ISD::ATOMAND64_DAG: return "X86ISD::ATOMAND64_DAG";
16563 case X86ISD::ATOMNAND64_DAG: return "X86ISD::ATOMNAND64_DAG";
1656416469 case X86ISD::VZEXT_MOVL: return "X86ISD::VZEXT_MOVL";
1656516470 case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD";
1656616471 case X86ISD::VZEXT: return "X86ISD::VZEXT";
1695116856 return sinkMBB;
1695216857 }
1695316858
16954 // Get CMPXCHG opcode for the specified data type.
16955 static unsigned getCmpXChgOpcode(EVT VT) {
16956 switch (VT.getSimpleVT().SimpleTy) {
16957 case MVT::i8: return X86::LCMPXCHG8;
16958 case MVT::i16: return X86::LCMPXCHG16;
16959 case MVT::i32: return X86::LCMPXCHG32;
16960 case MVT::i64: return X86::LCMPXCHG64;
16961 default:
16962 break;
16963 }
16964 llvm_unreachable("Invalid operand size!");
16965 }
16966
16967 // Get LOAD opcode for the specified data type.
16968 static unsigned getLoadOpcode(EVT VT) {
16969 switch (VT.getSimpleVT().SimpleTy) {
16970 case MVT::i8: return X86::MOV8rm;
16971 case MVT::i16: return X86::MOV16rm;
16972 case MVT::i32: return X86::MOV32rm;
16973 case MVT::i64: return X86::MOV64rm;
16974 default:
16975 break;
16976 }
16977 llvm_unreachable("Invalid operand size!");
16978 }
16979
16980 // Get opcode of the non-atomic one from the specified atomic instruction.
16981 static unsigned getNonAtomicOpcode(unsigned Opc) {
16982 switch (Opc) {
16983 case X86::ATOMAND8: return X86::AND8rr;
16984 case X86::ATOMAND16: return X86::AND16rr;
16985 case X86::ATOMAND32: return X86::AND32rr;
16986 case X86::ATOMAND64: return X86::AND64rr;
16987 case X86::ATOMOR8: return X86::OR8rr;
16988 case X86::ATOMOR16: return X86::OR16rr;
16989 case X86::ATOMOR32: return X86::OR32rr;
16990 case X86::ATOMOR64: return X86::OR64rr;
16991 case X86::ATOMXOR8: return X86::XOR8rr;
16992 case X86::ATOMXOR16: return X86::XOR16rr;
16993 case X86::ATOMXOR32: return X86::XOR32rr;
16994 case X86::ATOMXOR64: return X86::XOR64rr;
16995 }
16996 llvm_unreachable("Unhandled atomic-load-op opcode!");
16997 }
16998
16999 // Get opcode of the non-atomic one from the specified atomic instruction with
17000 // extra opcode.
17001 static unsigned getNonAtomicOpcodeWithExtraOpc(unsigned Opc,
17002 unsigned &ExtraOpc) {
17003 switch (Opc) {
17004 case X86::ATOMNAND8: ExtraOpc = X86::NOT8r; return X86::AND8rr;
17005 case X86::ATOMNAND16: ExtraOpc = X86::NOT16r; return X86::AND16rr;
17006 case X86::ATOMNAND32: ExtraOpc = X86::NOT32r; return X86::AND32rr;
17007 case X86::ATOMNAND64: ExtraOpc = X86::NOT64r; return X86::AND64rr;
17008 case X86::ATOMMAX8: ExtraOpc = X86::CMP8rr; return X86::CMOVL32rr;
17009 case X86::ATOMMAX16: ExtraOpc = X86::CMP16rr; return X86::CMOVL16rr;
17010 case X86::ATOMMAX32: ExtraOpc = X86::CMP32rr; return X86::CMOVL32rr;
17011 case X86::ATOMMAX64: ExtraOpc = X86::CMP64rr; return X86::CMOVL64rr;
17012 case X86::ATOMMIN8: ExtraOpc = X86::CMP8rr; return X86::CMOVG32rr;
17013 case X86::ATOMMIN16: ExtraOpc = X86::CMP16rr; return X86::CMOVG16rr;
17014 case X86::ATOMMIN32: ExtraOpc = X86::CMP32rr; return X86::CMOVG32rr;
17015 case X86::ATOMMIN64: ExtraOpc = X86::CMP64rr; return X86::CMOVG64rr;
17016 case X86::ATOMUMAX8: ExtraOpc = X86::CMP8rr; return X86::CMOVB32rr;
17017 case X86::ATOMUMAX16: ExtraOpc = X86::CMP16rr; return X86::CMOVB16rr;
17018 case X86::ATOMUMAX32: ExtraOpc = X86::CMP32rr; return X86::CMOVB32rr;
17019 case X86::ATOMUMAX64: ExtraOpc = X86::CMP64rr; return X86::CMOVB64rr;
17020 case X86::ATOMUMIN8: ExtraOpc = X86::CMP8rr; return X86::CMOVA32rr;
17021 case X86::ATOMUMIN16: ExtraOpc = X86::CMP16rr; return X86::CMOVA16rr;
17022 case X86::ATOMUMIN32: ExtraOpc = X86::CMP32rr; return X86::CMOVA32rr;
17023 case X86::ATOMUMIN64: ExtraOpc = X86::CMP64rr; return X86::CMOVA64rr;
17024 }
17025 llvm_unreachable("Unhandled atomic-load-op opcode!");
17026 }
17027
17028 // Get opcode of the non-atomic one from the specified atomic instruction for
17029 // 64-bit data type on 32-bit target.
17030 static unsigned getNonAtomic6432Opcode(unsigned Opc, unsigned &HiOpc) {
17031 switch (Opc) {
17032 case X86::ATOMAND6432: HiOpc = X86::AND32rr; return X86::AND32rr;
17033 case X86::ATOMOR6432: HiOpc = X86::OR32rr; return X86::OR32rr;
17034 case X86::ATOMXOR6432: HiOpc = X86::XOR32rr; return X86::XOR32rr;
17035 case X86::ATOMADD6432: HiOpc = X86::ADC32rr; return X86::ADD32rr;
17036 case X86::ATOMSUB6432: HiOpc = X86::SBB32rr; return X86::SUB32rr;
17037 case X86::ATOMSWAP6432: HiOpc = X86::MOV32rr; return X86::MOV32rr;
17038 case X86::ATOMMAX6432: HiOpc = X86::SETLr; return X86::SETLr;
17039 case X86::ATOMMIN6432: HiOpc = X86::SETGr; return X86::SETGr;
17040 case X86::ATOMUMAX6432: HiOpc = X86::SETBr; return X86::SETBr;
17041 case X86::ATOMUMIN6432: HiOpc = X86::SETAr; return X86::SETAr;
17042 }
17043 llvm_unreachable("Unhandled atomic-load-op opcode!");
17044 }
17045
17046 // Get opcode of the non-atomic one from the specified atomic instruction for
17047 // 64-bit data type on 32-bit target with extra opcode.
17048 static unsigned getNonAtomic6432OpcodeWithExtraOpc(unsigned Opc,
17049 unsigned &HiOpc,
17050 unsigned &ExtraOpc) {
17051 switch (Opc) {
17052 case X86::ATOMNAND6432:
17053 ExtraOpc = X86::NOT32r;
17054 HiOpc = X86::AND32rr;
17055 return X86::AND32rr;
17056 }
17057 llvm_unreachable("Unhandled atomic-load-op opcode!");
17058 }
17059
17060 // Get pseudo CMOV opcode from the specified data type.
17061 static unsigned getPseudoCMOVOpc(EVT VT) {
17062 switch (VT.getSimpleVT().SimpleTy) {
17063 case MVT::i8: return X86::CMOV_GR8;
17064 case MVT::i16: return X86::CMOV_GR16;
17065 case MVT::i32: return X86::CMOV_GR32;
17066 default:
17067 break;
17068 }
17069 llvm_unreachable("Unknown CMOV opcode!");
17070 }
17071
17072 // EmitAtomicLoadArith - emit the code sequence for pseudo atomic instructions.
17073 // They will be translated into a spin-loop or compare-exchange loop from
17074 //
17075 // ...
17076 // dst = atomic-fetch-op MI.addr, MI.val
17077 // ...
17078 //
17079 // to
17080 //
17081 // ...
17082 // t1 = LOAD MI.addr
17083 // loop:
17084 // t4 = phi(t1, t3 / loop)
17085 // t2 = OP MI.val, t4
17086 // EAX = t4
17087 // LCMPXCHG [MI.addr], t2, [EAX is implicitly used & defined]
17088 // t3 = EAX
17089 // JNE loop
17090 // sink:
17091 // dst = t3
17092 // ...
17093 MachineBasicBlock *
17094 X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI,
17095 MachineBasicBlock *MBB) const {
17096 MachineFunction *MF = MBB->getParent();
17097 const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
17098 DebugLoc DL = MI->getDebugLoc();
17099
17100 MachineRegisterInfo &MRI = MF->getRegInfo();
17101
17102 const BasicBlock *BB = MBB->getBasicBlock();
17103 MachineFunction::iterator I = MBB;
17104 ++I;
17105
17106 assert(MI->getNumOperands() <= X86::AddrNumOperands + 4 &&
17107 "Unexpected number of operands");
17108
17109 assert(MI->hasOneMemOperand() &&
17110 "Expected atomic-load-op to have one memoperand");
17111
17112 // Memory Reference
17113 MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
17114 MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
17115
17116 unsigned DstReg, SrcReg;
17117 unsigned MemOpndSlot;
17118
17119 unsigned CurOp = 0;
17120
17121 DstReg = MI->getOperand(CurOp++).getReg();
17122 MemOpndSlot = CurOp;
17123 CurOp += X86::AddrNumOperands;
17124 SrcReg = MI->getOperand(CurOp++).getReg();
17125
17126 const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
17127 MVT::SimpleValueType VT = *RC->vt_begin();
17128 unsigned t1 = MRI.createVirtualRegister(RC);
17129 unsigned t2 = MRI.createVirtualRegister(RC);
17130 unsigned t3 = MRI.createVirtualRegister(RC);
17131 unsigned t4 = MRI.createVirtualRegister(RC);
17132 unsigned PhyReg = getX86SubSuperRegister(X86::EAX, VT);
17133
17134 unsigned LCMPXCHGOpc = getCmpXChgOpcode(VT);
17135 unsigned LOADOpc = getLoadOpcode(VT);
17136
17137 // For the atomic load-arith operator, we generate
17138 //
17139 // thisMBB:
17140 // t1 = LOAD [MI.addr]
17141 // mainMBB:
17142 // t4 = phi(t1 / thisMBB, t3 / mainMBB)
17143 // t1 = OP MI.val, EAX
17144 // EAX = t4
17145 // LCMPXCHG [MI.addr], t1, [EAX is implicitly used & defined]
17146 // t3 = EAX
17147 // JNE mainMBB
17148 // sinkMBB:
17149 // dst = t3
17150
17151 MachineBasicBlock *thisMBB = MBB;
17152 MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
17153 MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
17154 MF->insert(I, mainMBB);
17155 MF->insert(I, sinkMBB);
17156
17157 MachineInstrBuilder MIB;
17158
17159 // Transfer the remainder of BB and its successor edges to sinkMBB.
17160 sinkMBB->splice(sinkMBB->begin(), MBB,
17161 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
17162 sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
17163
17164 // thisMBB:
17165 MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), t1);
17166 for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
17167 MachineOperand NewMO = MI->getOperand(MemOpndSlot + i);
17168 if (NewMO.isReg())
17169 NewMO.setIsKill(false);
17170 MIB.addOperand(NewMO);
17171 }
17172 for (MachineInstr::mmo_iterator MMOI = MMOBegin; MMOI != MMOEnd; ++MMOI) {
17173 unsigned flags = (*MMOI)->getFlags();
17174 flags = (flags & ~MachineMemOperand::MOStore) | MachineMemOperand::MOLoad;
17175 MachineMemOperand *MMO =
17176 MF->getMachineMemOperand((*MMOI)->getPointerInfo(), flags,
17177 (*MMOI)->getSize(),
17178 (*MMOI)->getBaseAlignment(),
17179 (*MMOI)->getTBAAInfo(),
17180 (*MMOI)->getRanges());
17181 MIB.addMemOperand(MMO);
17182 }
17183
17184 thisMBB->addSuccessor(mainMBB);
17185
17186 // mainMBB:
17187 MachineBasicBlock *origMainMBB = mainMBB;
17188
17189 // Add a PHI.
17190 MachineInstr *Phi = BuildMI(mainMBB, DL, TII->get(X86::PHI), t4)
17191 .addReg(t1).addMBB(thisMBB).addReg(t3).addMBB(mainMBB);
17192
17193 unsigned Opc = MI->getOpcode();
17194 switch (Opc) {
17195 default:
17196 llvm_unreachable("Unhandled atomic-load-op opcode!");
17197 case X86::ATOMAND8:
17198 case X86::ATOMAND16:
17199 case X86::ATOMAND32:
17200 case X86::ATOMAND64:
17201 case X86::ATOMOR8:
17202 case X86::ATOMOR16:
17203 case X86::ATOMOR32:
17204 case X86::ATOMOR64:
17205 case X86::ATOMXOR8:
17206 case X86::ATOMXOR16:
17207 case X86::ATOMXOR32:
17208 case X86::ATOMXOR64: {
17209 unsigned ARITHOpc = getNonAtomicOpcode(Opc);
17210 BuildMI(mainMBB, DL, TII->get(ARITHOpc), t2).addReg(SrcReg)
17211 .addReg(t4);
17212 break;
17213 }
17214 case X86::ATOMNAND8:
17215 case X86::ATOMNAND16:
17216 case X86::ATOMNAND32:
17217 case X86::ATOMNAND64: {
17218 unsigned Tmp = MRI.createVirtualRegister(RC);
17219 unsigned NOTOpc;
17220 unsigned ANDOpc = getNonAtomicOpcodeWithExtraOpc(Opc, NOTOpc);
17221 BuildMI(mainMBB, DL, TII->get(ANDOpc), Tmp).addReg(SrcReg)
17222 .addReg(t4);
17223 BuildMI(mainMBB, DL, TII->get(NOTOpc), t2).addReg(Tmp);
17224 break;
17225 }
17226 case X86::ATOMMAX8:
17227 case X86::ATOMMAX16:
17228 case X86::ATOMMAX32:
17229 case X86::ATOMMAX64:
17230 case X86::ATOMMIN8:
17231 case X86::ATOMMIN16:
17232 case X86::ATOMMIN32:
17233 case X86::ATOMMIN64:
17234 case X86::ATOMUMAX8:
17235 case X86::ATOMUMAX16:
17236 case X86::ATOMUMAX32:
17237 case X86::ATOMUMAX64:
17238 case X86::ATOMUMIN8:
17239 case X86::ATOMUMIN16:
17240 case X86::ATOMUMIN32:
17241 case X86::ATOMUMIN64: {
17242 unsigned CMPOpc;
17243 unsigned CMOVOpc = getNonAtomicOpcodeWithExtraOpc(Opc, CMPOpc);
17244
17245 BuildMI(mainMBB, DL, TII->get(CMPOpc))
17246 .addReg(SrcReg)
17247 .addReg(t4);
17248
17249 if (Subtarget->hasCMov()) {
17250 if (VT != MVT::i8) {
17251 // Native support
17252 BuildMI(mainMBB, DL, TII->get(CMOVOpc), t2)
17253 .addReg(SrcReg)
17254 .addReg(t4);
17255 } else {
17256 // Promote i8 to i32 to use CMOV32
17257 const TargetRegisterInfo* TRI = MF->getTarget().getRegisterInfo();
17258 const TargetRegisterClass *RC32 =
17259 TRI->getSubClassWithSubReg(getRegClassFor(MVT::i32), X86::sub_8bit);
17260 unsigned SrcReg32 = MRI.createVirtualRegister(RC32);
17261 unsigned AccReg32 = MRI.createVirtualRegister(RC32);
17262 unsigned Tmp = MRI.createVirtualRegister(RC32);
17263
17264 unsigned Undef = MRI.createVirtualRegister(RC32);
17265 BuildMI(mainMBB, DL, TII->get(TargetOpcode::IMPLICIT_DEF), Undef);
17266
17267 BuildMI(mainMBB, DL, TII->get(TargetOpcode::INSERT_SUBREG), SrcReg32)
17268 .addReg(Undef)
17269 .addReg(SrcReg)
17270 .addImm(X86::sub_8bit);
17271 BuildMI(mainMBB, DL, TII->get(TargetOpcode::INSERT_SUBREG), AccReg32)
17272 .addReg(Undef)
17273 .addReg(t4)
17274 .addImm(X86::sub_8bit);
17275
17276 BuildMI(mainMBB, DL, TII->get(CMOVOpc), Tmp)
17277 .addReg(SrcReg32)
17278 .addReg(AccReg32);
17279
17280 BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), t2)
17281 .addReg(Tmp, 0, X86::sub_8bit);
17282 }
17283 } else {
17284 // Use pseudo select and lower them.
17285 assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
17286 "Invalid atomic-load-op transformation!");
17287 unsigned SelOpc = getPseudoCMOVOpc(VT);
17288 X86::CondCode CC = X86::getCondFromCMovOpc(CMOVOpc);
17289 assert(CC != X86::COND_INVALID && "Invalid atomic-load-op transformation!");
17290 MIB = BuildMI(mainMBB, DL, TII->get(SelOpc), t2)
17291 .addReg(SrcReg).addReg(t4)
17292 .addImm(CC);
17293 mainMBB = EmitLoweredSelect(MIB, mainMBB);
17294 // Replace the original PHI node as mainMBB is changed after CMOV
17295 // lowering.
17296 BuildMI(*origMainMBB, Phi, DL, TII->get(X86::PHI), t4)
17297 .addReg(t1).addMBB(thisMBB).addReg(t3).addMBB(mainMBB);
17298 Phi->eraseFromParent();
17299 }
17300 break;
17301 }
17302 }
17303
17304 // Copy PhyReg back from virtual register.
17305 BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), PhyReg)
17306 .addReg(t4);
17307
17308 MIB = BuildMI(mainMBB, DL, TII->get(LCMPXCHGOpc));
17309 for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
17310 MachineOperand NewMO = MI->getOperand(MemOpndSlot + i);
17311 if (NewMO.isReg())
17312 NewMO.setIsKill(false);
17313 MIB.addOperand(NewMO);
17314 }
17315 MIB.addReg(t2);
17316 MIB.setMemRefs(MMOBegin, MMOEnd);
17317
17318 // Copy PhyReg back to virtual register.
17319 BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), t3)
17320 .addReg(PhyReg);
17321
17322 BuildMI(mainMBB, DL, TII->get(X86::JNE_4)).addMBB(origMainMBB);
17323
17324 mainMBB->addSuccessor(origMainMBB);
17325 mainMBB->addSuccessor(sinkMBB);
17326
17327 // sinkMBB:
17328 BuildMI(*sinkMBB, sinkMBB->begin(), DL,
17329 TII->get(TargetOpcode::COPY), DstReg)
17330 .addReg(t3);
17331
17332 MI->eraseFromParent();
17333 return sinkMBB;
17334 }
17335
17336 // EmitAtomicLoadArith6432 - emit the code sequence for pseudo atomic
17337 // instructions. They will be translated into a spin-loop or compare-exchange
17338 // loop from
17339 //
17340 // ...
17341 // dst = atomic-fetch-op MI.addr, MI.val
17342 // ...
17343 //
17344 // to
17345 //
17346 // ...
17347 // t1L = LOAD [MI.addr + 0]
17348 // t1H = LOAD [MI.addr + 4]
17349 // loop:
17350 // t4L = phi(t1L, t3L / loop)
17351 // t4H = phi(t1H, t3H / loop)
17352 // t2L = OP MI.val.lo, t4L
17353 // t2H = OP MI.val.hi, t4H
17354 // EAX = t4L
17355 // EDX = t4H
17356 // EBX = t2L
17357 // ECX = t2H
17358 // LCMPXCHG8B [MI.addr], [ECX:EBX & EDX:EAX are implicitly used and EDX:EAX is implicitly defined]
17359 // t3L = EAX
17360 // t3H = EDX
17361 // JNE loop
17362 // sink:
17363 // dstL = t3L
17364 // dstH = t3H
17365 // ...
17366 MachineBasicBlock *
17367 X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI,
17368 MachineBasicBlock *MBB) const {
17369 MachineFunction *MF = MBB->getParent();
17370 const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
17371 DebugLoc DL = MI->getDebugLoc();
17372
17373 MachineRegisterInfo &MRI = MF->getRegInfo();
17374
17375 const BasicBlock *BB = MBB->getBasicBlock();
17376 MachineFunction::iterator I = MBB;
17377 ++I;
17378
17379 assert(MI->getNumOperands() <= X86::AddrNumOperands + 7 &&
17380 "Unexpected number of operands");
17381
17382 assert(MI->hasOneMemOperand() &&
17383 "Expected atomic-load-op32 to have one memoperand");
17384
17385 // Memory Reference
17386 MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
17387 MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
17388
17389 unsigned DstLoReg, DstHiReg;
17390 unsigned SrcLoReg, SrcHiReg;
17391 unsigned MemOpndSlot;
17392
17393 unsigned CurOp = 0;
17394
17395 DstLoReg = MI->getOperand(CurOp++).getReg();
17396 DstHiReg = MI->getOperand(CurOp++).getReg();
17397 MemOpndSlot = CurOp;
17398 CurOp += X86::AddrNumOperands;
17399 SrcLoReg = MI->getOperand(CurOp++).getReg();
17400 SrcHiReg = MI->getOperand(CurOp++).getReg();
17401
17402 const TargetRegisterClass *RC = &X86::GR32RegClass;
17403 const TargetRegisterClass *RC8 = &X86::GR8RegClass;
17404
17405 unsigned t1L = MRI.createVirtualRegister(RC);
17406 unsigned t1H = MRI.createVirtualRegister(RC);
17407 unsigned t2L = MRI.createVirtualRegister(RC);
17408 unsigned t2H = MRI.createVirtualRegister(RC);
17409 unsigned t3L = MRI.createVirtualRegister(RC);
17410 unsigned t3H = MRI.createVirtualRegister(RC);
17411 unsigned t4L = MRI.createVirtualRegister(RC);
17412 unsigned t4H = MRI.createVirtualRegister(RC);
17413
17414 unsigned LCMPXCHGOpc = X86::LCMPXCHG8B;
17415 unsigned LOADOpc = X86::MOV32rm;
17416
17417 // For the atomic load-arith operator, we generate
17418 //
17419 // thisMBB:
17420 // t1L = LOAD [MI.addr + 0]
17421 // t1H = LOAD [MI.addr + 4]
17422 // mainMBB:
17423 // t4L = phi(t1L / thisMBB, t3L / mainMBB)
17424 // t4H = phi(t1H / thisMBB, t3H / mainMBB)
17425 // t2L = OP MI.val.lo, t4L
17426 // t2H = OP MI.val.hi, t4H
17427 // EBX = t2L
17428 // ECX = t2H
17429 // LCMPXCHG8B [MI.addr], [ECX:EBX & EDX:EAX are implicitly used and EDX:EAX is implicitly defined]
17430 // t3L = EAX
17431 // t3H = EDX
17432 // JNE loop
17433 // sinkMBB:
17434 // dstL = t3L
17435 // dstH = t3H
17436
17437 MachineBasicBlock *thisMBB = MBB;
17438 MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
17439 MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
17440 MF->insert(I, mainMBB);
17441 MF->insert(I, sinkMBB);
17442
17443 MachineInstrBuilder MIB;
17444
17445 // Transfer the remainder of BB and its successor edges to sinkMBB.
17446 sinkMBB->splice(sinkMBB->begin(), MBB,
17447 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
17448 sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
17449
17450 // thisMBB:
17451 // Lo
17452 MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), t1L);
17453 for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
17454 MachineOperand NewMO = MI->getOperand(MemOpndSlot + i);
17455 if (NewMO.isReg())
17456 NewMO.setIsKill(false);
17457 MIB.addOperand(NewMO);
17458 }
17459 for (MachineInstr::mmo_iterator MMOI = MMOBegin; MMOI != MMOEnd; ++MMOI) {
17460 unsigned flags = (*MMOI)->getFlags();
17461 flags = (flags & ~MachineMemOperand::MOStore) | MachineMemOperand::MOLoad;
17462 MachineMemOperand *MMO =
17463 MF->getMachineMemOperand((*MMOI)->getPointerInfo(), flags,
17464 (*MMOI)->getSize(),
17465 (*MMOI)->getBaseAlignment(),
17466 (*MMOI)->getTBAAInfo(),
17467 (*MMOI)->getRanges());
17468 MIB.addMemOperand(MMO);
17469 };
17470 MachineInstr *LowMI = MIB;
17471
17472 // Hi
17473 MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), t1H);
17474 for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
17475 if (i == X86::AddrDisp) {
17476 MIB.addDisp(MI->getOperand(MemOpndSlot + i), 4); // 4 == sizeof(i32)
17477 } else {
17478 MachineOperand NewMO = MI->getOperand(MemOpndSlot + i);
17479 if (NewMO.isReg())
17480 NewMO.setIsKill(false);
17481 MIB.addOperand(NewMO);
17482 }
17483 }
17484 MIB.setMemRefs(LowMI->memoperands_begin(), LowMI->memoperands_end());
17485
17486 thisMBB->addSuccessor(mainMBB);
17487
17488 // mainMBB:
17489 MachineBasicBlock *origMainMBB = mainMBB;
17490
17491 // Add PHIs.
17492 MachineInstr *PhiL = BuildMI(mainMBB, DL, TII->get(X86::PHI), t4L)
17493 .addReg(t1L).addMBB(thisMBB).addReg(t3L).addMBB(mainMBB);
17494 MachineInstr *PhiH = BuildMI(mainMBB, DL, TII->get(X86::PHI), t4H)
17495 .addReg(t1H).addMBB(thisMBB).addReg(t3H).addMBB(mainMBB);
17496
17497 unsigned Opc = MI->getOpcode();
17498 switch (Opc) {
17499 default:
17500 llvm_unreachable("Unhandled atomic-load-op6432 opcode!");
17501 case X86::ATOMAND6432:
17502 case X86::ATOMOR6432:
17503 case X86::ATOMXOR6432:
17504 case X86::ATOMADD6432:
17505 case X86::ATOMSUB6432: {
17506 unsigned HiOpc;
17507 unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc);
17508 BuildMI(mainMBB, DL, TII->get(LoOpc), t2L).addReg(t4L)
17509 .addReg(SrcLoReg);
17510 BuildMI(mainMBB, DL, TII->get(HiOpc), t2H).addReg(t4H)
17511 .addReg(SrcHiReg);
17512 break;
17513 }
17514 case X86::ATOMNAND6432: {
17515 unsigned HiOpc, NOTOpc;
17516 unsigned LoOpc = getNonAtomic6432OpcodeWithExtraOpc(Opc, HiOpc, NOTOpc);
17517 unsigned TmpL = MRI.createVirtualRegister(RC);
17518 unsigned TmpH = MRI.createVirtualRegister(RC);
17519 BuildMI(mainMBB, DL, TII->get(LoOpc), TmpL).addReg(SrcLoReg)
17520 .addReg(t4L);
17521 BuildMI(mainMBB, DL, TII->get(HiOpc), TmpH).addReg(SrcHiReg)
17522 .addReg(t4H);
17523 BuildMI(mainMBB, DL, TII->get(NOTOpc), t2L).addReg(TmpL);
17524 BuildMI(mainMBB, DL, TII->get(NOTOpc), t2H).addReg(TmpH);
17525 break;
17526 }
17527 case X86::ATOMMAX6432:
17528 case X86::ATOMMIN6432:
17529 case X86::ATOMUMAX6432:
17530 case X86::ATOMUMIN6432: {
17531 unsigned HiOpc;
17532 unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc);
17533 unsigned cL = MRI.createVirtualRegister(RC8);
17534 unsigned cH = MRI.createVirtualRegister(RC8);
17535 unsigned cL32 = MRI.createVirtualRegister(RC);
17536 unsigned cH32 = MRI.createVirtualRegister(RC);
17537 unsigned cc = MRI.createVirtualRegister(RC);
17538 // cl := cmp src_lo, lo
17539 BuildMI(mainMBB, DL, TII->get(X86::CMP32rr))
17540 .addReg(SrcLoReg).addReg(t4L);
17541 BuildMI(mainMBB, DL, TII->get(LoOpc), cL);
17542 BuildMI(mainMBB, DL, TII->get(X86::MOVZX32rr8), cL32).addReg(cL);
17543 // ch := cmp src_hi, hi
17544 BuildMI(mainMBB, DL, TII->get(X86::CMP32rr))
17545 .addReg(SrcHiReg).addReg(t4H);
17546 BuildMI(mainMBB, DL, TII->get(HiOpc), cH);
17547 BuildMI(mainMBB, DL, TII->get(X86::MOVZX32rr8), cH32).addReg(cH);
17548 // cc := if (src_hi == hi) ? cl : ch;
17549 if (Subtarget->hasCMov()) {
17550 BuildMI(mainMBB, DL, TII->get(X86::CMOVE32rr), cc)
17551 .addReg(cH32).addReg(cL32);
17552 } else {
17553 MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), cc)
17554 .addReg(cH32).addReg(cL32)
17555 .addImm(X86::COND_E);
17556 mainMBB = EmitLoweredSelect(MIB, mainMBB);
17557 }
17558 BuildMI(mainMBB, DL, TII->get(X86::TEST32rr)).addReg(cc).addReg(cc);
17559 if (Subtarget->hasCMov()) {
17560 BuildMI(mainMBB, DL, TII->get(X86::CMOVNE32rr), t2L)
17561 .addReg(SrcLoReg).addReg(t4L);
17562 BuildMI(mainMBB, DL, TII->get(X86::CMOVNE32rr), t2H)
17563 .addReg(SrcHiReg).addReg(t4H);
17564 } else {
17565 MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), t2L)
17566 .addReg(SrcLoReg).addReg(t4L)
17567 .addImm(X86::COND_NE);
17568 mainMBB = EmitLoweredSelect(MIB, mainMBB);
17569 // As the lowered CMOV won't clobber EFLAGS, we could reuse it for the
17570 // 2nd CMOV lowering.
17571 mainMBB->addLiveIn(X86::EFLAGS);
17572 MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), t2H)
17573 .addReg(SrcHiReg).addReg(t4H)
17574 .addImm(X86::COND_NE);
17575 mainMBB = EmitLoweredSelect(MIB, mainMBB);
17576 // Replace the original PHI node as mainMBB is changed after CMOV
17577 // lowering.
17578 BuildMI(*origMainMBB, PhiL, DL, TII->get(X86::PHI), t4L)
17579 .addReg(t1L).addMBB(thisMBB).addReg(t3L).addMBB(mainMBB);
17580 BuildMI(*origMainMBB, PhiH, DL, TII->get(X86::PHI), t4H)
17581 .addReg(t1H).addMBB(thisMBB).addReg(t3H).addMBB(mainMBB);
17582 PhiL->eraseFromParent();
17583 PhiH->eraseFromParent();
17584 }
17585 break;
17586 }
17587 case X86::ATOMSWAP6432: {
17588 unsigned HiOpc;
17589 unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc);
17590 BuildMI(mainMBB, DL, TII->get(LoOpc), t2L).addReg(SrcLoReg);
17591 BuildMI(mainMBB, DL, TII->get(HiOpc), t2H).addReg(SrcHiReg);
17592 break;
17593 }
17594 }
17595
17596 // Copy EDX:EAX back from HiReg:LoReg
17597 BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EAX).addReg(t4L);
17598 BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EDX).addReg(t4H);
17599 // Copy ECX:EBX from t1H:t1L
17600 BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EBX).addReg(t2L);
17601 BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::ECX).addReg(t2H);
17602
17603 MIB = BuildMI(mainMBB, DL, TII->get(LCMPXCHGOpc));
17604 for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
17605 MachineOperand NewMO = MI->getOperand(MemOpndSlot + i);
17606 if (NewMO.isReg())
17607 NewMO.setIsKill(false);
17608 MIB.addOperand(NewMO);
17609 }
17610 MIB.setMemRefs(MMOBegin, MMOEnd);
17611
17612 // Copy EDX:EAX back to t3H:t3L
17613 BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), t3L).addReg(X86::EAX);
17614 BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), t3H).addReg(X86::EDX);
17615
17616 BuildMI(mainMBB, DL, TII->get(X86::JNE_4)).addMBB(origMainMBB);
17617
17618 mainMBB->addSuccessor(origMainMBB);
17619 mainMBB->addSuccessor(sinkMBB);
17620
17621 // sinkMBB:
17622 BuildMI(*sinkMBB, sinkMBB->begin(), DL,
17623 TII->get(TargetOpcode::COPY), DstLoReg)
17624 .addReg(t3L);
17625 BuildMI(*sinkMBB, sinkMBB->begin(), DL,
17626 TII->get(TargetOpcode::COPY), DstHiReg)
17627 .addReg(t3H);
17628
17629 MI->eraseFromParent();
17630 return sinkMBB;
17631 }
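The low/high split this removed lowering performs for 64-bit operations on a 32-bit target corresponds to ordinary double-word arithmetic: for add, the low halves are combined with ADD32rr and the carry is propagated into the high halves with ADC32rr. A plain-C++ sketch of that step (illustrative only, not from the patch):

#include <cstdint>

void add64_in_halves(uint32_t &lo, uint32_t &hi, uint32_t incLo, uint32_t incHi) {
  uint32_t oldLo = lo;
  lo += incLo;                  // t2L = ADD32rr t4L, MI.val.lo
  hi += incHi + (lo < oldLo);   // t2H = ADC32rr t4H, MI.val.hi (carry from the low half)
}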
17632
1763316859 // FIXME: When we get size specific XMM0 registers, i.e. XMM0_V16I8
1763416860 // or XMM0_V32I8 in AVX all of this code can be replaced with that
1763516861 // in the .td file.
1883818064 // xbegin
1883918065 case X86::XBEGIN:
1884018066 return EmitXBegin(MI, BB, BB->getParent()->getTarget().getInstrInfo());
18841
18842 // Atomic Lowering.
18843 case X86::ATOMAND8:
18844 case X86::ATOMAND16:
18845 case X86::ATOMAND32:
18846 case X86::ATOMAND64:
18847 // Fall through
18848 case X86::ATOMOR8:
18849 case X86::ATOMOR16:
18850 case X86::ATOMOR32:
18851 case X86::ATOMOR64:
18852 // Fall through
18853 case X86::ATOMXOR16:
18854 case X86::ATOMXOR8:
18855 case X86::ATOMXOR32:
18856 case X86::ATOMXOR64:
18857 // Fall through
18858 case X86::ATOMNAND8:
18859 case X86::ATOMNAND16:
18860 case X86::ATOMNAND32:
18861 case X86::ATOMNAND64:
18862 // Fall through
18863 case X86::ATOMMAX8:
18864 case X86::ATOMMAX16:
18865 case X86::ATOMMAX32:
18866 case X86::ATOMMAX64:
18867 // Fall through
18868 case X86::ATOMMIN8:
18869 case X86::ATOMMIN16:
18870 case X86::ATOMMIN32:
18871 case X86::ATOMMIN64:
18872 // Fall through
18873 case X86::ATOMUMAX8:
18874 case X86::ATOMUMAX16:
18875 case X86::ATOMUMAX32:
18876 case X86::ATOMUMAX64:
18877 // Fall through
18878 case X86::ATOMUMIN8:
18879 case X86::ATOMUMIN16:
18880 case X86::ATOMUMIN32:
18881 case X86::ATOMUMIN64:
18882 return EmitAtomicLoadArith(MI, BB);
18883
18884 // This group does 64-bit operations on a 32-bit host.
18885 case X86::ATOMAND6432:
18886 case X86::ATOMOR6432:
18887 case X86::ATOMXOR6432:
18888 case X86::ATOMNAND6432:
18889 case X86::ATOMADD6432:
18890 case X86::ATOMSUB6432:
18891 case X86::ATOMMAX6432:
18892 case X86::ATOMMIN6432:
18893 case X86::ATOMUMAX6432:
18894 case X86::ATOMUMIN6432:
18895 case X86::ATOMSWAP6432:
18896 return EmitAtomicLoadArith6432(MI, BB);
1889718067
1889818068 case X86::VASTART_SAVE_XMM_REGS:
1889918069 return EmitVAStartSaveXMMRegsWithCustomInserter(MI, BB);
404404 // XTEST - Test if in transactional execution.
405405 XTEST,
406406
407 // ATOMADD64_DAG, ATOMSUB64_DAG, ATOMOR64_DAG, ATOMAND64_DAG,
408 // ATOMXOR64_DAG, ATOMNAND64_DAG, ATOMSWAP64_DAG -
409 // Atomic 64-bit binary operations.
410 ATOMADD64_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
411 ATOMSUB64_DAG,
412 ATOMOR64_DAG,
413 ATOMXOR64_DAG,
414 ATOMAND64_DAG,
415 ATOMNAND64_DAG,
416 ATOMMAX64_DAG,
417 ATOMMIN64_DAG,
418 ATOMUMAX64_DAG,
419 ATOMUMIN64_DAG,
420 ATOMSWAP64_DAG,
421
422407 // LCMPXCHG_DAG, LCMPXCHG8_DAG, LCMPXCHG16_DAG - Compare and swap.
423 LCMPXCHG_DAG,
408 LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
424409 LCMPXCHG8_DAG,
425410 LCMPXCHG16_DAG,
426411
519519 EFLAGS))]>;
520520 } // UsesCustomInserter = 1, Uses = [EFLAGS]
521521
522
523 //===----------------------------------------------------------------------===//
524 // Atomic Instruction Pseudo Instructions
525 //===----------------------------------------------------------------------===//
526
527 // Pseudo atomic instructions
528
529 multiclass PSEUDO_ATOMIC_LOAD_BINOP<string mnemonic> {
530 let usesCustomInserter = 1, mayLoad = 1, mayStore = 1 in {
531 let Defs = [EFLAGS, AL] in
532 def NAME#8 : I<0, Pseudo, (outs GR8:$dst),
533 (ins i8mem:$ptr, GR8:$val),
534 !strconcat(mnemonic, "8 PSEUDO!"), []>;
535 let Defs = [EFLAGS, AX] in
536 def NAME#16 : I<0, Pseudo,(outs GR16:$dst),
537 (ins i16mem:$ptr, GR16:$val),
538 !strconcat(mnemonic, "16 PSEUDO!"), []>;
539 let Defs = [EFLAGS, EAX] in
540 def NAME#32 : I<0, Pseudo, (outs GR32:$dst),
541 (ins i32mem:$ptr, GR32:$val),
542 !strconcat(mnemonic, "32 PSEUDO!"), []>;
543 let Defs = [EFLAGS, RAX] in
544 def NAME#64 : I<0, Pseudo, (outs GR64:$dst),
545 (ins i64mem:$ptr, GR64:$val),
546 !strconcat(mnemonic, "64 PSEUDO!"), []>;
547 }
548 }
549
550 multiclass PSEUDO_ATOMIC_LOAD_BINOP_PATS<string name, string frag> {
551 def : Pat<(!cast<PatFrag>(frag # "_8") addr:$ptr, GR8:$val),
552 (!cast<Instruction>(name # "8") addr:$ptr, GR8:$val)>;
553 def : Pat<(!cast<PatFrag>(frag # "_16") addr:$ptr, GR16:$val),
554 (!cast<Instruction>(name # "16") addr:$ptr, GR16:$val)>;
555 def : Pat<(!cast<PatFrag>(frag # "_32") addr:$ptr, GR32:$val),
556 (!cast<Instruction>(name # "32") addr:$ptr, GR32:$val)>;
557 def : Pat<(!cast<PatFrag>(frag # "_64") addr:$ptr, GR64:$val),
558 (!cast<Instruction>(name # "64") addr:$ptr, GR64:$val)>;
559 }
560
561 // Atomic exchange, and, or, xor
562 defm ATOMAND : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMAND">;
563 defm ATOMOR : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMOR">;
564 defm ATOMXOR : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMXOR">;
565 defm ATOMNAND : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMNAND">;
566 defm ATOMMAX : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMMAX">;
567 defm ATOMMIN : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMMIN">;
568 defm ATOMUMAX : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMUMAX">;
569 defm ATOMUMIN : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMUMIN">;
570
571 defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMAND", "atomic_load_and">;
572 defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMOR", "atomic_load_or">;
573 defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMXOR", "atomic_load_xor">;
574 defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMNAND", "atomic_load_nand">;
575 defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMMAX", "atomic_load_max">;
576 defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMMIN", "atomic_load_min">;
577 defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMUMAX", "atomic_load_umax">;
578 defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMUMIN", "atomic_load_umin">;
579
580 multiclass PSEUDO_ATOMIC_LOAD_BINOP6432<string mnemonic> {
581 let usesCustomInserter = 1, Defs = [EFLAGS, EAX, EDX],
582 mayLoad = 1, mayStore = 1, hasSideEffects = 0 in
583 def NAME#6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
584 (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
585 !strconcat(mnemonic, "6432 PSEUDO!"), []>;
586 }
587
588 defm ATOMAND : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMAND">;
589 defm ATOMOR : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMOR">;
590 defm ATOMXOR : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMXOR">;
591 defm ATOMNAND : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMNAND">;
592 defm ATOMADD : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMADD">;
593 defm ATOMSUB : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMSUB">;
594 defm ATOMMAX : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMMAX">;
595 defm ATOMMIN : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMMIN">;
596 defm ATOMUMAX : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMUMAX">;
597 defm ATOMUMIN : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMUMIN">;
598 defm ATOMSWAP : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMSWAP">;
599522
600523 //===----------------------------------------------------------------------===//
601524 // Normal-Instructions-With-Lock-Prefix Pseudo Instructions
154154 [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore,
155155 SDNPMayLoad, SDNPMemOperand]>;
156156
157 def X86AtomAdd64 : SDNode<"X86ISD::ATOMADD64_DAG", SDTX86atomicBinary,
158 [SDNPHasChain, SDNPMayStore,
159 SDNPMayLoad, SDNPMemOperand]>;
160 def X86AtomSub64 : SDNode<"X86ISD::ATOMSUB64_DAG", SDTX86atomicBinary,
161 [SDNPHasChain, SDNPMayStore,
162 SDNPMayLoad, SDNPMemOperand]>;
163 def X86AtomOr64 : SDNode<"X86ISD::ATOMOR64_DAG", SDTX86atomicBinary,
164 [SDNPHasChain, SDNPMayStore,
165 SDNPMayLoad, SDNPMemOperand]>;
166 def X86AtomXor64 : SDNode<"X86ISD::ATOMXOR64_DAG", SDTX86atomicBinary,
167 [SDNPHasChain, SDNPMayStore,
168 SDNPMayLoad, SDNPMemOperand]>;
169 def X86AtomAnd64 : SDNode<"X86ISD::ATOMAND64_DAG", SDTX86atomicBinary,
170 [SDNPHasChain, SDNPMayStore,
171 SDNPMayLoad, SDNPMemOperand]>;
172 def X86AtomNand64 : SDNode<"X86ISD::ATOMNAND64_DAG", SDTX86atomicBinary,
173 [SDNPHasChain, SDNPMayStore,
174 SDNPMayLoad, SDNPMemOperand]>;
175 def X86AtomSwap64 : SDNode<"X86ISD::ATOMSWAP64_DAG", SDTX86atomicBinary,
176 [SDNPHasChain, SDNPMayStore,
177 SDNPMayLoad, SDNPMemOperand]>;
178157 def X86retflag : SDNode<"X86ISD::RET_FLAG", SDTX86Ret,
179158 [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
180159
110110 return *getX86TargetMachine().getSubtargetImpl();
111111 }
112112
113 void addIRPasses() override;
113114 bool addInstSelector() override;
114115 bool addILPOpts() override;
115116 bool addPreRegAlloc() override;
120121
121122 TargetPassConfig *X86TargetMachine::createPassConfig(PassManagerBase &PM) {
122123 return new X86PassConfig(this, PM);
124 }
125
126 void X86PassConfig::addIRPasses() {
127 addPass(createX86AtomicExpandPass(&getX86TargetMachine()));
128
129 TargetPassConfig::addIRPasses();
123130 }
124131
125132 bool X86PassConfig::addInstSelector() {
1010 ; CHECK: movl 4([[REG]]), %edx
1111 ; CHECK: LBB0_1:
1212 ; CHECK: movl %eax, %ebx
13 ; CHECK: addl {{%[a-z]+}}, %ebx
13 ; CHECK: addl $1, %ebx
1414 ; CHECK: movl %edx, %ecx
15 ; CHECK: adcl {{%[a-z]+}}, %ecx
15 ; CHECK: adcl $0, %ecx
1616 ; CHECK: lock
1717 ; CHECK-NEXT: cmpxchg8b ([[REG]])
1818 ; CHECK-NEXT: jne
0 ; RUN: llc < %s -march=x86-64 > %t.x86-64
1 ; RUN: llc < %s -march=x86 > %t.x86
1 ; RUN: llc < %s -march=x86 -mattr=cx16 > %t.x86
22 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
33 target triple = "x86_64-apple-darwin8"
44
None ; RUN: llc < %s -march=x86 -verify-machineinstrs | FileCheck %s
0 ; RUN: llc < %s -mcpu=corei7 -march=x86 -verify-machineinstrs | FileCheck %s
11
22 ; 64-bit load/store on x86-32
33 ; FIXME: The generated code can be substantially improved.
None ; RUN: llc -march=x86 -mattr=+cmov -mtriple=i386-pc-linux -verify-machineinstrs < %s | FileCheck %s -check-prefix=LINUX
1 ; RUN: llc -march=x86 -mattr=-cmov -mtriple=i386-pc-linux -verify-machineinstrs < %s | FileCheck %s -check-prefix=NOCMOV
2 ; RUN: llc -march=x86 -mtriple=i386-macosx -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s -check-prefix=PIC
0 ; RUN: llc -march=x86 -mattr=+cmov,cx16 -mtriple=i386-pc-linux -verify-machineinstrs < %s | FileCheck %s -check-prefix=LINUX
1 ; RUN: llc -march=x86 -mattr=cx16 -mtriple=i386-macosx -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s -check-prefix=PIC
32
43 @sc64 = external global i64
54
87 %1 = atomicrmw max i64* @sc64, i64 5 acquire
98 ; LINUX: [[LABEL:.LBB[0-9]+_[0-9]+]]
109 ; LINUX: cmpl
11 ; LINUX: setl
12 ; LINUX: cmpl
13 ; LINUX: setl
10 ; LINUX: seta
1411 ; LINUX: cmovne
1512 ; LINUX: cmovne
1613 ; LINUX: lock
1714 ; LINUX-NEXT: cmpxchg8b
1815 ; LINUX: jne [[LABEL]]
19 ; NOCMOV: [[LABEL:.LBB[0-9]+_[0-9]+]]
20 ; NOCMOV: cmpl
21 ; NOCMOV: setl
22 ; NOCMOV: cmpl
23 ; NOCMOV: setl
24 ; NOCMOV: jne
25 ; NOCMOV: jne
26 ; NOCMOV: lock
27 ; NOCMOV-NEXT: cmpxchg8b
28 ; NOCMOV: jne [[LABEL]]
2916 %2 = atomicrmw min i64* @sc64, i64 6 acquire
3017 ; LINUX: [[LABEL:.LBB[0-9]+_[0-9]+]]
31 ; LINUX: cmpl
32 ; LINUX: setg
33 ; LINUX: cmpl
34 ; LINUX: setg
35 ; LINUX: cmovne
36 ; LINUX: cmovne
37 ; LINUX: lock
38 ; LINUX-NEXT: cmpxchg8b
39 ; LINUX: jne [[LABEL]]
40 ; NOCMOV: [[LABEL:.LBB[0-9]+_[0-9]+]]
41 ; NOCMOV: cmpl
42 ; NOCMOV: setg
43 ; NOCMOV: cmpl
44 ; NOCMOV: setg
45 ; NOCMOV: jne
46 ; NOCMOV: jne
47 ; NOCMOV: lock
48 ; NOCMOV-NEXT: cmpxchg8b
49 ; NOCMOV: jne [[LABEL]]
50 %3 = atomicrmw umax i64* @sc64, i64 7 acquire
51 ; LINUX: [[LABEL:.LBB[0-9]+_[0-9]+]]
52 ; LINUX: cmpl
53 ; LINUX: setb
5418 ; LINUX: cmpl
5519 ; LINUX: setb
5620 ; LINUX: cmovne
5822 ; LINUX: lock
5923 ; LINUX-NEXT: cmpxchg8b
6024 ; LINUX: jne [[LABEL]]
61 ; NOCMOV: [[LABEL:.LBB[0-9]+_[0-9]+]]
62 ; NOCMOV: cmpl
63 ; NOCMOV: setb
64 ; NOCMOV: cmpl
65 ; NOCMOV: setb
66 ; NOCMOV: jne
67 ; NOCMOV: jne
68 ; NOCMOV: lock
69 ; NOCMOV-NEXT: cmpxchg8b
70 ; NOCMOV: jne [[LABEL]]
71 %4 = atomicrmw umin i64* @sc64, i64 8 acquire
25 %3 = atomicrmw umax i64* @sc64, i64 7 acquire
7226 ; LINUX: [[LABEL:.LBB[0-9]+_[0-9]+]]
73 ; LINUX: cmpl
74 ; LINUX: seta
7527 ; LINUX: cmpl
7628 ; LINUX: seta
7729 ; LINUX: cmovne
7931 ; LINUX: lock
8032 ; LINUX-NEXT: cmpxchg8b
8133 ; LINUX: jne [[LABEL]]
82 ; NOCMOV: [[LABEL:.LBB[0-9]+_[0-9]+]]
83 ; NOCMOV: cmpl
84 ; NOCMOV: seta
85 ; NOCMOV: cmpl
86 ; NOCMOV: seta
87 ; NOCMOV: jne
88 ; NOCMOV: jne
89 ; NOCMOV: lock
90 ; NOCMOV-NEXT: cmpxchg8b
91 ; NOCMOV: jne [[LABEL]]
34 %4 = atomicrmw umin i64* @sc64, i64 8 acquire
35 ; LINUX: [[LABEL:.LBB[0-9]+_[0-9]+]]
36 ; LINUX: cmpl
37 ; LINUX: setb
38 ; LINUX: cmovne
39 ; LINUX: cmovne
40 ; LINUX: lock
41 ; LINUX-NEXT: cmpxchg8b
42 ; LINUX: jne [[LABEL]]
9243 ret void
9344 }
9445
9748
9849 define void @tf_bug(i8* %ptr) nounwind {
9950 ; PIC-LABEL: tf_bug:
100 ; PIC: movl _id-L1$pb(
101 ; PIC: movl (_id-L1$pb)+4(
51 ; PIC-DAG: movl _id-L1$pb(
52 ; PIC-DAG: movl (_id-L1$pb)+4(
10253 %tmp1 = atomicrmw add i64* @id, i64 1 seq_cst
10354 %tmp2 = add i64 %tmp1, 1
10455 %tmp3 = bitcast i8* %ptr to i64*
0 ; RUN: llc < %s -mtriple=x86_64-apple-macosx10.9 -verify-machineinstrs -mattr=cx16 | FileCheck %s
1
2 @var = global i128 0
3
4 define i128 @val_compare_and_swap(i128* %p, i128 %oldval, i128 %newval) {
5 ; CHECK-LABEL: val_compare_and_swap:
6 ; CHECK: movq %rsi, %rax
7 ; CHECK: movq %rcx, %rbx
8 ; CHECK: movq %r8, %rcx
9 ; CHECK: lock
10 ; CHECK: cmpxchg16b (%rdi)
11
12 %pair = cmpxchg i128* %p, i128 %oldval, i128 %newval acquire acquire
13 %val = extractvalue { i128, i1 } %pair, 0
14 ret i128 %val
15 }
16
17 define void @fetch_and_nand(i128* %p, i128 %bits) {
18 ; CHECK-LABEL: fetch_and_nand:
19 ; CHECK-DAG: movq %rdx, [[INCHI:%[a-z0-9]+]]
20 ; CHECK-DAG: movq (%rdi), %rax
21 ; CHECK-DAG: movq 8(%rdi), %rdx
22
23 ; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]:
24 ; CHECK: movq %rdx, %rcx
25 ; CHECK: andq [[INCHI]], %rcx
26 ; CHECK: movq %rax, %rbx
27 ; INCLO equivalent comes in in %rsi, so it makes sense it stays there.
28 ; CHECK: andq %rsi, %rbx
29 ; CHECK: notq %rbx
30 ; CHECK: notq %rcx
31 ; CHECK: lock
32 ; CHECK: cmpxchg16b (%rdi)
33 ; CHECK: jne [[LOOP]]
34
35 ; CHECK: movq %rax, _var
36 ; CHECK: movq %rdx, _var+8
37 %val = atomicrmw nand i128* %p, i128 %bits release
38 store i128 %val, i128* @var, align 16
39 ret void
40 }
41
42 define void @fetch_and_or(i128* %p, i128 %bits) {
43 ; CHECK-LABEL: fetch_and_or:
44 ; CHECK-DAG: movq %rdx, [[INCHI:%[a-z0-9]+]]
45 ; CHECK-DAG: movq (%rdi), %rax
46 ; CHECK-DAG: movq 8(%rdi), %rdx
47
48 ; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]:
49 ; CHECK: movq %rax, %rbx
50 ; INCLO equivalent comes in in %rsi, so it makes sense it stays there.
51 ; CHECK: orq %rsi, %rbx
52 ; CHECK: movq %rdx, %rcx
53 ; CHECK: orq [[INCHI]], %rcx
54 ; CHECK: lock
55 ; CHECK: cmpxchg16b (%rdi)
56 ; CHECK: jne [[LOOP]]
57
58 ; CHECK: movq %rax, _var
59 ; CHECK: movq %rdx, _var+8
60
61 %val = atomicrmw or i128* %p, i128 %bits seq_cst
62 store i128 %val, i128* @var, align 16
63 ret void
64 }
65
66 define void @fetch_and_add(i128* %p, i128 %bits) {
67 ; CHECK-LABEL: fetch_and_add:
68 ; CHECK-DAG: movq %rdx, [[INCHI:%[a-z0-9]+]]
69 ; CHECK-DAG: movq (%rdi), %rax
70 ; CHECK-DAG: movq 8(%rdi), %rdx
71
72 ; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]:
73 ; CHECK: movq %rax, %rbx
74 ; The INCLO equivalent comes in via %rsi, so it makes sense for it to stay there.
75 ; CHECK: addq %rsi, %rbx
76 ; CHECK: movq %rdx, %rcx
77 ; CHECK: adcq [[INCHI]], %rcx
78 ; CHECK: lock
79 ; CHECK: cmpxchg16b (%rdi)
80 ; CHECK: jne [[LOOP]]
81
82 ; CHECK: movq %rax, _var
83 ; CHECK: movq %rdx, _var+8
84
85 %val = atomicrmw add i128* %p, i128 %bits seq_cst
86 store i128 %val, i128* @var, align 16
87 ret void
88 }
89
90 define void @fetch_and_sub(i128* %p, i128 %bits) {
91 ; CHECK-LABEL: fetch_and_sub:
92 ; CHECK-DAG: movq %rdx, [[INCHI:%[a-z0-9]+]]
93 ; CHECK-DAG: movq (%rdi), %rax
94 ; CHECK-DAG: movq 8(%rdi), %rdx
95
96 ; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]:
97 ; CHECK: movq %rax, %rbx
98 ; The INCLO equivalent comes in via %rsi, so it makes sense for it to stay there.
99 ; CHECK: subq %rsi, %rbx
100 ; CHECK: movq %rdx, %rcx
101 ; CHECK: sbbq [[INCHI]], %rcx
102 ; CHECK: lock
103 ; CHECK: cmpxchg16b (%rdi)
104 ; CHECK: jne [[LOOP]]
105
106 ; CHECK: movq %rax, _var
107 ; CHECK: movq %rdx, _var+8
108
109 %val = atomicrmw sub i128* %p, i128 %bits seq_cst
110 store i128 %val, i128* @var, align 16
111 ret void
112 }
113
114 define void @fetch_and_min(i128* %p, i128 %bits) {
115 ; CHECK-LABEL: fetch_and_min:
116 ; CHECK-DAG: movq %rdx, [[INCHI:%[a-z0-9]+]]
117 ; CHECK-DAG: movq (%rdi), %rax
118 ; CHECK-DAG: movq 8(%rdi), %rdx
119
120 ; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]:
121 ; CHECK: cmpq %rsi, %rax
122 ; CHECK: setbe [[CMP:%[a-z0-9]+]]
123 ; CHECK: cmpq [[INCHI]], %rdx
124 ; CHECK: setle [[HICMP:%[a-z0-9]+]]
125 ; CHECK: je [[USE_LO:.?LBB[0-9]+_[0-9]+]]
126
127 ; CHECK: movb [[HICMP]], [[CMP]]
128 ; CHECK: [[USE_LO]]:
129 ; CHECK: testb [[CMP]], [[CMP]]
130 ; CHECK: movq %rsi, %rbx
131 ; CHECK: cmovneq %rax, %rbx
132 ; CHECK: movq [[INCHI]], %rcx
133 ; CHECK: cmovneq %rdx, %rcx
134 ; CHECK: lock
135 ; CHECK: cmpxchg16b (%rdi)
136 ; CHECK: jne [[LOOP]]
137
138 ; CHECK: movq %rax, _var
139 ; CHECK: movq %rdx, _var+8
140
141 %val = atomicrmw min i128* %p, i128 %bits seq_cst
142 store i128 %val, i128* @var, align 16
143 ret void
144 }
145
146 define void @fetch_and_max(i128* %p, i128 %bits) {
147 ; CHECK-LABEL: fetch_and_max:
148 ; CHECK-DAG: movq %rdx, [[INCHI:%[a-z0-9]+]]
149 ; CHECK-DAG: movq (%rdi), %rax
150 ; CHECK-DAG: movq 8(%rdi), %rdx
151
152 ; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]:
153 ; CHECK: cmpq %rsi, %rax
154 ; CHECK: setae [[CMP:%[a-z0-9]+]]
155 ; CHECK: cmpq [[INCHI]], %rdx
156 ; CHECK: setge [[HICMP:%[a-z0-9]+]]
157 ; CHECK: je [[USE_LO:.?LBB[0-9]+_[0-9]+]]
158
159 ; CHECK: movb [[HICMP]], [[CMP]]
160 ; CHECK: [[USE_LO]]:
161 ; CHECK: testb [[CMP]], [[CMP]]
162 ; CHECK: movq %rsi, %rbx
163 ; CHECK: cmovneq %rax, %rbx
164 ; CHECK: movq [[INCHI]], %rcx
165 ; CHECK: cmovneq %rdx, %rcx
166 ; CHECK: lock
167 ; CHECK: cmpxchg16b (%rdi)
168 ; CHECK: jne [[LOOP]]
169
170 ; CHECK: movq %rax, _var
171 ; CHECK: movq %rdx, _var+8
172
173 %val = atomicrmw max i128* %p, i128 %bits seq_cst
174 store i128 %val, i128* @var, align 16
175 ret void
176 }
177
178 define void @fetch_and_umin(i128* %p, i128 %bits) {
179 ; CHECK-LABEL: fetch_and_umin:
180 ; CHECK-DAG: movq %rdx, [[INCHI:%[a-z0-9]+]]
181 ; CHECK-DAG: movq (%rdi), %rax
182 ; CHECK-DAG: movq 8(%rdi), %rdx
183
184 ; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]:
185 ; CHECK: cmpq %rsi, %rax
186 ; CHECK: setbe [[CMP:%[a-z0-9]+]]
187 ; CHECK: cmpq [[INCHI]], %rdx
188 ; CHECK: setbe [[HICMP:%[a-z0-9]+]]
189 ; CHECK: je [[USE_LO:.?LBB[0-9]+_[0-9]+]]
190
191 ; CHECK: movb [[HICMP]], [[CMP]]
192 ; CHECK: [[USE_LO]]:
193 ; CHECK: testb [[CMP]], [[CMP]]
194 ; CHECK: movq %rsi, %rbx
195 ; CHECK: cmovneq %rax, %rbx
196 ; CHECK: movq [[INCHI]], %rcx
197 ; CHECK: cmovneq %rdx, %rcx
198 ; CHECK: lock
199 ; CHECK: cmpxchg16b (%rdi)
200 ; CHECK: jne [[LOOP]]
201
202 ; CHECK: movq %rax, _var
203 ; CHECK: movq %rdx, _var+8
204
205 %val = atomicrmw umin i128* %p, i128 %bits seq_cst
206 store i128 %val, i128* @var, align 16
207 ret void
208 }
209
210 define void @fetch_and_umax(i128* %p, i128 %bits) {
211 ; CHECK-LABEL: fetch_and_umax:
212 ; CHECK-DAG: movq %rdx, [[INCHI:%[a-z0-9]+]]
213 ; CHECK-DAG: movq (%rdi), %rax
214 ; CHECK-DAG: movq 8(%rdi), %rdx
215
216 ; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]:
217 ; CHECK: cmpq %rax, %rsi
218 ; CHECK: setb [[CMP:%[a-z0-9]+]]
219 ; CHECK: cmpq [[INCHI]], %rdx
220 ; CHECK: seta [[HICMP:%[a-z0-9]+]]
221 ; CHECK: je [[USE_LO:.?LBB[0-9]+_[0-9]+]]
222
223 ; CHECK: movb [[HICMP]], [[CMP]]
224 ; CHECK: [[USE_LO]]:
225 ; CHECK: testb [[CMP]], [[CMP]]
226 ; CHECK: movq %rsi, %rbx
227 ; CHECK: cmovneq %rax, %rbx
228 ; CHECK: movq [[INCHI]], %rcx
229 ; CHECK: cmovneq %rdx, %rcx
230 ; CHECK: lock
231 ; CHECK: cmpxchg16b (%rdi)
232 ; CHECK: jne [[LOOP]]
233
234 ; CHECK: movq %rax, _var
235 ; CHECK: movq %rdx, _var+8
236
237 %val = atomicrmw umax i128* %p, i128 %bits seq_cst
238 store i128 %val, i128* @var, align 16
239 ret void
240 }
241
242 define i128 @atomic_load_seq_cst(i128* %p) {
243 ; CHECK-LABEL: atomic_load_seq_cst:
244 ; CHECK: xorl %eax, %eax
245 ; CHECK: xorl %edx, %edx
246 ; CHECK: xorl %ebx, %ebx
247 ; CHECK: xorl %ecx, %ecx
248 ; CHECK: lock
249 ; CHECK: cmpxchg16b (%rdi)
250
251 %r = load atomic i128* %p seq_cst, align 16
252 ret i128 %r
253 }
254
255 define i128 @atomic_load_relaxed(i128* %p) {
256 ; CHECK-LABEL: atomic_load_relaxed:
257 ; CHECK: xorl %eax, %eax
258 ; CHECK: xorl %edx, %edx
259 ; CHECK: xorl %ebx, %ebx
260 ; CHECK: xorl %ecx, %ecx
261 ; CHECK: lock
262 ; CHECK: cmpxchg16b (%rdi)
263
264 %r = load atomic i128* %p monotonic, align 16
265 ret i128 %r
266 }
267
268 define void @atomic_store_seq_cst(i128* %p, i128 %in) {
269 ; CHECK-LABEL: atomic_store_seq_cst:
270 ; CHECK: movq %rdx, %rcx
271 ; CHECK: movq %rsi, %rbx
272 ; CHECK: movq (%rdi), %rax
273 ; CHECK: movq 8(%rdi), %rdx
274
275 ; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]:
276 ; CHECK: lock
277 ; CHECK: cmpxchg16b (%rdi)
278 ; CHECK: jne [[LOOP]]
279
280 store atomic i128 %in, i128* %p seq_cst, align 16
281 ret void
282 }
283
284 define void @atomic_store_release(i128* %p, i128 %in) {
285 ; CHECK-LABEL: atomic_store_release:
286 ; CHECK: movq %rdx, %rcx
287 ; CHECK: movq %rsi, %rbx
288 ; CHECK: movq (%rdi), %rax
289 ; CHECK: movq 8(%rdi), %rdx
290
291 ; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]:
292 ; CHECK: lock
293 ; CHECK: cmpxchg16b (%rdi)
294 ; CHECK: jne [[LOOP]]
295
296 store atomic i128 %in, i128* %p release, align 16
297 ret void
298 }
299
300 define void @atomic_store_relaxed(i128* %p, i128 %in) {
301 ; CHECK-LABEL: atomic_store_relaxed:
302 ; CHECK: movq %rdx, %rcx
303 ; CHECK: movq %rsi, %rbx
304 ; CHECK: movq (%rdi), %rax
305 ; CHECK: movq 8(%rdi), %rdx
306
307 ; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]:
308 ; CHECK: lock
309 ; CHECK: cmpxchg16b (%rdi)
310 ; CHECK: jne [[LOOP]]
311
312 store atomic i128 %in, i128* %p unordered, align 16
313 ret void
314 }
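The store tests above rely on the same mechanism: there is no 16-byte atomic store instruction, so the pass turns the store into a cmpxchg16b retry loop whose desired value is simply the incoming operand. A hedged sketch, with names of my own choosing rather than anything taken from the patch:

define void @store_expansion_sketch(i128* %p, i128 %in) {
entry:
  %init = load i128* %p, align 16
  br label %loop

loop:
  %old = phi i128 [ %init, %entry ], [ %seen, %loop ]
  %pair = cmpxchg i128* %p, i128 %old, i128 %in seq_cst monotonic
  %seen = extractvalue { i128, i1 } %pair, 0
  %ok = extractvalue { i128, i1 } %pair, 1
  br i1 %ok, label %done, label %loop

done:
  ret void
}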
33 @sc16 = external global i16
44
55 define void @atomic_fetch_add16() nounwind {
6 ; X64: atomic_fetch_add16
7 ; X32: atomic_fetch_add16
6 ; X64-LABEL: atomic_fetch_add16
7 ; X32-LABEL: atomic_fetch_add16
88 entry:
99 ; 32-bit
1010 %t1 = atomicrmw add i16* @sc16, i16 1 acquire
3333 }
3434
3535 define void @atomic_fetch_sub16() nounwind {
36 ; X64: atomic_fetch_sub16
37 ; X32: atomic_fetch_sub16
36 ; X64-LABEL: atomic_fetch_sub16
37 ; X32-LABEL: atomic_fetch_sub16
3838 %t1 = atomicrmw sub i16* @sc16, i16 1 acquire
3939 ; X64: lock
4040 ; X64: decw
6161 }
6262
6363 define void @atomic_fetch_and16() nounwind {
64 ; X64: atomic_fetch_and16
65 ; X32: atomic_fetch_and16
64 ; X64-LABEL: atomic_fetch_and16
65 ; X32-LABEL: atomic_fetch_and16
6666 %t1 = atomicrmw and i16* @sc16, i16 3 acquire
6767 ; X64: lock
6868 ; X64: andw $3, {{.*}} # encoding: [0xf0,0x66
6969 ; X32: lock
7070 ; X32: andw $3
7171 %t2 = atomicrmw and i16* @sc16, i16 5 acquire
72 ; X64: andw
73 ; X64: lock
74 ; X64: cmpxchgw
72 ; X64: andl
73 ; X64: lock
74 ; X64: cmpxchgw
75 ; X32: andl
76 ; X32: lock
77 ; X32: cmpxchgw
78 %t3 = atomicrmw and i16* @sc16, i16 %t2 acquire
79 ; X64: lock
80 ; X64: andw {{.*}} # encoding: [0xf0,0x66
81 ; X32: lock
7582 ; X32: andw
76 ; X32: lock
77 ; X32: cmpxchgw
78 %t3 = atomicrmw and i16* @sc16, i16 %t2 acquire
79 ; X64: lock
80 ; X64: andw {{.*}} # encoding: [0xf0,0x66
81 ; X32: lock
82 ; X32: andw
8383 ret void
8484 ; X64: ret
8585 ; X32: ret
8686 }
8787
8888 define void @atomic_fetch_or16() nounwind {
89 ; X64: atomic_fetch_or16
90 ; X32: atomic_fetch_or16
89 ; X64-LABEL: atomic_fetch_or16
90 ; X32-LABEL: atomic_fetch_or16
9191 %t1 = atomicrmw or i16* @sc16, i16 3 acquire
9292 ; X64: lock
9393 ; X64: orw $3, {{.*}} # encoding: [0xf0,0x66
9494 ; X32: lock
9595 ; X32: orw $3
9696 %t2 = atomicrmw or i16* @sc16, i16 5 acquire
97 ; X64: orw
98 ; X64: lock
99 ; X64: cmpxchgw
97 ; X64: orl
98 ; X64: lock
99 ; X64: cmpxchgw
100 ; X32: orl
101 ; X32: lock
102 ; X32: cmpxchgw
103 %t3 = atomicrmw or i16* @sc16, i16 %t2 acquire
104 ; X64: lock
105 ; X64: orw {{.*}} # encoding: [0xf0,0x66
106 ; X32: lock
100107 ; X32: orw
101 ; X32: lock
102 ; X32: cmpxchgw
103 %t3 = atomicrmw or i16* @sc16, i16 %t2 acquire
104 ; X64: lock
105 ; X64: orw {{.*}} # encoding: [0xf0,0x66
106 ; X32: lock
107 ; X32: orw
108108 ret void
109109 ; X64: ret
110110 ; X32: ret
111111 }
112112
113113 define void @atomic_fetch_xor16() nounwind {
114 ; X64: atomic_fetch_xor16
115 ; X32: atomic_fetch_xor16
114 ; X64-LABEL: atomic_fetch_xor16
115 ; X32-LABEL: atomic_fetch_xor16
116116 %t1 = atomicrmw xor i16* @sc16, i16 3 acquire
117117 ; X64: lock
118118 ; X64: xorw $3, {{.*}} # encoding: [0xf0,0x66
119119 ; X32: lock
120120 ; X32: xorw $3
121121 %t2 = atomicrmw xor i16* @sc16, i16 5 acquire
122 ; X64: xorw
123 ; X64: lock
124 ; X64: cmpxchgw
122 ; X64: xorl
123 ; X64: lock
124 ; X64: cmpxchgw
125 ; X32: xorl
126 ; X32: lock
127 ; X32: cmpxchgw
128 %t3 = atomicrmw xor i16* @sc16, i16 %t2 acquire
129 ; X64: lock
130 ; X64: xorw {{.*}} # encoding: [0xf0,0x66
131 ; X32: lock
125132 ; X32: xorw
126 ; X32: lock
127 ; X32: cmpxchgw
128 %t3 = atomicrmw xor i16* @sc16, i16 %t2 acquire
129 ; X64: lock
130 ; X64: xorw {{.*}} # encoding: [0xf0,0x66
131 ; X32: lock
132 ; X32: xorw
133133 ret void
134134 ; X64: ret
135135 ; X32: ret
136136 }
137137
138138 define void @atomic_fetch_nand16(i16 %x) nounwind {
139 ; X64: atomic_fetch_nand16
140 ; X32: atomic_fetch_nand16
139 ; X64-LABEL: atomic_fetch_nand16
140 ; X32-LABEL: atomic_fetch_nand16
141141 %t1 = atomicrmw nand i16* @sc16, i16 %x acquire
142 ; X64: andw
143 ; X64: notw
144 ; X64: lock
145 ; X64: cmpxchgw
146 ; X32: andw
147 ; X32: notw
142 ; X64: andl
143 ; X64: notl
144 ; X64: lock
145 ; X64: cmpxchgw
146 ; X32: andl
147 ; X32: notl
148148 ; X32: lock
149149 ; X32: cmpxchgw
150150 ret void
154154
155155 define void @atomic_fetch_max16(i16 %x) nounwind {
156156 %t1 = atomicrmw max i16* @sc16, i16 %x acquire
157 ; X64: cmpw
158 ; X64: cmov
159 ; X64: lock
160 ; X64: cmpxchgw
161
162 ; X32: cmpw
157 ; X64: movswl
158 ; X64: movswl
159 ; X64: subl
160 ; X64: cmov
161 ; X64: lock
162 ; X64: cmpxchgw
163
164 ; X32: movswl
165 ; X32: movswl
166 ; X32: subl
163167 ; X32: cmov
164168 ; X32: lock
165169 ; X32: cmpxchgw
170174
171175 define void @atomic_fetch_min16(i16 %x) nounwind {
172176 %t1 = atomicrmw min i16* @sc16, i16 %x acquire
173 ; X64: cmpw
174 ; X64: cmov
175 ; X64: lock
176 ; X64: cmpxchgw
177
178 ; X32: cmpw
177 ; X64: movswl
178 ; X64: movswl
179 ; X64: subl
180 ; X64: cmov
181 ; X64: lock
182 ; X64: cmpxchgw
183
184 ; X32: movswl
185 ; X32: movswl
186 ; X32: subl
179187 ; X32: cmov
180188 ; X32: lock
181189 ; X32: cmpxchgw
186194
187195 define void @atomic_fetch_umax16(i16 %x) nounwind {
188196 %t1 = atomicrmw umax i16* @sc16, i16 %x acquire
189 ; X64: cmpw
190 ; X64: cmov
191 ; X64: lock
192 ; X64: cmpxchgw
193
194 ; X32: cmpw
197 ; X64: movzwl
198 ; X64: movzwl
199 ; X64: subl
200 ; X64: cmov
201 ; X64: lock
202 ; X64: cmpxchgw
203
204 ; X32: movzwl
205 ; X32: movzwl
206 ; X32: subl
195207 ; X32: cmov
196208 ; X32: lock
197209 ; X32: cmpxchgw
202214
203215 define void @atomic_fetch_umin16(i16 %x) nounwind {
204216 %t1 = atomicrmw umin i16* @sc16, i16 %x acquire
205 ; X64: cmpw
206 ; X64: cmov
207 ; X64: lock
208 ; X64: cmpxchgw
209 ; X32: cmpw
217 ; X64: movzwl
218 ; X64: movzwl
219 ; X64: subl
220 ; X64: cmov
221 ; X64: lock
222 ; X64: cmpxchgw
223
224 ; X32: movzwl
225 ; X32: movzwl
226 ; X32: subl
210227 ; X32: cmov
211228 ; X32: lock
212229 ; X32: cmpxchgw
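The movswl/movzwl and subl lines now expected in the i16 min/max tests follow from the expansion emitting a plain icmp plus select inside the loop: the i16 compare is widened (sign-extended for signed min/max, zero-extended for the unsigned variants) and carried out as a 32-bit subtract before the cmov. A minimal sketch of the per-iteration IR, with invented names:

define i16 @max16_step_sketch(i16 %old, i16 %x) {
  ; signed max of the loaded value and the operand, as built inside the retry loop
  %cmp = icmp sgt i16 %old, %x
  %new = select i1 %cmp, i16 %old, i16 %x
  ret i16 %new
}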
44 @sc32 = external global i32
55
66 define void @atomic_fetch_add32() nounwind {
7 ; X64: atomic_fetch_add32
8 ; X32: atomic_fetch_add32
7 ; X64-LABEL: atomic_fetch_add32:
8 ; X32-LABEL: atomic_fetch_add32:
99 entry:
1010 ; 32-bit
1111 %t1 = atomicrmw add i32* @sc32, i32 1 acquire
3434 }
3535
3636 define void @atomic_fetch_sub32() nounwind {
37 ; X64: atomic_fetch_sub32
38 ; X32: atomic_fetch_sub32
37 ; X64-LABEL: atomic_fetch_sub32:
38 ; X32-LABEL: atomic_fetch_sub32:
3939 %t1 = atomicrmw sub i32* @sc32, i32 1 acquire
4040 ; X64: lock
4141 ; X64: decl
6262 }
6363
6464 define void @atomic_fetch_and32() nounwind {
65 ; X64: atomic_fetch_and32
66 ; X32: atomic_fetch_and32
65 ; X64-LABEL: atomic_fetch_and32:
66 ; X32-LABEL: atomic_fetch_and32:
6767 %t1 = atomicrmw and i32* @sc32, i32 3 acquire
6868 ; X64: lock
6969 ; X64: andl $3
8787 }
8888
8989 define void @atomic_fetch_or32() nounwind {
90 ; X64: atomic_fetch_or32
91 ; X32: atomic_fetch_or32
90 ; X64-LABEL: atomic_fetch_or32:
91 ; X32-LABEL: atomic_fetch_or32:
9292 %t1 = atomicrmw or i32* @sc32, i32 3 acquire
9393 ; X64: lock
9494 ; X64: orl $3
112112 }
113113
114114 define void @atomic_fetch_xor32() nounwind {
115 ; X64: atomic_fetch_xor32
116 ; X32: atomic_fetch_xor32
115 ; X64-LABEL: atomic_fetch_xor32:
116 ; X32-LABEL: atomic_fetch_xor32:
117117 %t1 = atomicrmw xor i32* @sc32, i32 3 acquire
118118 ; X64: lock
119119 ; X64: xorl $3
137137 }
138138
139139 define void @atomic_fetch_nand32(i32 %x) nounwind {
140 ; X64: atomic_fetch_nand32
141 ; X32: atomic_fetch_nand32
140 ; X64-LABEL: atomic_fetch_nand32:
141 ; X32-LABEL: atomic_fetch_nand32:
142142 %t1 = atomicrmw nand i32* @sc32, i32 %x acquire
143143 ; X64: andl
144144 ; X64: notl
154154 }
155155
156156 define void @atomic_fetch_max32(i32 %x) nounwind {
157 ; X64-LABEL: atomic_fetch_max32:
158 ; X32-LABEL: atomic_fetch_max32:
159
157160 %t1 = atomicrmw max i32* @sc32, i32 %x acquire
158 ; X64: cmpl
161 ; X64: subl
159162 ; X64: cmov
160163 ; X64: lock
161164 ; X64: cmpxchgl
162165
163 ; X32: cmpl
166 ; X32: subl
164167 ; X32: cmov
165168 ; X32: lock
166169 ; X32: cmpxchgl
167170
168 ; NOCMOV: cmpl
169 ; NOCMOV: jl
171 ; NOCMOV: subl
172 ; NOCMOV: jge
170173 ; NOCMOV: lock
171174 ; NOCMOV: cmpxchgl
172175 ret void
176179 }
177180
178181 define void @atomic_fetch_min32(i32 %x) nounwind {
182 ; X64-LABEL: atomic_fetch_min32:
183 ; X32-LABEL: atomic_fetch_min32:
184 ; NOCMOV-LABEL: atomic_fetch_min32:
185
179186 %t1 = atomicrmw min i32* @sc32, i32 %x acquire
180 ; X64: cmpl
187 ; X64: subl
181188 ; X64: cmov
182189 ; X64: lock
183190 ; X64: cmpxchgl
184191
185 ; X32: cmpl
192 ; X32: subl
186193 ; X32: cmov
187194 ; X32: lock
188195 ; X32: cmpxchgl
189196
190 ; NOCMOV: cmpl
191 ; NOCMOV: jg
197 ; NOCMOV: subl
198 ; NOCMOV: jle
192199 ; NOCMOV: lock
193200 ; NOCMOV: cmpxchgl
194201 ret void
198205 }
199206
200207 define void @atomic_fetch_umax32(i32 %x) nounwind {
208 ; X64-LABEL: atomic_fetch_umax32:
209 ; X32-LABEL: atomic_fetch_umax32:
210 ; NOCMOV-LABEL: atomic_fetch_umax32:
211
201212 %t1 = atomicrmw umax i32* @sc32, i32 %x acquire
202 ; X64: cmpl
213 ; X64: subl
203214 ; X64: cmov
204215 ; X64: lock
205216 ; X64: cmpxchgl
206217
207 ; X32: cmpl
218 ; X32: subl
208219 ; X32: cmov
209220 ; X32: lock
210221 ; X32: cmpxchgl
211222
212 ; NOCMOV: cmpl
223 ; NOCMOV: subl
224 ; NOCMOV: ja
225 ; NOCMOV: lock
226 ; NOCMOV: cmpxchgl
227 ret void
228 ; X64: ret
229 ; X32: ret
230 ; NOCMOV: ret
231 }
232
233 define void @atomic_fetch_umin32(i32 %x) nounwind {
234 ; X64-LABEL: atomic_fetch_umin32:
235 ; X32-LABEL: atomic_fetch_umin32:
236 ; NOCMOV-LABEL: atomic_fetch_umin32:
237
238 %t1 = atomicrmw umin i32* @sc32, i32 %x acquire
239 ; X64: subl
240 ; X64: cmov
241 ; X64: lock
242 ; X64: cmpxchgl
243
244 ; X32: subl
245 ; X32: cmov
246 ; X32: lock
247 ; X32: cmpxchgl
248
249 ; NOCMOV: subl
213250 ; NOCMOV: jb
214251 ; NOCMOV: lock
215252 ; NOCMOV: cmpxchgl
219256 ; NOCMOV: ret
220257 }
221258
222 define void @atomic_fetch_umin32(i32 %x) nounwind {
223 %t1 = atomicrmw umin i32* @sc32, i32 %x acquire
224 ; X64: cmpl
225 ; X64: cmov
226 ; X64: lock
227 ; X64: cmpxchgl
228
229 ; X32: cmpl
230 ; X32: cmov
231 ; X32: lock
232 ; X32: cmpxchgl
233
234 ; NOCMOV: cmpl
235 ; NOCMOV: ja
236 ; NOCMOV: lock
237 ; NOCMOV: cmpxchgl
238 ret void
239 ; X64: ret
240 ; X32: ret
241 ; NOCMOV: ret
242 }
243
244259 define void @atomic_fetch_cmpxchg32() nounwind {
260 ; X64-LABEL: atomic_fetch_cmpxchg32:
261 ; X32-LABEL: atomic_fetch_cmpxchg32:
262
245263 %t1 = cmpxchg i32* @sc32, i32 0, i32 1 acquire acquire
246264 ; X64: lock
247265 ; X64: cmpxchgl
253271 }
254272
255273 define void @atomic_fetch_store32(i32 %x) nounwind {
274 ; X64-LABEL: atomic_fetch_store32:
275 ; X32-LABEL: atomic_fetch_store32:
276
256277 store atomic i32 %x, i32* @sc32 release, align 4
257278 ; X64-NOT: lock
258279 ; X64: movl
264285 }
265286
266287 define void @atomic_fetch_swap32(i32 %x) nounwind {
288 ; X64-LABEL: atomic_fetch_swap32:
289 ; X32-LABEL: atomic_fetch_swap32:
290
267291 %t1 = atomicrmw xchg i32* @sc32, i32 %x acquire
268292 ; X64-NOT: lock
269293 ; X64: xchgl
22 @sc64 = external global i64
33
44 define void @atomic_fetch_add64() nounwind {
5 ; X64: atomic_fetch_add64
5 ; X64-LABEL: atomic_fetch_add64:
6 ; X32-LABEL: atomic_fetch_add64:
67 entry:
78 %t1 = atomicrmw add i64* @sc64, i64 1 acquire
89 ; X64: lock
2122 }
2223
2324 define void @atomic_fetch_sub64() nounwind {
24 ; X64: atomic_fetch_sub64
25 ; X64-LABEL: atomic_fetch_sub64:
26 ; X32-LABEL: atomic_fetch_sub64:
2527 %t1 = atomicrmw sub i64* @sc64, i64 1 acquire
2628 ; X64: lock
2729 ; X64: decq
3941 }
4042
4143 define void @atomic_fetch_and64() nounwind {
42 ; X64: atomic_fetch_and64
44 ; X64-LABEL: atomic_fetch_and64:
45 ; X32-LABEL: atomic_fetch_and64:
4346 %t1 = atomicrmw and i64* @sc64, i64 3 acquire
4447 ; X64: lock
4548 ; X64: andq $3
5558 }
5659
5760 define void @atomic_fetch_or64() nounwind {
58 ; X64: atomic_fetch_or64
61 ; X64-LABEL: atomic_fetch_or64:
62 ; X32-LABEL: atomic_fetch_or64:
5963 %t1 = atomicrmw or i64* @sc64, i64 3 acquire
6064 ; X64: lock
6165 ; X64: orq $3
7175 }
7276
7377 define void @atomic_fetch_xor64() nounwind {
74 ; X64: atomic_fetch_xor64
78 ; X64-LABEL: atomic_fetch_xor64:
79 ; X32-LABEL: atomic_fetch_xor64:
7580 %t1 = atomicrmw xor i64* @sc64, i64 3 acquire
7681 ; X64: lock
7782 ; X64: xorq $3
8792 }
8893
8994 define void @atomic_fetch_nand64(i64 %x) nounwind {
90 ; X64: atomic_fetch_nand64
91 ; X32: atomic_fetch_nand64
95 ; X64-LABEL: atomic_fetch_nand64:
96 ; X32-LABEL: atomic_fetch_nand64:
9297 %t1 = atomicrmw nand i64* @sc64, i64 %x acquire
9398 ; X64: andq
9499 ; X64: notq
106111 }
107112
108113 define void @atomic_fetch_max64(i64 %x) nounwind {
114 ; X64-LABEL: atomic_fetch_max64:
115 ; X32-LABEL: atomic_fetch_max64:
109116 %t1 = atomicrmw max i64* @sc64, i64 %x acquire
110 ; X64: cmpq
117 ; X64: subq
111118 ; X64: cmov
112119 ; X64: lock
113120 ; X64: cmpxchgq
125132 }
126133
127134 define void @atomic_fetch_min64(i64 %x) nounwind {
135 ; X64-LABEL: atomic_fetch_min64:
136 ; X32-LABEL: atomic_fetch_min64:
128137 %t1 = atomicrmw min i64* @sc64, i64 %x acquire
129 ; X64: cmpq
138 ; X64: subq
130139 ; X64: cmov
131140 ; X64: lock
132141 ; X64: cmpxchgq
144153 }
145154
146155 define void @atomic_fetch_umax64(i64 %x) nounwind {
156 ; X64-LABEL: atomic_fetch_umax64:
157 ; X32-LABEL: atomic_fetch_umax64:
147158 %t1 = atomicrmw umax i64* @sc64, i64 %x acquire
148 ; X64: cmpq
159 ; X64: subq
149160 ; X64: cmov
150161 ; X64: lock
151162 ; X64: cmpxchgq
163174 }
164175
165176 define void @atomic_fetch_umin64(i64 %x) nounwind {
177 ; X64-LABEL: atomic_fetch_umin64:
178 ; X32-LABEL: atomic_fetch_umin64:
166179 %t1 = atomicrmw umin i64* @sc64, i64 %x acquire
167 ; X64: cmpq
180 ; X64: subq
168181 ; X64: cmov
169182 ; X64: lock
170183 ; X64: cmpxchgq
182195 }
183196
184197 define void @atomic_fetch_cmpxchg64() nounwind {
198 ; X64-LABEL: atomic_fetch_cmpxchg64:
199 ; X32-LABEL: atomic_fetch_cmpxchg64:
185200 %t1 = cmpxchg i64* @sc64, i64 0, i64 1 acquire acquire
186201 ; X64: lock
187202 ; X64: cmpxchgq
193208 }
194209
195210 define void @atomic_fetch_store64(i64 %x) nounwind {
211 ; X64-LABEL: atomic_fetch_store64:
212 ; X32-LABEL: atomic_fetch_store64:
196213 store atomic i64 %x, i64* @sc64 release, align 8
197214 ; X64-NOT: lock
198215 ; X64: movq
204221 }
205222
206223 define void @atomic_fetch_swap64(i64 %x) nounwind {
224 ; X64-LABEL: atomic_fetch_swap64:
225 ; X32-LABEL: atomic_fetch_swap64:
207226 %t1 = atomicrmw xchg i64* @sc64, i64 %x acquire
208227 ; X64-NOT: lock
209228 ; X64: xchgq
22 @sc64 = external global i64
33
44 define void @atomic_fetch_add64() nounwind {
5 ; X32: atomic_fetch_add64
5 ; X64-LABEL: atomic_fetch_add64:
6 ; X32-LABEL: atomic_fetch_add64:
67 entry:
78 %t1 = atomicrmw add i64* @sc64, i64 1 acquire
89 ; X32: addl
2930 }
3031
3132 define void @atomic_fetch_sub64() nounwind {
32 ; X32: atomic_fetch_sub64
33 ; X64-LABEL: atomic_fetch_sub64:
34 ; X32-LABEL: atomic_fetch_sub64:
3335 %t1 = atomicrmw sub i64* @sc64, i64 1 acquire
36 ; X32: addl $-1
37 ; X32: adcl $-1
38 ; X32: lock
39 ; X32: cmpxchg8b
40 %t2 = atomicrmw sub i64* @sc64, i64 3 acquire
41 ; X32: addl $-3
42 ; X32: adcl $-1
43 ; X32: lock
44 ; X32: cmpxchg8b
45 %t3 = atomicrmw sub i64* @sc64, i64 5 acquire
46 ; X32: addl $-5
47 ; X32: adcl $-1
48 ; X32: lock
49 ; X32: cmpxchg8b
50 %t4 = atomicrmw sub i64* @sc64, i64 %t3 acquire
3451 ; X32: subl
3552 ; X32: sbbl
3653 ; X32: lock
3754 ; X32: cmpxchg8b
38 %t2 = atomicrmw sub i64* @sc64, i64 3 acquire
39 ; X32: subl
40 ; X32: sbbl
41 ; X32: lock
42 ; X32: cmpxchg8b
43 %t3 = atomicrmw sub i64* @sc64, i64 5 acquire
44 ; X32: subl
45 ; X32: sbbl
46 ; X32: lock
47 ; X32: cmpxchg8b
48 %t4 = atomicrmw sub i64* @sc64, i64 %t3 acquire
49 ; X32: subl
50 ; X32: sbbl
51 ; X32: lock
52 ; X32: cmpxchg8b
5355 ret void
5456 ; X32: ret
5557 }
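The switch from subl/sbbl to addl $-1/adcl $-1 (and $-3, $-5) in the constant cases above is one of the optimisation benefits of expanding in IR: the sub of a constant inside the loop body is canonicalised to an add of its negation before lowering, while the variable-operand case still matches subl/sbbl. A tiny sketch of the equivalent step, names invented purely for illustration:

define i64 @sub_const_canonical_sketch(i64 %old) {
  ; sub i64 %old, 1 is equivalent to this add, which lowers on i686 to
  ; addl $-1 / adcl $-1 feeding the cmpxchg8b loop
  %new = add i64 %old, -1
  ret i64 %new
}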
5658
5759 define void @atomic_fetch_and64() nounwind {
58 ; X32: atomic_fetch_and64
60 ; X64-LABEL: atomic_fetch_and64:
61 ; X32-LABEL: atomic_fetch_and64:
5962 %t1 = atomicrmw and i64* @sc64, i64 3 acquire
60 ; X32: andl
61 ; X32: andl
62 ; X32: lock
63 ; X32: cmpxchg8b
64 %t2 = atomicrmw and i64* @sc64, i64 5 acquire
65 ; X32: andl
66 ; X32: andl
63 ; X32: andl $3
64 ; X32-NOT: andl
65 ; X32: lock
66 ; X32: cmpxchg8b
67 %t2 = atomicrmw and i64* @sc64, i64 4294967297 acquire
68 ; X32: andl $1
69 ; X32: andl $1
6770 ; X32: lock
6871 ; X32: cmpxchg8b
6972 %t3 = atomicrmw and i64* @sc64, i64 %t2 acquire
7679 }
7780
7881 define void @atomic_fetch_or64() nounwind {
79 ; X32: atomic_fetch_or64
82 ; X64-LABEL: atomic_fetch_or64:
83 ; X32-LABEL: atomic_fetch_or64:
8084 %t1 = atomicrmw or i64* @sc64, i64 3 acquire
81 ; X32: orl
82 ; X32: orl
83 ; X32: lock
84 ; X32: cmpxchg8b
85 %t2 = atomicrmw or i64* @sc64, i64 5 acquire
86 ; X32: orl
87 ; X32: orl
85 ; X32: orl $3
86 ; X32-NOT: orl
87 ; X32: lock
88 ; X32: cmpxchg8b
89 %t2 = atomicrmw or i64* @sc64, i64 4294967297 acquire
90 ; X32: orl $1
91 ; X32: orl $1
8892 ; X32: lock
8993 ; X32: cmpxchg8b
9094 %t3 = atomicrmw or i64* @sc64, i64 %t2 acquire
97101 }
98102
99103 define void @atomic_fetch_xor64() nounwind {
100 ; X32: atomic_fetch_xor64
104 ; X64-LABEL: atomic_fetch_xor64:
105 ; X32-LABEL: atomic_fetch_xor64:
101106 %t1 = atomicrmw xor i64* @sc64, i64 3 acquire
102107 ; X32: xorl
103 ; X32: xorl
104 ; X32: lock
105 ; X32: cmpxchg8b
106 %t2 = atomicrmw xor i64* @sc64, i64 5 acquire
107 ; X32: xorl
108 ; X32: xorl
108 ; X32-NOT: xorl
109 ; X32: lock
110 ; X32: cmpxchg8b
111 %t2 = atomicrmw xor i64* @sc64, i64 4294967297 acquire
112 ; X32: xorl $1
113 ; X32: xorl $1
109114 ; X32: lock
110115 ; X32: cmpxchg8b
111116 %t3 = atomicrmw xor i64* @sc64, i64 %t2 acquire
118123 }
119124
120125 define void @atomic_fetch_nand64(i64 %x) nounwind {
121 ; X32: atomic_fetch_nand64
126 ; X64-LABEL: atomic_fetch_nand64:
127 ; X32-LABEL: atomic_fetch_nand64:
122128 %t1 = atomicrmw nand i64* @sc64, i64 %x acquire
123129 ; X32: andl
124130 ; X32: andl
131137 }
132138
133139 define void @atomic_fetch_max64(i64 %x) nounwind {
140 ; X64-LABEL: atomic_fetch_max64:
141 ; X32-LABEL: atomic_fetch_max64:
134142 %t1 = atomicrmw max i64* @sc64, i64 %x acquire
135 ; X32: cmpl
136 ; X32: cmpl
137 ; X32: cmov
143 ; X32: subl
144 ; X32: subl
138145 ; X32: cmov
139146 ; X32: cmov
140147 ; X32: lock
144151 }
145152
146153 define void @atomic_fetch_min64(i64 %x) nounwind {
154 ; X64-LABEL: atomic_fetch_min64:
155 ; X32-LABEL: atomic_fetch_min64:
147156 %t1 = atomicrmw min i64* @sc64, i64 %x acquire
148 ; X32: cmpl
149 ; X32: cmpl
150 ; X32: cmov
157 ; X32: subl
158 ; X32: subl
151159 ; X32: cmov
152160 ; X32: cmov
153161 ; X32: lock
157165 }
158166
159167 define void @atomic_fetch_umax64(i64 %x) nounwind {
168 ; X64-LABEL: atomic_fetch_umax64:
169 ; X32-LABEL: atomic_fetch_umax64:
160170 %t1 = atomicrmw umax i64* @sc64, i64 %x acquire
161 ; X32: cmpl
162 ; X32: cmpl
163 ; X32: cmov
171 ; X32: subl
172 ; X32: subl
164173 ; X32: cmov
165174 ; X32: cmov
166175 ; X32: lock
170179 }
171180
172181 define void @atomic_fetch_umin64(i64 %x) nounwind {
182 ; X64-LABEL: atomic_fetch_umin64:
183 ; X32-LABEL: atomic_fetch_umin64:
173184 %t1 = atomicrmw umin i64* @sc64, i64 %x acquire
174 ; X32: cmpl
175 ; X32: cmpl
176 ; X32: cmov
185 ; X32: subl
186 ; X32: subl
177187 ; X32: cmov
178188 ; X32: cmov
179189 ; X32: lock
183193 }
184194
185195 define void @atomic_fetch_cmpxchg64() nounwind {
196 ; X64-LABEL: atomic_fetch_cmpxchg64:
197 ; X32-LABEL: atomic_fetch_cmpxchg64:
186198 %t1 = cmpxchg i64* @sc64, i64 0, i64 1 acquire acquire
187199 ; X32: lock
188200 ; X32: cmpxchg8b
191203 }
192204
193205 define void @atomic_fetch_store64(i64 %x) nounwind {
206 ; X64-LABEL: atomic_fetch_store64:
207 ; X32-LABEL: atomic_fetch_store64:
194208 store atomic i64 %x, i64* @sc64 release, align 8
195209 ; X32: lock
196210 ; X32: cmpxchg8b
199213 }
200214
201215 define void @atomic_fetch_swap64(i64 %x) nounwind {
216 ; X64-LABEL: atomic_fetch_swap64:
217 ; X32-LABEL: atomic_fetch_swap64:
202218 %t1 = atomicrmw xchg i64* @sc64, i64 %x acquire
203219 ; X32: lock
204220 ; X32: xchg8b
33 @sc8 = external global i8
44
55 define void @atomic_fetch_add8() nounwind {
6 ; X64: atomic_fetch_add8
7 ; X32: atomic_fetch_add8
6 ; X64-LABEL: atomic_fetch_add8:
7 ; X32-LABEL: atomic_fetch_add8:
88 entry:
99 ; 32-bit
1010 %t1 = atomicrmw add i8* @sc8, i8 1 acquire
3333 }
3434
3535 define void @atomic_fetch_sub8() nounwind {
36 ; X64: atomic_fetch_sub8
37 ; X32: atomic_fetch_sub8
36 ; X64-LABEL: atomic_fetch_sub8:
37 ; X32-LABEL: atomic_fetch_sub8:
3838 %t1 = atomicrmw sub i8* @sc8, i8 1 acquire
3939 ; X64: lock
4040 ; X64: decb
6161 }
6262
6363 define void @atomic_fetch_and8() nounwind {
64 ; X64: atomic_fetch_and8
65 ; X32: atomic_fetch_and8
64 ; X64-LABEL: atomic_fetch_and8:
65 ; X32-LABEL: atomic_fetch_and8:
6666 %t1 = atomicrmw and i8* @sc8, i8 3 acquire
6767 ; X64: lock
6868 ; X64: andb $3
8686 }
8787
8888 define void @atomic_fetch_or8() nounwind {
89 ; X64: atomic_fetch_or8
90 ; X32: atomic_fetch_or8
89 ; X64-LABEL: atomic_fetch_or8:
90 ; X32-LABEL: atomic_fetch_or8:
9191 %t1 = atomicrmw or i8* @sc8, i8 3 acquire
9292 ; X64: lock
9393 ; X64: orb $3
111111 }
112112
113113 define void @atomic_fetch_xor8() nounwind {
114 ; X64: atomic_fetch_xor8
115 ; X32: atomic_fetch_xor8
114 ; X64-LABEL: atomic_fetch_xor8:
115 ; X32-LABEL: atomic_fetch_xor8:
116116 %t1 = atomicrmw xor i8* @sc8, i8 3 acquire
117117 ; X64: lock
118118 ; X64: xorb $3
136136 }
137137
138138 define void @atomic_fetch_nand8(i8 %x) nounwind {
139 ; X64: atomic_fetch_nand8
140 ; X32: atomic_fetch_nand8
139 ; X64-LABEL: atomic_fetch_nand8:
140 ; X32-LABEL: atomic_fetch_nand8:
141141 %t1 = atomicrmw nand i8* @sc8, i8 %x acquire
142142 ; X64: andb
143143 ; X64: notb
153153 }
154154
155155 define void @atomic_fetch_max8(i8 %x) nounwind {
156 ; X64-LABEL: atomic_fetch_max8:
157 ; X32-LABEL: atomic_fetch_max8:
156158 %t1 = atomicrmw max i8* @sc8, i8 %x acquire
157 ; X64: cmpb
158 ; X64: cmov
159 ; X64: lock
160 ; X64: cmpxchgb
161
162 ; X32: cmpb
163 ; X32: cmov
159 ; X64: movsbl
160 ; X64: movsbl
161 ; X64: subl
162 ; X64: lock
163 ; X64: cmpxchgb
164
165 ; X32: movsbl
166 ; X32: movsbl
167 ; X32: subl
164168 ; X32: lock
165169 ; X32: cmpxchgb
166170 ret void
169173 }
170174
171175 define void @atomic_fetch_min8(i8 %x) nounwind {
176 ; X64-LABEL: atomic_fetch_min8:
177 ; X32-LABEL: atomic_fetch_min8:
172178 %t1 = atomicrmw min i8* @sc8, i8 %x acquire
173 ; X64: cmpb
174 ; X64: cmov
175 ; X64: lock
176 ; X64: cmpxchgb
177
178 ; X32: cmpb
179 ; X32: cmov
179 ; X64: movsbl
180 ; X64: movsbl
181 ; X64: subl
182 ; X64: lock
183 ; X64: cmpxchgb
184
185 ; X32: movsbl
186 ; X32: movsbl
187 ; X32: subl
180188 ; X32: lock
181189 ; X32: cmpxchgb
182190 ret void
185193 }
186194
187195 define void @atomic_fetch_umax8(i8 %x) nounwind {
196 ; X64-LABEL: atomic_fetch_umax8:
197 ; X32-LABEL: atomic_fetch_umax8:
188198 %t1 = atomicrmw umax i8* @sc8, i8 %x acquire
189 ; X64: cmpb
190 ; X64: cmov
191 ; X64: lock
192 ; X64: cmpxchgb
193
194 ; X32: cmpb
195 ; X32: cmov
199 ; X64: movzbl
200 ; X64: movzbl
201 ; X64: subl
202 ; X64: lock
203 ; X64: cmpxchgb
204
205 ; X32: movzbl
206 ; X32: movzbl
207 ; X32: subl
196208 ; X32: lock
197209 ; X32: cmpxchgb
198210 ret void
201213 }
202214
203215 define void @atomic_fetch_umin8(i8 %x) nounwind {
216 ; X64-LABEL: atomic_fetch_umin8:
217 ; X32-LABEL: atomic_fetch_umin8:
204218 %t1 = atomicrmw umin i8* @sc8, i8 %x acquire
205 ; X64: cmpb
206 ; X64: cmov
207 ; X64: lock
208 ; X64: cmpxchgb
209 ; X32: cmpb
210 ; X32: cmov
219 ; X64: movzbl
220 ; X64: movzbl
221 ; X64: subl
222 ; X64: lock
223 ; X64: cmpxchgb
224
225 ; X32: movzbl
226 ; X32: movzbl
227 ; X32: subl
211228 ; X32: lock
212229 ; X32: cmpxchgb
213230 ret void
216233 }
217234
218235 define void @atomic_fetch_cmpxchg8() nounwind {
236 ; X64-LABEL: atomic_fetch_cmpxchg8:
237 ; X32-LABEL: atomic_fetch_cmpxchg8:
219238 %t1 = cmpxchg i8* @sc8, i8 0, i8 1 acquire acquire
220239 ; X64: lock
221240 ; X64: cmpxchgb
227246 }
228247
229248 define void @atomic_fetch_store8(i8 %x) nounwind {
249 ; X64-LABEL: atomic_fetch_store8:
250 ; X32-LABEL: atomic_fetch_store8:
230251 store atomic i8 %x, i8* @sc8 release, align 4
231252 ; X64-NOT: lock
232253 ; X64: movb
238259 }
239260
240261 define void @atomic_fetch_swap8(i8 %x) nounwind {
262 ; X64-LABEL: atomic_fetch_swap8:
263 ; X32-LABEL: atomic_fetch_swap8:
241264 %t1 = atomicrmw xchg i8* @sc8, i8 %x acquire
242265 ; X64-NOT: lock
243266 ; X64: xchgb
0 ; RUN: llc < %s -mcpu=generic -march=x86 -mattr=+cmov -verify-machineinstrs | FileCheck %s
0 ; RUN: llc < %s -mcpu=generic -march=x86 -mattr=+cmov,cx16 -verify-machineinstrs | FileCheck %s
11
22 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
33
109109 %17 = extractvalue { i32, i1 } %pair17, 0
110110 store i32 %17, i32* %old
111111 ; CHECK: movl [[R17atomic:.*]], %eax
112 ; CHECK: movl $1401, %[[R17mask:[a-z]*]]
113 ; CHECK: andl %eax, %[[R17mask]]
114 ; CHECK: notl %[[R17mask]]
112 ; CHECK: movl %eax, %[[R17mask:[a-z]*]]
113 ; CHECK: notl %[[R17mask]]
114 ; CHECK: orl $-1402, %[[R17mask]]
115115 ; CHECK: lock
116116 ; CHECK: cmpxchgl %[[R17mask]], [[R17atomic]]
117117 ; CHECK: jne
118118 ; CHECK: movl %eax,
119119 %18 = atomicrmw nand i32* %val2, i32 1401 monotonic
120120 store i32 %18, i32* %old
121 ; CHECK: andl
122 ; CHECK: andl
123121 ; CHECK: notl
124122 ; CHECK: notl
123 ; CHECK: orl $252645135
124 ; CHECK: orl $252645135
125125 ; CHECK: lock
126126 ; CHECK: cmpxchg8b
127127 %19 = atomicrmw nand i64* %temp64, i64 17361641481138401520 monotonic
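The reworked checks in this hunk show generic combines reaching the expanded loop body as well: not (x & 1401) becomes (not x) | -1402 by De Morgan's law (since ~1401 == -1402), and the 64-bit nand with 0xF0F0F0F0F0F0F0F0 likewise becomes two notl plus two orl $252645135 (0x0F0F0F0F) instructions. A small sketch of the rewritten step, purely illustrative:

define i32 @nand_const_sketch(i32 %old) {
  ; not (%old & 1401)  ==  (not %old) | -1402
  %not = xor i32 %old, -1
  %new = or i32 %not, -1402
  ret i32 %new
}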
44 ; CHECK: atomic_maxmin_i8
55 %1 = atomicrmw max i8* @sc8, i8 5 acquire
66 ; CHECK: [[LABEL1:\.?LBB[0-9]+_[0-9]+]]:
7 ; CHECK: cmpb
8 ; CHECK: cmovl
7 ; CHECK: movsbl
8 ; CHECK: cmpl
99 ; CHECK: lock
1010 ; CHECK-NEXT: cmpxchgb
1111 ; CHECK: jne [[LABEL1]]
1212 %2 = atomicrmw min i8* @sc8, i8 6 acquire
1313 ; CHECK: [[LABEL3:\.?LBB[0-9]+_[0-9]+]]:
14 ; CHECK: cmpb
15 ; CHECK: cmovg
14 ; CHECK: movsbl
15 ; CHECK: cmpl
1616 ; CHECK: lock
1717 ; CHECK-NEXT: cmpxchgb
1818 ; CHECK: jne [[LABEL3]]
1919 %3 = atomicrmw umax i8* @sc8, i8 7 acquire
2020 ; CHECK: [[LABEL5:\.?LBB[0-9]+_[0-9]+]]:
21 ; CHECK: cmpb
22 ; CHECK: cmovb
21 ; CHECK: movzbl
22 ; CHECK: cmpl
2323 ; CHECK: lock
2424 ; CHECK-NEXT: cmpxchgb
2525 ; CHECK: jne [[LABEL5]]
2626 %4 = atomicrmw umin i8* @sc8, i8 8 acquire
2727 ; CHECK: [[LABEL7:\.?LBB[0-9]+_[0-9]+]]:
28 ; CHECK: cmpb
29 ; CHECK: cmova
28 ; CHECK: movzbl
29 ; CHECK: cmpl
3030 ; CHECK: lock
3131 ; CHECK-NEXT: cmpxchgb
3232 ; CHECK: jne [[LABEL7]]