[X86] Use the generic AtomicExpandPass instead of X86AtomicExpandPass

This required a new hook, hasLoadLinkedStoreConditional, to know whether to expand atomics to LL/SC (ARM, AArch64, and in a future patch Power) or to cmpxchg (X86). Apart from that, the new code in AtomicExpandPass is mostly moved from X86AtomicExpandPass. The main result of this patch is to get rid of that pass, which duplicated a lot of code with AtomicExpandPass.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@217928 91177308-0d34-0410-b5e6-96231b3b80d8

Robin Morisset, 6 years ago
12 changed file(s) with 215 addition(s) and 342 deletion(s).
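The key API change is easiest to see from a backend's point of view: a target that overrides the new hasLoadLinkedStoreConditional() hook to return true gets its atomicrmw (and now cmpxchg) instructions lowered through emitLoadLinked/emitStoreConditional loops, while a target that keeps the default of false (such as X86) gets cmpxchg-based loops. The fragment below is a sketch only and is not part of this patch: MyTargetLowering is a hypothetical class, its constructor and the rest of the usual target boilerplate are omitted, and the intrinsic-emitting bodies are placeholders; only the three override signatures are taken from the hunks in this diff.

#include "llvm/IR/IRBuilder.h"
#include "llvm/Target/TargetLowering.h"
using namespace llvm;

class MyTargetLowering : public TargetLowering {   // hypothetical target; boilerplate omitted
public:
  // Opt in to LL/SC expansion: AtomicExpandPass will build a
  // load-linked/store-conditional loop instead of a cmpxchg loop,
  // and will also expand AtomicCmpXchgInst.
  bool hasLoadLinkedStoreConditional() const override { return true; }

  Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
                        AtomicOrdering Ord) const override {
    // A real target emits its exclusive-load intrinsic here
    // (ldrex/ldaex on ARM, ldxr/ldaxr on AArch64).
    return nullptr; // placeholder body
  }

  Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val, Value *Addr,
                              AtomicOrdering Ord) const override {
    // A real target emits its store-conditional intrinsic and returns the
    // i32 status value (0 on success) that the expansion loop branches on.
    return nullptr; // placeholder body
  }
};

AArch64 and ARM opt in this way in the hunks below; X86 relies on the default and keeps its cmpxchg8b/16b-based expansion, which is why X86AtomicExpandPass can be deleted.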
935935 /// \name Helpers for atomic expansion.
936936 /// @{
937937
938 /// True if AtomicExpandPass should use emitLoadLinked/emitStoreConditional
939 /// and expand AtomicCmpXchgInst.
940 virtual bool hasLoadLinkedStoreConditional() const { return false; }
941
938942 /// Perform a load-linked operation on Addr, returning a "Value *" with the
939943 /// corresponding pointee type. This may entail some non-trivial operations to
940944 /// truncate or reconstruct types that will be illegal in the backend. See
77 //===----------------------------------------------------------------------===//
88 //
99 // This file contains a pass (at IR level) to replace atomic instructions with
10 // appropriate (intrinsic-based) ldrex/strex loops.
10 // either (intrinsic-based) ldrex/strex loops or AtomicCmpXchg.
1111 //
1212 //===----------------------------------------------------------------------===//
1313
4343 bool expandAtomicLoad(LoadInst *LI);
4444 bool expandAtomicStore(StoreInst *SI);
4545 bool expandAtomicRMW(AtomicRMWInst *AI);
46 bool expandAtomicRMWToLLSC(AtomicRMWInst *AI);
47 bool expandAtomicRMWToCmpXchg(AtomicRMWInst *AI);
4648 bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
4749 };
4850 }
8789 MadeChange |= expandAtomicStore(SI);
8890 } else if (RMWI && TargetLowering->shouldExpandAtomicRMWInIR(RMWI)) {
8991 MadeChange |= expandAtomicRMW(RMWI);
90 } else if (CASI) {
92 } else if (CASI && TargetLowering->hasLoadLinkedStoreConditional()) {
9193 MadeChange |= expandAtomicCmpXchg(CASI);
9294 }
9395 }
126128 }
127129
128130 bool AtomicExpand::expandAtomicStore(StoreInst *SI) {
129 // The only atomic 64-bit store on ARM is an strexd that succeeds, which means
130 // we need a loop and the entire instruction is essentially an "atomicrmw
131 // xchg" that ignores the value loaded.
131 // This function is only called on atomic stores that are too large to be
132 // atomic if implemented as a native store. So we replace them with an
133 // atomic swap, which can be implemented for example as a ldrex/strex on ARM
134 // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes.
135 // It is the responsibility of the target to only return true in
136 // shouldExpandAtomicRMW in cases where this is required and possible.
132137 IRBuilder<> Builder(SI);
133138 AtomicRMWInst *AI =
134139 Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(),
140145 }
141146
142147 bool AtomicExpand::expandAtomicRMW(AtomicRMWInst *AI) {
148 if (TM->getSubtargetImpl()
149 ->getTargetLowering()
150 ->hasLoadLinkedStoreConditional())
151 return expandAtomicRMWToLLSC(AI);
152 else
153 return expandAtomicRMWToCmpXchg(AI);
154 }
155
156 /// Emit IR to implement the given atomicrmw operation on values in registers,
157 /// returning the new value.
158 static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder,
159 Value *Loaded, Value *Inc) {
160 Value *NewVal;
161 switch (Op) {
162 case AtomicRMWInst::Xchg:
163 return Inc;
164 case AtomicRMWInst::Add:
165 return Builder.CreateAdd(Loaded, Inc, "new");
166 case AtomicRMWInst::Sub:
167 return Builder.CreateSub(Loaded, Inc, "new");
168 case AtomicRMWInst::And:
169 return Builder.CreateAnd(Loaded, Inc, "new");
170 case AtomicRMWInst::Nand:
171 return Builder.CreateNot(Builder.CreateAnd(Loaded, Inc), "new");
172 case AtomicRMWInst::Or:
173 return Builder.CreateOr(Loaded, Inc, "new");
174 case AtomicRMWInst::Xor:
175 return Builder.CreateXor(Loaded, Inc, "new");
176 case AtomicRMWInst::Max:
177 NewVal = Builder.CreateICmpSGT(Loaded, Inc);
178 return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
179 case AtomicRMWInst::Min:
180 NewVal = Builder.CreateICmpSLE(Loaded, Inc);
181 return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
182 case AtomicRMWInst::UMax:
183 NewVal = Builder.CreateICmpUGT(Loaded, Inc);
184 return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
185 case AtomicRMWInst::UMin:
186 NewVal = Builder.CreateICmpULE(Loaded, Inc);
187 return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
188 default:
189 llvm_unreachable("Unknown atomic op");
190 }
191 }
192
193 bool AtomicExpand::expandAtomicRMWToLLSC(AtomicRMWInst *AI) {
143194 auto TLI = TM->getSubtargetImpl()->getTargetLowering();
144 AtomicOrdering Order = AI->getOrdering();
195 AtomicOrdering FenceOrder = AI->getOrdering();
145196 Value *Addr = AI->getPointerOperand();
146197 BasicBlock *BB = AI->getParent();
147198 Function *F = BB->getParent();
151202 // of everything. Otherwise, emitLeading/TrailingFence are no-op and we
152203 // should preserve the ordering.
153204 AtomicOrdering MemOpOrder =
154 TLI->getInsertFencesForAtomic() ? Monotonic : Order;
205 TLI->getInsertFencesForAtomic() ? Monotonic : FenceOrder;
155206
156207 // Given: atomicrmw some_op iN* %addr, iN %incr ordering
157208 //
178229 // the branch entirely.
179230 std::prev(BB->end())->eraseFromParent();
180231 Builder.SetInsertPoint(BB);
181 TLI->emitLeadingFence(Builder, Order, /*IsStore=*/true, /*IsLoad=*/true);
232 TLI->emitLeadingFence(Builder, FenceOrder, /*IsStore=*/true, /*IsLoad=*/true);
182233 Builder.CreateBr(LoopBB);
183234
184235 // Start the main loop block now that we've taken care of the preliminaries.
185236 Builder.SetInsertPoint(LoopBB);
186237 Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
187238
188 Value *NewVal;
189 switch (AI->getOperation()) {
190 case AtomicRMWInst::Xchg:
191 NewVal = AI->getValOperand();
192 break;
193 case AtomicRMWInst::Add:
194 NewVal = Builder.CreateAdd(Loaded, AI->getValOperand(), "new");
195 break;
196 case AtomicRMWInst::Sub:
197 NewVal = Builder.CreateSub(Loaded, AI->getValOperand(), "new");
198 break;
199 case AtomicRMWInst::And:
200 NewVal = Builder.CreateAnd(Loaded, AI->getValOperand(), "new");
201 break;
202 case AtomicRMWInst::Nand:
203 NewVal = Builder.CreateNot(Builder.CreateAnd(Loaded, AI->getValOperand()),
204 "new");
205 break;
206 case AtomicRMWInst::Or:
207 NewVal = Builder.CreateOr(Loaded, AI->getValOperand(), "new");
208 break;
209 case AtomicRMWInst::Xor:
210 NewVal = Builder.CreateXor(Loaded, AI->getValOperand(), "new");
211 break;
212 case AtomicRMWInst::Max:
213 NewVal = Builder.CreateICmpSGT(Loaded, AI->getValOperand());
214 NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
215 break;
216 case AtomicRMWInst::Min:
217 NewVal = Builder.CreateICmpSLE(Loaded, AI->getValOperand());
218 NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
219 break;
220 case AtomicRMWInst::UMax:
221 NewVal = Builder.CreateICmpUGT(Loaded, AI->getValOperand());
222 NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
223 break;
224 case AtomicRMWInst::UMin:
225 NewVal = Builder.CreateICmpULE(Loaded, AI->getValOperand());
226 NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
227 break;
228 default:
229 llvm_unreachable("Unknown atomic op");
230 }
239 Value *NewVal =
240 performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand());
231241
232242 Value *StoreSuccess =
233243 TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
236246 Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);
237247
238248 Builder.SetInsertPoint(ExitBB, ExitBB->begin());
239 TLI->emitTrailingFence(Builder, Order, /*IsStore=*/true, /*IsLoad=*/true);
249 TLI->emitTrailingFence(Builder, FenceOrder, /*IsStore=*/true, /*IsLoad=*/true);
240250
241251 AI->replaceAllUsesWith(Loaded);
252 AI->eraseFromParent();
253
254 return true;
255 }
256
257 bool AtomicExpand::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI) {
258 auto TargetLowering = TM->getSubtargetImpl()->getTargetLowering();
259 AtomicOrdering FenceOrder =
260 AI->getOrdering() == Unordered ? Monotonic : AI->getOrdering();
261 AtomicOrdering MemOpOrder =
262 TargetLowering->getInsertFencesForAtomic() ? Monotonic : FenceOrder;
263 Value *Addr = AI->getPointerOperand();
264 BasicBlock *BB = AI->getParent();
265 Function *F = BB->getParent();
266 LLVMContext &Ctx = F->getContext();
267
268 // Given: atomicrmw some_op iN* %addr, iN %incr ordering
269 //
270 // The standard expansion we produce is:
271 // [...]
272 // %init_loaded = load atomic iN* %addr
273 // br label %loop
274 // loop:
275 // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
276 // %new = some_op iN %loaded, %incr
277 // %pair = cmpxchg iN* %addr, iN %loaded, iN %new
278 // %new_loaded = extractvalue { iN, i1 } %pair, 0
279 // %success = extractvalue { iN, i1 } %pair, 1
280 // br i1 %success, label %atomicrmw.end, label %loop
281 // atomicrmw.end:
282 // [...]
283 BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end");
284 BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
285
286 // This grabs the DebugLoc from AI.
287 IRBuilder<> Builder(AI);
288
289 // The split call above "helpfully" added a branch at the end of BB (to the
290 // wrong place), but we want a load. It's easiest to just remove
291 // the branch entirely.
292 std::prev(BB->end())->eraseFromParent();
293 Builder.SetInsertPoint(BB);
294 TargetLowering->emitLeadingFence(Builder, FenceOrder,
295 /*IsStore=*/true, /*IsLoad=*/true);
296 LoadInst *InitLoaded = Builder.CreateLoad(Addr);
297 // Atomics require at least natural alignment.
298 InitLoaded->setAlignment(AI->getType()->getPrimitiveSizeInBits());
299 Builder.CreateBr(LoopBB);
300
301 // Start the main loop block now that we've taken care of the preliminaries.
302 Builder.SetInsertPoint(LoopBB);
303 PHINode *Loaded = Builder.CreatePHI(AI->getType(), 2, "loaded");
304 Loaded->addIncoming(InitLoaded, BB);
305
306 Value *NewVal =
307 performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand());
308
309 Value *Pair = Builder.CreateAtomicCmpXchg(
310 Addr, Loaded, NewVal, MemOpOrder,
311 AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder));
312 Value *NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
313 Loaded->addIncoming(NewLoaded, LoopBB);
314
315 Value *Success = Builder.CreateExtractValue(Pair, 1, "success");
316 Builder.CreateCondBr(Success, ExitBB, LoopBB);
317
318 Builder.SetInsertPoint(ExitBB, ExitBB->begin());
319 TargetLowering->emitTrailingFence(Builder, FenceOrder,
320 /*IsStore=*/true, /*IsLoad=*/true);
321
322 AI->replaceAllUsesWith(NewLoaded);
242323 AI->eraseFromParent();
243324
244325 return true;
85708570 return Size <= 128;
85718571 }
85728572
8573 bool AArch64TargetLowering::hasLoadLinkedStoreConditional() const {
8574 return true;
8575 }
8576
85738577 Value *AArch64TargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
85748578 AtomicOrdering Ord) const {
85758579 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
323323 bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
324324 Type *Ty) const override;
325325
326 bool hasLoadLinkedStoreConditional() const override;
326327 Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
327328 AtomicOrdering Ord) const override;
328329 Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val,
1098110981 return true;
1098210982 }
1098310983
10984 bool ARMTargetLowering::hasLoadLinkedStoreConditional() const { return true; }
10985
1098410986 static void makeDMB(IRBuilder<> &Builder, ARM_MB::MemBOpt Domain) {
1098510987 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
1098610988 Function *DMB = llvm::Intrinsic::getDeclaration(M, Intrinsic::arm_dmb);
391391 bool functionArgumentNeedsConsecutiveRegisters(
392392 Type *Ty, CallingConv::ID CallConv, bool isVarArg) const override;
393393
394 bool hasLoadLinkedStoreConditional() const override;
394395 Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
395396 AtomicOrdering Ord) const override;
396397 Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val,
1313
1414 set(sources
1515 X86AsmPrinter.cpp
16 X86AtomicExpandPass.cpp
1716 X86FastISel.cpp
1817 X86FloatingPoint.cpp
1918 X86FrameLowering.cpp
2121 class FunctionPass;
2222 class ImmutablePass;
2323 class X86TargetMachine;
24
25 /// createX86AtomicExpandPass - This pass expands atomic operations that cannot
26 /// be handled natively in terms of a loop using cmpxchg.
27 FunctionPass *createX86AtomicExpandPass(const X86TargetMachine *TM);
2824
2925 /// createX86ISelDag - This pass converts a legalized DAG into a
3026 /// X86-specific DAG, ready for instruction scheduling.
+0
-283
lib/Target/X86/X86AtomicExpandPass.cpp
0 //===-- X86AtomicExpandPass.cpp - Expand illegal atomic instructions -----===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains a pass (at IR level) to replace atomic instructions which
10 // cannot be implemented as a single instruction with cmpxchg-based loops.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "X86.h"
15 #include "X86TargetMachine.h"
16 #include "llvm/CodeGen/Passes.h"
17 #include "llvm/IR/Function.h"
18 #include "llvm/IR/IRBuilder.h"
19 #include "llvm/IR/Instructions.h"
20 #include "llvm/IR/Intrinsics.h"
21 #include "llvm/IR/Module.h"
22 #include "llvm/Support/Debug.h"
23 #include "llvm/Target/TargetLowering.h"
24 #include "llvm/Target/TargetMachine.h"
25 using namespace llvm;
26
27 #define DEBUG_TYPE "x86-atomic-expand"
28
29 namespace {
30 class X86AtomicExpandPass : public FunctionPass {
31 const X86TargetMachine *TM;
32 public:
33 static char ID; // Pass identification, replacement for typeid
34 explicit X86AtomicExpandPass(const X86TargetMachine *TM)
35 : FunctionPass(ID), TM(TM) {}
36
37 bool runOnFunction(Function &F) override;
38 bool expandAtomicInsts(Function &F);
39
40 bool needsCmpXchgNb(Type *MemType);
41
42 /// There are four kinds of atomic operations. Two never need expanding:
43 /// cmpxchg is what we expand the others *to*, and loads are easily handled
44 /// by ISelLowering. Atomicrmw and store can need expanding in some
45 /// circumstances.
46 bool shouldExpand(Instruction *Inst);
47
48 /// 128-bit atomic stores (64-bit on i686) need to be implemented in terms
49 /// of trivial cmpxchg16b loops. A simple store isn't necessarily atomic.
50 bool shouldExpandStore(StoreInst *SI);
51
52 /// Only some atomicrmw instructions need expanding -- some operations
53 /// (e.g. max) have absolutely no architectural support; some (e.g. or) have
54 /// limited support but can't return the previous value; some (e.g. add)
55 /// have complete support in the instruction set.
56 ///
57 /// Also, naturally, 128-bit operations always need to be expanded.
58 bool shouldExpandAtomicRMW(AtomicRMWInst *AI);
59
60 bool expandAtomicRMW(AtomicRMWInst *AI);
61 bool expandAtomicStore(StoreInst *SI);
62 };
63 }
64
65 char X86AtomicExpandPass::ID = 0;
66
67 FunctionPass *llvm::createX86AtomicExpandPass(const X86TargetMachine *TM) {
68 return new X86AtomicExpandPass(TM);
69 }
70
71 bool X86AtomicExpandPass::runOnFunction(Function &F) {
72 SmallVector<Instruction *, 1> AtomicInsts;
73
74 // Changing control-flow while iterating through it is a bad idea, so gather a
75 // list of all atomic instructions before we start.
76 for (BasicBlock &BB : F)
77 for (Instruction &Inst : BB) {
78 if (isa<AtomicRMWInst>(&Inst) ||
79 (isa<StoreInst>(&Inst) && cast<StoreInst>(&Inst)->isAtomic()))
80 AtomicInsts.push_back(&Inst);
81 }
82
83 bool MadeChange = false;
84 for (Instruction *Inst : AtomicInsts) {
85 if (!shouldExpand(Inst))
86 continue;
87
88 if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Inst))
89 MadeChange |= expandAtomicRMW(AI);
90 if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
91 MadeChange |= expandAtomicStore(SI);
92
93 assert(MadeChange && "Atomic inst not expanded when it should be?");
94 Inst->eraseFromParent();
95 }
96
97 return MadeChange;
98 }
99
100 /// Returns true if the operand type is 1 step up from the native width, and
101 /// the corresponding cmpxchg8b or cmpxchg16b instruction is available
102 /// (otherwise we leave them alone to become __sync_fetch_and_... calls).
103 bool X86AtomicExpandPass::needsCmpXchgNb(llvm::Type *MemType) {
104 const X86Subtarget &Subtarget = TM->getSubtarget<X86Subtarget>();
105 unsigned OpWidth = MemType->getPrimitiveSizeInBits();
106
107 if (OpWidth == 64)
108 return !Subtarget.is64Bit(); // FIXME this should be Subtarget.hasCmpxchg8b
109 if (OpWidth == 128)
110 return Subtarget.hasCmpxchg16b();
111
112 return false;
113 }
114
115 bool X86AtomicExpandPass::shouldExpandAtomicRMW(AtomicRMWInst *AI) {
116 const X86Subtarget &Subtarget = TM->getSubtarget<X86Subtarget>();
117 unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32;
118
119 if (needsCmpXchgNb(AI->getType()))
120 return true;
121
122 if (AI->getType()->getPrimitiveSizeInBits() > NativeWidth)
123 return false;
124
125 AtomicRMWInst::BinOp Op = AI->getOperation();
126 switch (Op) {
127 default:
128 llvm_unreachable("Unknown atomic operation");
129 case AtomicRMWInst::Xchg:
130 case AtomicRMWInst::Add:
131 case AtomicRMWInst::Sub:
132 // It's better to use xadd, xsub or xchg for these in all cases.
133 return false;
134 case AtomicRMWInst::Or:
135 case AtomicRMWInst::And:
136 case AtomicRMWInst::Xor:
137 // If the atomicrmw's result isn't actually used, we can just add a "lock"
138 // prefix to a normal instruction for these operations.
139 return !AI->use_empty();
140 case AtomicRMWInst::Nand:
141 case AtomicRMWInst::Max:
142 case AtomicRMWInst::Min:
143 case AtomicRMWInst::UMax:
144 case AtomicRMWInst::UMin:
145 // These always require a non-trivial set of data operations on x86. We must
146 // use a cmpxchg loop.
147 return true;
148 }
149 }
150
151 bool X86AtomicExpandPass::shouldExpandStore(StoreInst *SI) {
152 if (needsCmpXchgNb(SI->getValueOperand()->getType()))
153 return true;
154
155 return false;
156 }
157
158 bool X86AtomicExpandPass::shouldExpand(Instruction *Inst) {
159 if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Inst))
160 return shouldExpandAtomicRMW(AI);
161 if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
162 return shouldExpandStore(SI);
163 return false;
164 }
165
166 /// Emit IR to implement the given atomicrmw operation on values in registers,
167 /// returning the new value.
168 static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder,
169 Value *Loaded, Value *Inc) {
170 Value *NewVal;
171 switch (Op) {
172 case AtomicRMWInst::Xchg:
173 return Inc;
174 case AtomicRMWInst::Add:
175 return Builder.CreateAdd(Loaded, Inc, "new");
176 case AtomicRMWInst::Sub:
177 return Builder.CreateSub(Loaded, Inc, "new");
178 case AtomicRMWInst::And:
179 return Builder.CreateAnd(Loaded, Inc, "new");
180 case AtomicRMWInst::Nand:
181 return Builder.CreateNot(Builder.CreateAnd(Loaded, Inc), "new");
182 case AtomicRMWInst::Or:
183 return Builder.CreateOr(Loaded, Inc, "new");
184 case AtomicRMWInst::Xor:
185 return Builder.CreateXor(Loaded, Inc, "new");
186 case AtomicRMWInst::Max:
187 NewVal = Builder.CreateICmpSGT(Loaded, Inc);
188 return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
189 case AtomicRMWInst::Min:
190 NewVal = Builder.CreateICmpSLE(Loaded, Inc);
191 return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
192 case AtomicRMWInst::UMax:
193 NewVal = Builder.CreateICmpUGT(Loaded, Inc);
194 return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
195 case AtomicRMWInst::UMin:
196 NewVal = Builder.CreateICmpULE(Loaded, Inc);
197 return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
198 default:
199 break;
200 }
201 llvm_unreachable("Unknown atomic op");
202 }
203
204 bool X86AtomicExpandPass::expandAtomicRMW(AtomicRMWInst *AI) {
205 AtomicOrdering Order =
206 AI->getOrdering() == Unordered ? Monotonic : AI->getOrdering();
207 Value *Addr = AI->getPointerOperand();
208 BasicBlock *BB = AI->getParent();
209 Function *F = BB->getParent();
210 LLVMContext &Ctx = F->getContext();
211
212 // Given: atomicrmw some_op iN* %addr, iN %incr ordering
213 //
214 // The standard expansion we produce is:
215 // [...]
216 // %init_loaded = load atomic iN* %addr
217 // br label %loop
218 // loop:
219 // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
220 // %new = some_op iN %loaded, %incr
221 // %pair = cmpxchg iN* %addr, iN %loaded, iN %new
222 // %new_loaded = extractvalue { iN, i1 } %pair, 0
223 // %success = extractvalue { iN, i1 } %pair, 1
224 // br i1 %success, label %atomicrmw.end, label %loop
225 // atomicrmw.end:
226 // [...]
227 BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end");
228 BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
229
230 // This grabs the DebugLoc from AI.
231 IRBuilder<> Builder(AI);
232
233 // The split call above "helpfully" added a branch at the end of BB (to the
234 // wrong place), but we want a load. It's easiest to just remove
235 // the branch entirely.
236 std::prev(BB->end())->eraseFromParent();
237 Builder.SetInsertPoint(BB);
238 LoadInst *InitLoaded = Builder.CreateLoad(Addr);
239 InitLoaded->setAlignment(AI->getType()->getPrimitiveSizeInBits());
240 Builder.CreateBr(LoopBB);
241
242 // Start the main loop block now that we've taken care of the preliminaries.
243 Builder.SetInsertPoint(LoopBB);
244 PHINode *Loaded = Builder.CreatePHI(AI->getType(), 2, "loaded");
245 Loaded->addIncoming(InitLoaded, BB);
246
247 Value *NewVal =
248 performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand());
249
250 Value *Pair = Builder.CreateAtomicCmpXchg(
251 Addr, Loaded, NewVal, Order,
252 AtomicCmpXchgInst::getStrongestFailureOrdering(Order));
253 Value *NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
254 Loaded->addIncoming(NewLoaded, LoopBB);
255
256 Value *Success = Builder.CreateExtractValue(Pair, 1, "success");
257 Builder.CreateCondBr(Success, ExitBB, LoopBB);
258
259 AI->replaceAllUsesWith(NewLoaded);
260
261 return true;
262 }
263
264 bool X86AtomicExpandPass::expandAtomicStore(StoreInst *SI) {
265 // An atomic store might need cmpxchg16b (or 8b on x86) to execute. Express
266 // this in terms of the usual expansion to "atomicrmw xchg".
267 IRBuilder<> Builder(SI);
268 AtomicOrdering Order =
269 SI->getOrdering() == Unordered ? Monotonic : SI->getOrdering();
270 AtomicRMWInst *AI =
271 Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(),
272 SI->getValueOperand(), Order);
273
274 // Now we have an appropriate swap instruction, lower it as usual.
275 if (shouldExpandAtomicRMW(AI)) {
276 expandAtomicRMW(AI);
277 AI->eraseFromParent();
278 return true;
279 }
280
281 return AI;
282 }
1681516815 return getTargetVShiftByConstNode(X86ISD::VSRAI, dl, VT, Shl, BitsDiff,
1681616816 DAG);
1681716817 }
16818 }
16819 }
16820
16821 /// Returns true if the operand type is exactly twice the native width, and
16822 /// the corresponding cmpxchg8b or cmpxchg16b instruction is available.
16823 /// Used to know whether to use cmpxchg8/16b when expanding atomic operations
16824 /// (otherwise we leave them alone to become __sync_fetch_and_... calls).
16825 bool X86TargetLowering::needsCmpXchgNb(const Type *MemType) const {
16826 const X86Subtarget &Subtarget =
16827 getTargetMachine().getSubtarget<X86Subtarget>();
16828 unsigned OpWidth = MemType->getPrimitiveSizeInBits();
16829
16830 if (OpWidth == 64)
16831 return !Subtarget.is64Bit(); // FIXME this should be Subtarget.hasCmpxchg8b
16832 else if (OpWidth == 128)
16833 return Subtarget.hasCmpxchg16b();
16834 else
16835 return false;
16836 }
16837
16838 bool X86TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
16839 return needsCmpXchgNb(SI->getValueOperand()->getType());
16840 }
16841
16842 bool X86TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *SI) const {
16843 return false; // FIXME, currently these are expanded separately in this file.
16844 }
16845
16846 bool X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
16847 const X86Subtarget &Subtarget =
16848 getTargetMachine().getSubtarget<X86Subtarget>();
16849 unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32;
16850 const Type *MemType = AI->getType();
16851
16852 // If the operand is too big, we must see if cmpxchg8/16b is available
16853 // and default to library calls otherwise.
16854 if (MemType->getPrimitiveSizeInBits() > NativeWidth)
16855 return needsCmpXchgNb(MemType);
16856
16857 AtomicRMWInst::BinOp Op = AI->getOperation();
16858 switch (Op) {
16859 default:
16860 llvm_unreachable("Unknown atomic operation");
16861 case AtomicRMWInst::Xchg:
16862 case AtomicRMWInst::Add:
16863 case AtomicRMWInst::Sub:
16864 // It's better to use xadd, xsub or xchg for these in all cases.
16865 return false;
16866 case AtomicRMWInst::Or:
16867 case AtomicRMWInst::And:
16868 case AtomicRMWInst::Xor:
16869 // If the atomicrmw's result isn't actually used, we can just add a "lock"
16870 // prefix to a normal instruction for these operations.
16871 return !AI->use_empty();
16872 case AtomicRMWInst::Nand:
16873 case AtomicRMWInst::Max:
16874 case AtomicRMWInst::Min:
16875 case AtomicRMWInst::UMax:
16876 case AtomicRMWInst::UMin:
16877 // These always require a non-trivial set of data operations on x86. We must
16878 // use a cmpxchg loop.
16879 return true;
1681816880 }
1681916881 }
1682016882
1733717399 case ISD::ATOMIC_LOAD_UMIN:
1733817400 case ISD::ATOMIC_LOAD_UMAX:
1733917401 // Delegate to generic TypeLegalization. Situations we can really handle
17340 // should have already been dealt with by X86AtomicExpandPass.cpp.
17402 // should have already been dealt with by AtomicExpandPass.cpp.
1734117403 break;
1734217404 case ISD::ATOMIC_LOAD: {
1734317405 ReplaceATOMIC_LOAD(N, Results, DAG);
962962
963963 const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
964964
965 bool shouldExpandAtomicLoadInIR(LoadInst *SI) const override;
966 bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
967 bool shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
968
969 bool needsCmpXchgNb(const Type *MemType) const;
970
965971 /// Utility function to emit atomic-load-arith operations (and, or, xor,
966972 /// nand, max, min, umax, umin). It takes the corresponding instruction to
967973 /// expand, the associated machine basic block, and the associated X86
104104 }
105105
106106 void X86PassConfig::addIRPasses() {
107 addPass(createX86AtomicExpandPass(&getX86TargetMachine()));
107 addPass(createAtomicExpandPass(&getX86TargetMachine()));
108108
109109 TargetPassConfig::addIRPasses();
110110 }