AtomicExpandPass.cpp
1//===- AtomicExpandPass.cpp - Expand atomic instructions ------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains a pass (at IR level) to replace atomic instructions with
10// __atomic_* library calls, or target-specific instructions which implement the
11// same semantics in a way which better fits the target backend. This can
12// include the use of (intrinsic-based) load-linked/store-conditional loops,
13// AtomicCmpXchg, or type coercions.
14//
15//===----------------------------------------------------------------------===//
16
17#include "llvm/ADT/ArrayRef.h"
29#include "llvm/IR/Attributes.h"
30#include "llvm/IR/BasicBlock.h"
31#include "llvm/IR/Constant.h"
32#include "llvm/IR/Constants.h"
33#include "llvm/IR/DataLayout.h"
35#include "llvm/IR/Function.h"
36#include "llvm/IR/IRBuilder.h"
38#include "llvm/IR/Instruction.h"
41#include "llvm/IR/Module.h"
42#include "llvm/IR/Type.h"
43#include "llvm/IR/User.h"
44#include "llvm/IR/Value.h"
46#include "llvm/Pass.h"
49#include "llvm/Support/Debug.h"
54#include <cassert>
55#include <cstdint>
56#include <iterator>
57
58using namespace llvm;
59
60#define DEBUG_TYPE "atomic-expand"
61
62namespace {
63
64class AtomicExpandImpl {
65 const TargetLowering *TLI = nullptr;
66 const DataLayout *DL = nullptr;
67
68private:
69 bool bracketInstWithFences(Instruction *I, AtomicOrdering Order);
70 IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL);
71 LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI);
72 bool tryExpandAtomicLoad(LoadInst *LI);
73 bool expandAtomicLoadToLL(LoadInst *LI);
74 bool expandAtomicLoadToCmpXchg(LoadInst *LI);
75 StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
76 bool tryExpandAtomicStore(StoreInst *SI);
77 void expandAtomicStore(StoreInst *SI);
78 bool tryExpandAtomicRMW(AtomicRMWInst *AI);
79 AtomicRMWInst *convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI);
80 Value *
81 insertRMWLLSCLoop(IRBuilderBase &Builder, Type *ResultTy, Value *Addr,
82 Align AddrAlign, AtomicOrdering MemOpOrder,
83 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp);
84 void expandAtomicOpToLLSC(
85 Instruction *I, Type *ResultTy, Value *Addr, Align AddrAlign,
86 AtomicOrdering MemOpOrder,
87 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp);
88 void expandPartwordAtomicRMW(
89 AtomicRMWInst *I, TargetLoweringBase::AtomicExpansionKind ExpansionKind);
90 AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI);
91 bool expandPartwordCmpXchg(AtomicCmpXchgInst *I);
92 void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI);
93 void expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI);
94
95 AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI);
96 static Value *insertRMWCmpXchgLoop(
97 IRBuilderBase &Builder, Type *ResultType, Value *Addr, Align AddrAlign,
98 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
99 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
100 CreateCmpXchgInstFun CreateCmpXchg);
101 bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI);
102
103 bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
104 bool isIdempotentRMW(AtomicRMWInst *RMWI);
105 bool simplifyIdempotentRMW(AtomicRMWInst *RMWI);
106
107 bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, Align Alignment,
108 Value *PointerOperand, Value *ValueOperand,
109 Value *CASExpected, AtomicOrdering Ordering,
110 AtomicOrdering Ordering2,
111 ArrayRef<RTLIB::Libcall> Libcalls);
112 void expandAtomicLoadToLibcall(LoadInst *LI);
113 void expandAtomicStoreToLibcall(StoreInst *LI);
114 void expandAtomicRMWToLibcall(AtomicRMWInst *I);
115 void expandAtomicCASToLibcall(AtomicCmpXchgInst *I);
116
117 friend bool
118 llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
119 CreateCmpXchgInstFun CreateCmpXchg);
120
121public:
122 bool run(Function &F, const TargetMachine *TM);
123};
124
125class AtomicExpandLegacy : public FunctionPass {
126public:
127 static char ID; // Pass identification, replacement for typeid
128
129 AtomicExpandLegacy() : FunctionPass(ID) {
130 initializeAtomicExpandLegacyPass(*PassRegistry::getPassRegistry());
131 }
132
133 bool runOnFunction(Function &F) override;
134};
135
136// IRBuilder to be used for replacement atomic instructions.
137struct ReplacementIRBuilder
138 : IRBuilder<InstSimplifyFolder, IRBuilderCallbackInserter> {
139 MDNode *MMRAMD = nullptr;
140
141 // Preserves the DebugLoc from I, and preserves still valid metadata.
142 // Enable StrictFP builder mode when appropriate.
143 explicit ReplacementIRBuilder(Instruction *I, const DataLayout &DL)
144 : IRBuilder(I->getContext(), DL,
146 [this](Instruction *I) { addMMRAMD(I); })) {
147 SetInsertPoint(I);
148 this->CollectMetadataToCopy(I, {LLVMContext::MD_pcsections});
149 if (BB->getParent()->getAttributes().hasFnAttr(Attribute::StrictFP))
150 this->setIsFPConstrained(true);
151
152 MMRAMD = I->getMetadata(LLVMContext::MD_mmra);
153 }
154
155 void addMMRAMD(Instruction *I) {
156 if (canInstructionHaveMMRAs(*I))
157 I->setMetadata(LLVMContext::MD_mmra, MMRAMD);
158 }
159};
160
161} // end anonymous namespace
162
163char AtomicExpandLegacy::ID = 0;
164
165char &llvm::AtomicExpandID = AtomicExpandLegacy::ID;
166
168 "Expand Atomic instructions", false, false)
170INITIALIZE_PASS_END(AtomicExpandLegacy, DEBUG_TYPE,
171 "Expand Atomic instructions", false, false)
172
173// Helper functions to retrieve the size of atomic instructions.
174static unsigned getAtomicOpSize(LoadInst *LI) {
175 const DataLayout &DL = LI->getModule()->getDataLayout();
176 return DL.getTypeStoreSize(LI->getType());
177}
178
179static unsigned getAtomicOpSize(StoreInst *SI) {
180 const DataLayout &DL = SI->getModule()->getDataLayout();
181 return DL.getTypeStoreSize(SI->getValueOperand()->getType());
182}
183
184static unsigned getAtomicOpSize(AtomicRMWInst *RMWI) {
185 const DataLayout &DL = RMWI->getModule()->getDataLayout();
186 return DL.getTypeStoreSize(RMWI->getValOperand()->getType());
187}
188
189static unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) {
190 const DataLayout &DL = CASI->getModule()->getDataLayout();
191 return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
192}
193
194// Determine if a particular atomic operation has a supported size,
195// and is of appropriate alignment, to be passed through for target
196// lowering. (Versus turning into a __atomic libcall)
197template <typename Inst>
198static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
199 unsigned Size = getAtomicOpSize(I);
200 Align Alignment = I->getAlign();
201 return Alignment >= Size &&
202 Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8;
203}
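
// ---- Illustrative example (annotation, not part of AtomicExpandPass.cpp) ----
// A minimal sketch of the predicate above on plain integers, assuming a
// hypothetical 16-byte limit in place of
// TLI->getMaxAtomicSizeInBitsSupported() / 8: the access must be at least
// naturally aligned and no wider than the largest atomic the target can
// inline, otherwise it falls back to a __atomic_* libcall.
#include <cstdint>
static bool exampleAtomicSizeSupported(uint64_t SizeInBytes,
                                       uint64_t AlignInBytes) {
  const uint64_t MaxAtomicSizeInBytes = 16; // assumed target limit
  return AlignInBytes >= SizeInBytes && SizeInBytes <= MaxAtomicSizeInBytes;
}
// ---- end example ----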
204
205bool AtomicExpandImpl::run(Function &F, const TargetMachine *TM) {
206 const auto *Subtarget = TM->getSubtargetImpl(F);
207 if (!Subtarget->enableAtomicExpand())
208 return false;
209 TLI = Subtarget->getTargetLowering();
210 DL = &F.getParent()->getDataLayout();
211
212 SmallVector<Instruction *, 1> AtomicInsts;
213
214 // Changing control-flow while iterating through it is a bad idea, so gather a
215 // list of all atomic instructions before we start.
216 for (Instruction &I : instructions(F))
217 if (I.isAtomic() && !isa<FenceInst>(&I))
218 AtomicInsts.push_back(&I);
219
220 bool MadeChange = false;
221 for (auto *I : AtomicInsts) {
222 auto LI = dyn_cast<LoadInst>(I);
223 auto SI = dyn_cast<StoreInst>(I);
224 auto RMWI = dyn_cast<AtomicRMWInst>(I);
225 auto CASI = dyn_cast<AtomicCmpXchgInst>(I);
226 assert((LI || SI || RMWI || CASI) && "Unknown atomic instruction");
227
228 // If the Size/Alignment is not supported, replace with a libcall.
229 if (LI) {
230 if (!atomicSizeSupported(TLI, LI)) {
231 expandAtomicLoadToLibcall(LI);
232 MadeChange = true;
233 continue;
234 }
235 } else if (SI) {
236 if (!atomicSizeSupported(TLI, SI)) {
237 expandAtomicStoreToLibcall(SI);
238 MadeChange = true;
239 continue;
240 }
241 } else if (RMWI) {
242 if (!atomicSizeSupported(TLI, RMWI)) {
243 expandAtomicRMWToLibcall(RMWI);
244 MadeChange = true;
245 continue;
246 }
247 } else if (CASI) {
248 if (!atomicSizeSupported(TLI, CASI)) {
249 expandAtomicCASToLibcall(CASI);
250 MadeChange = true;
251 continue;
252 }
253 }
254
255 if (LI && TLI->shouldCastAtomicLoadInIR(LI) ==
256 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
257 I = LI = convertAtomicLoadToIntegerType(LI);
258 MadeChange = true;
259 } else if (SI &&
260 TLI->shouldCastAtomicStoreInIR(SI) ==
261 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
262 I = SI = convertAtomicStoreToIntegerType(SI);
263 MadeChange = true;
264 } else if (RMWI &&
265 TLI->shouldCastAtomicRMWIInIR(RMWI) ==
266 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
267 I = RMWI = convertAtomicXchgToIntegerType(RMWI);
268 MadeChange = true;
269 } else if (CASI) {
270 // TODO: when we're ready to make the change at the IR level, we can
271 // extend convertCmpXchgToInteger for floating point too.
272 if (CASI->getCompareOperand()->getType()->isPointerTy()) {
273 // TODO: add a TLI hook to control this so that each target can
274 // convert to lowering the original type one at a time.
275 I = CASI = convertCmpXchgToIntegerType(CASI);
276 MadeChange = true;
277 }
278 }
279
280 if (TLI->shouldInsertFencesForAtomic(I)) {
281 auto FenceOrdering = AtomicOrdering::Monotonic;
282 if (LI && isAcquireOrStronger(LI->getOrdering())) {
283 FenceOrdering = LI->getOrdering();
284 LI->setOrdering(AtomicOrdering::Monotonic);
285 } else if (SI && isReleaseOrStronger(SI->getOrdering())) {
286 FenceOrdering = SI->getOrdering();
287 SI->setOrdering(AtomicOrdering::Monotonic);
288 } else if (RMWI && (isReleaseOrStronger(RMWI->getOrdering()) ||
289 isAcquireOrStronger(RMWI->getOrdering()))) {
290 FenceOrdering = RMWI->getOrdering();
291 RMWI->setOrdering(AtomicOrdering::Monotonic);
292 } else if (CASI &&
293 TLI->shouldExpandAtomicCmpXchgInIR(CASI) ==
294 TargetLoweringBase::AtomicExpansionKind::None &&
295 (isReleaseOrStronger(CASI->getSuccessOrdering()) ||
296 isAcquireOrStronger(CASI->getSuccessOrdering()) ||
297 isAcquireOrStronger(CASI->getFailureOrdering()))) {
298 // If a compare and swap is lowered to LL/SC, we can do smarter fence
299 // insertion, with a stronger one on the success path than on the
300 // failure path. As a result, fence insertion is directly done by
301 // expandAtomicCmpXchg in that case.
302 FenceOrdering = CASI->getMergedOrdering();
303 CASI->setSuccessOrdering(AtomicOrdering::Monotonic);
304 CASI->setFailureOrdering(AtomicOrdering::Monotonic);
305 }
306
307 if (FenceOrdering != AtomicOrdering::Monotonic) {
308 MadeChange |= bracketInstWithFences(I, FenceOrdering);
309 }
310 } else if (I->hasAtomicStore() &&
311 TLI->shouldInsertTrailingFenceForAtomicStore(I)) {
312 auto FenceOrdering = AtomicOrdering::Monotonic;
313 if (SI)
314 FenceOrdering = SI->getOrdering();
315 else if (RMWI)
316 FenceOrdering = RMWI->getOrdering();
317 else if (CASI && TLI->shouldExpandAtomicCmpXchgInIR(CASI) !=
318 TargetLoweringBase::AtomicExpansionKind::LLSC)
319 // LLSC is handled in expandAtomicCmpXchg().
320 FenceOrdering = CASI->getSuccessOrdering();
321
322 IRBuilder Builder(I);
323 if (auto TrailingFence =
324 TLI->emitTrailingFence(Builder, I, FenceOrdering)) {
325 TrailingFence->moveAfter(I);
326 MadeChange = true;
327 }
328 }
329
330 if (LI)
331 MadeChange |= tryExpandAtomicLoad(LI);
332 else if (SI)
333 MadeChange |= tryExpandAtomicStore(SI);
334 else if (RMWI) {
335 // There are two different ways of expanding RMW instructions:
336 // - into a load if it is idempotent
337 // - into a Cmpxchg/LL-SC loop otherwise
338 // we try them in that order.
339
340 if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) {
341 MadeChange = true;
342 } else {
343 MadeChange |= tryExpandAtomicRMW(RMWI);
344 }
345 } else if (CASI)
346 MadeChange |= tryExpandAtomicCmpXchg(CASI);
347 }
348 return MadeChange;
349}
350
351bool AtomicExpandLegacy::runOnFunction(Function &F) {
352
353 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
354 if (!TPC)
355 return false;
356 auto *TM = &TPC->getTM<TargetMachine>();
357 AtomicExpandImpl AE;
358 return AE.run(F, TM);
359}
360
361FunctionPass *llvm::createAtomicExpandLegacyPass() {
362 return new AtomicExpandLegacy();
363}
364
365PreservedAnalyses AtomicExpandPass::run(Function &F,
366 FunctionAnalysisManager &AM) {
367 AtomicExpandImpl AE;
368
369 bool Changed = AE.run(F, TM);
370 if (!Changed)
371 return PreservedAnalyses::all();
372
373 return PreservedAnalyses::none();
374}
375
376bool AtomicExpandImpl::bracketInstWithFences(Instruction *I,
377 AtomicOrdering Order) {
378 ReplacementIRBuilder Builder(I, *DL);
379
380 auto LeadingFence = TLI->emitLeadingFence(Builder, I, Order);
381
382 auto TrailingFence = TLI->emitTrailingFence(Builder, I, Order);
383 // We have a guard here because not every atomic operation generates a
384 // trailing fence.
385 if (TrailingFence)
386 TrailingFence->moveAfter(I);
387
388 return (LeadingFence || TrailingFence);
389}
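
// ---- Illustrative example (annotation, not part of AtomicExpandPass.cpp) ----
// A sketch of one possible result of bracketing, for a target whose
// emitLeadingFence/emitTrailingFence simply produce IR fence instructions:
// the caller has already downgraded the access to monotonic, and the ordering
// is carried by the surrounding fences instead. The helper name is
// hypothetical and the exact fences are target-dependent.
static LoadInst *exampleBracketedSeqCstLoad(IRBuilderBase &Builder, Type *Ty,
                                            Value *Addr, Align Alignment) {
  Builder.CreateFence(AtomicOrdering::SequentiallyConsistent); // leading fence
  LoadInst *LI = Builder.CreateAlignedLoad(Ty, Addr, Alignment);
  LI->setAtomic(AtomicOrdering::Monotonic); // ordering now lives on the fences
  Builder.CreateFence(AtomicOrdering::SequentiallyConsistent); // trailing fence
  return LI;
}
// ---- end example ----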
390
391/// Get the iX type with the same bitwidth as T.
392IntegerType *
393AtomicExpandImpl::getCorrespondingIntegerType(Type *T, const DataLayout &DL) {
394 EVT VT = TLI->getMemValueType(DL, T);
395 unsigned BitWidth = VT.getStoreSizeInBits();
396 assert(BitWidth == VT.getSizeInBits() && "must be a power of two");
397 return IntegerType::get(T->getContext(), BitWidth);
398}
399
400/// Convert an atomic load of a non-integral type to an integer load of the
401/// equivalent bitwidth. See the function comment on
402/// convertAtomicStoreToIntegerType for background.
403LoadInst *AtomicExpandImpl::convertAtomicLoadToIntegerType(LoadInst *LI) {
404 auto *M = LI->getModule();
405 Type *NewTy = getCorrespondingIntegerType(LI->getType(), M->getDataLayout());
406
407 ReplacementIRBuilder Builder(LI, *DL);
408
410
411 auto *NewLI = Builder.CreateLoad(NewTy, Addr);
412 NewLI->setAlignment(LI->getAlign());
413 NewLI->setVolatile(LI->isVolatile());
414 NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID());
415 LLVM_DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");
416
417 Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType());
418 LI->replaceAllUsesWith(NewVal);
419 LI->eraseFromParent();
420 return NewLI;
421}
422
423AtomicRMWInst *
424AtomicExpandImpl::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) {
425 auto *M = RMWI->getModule();
426 Type *NewTy =
427 getCorrespondingIntegerType(RMWI->getType(), M->getDataLayout());
428
429 ReplacementIRBuilder Builder(RMWI, *DL);
430
431 Value *Addr = RMWI->getPointerOperand();
432 Value *Val = RMWI->getValOperand();
433 Value *NewVal = Val->getType()->isPointerTy()
434 ? Builder.CreatePtrToInt(Val, NewTy)
435 : Builder.CreateBitCast(Val, NewTy);
436
437 auto *NewRMWI = Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, Addr, NewVal,
438 RMWI->getAlign(), RMWI->getOrdering(),
439 RMWI->getSyncScopeID());
440 NewRMWI->setVolatile(RMWI->isVolatile());
441 LLVM_DEBUG(dbgs() << "Replaced " << *RMWI << " with " << *NewRMWI << "\n");
442
443 Value *NewRVal = RMWI->getType()->isPointerTy()
444 ? Builder.CreateIntToPtr(NewRMWI, RMWI->getType())
445 : Builder.CreateBitCast(NewRMWI, RMWI->getType());
446 RMWI->replaceAllUsesWith(NewRVal);
447 RMWI->eraseFromParent();
448 return NewRMWI;
449}
450
451bool AtomicExpandImpl::tryExpandAtomicLoad(LoadInst *LI) {
452 switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
453 case TargetLoweringBase::AtomicExpansionKind::None:
454 return false;
455 case TargetLoweringBase::AtomicExpansionKind::LLSC:
456 expandAtomicOpToLLSC(
457 LI, LI->getType(), LI->getPointerOperand(), LI->getAlign(),
458 LI->getOrdering(),
459 [](IRBuilderBase &Builder, Value *Loaded) { return Loaded; });
460 return true;
461 case TargetLoweringBase::AtomicExpansionKind::LLOnly:
462 return expandAtomicLoadToLL(LI);
463 case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
464 return expandAtomicLoadToCmpXchg(LI);
465 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
466 LI->setAtomic(AtomicOrdering::NotAtomic);
467 return true;
468 default:
469 llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
470 }
471}
472
473bool AtomicExpandImpl::tryExpandAtomicStore(StoreInst *SI) {
474 switch (TLI->shouldExpandAtomicStoreInIR(SI)) {
475 case TargetLoweringBase::AtomicExpansionKind::None:
476 return false;
477 case TargetLoweringBase::AtomicExpansionKind::Expand:
478 expandAtomicStore(SI);
479 return true;
480 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
481 SI->setAtomic(AtomicOrdering::NotAtomic);
482 return true;
483 default:
484 llvm_unreachable("Unhandled case in tryExpandAtomicStore");
485 }
486}
487
488bool AtomicExpandImpl::expandAtomicLoadToLL(LoadInst *LI) {
489 ReplacementIRBuilder Builder(LI, *DL);
490
491 // On some architectures, load-linked instructions are atomic for larger
492 // sizes than normal loads. For example, the only 64-bit load guaranteed
493 // to be single-copy atomic by ARM is an ldrexd (A3.5.3).
494 Value *Val = TLI->emitLoadLinked(Builder, LI->getType(),
495 LI->getPointerOperand(), LI->getOrdering());
496 TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
497
498 LI->replaceAllUsesWith(Val);
499 LI->eraseFromParent();
500
501 return true;
502}
503
504bool AtomicExpandImpl::expandAtomicLoadToCmpXchg(LoadInst *LI) {
505 ReplacementIRBuilder Builder(LI, *DL);
506 AtomicOrdering Order = LI->getOrdering();
507 if (Order == AtomicOrdering::Unordered)
508 Order = AtomicOrdering::Monotonic;
509
510 Value *Addr = LI->getPointerOperand();
511 Type *Ty = LI->getType();
512 Constant *DummyVal = Constant::getNullValue(Ty);
513
514 Value *Pair = Builder.CreateAtomicCmpXchg(
515 Addr, DummyVal, DummyVal, LI->getAlign(), Order,
516 AtomicCmpXchgInst::getStrongestFailureOrdering(Order));
517 Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded");
518
519 LI->replaceAllUsesWith(Loaded);
520 LI->eraseFromParent();
521
522 return true;
523}
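
// ---- Illustrative example (annotation, not part of AtomicExpandPass.cpp) ----
// The same idea at the C++ level, as a hedged sketch: an atomic load emulated
// with a compare-exchange against a dummy value. If the compare fails,
// `Expected` is updated to the value currently in memory, which is exactly the
// result we want; if it "succeeds", the value was already 0 and storing 0 back
// is a no-op.
#include <atomic>
#include <cstdint>
static uint64_t exampleLoadViaCmpXchg(std::atomic<uint64_t> &A) {
  uint64_t Expected = 0; // dummy compare/new value
  A.compare_exchange_strong(Expected, 0, std::memory_order_acquire,
                            std::memory_order_acquire);
  return Expected; // holds the observed value either way
}
// ---- end example ----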
524
525/// Convert an atomic store of a non-integral type to an integer store of the
526/// equivalent bitwidth. We used to not support floating point or vector
527/// atomics in the IR at all. The backends learned to deal with the bitcast
528/// idiom because that was the only way of expressing the notion of an atomic
529/// float or vector store. The long term plan is to teach each backend to
530/// instruction select from the original atomic store, but as a migration
531/// mechanism, we convert back to the old format which the backends understand.
532/// Each backend will need individual work to recognize the new format.
533StoreInst *AtomicExpandImpl::convertAtomicStoreToIntegerType(StoreInst *SI) {
534 ReplacementIRBuilder Builder(SI, *DL);
535 auto *M = SI->getModule();
536 Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(),
537 M->getDataLayout());
538 Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy);
539
540 Value *Addr = SI->getPointerOperand();
541
542 StoreInst *NewSI = Builder.CreateStore(NewVal, Addr);
543 NewSI->setAlignment(SI->getAlign());
544 NewSI->setVolatile(SI->isVolatile());
545 NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID());
546 LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
547 SI->eraseFromParent();
548 return NewSI;
549}
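
// ---- Illustrative example (annotation, not part of AtomicExpandPass.cpp) ----
// A small sketch of the bitcast idiom at the C++ level: the value is
// reinterpreted bit-for-bit as an integer of the same store size, so an atomic
// float store can be expressed as an atomic i32 store. memcpy-based type
// punning stands in for the IR-level bitcast.
#include <cstdint>
#include <cstring>
static uint32_t exampleFloatBitsAsInteger(float F) {
  static_assert(sizeof(float) == sizeof(uint32_t), "matching store sizes");
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(Bits)); // same bytes, integer type
  return Bits;
}
// ---- end example ----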
550
551void AtomicExpandImpl::expandAtomicStore(StoreInst *SI) {
552 // This function is only called on atomic stores that are too large to be
553 // atomic if implemented as a native store. So we replace them by an
554 // atomic swap, that can be implemented for example as a ldrex/strex on ARM
555 // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes.
556 // It is the responsibility of the target to only signal expansion via
557 // shouldExpandAtomicRMW in cases where this is required and possible.
558 ReplacementIRBuilder Builder(SI, *DL);
559 AtomicOrdering Ordering = SI->getOrdering();
560 assert(Ordering != AtomicOrdering::NotAtomic);
561 AtomicOrdering RMWOrdering = Ordering == AtomicOrdering::Unordered
562 ? AtomicOrdering::Monotonic
563 : Ordering;
564 AtomicRMWInst *AI = Builder.CreateAtomicRMW(
565 AtomicRMWInst::Xchg, SI->getPointerOperand(), SI->getValueOperand(),
566 SI->getAlign(), RMWOrdering);
567 SI->eraseFromParent();
568
569 // Now we have an appropriate swap instruction, lower it as usual.
570 tryExpandAtomicRMW(AI);
571}
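
// ---- Illustrative example (annotation, not part of AtomicExpandPass.cpp) ----
// A one-line sketch of the xchg-based store expansion at the C++ level: the
// store becomes an atomic exchange whose old value is simply discarded.
#include <atomic>
#include <cstdint>
static void exampleStoreViaXchg(std::atomic<uint64_t> &A, uint64_t V) {
  (void)A.exchange(V, std::memory_order_release); // old value is thrown away
}
// ---- end example ----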
572
573static void createCmpXchgInstFun(IRBuilderBase &Builder, Value *Addr,
574 Value *Loaded, Value *NewVal, Align AddrAlign,
575 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
576 Value *&Success, Value *&NewLoaded) {
577 Type *OrigTy = NewVal->getType();
578
579 // This code can go away when cmpxchg supports FP and vector types.
580 assert(!OrigTy->isPointerTy());
581 bool NeedBitcast = OrigTy->isFloatingPointTy() || OrigTy->isVectorTy();
582 if (NeedBitcast) {
583 IntegerType *IntTy = Builder.getIntNTy(OrigTy->getPrimitiveSizeInBits());
584 NewVal = Builder.CreateBitCast(NewVal, IntTy);
585 Loaded = Builder.CreateBitCast(Loaded, IntTy);
586 }
587
588 Value *Pair = Builder.CreateAtomicCmpXchg(
589 Addr, Loaded, NewVal, AddrAlign, MemOpOrder,
590 AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID);
591 Success = Builder.CreateExtractValue(Pair, 1, "success");
592 NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
593
594 if (NeedBitcast)
595 NewLoaded = Builder.CreateBitCast(NewLoaded, OrigTy);
596}
597
598bool AtomicExpandImpl::tryExpandAtomicRMW(AtomicRMWInst *AI) {
599 LLVMContext &Ctx = AI->getModule()->getContext();
600 TargetLowering::AtomicExpansionKind Kind = TLI->shouldExpandAtomicRMWInIR(AI);
601 switch (Kind) {
602 case TargetLoweringBase::AtomicExpansionKind::None:
603 return false;
604 case TargetLoweringBase::AtomicExpansionKind::LLSC: {
605 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
606 unsigned ValueSize = getAtomicOpSize(AI);
607 if (ValueSize < MinCASSize) {
608 expandPartwordAtomicRMW(AI,
609 TargetLoweringBase::AtomicExpansionKind::LLSC);
610 } else {
611 auto PerformOp = [&](IRBuilderBase &Builder, Value *Loaded) {
612 return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
613 AI->getValOperand());
614 };
615 expandAtomicOpToLLSC(AI, AI->getType(), AI->getPointerOperand(),
616 AI->getAlign(), AI->getOrdering(), PerformOp);
617 }
618 return true;
619 }
620 case TargetLoweringBase::AtomicExpansionKind::CmpXChg: {
621 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
622 unsigned ValueSize = getAtomicOpSize(AI);
623 if (ValueSize < MinCASSize) {
624 expandPartwordAtomicRMW(AI,
625 TargetLoweringBase::AtomicExpansionKind::CmpXChg);
626 } else {
627 SmallVector<StringRef> SSNs;
628 Ctx.getSyncScopeNames(SSNs);
629 auto MemScope = SSNs[AI->getSyncScopeID()].empty()
630 ? "system"
631 : SSNs[AI->getSyncScopeID()];
632 OptimizationRemarkEmitter ORE(AI->getFunction());
633 ORE.emit([&]() {
634 return OptimizationRemark(DEBUG_TYPE, "Passed", AI)
635 << "A compare and swap loop was generated for an atomic "
636 << AI->getOperationName(AI->getOperation()) << " operation at "
637 << MemScope << " memory scope";
638 });
639 expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);
640 }
641 return true;
642 }
643 case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: {
644 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
645 unsigned ValueSize = getAtomicOpSize(AI);
646 if (ValueSize < MinCASSize) {
647 AtomicRMWInst::BinOp Op = AI->getOperation();
648 // Widen And/Or/Xor and give the target another chance at expanding it.
649 if (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
650 Op == AtomicRMWInst::And) {
651 tryExpandAtomicRMW(widenPartwordAtomicRMW(AI));
652 return true;
653 }
654 }
655 expandAtomicRMWToMaskedIntrinsic(AI);
656 return true;
657 }
658 case TargetLoweringBase::AtomicExpansionKind::BitTestIntrinsic: {
659 TLI->emitBitTestAtomicRMWIntrinsic(AI);
660 return true;
661 }
662 case TargetLoweringBase::AtomicExpansionKind::CmpArithIntrinsic: {
663 TLI->emitCmpArithAtomicRMWIntrinsic(AI);
664 return true;
665 }
666 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
667 return lowerAtomicRMWInst(AI);
668 case TargetLoweringBase::AtomicExpansionKind::Expand:
669 TLI->emitExpandAtomicRMW(AI);
670 return true;
671 default:
672 llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
673 }
674}
675
676namespace {
677
678struct PartwordMaskValues {
679 // These three fields are guaranteed to be set by createMaskInstrs.
680 Type *WordType = nullptr;
681 Type *ValueType = nullptr;
682 Type *IntValueType = nullptr;
683 Value *AlignedAddr = nullptr;
684 Align AlignedAddrAlignment;
685 // The remaining fields can be null.
686 Value *ShiftAmt = nullptr;
687 Value *Mask = nullptr;
688 Value *Inv_Mask = nullptr;
689};
690
692raw_ostream &operator<<(raw_ostream &O, const PartwordMaskValues &PMV) {
693 auto PrintObj = [&O](auto *V) {
694 if (V)
695 O << *V;
696 else
697 O << "nullptr";
698 O << '\n';
699 };
700 O << "PartwordMaskValues {\n";
701 O << " WordType: ";
702 PrintObj(PMV.WordType);
703 O << " ValueType: ";
704 PrintObj(PMV.ValueType);
705 O << " AlignedAddr: ";
706 PrintObj(PMV.AlignedAddr);
707 O << " AlignedAddrAlignment: " << PMV.AlignedAddrAlignment.value() << '\n';
708 O << " ShiftAmt: ";
709 PrintObj(PMV.ShiftAmt);
710 O << " Mask: ";
711 PrintObj(PMV.Mask);
712 O << " Inv_Mask: ";
713 PrintObj(PMV.Inv_Mask);
714 O << "}\n";
715 return O;
716}
717
718} // end anonymous namespace
719
720/// This is a helper function which builds instructions to provide
721/// values necessary for partword atomic operations. It takes an
722/// incoming address, Addr, and ValueType, and constructs the address,
723/// shift-amounts and masks needed to work with a larger value of size
724/// WordSize.
725///
726/// AlignedAddr: Addr rounded down to a multiple of WordSize
727///
728/// ShiftAmt: Number of bits to right-shift a WordSize value loaded
729/// from AlignedAddr for it to have the same value as if
730/// ValueType was loaded from Addr.
731///
732/// Mask: Value to mask with the value loaded from AlignedAddr to
733/// include only the part that would've been loaded from Addr.
734///
735/// Inv_Mask: The inverse of Mask.
736static PartwordMaskValues createMaskInstrs(IRBuilderBase &Builder,
737 Instruction *I, Type *ValueType,
738 Value *Addr, Align AddrAlign,
739 unsigned MinWordSize) {
740 PartwordMaskValues PMV;
741
742 Module *M = I->getModule();
743 LLVMContext &Ctx = M->getContext();
744 const DataLayout &DL = M->getDataLayout();
745 unsigned ValueSize = DL.getTypeStoreSize(ValueType);
746
747 PMV.ValueType = PMV.IntValueType = ValueType;
748 if (PMV.ValueType->isFloatingPointTy() || PMV.ValueType->isVectorTy())
749 PMV.IntValueType =
750 Type::getIntNTy(Ctx, ValueType->getPrimitiveSizeInBits());
751
752 PMV.WordType = MinWordSize > ValueSize ? Type::getIntNTy(Ctx, MinWordSize * 8)
753 : ValueType;
754 if (PMV.ValueType == PMV.WordType) {
755 PMV.AlignedAddr = Addr;
756 PMV.AlignedAddrAlignment = AddrAlign;
757 PMV.ShiftAmt = ConstantInt::get(PMV.ValueType, 0);
758 PMV.Mask = ConstantInt::get(PMV.ValueType, ~0, /*isSigned*/ true);
759 return PMV;
760 }
761
762 PMV.AlignedAddrAlignment = Align(MinWordSize);
763
764 assert(ValueSize < MinWordSize);
765
766 PointerType *PtrTy = cast<PointerType>(Addr->getType());
767 IntegerType *IntTy = DL.getIntPtrType(Ctx, PtrTy->getAddressSpace());
768 Value *PtrLSB;
769
770 if (AddrAlign < MinWordSize) {
771 PMV.AlignedAddr = Builder.CreateIntrinsic(
772 Intrinsic::ptrmask, {PtrTy, IntTy},
773 {Addr, ConstantInt::get(IntTy, ~(uint64_t)(MinWordSize - 1))}, nullptr,
774 "AlignedAddr");
775
776 Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
777 PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
778 } else {
779 // If the alignment is high enough, the low address bits are known to be 0.
780 PMV.AlignedAddr = Addr;
781 PtrLSB = ConstantInt::getNullValue(IntTy);
782 }
783
784 if (DL.isLittleEndian()) {
785 // turn bytes into bits
786 PMV.ShiftAmt = Builder.CreateShl(PtrLSB, 3);
787 } else {
788 // turn bytes into bits, and count from the other side.
789 PMV.ShiftAmt = Builder.CreateShl(
790 Builder.CreateXor(PtrLSB, MinWordSize - ValueSize), 3);
791 }
792
793 PMV.ShiftAmt = Builder.CreateTrunc(PMV.ShiftAmt, PMV.WordType, "ShiftAmt");
794 PMV.Mask = Builder.CreateShl(
795 ConstantInt::get(PMV.WordType, (1 << (ValueSize * 8)) - 1), PMV.ShiftAmt,
796 "Mask");
797
798 PMV.Inv_Mask = Builder.CreateNot(PMV.Mask, "Inv_Mask");
799
800 return PMV;
801}
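
// ---- Illustrative example (annotation, not part of AtomicExpandPass.cpp) ----
// A sketch of the same address/shift/mask arithmetic on plain integers, for a
// little-endian target with an assumed 4-byte minimum cmpxchg width. ValueSize
// must be 1 or 2 here (the value is naturally aligned, so it never straddles a
// word), mirroring the partword-only path above.
#include <cstdint>
struct ExamplePartwordMask {
  uint64_t AlignedAddr; // Addr rounded down to a word boundary
  uint32_t ShiftAmt;    // bit offset of the value inside the word
  uint32_t Mask;        // selects the value's bits within the word
  uint32_t InvMask;     // selects every other bit
};
static ExamplePartwordMask exampleMaskFor(uint64_t Addr, unsigned ValueSize) {
  const unsigned WordSize = 4; // assumed MinCmpXchgSizeInBits / 8
  ExamplePartwordMask P;
  P.AlignedAddr = Addr & ~uint64_t(WordSize - 1);
  uint64_t ByteOffset = Addr & (WordSize - 1);          // PtrLSB
  P.ShiftAmt = uint32_t(ByteOffset * 8);                // little-endian
  P.Mask = ((1u << (ValueSize * 8)) - 1) << P.ShiftAmt; // field within word
  P.InvMask = ~P.Mask;
  return P;
}
// ---- end example ----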
802
803static Value *extractMaskedValue(IRBuilderBase &Builder, Value *WideWord,
804 const PartwordMaskValues &PMV) {
805 assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
806 if (PMV.WordType == PMV.ValueType)
807 return WideWord;
808
809 Value *Shift = Builder.CreateLShr(WideWord, PMV.ShiftAmt, "shifted");
810 Value *Trunc = Builder.CreateTrunc(Shift, PMV.IntValueType, "extracted");
811 return Builder.CreateBitCast(Trunc, PMV.ValueType);
812}
813
814static Value *insertMaskedValue(IRBuilderBase &Builder, Value *WideWord,
815 Value *Updated, const PartwordMaskValues &PMV) {
816 assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
817 assert(Updated->getType() == PMV.ValueType && "Value type mismatch");
818 if (PMV.WordType == PMV.ValueType)
819 return Updated;
820
821 Updated = Builder.CreateBitCast(Updated, PMV.IntValueType);
822
823 Value *ZExt = Builder.CreateZExt(Updated, PMV.WordType, "extended");
824 Value *Shift =
825 Builder.CreateShl(ZExt, PMV.ShiftAmt, "shifted", /*HasNUW*/ true);
826 Value *And = Builder.CreateAnd(WideWord, PMV.Inv_Mask, "unmasked");
827 Value *Or = Builder.CreateOr(And, Shift, "inserted");
828 return Or;
829}
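
// ---- Illustrative example (annotation, not part of AtomicExpandPass.cpp) ----
// The two helpers above, shown as a sketch for an 8-bit value inside a 32-bit
// word: extract is lshr + trunc, insert is zext + shl, then splice into the
// untouched bits with and/or.
#include <cstdint>
static uint8_t exampleExtract(uint32_t Word, uint32_t ShiftAmt) {
  return uint8_t(Word >> ShiftAmt); // lshr + trunc
}
static uint32_t exampleInsert(uint32_t Word, uint8_t Updated, uint32_t ShiftAmt,
                              uint32_t InvMask) {
  uint32_t Shifted = uint32_t(Updated) << ShiftAmt; // zext + shl
  return (Word & InvMask) | Shifted;                // and + or
}
// ---- end example ----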
830
831/// Emit IR to implement a masked version of a given atomicrmw
832/// operation. (That is, only the bits under the Mask should be
833/// affected by the operation)
834static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
835 IRBuilderBase &Builder, Value *Loaded,
836 Value *Shifted_Inc, Value *Inc,
837 const PartwordMaskValues &PMV) {
838 // TODO: update to use
839 // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge in order
840 // to merge bits from two values without requiring PMV.Inv_Mask.
841 switch (Op) {
842 case AtomicRMWInst::Xchg: {
843 Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
844 Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, Shifted_Inc);
845 return FinalVal;
846 }
847 case AtomicRMWInst::Or:
848 case AtomicRMWInst::Xor:
849 case AtomicRMWInst::And:
850 llvm_unreachable("Or/Xor/And handled by widenPartwordAtomicRMW");
851 case AtomicRMWInst::Add:
852 case AtomicRMWInst::Sub:
853 case AtomicRMWInst::Nand: {
854 // The other arithmetic ops need to be masked into place.
855 Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded, Shifted_Inc);
856 Value *NewVal_Masked = Builder.CreateAnd(NewVal, PMV.Mask);
857 Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
858 Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Masked);
859 return FinalVal;
860 }
861 case AtomicRMWInst::Max:
862 case AtomicRMWInst::Min:
863 case AtomicRMWInst::UMax:
864 case AtomicRMWInst::UMin:
865 case AtomicRMWInst::FAdd:
866 case AtomicRMWInst::FSub:
867 case AtomicRMWInst::FMax:
868 case AtomicRMWInst::FMin:
869 case AtomicRMWInst::UIncWrap:
870 case AtomicRMWInst::UDecWrap: {
871 // Finally, other ops will operate on the full value, so truncate down to
872 // the original size, and expand out again after doing the
873 // operation. Bitcasts will be inserted for FP values.
874 Value *Loaded_Extract = extractMaskedValue(Builder, Loaded, PMV);
875 Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded_Extract, Inc);
876 Value *FinalVal = insertMaskedValue(Builder, Loaded, NewVal, PMV);
877 return FinalVal;
878 }
879 default:
880 llvm_unreachable("Unknown atomic op");
881 }
882}
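
// ---- Illustrative example (annotation, not part of AtomicExpandPass.cpp) ----
// A sketch of the two merge strategies above on plain 32-bit words: xchg just
// splices the pre-shifted operand into place, while an arithmetic op (add
// here, standing in for add/sub/nand) is computed on the whole word and any
// garbage outside the field is masked away before merging.
#include <cstdint>
static uint32_t exampleMaskedXchg(uint32_t Loaded, uint32_t ShiftedInc,
                                  uint32_t Mask) {
  return (Loaded & ~Mask) | ShiftedInc;
}
static uint32_t exampleMaskedAdd(uint32_t Loaded, uint32_t ShiftedInc,
                                 uint32_t Mask) {
  uint32_t NewVal = Loaded + ShiftedInc;     // carries may spill above the field
  return (Loaded & ~Mask) | (NewVal & Mask); // ...but are masked out here
}
// ---- end example ----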
883
884/// Expand a sub-word atomicrmw operation into an appropriate
885/// word-sized operation.
886///
887/// It will create an LL/SC or cmpxchg loop, as appropriate, the same
888/// way as a typical atomicrmw expansion. The only difference here is
889/// that the operation inside of the loop may operate upon only a
890/// part of the value.
891void AtomicExpandImpl::expandPartwordAtomicRMW(
892 AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) {
893 // Widen And/Or/Xor and give the target another chance at expanding it.
894 AtomicRMWInst::BinOp Op = AI->getOperation();
895 if (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
896 Op == AtomicRMWInst::And) {
897 tryExpandAtomicRMW(widenPartwordAtomicRMW(AI));
898 return;
899 }
900 AtomicOrdering MemOpOrder = AI->getOrdering();
901 SyncScope::ID SSID = AI->getSyncScopeID();
902
903 ReplacementIRBuilder Builder(AI, *DL);
904
905 PartwordMaskValues PMV =
906 createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
907 AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
908
909 Value *ValOperand_Shifted = nullptr;
910 if (Op == AtomicRMWInst::Xchg || Op == AtomicRMWInst::Add ||
911 Op == AtomicRMWInst::Sub || Op == AtomicRMWInst::Nand) {
912 Value *ValOp = Builder.CreateBitCast(AI->getValOperand(), PMV.IntValueType);
913 ValOperand_Shifted =
914 Builder.CreateShl(Builder.CreateZExt(ValOp, PMV.WordType), PMV.ShiftAmt,
915 "ValOperand_Shifted");
916 }
917
918 auto PerformPartwordOp = [&](IRBuilderBase &Builder, Value *Loaded) {
919 return performMaskedAtomicOp(Op, Builder, Loaded, ValOperand_Shifted,
920 AI->getValOperand(), PMV);
921 };
922
923 Value *OldResult;
924 if (ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg) {
925 OldResult = insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr,
926 PMV.AlignedAddrAlignment, MemOpOrder, SSID,
927 PerformPartwordOp, createCmpXchgInstFun);
928 } else {
929 assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::LLSC);
930 OldResult = insertRMWLLSCLoop(Builder, PMV.WordType, PMV.AlignedAddr,
931 PMV.AlignedAddrAlignment, MemOpOrder,
932 PerformPartwordOp);
933 }
934
935 Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
936 AI->replaceAllUsesWith(FinalOldResult);
937 AI->eraseFromParent();
938}
939
940// Widen the bitwise atomicrmw (or/xor/and) to the minimum supported width.
941AtomicRMWInst *AtomicExpandImpl::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
942 ReplacementIRBuilder Builder(AI, *DL);
943 AtomicRMWInst::BinOp Op = AI->getOperation();
944
945 assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
946 Op == AtomicRMWInst::And) &&
947 "Unable to widen operation");
948
949 PartwordMaskValues PMV =
950 createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
951 AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
952
953 Value *ValOperand_Shifted =
954 Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
955 PMV.ShiftAmt, "ValOperand_Shifted");
956
957 Value *NewOperand;
958
959 if (Op == AtomicRMWInst::And)
960 NewOperand =
961 Builder.CreateOr(ValOperand_Shifted, PMV.Inv_Mask, "AndOperand");
962 else
963 NewOperand = ValOperand_Shifted;
964
965 AtomicRMWInst *NewAI = Builder.CreateAtomicRMW(
966 Op, PMV.AlignedAddr, NewOperand, PMV.AlignedAddrAlignment,
967 AI->getOrdering(), AI->getSyncScopeID());
968 // TODO: Preserve metadata
969
970 Value *FinalOldResult = extractMaskedValue(Builder, NewAI, PMV);
971 AI->replaceAllUsesWith(FinalOldResult);
972 AI->eraseFromParent();
973 return NewAI;
974}
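
// ---- Illustrative example (annotation, not part of AtomicExpandPass.cpp) ----
// Why `and` needs special handling when widened, as a sketch: or/xor can use
// the zero-extended, shifted operand directly (the extra zero bits are no-ops
// for those operations), but `and` must set every bit outside the field so the
// neighbouring bytes are preserved.
#include <cstdint>
static uint32_t exampleWidenedAndOperand(uint8_t Operand, uint32_t ShiftAmt,
                                         uint32_t InvMask) {
  return (uint32_t(Operand) << ShiftAmt) | InvMask; // the "AndOperand" above
}
// ---- end example ----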
975
976bool AtomicExpandImpl::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
977 // The basic idea here is that we're expanding a cmpxchg of a
978 // smaller memory size up to a word-sized cmpxchg. To do this, we
979 // need to add a retry-loop for strong cmpxchg, so that
980 // modifications to other parts of the word don't cause a spurious
981 // failure.
982
983 // This generates code like the following:
984 // [[Setup mask values PMV.*]]
985 // %NewVal_Shifted = shl i32 %NewVal, %PMV.ShiftAmt
986 // %Cmp_Shifted = shl i32 %Cmp, %PMV.ShiftAmt
987 // %InitLoaded = load i32* %addr
988 // %InitLoaded_MaskOut = and i32 %InitLoaded, %PMV.Inv_Mask
989 // br partword.cmpxchg.loop
990 // partword.cmpxchg.loop:
991 // %Loaded_MaskOut = phi i32 [ %InitLoaded_MaskOut, %entry ],
992 // [ %OldVal_MaskOut, %partword.cmpxchg.failure ]
993 // %FullWord_NewVal = or i32 %Loaded_MaskOut, %NewVal_Shifted
994 // %FullWord_Cmp = or i32 %Loaded_MaskOut, %Cmp_Shifted
995 // %NewCI = cmpxchg i32* %PMV.AlignedAddr, i32 %FullWord_Cmp,
996 // i32 %FullWord_NewVal success_ordering failure_ordering
997 // %OldVal = extractvalue { i32, i1 } %NewCI, 0
998 // %Success = extractvalue { i32, i1 } %NewCI, 1
999 // br i1 %Success, label %partword.cmpxchg.end,
1000 // label %partword.cmpxchg.failure
1001 // partword.cmpxchg.failure:
1002 // %OldVal_MaskOut = and i32 %OldVal, %PMV.Inv_Mask
1003 // %ShouldContinue = icmp ne i32 %Loaded_MaskOut, %OldVal_MaskOut
1004 // br i1 %ShouldContinue, label %partword.cmpxchg.loop,
1005 // label %partword.cmpxchg.end
1006 // partword.cmpxchg.end:
1007 // %tmp1 = lshr i32 %OldVal, %PMV.ShiftAmt
1008 // %FinalOldVal = trunc i32 %tmp1 to i8
1009 // %tmp2 = insertvalue { i8, i1 } undef, i8 %FinalOldVal, 0
1010 // %Res = insertvalue { i8, i1 } %25, i1 %Success, 1
1011
1012 Value *Addr = CI->getPointerOperand();
1013 Value *Cmp = CI->getCompareOperand();
1014 Value *NewVal = CI->getNewValOperand();
1015
1016 BasicBlock *BB = CI->getParent();
1017 Function *F = BB->getParent();
1018 ReplacementIRBuilder Builder(CI, *DL);
1019 LLVMContext &Ctx = Builder.getContext();
1020
1021 BasicBlock *EndBB =
1022 BB->splitBasicBlock(CI->getIterator(), "partword.cmpxchg.end");
1023 auto FailureBB =
1024 BasicBlock::Create(Ctx, "partword.cmpxchg.failure", F, EndBB);
1025 auto LoopBB = BasicBlock::Create(Ctx, "partword.cmpxchg.loop", F, FailureBB);
1026
1027 // The split call above "helpfully" added a branch at the end of BB
1028 // (to the wrong place).
1029 std::prev(BB->end())->eraseFromParent();
1030 Builder.SetInsertPoint(BB);
1031
1032 PartwordMaskValues PMV =
1033 createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
1034 CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1035
1036 // Shift the incoming values over, into the right location in the word.
1037 Value *NewVal_Shifted =
1038 Builder.CreateShl(Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
1039 Value *Cmp_Shifted =
1040 Builder.CreateShl(Builder.CreateZExt(Cmp, PMV.WordType), PMV.ShiftAmt);
1041
1042 // Load the entire current word, and mask into place the expected and new
1043 // values
1044 LoadInst *InitLoaded = Builder.CreateLoad(PMV.WordType, PMV.AlignedAddr);
1045 InitLoaded->setVolatile(CI->isVolatile());
1046 Value *InitLoaded_MaskOut = Builder.CreateAnd(InitLoaded, PMV.Inv_Mask);
1047 Builder.CreateBr(LoopBB);
1048
1049 // partword.cmpxchg.loop:
1050 Builder.SetInsertPoint(LoopBB);
1051 PHINode *Loaded_MaskOut = Builder.CreatePHI(PMV.WordType, 2);
1052 Loaded_MaskOut->addIncoming(InitLoaded_MaskOut, BB);
1053
1054 // Mask/Or the expected and new values into place in the loaded word.
1055 Value *FullWord_NewVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shifted);
1056 Value *FullWord_Cmp = Builder.CreateOr(Loaded_MaskOut, Cmp_Shifted);
1057 AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg(
1058 PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, PMV.AlignedAddrAlignment,
1059 CI->getSuccessOrdering(), CI->getFailureOrdering(), CI->getSyncScopeID());
1060 NewCI->setVolatile(CI->isVolatile());
1061 // When we're building a strong cmpxchg, we need a loop, so you
1062 // might think we could use a weak cmpxchg inside. But, using strong
1063 // allows the below comparison for ShouldContinue, and we're
1064 // expecting the underlying cmpxchg to be a machine instruction,
1065 // which is strong anyways.
1066 NewCI->setWeak(CI->isWeak());
1067
1068 Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
1069 Value *Success = Builder.CreateExtractValue(NewCI, 1);
1070
1071 if (CI->isWeak())
1072 Builder.CreateBr(EndBB);
1073 else
1074 Builder.CreateCondBr(Success, EndBB, FailureBB);
1075
1076 // partword.cmpxchg.failure:
1077 Builder.SetInsertPoint(FailureBB);
1078 // Upon failure, verify that the masked-out part of the loaded value
1079 // has been modified. If it didn't, abort the cmpxchg, since the
1080 // masked-in part must've.
1081 Value *OldVal_MaskOut = Builder.CreateAnd(OldVal, PMV.Inv_Mask);
1082 Value *ShouldContinue = Builder.CreateICmpNE(Loaded_MaskOut, OldVal_MaskOut);
1083 Builder.CreateCondBr(ShouldContinue, LoopBB, EndBB);
1084
1085 // Add the second value to the phi from above
1086 Loaded_MaskOut->addIncoming(OldVal_MaskOut, FailureBB);
1087
1088 // partword.cmpxchg.end:
1089 Builder.SetInsertPoint(CI);
1090
1091 Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
1092 Value *Res = PoisonValue::get(CI->getType());
1093 Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
1094 Res = Builder.CreateInsertValue(Res, Success, 1);
1095
1096 CI->replaceAllUsesWith(Res);
1097 CI->eraseFromParent();
1098 return true;
1099}
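
// ---- Illustrative example (annotation, not part of AtomicExpandPass.cpp) ----
// A C++ sketch of the same scheme: a strong 8-bit compare-exchange emulated on
// a 32-bit std::atomic word, retrying only when the failure was caused by the
// bytes *outside* the field. ShiftAmt/Mask correspond to PMV.ShiftAmt/PMV.Mask.
#include <atomic>
#include <cstdint>
static bool exampleByteCmpXchg(std::atomic<uint32_t> &Word, uint32_t ShiftAmt,
                               uint8_t Expected, uint8_t Desired,
                               uint8_t &Observed) {
  const uint32_t Mask = 0xFFu << ShiftAmt;
  const uint32_t Cmp = uint32_t(Expected) << ShiftAmt;
  const uint32_t New = uint32_t(Desired) << ShiftAmt;
  uint32_t LoadedMaskOut = Word.load(std::memory_order_relaxed) & ~Mask;
  for (;;) {
    uint32_t FullCmp = LoadedMaskOut | Cmp;
    uint32_t FullNew = LoadedMaskOut | New;
    if (Word.compare_exchange_strong(FullCmp, FullNew,
                                     std::memory_order_seq_cst)) {
      Observed = Expected; // our byte matched and was replaced
      return true;
    }
    // FullCmp now holds the word that was actually in memory.
    Observed = uint8_t(FullCmp >> ShiftAmt);
    uint32_t OldMaskOut = FullCmp & ~Mask;
    if (OldMaskOut == LoadedMaskOut)
      return false;            // our byte really differed: genuine failure
    LoadedMaskOut = OldMaskOut; // only the neighbours changed: retry
  }
}
// ---- end example ----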
1100
1101void AtomicExpandImpl::expandAtomicOpToLLSC(
1102 Instruction *I, Type *ResultType, Value *Addr, Align AddrAlign,
1103 AtomicOrdering MemOpOrder,
1104 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
1105 ReplacementIRBuilder Builder(I, *DL);
1106 Value *Loaded = insertRMWLLSCLoop(Builder, ResultType, Addr, AddrAlign,
1107 MemOpOrder, PerformOp);
1108
1109 I->replaceAllUsesWith(Loaded);
1110 I->eraseFromParent();
1111}
1112
1113void AtomicExpandImpl::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) {
1114 ReplacementIRBuilder Builder(AI, *DL);
1115
1116 PartwordMaskValues PMV =
1117 createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
1118 AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1119
1120 // The value operand must be sign-extended for signed min/max so that the
1121 // target's signed comparison instructions can be used. Otherwise, just
1122 // zero-ext.
1123 Instruction::CastOps CastOp = Instruction::ZExt;
1124 AtomicRMWInst::BinOp RMWOp = AI->getOperation();
1125 if (RMWOp == AtomicRMWInst::Max || RMWOp == AtomicRMWInst::Min)
1126 CastOp = Instruction::SExt;
1127
1128 Value *ValOperand_Shifted = Builder.CreateShl(
1129 Builder.CreateCast(CastOp, AI->getValOperand(), PMV.WordType),
1130 PMV.ShiftAmt, "ValOperand_Shifted");
1131 Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic(
1132 Builder, AI, PMV.AlignedAddr, ValOperand_Shifted, PMV.Mask, PMV.ShiftAmt,
1133 AI->getOrdering());
1134 Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
1135 AI->replaceAllUsesWith(FinalOldResult);
1136 AI->eraseFromParent();
1137}
1138
1139void AtomicExpandImpl::expandAtomicCmpXchgToMaskedIntrinsic(
1140 AtomicCmpXchgInst *CI) {
1141 ReplacementIRBuilder Builder(CI, *DL);
1142
1143 PartwordMaskValues PMV = createMaskInstrs(
1144 Builder, CI, CI->getCompareOperand()->getType(), CI->getPointerOperand(),
1145 CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1146
1147 Value *CmpVal_Shifted = Builder.CreateShl(
1148 Builder.CreateZExt(CI->getCompareOperand(), PMV.WordType), PMV.ShiftAmt,
1149 "CmpVal_Shifted");
1150 Value *NewVal_Shifted = Builder.CreateShl(
1151 Builder.CreateZExt(CI->getNewValOperand(), PMV.WordType), PMV.ShiftAmt,
1152 "NewVal_Shifted");
1153 Value *OldVal = TLI->emitMaskedAtomicCmpXchgIntrinsic(
1154 Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask,
1155 CI->getMergedOrdering());
1156 Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
1157 Value *Res = PoisonValue::get(CI->getType());
1158 Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
1159 Value *Success = Builder.CreateICmpEQ(
1160 CmpVal_Shifted, Builder.CreateAnd(OldVal, PMV.Mask), "Success");
1161 Res = Builder.CreateInsertValue(Res, Success, 1);
1162
1163 CI->replaceAllUsesWith(Res);
1164 CI->eraseFromParent();
1165}
1166
1167Value *AtomicExpandImpl::insertRMWLLSCLoop(
1168 IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
1169 AtomicOrdering MemOpOrder,
1170 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
1171 LLVMContext &Ctx = Builder.getContext();
1172 BasicBlock *BB = Builder.GetInsertBlock();
1173 Function *F = BB->getParent();
1174
1175 assert(AddrAlign >=
1176 F->getParent()->getDataLayout().getTypeStoreSize(ResultTy) &&
1177 "Expected at least natural alignment at this point.");
1178
1179 // Given: atomicrmw some_op iN* %addr, iN %incr ordering
1180 //
1181 // The standard expansion we produce is:
1182 // [...]
1183 // atomicrmw.start:
1184 // %loaded = @load.linked(%addr)
1185 // %new = some_op iN %loaded, %incr
1186 // %stored = @store_conditional(%new, %addr)
1187 // %try_again = icmp i32 ne %stored, 0
1188 // br i1 %try_again, label %loop, label %atomicrmw.end
1189 // atomicrmw.end:
1190 // [...]
1191 BasicBlock *ExitBB =
1192 BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
1193 BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
1194
1195 // The split call above "helpfully" added a branch at the end of BB (to the
1196 // wrong place).
1197 std::prev(BB->end())->eraseFromParent();
1198 Builder.SetInsertPoint(BB);
1199 Builder.CreateBr(LoopBB);
1200
1201 // Start the main loop block now that we've taken care of the preliminaries.
1202 Builder.SetInsertPoint(LoopBB);
1203 Value *Loaded = TLI->emitLoadLinked(Builder, ResultTy, Addr, MemOpOrder);
1204
1205 Value *NewVal = PerformOp(Builder, Loaded);
1206
1207 Value *StoreSuccess =
1208 TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
1209 Value *TryAgain = Builder.CreateICmpNE(
1210 StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
1211 Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);
1212
1213 Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1214 return Loaded;
1215}
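
// ---- Illustrative example (annotation, not part of AtomicExpandPass.cpp) ----
// The loop structure produced above, written as a sketch around two
// hypothetical helpers standing in for the target's load-linked /
// store-conditional intrinsics (declarations only; no portable C++ equivalent
// exists).
#include <cstdint>
uint32_t hypotheticalLoadLinked(volatile uint32_t *Addr);               // assumed
bool hypotheticalStoreConditional(volatile uint32_t *Addr, uint32_t V); // assumed
static uint32_t exampleLLSCFetchAdd(volatile uint32_t *Addr, uint32_t Incr) {
  uint32_t Loaded;
  do {
    Loaded = hypotheticalLoadLinked(Addr); // atomicrmw.start
  } while (!hypotheticalStoreConditional(Addr, Loaded + Incr)); // lost: retry
  return Loaded; // old value, as atomicrmw does
}
// ---- end example ----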
1216
1217/// Convert an atomic cmpxchg of a non-integral type to an integer cmpxchg of
1218/// the equivalent bitwidth. We used to not support pointer cmpxchg in the
1219/// IR. As a migration step, we convert back to what used to be the standard
1220/// way to represent a pointer cmpxchg so that we can update backends one by
1221/// one.
1223AtomicExpandImpl::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {
1224 auto *M = CI->getModule();
1225 Type *NewTy = getCorrespondingIntegerType(CI->getCompareOperand()->getType(),
1226 M->getDataLayout());
1227
1228 ReplacementIRBuilder Builder(CI, *DL);
1229
1230 Value *Addr = CI->getPointerOperand();
1231
1232 Value *NewCmp = Builder.CreatePtrToInt(CI->getCompareOperand(), NewTy);
1233 Value *NewNewVal = Builder.CreatePtrToInt(CI->getNewValOperand(), NewTy);
1234
1235 auto *NewCI = Builder.CreateAtomicCmpXchg(
1236 Addr, NewCmp, NewNewVal, CI->getAlign(), CI->getSuccessOrdering(),
1237 CI->getFailureOrdering(), CI->getSyncScopeID());
1238 NewCI->setVolatile(CI->isVolatile());
1239 NewCI->setWeak(CI->isWeak());
1240 LLVM_DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n");
1241
1242 Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
1243 Value *Succ = Builder.CreateExtractValue(NewCI, 1);
1244
1245 OldVal = Builder.CreateIntToPtr(OldVal, CI->getCompareOperand()->getType());
1246
1247 Value *Res = PoisonValue::get(CI->getType());
1248 Res = Builder.CreateInsertValue(Res, OldVal, 0);
1249 Res = Builder.CreateInsertValue(Res, Succ, 1);
1250
1251 CI->replaceAllUsesWith(Res);
1252 CI->eraseFromParent();
1253 return NewCI;
1254}
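
// ---- Illustrative example (annotation, not part of AtomicExpandPass.cpp) ----
// The pointer-to-integer migration idiom at the C++ level, as a sketch: a
// pointer compare-exchange carried out on an integer-typed atomic, converting
// on the way in (ptrtoint) and on the way out (inttoptr).
#include <atomic>
#include <cstdint>
static bool examplePtrCmpXchg(std::atomic<std::uintptr_t> &Slot,
                              void *&Expected, void *Desired) {
  std::uintptr_t ExpectedInt = reinterpret_cast<std::uintptr_t>(Expected);
  bool Ok = Slot.compare_exchange_strong(
      ExpectedInt, reinterpret_cast<std::uintptr_t>(Desired));
  Expected = reinterpret_cast<void *>(ExpectedInt); // old value, back to a ptr
  return Ok;
}
// ---- end example ----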
1255
1256bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
1257 AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
1258 AtomicOrdering FailureOrder = CI->getFailureOrdering();
1259 Value *Addr = CI->getPointerOperand();
1260 BasicBlock *BB = CI->getParent();
1261 Function *F = BB->getParent();
1262 LLVMContext &Ctx = F->getContext();
1263 // If shouldInsertFencesForAtomic() returns true, then the target does not
1264 // want to deal with memory orders, and emitLeading/TrailingFence should take
1265 // care of everything. Otherwise, emitLeading/TrailingFence are no-ops and we
1266 // should preserve the ordering.
1267 bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(CI);
1268 AtomicOrdering MemOpOrder = ShouldInsertFencesForAtomic
1269 ? AtomicOrdering::Monotonic
1270 : CI->getMergedOrdering();
1271
1272 // In implementations which use a barrier to achieve release semantics, we can
1273 // delay emitting this barrier until we know a store is actually going to be
1274 // attempted. The cost of this delay is that we need 2 copies of the block
1275 // emitting the load-linked, affecting code size.
1276 //
1277 // Ideally, this logic would be unconditional except for the minsize check
1278 // since in other cases the extra blocks naturally collapse down to the
1279 // minimal loop. Unfortunately, this puts too much stress on later
1280 // optimisations so we avoid emitting the extra logic in those cases too.
1281 bool HasReleasedLoadBB = !CI->isWeak() && ShouldInsertFencesForAtomic &&
1282 SuccessOrder != AtomicOrdering::Monotonic &&
1283 SuccessOrder != AtomicOrdering::Acquire &&
1284 !F->hasMinSize();
1285
1286 // There's no overhead for sinking the release barrier in a weak cmpxchg, so
1287 // do it even on minsize.
1288 bool UseUnconditionalReleaseBarrier = F->hasMinSize() && !CI->isWeak();
1289
1290 // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
1291 //
1292 // The full expansion we produce is:
1293 // [...]
1294 // %aligned.addr = ...
1295 // cmpxchg.start:
1296 // %unreleasedload = @load.linked(%aligned.addr)
1297 // %unreleasedload.extract = extract value from %unreleasedload
1298 // %should_store = icmp eq %unreleasedload.extract, %desired
1299 // br i1 %should_store, label %cmpxchg.releasingstore,
1300 // label %cmpxchg.nostore
1301 // cmpxchg.releasingstore:
1302 // fence?
1303 // br label cmpxchg.trystore
1304 // cmpxchg.trystore:
1305 // %loaded.trystore = phi [%unreleasedload, %cmpxchg.releasingstore],
1306 // [%releasedload, %cmpxchg.releasedload]
1307 // %updated.new = insert %new into %loaded.trystore
1308 // %stored = @store_conditional(%updated.new, %aligned.addr)
1309 // %success = icmp eq i32 %stored, 0
1310 // br i1 %success, label %cmpxchg.success,
1311 // label %cmpxchg.releasedload/%cmpxchg.failure
1312 // cmpxchg.releasedload:
1313 // %releasedload = @load.linked(%aligned.addr)
1314 // %releasedload.extract = extract value from %releasedload
1315 // %should_store = icmp eq %releasedload.extract, %desired
1316 // br i1 %should_store, label %cmpxchg.trystore,
1317 // label %cmpxchg.failure
1318 // cmpxchg.success:
1319 // fence?
1320 // br label %cmpxchg.end
1321 // cmpxchg.nostore:
1322 // %loaded.nostore = phi [%unreleasedload, %cmpxchg.start],
1323 // [%releasedload,
1324 // %cmpxchg.releasedload/%cmpxchg.trystore]
1325 // @load_linked_fail_balance()?
1326 // br label %cmpxchg.failure
1327 // cmpxchg.failure:
1328 // fence?
1329 // br label %cmpxchg.end
1330 // cmpxchg.end:
1331 // %loaded.exit = phi [%loaded.nostore, %cmpxchg.failure],
1332 // [%loaded.trystore, %cmpxchg.trystore]
1333 // %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
1334 // %loaded = extract value from %loaded.exit
1335 // %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
1336 // %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
1337 // [...]
1338 BasicBlock *ExitBB = BB->splitBasicBlock(CI->getIterator(), "cmpxchg.end");
1339 auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
1340 auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB);
1341 auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB);
1342 auto ReleasedLoadBB =
1343 BasicBlock::Create(Ctx, "cmpxchg.releasedload", F, SuccessBB);
1344 auto TryStoreBB =
1345 BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ReleasedLoadBB);
1346 auto ReleasingStoreBB =
1347 BasicBlock::Create(Ctx, "cmpxchg.fencedstore", F, TryStoreBB);
1348 auto StartBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, ReleasingStoreBB);
1349
1350 ReplacementIRBuilder Builder(CI, *DL);
1351
1352 // The split call above "helpfully" added a branch at the end of BB (to the
1353 // wrong place), but we might want a fence too. It's easiest to just remove
1354 // the branch entirely.
1355 std::prev(BB->end())->eraseFromParent();
1356 Builder.SetInsertPoint(BB);
1357 if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier)
1358 TLI->emitLeadingFence(Builder, CI, SuccessOrder);
1359
1360 PartwordMaskValues PMV =
1361 createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
1362 CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1363 Builder.CreateBr(StartBB);
1364
1365 // Start the main loop block now that we've taken care of the preliminaries.
1366 Builder.SetInsertPoint(StartBB);
1367 Value *UnreleasedLoad =
1368 TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
1369 Value *UnreleasedLoadExtract =
1370 extractMaskedValue(Builder, UnreleasedLoad, PMV);
1371 Value *ShouldStore = Builder.CreateICmpEQ(
1372 UnreleasedLoadExtract, CI->getCompareOperand(), "should_store");
1373
1374 // If the cmpxchg doesn't actually need any ordering when it fails, we can
1375 // jump straight past that fence instruction (if it exists).
1376 Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB);
1377
1378 Builder.SetInsertPoint(ReleasingStoreBB);
1379 if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
1380 TLI->emitLeadingFence(Builder, CI, SuccessOrder);
1381 Builder.CreateBr(TryStoreBB);
1382
1383 Builder.SetInsertPoint(TryStoreBB);
1384 PHINode *LoadedTryStore =
1385 Builder.CreatePHI(PMV.WordType, 2, "loaded.trystore");
1386 LoadedTryStore->addIncoming(UnreleasedLoad, ReleasingStoreBB);
1387 Value *NewValueInsert =
1388 insertMaskedValue(Builder, LoadedTryStore, CI->getNewValOperand(), PMV);
1389 Value *StoreSuccess = TLI->emitStoreConditional(Builder, NewValueInsert,
1390 PMV.AlignedAddr, MemOpOrder);
1391 StoreSuccess = Builder.CreateICmpEQ(
1392 StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
1393 BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
1394 Builder.CreateCondBr(StoreSuccess, SuccessBB,
1395 CI->isWeak() ? FailureBB : RetryBB);
1396
1397 Builder.SetInsertPoint(ReleasedLoadBB);
1398 Value *SecondLoad;
1399 if (HasReleasedLoadBB) {
1400 SecondLoad =
1401 TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
1402 Value *SecondLoadExtract = extractMaskedValue(Builder, SecondLoad, PMV);
1403 ShouldStore = Builder.CreateICmpEQ(SecondLoadExtract,
1404 CI->getCompareOperand(), "should_store");
1405
1406 // If the cmpxchg doesn't actually need any ordering when it fails, we can
1407 // jump straight past that fence instruction (if it exists).
1408 Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB);
1409 // Update PHI node in TryStoreBB.
1410 LoadedTryStore->addIncoming(SecondLoad, ReleasedLoadBB);
1411 } else
1412 Builder.CreateUnreachable();
1413
1414 // Make sure later instructions don't get reordered with a fence if
1415 // necessary.
1416 Builder.SetInsertPoint(SuccessBB);
1417 if (ShouldInsertFencesForAtomic ||
1418 TLI->shouldInsertTrailingFenceForAtomicStore(CI))
1419 TLI->emitTrailingFence(Builder, CI, SuccessOrder);
1420 Builder.CreateBr(ExitBB);
1421
1422 Builder.SetInsertPoint(NoStoreBB);
1423 PHINode *LoadedNoStore =
1424 Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.nostore");
1425 LoadedNoStore->addIncoming(UnreleasedLoad, StartBB);
1426 if (HasReleasedLoadBB)
1427 LoadedNoStore->addIncoming(SecondLoad, ReleasedLoadBB);
1428
1429 // In the failing case, where we don't execute the store-conditional, the
1430 // target might want to balance out the load-linked with a dedicated
1431 // instruction (e.g., on ARM, clearing the exclusive monitor).
1432 TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
1433 Builder.CreateBr(FailureBB);
1434
1435 Builder.SetInsertPoint(FailureBB);
1436 PHINode *LoadedFailure =
1437 Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.failure");
1438 LoadedFailure->addIncoming(LoadedNoStore, NoStoreBB);
1439 if (CI->isWeak())
1440 LoadedFailure->addIncoming(LoadedTryStore, TryStoreBB);
1441 if (ShouldInsertFencesForAtomic)
1442 TLI->emitTrailingFence(Builder, CI, FailureOrder);
1443 Builder.CreateBr(ExitBB);
1444
1445 // Finally, we have control-flow based knowledge of whether the cmpxchg
1446 // succeeded or not. We expose this to later passes by converting any
1447 // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate
1448 // PHI.
1449 Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1450 PHINode *LoadedExit =
1451 Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.exit");
1452 LoadedExit->addIncoming(LoadedTryStore, SuccessBB);
1453 LoadedExit->addIncoming(LoadedFailure, FailureBB);
1454 PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2, "success");
1455 Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
1456 Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);
1457
1458 // This is the "exit value" from the cmpxchg expansion. It may be of
1459 // a type wider than the one in the cmpxchg instruction.
1460 Value *LoadedFull = LoadedExit;
1461
1462 Builder.SetInsertPoint(ExitBB, std::next(Success->getIterator()));
1463 Value *Loaded = extractMaskedValue(Builder, LoadedFull, PMV);
1464
1465 // Look for any users of the cmpxchg that are just comparing the loaded value
1466 // against the desired one, and replace them with the CFG-derived version.
1467 SmallVector<ExtractValueInst *, 2> PrunedInsts;
1468 for (auto *User : CI->users()) {
1469 ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User);
1470 if (!EV)
1471 continue;
1472
1473 assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
1474 "weird extraction from { iN, i1 }");
1475
1476 if (EV->getIndices()[0] == 0)
1477 EV->replaceAllUsesWith(Loaded);
1478 else
1480
1481 PrunedInsts.push_back(EV);
1482 }
1483
1484 // We can remove the instructions now we're no longer iterating through them.
1485 for (auto *EV : PrunedInsts)
1486 EV->eraseFromParent();
1487
1488 if (!CI->use_empty()) {
1489 // Some use of the full struct return that we don't understand has happened,
1490 // so we've got to reconstruct it properly.
1491 Value *Res;
1492 Res = Builder.CreateInsertValue(PoisonValue::get(CI->getType()), Loaded, 0);
1493 Res = Builder.CreateInsertValue(Res, Success, 1);
1494
1495 CI->replaceAllUsesWith(Res);
1496 }
1497
1498 CI->eraseFromParent();
1499 return true;
1500}
1501
1502bool AtomicExpandImpl::isIdempotentRMW(AtomicRMWInst *RMWI) {
1503 auto C = dyn_cast<ConstantInt>(RMWI->getValOperand());
1504 if (!C)
1505 return false;
1506
1507 AtomicRMWInst::BinOp Op = RMWI->getOperation();
1508 switch (Op) {
1509 case AtomicRMWInst::Add:
1510 case AtomicRMWInst::Sub:
1511 case AtomicRMWInst::Or:
1512 case AtomicRMWInst::Xor:
1513 return C->isZero();
1514 case AtomicRMWInst::And:
1515 return C->isMinusOne();
1516 // FIXME: we could also treat Min/Max/UMin/UMax by the INT_MIN/INT_MAX/...
1517 default:
1518 return false;
1519 }
1520}
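
// ---- Illustrative example (annotation, not part of AtomicExpandPass.cpp) ----
// The identities checked above, as a sketch: each of these RMW forms leaves
// memory unchanged, so only the returned value (and the ordering) matters,
// which is what lets the target turn the atomicrmw into a fenced load below.
#include <atomic>
#include <cstdint>
static void exampleIdempotentRMWs(std::atomic<uint32_t> &A) {
  (void)A.fetch_add(0);            // x + 0  == x
  (void)A.fetch_sub(0);            // x - 0  == x
  (void)A.fetch_or(0);             // x | 0  == x
  (void)A.fetch_xor(0);            // x ^ 0  == x
  (void)A.fetch_and(~uint32_t(0)); // x & ~0 == x
}
// ---- end example ----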
1521
1522bool AtomicExpandImpl::simplifyIdempotentRMW(AtomicRMWInst *RMWI) {
1523 if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
1524 tryExpandAtomicLoad(ResultingLoad);
1525 return true;
1526 }
1527 return false;
1528}
1529
1530Value *AtomicExpandImpl::insertRMWCmpXchgLoop(
1531 IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
1532 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
1533 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
1534 CreateCmpXchgInstFun CreateCmpXchg) {
1535 LLVMContext &Ctx = Builder.getContext();
1536 BasicBlock *BB = Builder.GetInsertBlock();
1537 Function *F = BB->getParent();
1538
1539 // Given: atomicrmw some_op iN* %addr, iN %incr ordering
1540 //
1541 // The standard expansion we produce is:
1542 // [...]
1543 // %init_loaded = load atomic iN* %addr
1544 // br label %loop
1545 // loop:
1546 // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
1547 // %new = some_op iN %loaded, %incr
1548 // %pair = cmpxchg iN* %addr, iN %loaded, iN %new
1549 // %new_loaded = extractvalue { iN, i1 } %pair, 0
1550 // %success = extractvalue { iN, i1 } %pair, 1
1551 // br i1 %success, label %atomicrmw.end, label %loop
1552 // atomicrmw.end:
1553 // [...]
1554 BasicBlock *ExitBB =
1555 BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
1556 BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
1557
1558 // The split call above "helpfully" added a branch at the end of BB (to the
1559 // wrong place), but we want a load. It's easiest to just remove
1560 // the branch entirely.
1561 std::prev(BB->end())->eraseFromParent();
1562 Builder.SetInsertPoint(BB);
1563 LoadInst *InitLoaded = Builder.CreateAlignedLoad(ResultTy, Addr, AddrAlign);
1564 Builder.CreateBr(LoopBB);
1565
1566 // Start the main loop block now that we've taken care of the preliminaries.
1567 Builder.SetInsertPoint(LoopBB);
1568 PHINode *Loaded = Builder.CreatePHI(ResultTy, 2, "loaded");
1569 Loaded->addIncoming(InitLoaded, BB);
1570
1571 Value *NewVal = PerformOp(Builder, Loaded);
1572
1573 Value *NewLoaded = nullptr;
1574 Value *Success = nullptr;
1575
1576 CreateCmpXchg(Builder, Addr, Loaded, NewVal, AddrAlign,
1577 MemOpOrder == AtomicOrdering::Unordered
1578 ? AtomicOrdering::Monotonic
1579 : MemOpOrder,
1580 SSID, Success, NewLoaded);
1581 assert(Success && NewLoaded);
1582
1583 Loaded->addIncoming(NewLoaded, LoopBB);
1584
1585 Builder.CreateCondBr(Success, ExitBB, LoopBB);
1586
1587 Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1588 return NewLoaded;
1589}
1590
1591bool AtomicExpandImpl::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
1592 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
1593 unsigned ValueSize = getAtomicOpSize(CI);
1594
1595 switch (TLI->shouldExpandAtomicCmpXchgInIR(CI)) {
1596 default:
1597 llvm_unreachable("Unhandled case in tryExpandAtomicCmpXchg");
1598 case TargetLoweringBase::AtomicExpansionKind::None:
1599 if (ValueSize < MinCASSize)
1600 return expandPartwordCmpXchg(CI);
1601 return false;
1602 case TargetLoweringBase::AtomicExpansionKind::LLSC: {
1603 return expandAtomicCmpXchg(CI);
1604 }
1605 case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic:
1606 expandAtomicCmpXchgToMaskedIntrinsic(CI);
1607 return true;
1608 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
1609 return lowerAtomicCmpXchgInst(CI);
1610 }
1611}
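// Editor's note: a minimal sketch (not the LLVM API) of the partword gate used
// in the expansion-kind-None case above: a cmpxchg narrower than the smallest
// width the target can compare-and-swap natively (for example an i8 CAS on a
// target whose minimum cmpxchg width is 32 bits) is routed through
// expandPartwordCmpXchg, which wraps a word-sized CAS in mask/shift code.
static bool needsPartwordCmpXchgSketch(unsigned ValueSizeInBytes,
                                       unsigned MinCmpXchgSizeInBits) {
  return ValueSizeInBytes < MinCmpXchgSizeInBits / 8;
}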
1612
1613// Note: This function is exposed externally by AtomicExpandUtils.h
1614bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
1615 CreateCmpXchgInstFun CreateCmpXchg) {
1616 ReplacementIRBuilder Builder(AI, AI->getModule()->getDataLayout());
1617 Builder.setIsFPConstrained(
1618 AI->getFunction()->hasFnAttribute(Attribute::StrictFP));
1619
1620 // FIXME: If FP exceptions are observable, we should force them off for the
1621 // loop for the FP atomics.
1622 Value *Loaded = AtomicExpandImpl::insertRMWCmpXchgLoop(
1623 Builder, AI->getType(), AI->getPointerOperand(), AI->getAlign(),
1624 AI->getOrdering(), AI->getSyncScopeID(),
1625 [&](IRBuilderBase &Builder, Value *Loaded) {
1626 return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
1627 AI->getValOperand());
1628 },
1629 CreateCmpXchg);
1630
1631 AI->replaceAllUsesWith(Loaded);
1632 AI->eraseFromParent();
1633 return true;
1634}
1635
1636// In order to use one of the sized library calls such as
1637// __atomic_fetch_add_4, the alignment must be sufficient, the size
1638// must be one of the potentially-specialized sizes, and the value
1639// type must actually exist in C on the target (otherwise, the
1640// function wouldn't actually be defined.)
1641static bool canUseSizedAtomicCall(unsigned Size, Align Alignment,
1642 const DataLayout &DL) {
1643 // TODO: "LargestSize" is an approximation for "largest type that
1644 // you can express in C". It seems to be the case that int128 is
1645 // supported on all 64-bit platforms, otherwise only up to 64-bit
1646 // integers are supported. If we get this wrong, then we'll try to
1647 // call a sized libcall that doesn't actually exist. There should
1648 // really be some more reliable way in LLVM of determining integer
1649 // sizes which are valid in the target's C ABI...
1650 unsigned LargestSize = DL.getLargestLegalIntTypeSizeInBits() >= 64 ? 16 : 8;
1651 return Alignment >= Size &&
1652 (Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16) &&
1653 Size <= LargestSize;
1654}
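// Editor's note: a standalone sketch (not the LLVM API) showing how the
// predicate above behaves, assuming a 64-bit target where i128 is the largest
// legal integer type, i.e. LargestSize == 16.
static bool canUseSizedAtomicCallSketch(unsigned Size, unsigned Alignment) {
  const unsigned LargestSize = 16;
  return Alignment >= Size &&
         (Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16) &&
         Size <= LargestSize;
}
// canUseSizedAtomicCallSketch(4, 4) -> true  (e.g. __atomic_fetch_add_4 is usable)
// canUseSizedAtomicCallSketch(4, 2) -> false (underaligned; use the generic call)
// canUseSizedAtomicCallSketch(3, 4) -> false (3 bytes is not a specialized size)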
1655
1656void AtomicExpandImpl::expandAtomicLoadToLibcall(LoadInst *I) {
1657 static const RTLIB::Libcall Libcalls[6] = {
1658 RTLIB::ATOMIC_LOAD, RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2,
1659 RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16};
1660 unsigned Size = getAtomicOpSize(I);
1661
1662 bool expanded = expandAtomicOpToLibcall(
1663 I, Size, I->getAlign(), I->getPointerOperand(), nullptr, nullptr,
1664 I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1665 if (!expanded)
1666 report_fatal_error("expandAtomicOpToLibcall shouldn't fail for Load");
1667}
1668
1669void AtomicExpandImpl::expandAtomicStoreToLibcall(StoreInst *I) {
1670 static const RTLIB::Libcall Libcalls[6] = {
1671 RTLIB::ATOMIC_STORE, RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2,
1672 RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16};
1673 unsigned Size = getAtomicOpSize(I);
1674
1675 bool expanded = expandAtomicOpToLibcall(
1676 I, Size, I->getAlign(), I->getPointerOperand(), I->getValueOperand(),
1677 nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1678 if (!expanded)
1679 report_fatal_error("expandAtomicOpToLibcall shouldn't fail for Store");
1680}
1681
1682void AtomicExpandImpl::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) {
1683 static const RTLIB::Libcall Libcalls[6] = {
1684 RTLIB::ATOMIC_COMPARE_EXCHANGE, RTLIB::ATOMIC_COMPARE_EXCHANGE_1,
1685 RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4,
1686 RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16};
1687 unsigned Size = getAtomicOpSize(I);
1688
1689 bool expanded = expandAtomicOpToLibcall(
1690 I, Size, I->getAlign(), I->getPointerOperand(), I->getNewValOperand(),
1691 I->getCompareOperand(), I->getSuccessOrdering(), I->getFailureOrdering(),
1692 Libcalls);
1693 if (!expanded)
1694 report_fatal_error("expandAtomicOpToLibcall shouldn't fail for CAS");
1695}
1696
1697static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) {
1698 static const RTLIB::Libcall LibcallsXchg[6] = {
1699 RTLIB::ATOMIC_EXCHANGE, RTLIB::ATOMIC_EXCHANGE_1,
1700 RTLIB::ATOMIC_EXCHANGE_2, RTLIB::ATOMIC_EXCHANGE_4,
1701 RTLIB::ATOMIC_EXCHANGE_8, RTLIB::ATOMIC_EXCHANGE_16};
1702 static const RTLIB::Libcall LibcallsAdd[6] = {
1703 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_ADD_1,
1704 RTLIB::ATOMIC_FETCH_ADD_2, RTLIB::ATOMIC_FETCH_ADD_4,
1705 RTLIB::ATOMIC_FETCH_ADD_8, RTLIB::ATOMIC_FETCH_ADD_16};
1706 static const RTLIB::Libcall LibcallsSub[6] = {
1707 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_SUB_1,
1708 RTLIB::ATOMIC_FETCH_SUB_2, RTLIB::ATOMIC_FETCH_SUB_4,
1709 RTLIB::ATOMIC_FETCH_SUB_8, RTLIB::ATOMIC_FETCH_SUB_16};
1710 static const RTLIB::Libcall LibcallsAnd[6] = {
1711 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_AND_1,
1712 RTLIB::ATOMIC_FETCH_AND_2, RTLIB::ATOMIC_FETCH_AND_4,
1713 RTLIB::ATOMIC_FETCH_AND_8, RTLIB::ATOMIC_FETCH_AND_16};
1714 static const RTLIB::Libcall LibcallsOr[6] = {
1715 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_OR_1,
1716 RTLIB::ATOMIC_FETCH_OR_2, RTLIB::ATOMIC_FETCH_OR_4,
1717 RTLIB::ATOMIC_FETCH_OR_8, RTLIB::ATOMIC_FETCH_OR_16};
1718 static const RTLIB::Libcall LibcallsXor[6] = {
1719 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_XOR_1,
1720 RTLIB::ATOMIC_FETCH_XOR_2, RTLIB::ATOMIC_FETCH_XOR_4,
1721 RTLIB::ATOMIC_FETCH_XOR_8, RTLIB::ATOMIC_FETCH_XOR_16};
1722 static const RTLIB::Libcall LibcallsNand[6] = {
1723 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_NAND_1,
1724 RTLIB::ATOMIC_FETCH_NAND_2, RTLIB::ATOMIC_FETCH_NAND_4,
1725 RTLIB::ATOMIC_FETCH_NAND_8, RTLIB::ATOMIC_FETCH_NAND_16};
1726
1727 switch (Op) {
1728 case AtomicRMWInst::BAD_BINOP:
1729 llvm_unreachable("Should not have BAD_BINOP.");
1730 case AtomicRMWInst::Xchg:
1731 return ArrayRef(LibcallsXchg);
1732 case AtomicRMWInst::Add:
1733 return ArrayRef(LibcallsAdd);
1734 case AtomicRMWInst::Sub:
1735 return ArrayRef(LibcallsSub);
1736 case AtomicRMWInst::And:
1737 return ArrayRef(LibcallsAnd);
1738 case AtomicRMWInst::Or:
1739 return ArrayRef(LibcallsOr);
1740 case AtomicRMWInst::Xor:
1741 return ArrayRef(LibcallsXor);
1742 case AtomicRMWInst::Nand:
1743 return ArrayRef(LibcallsNand);
1744 case AtomicRMWInst::Max:
1745 case AtomicRMWInst::Min:
1746 case AtomicRMWInst::UMax:
1747 case AtomicRMWInst::UMin:
1748 case AtomicRMWInst::FMax:
1749 case AtomicRMWInst::FMin:
1750 case AtomicRMWInst::FAdd:
1751 case AtomicRMWInst::FSub:
1752 case AtomicRMWInst::UIncWrap:
1753 case AtomicRMWInst::UDecWrap:
1754 // No atomic libcalls are available for max/min/umax/umin.
1755 return {};
1756 }
1757 llvm_unreachable("Unexpected AtomicRMW operation.");
1758}
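// Editor's note: a sketch (not the LLVM API) of the indexing convention shared
// by the tables above and the switch in expandAtomicOpToLibcall below: slot 0
// holds the generic __atomic_* entry point (UNKNOWN_LIBCALL when none exists,
// as for the fetch_* families), and slots 1..5 hold the size-specialized
// variants for 1, 2, 4, 8 and 16 bytes.
static unsigned sizedLibcallSlotSketch(unsigned SizeInBytes) {
  switch (SizeInBytes) {
  case 1:  return 1;
  case 2:  return 2;
  case 4:  return 3;
  case 8:  return 4;
  case 16: return 5;
  default: return 0; // fall back to the generic entry, if the table has one
  }
}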
1759
1760void AtomicExpandImpl::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
1761 ArrayRef<RTLIB::Libcall> Libcalls = GetRMWLibcall(I->getOperation());
1762
1763 unsigned Size = getAtomicOpSize(I);
1764
1765 bool Success = false;
1766 if (!Libcalls.empty())
1767 Success = expandAtomicOpToLibcall(
1768 I, Size, I->getAlign(), I->getPointerOperand(), I->getValOperand(),
1769 nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1770
1771 // The expansion failed: either there were no libcalls at all for
1772 // the operation (min/max), or there were only size-specialized
1773 // libcalls (add/sub/etc) and we needed a generic. So, expand to a
1774 // CAS libcall, via a CAS loop, instead.
1775 if (!Success) {
1776 expandAtomicRMWToCmpXchg(
1777 I, [this](IRBuilderBase &Builder, Value *Addr, Value *Loaded,
1778 Value *NewVal, Align Alignment, AtomicOrdering MemOpOrder,
1779 SyncScope::ID SSID, Value *&Success, Value *&NewLoaded) {
1780 // Create the CAS instruction normally...
1781 AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
1782 Addr, Loaded, NewVal, Alignment, MemOpOrder,
1783 AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID);
1784 Success = Builder.CreateExtractValue(Pair, 1, "success");
1785 NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
1786
1787 // ...and then expand the CAS into a libcall.
1788 expandAtomicCASToLibcall(Pair);
1789 });
1790 }
1791}
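// Editor's note: a sketch (not the LLVM API) of the fallback decision above.
// The libcall route fails either when the operation has no __atomic_fetch_*
// family at all (min/max and the FP/wrapping operations), or when only sized
// variants exist and this access does not fit them; in both cases the RMW is
// rewritten as a CAS loop whose cmpxchg is then lowered to
// __atomic_compare_exchange*.
static bool mustFallBackToCASLoopSketch(bool HasAnyLibcall, bool HasGenericEntry,
                                        bool FitsSizedCall) {
  if (!HasAnyLibcall)
    return true; // e.g. atomicrmw max: no __atomic_fetch_max libcall exists
  return !FitsSizedCall && !HasGenericEntry; // e.g. fetch_add needing a generic
                                             // form that the libcall table lacks
}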
1792
1793// A helper routine for the above expandAtomic*ToLibcall functions.
1794//
1795// 'Libcalls' contains an array of enum values for the particular
1796// ATOMIC libcalls to be emitted. All of the other arguments besides
1797// 'I' are extracted from the Instruction subclass by the
1798// caller. Depending on the particular call, some will be null.
1799bool AtomicExpandImpl::expandAtomicOpToLibcall(
1800 Instruction *I, unsigned Size, Align Alignment, Value *PointerOperand,
1801 Value *ValueOperand, Value *CASExpected, AtomicOrdering Ordering,
1802 AtomicOrdering Ordering2, ArrayRef<RTLIB::Libcall> Libcalls) {
1803 assert(Libcalls.size() == 6);
1804
1805 LLVMContext &Ctx = I->getContext();
1806 Module *M = I->getModule();
1807 const DataLayout &DL = M->getDataLayout();
1808 IRBuilder<> Builder(I);
1809 IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front());
1810
1811 bool UseSizedLibcall = canUseSizedAtomicCall(Size, Alignment, DL);
1812 Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8);
1813
1814 const Align AllocaAlignment = DL.getPrefTypeAlign(SizedIntTy);
1815
1816 // TODO: the "order" argument type is "int", not int32. So
1817 // getInt32Ty may be wrong if the arch uses e.g. 16-bit ints.
1818 ConstantInt *SizeVal64 = ConstantInt::get(Type::getInt64Ty(Ctx), Size);
1819 assert(Ordering != AtomicOrdering::NotAtomic && "expect atomic MO");
1820 Constant *OrderingVal =
1821 ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering));
1822 Constant *Ordering2Val = nullptr;
1823 if (CASExpected) {
1824 assert(Ordering2 != AtomicOrdering::NotAtomic && "expect atomic MO");
1825 Ordering2Val =
1826 ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering2));
1827 }
1828 bool HasResult = I->getType() != Type::getVoidTy(Ctx);
1829
1830 RTLIB::Libcall RTLibType;
1831 if (UseSizedLibcall) {
1832 switch (Size) {
1833 case 1:
1834 RTLibType = Libcalls[1];
1835 break;
1836 case 2:
1837 RTLibType = Libcalls[2];
1838 break;
1839 case 4:
1840 RTLibType = Libcalls[3];
1841 break;
1842 case 8:
1843 RTLibType = Libcalls[4];
1844 break;
1845 case 16:
1846 RTLibType = Libcalls[5];
1847 break;
1848 }
1849 } else if (Libcalls[0] != RTLIB::UNKNOWN_LIBCALL) {
1850 RTLibType = Libcalls[0];
1851 } else {
1852 // Can't use sized function, and there's no generic for this
1853 // operation, so give up.
1854 return false;
1855 }
1856
1857 if (!TLI->getLibcallName(RTLibType)) {
1858 // This target does not implement the requested atomic libcall so give up.
1859 return false;
1860 }
1861
1862 // Build up the function call. There's two kinds. First, the sized
1863 // variants. These calls are going to be one of the following (with
1864 // N=1,2,4,8,16):
1865 // iN __atomic_load_N(iN *ptr, int ordering)
1866 // void __atomic_store_N(iN *ptr, iN val, int ordering)
1867 // iN __atomic_{exchange|fetch_*}_N(iN *ptr, iN val, int ordering)
1868 // bool __atomic_compare_exchange_N(iN *ptr, iN *expected, iN desired,
1869 // int success_order, int failure_order)
1870 //
1871 // Note that these functions can be used for non-integer atomic
1872 // operations, the values just need to be bitcast to integers on the
1873 // way in and out.
1874 //
1875 // And, then, the generic variants. They look like the following:
1876 // void __atomic_load(size_t size, void *ptr, void *ret, int ordering)
1877 // void __atomic_store(size_t size, void *ptr, void *val, int ordering)
1878 // void __atomic_exchange(size_t size, void *ptr, void *val, void *ret,
1879 // int ordering)
1880 // bool __atomic_compare_exchange(size_t size, void *ptr, void *expected,
1881 // void *desired, int success_order,
1882 // int failure_order)
1883 //
1884 // The different signatures are built up depending on the
1885 // 'UseSizedLibcall', 'CASExpected', 'ValueOperand', and 'HasResult'
1886 // variables.
1887
1888 AllocaInst *AllocaCASExpected = nullptr;
1889 AllocaInst *AllocaValue = nullptr;
1890 AllocaInst *AllocaResult = nullptr;
1891
1892 Type *ResultTy;
1893 SmallVector<Value *, 6> Args;
1894 AttributeList Attr;
1895
1896 // 'size' argument.
1897 if (!UseSizedLibcall) {
1898 // Note, getIntPtrType is assumed equivalent to size_t.
1899 Args.push_back(ConstantInt::get(DL.getIntPtrType(Ctx), Size));
1900 }
1901
1902 // 'ptr' argument.
1903 // note: This assumes all address spaces share a common libfunc
1904 // implementation and that addresses are convertible. For systems without
1905 // that property, we'd need to extend this mechanism to support AS-specific
1906 // families of atomic intrinsics.
1907 Value *PtrVal = PointerOperand;
1908 PtrVal = Builder.CreateAddrSpaceCast(PtrVal, PointerType::getUnqual(Ctx));
1909 Args.push_back(PtrVal);
1910
1911 // 'expected' argument, if present.
1912 if (CASExpected) {
1913 AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType());
1914 AllocaCASExpected->setAlignment(AllocaAlignment);
1915 Builder.CreateLifetimeStart(AllocaCASExpected, SizeVal64);
1916 Builder.CreateAlignedStore(CASExpected, AllocaCASExpected, AllocaAlignment);
1917 Args.push_back(AllocaCASExpected);
1918 }
1919
1920 // 'val' argument ('desired' for cas), if present.
1921 if (ValueOperand) {
1922 if (UseSizedLibcall) {
1923 Value *IntValue =
1924 Builder.CreateBitOrPointerCast(ValueOperand, SizedIntTy);
1925 Args.push_back(IntValue);
1926 } else {
1927 AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType());
1928 AllocaValue->setAlignment(AllocaAlignment);
1929 Builder.CreateLifetimeStart(AllocaValue, SizeVal64);
1930 Builder.CreateAlignedStore(ValueOperand, AllocaValue, AllocaAlignment);
1931 Args.push_back(AllocaValue);
1932 }
1933 }
1934
1935 // 'ret' argument.
1936 if (!CASExpected && HasResult && !UseSizedLibcall) {
1937 AllocaResult = AllocaBuilder.CreateAlloca(I->getType());
1938 AllocaResult->setAlignment(AllocaAlignment);
1939 Builder.CreateLifetimeStart(AllocaResult, SizeVal64);
1940 Args.push_back(AllocaResult);
1941 }
1942
1943 // 'ordering' ('success_order' for cas) argument.
1944 Args.push_back(OrderingVal);
1945
1946 // 'failure_order' argument, if present.
1947 if (Ordering2Val)
1948 Args.push_back(Ordering2Val);
1949
1950 // Now, the return type.
1951 if (CASExpected) {
1952 ResultTy = Type::getInt1Ty(Ctx);
1953 Attr = Attr.addRetAttribute(Ctx, Attribute::ZExt);
1954 } else if (HasResult && UseSizedLibcall)
1955 ResultTy = SizedIntTy;
1956 else
1957 ResultTy = Type::getVoidTy(Ctx);
1958
1959 // Done with setting up arguments and return types, create the call:
1960 SmallVector<Type *, 6> ArgTys;
1961 for (Value *Arg : Args)
1962 ArgTys.push_back(Arg->getType());
1963 FunctionType *FnType = FunctionType::get(ResultTy, ArgTys, false);
1964 FunctionCallee LibcallFn =
1965 M->getOrInsertFunction(TLI->getLibcallName(RTLibType), FnType, Attr);
1966 CallInst *Call = Builder.CreateCall(LibcallFn, Args);
1967 Call->setAttributes(Attr);
1968 Value *Result = Call;
1969
1970 // And then, extract the results...
1971 if (ValueOperand && !UseSizedLibcall)
1972 Builder.CreateLifetimeEnd(AllocaValue, SizeVal64);
1973
1974 if (CASExpected) {
1975 // The final result from the CAS is {load of 'expected' alloca, bool result
1976 // from call}
1977 Type *FinalResultTy = I->getType();
1978 Value *V = PoisonValue::get(FinalResultTy);
1979 Value *ExpectedOut = Builder.CreateAlignedLoad(
1980 CASExpected->getType(), AllocaCASExpected, AllocaAlignment);
1981 Builder.CreateLifetimeEnd(AllocaCASExpected, SizeVal64);
1982 V = Builder.CreateInsertValue(V, ExpectedOut, 0);
1983 V = Builder.CreateInsertValue(V, Result, 1);
1984 I->replaceAllUsesWith(V);
1985 } else if (HasResult) {
1986 Value *V;
1987 if (UseSizedLibcall)
1988 V = Builder.CreateBitOrPointerCast(Result, I->getType());
1989 else {
1990 V = Builder.CreateAlignedLoad(I->getType(), AllocaResult,
1991 AllocaAlignment);
1992 Builder.CreateLifetimeEnd(AllocaResult, SizeVal64);
1993 }
1994 I->replaceAllUsesWith(V);
1995 }
1996 I->eraseFromParent();
1997 return true;
1998}
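// Editor's note: a condensed sketch (not the LLVM API) of the argument-
// marshalling decisions made above; the field names are illustrative only. It
// records which stack temporaries the expansion allocates for a given call
// shape, mirroring the four conditional blocks that build up Args.
struct LibcallShapeSketch {
  bool PassSizeArg;      // generic calls take a leading size_t size argument
  bool NeedExpectedSlot; // cmpxchg passes 'expected' indirectly and reloads it
  bool NeedValueSlot;    // generic calls pass the value operand indirectly
  bool NeedResultSlot;   // generic value-returning calls write through 'ret'
};

static LibcallShapeSketch classifyLibcallSketch(bool UseSizedLibcall,
                                                bool IsCmpXchg, bool HasResult,
                                                bool HasValueOperand) {
  LibcallShapeSketch S;
  S.PassSizeArg = !UseSizedLibcall;
  S.NeedExpectedSlot = IsCmpXchg;
  S.NeedValueSlot = HasValueOperand && !UseSizedLibcall;
  S.NeedResultSlot = !IsCmpXchg && HasResult && !UseSizedLibcall;
  return S;
}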