llvm.org GIT mirror llvm / 09da6b5
Atomics: promote ARM's IR-based atomics pass to CodeGen.

Still only 32-bit ARM using it at this stage, but the promotion allows direct testing via opt and is a reasonably self-contained patch on the way to switching ARM64. At this point, other targets should be able to make use of it without too much difficulty if they want. (See ARM64 commit coming soon for an example).

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@206485 91177308-0d34-0410-b5e6-96231b3b80d8
Tim Northover, 6 years ago
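For context, the commit turns what was an ARM-internal pass into a library pass any backend can schedule. A rough sketch of the per-target wiring, assuming a hypothetical MyTargetPassConfig (the real ARM equivalent is in the ARMTargetMachine.cpp hunk below):

// Hypothetical backend; mirrors the ARMPassConfig::addPreISel change in this
// commit. TM is TargetPassConfig's TargetMachine member, which the pass uses
// to reach the new emitLoadLinked/emitStoreConditional hooks.
bool MyTargetPassConfig::addPreISel() {
  addPass(createAtomicExpandLoadLinkedPass(TM));
  return false;
}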
16 changed file(s) with 1011 addition(s) and 411 deletion(s).
348348
349349 /// List of target independent CodeGen pass IDs.
350350 namespace llvm {
351 FunctionPass *createAtomicExpandLoadLinkedPass(const TargetMachine *TM);
352
351353 /// \brief Create a basic TargetTransformInfo analysis pass.
352354 ///
353355 /// This pass implements the target transform info analysis using the target
373375 /// matching during instruction selection.
374376 FunctionPass *createCodeGenPreparePass(const TargetMachine *TM = nullptr);
375377
378 /// AtomicExpandLoadLinkedID -- FIXME
379 extern char &AtomicExpandLoadLinkedID;
380
376381 /// MachineLoopInfo - This pass is a loop analysis pass.
377382 extern char &MachineLoopInfoID;
378383
7070 void initializeAliasSetPrinterPass(PassRegistry&);
7171 void initializeAlwaysInlinerPass(PassRegistry&);
7272 void initializeArgPromotionPass(PassRegistry&);
73 void initializeAtomicExpandLoadLinkedPass(PassRegistry&);
7374 void initializeSampleProfileLoaderPass(PassRegistry&);
7475 void initializeBarrierNoopPass(PassRegistry&);
7576 void initializeBasicAliasAnalysisPass(PassRegistry&);
3030 #include "llvm/IR/CallSite.h"
3131 #include "llvm/IR/CallingConv.h"
3232 #include "llvm/IR/InlineAsm.h"
33 #include "llvm/IR/IRBuilder.h"
3334 #include "llvm/MC/MCRegisterInfo.h"
3435 #include "llvm/Target/TargetCallingConv.h"
3536 #include "llvm/Target/TargetMachine.h"
896897 /// @}
897898
898899 //===--------------------------------------------------------------------===//
900 /// \name Helpers for load-linked/store-conditional atomic expansion.
901 /// @{
902
903 /// Perform a load-linked operation on Addr, returning a "Value *" with the
904 /// corresponding pointee type. This may entail some non-trivial operations to
905 /// truncate or reconstruct types that will be illegal in the backend. See
906 /// ARMISelLowering for an example implementation.
907 virtual Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
908 AtomicOrdering Ord) const {
909 llvm_unreachable("Load linked unimplemented on this target");
910 }
911
912 /// Perform a store-conditional operation to Addr. Return the status of the
913 /// store. This should be 0 if the store succeeded, non-zero otherwise.
914 virtual Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val,
915 Value *Addr, AtomicOrdering Ord) const {
916 llvm_unreachable("Store conditional unimplemented on this target");
917 }
918
919 /// Return true if the given (atomic) instruction should be expanded by the
920 /// IR-level AtomicExpandLoadLinked pass into a loop involving
921 /// load-linked/store-conditional pairs. Atomic stores will be expanded in the
922 /// same way as an "atomicrmw xchg" whose loaded result is ignored, if needed.
923 virtual bool shouldExpandAtomicInIR(Instruction *Inst) const {
924 return false;
925 }
926
927
928 //===--------------------------------------------------------------------===//
899929 // TargetLowering Configuration Methods - These methods should be invoked by
900930 // the derived class constructor to configure this object for the target.
901931 //
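The three hooks above are all a load-linked/store-conditional target has to provide. A minimal, purely illustrative sketch follows; MyTargetLowering and the Intrinsic::mytgt_ll / Intrinsic::mytgt_sc IDs are invented placeholders (a real implementation is ARMTargetLowering, further down in this diff), and the target's constructor and remaining lowering are omitted:

class MyTargetLowering : public TargetLowering {
public:
  // Expand atomic operations up to this hypothetical target's assumed 32-bit
  // exclusive-access width; anything wider is left for later lowering.
  bool shouldExpandAtomicInIR(Instruction *Inst) const override {
    return Inst->getType()->getPrimitiveSizeInBits() <= 32;
  }

  Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
                        AtomicOrdering Ord) const override {
    Module *M = Builder.GetInsertBlock()->getParent()->getParent();
    // Placeholder load-exclusive intrinsic, assumed to return i32; ordering is
    // handled by the pass's fences when getInsertFencesForAtomic() is set.
    Function *LL =
        Intrinsic::getDeclaration(M, Intrinsic::mytgt_ll, Addr->getType());
    // Narrow the i32 result back to the pointee type the pass expects.
    return Builder.CreateTruncOrBitCast(
        Builder.CreateCall(LL, Addr),
        cast<PointerType>(Addr->getType())->getElementType());
  }

  Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val, Value *Addr,
                              AtomicOrdering Ord) const override {
    Module *M = Builder.GetInsertBlock()->getParent()->getParent();
    // Placeholder store-exclusive intrinsic returning i32 0 on success and
    // non-zero on failure, as the expansion loop expects.
    Function *SC =
        Intrinsic::getDeclaration(M, Intrinsic::mytgt_sc, Addr->getType());
    return Builder.CreateCall2(
        SC, Builder.CreateZExtOrBitCast(Val, Builder.getInt32Ty()), Addr);
  }
};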
0 //===-- AtomicExpandLoadLinkedPass.cpp - Expand atomic instructions -------===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains a pass (at IR level) to replace atomic instructions with
10 // appropriate (intrinsic-based) ldrex/strex loops.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #define DEBUG_TYPE "arm-atomic-expand"
15 #include "llvm/CodeGen/Passes.h"
16 #include "llvm/IR/Function.h"
17 #include "llvm/IR/IRBuilder.h"
18 #include "llvm/IR/Instructions.h"
19 #include "llvm/IR/Intrinsics.h"
20 #include "llvm/IR/Module.h"
21 #include "llvm/Support/Debug.h"
22 #include "llvm/Target/TargetLowering.h"
23 #include "llvm/Target/TargetMachine.h"
24 using namespace llvm;
25
26 namespace {
27 class AtomicExpandLoadLinked : public FunctionPass {
28 const TargetLowering *TLI;
29 public:
30 static char ID; // Pass identification, replacement for typeid
31 explicit AtomicExpandLoadLinked(const TargetMachine *TM = 0)
32 : FunctionPass(ID), TLI(TM ? TM->getTargetLowering() : 0) {
33 initializeAtomicExpandLoadLinkedPass(*PassRegistry::getPassRegistry());
34 }
35
36 bool runOnFunction(Function &F) override;
37 bool expandAtomicInsts(Function &F);
38
39 bool expandAtomicLoad(LoadInst *LI);
40 bool expandAtomicStore(StoreInst *LI);
41 bool expandAtomicRMW(AtomicRMWInst *AI);
42 bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
43
44 AtomicOrdering insertLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord);
45 void insertTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord);
46 };
47 }
48
49 char AtomicExpandLoadLinked::ID = 0;
50 char &llvm::AtomicExpandLoadLinkedID = AtomicExpandLoadLinked::ID;
51
52 static void *initializeAtomicExpandLoadLinkedPassOnce(PassRegistry &Registry) {
53 PassInfo *PI = new PassInfo(
54 "Expand Atomic calls in terms of load-linked & store-conditional",
55 "atomic-ll-sc", &AtomicExpandLoadLinked::ID,
56 PassInfo::NormalCtor_t(callDefaultCtor<AtomicExpandLoadLinked>), false,
57 false, PassInfo::TargetMachineCtor_t(
58 callTargetMachineCtor<AtomicExpandLoadLinked>));
59 Registry.registerPass(*PI, true);
60 return PI;
61 }
62
63 void llvm::initializeAtomicExpandLoadLinkedPass(PassRegistry &Registry) {
64 CALL_ONCE_INITIALIZATION(initializeAtomicExpandLoadLinkedPassOnce)
65 }
66
67
68 FunctionPass *llvm::createAtomicExpandLoadLinkedPass(const TargetMachine *TM) {
69 return new AtomicExpandLoadLinked(TM);
70 }
71
72 bool AtomicExpandLoadLinked::runOnFunction(Function &F) {
73 if (!TLI)
74 return false;
75
76 SmallVector<Instruction *, 1> AtomicInsts;
77
78 // Changing control-flow while iterating through it is a bad idea, so gather a
79 // list of all atomic instructions before we start.
80 for (BasicBlock &BB : F)
81 for (Instruction &Inst : BB) {
82 if (isa<AtomicRMWInst>(&Inst) || isa<AtomicCmpXchgInst>(&Inst) ||
83 (isa<LoadInst>(&Inst) && cast<LoadInst>(&Inst)->isAtomic()) ||
84 (isa<StoreInst>(&Inst) && cast<StoreInst>(&Inst)->isAtomic()))
85 AtomicInsts.push_back(&Inst);
86 }
87
88 bool MadeChange = false;
89 for (Instruction *Inst : AtomicInsts) {
90 if (!TLI->shouldExpandAtomicInIR(Inst))
91 continue;
92
93 if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Inst))
94 MadeChange |= expandAtomicRMW(AI);
95 else if (AtomicCmpXchgInst *CI = dyn_cast<AtomicCmpXchgInst>(Inst))
96 MadeChange |= expandAtomicCmpXchg(CI);
97 else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
98 MadeChange |= expandAtomicLoad(LI);
99 else if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
100 MadeChange |= expandAtomicStore(SI);
101 else
102 llvm_unreachable("Unknown atomic instruction");
103 }
104
105 return MadeChange;
106 }
107
108 bool AtomicExpandLoadLinked::expandAtomicLoad(LoadInst *LI) {
109 // Load instructions don't actually need a leading fence, even in the
110 // SequentiallyConsistent case.
111 AtomicOrdering MemOpOrder =
112 TLI->getInsertFencesForAtomic() ? Monotonic : LI->getOrdering();
113
114 // The only 64-bit load guaranteed to be single-copy atomic by the ARM ARM is
115 // an ldrexd (A3.5.3).
116 IRBuilder<> Builder(LI);
117 Value *Val =
118 TLI->emitLoadLinked(Builder, LI->getPointerOperand(), MemOpOrder);
119
120 insertTrailingFence(Builder, LI->getOrdering());
121
122 LI->replaceAllUsesWith(Val);
123 LI->eraseFromParent();
124
125 return true;
126 }
127
128 bool AtomicExpandLoadLinked::expandAtomicStore(StoreInst *SI) {
129 // The only atomic 64-bit store on ARM is an strexd that succeeds, which means
130 // we need a loop and the entire instruction is essentially an "atomicrmw
131 // xchg" that ignores the value loaded.
132 IRBuilder<> Builder(SI);
133 AtomicRMWInst *AI =
134 Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(),
135 SI->getValueOperand(), SI->getOrdering());
136 SI->eraseFromParent();
137
138 // Now we have an appropriate swap instruction, lower it as usual.
139 return expandAtomicRMW(AI);
140 }
141
142 bool AtomicExpandLoadLinked::expandAtomicRMW(AtomicRMWInst *AI) {
143 AtomicOrdering Order = AI->getOrdering();
144 Value *Addr = AI->getPointerOperand();
145 BasicBlock *BB = AI->getParent();
146 Function *F = BB->getParent();
147 LLVMContext &Ctx = F->getContext();
148
149 // Given: atomicrmw some_op iN* %addr, iN %incr ordering
150 //
151 // The standard expansion we produce is:
152 // [...]
153 // fence?
154 // atomicrmw.start:
155 // %loaded = @load.linked(%addr)
156 // %new = some_op iN %loaded, %incr
157 // %stored = @store_conditional(%new, %addr)
158 // %try_again = icmp i32 ne %stored, 0
159 // br i1 %try_again, label %loop, label %atomicrmw.end
160 // atomicrmw.end:
161 // fence?
162 // [...]
163 BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end");
164 BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
165
166 // This grabs the DebugLoc from AI.
167 IRBuilder<> Builder(AI);
168
169 // The split call above "helpfully" added a branch at the end of BB (to the
170 // wrong place), but we might want a fence too. It's easiest to just remove
171 // the branch entirely.
172 std::prev(BB->end())->eraseFromParent();
173 Builder.SetInsertPoint(BB);
174 AtomicOrdering MemOpOrder = insertLeadingFence(Builder, Order);
175 Builder.CreateBr(LoopBB);
176
177 // Start the main loop block now that we've taken care of the preliminaries.
178 Builder.SetInsertPoint(LoopBB);
179 Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
180
181 Value *NewVal;
182 switch (AI->getOperation()) {
183 case AtomicRMWInst::Xchg:
184 NewVal = AI->getValOperand();
185 break;
186 case AtomicRMWInst::Add:
187 NewVal = Builder.CreateAdd(Loaded, AI->getValOperand(), "new");
188 break;
189 case AtomicRMWInst::Sub:
190 NewVal = Builder.CreateSub(Loaded, AI->getValOperand(), "new");
191 break;
192 case AtomicRMWInst::And:
193 NewVal = Builder.CreateAnd(Loaded, AI->getValOperand(), "new");
194 break;
195 case AtomicRMWInst::Nand:
196 NewVal = Builder.CreateAnd(Loaded, Builder.CreateNot(AI->getValOperand()),
197 "new");
198 break;
199 case AtomicRMWInst::Or:
200 NewVal = Builder.CreateOr(Loaded, AI->getValOperand(), "new");
201 break;
202 case AtomicRMWInst::Xor:
203 NewVal = Builder.CreateXor(Loaded, AI->getValOperand(), "new");
204 break;
205 case AtomicRMWInst::Max:
206 NewVal = Builder.CreateICmpSGT(Loaded, AI->getValOperand());
207 NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
208 break;
209 case AtomicRMWInst::Min:
210 NewVal = Builder.CreateICmpSLE(Loaded, AI->getValOperand());
211 NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
212 break;
213 case AtomicRMWInst::UMax:
214 NewVal = Builder.CreateICmpUGT(Loaded, AI->getValOperand());
215 NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
216 break;
217 case AtomicRMWInst::UMin:
218 NewVal = Builder.CreateICmpULE(Loaded, AI->getValOperand());
219 NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
220 break;
221 default:
222 llvm_unreachable("Unknown atomic op");
223 }
224
225 Value *StoreSuccess =
226 TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
227 Value *TryAgain = Builder.CreateICmpNE(
228 StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
229 Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);
230
231 Builder.SetInsertPoint(ExitBB, ExitBB->begin());
232 insertTrailingFence(Builder, Order);
233
234 AI->replaceAllUsesWith(Loaded);
235 AI->eraseFromParent();
236
237 return true;
238 }
239
240 bool AtomicExpandLoadLinked::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
241 AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
242 AtomicOrdering FailureOrder = CI->getFailureOrdering();
243 Value *Addr = CI->getPointerOperand();
244 BasicBlock *BB = CI->getParent();
245 Function *F = BB->getParent();
246 LLVMContext &Ctx = F->getContext();
247
248 // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
249 //
250 // The full expansion we produce is:
251 // [...]
252 // fence?
253 // cmpxchg.start:
254 // %loaded = @load.linked(%addr)
255 // %should_store = icmp eq %loaded, %desired
256 // br i1 %should_store, label %cmpxchg.trystore,
257 // label %cmpxchg.end/%cmpxchg.barrier
258 // cmpxchg.trystore:
259 // %stored = @store_conditional(%new, %addr)
260 // %try_again = icmp i32 ne %stored, 0
261 // br i1 %try_again, label %loop, label %cmpxchg.end
262 // cmpxchg.barrier:
263 // fence?
264 // br label %cmpxchg.end
265 // cmpxchg.end:
266 // [...]
267 BasicBlock *ExitBB = BB->splitBasicBlock(CI, "cmpxchg.end");
268 auto BarrierBB = BasicBlock::Create(Ctx, "cmpxchg.barrier", F, ExitBB);
269 auto TryStoreBB = BasicBlock::Create(Ctx, "cmpxchg.trystore", F, BarrierBB);
270 auto LoopBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, TryStoreBB);
271
272 // This grabs the DebugLoc from CI
273 IRBuilder<> Builder(CI);
274
275 // The split call above "helpfully" added a branch at the end of BB (to the
276 // wrong place), but we might want a fence too. It's easiest to just remove
277 // the branch entirely.
278 std::prev(BB->end())->eraseFromParent();
279 Builder.SetInsertPoint(BB);
280 AtomicOrdering MemOpOrder = insertLeadingFence(Builder, SuccessOrder);
281 Builder.CreateBr(LoopBB);
282
283 // Start the main loop block now that we've taken care of the preliminaries.
284 Builder.SetInsertPoint(LoopBB);
285 Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
286 Value *ShouldStore =
287 Builder.CreateICmpEQ(Loaded, CI->getCompareOperand(), "should_store");
288
289 // If the cmpxchg doesn't actually need any ordering when it fails, we can
290 // jump straight past that fence instruction (if it exists).
291 BasicBlock *FailureBB = FailureOrder == Monotonic ? ExitBB : BarrierBB;
292 Builder.CreateCondBr(ShouldStore, TryStoreBB, FailureBB);
293
294 Builder.SetInsertPoint(TryStoreBB);
295 Value *StoreSuccess = TLI->emitStoreConditional(
296 Builder, CI->getNewValOperand(), Addr, MemOpOrder);
297 Value *TryAgain = Builder.CreateICmpNE(
298 StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
299 Builder.CreateCondBr(TryAgain, LoopBB, BarrierBB);
300
301 // Finally, make sure later instructions don't get reordered with a fence if
302 // necessary.
303 Builder.SetInsertPoint(BarrierBB);
304 insertTrailingFence(Builder, SuccessOrder);
305 Builder.CreateBr(ExitBB);
306
307 CI->replaceAllUsesWith(Loaded);
308 CI->eraseFromParent();
309
310 return true;
311 }
312
313 AtomicOrdering AtomicExpandLoadLinked::insertLeadingFence(IRBuilder<> &Builder,
314 AtomicOrdering Ord) {
315 if (!TLI->getInsertFencesForAtomic())
316 return Ord;
317
318 if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent)
319 Builder.CreateFence(Release);
320
321 // The exclusive operations don't need any barrier if we're adding separate
322 // fences.
323 return Monotonic;
324 }
325
326 void AtomicExpandLoadLinked::insertTrailingFence(IRBuilder<> &Builder,
327 AtomicOrdering Ord) {
328 if (!TLI->getInsertFencesForAtomic())
329 return;
330
331 if (Ord == Acquire || Ord == AcquireRelease)
332 Builder.CreateFence(Acquire);
333 else if (Ord == SequentiallyConsistent)
334 Builder.CreateFence(SequentiallyConsistent);
335 }
11 AggressiveAntiDepBreaker.cpp
22 AllocationOrder.cpp
33 Analysis.cpp
4 AtomicExpandLoadLinkedPass.cpp
45 BasicTargetTransformInfo.cpp
56 BranchFolding.cpp
67 CalcSpillWeights.cpp
1919
2020 /// initializeCodeGen - Initialize all passes linked into the CodeGen library.
2121 void llvm::initializeCodeGen(PassRegistry &Registry) {
22 initializeAtomicExpandLoadLinkedPass(Registry);
2223 initializeBasicTTIPass(Registry);
2324 initializeBranchFolderPassPass(Registry);
2425 initializeCodeGenPreparePass(Registry);
4848 /// \brief Creates an ARM-specific Target Transformation Info pass.
4949 ImmutablePass *createARMTargetTransformInfoPass(const ARMBaseTargetMachine *TM);
5050
51 FunctionPass *createARMAtomicExpandPass(const TargetMachine *TM);
52
5351 void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
5452 ARMAsmPrinter &AP);
5553
+0
-406
lib/Target/ARM/ARMAtomicExpandPass.cpp
None //===-- ARMAtomicExpandPass.cpp - Expand atomic instructions --------------===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains a pass (at IR level) to replace atomic instructions with
10 // appropriate (intrinsic-based) ldrex/strex loops.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #define DEBUG_TYPE "arm-atomic-expand"
15 #include "ARM.h"
16 #include "llvm/CodeGen/Passes.h"
17 #include "llvm/IR/Function.h"
18 #include "llvm/IR/IRBuilder.h"
19 #include "llvm/IR/Instructions.h"
20 #include "llvm/IR/Intrinsics.h"
21 #include "llvm/IR/Module.h"
22 #include "llvm/Support/Debug.h"
23 #include "llvm/Target/TargetLowering.h"
24 #include "llvm/Target/TargetMachine.h"
25 using namespace llvm;
26
27 namespace {
28 class ARMAtomicExpandPass : public FunctionPass {
29 const TargetLowering *TLI;
30 public:
31 static char ID; // Pass identification, replacement for typeid
32 explicit ARMAtomicExpandPass(const TargetMachine *TM = 0)
33 : FunctionPass(ID), TLI(TM->getTargetLowering()) {}
34
35 bool runOnFunction(Function &F) override;
36 bool expandAtomicInsts(Function &F);
37
38 bool expandAtomicLoad(LoadInst *LI);
39 bool expandAtomicStore(StoreInst *LI);
40 bool expandAtomicRMW(AtomicRMWInst *AI);
41 bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
42
43 AtomicOrdering insertLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord);
44 void insertTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord);
45
46 /// Perform a load-linked operation on Addr, returning a "Value *" with the
47 /// corresponding pointee type. This may entail some non-trivial operations
48 /// to truncate or reconstruct illegal types since intrinsics must be legal
49 Value *loadLinked(IRBuilder<> &Builder, Value *Addr, AtomicOrdering Ord);
50
51 /// Perform a store-conditional operation to Addr. Return the status of the
52 /// store: 0 if it succeeded, non-zero otherwise.
53 Value *storeConditional(IRBuilder<> &Builder, Value *Val, Value *Addr,
54 AtomicOrdering Ord);
55
56 /// Return true if the given (atomic) instruction should be expanded by this
57 /// pass.
58 bool shouldExpandAtomic(Instruction *Inst);
59 };
60 }
61
62 char ARMAtomicExpandPass::ID = 0;
63
64 FunctionPass *llvm::createARMAtomicExpandPass(const TargetMachine *TM) {
65 return new ARMAtomicExpandPass(TM);
66 }
67
68 bool ARMAtomicExpandPass::runOnFunction(Function &F) {
69 SmallVector<Instruction *, 1> AtomicInsts;
70
71 // Changing control-flow while iterating through it is a bad idea, so gather a
72 // list of all atomic instructions before we start.
73 for (BasicBlock &BB : F)
74 for (Instruction &Inst : BB) {
75 if (isa<AtomicRMWInst>(&Inst) || isa<AtomicCmpXchgInst>(&Inst) ||
76 (isa<LoadInst>(&Inst) && cast<LoadInst>(&Inst)->isAtomic()) ||
77 (isa<StoreInst>(&Inst) && cast<StoreInst>(&Inst)->isAtomic()))
78 AtomicInsts.push_back(&Inst);
79 }
80
81 bool MadeChange = false;
82 for (Instruction *Inst : AtomicInsts) {
83 if (!shouldExpandAtomic(Inst))
84 continue;
85
86 if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Inst))
87 MadeChange |= expandAtomicRMW(AI);
88 else if (AtomicCmpXchgInst *CI = dyn_cast<AtomicCmpXchgInst>(Inst))
89 MadeChange |= expandAtomicCmpXchg(CI);
90 else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
91 MadeChange |= expandAtomicLoad(LI);
92 else if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
93 MadeChange |= expandAtomicStore(SI);
94 else
95 llvm_unreachable("Unknown atomic instruction");
96 }
97
98 return MadeChange;
99 }
100
101 bool ARMAtomicExpandPass::expandAtomicLoad(LoadInst *LI) {
102 // Load instructions don't actually need a leading fence, even in the
103 // SequentiallyConsistent case.
104 AtomicOrdering MemOpOrder =
105 TLI->getInsertFencesForAtomic() ? Monotonic : LI->getOrdering();
106
107 // The only 64-bit load guaranteed to be single-copy atomic by the ARM ARM is
108 // an ldrexd (A3.5.3).
109 IRBuilder<> Builder(LI);
110 Value *Val = loadLinked(Builder, LI->getPointerOperand(), MemOpOrder);
111
112 insertTrailingFence(Builder, LI->getOrdering());
113
114 LI->replaceAllUsesWith(Val);
115 LI->eraseFromParent();
116
117 return true;
118 }
119
120 bool ARMAtomicExpandPass::expandAtomicStore(StoreInst *SI) {
121 // The only atomic 64-bit store on ARM is an strexd that succeeds, which means
122 // we need a loop and the entire instruction is essentially an "atomicrmw
123 // xchg" that ignores the value loaded.
124 IRBuilder<> Builder(SI);
125 AtomicRMWInst *AI =
126 Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(),
127 SI->getValueOperand(), SI->getOrdering());
128 SI->eraseFromParent();
129
130 // Now we have an appropriate swap instruction, lower it as usual.
131 return expandAtomicRMW(AI);
132 }
133
134 bool ARMAtomicExpandPass::expandAtomicRMW(AtomicRMWInst *AI) {
135 AtomicOrdering Order = AI->getOrdering();
136 Value *Addr = AI->getPointerOperand();
137 BasicBlock *BB = AI->getParent();
138 Function *F = BB->getParent();
139 LLVMContext &Ctx = F->getContext();
140
141 // Given: atomicrmw some_op iN* %addr, iN %incr ordering
142 //
143 // The standard expansion we produce is:
144 // [...]
145 // fence?
146 // atomicrmw.start:
147 // %loaded = @load.linked(%addr)
148 // %new = some_op iN %loaded, %incr
149 // %stored = @store_conditional(%new, %addr)
150 // %try_again = icmp i32 ne %stored, 0
151 // br i1 %try_again, label %loop, label %atomicrmw.end
152 // atomicrmw.end:
153 // fence?
154 // [...]
155 BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end");
156 BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
157
158 // This grabs the DebugLoc from AI.
159 IRBuilder<> Builder(AI);
160
161 // The split call above "helpfully" added a branch at the end of BB (to the
162 // wrong place), but we might want a fence too. It's easiest to just remove
163 // the branch entirely.
164 std::prev(BB->end())->eraseFromParent();
165 Builder.SetInsertPoint(BB);
166 AtomicOrdering MemOpOrder = insertLeadingFence(Builder, Order);
167 Builder.CreateBr(LoopBB);
168
169 // Start the main loop block now that we've taken care of the preliminaries.
170 Builder.SetInsertPoint(LoopBB);
171 Value *Loaded = loadLinked(Builder, Addr, MemOpOrder);
172
173 Value *NewVal;
174 switch (AI->getOperation()) {
175 case AtomicRMWInst::Xchg:
176 NewVal = AI->getValOperand();
177 break;
178 case AtomicRMWInst::Add:
179 NewVal = Builder.CreateAdd(Loaded, AI->getValOperand(), "new");
180 break;
181 case AtomicRMWInst::Sub:
182 NewVal = Builder.CreateSub(Loaded, AI->getValOperand(), "new");
183 break;
184 case AtomicRMWInst::And:
185 NewVal = Builder.CreateAnd(Loaded, AI->getValOperand(), "new");
186 break;
187 case AtomicRMWInst::Nand:
188 NewVal = Builder.CreateAnd(Loaded, Builder.CreateNot(AI->getValOperand()),
189 "new");
190 break;
191 case AtomicRMWInst::Or:
192 NewVal = Builder.CreateOr(Loaded, AI->getValOperand(), "new");
193 break;
194 case AtomicRMWInst::Xor:
195 NewVal = Builder.CreateXor(Loaded, AI->getValOperand(), "new");
196 break;
197 case AtomicRMWInst::Max:
198 NewVal = Builder.CreateICmpSGT(Loaded, AI->getValOperand());
199 NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
200 break;
201 case AtomicRMWInst::Min:
202 NewVal = Builder.CreateICmpSLE(Loaded, AI->getValOperand());
203 NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
204 break;
205 case AtomicRMWInst::UMax:
206 NewVal = Builder.CreateICmpUGT(Loaded, AI->getValOperand());
207 NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
208 break;
209 case AtomicRMWInst::UMin:
210 NewVal = Builder.CreateICmpULE(Loaded, AI->getValOperand());
211 NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
212 break;
213 default:
214 llvm_unreachable("Unknown atomic op");
215 }
216
217 Value *StoreSuccess = storeConditional(Builder, NewVal, Addr, MemOpOrder);
218 Value *TryAgain = Builder.CreateICmpNE(
219 StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
220 Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);
221
222 Builder.SetInsertPoint(ExitBB, ExitBB->begin());
223 insertTrailingFence(Builder, Order);
224
225 AI->replaceAllUsesWith(Loaded);
226 AI->eraseFromParent();
227
228 return true;
229 }
230
231 bool ARMAtomicExpandPass::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
232 AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
233 AtomicOrdering FailureOrder = CI->getFailureOrdering();
234 Value *Addr = CI->getPointerOperand();
235 BasicBlock *BB = CI->getParent();
236 Function *F = BB->getParent();
237 LLVMContext &Ctx = F->getContext();
238
239 // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
240 //
241 // The full expansion we produce is:
242 // [...]
243 // fence?
244 // cmpxchg.start:
245 // %loaded = @load.linked(%addr)
246 // %should_store = icmp eq %loaded, %desired
247 // br i1 %should_store, label %cmpxchg.trystore,
248 // label %cmpxchg.end/%cmpxchg.barrier
249 // cmpxchg.trystore:
250 // %stored = @store_conditional(%new, %addr)
251 // %try_again = icmp i32 ne %stored, 0
252 // br i1 %try_again, label %loop, label %cmpxchg.end
253 // cmpxchg.barrier:
254 // fence?
255 // br label %cmpxchg.end
256 // cmpxchg.end:
257 // [...]
258 BasicBlock *ExitBB = BB->splitBasicBlock(CI, "cmpxchg.end");
259 auto BarrierBB = BasicBlock::Create(Ctx, "cmpxchg.barrier", F, ExitBB);
260 auto TryStoreBB = BasicBlock::Create(Ctx, "cmpxchg.trystore", F, BarrierBB);
261 auto LoopBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, TryStoreBB);
262
263 // This grabs the DebugLoc from CI
264 IRBuilder<> Builder(CI);
265
266 // The split call above "helpfully" added a branch at the end of BB (to the
267 // wrong place), but we might want a fence too. It's easiest to just remove
268 // the branch entirely.
269 std::prev(BB->end())->eraseFromParent();
270 Builder.SetInsertPoint(BB);
271 AtomicOrdering MemOpOrder = insertLeadingFence(Builder, SuccessOrder);
272 Builder.CreateBr(LoopBB);
273
274 // Start the main loop block now that we've taken care of the preliminaries.
275 Builder.SetInsertPoint(LoopBB);
276 Value *Loaded = loadLinked(Builder, Addr, MemOpOrder);
277 Value *ShouldStore =
278 Builder.CreateICmpEQ(Loaded, CI->getCompareOperand(), "should_store");
279
280 // If the cmpxchg doesn't actually need any ordering when it fails, we can
281 // jump straight past that fence instruction (if it exists).
282 BasicBlock *FailureBB = FailureOrder == Monotonic ? ExitBB : BarrierBB;
283 Builder.CreateCondBr(ShouldStore, TryStoreBB, FailureBB);
284
285 Builder.SetInsertPoint(TryStoreBB);
286 Value *StoreSuccess =
287 storeConditional(Builder, CI->getNewValOperand(), Addr, MemOpOrder);
288 Value *TryAgain = Builder.CreateICmpNE(
289 StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
290 Builder.CreateCondBr(TryAgain, LoopBB, BarrierBB);
291
292 // Finally, make sure later instructions don't get reordered with a fence if
293 // necessary.
294 Builder.SetInsertPoint(BarrierBB);
295 insertTrailingFence(Builder, SuccessOrder);
296 Builder.CreateBr(ExitBB);
297
298 CI->replaceAllUsesWith(Loaded);
299 CI->eraseFromParent();
300
301 return true;
302 }
303
304 Value *ARMAtomicExpandPass::loadLinked(IRBuilder<> &Builder, Value *Addr,
305 AtomicOrdering Ord) {
306 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
307 Type *ValTy = cast<PointerType>(Addr->getType())->getElementType();
308 bool IsAcquire =
309 Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent;
310
311 // Since i64 isn't legal and intrinsics don't get type-lowered, the ldrexd
312 // intrinsic must return {i32, i32} and we have to recombine them into a
313 // single i64 here.
314 if (ValTy->getPrimitiveSizeInBits() == 64) {
315 Intrinsic::ID Int =
316 IsAcquire ? Intrinsic::arm_ldaexd : Intrinsic::arm_ldrexd;
317 Function *Ldrex = llvm::Intrinsic::getDeclaration(M, Int);
318
319 Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
320 Value *LoHi = Builder.CreateCall(Ldrex, Addr, "lohi");
321
322 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
323 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
324 Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
325 Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
326 return Builder.CreateOr(
327 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 32)), "val64");
328 }
329
330 Type *Tys[] = { Addr->getType() };
331 Intrinsic::ID Int = IsAcquire ? Intrinsic::arm_ldaex : Intrinsic::arm_ldrex;
332 Function *Ldrex = llvm::Intrinsic::getDeclaration(M, Int, Tys);
333
334 return Builder.CreateTruncOrBitCast(
335 Builder.CreateCall(Ldrex, Addr),
336 cast<PointerType>(Addr->getType())->getElementType());
337 }
338
339 Value *ARMAtomicExpandPass::storeConditional(IRBuilder<> &Builder, Value *Val,
340 Value *Addr, AtomicOrdering Ord) {
341 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
342 bool IsRelease =
343 Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent;
344
345 // Since the intrinsics must have legal type, the i64 intrinsics take two
346 // parameters: "i32, i32". We must marshal Val into the appropriate form
347 // before the call.
348 if (Val->getType()->getPrimitiveSizeInBits() == 64) {
349 Intrinsic::ID Int =
350 IsRelease ? Intrinsic::arm_stlexd : Intrinsic::arm_strexd;
351 Function *Strex = Intrinsic::getDeclaration(M, Int);
352 Type *Int32Ty = Type::getInt32Ty(M->getContext());
353
354 Value *Lo = Builder.CreateTrunc(Val, Int32Ty, "lo");
355 Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 32), Int32Ty, "hi");
356 Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
357 return Builder.CreateCall3(Strex, Lo, Hi, Addr);
358 }
359
360 Intrinsic::ID Int = IsRelease ? Intrinsic::arm_stlex : Intrinsic::arm_strex;
361 Type *Tys[] = { Addr->getType() };
362 Function *Strex = Intrinsic::getDeclaration(M, Int, Tys);
363
364 return Builder.CreateCall2(
365 Strex, Builder.CreateZExtOrBitCast(
366 Val, Strex->getFunctionType()->getParamType(0)),
367 Addr);
368 }
369
370 AtomicOrdering ARMAtomicExpandPass::insertLeadingFence(IRBuilder<> &Builder,
371 AtomicOrdering Ord) {
372 if (!TLI->getInsertFencesForAtomic())
373 return Ord;
374
375 if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent)
376 Builder.CreateFence(Release);
377
378 // The exclusive operations don't need any barrier if we're adding separate
379 // fences.
380 return Monotonic;
381 }
382
383 void ARMAtomicExpandPass::insertTrailingFence(IRBuilder<> &Builder,
384 AtomicOrdering Ord) {
385 if (!TLI->getInsertFencesForAtomic())
386 return;
387
388 if (Ord == Acquire || Ord == AcquireRelease)
389 Builder.CreateFence(Acquire);
390 else if (Ord == SequentiallyConsistent)
391 Builder.CreateFence(SequentiallyConsistent);
392 }
393
394 bool ARMAtomicExpandPass::shouldExpandAtomic(Instruction *Inst) {
395 // Loads and stores less than 64-bits are already atomic; ones above that
396 // are doomed anyway, so defer to the default libcall and blame the OS when
397 // things go wrong:
398 if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
399 return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 64;
400 else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
401 return LI->getType()->getPrimitiveSizeInBits() == 64;
402
403 // For the real atomic operations, we have ldrex/strex up to 64 bits.
404 return Inst->getType()->getPrimitiveSizeInBits() <= 64;
405 }
3636 #include "llvm/IR/Constants.h"
3737 #include "llvm/IR/Function.h"
3838 #include "llvm/IR/GlobalValue.h"
39 #include "llvm/IR/IRBuilder.h"
3940 #include "llvm/IR/Instruction.h"
4041 #include "llvm/IR/Instructions.h"
4142 #include "llvm/IR/Intrinsics.h"
1049310494 return false;
1049410495 return true;
1049510496 }
10497
10498 bool ARMTargetLowering::shouldExpandAtomicInIR(Instruction *Inst) const {
10499 // Loads and stores less than 64-bits are already atomic; ones above that
10500 // are doomed anyway, so defer to the default libcall and blame the OS when
10501 // things go wrong:
10502 if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
10503 return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 64;
10504 else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
10505 return LI->getType()->getPrimitiveSizeInBits() == 64;
10506
10507 // For the real atomic operations, we have ldrex/strex up to 64 bits.
10508 return Inst->getType()->getPrimitiveSizeInBits() <= 64;
10509 }
10510
10511 Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
10512 AtomicOrdering Ord) const {
10513 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
10514 Type *ValTy = cast<PointerType>(Addr->getType())->getElementType();
10515 bool IsAcquire =
10516 Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent;
10517
10518 // Since i64 isn't legal and intrinsics don't get type-lowered, the ldrexd
10519 // intrinsic must return {i32, i32} and we have to recombine them into a
10520 // single i64 here.
10521 if (ValTy->getPrimitiveSizeInBits() == 64) {
10522 Intrinsic::ID Int =
10523 IsAcquire ? Intrinsic::arm_ldaexd : Intrinsic::arm_ldrexd;
10524 Function *Ldrex = llvm::Intrinsic::getDeclaration(M, Int);
10525
10526 Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
10527 Value *LoHi = Builder.CreateCall(Ldrex, Addr, "lohi");
10528
10529 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
10530 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
10531 Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
10532 Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
10533 return Builder.CreateOr(
10534 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 32)), "val64");
10535 }
10536
10537 Type *Tys[] = { Addr->getType() };
10538 Intrinsic::ID Int = IsAcquire ? Intrinsic::arm_ldaex : Intrinsic::arm_ldrex;
10539 Function *Ldrex = llvm::Intrinsic::getDeclaration(M, Int, Tys);
10540
10541 return Builder.CreateTruncOrBitCast(
10542 Builder.CreateCall(Ldrex, Addr),
10543 cast<PointerType>(Addr->getType())->getElementType());
10544 }
10545
10546 Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val,
10547 Value *Addr,
10548 AtomicOrdering Ord) const {
10549 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
10550 bool IsRelease =
10551 Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent;
10552
10553 // Since the intrinsics must have legal type, the i64 intrinsics take two
10554 // parameters: "i32, i32". We must marshal Val into the appropriate form
10555 // before the call.
10556 if (Val->getType()->getPrimitiveSizeInBits() == 64) {
10557 Intrinsic::ID Int =
10558 IsRelease ? Intrinsic::arm_stlexd : Intrinsic::arm_strexd;
10559 Function *Strex = Intrinsic::getDeclaration(M, Int);
10560 Type *Int32Ty = Type::getInt32Ty(M->getContext());
10561
10562 Value *Lo = Builder.CreateTrunc(Val, Int32Ty, "lo");
10563 Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 32), Int32Ty, "hi");
10564 Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
10565 return Builder.CreateCall3(Strex, Lo, Hi, Addr);
10566 }
10567
10568 Intrinsic::ID Int = IsRelease ? Intrinsic::arm_stlex : Intrinsic::arm_strex;
10569 Type *Tys[] = { Addr->getType() };
10570 Function *Strex = Intrinsic::getDeclaration(M, Int, Tys);
10571
10572 return Builder.CreateCall2(
10573 Strex, Builder.CreateZExtOrBitCast(
10574 Val, Strex->getFunctionType()->getParamType(0)),
10575 Addr);
10576 }
382382 /// to just the constant itself.
383383 bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
384384 Type *Ty) const override;
385
386 Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
387 AtomicOrdering Ord) const override;
388 Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val,
389 Value *Addr, AtomicOrdering Ord) const override;
390
391 bool shouldExpandAtomicInIR(Instruction *Inst) const override;
385392
386393 protected:
387394 std::pair
227227 bool ARMPassConfig::addPreISel() {
228228 const ARMSubtarget *Subtarget = &getARMSubtarget();
229229 if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only())
230 addPass(createARMAtomicExpandPass(TM));
230 addPass(createAtomicExpandLoadLinkedPass(TM));
231231
232232 if (TM->getOptLevel() != CodeGenOpt::None)
233233 addPass(createGlobalMergePass(TM));
1616 add_llvm_target(ARMCodeGen
1717 A15SDOptimizer.cpp
1818 ARMAsmPrinter.cpp
19 ARMAtomicExpandPass.cpp
2019 ARMBaseInstrInfo.cpp
2120 ARMBaseRegisterInfo.cpp
2221 ARMCodeEmitter.cpp
0 ; RUN: opt -S -o - -mtriple=armv7-apple-ios7.0 -atomic-ll-sc %s | FileCheck %s
1
2 define i8 @test_atomic_xchg_i8(i8* %ptr, i8 %xchgend) {
3 ; CHECK-LABEL: @test_atomic_xchg_i8
4 ; CHECK-NOT: fence
5 ; CHECK: br label %[[LOOP:.*]]
6 ; CHECK: [[LOOP]]:
7 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
8 ; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8
9 ; CHECK: [[NEWVAL32:%.*]] = zext i8 %xchgend to i32
10 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
11 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
12 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
13 ; CHECK: [[END]]:
14 ; CHECK-NOT: fence
15 ; CHECK: ret i8 [[OLDVAL]]
16 %res = atomicrmw xchg i8* %ptr, i8 %xchgend monotonic
17 ret i8 %res
18 }
19
20 define i16 @test_atomic_add_i16(i16* %ptr, i16 %addend) {
21 ; CHECK-LABEL: @test_atomic_add_i16
22 ; CHECK: fence release
23 ; CHECK: br label %[[LOOP:.*]]
24 ; CHECK: [[LOOP]]:
25 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i16(i16* %ptr)
26 ; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i16
27 ; CHECK: [[NEWVAL:%.*]] = add i16 [[OLDVAL]], %addend
28 ; CHECK: [[NEWVAL32:%.*]] = zext i16 [[NEWVAL]] to i32
29 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i16(i32 [[NEWVAL32]], i16* %ptr)
30 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
31 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
32 ; CHECK: [[END]]:
33 ; CHECK: fence seq_cst
34 ; CHECK: ret i16 [[OLDVAL]]
35 %res = atomicrmw add i16* %ptr, i16 %addend seq_cst
36 ret i16 %res
37 }
38
39 define i32 @test_atomic_sub_i32(i32* %ptr, i32 %subend) {
40 ; CHECK-LABEL: @test_atomic_sub_i32
41 ; CHECK-NOT: fence
42 ; CHECK: br label %[[LOOP:.*]]
43 ; CHECK: [[LOOP]]:
44 ; CHECK: [[OLDVAL:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %ptr)
45 ; CHECK: [[NEWVAL:%.*]] = sub i32 [[OLDVAL]], %subend
46 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 [[NEWVAL]], i32* %ptr)
47 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
48 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
49 ; CHECK: [[END]]:
50 ; CHECK: fence acquire
51 ; CHECK: ret i32 [[OLDVAL]]
52 %res = atomicrmw sub i32* %ptr, i32 %subend acquire
53 ret i32 %res
54 }
55
56 define i8 @test_atomic_and_i8(i8* %ptr, i8 %andend) {
57 ; CHECK-LABEL: @test_atomic_and_i8
58 ; CHECK: fence release
59 ; CHECK: br label %[[LOOP:.*]]
60 ; CHECK: [[LOOP]]:
61 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
62 ; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8
63 ; CHECK: [[NEWVAL:%.*]] = and i8 [[OLDVAL]], %andend
64 ; CHECK: [[NEWVAL32:%.*]] = zext i8 [[NEWVAL]] to i32
65 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
66 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
67 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
68 ; CHECK: [[END]]:
69 ; CHECK-NOT: fence
70 ; CHECK: ret i8 [[OLDVAL]]
71 %res = atomicrmw and i8* %ptr, i8 %andend release
72 ret i8 %res
73 }
74
75 define i16 @test_atomic_nand_i16(i16* %ptr, i16 %nandend) {
76 ; CHECK-LABEL: @test_atomic_nand_i16
77 ; CHECK: fence release
78 ; CHECK: br label %[[LOOP:.*]]
79 ; CHECK: [[LOOP]]:
80 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i16(i16* %ptr)
81 ; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i16
82 ; CHECK: [[NEWVAL_TMP:%.*]] = xor i16 %nandend, -1
83 ; CHECK: [[NEWVAL:%.*]] = and i16 [[OLDVAL]], [[NEWVAL_TMP]]
84 ; CHECK: [[NEWVAL32:%.*]] = zext i16 [[NEWVAL]] to i32
85 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i16(i32 [[NEWVAL32]], i16* %ptr)
86 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
87 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
88 ; CHECK: [[END]]:
89 ; CHECK: fence seq_cst
90 ; CHECK: ret i16 [[OLDVAL]]
91 %res = atomicrmw nand i16* %ptr, i16 %nandend seq_cst
92 ret i16 %res
93 }
94
95 define i64 @test_atomic_or_i64(i64* %ptr, i64 %orend) {
96 ; CHECK-LABEL: @test_atomic_or_i64
97 ; CHECK: fence release
98 ; CHECK: br label %[[LOOP:.*]]
99 ; CHECK: [[LOOP]]:
100 ; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8*
101 ; CHECK: [[LOHI:%.*]] = call { i32, i32 } @llvm.arm.ldrexd(i8* [[PTR8]])
102 ; CHECK: [[LO:%.*]] = extractvalue { i32, i32 } [[LOHI]], 0
103 ; CHECK: [[HI:%.*]] = extractvalue { i32, i32 } [[LOHI]], 1
104 ; CHECK: [[LO64:%.*]] = zext i32 [[LO]] to i64
105 ; CHECK: [[HI64_TMP:%.*]] = zext i32 [[HI]] to i64
106 ; CHECK: [[HI64:%.*]] = shl i64 [[HI64_TMP]], 32
107 ; CHECK: [[OLDVAL:%.*]] = or i64 [[LO64]], [[HI64]]
108 ; CHECK: [[NEWVAL:%.*]] = or i64 [[OLDVAL]], %orend
109 ; CHECK: [[NEWLO:%.*]] = trunc i64 [[NEWVAL]] to i32
110 ; CHECK: [[NEWHI_TMP:%.*]] = lshr i64 [[NEWVAL]], 32
111 ; CHECK: [[NEWHI:%.*]] = trunc i64 [[NEWHI_TMP]] to i32
112 ; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8*
113 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strexd(i32 [[NEWLO]], i32 [[NEWHI]], i8* [[PTR8]])
114 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
115 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
116 ; CHECK: [[END]]:
117 ; CHECK: fence seq_cst
118 ; CHECK: ret i64 [[OLDVAL]]
119 %res = atomicrmw or i64* %ptr, i64 %orend seq_cst
120 ret i64 %res
121 }
122
123 define i8 @test_atomic_xor_i8(i8* %ptr, i8 %xorend) {
124 ; CHECK-LABEL: @test_atomic_xor_i8
125 ; CHECK: fence release
126 ; CHECK: br label %[[LOOP:.*]]
127 ; CHECK: [[LOOP]]:
128 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
129 ; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8
130 ; CHECK: [[NEWVAL:%.*]] = xor i8 [[OLDVAL]], %xorend
131 ; CHECK: [[NEWVAL32:%.*]] = zext i8 [[NEWVAL]] to i32
132 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
133 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
134 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
135 ; CHECK: [[END]]:
136 ; CHECK: fence seq_cst
137 ; CHECK: ret i8 [[OLDVAL]]
138 %res = atomicrmw xor i8* %ptr, i8 %xorend seq_cst
139 ret i8 %res
140 }
141
142 define i8 @test_atomic_max_i8(i8* %ptr, i8 %maxend) {
143 ; CHECK-LABEL: @test_atomic_max_i8
144 ; CHECK: fence release
145 ; CHECK: br label %[[LOOP:.*]]
146 ; CHECK: [[LOOP]]:
147 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
148 ; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8
149 ; CHECK: [[WANT_OLD:%.*]] = icmp sgt i8 [[OLDVAL]], %maxend
150 ; CHECK: [[NEWVAL:%.*]] = select i1 [[WANT_OLD]], i8 [[OLDVAL]], i8 %maxend
151 ; CHECK: [[NEWVAL32:%.*]] = zext i8 [[NEWVAL]] to i32
152 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
153 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
154 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
155 ; CHECK: [[END]]:
156 ; CHECK: fence seq_cst
157 ; CHECK: ret i8 [[OLDVAL]]
158 %res = atomicrmw max i8* %ptr, i8 %maxend seq_cst
159 ret i8 %res
160 }
161
162 define i8 @test_atomic_min_i8(i8* %ptr, i8 %minend) {
163 ; CHECK-LABEL: @test_atomic_min_i8
164 ; CHECK: fence release
165 ; CHECK: br label %[[LOOP:.*]]
166 ; CHECK: [[LOOP]]:
167 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
168 ; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8
169 ; CHECK: [[WANT_OLD:%.*]] = icmp sle i8 [[OLDVAL]], %minend
170 ; CHECK: [[NEWVAL:%.*]] = select i1 [[WANT_OLD]], i8 [[OLDVAL]], i8 %minend
171 ; CHECK: [[NEWVAL32:%.*]] = zext i8 [[NEWVAL]] to i32
172 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
173 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
174 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
175 ; CHECK: [[END]]:
176 ; CHECK: fence seq_cst
177 ; CHECK: ret i8 [[OLDVAL]]
178 %res = atomicrmw min i8* %ptr, i8 %minend seq_cst
179 ret i8 %res
180 }
181
182 define i8 @test_atomic_umax_i8(i8* %ptr, i8 %umaxend) {
183 ; CHECK-LABEL: @test_atomic_umax_i8
184 ; CHECK: fence release
185 ; CHECK: br label %[[LOOP:.*]]
186 ; CHECK: [[LOOP]]:
187 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
188 ; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8
189 ; CHECK: [[WANT_OLD:%.*]] = icmp ugt i8 [[OLDVAL]], %umaxend
190 ; CHECK: [[NEWVAL:%.*]] = select i1 [[WANT_OLD]], i8 [[OLDVAL]], i8 %umaxend
191 ; CHECK: [[NEWVAL32:%.*]] = zext i8 [[NEWVAL]] to i32
192 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
193 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
194 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
195 ; CHECK: [[END]]:
196 ; CHECK: fence seq_cst
197 ; CHECK: ret i8 [[OLDVAL]]
198 %res = atomicrmw umax i8* %ptr, i8 %umaxend seq_cst
199 ret i8 %res
200 }
201
202 define i8 @test_atomic_umin_i8(i8* %ptr, i8 %uminend) {
203 ; CHECK-LABEL: @test_atomic_umin_i8
204 ; CHECK: fence release
205 ; CHECK: br label %[[LOOP:.*]]
206 ; CHECK: [[LOOP]]:
207 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
208 ; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8
209 ; CHECK: [[WANT_OLD:%.*]] = icmp ule i8 [[OLDVAL]], %uminend
210 ; CHECK: [[NEWVAL:%.*]] = select i1 [[WANT_OLD]], i8 [[OLDVAL]], i8 %uminend
211 ; CHECK: [[NEWVAL32:%.*]] = zext i8 [[NEWVAL]] to i32
212 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
213 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
214 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
215 ; CHECK: [[END]]:
216 ; CHECK: fence seq_cst
217 ; CHECK: ret i8 [[OLDVAL]]
218 %res = atomicrmw umin i8* %ptr, i8 %uminend seq_cst
219 ret i8 %res
220 }
221
222 define i8 @test_cmpxchg_i8_seqcst_seqcst(i8* %ptr, i8 %desired, i8 %newval) {
223 ; CHECK-LABEL: @test_cmpxchg_i8_seqcst_seqcst
224 ; CHECK: fence release
225 ; CHECK: br label %[[LOOP:.*]]
226
227 ; CHECK: [[LOOP]]:
228 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
229 ; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8
230 ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i8 [[OLDVAL]], %desired
231 ; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[BARRIER:.*]]
232
233 ; CHECK: [[TRY_STORE]]:
234 ; CHECK: [[NEWVAL32:%.*]] = zext i8 %newval to i32
235 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
236 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
237 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[BARRIER:.*]]
238
239 ; CHECK: [[BARRIER]]:
240 ; CHECK: fence seq_cst
241 ; CHECK: br label %[[DONE:.*]]
242
243 ; CHECK: [[DONE]]:
244 ; CHECK: ret i8 [[OLDVAL]]
245
246 %old = cmpxchg i8* %ptr, i8 %desired, i8 %newval seq_cst seq_cst
247 ret i8 %old
248 }
249
250 define i16 @test_cmpxchg_i16_seqcst_monotonic(i16* %ptr, i16 %desired, i16 %newval) {
251 ; CHECK-LABEL: @test_cmpxchg_i16_seqcst_monotonic
252 ; CHECK: fence release
253 ; CHECK: br label %[[LOOP:.*]]
254
255 ; CHECK: [[LOOP]]:
256 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i16(i16* %ptr)
257 ; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i16
258 ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i16 [[OLDVAL]], %desired
259 ; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[DONE:.*]]
260
261 ; CHECK: [[TRY_STORE]]:
262 ; CHECK: [[NEWVAL32:%.*]] = zext i16 %newval to i32
263 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i16(i32 [[NEWVAL32]], i16* %ptr)
264 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
265 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[BARRIER:.*]]
266
267 ; CHECK: [[BARRIER]]:
268 ; CHECK: fence seq_cst
269 ; CHECK: br label %[[DONE:.*]]
270
271 ; CHECK: [[DONE]]:
272 ; CHECK: ret i16 [[OLDVAL]]
273
274 %old = cmpxchg i16* %ptr, i16 %desired, i16 %newval seq_cst monotonic
275 ret i16 %old
276 }
277
278 define i32 @test_cmpxchg_i32_acquire_acquire(i32* %ptr, i32 %desired, i32 %newval) {
279 ; CHECK-LABEL: @test_cmpxchg_i32_acquire_acquire
280 ; CHECK-NOT: fence
281 ; CHECK: br label %[[LOOP:.*]]
282
283 ; CHECK: [[LOOP]]:
284 ; CHECK: [[OLDVAL:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %ptr)
285 ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[OLDVAL]], %desired
286 ; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[DONE:.*]]
287
288 ; CHECK: [[TRY_STORE]]:
289 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %newval, i32* %ptr)
290 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
291 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[BARRIER:.*]]
292
293 ; CHECK: [[BARRIER]]:
294 ; CHECK: fence acquire
295 ; CHECK: br label %[[DONE:.*]]
296
297 ; CHECK: [[DONE]]:
298 ; CHECK: ret i32 [[OLDVAL]]
299
300 %old = cmpxchg i32* %ptr, i32 %desired, i32 %newval acquire acquire
301 ret i32 %old
302 }
303
304 define i64 @test_cmpxchg_i64_monotonic_monotonic(i64* %ptr, i64 %desired, i64 %newval) {
305 ; CHECK-LABEL: @test_cmpxchg_i64_monotonic_monotonic
306 ; CHECK-NOT: fence
307 ; CHECK: br label %[[LOOP:.*]]
308
309 ; CHECK: [[LOOP]]:
310 ; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8*
311 ; CHECK: [[LOHI:%.*]] = call { i32, i32 } @llvm.arm.ldrexd(i8* [[PTR8]])
312 ; CHECK: [[LO:%.*]] = extractvalue { i32, i32 } [[LOHI]], 0
313 ; CHECK: [[HI:%.*]] = extractvalue { i32, i32 } [[LOHI]], 1
314 ; CHECK: [[LO64:%.*]] = zext i32 [[LO]] to i64
315 ; CHECK: [[HI64_TMP:%.*]] = zext i32 [[HI]] to i64
316 ; CHECK: [[HI64:%.*]] = shl i64 [[HI64_TMP]], 32
317 ; CHECK: [[OLDVAL:%.*]] = or i64 [[LO64]], [[HI64]]
318 ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i64 [[OLDVAL]], %desired
319 ; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[DONE:.*]]
320
321 ; CHECK: [[TRY_STORE]]:
322 ; CHECK: [[NEWLO:%.*]] = trunc i64 %newval to i32
323 ; CHECK: [[NEWHI_TMP:%.*]] = lshr i64 %newval, 32
324 ; CHECK: [[NEWHI:%.*]] = trunc i64 [[NEWHI_TMP]] to i32
325 ; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8*
326 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strexd(i32 [[NEWLO]], i32 [[NEWHI]], i8* [[PTR8]])
327 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
328 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[BARRIER:.*]]
329
330 ; CHECK: [[BARRIER]]:
331 ; CHECK-NOT: fence
332 ; CHECK: br label %[[DONE:.*]]
333
334 ; CHECK: [[DONE]]:
335 ; CHECK: ret i64 [[OLDVAL]]
336
337 %old = cmpxchg i64* %ptr, i64 %desired, i64 %newval monotonic monotonic
338 ret i64 %old
339 }
0 ; RUN: opt -S -o - -mtriple=armv8-linux-gnueabihf -atomic-ll-sc %s | FileCheck %s
1
2 define i8 @test_atomic_xchg_i8(i8* %ptr, i8 %xchgend) {
3 ; CHECK-LABEL: @test_atomic_xchg_i8
4 ; CHECK-NOT: fence
5 ; CHECK: br label %[[LOOP:.*]]
6 ; CHECK: [[LOOP]]:
7 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
8 ; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8
9 ; CHECK: [[NEWVAL32:%.*]] = zext i8 %xchgend to i32
10 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
11 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
12 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
13 ; CHECK: [[END]]:
14 ; CHECK-NOT: fence
15 ; CHECK: ret i8 [[OLDVAL]]
16 %res = atomicrmw xchg i8* %ptr, i8 %xchgend monotonic
17 ret i8 %res
18 }
19
20 define i16 @test_atomic_add_i16(i16* %ptr, i16 %addend) {
21 ; CHECK-LABEL: @test_atomic_add_i16
22 ; CHECK-NOT: fence
23 ; CHECK: br label %[[LOOP:.*]]
24 ; CHECK: [[LOOP]]:
25 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldaex.p0i16(i16* %ptr)
26 ; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i16
27 ; CHECK: [[NEWVAL:%.*]] = add i16 [[OLDVAL]], %addend
28 ; CHECK: [[NEWVAL32:%.*]] = zext i16 [[NEWVAL]] to i32
29 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.stlex.p0i16(i32 [[NEWVAL32]], i16* %ptr)
30 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
31 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
32 ; CHECK: [[END]]:
33 ; CHECK-NOT: fence
34 ; CHECK: ret i16 [[OLDVAL]]
35 %res = atomicrmw add i16* %ptr, i16 %addend seq_cst
36 ret i16 %res
37 }
38
39 define i32 @test_atomic_sub_i32(i32* %ptr, i32 %subend) {
40 ; CHECK-LABEL: @test_atomic_sub_i32
41 ; CHECK-NOT: fence
42 ; CHECK: br label %[[LOOP:.*]]
43 ; CHECK: [[LOOP]]:
44 ; CHECK: [[OLDVAL:%.*]] = call i32 @llvm.arm.ldaex.p0i32(i32* %ptr)
45 ; CHECK: [[NEWVAL:%.*]] = sub i32 [[OLDVAL]], %subend
46 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 [[NEWVAL]], i32* %ptr)
47 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
48 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
49 ; CHECK: [[END]]:
50 ; CHECK-NOT: fence
51 ; CHECK: ret i32 [[OLDVAL]]
52 %res = atomicrmw sub i32* %ptr, i32 %subend acquire
53 ret i32 %res
54 }
55
56 define i64 @test_atomic_or_i64(i64* %ptr, i64 %orend) {
57 ; CHECK-LABEL: @test_atomic_or_i64
58 ; CHECK-NOT: fence
59 ; CHECK: br label %[[LOOP:.*]]
60 ; CHECK: [[LOOP]]:
61 ; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8*
62 ; CHECK: [[LOHI:%.*]] = call { i32, i32 } @llvm.arm.ldaexd(i8* [[PTR8]])
63 ; CHECK: [[LO:%.*]] = extractvalue { i32, i32 } [[LOHI]], 0
64 ; CHECK: [[HI:%.*]] = extractvalue { i32, i32 } [[LOHI]], 1
65 ; CHECK: [[LO64:%.*]] = zext i32 [[LO]] to i64
66 ; CHECK: [[HI64_TMP:%.*]] = zext i32 [[HI]] to i64
67 ; CHECK: [[HI64:%.*]] = shl i64 [[HI64_TMP]], 32
68 ; CHECK: [[OLDVAL:%.*]] = or i64 [[LO64]], [[HI64]]
69 ; CHECK: [[NEWVAL:%.*]] = or i64 [[OLDVAL]], %orend
70 ; CHECK: [[NEWLO:%.*]] = trunc i64 [[NEWVAL]] to i32
71 ; CHECK: [[NEWHI_TMP:%.*]] = lshr i64 [[NEWVAL]], 32
72 ; CHECK: [[NEWHI:%.*]] = trunc i64 [[NEWHI_TMP]] to i32
73 ; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8*
74 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.stlexd(i32 [[NEWLO]], i32 [[NEWHI]], i8* [[PTR8]])
75 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
76 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
77 ; CHECK: [[END]]:
78 ; CHECK-NOT: fence
79 ; CHECK: ret i64 [[OLDVAL]]
80 %res = atomicrmw or i64* %ptr, i64 %orend seq_cst
81 ret i64 %res
82 }
83
84 define i8 @test_cmpxchg_i8_seqcst_seqcst(i8* %ptr, i8 %desired, i8 %newval) {
85 ; CHECK-LABEL: @test_cmpxchg_i8_seqcst_seqcst
86 ; CHECK-NOT: fence
87 ; CHECK: br label %[[LOOP:.*]]
88
89 ; CHECK: [[LOOP]]:
90 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldaex.p0i8(i8* %ptr)
91 ; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8
92 ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i8 [[OLDVAL]], %desired
93 ; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[BARRIER:.*]]
94
95 ; CHECK: [[TRY_STORE]]:
96 ; CHECK: [[NEWVAL32:%.*]] = zext i8 %newval to i32
97 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.stlex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
98 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
99 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[BARRIER:.*]]
100
101 ; CHECK: [[BARRIER]]:
102 ; CHECK-NOT: fence
103 ; CHECK: br label %[[DONE:.*]]
104
105 ; CHECK: [[DONE]]:
106 ; CHECK: ret i8 [[OLDVAL]]
107
108 %old = cmpxchg i8* %ptr, i8 %desired, i8 %newval seq_cst seq_cst
109 ret i8 %old
110 }
111
112 define i16 @test_cmpxchg_i16_seqcst_monotonic(i16* %ptr, i16 %desired, i16 %newval) {
113 ; CHECK-LABEL: @test_cmpxchg_i16_seqcst_monotonic
114 ; CHECK-NOT: fence
115 ; CHECK: br label %[[LOOP:.*]]
116
117 ; CHECK: [[LOOP]]:
118 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldaex.p0i16(i16* %ptr)
119 ; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i16
120 ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i16 [[OLDVAL]], %desired
121 ; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[DONE:.*]]
122
123 ; CHECK: [[TRY_STORE]]:
124 ; CHECK: [[NEWVAL32:%.*]] = zext i16 %newval to i32
125 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.stlex.p0i16(i32 [[NEWVAL32]], i16* %ptr)
126 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
127 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[BARRIER:.*]]
128
129 ; CHECK: [[BARRIER]]:
130 ; CHECK-NOT: fence
131 ; CHECK: br label %[[DONE:.*]]
132
133 ; CHECK: [[DONE]]:
134 ; CHECK: ret i16 [[OLDVAL]]
135
136 %old = cmpxchg i16* %ptr, i16 %desired, i16 %newval seq_cst monotonic
137 ret i16 %old
138 }
139
140 define i32 @test_cmpxchg_i32_acquire_acquire(i32* %ptr, i32 %desired, i32 %newval) {
141 ; CHECK-LABEL: @test_cmpxchg_i32_acquire_acquire
142 ; CHECK-NOT: fence
143 ; CHECK: br label %[[LOOP:.*]]
144
145 ; CHECK: [[LOOP]]:
146 ; CHECK: [[OLDVAL:%.*]] = call i32 @llvm.arm.ldaex.p0i32(i32* %ptr)
147 ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[OLDVAL]], %desired
148 ; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[DONE:.*]]
149
150 ; CHECK: [[TRY_STORE]]:
151 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %newval, i32* %ptr)
152 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
153 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[BARRIER:.*]]
154
155 ; CHECK: [[BARRIER]]:
156 ; CHECK-NOT: fence
157 ; CHECK: br label %[[DONE:.*]]
158
159 ; CHECK: [[DONE]]:
160 ; CHECK: ret i32 [[OLDVAL]]
161
162 %old = cmpxchg i32* %ptr, i32 %desired, i32 %newval acquire acquire
163 ret i32 %old
164 }
165
166 define i64 @test_cmpxchg_i64_monotonic_monotonic(i64* %ptr, i64 %desired, i64 %newval) {
167 ; CHECK-LABEL: @test_cmpxchg_i64_monotonic_monotonic
168 ; CHECK-NOT: fence
169 ; CHECK: br label %[[LOOP:.*]]
170
171 ; CHECK: [[LOOP]]:
172 ; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8*
173 ; CHECK: [[LOHI:%.*]] = call { i32, i32 } @llvm.arm.ldrexd(i8* [[PTR8]])
174 ; CHECK: [[LO:%.*]] = extractvalue { i32, i32 } [[LOHI]], 0
175 ; CHECK: [[HI:%.*]] = extractvalue { i32, i32 } [[LOHI]], 1
176 ; CHECK: [[LO64:%.*]] = zext i32 [[LO]] to i64
177 ; CHECK: [[HI64_TMP:%.*]] = zext i32 [[HI]] to i64
178 ; CHECK: [[HI64:%.*]] = shl i64 [[HI64_TMP]], 32
179 ; CHECK: [[OLDVAL:%.*]] = or i64 [[LO64]], [[HI64]]
180 ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i64 [[OLDVAL]], %desired
181 ; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[DONE:.*]]
182
183 ; CHECK: [[TRY_STORE]]:
184 ; CHECK: [[NEWLO:%.*]] = trunc i64 %newval to i32
185 ; CHECK: [[NEWHI_TMP:%.*]] = lshr i64 %newval, 32
186 ; CHECK: [[NEWHI:%.*]] = trunc i64 [[NEWHI_TMP]] to i32
187 ; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8*
188 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strexd(i32 [[NEWLO]], i32 [[NEWHI]], i8* [[PTR8]])
189 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
190 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[BARRIER:.*]]
191
192 ; CHECK: [[BARRIER]]:
193 ; CHECK-NOT: fence
194 ; CHECK: br label %[[DONE:.*]]
195
196 ; CHECK: [[DONE]]:
197 ; CHECK: ret i64 [[OLDVAL]]
198
199 %old = cmpxchg i64* %ptr, i64 %desired, i64 %newval monotonic monotonic
200 ret i64 %old
201 }
0 targets = set(config.root.targets_to_build.split())
1 if not 'ARM' in targets:
2 config.unsupported = True
3
350350 initializeInstrumentation(Registry);
351351 initializeTarget(Registry);
352352 // For codegen passes, only passes that do IR to IR transformation are
353 // supported. For now, just add CodeGenPrepare.
353 // supported.
354354 initializeCodeGenPreparePass(Registry);
355 initializeAtomicExpandLoadLinkedPass(Registry);
355356
356357 #ifdef LINK_POLLY_INTO_TOOLS
357358 polly::initializePollyPasses(Registry);