llvm.org GIT mirror llvm / cf165c3
Rename AtomicExpandLoadLinked into AtomicExpand

AtomicExpandLoadLinked is currently rather ARM-specific. This patch is the first of a group that aims at making it more target-independent. See http://lists.cs.uiuc.edu/pipermail/llvmdev/2014-August/075873.html for details.

The command line option is "atomic-expand".

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@216231 91177308-0d34-0410-b5e6-96231b3b80d8

Robin Morisset, 5 years ago
22 changed files with 1091 additions and 1090 deletions.
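As a quick way to exercise the renamed pass, a minimal opt invocation (a sketch mirroring the RUN lines of the tests at the end of this diff; "input.ll" is an illustrative file name) is:

opt -S -o - -mtriple=armv7-apple-ios7.0 -atomic-expand input.ll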
344344
345345 /// List of target independent CodeGen pass IDs.
346346 namespace llvm {
347 FunctionPass *createAtomicExpandLoadLinkedPass(const TargetMachine *TM);
347 FunctionPass *createAtomicExpandPass(const TargetMachine *TM);
348348
349349 /// \brief Create a basic TargetTransformInfo analysis pass.
350350 ///
371371 /// matching during instruction selection.
372372 FunctionPass *createCodeGenPreparePass(const TargetMachine *TM = nullptr);
373373
374 /// AtomicExpandLoadLinkedID -- FIXME
375 extern char &AtomicExpandLoadLinkedID;
374 /// AtomicExpandID -- Lowers atomic operations in terms of either cmpxchg or
375 /// load-linked/store-conditional loops.
376 extern char &AtomicExpandID;
376377
377378 /// MachineLoopInfo - This pass is a loop analysis pass.
378379 extern char &MachineLoopInfoID;
7070 void initializeAliasSetPrinterPass(PassRegistry&);
7171 void initializeAlwaysInlinerPass(PassRegistry&);
7272 void initializeArgPromotionPass(PassRegistry&);
73 void initializeAtomicExpandLoadLinkedPass(PassRegistry&);
73 void initializeAtomicExpandPass(PassRegistry&);
7474 void initializeSampleProfileLoaderPass(PassRegistry&);
7575 void initializeBarrierNoopPass(PassRegistry&);
7676 void initializeBasicAliasAnalysisPass(PassRegistry&);
936936 /// @}
937937
938938 //===--------------------------------------------------------------------===//
939 /// \name Helpers for load-linked/store-conditional atomic expansion.
939 /// \name Helpers for atomic expansion.
940940 /// @{
941941
942942 /// Perform a load-linked operation on Addr, returning a "Value *" with the
956956 }
957957
958958 /// Return true if the given (atomic) instruction should be expanded by the
959 /// IR-level AtomicExpandLoadLinked pass into a loop involving
959 /// IR-level AtomicExpand pass into a loop involving
960960 /// load-linked/store-conditional pairs. Atomic stores will be expanded in the
961961 /// same way as "atomic xchg" operations which ignore their output if needed.
962962 virtual bool shouldExpandAtomicInIR(Instruction *Inst) const {
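As a small IR sketch of the store case described above (value and pointer names are illustrative; this follows expandAtomicStore further down in this diff), an atomic store such as

store atomic i32 %val, i32* %ptr seq_cst, align 4

is first rewritten into an equivalent swap whose loaded result is simply ignored,

%unused = atomicrmw xchg i32* %ptr, i32 %val seq_cst

and that atomicrmw is then expanded into the usual load-linked/store-conditional loop.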
117117 virtual bool enablePostMachineScheduler() const;
118118
119119 /// \brief True if the subtarget should run the atomic expansion pass.
120 virtual bool enableAtomicExpandLoadLinked() const;
120 virtual bool enableAtomicExpand() const;
121121
122122 /// \brief Override generic scheduling policy within a region.
123123 ///
+0 -384 lib/CodeGen/AtomicExpandLoadLinkedPass.cpp
0 //===-- AtomicExpandLoadLinkedPass.cpp - Expand atomic instructions -------===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains a pass (at IR level) to replace atomic instructions with
10 // appropriate (intrinsic-based) ldrex/strex loops.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "llvm/CodeGen/Passes.h"
15 #include "llvm/IR/Function.h"
16 #include "llvm/IR/IRBuilder.h"
17 #include "llvm/IR/Instructions.h"
18 #include "llvm/IR/Intrinsics.h"
19 #include "llvm/IR/Module.h"
20 #include "llvm/Support/Debug.h"
21 #include "llvm/Target/TargetLowering.h"
22 #include "llvm/Target/TargetMachine.h"
23 #include "llvm/Target/TargetSubtargetInfo.h"
24
25 using namespace llvm;
26
27 #define DEBUG_TYPE "arm-atomic-expand"
28
29 namespace {
30 class AtomicExpandLoadLinked : public FunctionPass {
31 const TargetMachine *TM;
32 public:
33 static char ID; // Pass identification, replacement for typeid
34 explicit AtomicExpandLoadLinked(const TargetMachine *TM = nullptr)
35 : FunctionPass(ID), TM(TM) {
36 initializeAtomicExpandLoadLinkedPass(*PassRegistry::getPassRegistry());
37 }
38
39 bool runOnFunction(Function &F) override;
40 bool expandAtomicInsts(Function &F);
41
42 bool expandAtomicLoad(LoadInst *LI);
43 bool expandAtomicStore(StoreInst *LI);
44 bool expandAtomicRMW(AtomicRMWInst *AI);
45 bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
46
47 AtomicOrdering insertLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord);
48 void insertTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord);
49 };
50 }
51
52 char AtomicExpandLoadLinked::ID = 0;
53 char &llvm::AtomicExpandLoadLinkedID = AtomicExpandLoadLinked::ID;
54 INITIALIZE_TM_PASS(AtomicExpandLoadLinked, "atomic-ll-sc",
55 "Expand Atomic calls in terms of load-linked & store-conditional",
56 false, false)
57
58 FunctionPass *llvm::createAtomicExpandLoadLinkedPass(const TargetMachine *TM) {
59 return new AtomicExpandLoadLinked(TM);
60 }
61
62 bool AtomicExpandLoadLinked::runOnFunction(Function &F) {
63 if (!TM || !TM->getSubtargetImpl()->enableAtomicExpandLoadLinked())
64 return false;
65
66 SmallVector<Instruction *, 1> AtomicInsts;
67
68 // Changing control-flow while iterating through it is a bad idea, so gather a
69 // list of all atomic instructions before we start.
70 for (BasicBlock &BB : F)
71 for (Instruction &Inst : BB) {
72 if (isa<AtomicRMWInst>(&Inst) || isa<AtomicCmpXchgInst>(&Inst) ||
73 (isa<LoadInst>(&Inst) && cast<LoadInst>(&Inst)->isAtomic()) ||
74 (isa<StoreInst>(&Inst) && cast<StoreInst>(&Inst)->isAtomic()))
75 AtomicInsts.push_back(&Inst);
76 }
77
78 bool MadeChange = false;
79 for (Instruction *Inst : AtomicInsts) {
80 if (!TM->getSubtargetImpl()->getTargetLowering()->shouldExpandAtomicInIR(
81 Inst))
82 continue;
83
84 if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Inst))
85 MadeChange |= expandAtomicRMW(AI);
86 else if (AtomicCmpXchgInst *CI = dyn_cast<AtomicCmpXchgInst>(Inst))
87 MadeChange |= expandAtomicCmpXchg(CI);
88 else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
89 MadeChange |= expandAtomicLoad(LI);
90 else if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
91 MadeChange |= expandAtomicStore(SI);
92 else
93 llvm_unreachable("Unknown atomic instruction");
94 }
95
96 return MadeChange;
97 }
98
99 bool AtomicExpandLoadLinked::expandAtomicLoad(LoadInst *LI) {
100 // Load instructions don't actually need a leading fence, even in the
101 // SequentiallyConsistent case.
102 AtomicOrdering MemOpOrder =
103 TM->getSubtargetImpl()->getTargetLowering()->getInsertFencesForAtomic()
104 ? Monotonic
105 : LI->getOrdering();
106
107 // The only 64-bit load guaranteed to be single-copy atomic by the ARM is
108 // an ldrexd (A3.5.3).
109 IRBuilder<> Builder(LI);
110 Value *Val = TM->getSubtargetImpl()->getTargetLowering()->emitLoadLinked(
111 Builder, LI->getPointerOperand(), MemOpOrder);
112
113 insertTrailingFence(Builder, LI->getOrdering());
114
115 LI->replaceAllUsesWith(Val);
116 LI->eraseFromParent();
117
118 return true;
119 }
120
121 bool AtomicExpandLoadLinked::expandAtomicStore(StoreInst *SI) {
122 // The only atomic 64-bit store on ARM is an strexd that succeeds, which means
123 // we need a loop and the entire instruction is essentially an "atomicrmw
124 // xchg" that ignores the value loaded.
125 IRBuilder<> Builder(SI);
126 AtomicRMWInst *AI =
127 Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(),
128 SI->getValueOperand(), SI->getOrdering());
129 SI->eraseFromParent();
130
131 // Now we have an appropriate swap instruction, lower it as usual.
132 return expandAtomicRMW(AI);
133 }
134
135 bool AtomicExpandLoadLinked::expandAtomicRMW(AtomicRMWInst *AI) {
136 AtomicOrdering Order = AI->getOrdering();
137 Value *Addr = AI->getPointerOperand();
138 BasicBlock *BB = AI->getParent();
139 Function *F = BB->getParent();
140 LLVMContext &Ctx = F->getContext();
141
142 // Given: atomicrmw some_op iN* %addr, iN %incr ordering
143 //
144 // The standard expansion we produce is:
145 // [...]
146 // fence?
147 // atomicrmw.start:
148 // %loaded = @load.linked(%addr)
149 // %new = some_op iN %loaded, %incr
150 // %stored = @store_conditional(%new, %addr)
151 // %try_again = icmp i32 ne %stored, 0
152 // br i1 %try_again, label %loop, label %atomicrmw.end
153 // atomicrmw.end:
154 // fence?
155 // [...]
156 BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end");
157 BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
158
159 // This grabs the DebugLoc from AI.
160 IRBuilder<> Builder(AI);
161
162 // The split call above "helpfully" added a branch at the end of BB (to the
163 // wrong place), but we might want a fence too. It's easiest to just remove
164 // the branch entirely.
165 std::prev(BB->end())->eraseFromParent();
166 Builder.SetInsertPoint(BB);
167 AtomicOrdering MemOpOrder = insertLeadingFence(Builder, Order);
168 Builder.CreateBr(LoopBB);
169
170 // Start the main loop block now that we've taken care of the preliminaries.
171 Builder.SetInsertPoint(LoopBB);
172 Value *Loaded = TM->getSubtargetImpl()->getTargetLowering()->emitLoadLinked(
173 Builder, Addr, MemOpOrder);
174
175 Value *NewVal;
176 switch (AI->getOperation()) {
177 case AtomicRMWInst::Xchg:
178 NewVal = AI->getValOperand();
179 break;
180 case AtomicRMWInst::Add:
181 NewVal = Builder.CreateAdd(Loaded, AI->getValOperand(), "new");
182 break;
183 case AtomicRMWInst::Sub:
184 NewVal = Builder.CreateSub(Loaded, AI->getValOperand(), "new");
185 break;
186 case AtomicRMWInst::And:
187 NewVal = Builder.CreateAnd(Loaded, AI->getValOperand(), "new");
188 break;
189 case AtomicRMWInst::Nand:
190 NewVal = Builder.CreateNot(Builder.CreateAnd(Loaded, AI->getValOperand()),
191 "new");
192 break;
193 case AtomicRMWInst::Or:
194 NewVal = Builder.CreateOr(Loaded, AI->getValOperand(), "new");
195 break;
196 case AtomicRMWInst::Xor:
197 NewVal = Builder.CreateXor(Loaded, AI->getValOperand(), "new");
198 break;
199 case AtomicRMWInst::Max:
200 NewVal = Builder.CreateICmpSGT(Loaded, AI->getValOperand());
201 NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
202 break;
203 case AtomicRMWInst::Min:
204 NewVal = Builder.CreateICmpSLE(Loaded, AI->getValOperand());
205 NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
206 break;
207 case AtomicRMWInst::UMax:
208 NewVal = Builder.CreateICmpUGT(Loaded, AI->getValOperand());
209 NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
210 break;
211 case AtomicRMWInst::UMin:
212 NewVal = Builder.CreateICmpULE(Loaded, AI->getValOperand());
213 NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
214 break;
215 default:
216 llvm_unreachable("Unknown atomic op");
217 }
218
219 Value *StoreSuccess =
220 TM->getSubtargetImpl()->getTargetLowering()->emitStoreConditional(
221 Builder, NewVal, Addr, MemOpOrder);
222 Value *TryAgain = Builder.CreateICmpNE(
223 StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
224 Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);
225
226 Builder.SetInsertPoint(ExitBB, ExitBB->begin());
227 insertTrailingFence(Builder, Order);
228
229 AI->replaceAllUsesWith(Loaded);
230 AI->eraseFromParent();
231
232 return true;
233 }
234
235 bool AtomicExpandLoadLinked::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
236 AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
237 AtomicOrdering FailureOrder = CI->getFailureOrdering();
238 Value *Addr = CI->getPointerOperand();
239 BasicBlock *BB = CI->getParent();
240 Function *F = BB->getParent();
241 LLVMContext &Ctx = F->getContext();
242
243 // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
244 //
245 // The full expansion we produce is:
246 // [...]
247 // fence?
248 // cmpxchg.start:
249 // %loaded = @load.linked(%addr)
250 // %should_store = icmp eq %loaded, %desired
251 // br i1 %should_store, label %cmpxchg.trystore,
252 // label %cmpxchg.failure
253 // cmpxchg.trystore:
254 // %stored = @store_conditional(%new, %addr)
255 // %success = icmp eq i32 %stored, 0
256 // br i1 %success, label %cmpxchg.success, label %loop/%cmpxchg.failure
257 // cmpxchg.success:
258 // fence?
259 // br label %cmpxchg.end
260 // cmpxchg.failure:
261 // fence?
262 // br label %cmpxchg.end
263 // cmpxchg.end:
264 // %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
265 // %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
266 // %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
267 // [...]
268 BasicBlock *ExitBB = BB->splitBasicBlock(CI, "cmpxchg.end");
269 auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
270 auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, FailureBB);
271 auto TryStoreBB = BasicBlock::Create(Ctx, "cmpxchg.trystore", F, SuccessBB);
272 auto LoopBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, TryStoreBB);
273
274 // This grabs the DebugLoc from CI
275 IRBuilder<> Builder(CI);
276
277 // The split call above "helpfully" added a branch at the end of BB (to the
278 // wrong place), but we might want a fence too. It's easiest to just remove
279 // the branch entirely.
280 std::prev(BB->end())->eraseFromParent();
281 Builder.SetInsertPoint(BB);
282 AtomicOrdering MemOpOrder = insertLeadingFence(Builder, SuccessOrder);
283 Builder.CreateBr(LoopBB);
284
285 // Start the main loop block now that we've taken care of the preliminaries.
286 Builder.SetInsertPoint(LoopBB);
287 Value *Loaded = TM->getSubtargetImpl()->getTargetLowering()->emitLoadLinked(
288 Builder, Addr, MemOpOrder);
289 Value *ShouldStore =
290 Builder.CreateICmpEQ(Loaded, CI->getCompareOperand(), "should_store");
291
292 // If the cmpxchg doesn't actually need any ordering when it fails, we can
293 // jump straight past that fence instruction (if it exists).
294 Builder.CreateCondBr(ShouldStore, TryStoreBB, FailureBB);
295
296 Builder.SetInsertPoint(TryStoreBB);
297 Value *StoreSuccess =
298 TM->getSubtargetImpl()->getTargetLowering()->emitStoreConditional(
299 Builder, CI->getNewValOperand(), Addr, MemOpOrder);
300 StoreSuccess = Builder.CreateICmpEQ(
301 StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
302 Builder.CreateCondBr(StoreSuccess, SuccessBB,
303 CI->isWeak() ? FailureBB : LoopBB);
304
305 // Make sure later instructions don't get reordered with a fence if necessary.
306 Builder.SetInsertPoint(SuccessBB);
307 insertTrailingFence(Builder, SuccessOrder);
308 Builder.CreateBr(ExitBB);
309
310 Builder.SetInsertPoint(FailureBB);
311 insertTrailingFence(Builder, FailureOrder);
312 Builder.CreateBr(ExitBB);
313
314 // Finally, we have control-flow based knowledge of whether the cmpxchg
315 // succeeded or not. We expose this to later passes by converting any
316 // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate PHI.
317
318 // Setup the builder so we can create any PHIs we need.
319 Builder.SetInsertPoint(ExitBB, ExitBB->begin());
320 PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2);
321 Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
322 Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);
323
324 // Look for any users of the cmpxchg that are just comparing the loaded value
325 // against the desired one, and replace them with the CFG-derived version.
326 SmallVector<ExtractValueInst *, 2> PrunedInsts;
327 for (auto User : CI->users()) {
328 ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User);
329 if (!EV)
330 continue;
331
332 assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
333 "weird extraction from { iN, i1 }");
334
335 if (EV->getIndices()[0] == 0)
336 EV->replaceAllUsesWith(Loaded);
337 else
338 EV->replaceAllUsesWith(Success);
339
340 PrunedInsts.push_back(EV);
341 }
342
343 // We can remove the instructions now we're no longer iterating through them.
344 for (auto EV : PrunedInsts)
345 EV->eraseFromParent();
346
347 if (!CI->use_empty()) {
348 // Some use of the full struct return that we don't understand has happened,
349 // so we've got to reconstruct it properly.
350 Value *Res;
351 Res = Builder.CreateInsertValue(UndefValue::get(CI->getType()), Loaded, 0);
352 Res = Builder.CreateInsertValue(Res, Success, 1);
353
354 CI->replaceAllUsesWith(Res);
355 }
356
357 CI->eraseFromParent();
358 return true;
359 }
360
361 AtomicOrdering AtomicExpandLoadLinked::insertLeadingFence(IRBuilder<> &Builder,
362 AtomicOrdering Ord) {
363 if (!TM->getSubtargetImpl()->getTargetLowering()->getInsertFencesForAtomic())
364 return Ord;
365
366 if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent)
367 Builder.CreateFence(Release);
368
369 // The exclusive operations don't need any barrier if we're adding separate
370 // fences.
371 return Monotonic;
372 }
373
374 void AtomicExpandLoadLinked::insertTrailingFence(IRBuilder<> &Builder,
375 AtomicOrdering Ord) {
376 if (!TM->getSubtargetImpl()->getTargetLowering()->getInsertFencesForAtomic())
377 return;
378
379 if (Ord == Acquire || Ord == AcquireRelease)
380 Builder.CreateFence(Acquire);
381 else if (Ord == SequentiallyConsistent)
382 Builder.CreateFence(SequentiallyConsistent);
383 }
lib/CodeGen/AtomicExpandPass.cpp
0 //===-- AtomicExpandPass.cpp - Expand atomic instructions -------===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains a pass (at IR level) to replace atomic instructions with
10 // appropriate (intrinsic-based) ldrex/strex loops.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "llvm/CodeGen/Passes.h"
15 #include "llvm/IR/Function.h"
16 #include "llvm/IR/IRBuilder.h"
17 #include "llvm/IR/Instructions.h"
18 #include "llvm/IR/Intrinsics.h"
19 #include "llvm/IR/Module.h"
20 #include "llvm/Support/Debug.h"
21 #include "llvm/Target/TargetLowering.h"
22 #include "llvm/Target/TargetMachine.h"
23 #include "llvm/Target/TargetSubtargetInfo.h"
24
25 using namespace llvm;
26
27 #define DEBUG_TYPE "atomic-expand"
28
29 namespace {
30 class AtomicExpand: public FunctionPass {
31 const TargetMachine *TM;
32 public:
33 static char ID; // Pass identification, replacement for typeid
34 explicit AtomicExpand(const TargetMachine *TM = nullptr)
35 : FunctionPass(ID), TM(TM) {
36 initializeAtomicExpandPass(*PassRegistry::getPassRegistry());
37 }
38
39 bool runOnFunction(Function &F) override;
40 bool expandAtomicInsts(Function &F);
41
42 bool expandAtomicLoad(LoadInst *LI);
43 bool expandAtomicStore(StoreInst *LI);
44 bool expandAtomicRMW(AtomicRMWInst *AI);
45 bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
46
47 AtomicOrdering insertLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord);
48 void insertTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord);
49 };
50 }
51
52 char AtomicExpand::ID = 0;
53 char &llvm::AtomicExpandID = AtomicExpand::ID;
54 INITIALIZE_TM_PASS(AtomicExpand, "atomic-expand",
55 "Expand Atomic calls in terms of either load-linked & store-conditional or cmpxchg",
56 false, false)
57
58 FunctionPass *llvm::createAtomicExpandPass(const TargetMachine *TM) {
59 return new AtomicExpand(TM);
60 }
61
62 bool AtomicExpand::runOnFunction(Function &F) {
63 if (!TM || !TM->getSubtargetImpl()->enableAtomicExpand())
64 return false;
65
66 SmallVector<Instruction *, 1> AtomicInsts;
67
68 // Changing control-flow while iterating through it is a bad idea, so gather a
69 // list of all atomic instructions before we start.
70 for (BasicBlock &BB : F)
71 for (Instruction &Inst : BB) {
72 if (isa<AtomicRMWInst>(&Inst) || isa<AtomicCmpXchgInst>(&Inst) ||
73 (isa<LoadInst>(&Inst) && cast<LoadInst>(&Inst)->isAtomic()) ||
74 (isa<StoreInst>(&Inst) && cast<StoreInst>(&Inst)->isAtomic()))
75 AtomicInsts.push_back(&Inst);
76 }
77
78 bool MadeChange = false;
79 for (Instruction *Inst : AtomicInsts) {
80 if (!TM->getSubtargetImpl()->getTargetLowering()->shouldExpandAtomicInIR(
81 Inst))
82 continue;
83
84 if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Inst))
85 MadeChange |= expandAtomicRMW(AI);
86 else if (AtomicCmpXchgInst *CI = dyn_cast<AtomicCmpXchgInst>(Inst))
87 MadeChange |= expandAtomicCmpXchg(CI);
88 else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
89 MadeChange |= expandAtomicLoad(LI);
90 else if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
91 MadeChange |= expandAtomicStore(SI);
92 else
93 llvm_unreachable("Unknown atomic instruction");
94 }
95
96 return MadeChange;
97 }
98
99 bool AtomicExpand::expandAtomicLoad(LoadInst *LI) {
100 // Load instructions don't actually need a leading fence, even in the
101 // SequentiallyConsistent case.
102 AtomicOrdering MemOpOrder =
103 TM->getSubtargetImpl()->getTargetLowering()->getInsertFencesForAtomic()
104 ? Monotonic
105 : LI->getOrdering();
106
107 // The only 64-bit load guaranteed to be single-copy atomic by the ARM is
108 // an ldrexd (A3.5.3).
109 IRBuilder<> Builder(LI);
110 Value *Val = TM->getSubtargetImpl()->getTargetLowering()->emitLoadLinked(
111 Builder, LI->getPointerOperand(), MemOpOrder);
112
113 insertTrailingFence(Builder, LI->getOrdering());
114
115 LI->replaceAllUsesWith(Val);
116 LI->eraseFromParent();
117
118 return true;
119 }
120
121 bool AtomicExpand::expandAtomicStore(StoreInst *SI) {
122 // The only atomic 64-bit store on ARM is an strexd that succeeds, which means
123 // we need a loop and the entire instruction is essentially an "atomicrmw
124 // xchg" that ignores the value loaded.
125 IRBuilder<> Builder(SI);
126 AtomicRMWInst *AI =
127 Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(),
128 SI->getValueOperand(), SI->getOrdering());
129 SI->eraseFromParent();
130
131 // Now we have an appropriate swap instruction, lower it as usual.
132 return expandAtomicRMW(AI);
133 }
134
135 bool AtomicExpand::expandAtomicRMW(AtomicRMWInst *AI) {
136 AtomicOrdering Order = AI->getOrdering();
137 Value *Addr = AI->getPointerOperand();
138 BasicBlock *BB = AI->getParent();
139 Function *F = BB->getParent();
140 LLVMContext &Ctx = F->getContext();
141
142 // Given: atomicrmw some_op iN* %addr, iN %incr ordering
143 //
144 // The standard expansion we produce is:
145 // [...]
146 // fence?
147 // atomicrmw.start:
148 // %loaded = @load.linked(%addr)
149 // %new = some_op iN %loaded, %incr
150 // %stored = @store_conditional(%new, %addr)
151 // %try_again = icmp i32 ne %stored, 0
152 // br i1 %try_again, label %loop, label %atomicrmw.end
153 // atomicrmw.end:
154 // fence?
155 // [...]
156 BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end");
157 BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
158
159 // This grabs the DebugLoc from AI.
160 IRBuilder<> Builder(AI);
161
162 // The split call above "helpfully" added a branch at the end of BB (to the
163 // wrong place), but we might want a fence too. It's easiest to just remove
164 // the branch entirely.
165 std::prev(BB->end())->eraseFromParent();
166 Builder.SetInsertPoint(BB);
167 AtomicOrdering MemOpOrder = insertLeadingFence(Builder, Order);
168 Builder.CreateBr(LoopBB);
169
170 // Start the main loop block now that we've taken care of the preliminaries.
171 Builder.SetInsertPoint(LoopBB);
172 Value *Loaded = TM->getSubtargetImpl()->getTargetLowering()->emitLoadLinked(
173 Builder, Addr, MemOpOrder);
174
175 Value *NewVal;
176 switch (AI->getOperation()) {
177 case AtomicRMWInst::Xchg:
178 NewVal = AI->getValOperand();
179 break;
180 case AtomicRMWInst::Add:
181 NewVal = Builder.CreateAdd(Loaded, AI->getValOperand(), "new");
182 break;
183 case AtomicRMWInst::Sub:
184 NewVal = Builder.CreateSub(Loaded, AI->getValOperand(), "new");
185 break;
186 case AtomicRMWInst::And:
187 NewVal = Builder.CreateAnd(Loaded, AI->getValOperand(), "new");
188 break;
189 case AtomicRMWInst::Nand:
190 NewVal = Builder.CreateNot(Builder.CreateAnd(Loaded, AI->getValOperand()),
191 "new");
192 break;
193 case AtomicRMWInst::Or:
194 NewVal = Builder.CreateOr(Loaded, AI->getValOperand(), "new");
195 break;
196 case AtomicRMWInst::Xor:
197 NewVal = Builder.CreateXor(Loaded, AI->getValOperand(), "new");
198 break;
199 case AtomicRMWInst::Max:
200 NewVal = Builder.CreateICmpSGT(Loaded, AI->getValOperand());
201 NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
202 break;
203 case AtomicRMWInst::Min:
204 NewVal = Builder.CreateICmpSLE(Loaded, AI->getValOperand());
205 NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
206 break;
207 case AtomicRMWInst::UMax:
208 NewVal = Builder.CreateICmpUGT(Loaded, AI->getValOperand());
209 NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
210 break;
211 case AtomicRMWInst::UMin:
212 NewVal = Builder.CreateICmpULE(Loaded, AI->getValOperand());
213 NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
214 break;
215 default:
216 llvm_unreachable("Unknown atomic op");
217 }
218
219 Value *StoreSuccess =
220 TM->getSubtargetImpl()->getTargetLowering()->emitStoreConditional(
221 Builder, NewVal, Addr, MemOpOrder);
222 Value *TryAgain = Builder.CreateICmpNE(
223 StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
224 Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);
225
226 Builder.SetInsertPoint(ExitBB, ExitBB->begin());
227 insertTrailingFence(Builder, Order);
228
229 AI->replaceAllUsesWith(Loaded);
230 AI->eraseFromParent();
231
232 return true;
233 }
234
235 bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
236 AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
237 AtomicOrdering FailureOrder = CI->getFailureOrdering();
238 Value *Addr = CI->getPointerOperand();
239 BasicBlock *BB = CI->getParent();
240 Function *F = BB->getParent();
241 LLVMContext &Ctx = F->getContext();
242
243 // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
244 //
245 // The full expansion we produce is:
246 // [...]
247 // fence?
248 // cmpxchg.start:
249 // %loaded = @load.linked(%addr)
250 // %should_store = icmp eq %loaded, %desired
251 // br i1 %should_store, label %cmpxchg.trystore,
252 // label %cmpxchg.failure
253 // cmpxchg.trystore:
254 // %stored = @store_conditional(%new, %addr)
255 // %success = icmp eq i32 %stored, 0
256 // br i1 %success, label %cmpxchg.success, label %loop/%cmpxchg.failure
257 // cmpxchg.success:
258 // fence?
259 // br label %cmpxchg.end
260 // cmpxchg.failure:
261 // fence?
262 // br label %cmpxchg.end
263 // cmpxchg.end:
264 // %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
265 // %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
266 // %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
267 // [...]
268 BasicBlock *ExitBB = BB->splitBasicBlock(CI, "cmpxchg.end");
269 auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
270 auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, FailureBB);
271 auto TryStoreBB = BasicBlock::Create(Ctx, "cmpxchg.trystore", F, SuccessBB);
272 auto LoopBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, TryStoreBB);
273
274 // This grabs the DebugLoc from CI
275 IRBuilder<> Builder(CI);
276
277 // The split call above "helpfully" added a branch at the end of BB (to the
278 // wrong place), but we might want a fence too. It's easiest to just remove
279 // the branch entirely.
280 std::prev(BB->end())->eraseFromParent();
281 Builder.SetInsertPoint(BB);
282 AtomicOrdering MemOpOrder = insertLeadingFence(Builder, SuccessOrder);
283 Builder.CreateBr(LoopBB);
284
285 // Start the main loop block now that we've taken care of the preliminaries.
286 Builder.SetInsertPoint(LoopBB);
287 Value *Loaded = TM->getSubtargetImpl()->getTargetLowering()->emitLoadLinked(
288 Builder, Addr, MemOpOrder);
289 Value *ShouldStore =
290 Builder.CreateICmpEQ(Loaded, CI->getCompareOperand(), "should_store");
291
292 // If the cmpxchg doesn't actually need any ordering when it fails, we can
293 // jump straight past that fence instruction (if it exists).
294 Builder.CreateCondBr(ShouldStore, TryStoreBB, FailureBB);
295
296 Builder.SetInsertPoint(TryStoreBB);
297 Value *StoreSuccess =
298 TM->getSubtargetImpl()->getTargetLowering()->emitStoreConditional(
299 Builder, CI->getNewValOperand(), Addr, MemOpOrder);
300 StoreSuccess = Builder.CreateICmpEQ(
301 StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
302 Builder.CreateCondBr(StoreSuccess, SuccessBB,
303 CI->isWeak() ? FailureBB : LoopBB);
304
305 // Make sure later instructions don't get reordered with a fence if necessary.
306 Builder.SetInsertPoint(SuccessBB);
307 insertTrailingFence(Builder, SuccessOrder);
308 Builder.CreateBr(ExitBB);
309
310 Builder.SetInsertPoint(FailureBB);
311 insertTrailingFence(Builder, FailureOrder);
312 Builder.CreateBr(ExitBB);
313
314 // Finally, we have control-flow based knowledge of whether the cmpxchg
315 // succeeded or not. We expose this to later passes by converting any
316 // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate PHI.
317
318 // Setup the builder so we can create any PHIs we need.
319 Builder.SetInsertPoint(ExitBB, ExitBB->begin());
320 PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2);
321 Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
322 Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);
323
324 // Look for any users of the cmpxchg that are just comparing the loaded value
325 // against the desired one, and replace them with the CFG-derived version.
326 SmallVector<ExtractValueInst *, 2> PrunedInsts;
327 for (auto User : CI->users()) {
328 ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User);
329 if (!EV)
330 continue;
331
332 assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
333 "weird extraction from { iN, i1 }");
334
335 if (EV->getIndices()[0] == 0)
336 EV->replaceAllUsesWith(Loaded);
337 else
338 EV->replaceAllUsesWith(Success);
339
340 PrunedInsts.push_back(EV);
341 }
342
343 // We can remove the instructions now we're no longer iterating through them.
344 for (auto EV : PrunedInsts)
345 EV->eraseFromParent();
346
347 if (!CI->use_empty()) {
348 // Some use of the full struct return that we don't understand has happened,
349 // so we've got to reconstruct it properly.
350 Value *Res;
351 Res = Builder.CreateInsertValue(UndefValue::get(CI->getType()), Loaded, 0);
352 Res = Builder.CreateInsertValue(Res, Success, 1);
353
354 CI->replaceAllUsesWith(Res);
355 }
356
357 CI->eraseFromParent();
358 return true;
359 }
360
361 AtomicOrdering AtomicExpand::insertLeadingFence(IRBuilder<> &Builder,
362 AtomicOrdering Ord) {
363 if (!TM->getSubtargetImpl()->getTargetLowering()->getInsertFencesForAtomic())
364 return Ord;
365
366 if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent)
367 Builder.CreateFence(Release);
368
369 // The exclusive operations don't need any barrier if we're adding separate
370 // fences.
371 return Monotonic;
372 }
373
374 void AtomicExpand::insertTrailingFence(IRBuilder<> &Builder,
375 AtomicOrdering Ord) {
376 if (!TM->getSubtargetImpl()->getTargetLowering()->getInsertFencesForAtomic())
377 return;
378
379 if (Ord == Acquire || Ord == AcquireRelease)
380 Builder.CreateFence(Acquire);
381 else if (Ord == SequentiallyConsistent)
382 Builder.CreateFence(SequentiallyConsistent);
383 }
11 AggressiveAntiDepBreaker.cpp
22 AllocationOrder.cpp
33 Analysis.cpp
4 AtomicExpandLoadLinkedPass.cpp
4 AtomicExpandPass.cpp
55 BasicTargetTransformInfo.cpp
66 BranchFolding.cpp
77 CalcSpillWeights.cpp
1919
2020 /// initializeCodeGen - Initialize all passes linked into the CodeGen library.
2121 void llvm::initializeCodeGen(PassRegistry &Registry) {
22 initializeAtomicExpandLoadLinkedPass(Registry);
22 initializeAtomicExpandPass(Registry);
2323 initializeBasicTTIPass(Registry);
2424 initializeBranchFolderPassPass(Registry);
2525 initializeCodeGenPreparePass(Registry);
143143 void AArch64PassConfig::addIRPasses() {
144144 // Always expand atomic operations, we don't deal with atomicrmw or cmpxchg
145145 // ourselves.
146 addPass(createAtomicExpandLoadLinkedPass(TM));
146 addPass(createAtomicExpandPass(TM));
147147
148148 // Cmpxchg instructions are often used with a subsequent comparison to
149149 // determine whether it succeeded. We can exploit existing control-flow in
427427 return (!isThumb() || hasThumb2());
428428 }
429429
430 bool ARMSubtarget::enableAtomicExpandLoadLinked() const {
430 bool ARMSubtarget::enableAtomicExpand() const {
431431 return hasAnyDataBarrier() && !isThumb1Only();
432432 }
433433
435435 /// True for some subtargets at > -O0.
436436 bool enablePostMachineScheduler() const override;
437437
438 // enableAtomicExpandLoadLinked - True if we need to expand our atomics.
439 bool enableAtomicExpandLoadLinked() const override;
438 // enableAtomicExpand - True if we need to expand our atomics.
439 bool enableAtomicExpand() const override;
440440
441441 /// getInstrItins - Return the instruction itineraries based on subtarget
442442 /// selection.
160160 if (TM->Options.ThreadModel == ThreadModel::Single)
161161 addPass(createLowerAtomicPass());
162162 else
163 addPass(createAtomicExpandLoadLinkedPass(TM));
163 addPass(createAtomicExpandPass(TM));
164164
165165 // Cmpxchg instructions are often used with a subsequent comparison to
166166 // determine whether it succeeded. We can exploit existing control-flow in
3838 return enableMachineScheduler();
3939 }
4040
41 bool TargetSubtargetInfo::enableAtomicExpandLoadLinked() const {
41 bool TargetSubtargetInfo::enableAtomicExpand() const {
4242 return true;
4343 }
4444
0 ; RUN: opt -S -o - -mtriple=armv7-apple-ios7.0 -atomic-expand %s | FileCheck %s
1
2 define i8 @test_atomic_xchg_i8(i8* %ptr, i8 %xchgend) {
3 ; CHECK-LABEL: @test_atomic_xchg_i8
4 ; CHECK-NOT: fence
5 ; CHECK: br label %[[LOOP:.*]]
6 ; CHECK: [[LOOP]]:
7 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
8 ; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8
9 ; CHECK: [[NEWVAL32:%.*]] = zext i8 %xchgend to i32
10 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
11 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
12 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
13 ; CHECK: [[END]]:
14 ; CHECK-NOT: fence
15 ; CHECK: ret i8 [[OLDVAL]]
16 %res = atomicrmw xchg i8* %ptr, i8 %xchgend monotonic
17 ret i8 %res
18 }
19
20 define i16 @test_atomic_add_i16(i16* %ptr, i16 %addend) {
21 ; CHECK-LABEL: @test_atomic_add_i16
22 ; CHECK: fence release
23 ; CHECK: br label %[[LOOP:.*]]
24 ; CHECK: [[LOOP]]:
25 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i16(i16* %ptr)
26 ; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i16
27 ; CHECK: [[NEWVAL:%.*]] = add i16 [[OLDVAL]], %addend
28 ; CHECK: [[NEWVAL32:%.*]] = zext i16 [[NEWVAL]] to i32
29 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i16(i32 [[NEWVAL32]], i16* %ptr)
30 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
31 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
32 ; CHECK: [[END]]:
33 ; CHECK: fence seq_cst
34 ; CHECK: ret i16 [[OLDVAL]]
35 %res = atomicrmw add i16* %ptr, i16 %addend seq_cst
36 ret i16 %res
37 }
38
39 define i32 @test_atomic_sub_i32(i32* %ptr, i32 %subend) {
40 ; CHECK-LABEL: @test_atomic_sub_i32
41 ; CHECK-NOT: fence
42 ; CHECK: br label %[[LOOP:.*]]
43 ; CHECK: [[LOOP]]:
44 ; CHECK: [[OLDVAL:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %ptr)
45 ; CHECK: [[NEWVAL:%.*]] = sub i32 [[OLDVAL]], %subend
46 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 [[NEWVAL]], i32* %ptr)
47 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
48 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
49 ; CHECK: [[END]]:
50 ; CHECK: fence acquire
51 ; CHECK: ret i32 [[OLDVAL]]
52 %res = atomicrmw sub i32* %ptr, i32 %subend acquire
53 ret i32 %res
54 }
55
56 define i8 @test_atomic_and_i8(i8* %ptr, i8 %andend) {
57 ; CHECK-LABEL: @test_atomic_and_i8
58 ; CHECK: fence release
59 ; CHECK: br label %[[LOOP:.*]]
60 ; CHECK: [[LOOP]]:
61 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
62 ; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8
63 ; CHECK: [[NEWVAL:%.*]] = and i8 [[OLDVAL]], %andend
64 ; CHECK: [[NEWVAL32:%.*]] = zext i8 [[NEWVAL]] to i32
65 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
66 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
67 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
68 ; CHECK: [[END]]:
69 ; CHECK-NOT: fence
70 ; CHECK: ret i8 [[OLDVAL]]
71 %res = atomicrmw and i8* %ptr, i8 %andend release
72 ret i8 %res
73 }
74
75 define i16 @test_atomic_nand_i16(i16* %ptr, i16 %nandend) {
76 ; CHECK-LABEL: @test_atomic_nand_i16
77 ; CHECK: fence release
78 ; CHECK: br label %[[LOOP:.*]]
79 ; CHECK: [[LOOP]]:
80 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i16(i16* %ptr)
81 ; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i16
82 ; CHECK: [[NEWVAL_TMP:%.*]] = and i16 [[OLDVAL]], %nandend
83 ; CHECK: [[NEWVAL:%.*]] = xor i16 [[NEWVAL_TMP]], -1
84 ; CHECK: [[NEWVAL32:%.*]] = zext i16 [[NEWVAL]] to i32
85 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i16(i32 [[NEWVAL32]], i16* %ptr)
86 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
87 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
88 ; CHECK: [[END]]:
89 ; CHECK: fence seq_cst
90 ; CHECK: ret i16 [[OLDVAL]]
91 %res = atomicrmw nand i16* %ptr, i16 %nandend seq_cst
92 ret i16 %res
93 }
94
95 define i64 @test_atomic_or_i64(i64* %ptr, i64 %orend) {
96 ; CHECK-LABEL: @test_atomic_or_i64
97 ; CHECK: fence release
98 ; CHECK: br label %[[LOOP:.*]]
99 ; CHECK: [[LOOP]]:
100 ; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8*
101 ; CHECK: [[LOHI:%.*]] = call { i32, i32 } @llvm.arm.ldrexd(i8* [[PTR8]])
102 ; CHECK: [[LO:%.*]] = extractvalue { i32, i32 } [[LOHI]], 0
103 ; CHECK: [[HI:%.*]] = extractvalue { i32, i32 } [[LOHI]], 1
104 ; CHECK: [[LO64:%.*]] = zext i32 [[LO]] to i64
105 ; CHECK: [[HI64_TMP:%.*]] = zext i32 [[HI]] to i64
106 ; CHECK: [[HI64:%.*]] = shl i64 [[HI64_TMP]], 32
107 ; CHECK: [[OLDVAL:%.*]] = or i64 [[LO64]], [[HI64]]
108 ; CHECK: [[NEWVAL:%.*]] = or i64 [[OLDVAL]], %orend
109 ; CHECK: [[NEWLO:%.*]] = trunc i64 [[NEWVAL]] to i32
110 ; CHECK: [[NEWHI_TMP:%.*]] = lshr i64 [[NEWVAL]], 32
111 ; CHECK: [[NEWHI:%.*]] = trunc i64 [[NEWHI_TMP]] to i32
112 ; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8*
113 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strexd(i32 [[NEWLO]], i32 [[NEWHI]], i8* [[PTR8]])
114 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
115 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
116 ; CHECK: [[END]]:
117 ; CHECK: fence seq_cst
118 ; CHECK: ret i64 [[OLDVAL]]
119 %res = atomicrmw or i64* %ptr, i64 %orend seq_cst
120 ret i64 %res
121 }
122
123 define i8 @test_atomic_xor_i8(i8* %ptr, i8 %xorend) {
124 ; CHECK-LABEL: @test_atomic_xor_i8
125 ; CHECK: fence release
126 ; CHECK: br label %[[LOOP:.*]]
127 ; CHECK: [[LOOP]]:
128 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
129 ; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8
130 ; CHECK: [[NEWVAL:%.*]] = xor i8 [[OLDVAL]], %xorend
131 ; CHECK: [[NEWVAL32:%.*]] = zext i8 [[NEWVAL]] to i32
132 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
133 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
134 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
135 ; CHECK: [[END]]:
136 ; CHECK: fence seq_cst
137 ; CHECK: ret i8 [[OLDVAL]]
138 %res = atomicrmw xor i8* %ptr, i8 %xorend seq_cst
139 ret i8 %res
140 }
141
142 define i8 @test_atomic_max_i8(i8* %ptr, i8 %maxend) {
143 ; CHECK-LABEL: @test_atomic_max_i8
144 ; CHECK: fence release
145 ; CHECK: br label %[[LOOP:.*]]
146 ; CHECK: [[LOOP]]:
147 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
148 ; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8
149 ; CHECK: [[WANT_OLD:%.*]] = icmp sgt i8 [[OLDVAL]], %maxend
150 ; CHECK: [[NEWVAL:%.*]] = select i1 [[WANT_OLD]], i8 [[OLDVAL]], i8 %maxend
151 ; CHECK: [[NEWVAL32:%.*]] = zext i8 [[NEWVAL]] to i32
152 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
153 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
154 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
155 ; CHECK: [[END]]:
156 ; CHECK: fence seq_cst
157 ; CHECK: ret i8 [[OLDVAL]]
158 %res = atomicrmw max i8* %ptr, i8 %maxend seq_cst
159 ret i8 %res
160 }
161
162 define i8 @test_atomic_min_i8(i8* %ptr, i8 %minend) {
163 ; CHECK-LABEL: @test_atomic_min_i8
164 ; CHECK: fence release
165 ; CHECK: br label %[[LOOP:.*]]
166 ; CHECK: [[LOOP]]:
167 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
168 ; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8
169 ; CHECK: [[WANT_OLD:%.*]] = icmp sle i8 [[OLDVAL]], %minend
170 ; CHECK: [[NEWVAL:%.*]] = select i1 [[WANT_OLD]], i8 [[OLDVAL]], i8 %minend
171 ; CHECK: [[NEWVAL32:%.*]] = zext i8 [[NEWVAL]] to i32
172 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
173 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
174 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
175 ; CHECK: [[END]]:
176 ; CHECK: fence seq_cst
177 ; CHECK: ret i8 [[OLDVAL]]
178 %res = atomicrmw min i8* %ptr, i8 %minend seq_cst
179 ret i8 %res
180 }
181
182 define i8 @test_atomic_umax_i8(i8* %ptr, i8 %umaxend) {
183 ; CHECK-LABEL: @test_atomic_umax_i8
184 ; CHECK: fence release
185 ; CHECK: br label %[[LOOP:.*]]
186 ; CHECK: [[LOOP]]:
187 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
188 ; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8
189 ; CHECK: [[WANT_OLD:%.*]] = icmp ugt i8 [[OLDVAL]], %umaxend
190 ; CHECK: [[NEWVAL:%.*]] = select i1 [[WANT_OLD]], i8 [[OLDVAL]], i8 %umaxend
191 ; CHECK: [[NEWVAL32:%.*]] = zext i8 [[NEWVAL]] to i32
192 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
193 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
194 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
195 ; CHECK: [[END]]:
196 ; CHECK: fence seq_cst
197 ; CHECK: ret i8 [[OLDVAL]]
198 %res = atomicrmw umax i8* %ptr, i8 %umaxend seq_cst
199 ret i8 %res
200 }
201
202 define i8 @test_atomic_umin_i8(i8* %ptr, i8 %uminend) {
203 ; CHECK-LABEL: @test_atomic_umin_i8
204 ; CHECK: fence release
205 ; CHECK: br label %[[LOOP:.*]]
206 ; CHECK: [[LOOP]]:
207 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
208 ; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8
209 ; CHECK: [[WANT_OLD:%.*]] = icmp ule i8 [[OLDVAL]], %uminend
210 ; CHECK: [[NEWVAL:%.*]] = select i1 [[WANT_OLD]], i8 [[OLDVAL]], i8 %uminend
211 ; CHECK: [[NEWVAL32:%.*]] = zext i8 [[NEWVAL]] to i32
212 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
213 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
214 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
215 ; CHECK: [[END]]:
216 ; CHECK: fence seq_cst
217 ; CHECK: ret i8 [[OLDVAL]]
218 %res = atomicrmw umin i8* %ptr, i8 %uminend seq_cst
219 ret i8 %res
220 }
221
222 define i8 @test_cmpxchg_i8_seqcst_seqcst(i8* %ptr, i8 %desired, i8 %newval) {
223 ; CHECK-LABEL: @test_cmpxchg_i8_seqcst_seqcst
224 ; CHECK: fence release
225 ; CHECK: br label %[[LOOP:.*]]
226
227 ; CHECK: [[LOOP]]:
228 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
229 ; CHECK: [[OLDVAL:%.*]] = trunc i32 %1 to i8
230 ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i8 [[OLDVAL]], %desired
231 ; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
232
233 ; CHECK: [[TRY_STORE]]:
234 ; CHECK: [[NEWVAL32:%.*]] = zext i8 %newval to i32
235 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
236 ; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
237 ; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
238
239 ; CHECK: [[SUCCESS_BB]]:
240 ; CHECK: fence seq_cst
241 ; CHECK: br label %[[DONE:.*]]
242
243 ; CHECK: [[FAILURE_BB]]:
244 ; CHECK: fence seq_cst
245 ; CHECK: br label %[[DONE]]
246
247 ; CHECK: [[DONE]]:
248 ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
249 ; CHECK: ret i8 [[OLDVAL]]
250
251 %pairold = cmpxchg i8* %ptr, i8 %desired, i8 %newval seq_cst seq_cst
252 %old = extractvalue { i8, i1 } %pairold, 0
253 ret i8 %old
254 }
255
256 define i16 @test_cmpxchg_i16_seqcst_monotonic(i16* %ptr, i16 %desired, i16 %newval) {
257 ; CHECK-LABEL: @test_cmpxchg_i16_seqcst_monotonic
258 ; CHECK: fence release
259 ; CHECK: br label %[[LOOP:.*]]
260
261 ; CHECK: [[LOOP]]:
262 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i16(i16* %ptr)
263 ; CHECK: [[OLDVAL:%.*]] = trunc i32 %1 to i16
264 ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i16 [[OLDVAL]], %desired
265 ; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
266
267 ; CHECK: [[TRY_STORE]]:
268 ; CHECK: [[NEWVAL32:%.*]] = zext i16 %newval to i32
269 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i16(i32 [[NEWVAL32]], i16* %ptr)
270 ; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
271 ; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
272
273 ; CHECK: [[SUCCESS_BB]]:
274 ; CHECK: fence seq_cst
275 ; CHECK: br label %[[DONE:.*]]
276
277 ; CHECK: [[FAILURE_BB]]:
278 ; CHECK-NOT: fence
279 ; CHECK: br label %[[DONE]]
280
281 ; CHECK: [[DONE]]:
282 ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
283 ; CHECK: ret i16 [[OLDVAL]]
284
285 %pairold = cmpxchg i16* %ptr, i16 %desired, i16 %newval seq_cst monotonic
286 %old = extractvalue { i16, i1 } %pairold, 0
287 ret i16 %old
288 }
289
290 define i32 @test_cmpxchg_i32_acquire_acquire(i32* %ptr, i32 %desired, i32 %newval) {
291 ; CHECK-LABEL: @test_cmpxchg_i32_acquire_acquire
292 ; CHECK-NOT: fence
293 ; CHECK: br label %[[LOOP:.*]]
294
295 ; CHECK: [[LOOP]]:
296 ; CHECK: [[OLDVAL:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %ptr)
297 ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[OLDVAL]], %desired
298 ; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
299
300 ; CHECK: [[TRY_STORE]]:
301 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %newval, i32* %ptr)
302 ; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
303 ; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
304
305 ; CHECK: [[SUCCESS_BB]]:
306 ; CHECK: fence acquire
307 ; CHECK: br label %[[DONE:.*]]
308
309 ; CHECK: [[FAILURE_BB]]:
310 ; CHECK: fence acquire
311 ; CHECK: br label %[[DONE]]
312
313 ; CHECK: [[DONE]]:
314 ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
315 ; CHECK: ret i32 [[OLDVAL]]
316
317 %pairold = cmpxchg i32* %ptr, i32 %desired, i32 %newval acquire acquire
318 %old = extractvalue { i32, i1 } %pairold, 0
319 ret i32 %old
320 }
321
322 define i64 @test_cmpxchg_i64_monotonic_monotonic(i64* %ptr, i64 %desired, i64 %newval) {
323 ; CHECK-LABEL: @test_cmpxchg_i64_monotonic_monotonic
324 ; CHECK-NOT: fence
325 ; CHECK: br label %[[LOOP:.*]]
326
327 ; CHECK: [[LOOP]]:
328 ; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8*
329 ; CHECK: [[LOHI:%.*]] = call { i32, i32 } @llvm.arm.ldrexd(i8* [[PTR8]])
330 ; CHECK: [[LO:%.*]] = extractvalue { i32, i32 } [[LOHI]], 0
331 ; CHECK: [[HI:%.*]] = extractvalue { i32, i32 } [[LOHI]], 1
332 ; CHECK: [[LO64:%.*]] = zext i32 [[LO]] to i64
333 ; CHECK: [[HI64_TMP:%.*]] = zext i32 [[HI]] to i64
334 ; CHECK: [[HI64:%.*]] = shl i64 [[HI64_TMP]], 32
335 ; CHECK: [[OLDVAL:%.*]] = or i64 [[LO64]], [[HI64]]
336 ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i64 [[OLDVAL]], %desired
337 ; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
338
339 ; CHECK: [[TRY_STORE]]:
340 ; CHECK: [[NEWLO:%.*]] = trunc i64 %newval to i32
341 ; CHECK: [[NEWHI_TMP:%.*]] = lshr i64 %newval, 32
342 ; CHECK: [[NEWHI:%.*]] = trunc i64 [[NEWHI_TMP]] to i32
343 ; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8*
344 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strexd(i32 [[NEWLO]], i32 [[NEWHI]], i8* [[PTR8]])
345 ; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
346 ; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
347
348 ; CHECK: [[SUCCESS_BB]]:
349 ; CHECK-NOT: fence
350 ; CHECK: br label %[[DONE:.*]]
351
352 ; CHECK: [[FAILURE_BB]]:
353 ; CHECK-NOT: fence
354 ; CHECK: br label %[[DONE]]
355
356 ; CHECK: [[DONE]]:
357 ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
358 ; CHECK: ret i64 [[OLDVAL]]
359
360 %pairold = cmpxchg i64* %ptr, i64 %desired, i64 %newval monotonic monotonic
361 %old = extractvalue { i64, i1 } %pairold, 0
362 ret i64 %old
363 }
0 ; RUN: opt -S -o - -mtriple=armv8-linux-gnueabihf -atomic-expand %s | FileCheck %s
1
2 define i8 @test_atomic_xchg_i8(i8* %ptr, i8 %xchgend) {
3 ; CHECK-LABEL: @test_atomic_xchg_i8
4 ; CHECK-NOT: fence
5 ; CHECK: br label %[[LOOP:.*]]
6 ; CHECK: [[LOOP]]:
7 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
8 ; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8
9 ; CHECK: [[NEWVAL32:%.*]] = zext i8 %xchgend to i32
10 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
11 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
12 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
13 ; CHECK: [[END]]:
14 ; CHECK-NOT: fence
15 ; CHECK: ret i8 [[OLDVAL]]
16 %res = atomicrmw xchg i8* %ptr, i8 %xchgend monotonic
17 ret i8 %res
18 }
19
20 define i16 @test_atomic_add_i16(i16* %ptr, i16 %addend) {
21 ; CHECK-LABEL: @test_atomic_add_i16
22 ; CHECK-NOT: fence
23 ; CHECK: br label %[[LOOP:.*]]
24 ; CHECK: [[LOOP]]:
25 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldaex.p0i16(i16* %ptr)
26 ; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i16
27 ; CHECK: [[NEWVAL:%.*]] = add i16 [[OLDVAL]], %addend
28 ; CHECK: [[NEWVAL32:%.*]] = zext i16 [[NEWVAL]] to i32
29 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.stlex.p0i16(i32 [[NEWVAL32]], i16* %ptr)
30 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
31 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
32 ; CHECK: [[END]]:
33 ; CHECK-NOT: fence
34 ; CHECK: ret i16 [[OLDVAL]]
35 %res = atomicrmw add i16* %ptr, i16 %addend seq_cst
36 ret i16 %res
37 }
38
39 define i32 @test_atomic_sub_i32(i32* %ptr, i32 %subend) {
40 ; CHECK-LABEL: @test_atomic_sub_i32
41 ; CHECK-NOT: fence
42 ; CHECK: br label %[[LOOP:.*]]
43 ; CHECK: [[LOOP]]:
44 ; CHECK: [[OLDVAL:%.*]] = call i32 @llvm.arm.ldaex.p0i32(i32* %ptr)
45 ; CHECK: [[NEWVAL:%.*]] = sub i32 [[OLDVAL]], %subend
46 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 [[NEWVAL]], i32* %ptr)
47 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
48 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
49 ; CHECK: [[END]]:
50 ; CHECK-NOT: fence
51 ; CHECK: ret i32 [[OLDVAL]]
52 %res = atomicrmw sub i32* %ptr, i32 %subend acquire
53 ret i32 %res
54 }
55
56 define i64 @test_atomic_or_i64(i64* %ptr, i64 %orend) {
57 ; CHECK-LABEL: @test_atomic_or_i64
58 ; CHECK-NOT: fence
59 ; CHECK: br label %[[LOOP:.*]]
60 ; CHECK: [[LOOP]]:
61 ; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8*
62 ; CHECK: [[LOHI:%.*]] = call { i32, i32 } @llvm.arm.ldaexd(i8* [[PTR8]])
63 ; CHECK: [[LO:%.*]] = extractvalue { i32, i32 } [[LOHI]], 0
64 ; CHECK: [[HI:%.*]] = extractvalue { i32, i32 } [[LOHI]], 1
65 ; CHECK: [[LO64:%.*]] = zext i32 [[LO]] to i64
66 ; CHECK: [[HI64_TMP:%.*]] = zext i32 [[HI]] to i64
67 ; CHECK: [[HI64:%.*]] = shl i64 [[HI64_TMP]], 32
68 ; CHECK: [[OLDVAL:%.*]] = or i64 [[LO64]], [[HI64]]
69 ; CHECK: [[NEWVAL:%.*]] = or i64 [[OLDVAL]], %orend
70 ; CHECK: [[NEWLO:%.*]] = trunc i64 [[NEWVAL]] to i32
71 ; CHECK: [[NEWHI_TMP:%.*]] = lshr i64 [[NEWVAL]], 32
72 ; CHECK: [[NEWHI:%.*]] = trunc i64 [[NEWHI_TMP]] to i32
73 ; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8*
74 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.stlexd(i32 [[NEWLO]], i32 [[NEWHI]], i8* [[PTR8]])
75 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
76 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
77 ; CHECK: [[END]]:
78 ; CHECK-NOT: fence
79 ; CHECK: ret i64 [[OLDVAL]]
80 %res = atomicrmw or i64* %ptr, i64 %orend seq_cst
81 ret i64 %res
82 }
83
84 define i8 @test_cmpxchg_i8_seqcst_seqcst(i8* %ptr, i8 %desired, i8 %newval) {
85 ; CHECK-LABEL: @test_cmpxchg_i8_seqcst_seqcst
86 ; CHECK-NOT: fence
87 ; CHECK: br label %[[LOOP:.*]]
88
89 ; CHECK: [[LOOP]]:
90 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldaex.p0i8(i8* %ptr)
91 ; CHECK: [[OLDVAL:%.*]] = trunc i32 %1 to i8
92 ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i8 [[OLDVAL]], %desired
93 ; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
94
95 ; CHECK: [[TRY_STORE]]:
96 ; CHECK: [[NEWVAL32:%.*]] = zext i8 %newval to i32
97 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.stlex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
98 ; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
99 ; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
100
101 ; CHECK: [[SUCCESS_BB]]:
102 ; CHECK-NOT: fence_cst
103 ; CHECK: br label %[[DONE:.*]]
104
105 ; CHECK: [[FAILURE_BB]]:
106 ; CHECK-NOT: fence_cst
107 ; CHECK: br label %[[DONE]]
108
109 ; CHECK: [[DONE]]:
110 ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
111 ; CHECK: ret i8 [[OLDVAL]]
112
113 %pairold = cmpxchg i8* %ptr, i8 %desired, i8 %newval seq_cst seq_cst
114 %old = extractvalue { i8, i1 } %pairold, 0
115 ret i8 %old
116 }
117
118 define i16 @test_cmpxchg_i16_seqcst_monotonic(i16* %ptr, i16 %desired, i16 %newval) {
119 ; CHECK-LABEL: @test_cmpxchg_i16_seqcst_monotonic
120 ; CHECK-NOT: fence
121 ; CHECK: br label %[[LOOP:.*]]
122
123 ; CHECK: [[LOOP]]:
124 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldaex.p0i16(i16* %ptr)
125 ; CHECK: [[OLDVAL:%.*]] = trunc i32 %1 to i16
126 ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i16 [[OLDVAL]], %desired
127 ; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
128
129 ; CHECK: [[TRY_STORE]]:
130 ; CHECK: [[NEWVAL32:%.*]] = zext i16 %newval to i32
131 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.stlex.p0i16(i32 [[NEWVAL32]], i16* %ptr)
132 ; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
133 ; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
134
135 ; CHECK: [[SUCCESS_BB]]:
136 ; CHECK-NOT: fence
137 ; CHECK: br label %[[DONE:.*]]
138
139 ; CHECK: [[FAILURE_BB]]:
140 ; CHECK-NOT: fence
141 ; CHECK: br label %[[DONE]]
142
143 ; CHECK: [[DONE]]:
144 ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
145 ; CHECK: ret i16 [[OLDVAL]]
146
147 %pairold = cmpxchg i16* %ptr, i16 %desired, i16 %newval seq_cst monotonic
148 %old = extractvalue { i16, i1 } %pairold, 0
149 ret i16 %old
150 }
151
152 define i32 @test_cmpxchg_i32_acquire_acquire(i32* %ptr, i32 %desired, i32 %newval) {
153 ; CHECK-LABEL: @test_cmpxchg_i32_acquire_acquire
154 ; CHECK-NOT: fence
155 ; CHECK: br label %[[LOOP:.*]]
156
157 ; CHECK: [[LOOP]]:
158 ; CHECK: [[OLDVAL:%.*]] = call i32 @llvm.arm.ldaex.p0i32(i32* %ptr)
159 ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[OLDVAL]], %desired
160 ; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
161
162 ; CHECK: [[TRY_STORE]]:
163 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %newval, i32* %ptr)
164 ; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
165 ; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
166
167 ; CHECK: [[SUCCESS_BB]]:
168 ; CHECK-NOT: fence_cst
169 ; CHECK: br label %[[DONE:.*]]
170
171 ; CHECK: [[FAILURE_BB]]:
172 ; CHECK-NOT: fence_cst
173 ; CHECK: br label %[[DONE]]
174
175 ; CHECK: [[DONE]]:
176 ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
177 ; CHECK: ret i32 [[OLDVAL]]
178
179 %pairold = cmpxchg i32* %ptr, i32 %desired, i32 %newval acquire acquire
180 %old = extractvalue { i32, i1 } %pairold, 0
181 ret i32 %old
182 }
183
184 define i64 @test_cmpxchg_i64_monotonic_monotonic(i64* %ptr, i64 %desired, i64 %newval) {
185 ; CHECK-LABEL: @test_cmpxchg_i64_monotonic_monotonic
186 ; CHECK-NOT: fence
187 ; CHECK: br label %[[LOOP:.*]]
188
189 ; CHECK: [[LOOP]]:
190 ; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8*
191 ; CHECK: [[LOHI:%.*]] = call { i32, i32 } @llvm.arm.ldrexd(i8* [[PTR8]])
192 ; CHECK: [[LO:%.*]] = extractvalue { i32, i32 } [[LOHI]], 0
193 ; CHECK: [[HI:%.*]] = extractvalue { i32, i32 } [[LOHI]], 1
194 ; CHECK: [[LO64:%.*]] = zext i32 [[LO]] to i64
195 ; CHECK: [[HI64_TMP:%.*]] = zext i32 [[HI]] to i64
196 ; CHECK: [[HI64:%.*]] = shl i64 [[HI64_TMP]], 32
197 ; CHECK: [[OLDVAL:%.*]] = or i64 [[LO64]], [[HI64]]
198 ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i64 [[OLDVAL]], %desired
199 ; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
200
201 ; CHECK: [[TRY_STORE]]:
202 ; CHECK: [[NEWLO:%.*]] = trunc i64 %newval to i32
203 ; CHECK: [[NEWHI_TMP:%.*]] = lshr i64 %newval, 32
204 ; CHECK: [[NEWHI:%.*]] = trunc i64 [[NEWHI_TMP]] to i32
205 ; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8*
206 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strexd(i32 [[NEWLO]], i32 [[NEWHI]], i8* [[PTR8]])
207 ; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
208 ; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
209
210 ; CHECK: [[SUCCESS_BB]]:
211 ; CHECK-NOT: fence
212 ; CHECK: br label %[[DONE:.*]]
213
214 ; CHECK: [[FAILURE_BB]]:
215 ; CHECK-NOT: fence
216 ; CHECK: br label %[[DONE]]
217
218 ; CHECK: [[DONE]]:
219 ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
220 ; CHECK: ret i64 [[OLDVAL]]
221
222 %pairold = cmpxchg i64* %ptr, i64 %desired, i64 %newval monotonic monotonic
223 %old = extractvalue { i64, i1 } %pairold, 0
224 ret i64 %old
225 }
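The i64 cases above exercise the doubleword path: 32-bit ARM has no 64-bit exclusive load, so the expansion goes through llvm.arm.ldrexd, which returns the two halves as { i32, i32 } and rebuilds the i64 before the compare. A hand-written sketch of that recombination (hypothetical function name; typed-pointer IR as in the tests above):

declare { i32, i32 } @llvm.arm.ldrexd(i8*)

define i64 @load_exclusive_i64(i64* %ptr) {
  %p8 = bitcast i64* %ptr to i8*                       ; ldrexd takes an i8*
  %lohi = call { i32, i32 } @llvm.arm.ldrexd(i8* %p8)
  %lo = extractvalue { i32, i32 } %lohi, 0
  %hi = extractvalue { i32, i32 } %lohi, 1
  %lo64 = zext i32 %lo to i64
  %hi64.tmp = zext i32 %hi to i64
  %hi64 = shl i64 %hi64.tmp, 32
  %old = or i64 %lo64, %hi64                           ; i64 rebuilt from the word pair
  ret i64 %old
}

A store goes the other way: the new value is split with trunc and lshr 32 and handed to llvm.arm.strexd (or llvm.arm.stlexd for a store-release) as two i32 words.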
+97
-0
test/Transforms/AtomicExpand/ARM/cmpxchg-weak.ll
0 ; RUN: opt -atomic-expand -S -mtriple=thumbv7s-apple-ios7.0 %s | FileCheck %s
1
2 define i32 @test_cmpxchg_seq_cst(i32* %addr, i32 %desired, i32 %new) {
3 ; CHECK-LABEL: @test_cmpxchg_seq_cst
4 ; CHECK: fence release
5 ; CHECK: br label %[[START:.*]]
6
7 ; CHECK: [[START]]:
8 ; CHECK: [[LOADED:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %addr)
9 ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[LOADED]], %desired
10 ; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
11
12 ; CHECK: [[TRY_STORE]]:
13 ; CHECK: [[STREX:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %addr)
14 ; CHECK: [[SUCCESS:%.*]] = icmp eq i32 [[STREX]], 0
15 ; CHECK: br i1 [[SUCCESS]], label %[[SUCCESS_BB:.*]], label %[[FAILURE_BB]]
16
17 ; CHECK: [[SUCCESS_BB]]:
18 ; CHECK: fence seq_cst
19 ; CHECK: br label %[[END:.*]]
20
21 ; CHECK: [[FAILURE_BB]]:
22 ; CHECK: fence seq_cst
23 ; CHECK: br label %[[END]]
24
25 ; CHECK: [[END]]:
26 ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
27 ; CHECK: ret i32 [[LOADED]]
28
29 %pair = cmpxchg weak i32* %addr, i32 %desired, i32 %new seq_cst seq_cst
30 %oldval = extractvalue { i32, i1 } %pair, 0
31 ret i32 %oldval
32 }
33
34 define i1 @test_cmpxchg_weak_fail(i32* %addr, i32 %desired, i32 %new) {
35 ; CHECK-LABEL: @test_cmpxchg_weak_fail
36 ; CHECK: fence release
37 ; CHECK: br label %[[START:.*]]
38
39 ; CHECK: [[START]]:
40 ; CHECK: [[LOADED:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %addr)
41 ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[LOADED]], %desired
42 ; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
43
44 ; CHECK: [[TRY_STORE]]:
45 ; CHECK: [[STREX:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %addr)
46 ; CHECK: [[SUCCESS:%.*]] = icmp eq i32 [[STREX]], 0
47 ; CHECK: br i1 [[SUCCESS]], label %[[SUCCESS_BB:.*]], label %[[FAILURE_BB:.*]]
48
49 ; CHECK: [[SUCCESS_BB]]:
50 ; CHECK: fence seq_cst
51 ; CHECK: br label %[[END:.*]]
52
53 ; CHECK: [[FAILURE_BB]]:
54 ; CHECK-NOT: fence
55 ; CHECK: br label %[[END]]
56
57 ; CHECK: [[END]]:
58 ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
59 ; CHECK: ret i1 [[SUCCESS]]
60
61 %pair = cmpxchg weak i32* %addr, i32 %desired, i32 %new seq_cst monotonic
62 %oldval = extractvalue { i32, i1 } %pair, 1
63 ret i1 %oldval
64 }
65
66 define i32 @test_cmpxchg_monotonic(i32* %addr, i32 %desired, i32 %new) {
67 ; CHECK-LABEL: @test_cmpxchg_monotonic
68 ; CHECK-NOT: fence
69 ; CHECK: br label %[[START:.*]]
70
71 ; CHECK: [[START]]:
72 ; CHECK: [[LOADED:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %addr)
73 ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[LOADED]], %desired
74 ; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
75
76 ; CHECK: [[TRY_STORE]]:
77 ; CHECK: [[STREX:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %addr)
78 ; CHECK: [[SUCCESS:%.*]] = icmp eq i32 [[STREX]], 0
79 ; CHECK: br i1 [[SUCCESS]], label %[[SUCCESS_BB:.*]], label %[[FAILURE_BB:.*]]
80
81 ; CHECK: [[SUCCESS_BB]]:
82 ; CHECK-NOT: fence
83 ; CHECK: br label %[[END:.*]]
84
85 ; CHECK: [[FAILURE_BB]]:
86 ; CHECK-NOT: fence
87 ; CHECK: br label %[[END]]
88
89 ; CHECK: [[END]]:
90 ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
91 ; CHECK: ret i32 [[LOADED]]
92
93 %pair = cmpxchg weak i32* %addr, i32 %desired, i32 %new monotonic monotonic
94 %oldval = extractvalue { i32, i1 } %pair, 0
95 ret i32 %oldval
96 }
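Relative to the strong cmpxchg loops in the files above and below, the weak form is allowed to fail spuriously: a failed strex branches straight to the failure block instead of re-entering the loop, and the i1 half of the cmpxchg result pair is materialized by the success/failure phi. A hand-written sketch of that control-flow shape (not actual pass output; hypothetical function name, v7-style intrinsics, fences omitted):

declare i32 @llvm.arm.ldrex.p0i32(i32*)
declare i32 @llvm.arm.strex.p0i32(i32, i32*)

define i1 @weak_cmpxchg_shape(i32* %addr, i32 %desired, i32 %new) {
entry:
  br label %start
start:
  %loaded = call i32 @llvm.arm.ldrex.p0i32(i32* %addr)
  %should = icmp eq i32 %loaded, %desired
  br i1 %should, label %try_store, label %failure
try_store:
  %status = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %addr)
  %stored = icmp eq i32 %status, 0
  ; weak semantics: a failed strex exits to %failure rather than retrying %start
  br i1 %stored, label %success, label %failure
success:
  br label %done
failure:
  br label %done
done:
  %ok = phi i1 [ true, %success ], [ false, %failure ]
  ret i1 %ok
}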
+3
-0
test/Transforms/AtomicExpand/ARM/lit.local.cfg
0 if not 'ARM' in config.root.targets:
1 config.unsupported = True
2
+0
-364
test/Transforms/AtomicExpandLoadLinked/ARM/atomic-expansion-v7.ll
None ; RUN: opt -S -o - -mtriple=armv7-apple-ios7.0 -atomic-ll-sc %s | FileCheck %s
1
2 define i8 @test_atomic_xchg_i8(i8* %ptr, i8 %xchgend) {
3 ; CHECK-LABEL: @test_atomic_xchg_i8
4 ; CHECK-NOT: fence
5 ; CHECK: br label %[[LOOP:.*]]
6 ; CHECK: [[LOOP]]:
7 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
8 ; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8
9 ; CHECK: [[NEWVAL32:%.*]] = zext i8 %xchgend to i32
10 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
11 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
12 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
13 ; CHECK: [[END]]:
14 ; CHECK-NOT: fence
15 ; CHECK: ret i8 [[OLDVAL]]
16 %res = atomicrmw xchg i8* %ptr, i8 %xchgend monotonic
17 ret i8 %res
18 }
19
20 define i16 @test_atomic_add_i16(i16* %ptr, i16 %addend) {
21 ; CHECK-LABEL: @test_atomic_add_i16
22 ; CHECK: fence release
23 ; CHECK: br label %[[LOOP:.*]]
24 ; CHECK: [[LOOP]]:
25 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i16(i16* %ptr)
26 ; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i16
27 ; CHECK: [[NEWVAL:%.*]] = add i16 [[OLDVAL]], %addend
28 ; CHECK: [[NEWVAL32:%.*]] = zext i16 [[NEWVAL]] to i32
29 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i16(i32 [[NEWVAL32]], i16* %ptr)
30 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
31 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
32 ; CHECK: [[END]]:
33 ; CHECK: fence seq_cst
34 ; CHECK: ret i16 [[OLDVAL]]
35 %res = atomicrmw add i16* %ptr, i16 %addend seq_cst
36 ret i16 %res
37 }
38
39 define i32 @test_atomic_sub_i32(i32* %ptr, i32 %subend) {
40 ; CHECK-LABEL: @test_atomic_sub_i32
41 ; CHECK-NOT: fence
42 ; CHECK: br label %[[LOOP:.*]]
43 ; CHECK: [[LOOP]]:
44 ; CHECK: [[OLDVAL:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %ptr)
45 ; CHECK: [[NEWVAL:%.*]] = sub i32 [[OLDVAL]], %subend
46 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 [[NEWVAL]], i32* %ptr)
47 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
48 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
49 ; CHECK: [[END]]:
50 ; CHECK: fence acquire
51 ; CHECK: ret i32 [[OLDVAL]]
52 %res = atomicrmw sub i32* %ptr, i32 %subend acquire
53 ret i32 %res
54 }
55
56 define i8 @test_atomic_and_i8(i8* %ptr, i8 %andend) {
57 ; CHECK-LABEL: @test_atomic_and_i8
58 ; CHECK: fence release
59 ; CHECK: br label %[[LOOP:.*]]
60 ; CHECK: [[LOOP]]:
61 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
62 ; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8
63 ; CHECK: [[NEWVAL:%.*]] = and i8 [[OLDVAL]], %andend
64 ; CHECK: [[NEWVAL32:%.*]] = zext i8 [[NEWVAL]] to i32
65 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
66 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
67 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
68 ; CHECK: [[END]]:
69 ; CHECK-NOT: fence
70 ; CHECK: ret i8 [[OLDVAL]]
71 %res = atomicrmw and i8* %ptr, i8 %andend release
72 ret i8 %res
73 }
74
75 define i16 @test_atomic_nand_i16(i16* %ptr, i16 %nandend) {
76 ; CHECK-LABEL: @test_atomic_nand_i16
77 ; CHECK: fence release
78 ; CHECK: br label %[[LOOP:.*]]
79 ; CHECK: [[LOOP]]:
80 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i16(i16* %ptr)
81 ; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i16
82 ; CHECK: [[NEWVAL_TMP:%.*]] = and i16 [[OLDVAL]], %nandend
83 ; CHECK: [[NEWVAL:%.*]] = xor i16 [[NEWVAL_TMP]], -1
84 ; CHECK: [[NEWVAL32:%.*]] = zext i16 [[NEWVAL]] to i32
85 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i16(i32 [[NEWVAL32]], i16* %ptr)
86 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
87 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
88 ; CHECK: [[END]]:
89 ; CHECK: fence seq_cst
90 ; CHECK: ret i16 [[OLDVAL]]
91 %res = atomicrmw nand i16* %ptr, i16 %nandend seq_cst
92 ret i16 %res
93 }
94
95 define i64 @test_atomic_or_i64(i64* %ptr, i64 %orend) {
96 ; CHECK-LABEL: @test_atomic_or_i64
97 ; CHECK: fence release
98 ; CHECK: br label %[[LOOP:.*]]
99 ; CHECK: [[LOOP]]:
100 ; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8*
101 ; CHECK: [[LOHI:%.*]] = call { i32, i32 } @llvm.arm.ldrexd(i8* [[PTR8]])
102 ; CHECK: [[LO:%.*]] = extractvalue { i32, i32 } [[LOHI]], 0
103 ; CHECK: [[HI:%.*]] = extractvalue { i32, i32 } [[LOHI]], 1
104 ; CHECK: [[LO64:%.*]] = zext i32 [[LO]] to i64
105 ; CHECK: [[HI64_TMP:%.*]] = zext i32 [[HI]] to i64
106 ; CHECK: [[HI64:%.*]] = shl i64 [[HI64_TMP]], 32
107 ; CHECK: [[OLDVAL:%.*]] = or i64 [[LO64]], [[HI64]]
108 ; CHECK: [[NEWVAL:%.*]] = or i64 [[OLDVAL]], %orend
109 ; CHECK: [[NEWLO:%.*]] = trunc i64 [[NEWVAL]] to i32
110 ; CHECK: [[NEWHI_TMP:%.*]] = lshr i64 [[NEWVAL]], 32
111 ; CHECK: [[NEWHI:%.*]] = trunc i64 [[NEWHI_TMP]] to i32
112 ; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8*
113 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strexd(i32 [[NEWLO]], i32 [[NEWHI]], i8* [[PTR8]])
114 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
115 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
116 ; CHECK: [[END]]:
117 ; CHECK: fence seq_cst
118 ; CHECK: ret i64 [[OLDVAL]]
119 %res = atomicrmw or i64* %ptr, i64 %orend seq_cst
120 ret i64 %res
121 }
122
123 define i8 @test_atomic_xor_i8(i8* %ptr, i8 %xorend) {
124 ; CHECK-LABEL: @test_atomic_xor_i8
125 ; CHECK: fence release
126 ; CHECK: br label %[[LOOP:.*]]
127 ; CHECK: [[LOOP]]:
128 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
129 ; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8
130 ; CHECK: [[NEWVAL:%.*]] = xor i8 [[OLDVAL]], %xorend
131 ; CHECK: [[NEWVAL32:%.*]] = zext i8 [[NEWVAL]] to i32
132 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
133 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
134 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
135 ; CHECK: [[END]]:
136 ; CHECK: fence seq_cst
137 ; CHECK: ret i8 [[OLDVAL]]
138 %res = atomicrmw xor i8* %ptr, i8 %xorend seq_cst
139 ret i8 %res
140 }
141
142 define i8 @test_atomic_max_i8(i8* %ptr, i8 %maxend) {
143 ; CHECK-LABEL: @test_atomic_max_i8
144 ; CHECK: fence release
145 ; CHECK: br label %[[LOOP:.*]]
146 ; CHECK: [[LOOP]]:
147 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
148 ; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8
149 ; CHECK: [[WANT_OLD:%.*]] = icmp sgt i8 [[OLDVAL]], %maxend
150 ; CHECK: [[NEWVAL:%.*]] = select i1 [[WANT_OLD]], i8 [[OLDVAL]], i8 %maxend
151 ; CHECK: [[NEWVAL32:%.*]] = zext i8 [[NEWVAL]] to i32
152 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
153 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
154 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
155 ; CHECK: [[END]]:
156 ; CHECK: fence seq_cst
157 ; CHECK: ret i8 [[OLDVAL]]
158 %res = atomicrmw max i8* %ptr, i8 %maxend seq_cst
159 ret i8 %res
160 }
161
162 define i8 @test_atomic_min_i8(i8* %ptr, i8 %minend) {
163 ; CHECK-LABEL: @test_atomic_min_i8
164 ; CHECK: fence release
165 ; CHECK: br label %[[LOOP:.*]]
166 ; CHECK: [[LOOP]]:
167 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
168 ; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8
169 ; CHECK: [[WANT_OLD:%.*]] = icmp sle i8 [[OLDVAL]], %minend
170 ; CHECK: [[NEWVAL:%.*]] = select i1 [[WANT_OLD]], i8 [[OLDVAL]], i8 %minend
171 ; CHECK: [[NEWVAL32:%.*]] = zext i8 [[NEWVAL]] to i32
172 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
173 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
174 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
175 ; CHECK: [[END]]:
176 ; CHECK: fence seq_cst
177 ; CHECK: ret i8 [[OLDVAL]]
178 %res = atomicrmw min i8* %ptr, i8 %minend seq_cst
179 ret i8 %res
180 }
181
182 define i8 @test_atomic_umax_i8(i8* %ptr, i8 %umaxend) {
183 ; CHECK-LABEL: @test_atomic_umax_i8
184 ; CHECK: fence release
185 ; CHECK: br label %[[LOOP:.*]]
186 ; CHECK: [[LOOP]]:
187 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
188 ; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8
189 ; CHECK: [[WANT_OLD:%.*]] = icmp ugt i8 [[OLDVAL]], %umaxend
190 ; CHECK: [[NEWVAL:%.*]] = select i1 [[WANT_OLD]], i8 [[OLDVAL]], i8 %umaxend
191 ; CHECK: [[NEWVAL32:%.*]] = zext i8 [[NEWVAL]] to i32
192 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
193 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
194 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
195 ; CHECK: [[END]]:
196 ; CHECK: fence seq_cst
197 ; CHECK: ret i8 [[OLDVAL]]
198 %res = atomicrmw umax i8* %ptr, i8 %umaxend seq_cst
199 ret i8 %res
200 }
201
202 define i8 @test_atomic_umin_i8(i8* %ptr, i8 %uminend) {
203 ; CHECK-LABEL: @test_atomic_umin_i8
204 ; CHECK: fence release
205 ; CHECK: br label %[[LOOP:.*]]
206 ; CHECK: [[LOOP]]:
207 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
208 ; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8
209 ; CHECK: [[WANT_OLD:%.*]] = icmp ule i8 [[OLDVAL]], %uminend
210 ; CHECK: [[NEWVAL:%.*]] = select i1 [[WANT_OLD]], i8 [[OLDVAL]], i8 %uminend
211 ; CHECK: [[NEWVAL32:%.*]] = zext i8 [[NEWVAL]] to i32
212 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
213 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
214 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
215 ; CHECK: [[END]]:
216 ; CHECK: fence seq_cst
217 ; CHECK: ret i8 [[OLDVAL]]
218 %res = atomicrmw umin i8* %ptr, i8 %uminend seq_cst
219 ret i8 %res
220 }
221
222 define i8 @test_cmpxchg_i8_seqcst_seqcst(i8* %ptr, i8 %desired, i8 %newval) {
223 ; CHECK-LABEL: @test_cmpxchg_i8_seqcst_seqcst
224 ; CHECK: fence release
225 ; CHECK: br label %[[LOOP:.*]]
226
227 ; CHECK: [[LOOP]]:
228 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
229 ; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8
230 ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i8 [[OLDVAL]], %desired
231 ; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
232
233 ; CHECK: [[TRY_STORE]]:
234 ; CHECK: [[NEWVAL32:%.*]] = zext i8 %newval to i32
235 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
236 ; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
237 ; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
238
239 ; CHECK: [[SUCCESS_BB]]:
240 ; CHECK: fence seq_cst
241 ; CHECK: br label %[[DONE:.*]]
242
243 ; CHECK: [[FAILURE_BB]]:
244 ; CHECK: fence seq_cst
245 ; CHECK: br label %[[DONE]]
246
247 ; CHECK: [[DONE]]:
248 ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
249 ; CHECK: ret i8 [[OLDVAL]]
250
251 %pairold = cmpxchg i8* %ptr, i8 %desired, i8 %newval seq_cst seq_cst
252 %old = extractvalue { i8, i1 } %pairold, 0
253 ret i8 %old
254 }
255
256 define i16 @test_cmpxchg_i16_seqcst_monotonic(i16* %ptr, i16 %desired, i16 %newval) {
257 ; CHECK-LABEL: @test_cmpxchg_i16_seqcst_monotonic
258 ; CHECK: fence release
259 ; CHECK: br label %[[LOOP:.*]]
260
261 ; CHECK: [[LOOP]]:
262 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i16(i16* %ptr)
263 ; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i16
264 ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i16 [[OLDVAL]], %desired
265 ; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
266
267 ; CHECK: [[TRY_STORE]]:
268 ; CHECK: [[NEWVAL32:%.*]] = zext i16 %newval to i32
269 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i16(i32 [[NEWVAL32]], i16* %ptr)
270 ; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
271 ; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
272
273 ; CHECK: [[SUCCESS_BB]]:
274 ; CHECK: fence seq_cst
275 ; CHECK: br label %[[DONE:.*]]
276
277 ; CHECK: [[FAILURE_BB]]:
278 ; CHECK-NOT: fence
279 ; CHECK: br label %[[DONE]]
280
281 ; CHECK: [[DONE]]:
282 ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
283 ; CHECK: ret i16 [[OLDVAL]]
284
285 %pairold = cmpxchg i16* %ptr, i16 %desired, i16 %newval seq_cst monotonic
286 %old = extractvalue { i16, i1 } %pairold, 0
287 ret i16 %old
288 }
289
290 define i32 @test_cmpxchg_i32_acquire_acquire(i32* %ptr, i32 %desired, i32 %newval) {
291 ; CHECK-LABEL: @test_cmpxchg_i32_acquire_acquire
292 ; CHECK-NOT: fence
293 ; CHECK: br label %[[LOOP:.*]]
294
295 ; CHECK: [[LOOP]]:
296 ; CHECK: [[OLDVAL:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %ptr)
297 ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[OLDVAL]], %desired
298 ; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
299
300 ; CHECK: [[TRY_STORE]]:
301 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %newval, i32* %ptr)
302 ; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
303 ; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
304
305 ; CHECK: [[SUCCESS_BB]]:
306 ; CHECK: fence acquire
307 ; CHECK: br label %[[DONE:.*]]
308
309 ; CHECK: [[FAILURE_BB]]:
310 ; CHECK: fence acquire
311 ; CHECK: br label %[[DONE]]
312
313 ; CHECK: [[DONE]]:
314 ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
315 ; CHECK: ret i32 [[OLDVAL]]
316
317 %pairold = cmpxchg i32* %ptr, i32 %desired, i32 %newval acquire acquire
318 %old = extractvalue { i32, i1 } %pairold, 0
319 ret i32 %old
320 }
321
322 define i64 @test_cmpxchg_i64_monotonic_monotonic(i64* %ptr, i64 %desired, i64 %newval) {
323 ; CHECK-LABEL: @test_cmpxchg_i64_monotonic_monotonic
324 ; CHECK-NOT: fence
325 ; CHECK: br label %[[LOOP:.*]]
326
327 ; CHECK: [[LOOP]]:
328 ; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8*
329 ; CHECK: [[LOHI:%.*]] = call { i32, i32 } @llvm.arm.ldrexd(i8* [[PTR8]])
330 ; CHECK: [[LO:%.*]] = extractvalue { i32, i32 } [[LOHI]], 0
331 ; CHECK: [[HI:%.*]] = extractvalue { i32, i32 } [[LOHI]], 1
332 ; CHECK: [[LO64:%.*]] = zext i32 [[LO]] to i64
333 ; CHECK: [[HI64_TMP:%.*]] = zext i32 [[HI]] to i64
334 ; CHECK: [[HI64:%.*]] = shl i64 [[HI64_TMP]], 32
335 ; CHECK: [[OLDVAL:%.*]] = or i64 [[LO64]], [[HI64]]
336 ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i64 [[OLDVAL]], %desired
337 ; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
338
339 ; CHECK: [[TRY_STORE]]:
340 ; CHECK: [[NEWLO:%.*]] = trunc i64 %newval to i32
341 ; CHECK: [[NEWHI_TMP:%.*]] = lshr i64 %newval, 32
342 ; CHECK: [[NEWHI:%.*]] = trunc i64 [[NEWHI_TMP]] to i32
343 ; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8*
344 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strexd(i32 [[NEWLO]], i32 [[NEWHI]], i8* [[PTR8]])
345 ; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
346 ; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
347
348 ; CHECK: [[SUCCESS_BB]]:
349 ; CHECK-NOT: fence
350 ; CHECK: br label %[[DONE:.*]]
351
352 ; CHECK: [[FAILURE_BB]]:
353 ; CHECK-NOT: fence
354 ; CHECK: br label %[[DONE]]
355
356 ; CHECK: [[DONE]]:
357 ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
358 ; CHECK: ret i64 [[OLDVAL]]
359
360 %pairold = cmpxchg i64* %ptr, i64 %desired, i64 %newval monotonic monotonic
361 %old = extractvalue { i64, i1 } %pairold, 0
362 ret i64 %old
363 }
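The v7 file above shows the fence-based scheme: ARMv7's ldrex/strex intrinsics carry no memory ordering of their own, so the expansion brackets the loop with explicit barriers, fence release before it for the release half and fence seq_cst (or fence acquire) after it for the acquire half, while monotonic operations get no barriers at all. A hand-written sketch for a hypothetical seq_cst xchg:

declare i32 @llvm.arm.ldrex.p0i32(i32*)
declare i32 @llvm.arm.strex.p0i32(i32, i32*)

define i32 @xchg_seq_cst_v7(i32* %ptr, i32 %val) {
entry:
  fence release                       ; leading barrier: release half
  br label %loop
loop:
  %old = call i32 @llvm.arm.ldrex.p0i32(i32* %ptr)
  %status = call i32 @llvm.arm.strex.p0i32(i32 %val, i32* %ptr)
  %again = icmp ne i32 %status, 0
  br i1 %again, label %loop, label %end
end:
  fence seq_cst                       ; trailing barrier: acquire half
  ret i32 %old
}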
+0
-226
test/Transforms/AtomicExpandLoadLinked/ARM/atomic-expansion-v8.ll
None ; RUN: opt -S -o - -mtriple=armv8-linux-gnueabihf -atomic-ll-sc %s | FileCheck %s
1
2 define i8 @test_atomic_xchg_i8(i8* %ptr, i8 %xchgend) {
3 ; CHECK-LABEL: @test_atomic_xchg_i8
4 ; CHECK-NOT: fence
5 ; CHECK: br label %[[LOOP:.*]]
6 ; CHECK: [[LOOP]]:
7 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
8 ; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8
9 ; CHECK: [[NEWVAL32:%.*]] = zext i8 %xchgend to i32
10 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
11 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
12 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
13 ; CHECK: [[END]]:
14 ; CHECK-NOT: fence
15 ; CHECK: ret i8 [[OLDVAL]]
16 %res = atomicrmw xchg i8* %ptr, i8 %xchgend monotonic
17 ret i8 %res
18 }
19
20 define i16 @test_atomic_add_i16(i16* %ptr, i16 %addend) {
21 ; CHECK-LABEL: @test_atomic_add_i16
22 ; CHECK-NOT: fence
23 ; CHECK: br label %[[LOOP:.*]]
24 ; CHECK: [[LOOP]]:
25 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldaex.p0i16(i16* %ptr)
26 ; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i16
27 ; CHECK: [[NEWVAL:%.*]] = add i16 [[OLDVAL]], %addend
28 ; CHECK: [[NEWVAL32:%.*]] = zext i16 [[NEWVAL]] to i32
29 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.stlex.p0i16(i32 [[NEWVAL32]], i16* %ptr)
30 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
31 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
32 ; CHECK: [[END]]:
33 ; CHECK-NOT: fence
34 ; CHECK: ret i16 [[OLDVAL]]
35 %res = atomicrmw add i16* %ptr, i16 %addend seq_cst
36 ret i16 %res
37 }
38
39 define i32 @test_atomic_sub_i32(i32* %ptr, i32 %subend) {
40 ; CHECK-LABEL: @test_atomic_sub_i32
41 ; CHECK-NOT: fence
42 ; CHECK: br label %[[LOOP:.*]]
43 ; CHECK: [[LOOP]]:
44 ; CHECK: [[OLDVAL:%.*]] = call i32 @llvm.arm.ldaex.p0i32(i32* %ptr)
45 ; CHECK: [[NEWVAL:%.*]] = sub i32 [[OLDVAL]], %subend
46 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 [[NEWVAL]], i32* %ptr)
47 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
48 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
49 ; CHECK: [[END]]:
50 ; CHECK-NOT: fence
51 ; CHECK: ret i32 [[OLDVAL]]
52 %res = atomicrmw sub i32* %ptr, i32 %subend acquire
53 ret i32 %res
54 }
55
56 define i64 @test_atomic_or_i64(i64* %ptr, i64 %orend) {
57 ; CHECK-LABEL: @test_atomic_or_i64
58 ; CHECK-NOT: fence
59 ; CHECK: br label %[[LOOP:.*]]
60 ; CHECK: [[LOOP]]:
61 ; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8*
62 ; CHECK: [[LOHI:%.*]] = call { i32, i32 } @llvm.arm.ldaexd(i8* [[PTR8]])
63 ; CHECK: [[LO:%.*]] = extractvalue { i32, i32 } [[LOHI]], 0
64 ; CHECK: [[HI:%.*]] = extractvalue { i32, i32 } [[LOHI]], 1
65 ; CHECK: [[LO64:%.*]] = zext i32 [[LO]] to i64
66 ; CHECK: [[HI64_TMP:%.*]] = zext i32 [[HI]] to i64
67 ; CHECK: [[HI64:%.*]] = shl i64 [[HI64_TMP]], 32
68 ; CHECK: [[OLDVAL:%.*]] = or i64 [[LO64]], [[HI64]]
69 ; CHECK: [[NEWVAL:%.*]] = or i64 [[OLDVAL]], %orend
70 ; CHECK: [[NEWLO:%.*]] = trunc i64 [[NEWVAL]] to i32
71 ; CHECK: [[NEWHI_TMP:%.*]] = lshr i64 [[NEWVAL]], 32
72 ; CHECK: [[NEWHI:%.*]] = trunc i64 [[NEWHI_TMP]] to i32
73 ; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8*
74 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.stlexd(i32 [[NEWLO]], i32 [[NEWHI]], i8* [[PTR8]])
75 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
76 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
77 ; CHECK: [[END]]:
78 ; CHECK-NOT: fence
79 ; CHECK: ret i64 [[OLDVAL]]
80 %res = atomicrmw or i64* %ptr, i64 %orend seq_cst
81 ret i64 %res
82 }
83
84 define i8 @test_cmpxchg_i8_seqcst_seqcst(i8* %ptr, i8 %desired, i8 %newval) {
85 ; CHECK-LABEL: @test_cmpxchg_i8_seqcst_seqcst
86 ; CHECK-NOT: fence
87 ; CHECK: br label %[[LOOP:.*]]
88
89 ; CHECK: [[LOOP]]:
90 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldaex.p0i8(i8* %ptr)
91 ; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8
92 ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i8 [[OLDVAL]], %desired
93 ; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
94
95 ; CHECK: [[TRY_STORE]]:
96 ; CHECK: [[NEWVAL32:%.*]] = zext i8 %newval to i32
97 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.stlex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
98 ; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
99 ; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
100
101 ; CHECK: [[SUCCESS_BB]]:
102 ; CHECK-NOT: fence
103 ; CHECK: br label %[[DONE:.*]]
104
105 ; CHECK: [[FAILURE_BB]]:
106 ; CHECK-NOT: fence
107 ; CHECK: br label %[[DONE]]
108
109 ; CHECK: [[DONE]]:
110 ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
111 ; CHECK: ret i8 [[OLDVAL]]
112
113 %pairold = cmpxchg i8* %ptr, i8 %desired, i8 %newval seq_cst seq_cst
114 %old = extractvalue { i8, i1 } %pairold, 0
115 ret i8 %old
116 }
117
118 define i16 @test_cmpxchg_i16_seqcst_monotonic(i16* %ptr, i16 %desired, i16 %newval) {
119 ; CHECK-LABEL: @test_cmpxchg_i16_seqcst_monotonic
120 ; CHECK-NOT: fence
121 ; CHECK: br label %[[LOOP:.*]]
122
123 ; CHECK: [[LOOP]]:
124 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldaex.p0i16(i16* %ptr)
125 ; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i16
126 ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i16 [[OLDVAL]], %desired
127 ; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
128
129 ; CHECK: [[TRY_STORE]]:
130 ; CHECK: [[NEWVAL32:%.*]] = zext i16 %newval to i32
131 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.stlex.p0i16(i32 [[NEWVAL32]], i16* %ptr)
132 ; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
133 ; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
134
135 ; CHECK: [[SUCCESS_BB]]:
136 ; CHECK-NOT: fence
137 ; CHECK: br label %[[DONE:.*]]
138
139 ; CHECK: [[FAILURE_BB]]:
140 ; CHECK-NOT: fence
141 ; CHECK: br label %[[DONE]]
142
143 ; CHECK: [[DONE]]:
144 ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
145 ; CHECK: ret i16 [[OLDVAL]]
146
147 %pairold = cmpxchg i16* %ptr, i16 %desired, i16 %newval seq_cst monotonic
148 %old = extractvalue { i16, i1 } %pairold, 0
149 ret i16 %old
150 }
151
152 define i32 @test_cmpxchg_i32_acquire_acquire(i32* %ptr, i32 %desired, i32 %newval) {
153 ; CHECK-LABEL: @test_cmpxchg_i32_acquire_acquire
154 ; CHECK-NOT: fence
155 ; CHECK: br label %[[LOOP:.*]]
156
157 ; CHECK: [[LOOP]]:
158 ; CHECK: [[OLDVAL:%.*]] = call i32 @llvm.arm.ldaex.p0i32(i32* %ptr)
159 ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[OLDVAL]], %desired
160 ; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
161
162 ; CHECK: [[TRY_STORE]]:
163 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %newval, i32* %ptr)
164 ; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
165 ; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
166
167 ; CHECK: [[SUCCESS_BB]]:
168 ; CHECK-NOT: fence
169 ; CHECK: br label %[[DONE:.*]]
170
171 ; CHECK: [[FAILURE_BB]]:
172 ; CHECK-NOT: fence
173 ; CHECK: br label %[[DONE]]
174
175 ; CHECK: [[DONE]]:
176 ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
177 ; CHECK: ret i32 [[OLDVAL]]
178
179 %pairold = cmpxchg i32* %ptr, i32 %desired, i32 %newval acquire acquire
180 %old = extractvalue { i32, i1 } %pairold, 0
181 ret i32 %old
182 }
183
184 define i64 @test_cmpxchg_i64_monotonic_monotonic(i64* %ptr, i64 %desired, i64 %newval) {
185 ; CHECK-LABEL: @test_cmpxchg_i64_monotonic_monotonic
186 ; CHECK-NOT: fence
187 ; CHECK: br label %[[LOOP:.*]]
188
189 ; CHECK: [[LOOP]]:
190 ; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8*
191 ; CHECK: [[LOHI:%.*]] = call { i32, i32 } @llvm.arm.ldrexd(i8* [[PTR8]])
192 ; CHECK: [[LO:%.*]] = extractvalue { i32, i32 } [[LOHI]], 0
193 ; CHECK: [[HI:%.*]] = extractvalue { i32, i32 } [[LOHI]], 1
194 ; CHECK: [[LO64:%.*]] = zext i32 [[LO]] to i64
195 ; CHECK: [[HI64_TMP:%.*]] = zext i32 [[HI]] to i64
196 ; CHECK: [[HI64:%.*]] = shl i64 [[HI64_TMP]], 32
197 ; CHECK: [[OLDVAL:%.*]] = or i64 [[LO64]], [[HI64]]
198 ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i64 [[OLDVAL]], %desired
199 ; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
200
201 ; CHECK: [[TRY_STORE]]:
202 ; CHECK: [[NEWLO:%.*]] = trunc i64 %newval to i32
203 ; CHECK: [[NEWHI_TMP:%.*]] = lshr i64 %newval, 32
204 ; CHECK: [[NEWHI:%.*]] = trunc i64 [[NEWHI_TMP]] to i32
205 ; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8*
206 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strexd(i32 [[NEWLO]], i32 [[NEWHI]], i8* [[PTR8]])
207 ; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
208 ; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
209
210 ; CHECK: [[SUCCESS_BB]]:
211 ; CHECK-NOT: fence
212 ; CHECK: br label %[[DONE:.*]]
213
214 ; CHECK: [[FAILURE_BB]]:
215 ; CHECK-NOT: fence
216 ; CHECK: br label %[[DONE]]
217
218 ; CHECK: [[DONE]]:
219 ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
220 ; CHECK: ret i64 [[OLDVAL]]
221
222 %pairold = cmpxchg i64* %ptr, i64 %desired, i64 %newval monotonic monotonic
223 %old = extractvalue { i64, i1 } %pairold, 0
224 ret i64 %old
225 }
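The v8 file above is the counterpoint: every test begins with CHECK-NOT: fence because ARMv8 folds the ordering into the exclusives themselves, ldaex/ldaexd for load-acquire and stlex/stlexd for store-release, so no separate barriers are needed. A hand-written sketch of the same loop shape for a seq_cst atomicrmw add (hypothetical function name):

declare i32 @llvm.arm.ldaex.p0i32(i32*)
declare i32 @llvm.arm.stlex.p0i32(i32, i32*)

define i32 @add_seq_cst_v8(i32* %ptr, i32 %inc) {
entry:
  br label %loop                      ; no leading fence
loop:
  %old = call i32 @llvm.arm.ldaex.p0i32(i32* %ptr)              ; load-acquire exclusive
  %new = add i32 %old, %inc
  %status = call i32 @llvm.arm.stlex.p0i32(i32 %new, i32* %ptr) ; store-release exclusive
  %again = icmp ne i32 %status, 0
  br i1 %again, label %loop, label %end
end:
  ret i32 %old                        ; no trailing fence
}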
+0
-97
test/Transforms/AtomicExpandLoadLinked/ARM/cmpxchg-weak.ll
None ; RUN: opt -atomic-ll-sc -S -mtriple=thumbv7s-apple-ios7.0 %s | FileCheck %s
1
2 define i32 @test_cmpxchg_seq_cst(i32* %addr, i32 %desired, i32 %new) {
3 ; CHECK-LABEL: @test_cmpxchg_seq_cst
4 ; CHECK: fence release
5 ; CHECK: br label %[[START:.*]]
6
7 ; CHECK: [[START]]:
8 ; CHECK: [[LOADED:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %addr)
9 ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[LOADED]], %desired
10 ; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
11
12 ; CHECK: [[TRY_STORE]]:
13 ; CHECK: [[STREX:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %addr)
14 ; CHECK: [[SUCCESS:%.*]] = icmp eq i32 [[STREX]], 0
15 ; CHECK: br i1 [[SUCCESS]], label %[[SUCCESS_BB:.*]], label %[[FAILURE_BB]]
16
17 ; CHECK: [[SUCCESS_BB]]:
18 ; CHECK: fence seq_cst
19 ; CHECK: br label %[[END:.*]]
20
21 ; CHECK: [[FAILURE_BB]]:
22 ; CHECK: fence seq_cst
23 ; CHECK: br label %[[END]]
24
25 ; CHECK: [[END]]:
26 ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
27 ; CHECK: ret i32 [[LOADED]]
28
29 %pair = cmpxchg weak i32* %addr, i32 %desired, i32 %new seq_cst seq_cst
30 %oldval = extractvalue { i32, i1 } %pair, 0
31 ret i32 %oldval
32 }
33
34 define i1 @test_cmpxchg_weak_fail(i32* %addr, i32 %desired, i32 %new) {
35 ; CHECK-LABEL: @test_cmpxchg_weak_fail
36 ; CHECK: fence release
37 ; CHECK: br label %[[START:.*]]
38
39 ; CHECK: [[START]]:
40 ; CHECK: [[LOADED:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %addr)
41 ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[LOADED]], %desired
42 ; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
43
44 ; CHECK: [[TRY_STORE]]:
45 ; CHECK: [[STREX:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %addr)
46 ; CHECK: [[SUCCESS:%.*]] = icmp eq i32 [[STREX]], 0
47 ; CHECK: br i1 [[SUCCESS]], label %[[SUCCESS_BB:.*]], label %[[FAILURE_BB:.*]]
48
49 ; CHECK: [[SUCCESS_BB]]:
50 ; CHECK: fence seq_cst
51 ; CHECK: br label %[[END:.*]]
52
53 ; CHECK: [[FAILURE_BB]]:
54 ; CHECK-NOT: fence
55 ; CHECK: br label %[[END]]
56
57 ; CHECK: [[END]]:
58 ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
59 ; CHECK: ret i1 [[SUCCESS]]
60
61 %pair = cmpxchg weak i32* %addr, i32 %desired, i32 %new seq_cst monotonic
62 %oldval = extractvalue { i32, i1 } %pair, 1
63 ret i1 %oldval
64 }
65
66 define i32 @test_cmpxchg_monotonic(i32* %addr, i32 %desired, i32 %new) {
67 ; CHECK-LABEL: @test_cmpxchg_monotonic
68 ; CHECK-NOT: fence
69 ; CHECK: br label %[[START:.*]]
70
71 ; CHECK: [[START]]:
72 ; CHECK: [[LOADED:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %addr)
73 ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[LOADED]], %desired
74 ; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
75
76 ; CHECK: [[TRY_STORE]]:
77 ; CHECK: [[STREX:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %addr)
78 ; CHECK: [[SUCCESS:%.*]] = icmp eq i32 [[STREX]], 0
79 ; CHECK: br i1 [[SUCCESS]], label %[[SUCCESS_BB:.*]], label %[[FAILURE_BB:.*]]
80
81 ; CHECK: [[SUCCESS_BB]]:
82 ; CHECK-NOT: fence
83 ; CHECK: br label %[[END:.*]]
84
85 ; CHECK: [[FAILURE_BB]]:
86 ; CHECK-NOT: fence
87 ; CHECK: br label %[[END]]
88
89 ; CHECK: [[END]]:
90 ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
91 ; CHECK: ret i32 [[LOADED]]
92
93 %pair = cmpxchg weak i32* %addr, i32 %desired, i32 %new monotonic monotonic
94 %oldval = extractvalue { i32, i1 } %pair, 0
95 ret i32 %oldval
96 }
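One detail test_cmpxchg_weak_fail above pins down: with seq_cst success ordering but monotonic failure ordering, the trailing barrier belongs only on the success path, and the failure block is explicitly checked with CHECK-NOT: fence. A hand-written sketch of just those exit blocks (hypothetical function; %stored stands in for the strex status check):

define i1 @weak_fail_exits(i1 %stored) {
entry:
  br i1 %stored, label %success, label %failure
success:
  fence seq_cst                 ; acquire half applies only if the exchange won
  br label %done
failure:
  br label %done                ; monotonic failure ordering needs no barrier
done:
  %ok = phi i1 [ true, %success ], [ false, %failure ]
  ret i1 %ok
}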
+0
-3
test/Transforms/AtomicExpandLoadLinked/ARM/lit.local.cfg
None if not 'ARM' in config.root.targets:
1 config.unsupported = True
2
344344 // For codegen passes, only passes that do IR to IR transformation are
345345 // supported.
346346 initializeCodeGenPreparePass(Registry);
347 initializeAtomicExpandLoadLinkedPass(Registry);
347 initializeAtomicExpandPass(Registry);
348348
349349 #ifdef LINK_POLLY_INTO_TOOLS
350350 polly::initializePollyPasses(Registry);
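Beyond opt's registration above, a front end or standalone tool schedules the renamed pass through the same factory function. A minimal sketch, assuming the 2014-era legacy pass-manager API (scheduleAtomicExpand is a hypothetical helper; target setup and error handling omitted):

#include "llvm/CodeGen/Passes.h"
#include "llvm/PassManager.h"
#include "llvm/Target/TargetMachine.h"

// Adds the IR-level atomic lowering pass; formerly
// createAtomicExpandLoadLinkedPass(TM).
void scheduleAtomicExpand(llvm::PassManager &PM,
                          const llvm::TargetMachine *TM) {
  PM.add(llvm::createAtomicExpandPass(TM));
}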