llvm.org GIT mirror llvm / 211ffd2
AArch64: remove barriers from AArch64 atomic operations. I've managed to convince myself that AArch64's acquire/release instructions are sufficient to guarantee C++11's required semantics, even in the sequentially-consistent case. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@179005 91177308-0d34-0410-b5e6-96231b3b80d8 Tim Northover 7 years ago
5 changed file(s) with 450 addition(s) and 375 deletion(s). Raw diff Collapse all Expand all
8686 }
8787
8888 bool SelectTSTBOperand(SDValue N, SDValue &FixedPos, unsigned RegWidth);
89
90 SDNode *SelectAtomic(SDNode *N, unsigned Op8, unsigned Op16, unsigned Op32, unsigned Op64);
8991
9092 SDNode *TrySelectToMoveImm(SDNode *N);
9193 SDNode *LowerToFPLitPool(SDNode *Node);
317319 return true;
318320 }
319321
322 SDNode *AArch64DAGToDAGISel::SelectAtomic(SDNode *Node, unsigned Op8,
323 unsigned Op16,unsigned Op32,
324 unsigned Op64) {
325 // Mostly direct translation to the given operations, except that we preserve
326 // the AtomicOrdering for use later on.
327 AtomicSDNode *AN = cast(Node);
328 EVT VT = AN->getMemoryVT();
329
330 unsigned Op;
331 if (VT == MVT::i8)
332 Op = Op8;
333 else if (VT == MVT::i16)
334 Op = Op16;
335 else if (VT == MVT::i32)
336 Op = Op32;
337 else if (VT == MVT::i64)
338 Op = Op64;
339 else
340 llvm_unreachable("Unexpected atomic operation");
341
342 SmallVector Ops;
343 for (unsigned i = 1; i < AN->getNumOperands(); ++i)
344 Ops.push_back(AN->getOperand(i));
345
346 Ops.push_back(CurDAG->getTargetConstant(AN->getOrdering(), MVT::i32));
347 Ops.push_back(AN->getOperand(0)); // Chain moves to the end
348
349 return CurDAG->SelectNodeTo(Node, Op,
350 AN->getValueType(0), MVT::Other,
351 &Ops[0], Ops.size());
352 }
353
320354 SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
321355 // Dump information about the Node being selected
322356 DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << "\n");
327361 }
328362
329363 switch (Node->getOpcode()) {
364 case ISD::ATOMIC_LOAD_ADD:
365 return SelectAtomic(Node,
366 AArch64::ATOMIC_LOAD_ADD_I8,
367 AArch64::ATOMIC_LOAD_ADD_I16,
368 AArch64::ATOMIC_LOAD_ADD_I32,
369 AArch64::ATOMIC_LOAD_ADD_I64);
370 case ISD::ATOMIC_LOAD_SUB:
371 return SelectAtomic(Node,
372 AArch64::ATOMIC_LOAD_SUB_I8,
373 AArch64::ATOMIC_LOAD_SUB_I16,
374 AArch64::ATOMIC_LOAD_SUB_I32,
375 AArch64::ATOMIC_LOAD_SUB_I64);
376 case ISD::ATOMIC_LOAD_AND:
377 return SelectAtomic(Node,
378 AArch64::ATOMIC_LOAD_AND_I8,
379 AArch64::ATOMIC_LOAD_AND_I16,
380 AArch64::ATOMIC_LOAD_AND_I32,
381 AArch64::ATOMIC_LOAD_AND_I64);
382 case ISD::ATOMIC_LOAD_OR:
383 return SelectAtomic(Node,
384 AArch64::ATOMIC_LOAD_OR_I8,
385 AArch64::ATOMIC_LOAD_OR_I16,
386 AArch64::ATOMIC_LOAD_OR_I32,
387 AArch64::ATOMIC_LOAD_OR_I64);
388 case ISD::ATOMIC_LOAD_XOR:
389 return SelectAtomic(Node,
390 AArch64::ATOMIC_LOAD_XOR_I8,
391 AArch64::ATOMIC_LOAD_XOR_I16,
392 AArch64::ATOMIC_LOAD_XOR_I32,
393 AArch64::ATOMIC_LOAD_XOR_I64);
394 case ISD::ATOMIC_LOAD_NAND:
395 return SelectAtomic(Node,
396 AArch64::ATOMIC_LOAD_NAND_I8,
397 AArch64::ATOMIC_LOAD_NAND_I16,
398 AArch64::ATOMIC_LOAD_NAND_I32,
399 AArch64::ATOMIC_LOAD_NAND_I64);
400 case ISD::ATOMIC_LOAD_MIN:
401 return SelectAtomic(Node,
402 AArch64::ATOMIC_LOAD_MIN_I8,
403 AArch64::ATOMIC_LOAD_MIN_I16,
404 AArch64::ATOMIC_LOAD_MIN_I32,
405 AArch64::ATOMIC_LOAD_MIN_I64);
406 case ISD::ATOMIC_LOAD_MAX:
407 return SelectAtomic(Node,
408 AArch64::ATOMIC_LOAD_MAX_I8,
409 AArch64::ATOMIC_LOAD_MAX_I16,
410 AArch64::ATOMIC_LOAD_MAX_I32,
411 AArch64::ATOMIC_LOAD_MAX_I64);
412 case ISD::ATOMIC_LOAD_UMIN:
413 return SelectAtomic(Node,
414 AArch64::ATOMIC_LOAD_UMIN_I8,
415 AArch64::ATOMIC_LOAD_UMIN_I16,
416 AArch64::ATOMIC_LOAD_UMIN_I32,
417 AArch64::ATOMIC_LOAD_UMIN_I64);
418 case ISD::ATOMIC_LOAD_UMAX:
419 return SelectAtomic(Node,
420 AArch64::ATOMIC_LOAD_UMAX_I8,
421 AArch64::ATOMIC_LOAD_UMAX_I16,
422 AArch64::ATOMIC_LOAD_UMAX_I32,
423 AArch64::ATOMIC_LOAD_UMAX_I64);
424 case ISD::ATOMIC_SWAP:
425 return SelectAtomic(Node,
426 AArch64::ATOMIC_SWAP_I8,
427 AArch64::ATOMIC_SWAP_I16,
428 AArch64::ATOMIC_SWAP_I32,
429 AArch64::ATOMIC_SWAP_I64);
430 case ISD::ATOMIC_CMP_SWAP:
431 return SelectAtomic(Node,
432 AArch64::ATOMIC_CMP_SWAP_I8,
433 AArch64::ATOMIC_CMP_SWAP_I16,
434 AArch64::ATOMIC_CMP_SWAP_I32,
435 AArch64::ATOMIC_CMP_SWAP_I64);
330436 case ISD::FrameIndex: {
331437 int FI = cast(Node)->getIndex();
332438 EVT PtrTy = TLI.getPointerTy();
5858
5959 computeRegisterProperties();
6060
61 // Some atomic operations can be folded into load-acquire or store-release
62 // instructions on AArch64. It's marginally simpler to let LLVM expand
63 // everything out to a barrier and then recombine the (few) barriers we can.
64 setInsertFencesForAtomic(true);
65 setTargetDAGCombine(ISD::ATOMIC_FENCE);
66 setTargetDAGCombine(ISD::ATOMIC_STORE);
61 // We have particularly efficient implementations of atomic fences if they can
62 // be combined with nearby atomic loads and stores.
63 setShouldFoldAtomicFences(true);
6764
6865 // We combine OR nodes for bitfield and NEON BSL operations.
6966 setTargetDAGCombine(ISD::OR);
274271 return VT.changeVectorElementTypeToInteger();
275272 }
276273
277 static void getExclusiveOperation(unsigned Size, unsigned &ldrOpc,
278 unsigned &strOpc) {
279 switch (Size) {
280 default: llvm_unreachable("unsupported size for atomic binary op!");
281 case 1:
282 ldrOpc = AArch64::LDXR_byte;
283 strOpc = AArch64::STXR_byte;
284 break;
285 case 2:
286 ldrOpc = AArch64::LDXR_hword;
287 strOpc = AArch64::STXR_hword;
288 break;
289 case 4:
290 ldrOpc = AArch64::LDXR_word;
291 strOpc = AArch64::STXR_word;
292 break;
293 case 8:
294 ldrOpc = AArch64::LDXR_dword;
295 strOpc = AArch64::STXR_dword;
296 break;
297 }
274 static void getExclusiveOperation(unsigned Size, AtomicOrdering Ord,
275 unsigned &LdrOpc,
276 unsigned &StrOpc) {
277 static unsigned LoadBares[] = {AArch64::LDXR_byte, AArch64::LDXR_hword,
278 AArch64::LDXR_word, AArch64::LDXR_dword};
279 static unsigned LoadAcqs[] = {AArch64::LDAXR_byte, AArch64::LDAXR_hword,
280 AArch64::LDAXR_word, AArch64::LDAXR_dword};
281 static unsigned StoreBares[] = {AArch64::STXR_byte, AArch64::STXR_hword,
282 AArch64::STXR_word, AArch64::STXR_dword};
283 static unsigned StoreRels[] = {AArch64::STLXR_byte, AArch64::STLXR_hword,
284 AArch64::STLXR_word, AArch64::STLXR_dword};
285
286 unsigned *LoadOps, *StoreOps;
287 if (Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent)
288 LoadOps = LoadAcqs;
289 else
290 LoadOps = LoadBares;
291
292 if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent)
293 StoreOps = StoreRels;
294 else
295 StoreOps = StoreBares;
296
297 assert(isPowerOf2_32(Size) && Size <= 8 &&
298 "unsupported size for atomic binary op!");
299
300 LdrOpc = LoadOps[Log2_32(Size)];
301 StrOpc = StoreOps[Log2_32(Size)];
298302 }
299303
300304 MachineBasicBlock *
312316 unsigned dest = MI->getOperand(0).getReg();
313317 unsigned ptr = MI->getOperand(1).getReg();
314318 unsigned incr = MI->getOperand(2).getReg();
319 AtomicOrdering Ord = static_cast(MI->getOperand(3).getImm());
315320 DebugLoc dl = MI->getDebugLoc();
316321
317322 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
318323
319324 unsigned ldrOpc, strOpc;
320 getExclusiveOperation(Size, ldrOpc, strOpc);
325 getExclusiveOperation(Size, Ord, ldrOpc, strOpc);
321326
322327 MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
323328 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
396401 unsigned dest = MI->getOperand(0).getReg();
397402 unsigned ptr = MI->getOperand(1).getReg();
398403 unsigned incr = MI->getOperand(2).getReg();
404 AtomicOrdering Ord = static_cast(MI->getOperand(3).getImm());
405
399406 unsigned oldval = dest;
400407 DebugLoc dl = MI->getDebugLoc();
401408
410417 }
411418
412419 unsigned ldrOpc, strOpc;
413 getExclusiveOperation(Size, ldrOpc, strOpc);
420 getExclusiveOperation(Size, Ord, ldrOpc, strOpc);
414421
415422 MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
416423 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
478485 unsigned ptr = MI->getOperand(1).getReg();
479486 unsigned oldval = MI->getOperand(2).getReg();
480487 unsigned newval = MI->getOperand(3).getReg();
488 AtomicOrdering Ord = static_cast(MI->getOperand(4).getImm());
481489 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
482490 DebugLoc dl = MI->getDebugLoc();
483491
486494 TRCsp = Size == 8 ? &AArch64::GPR64xspRegClass : &AArch64::GPR32wspRegClass;
487495
488496 unsigned ldrOpc, strOpc;
489 getExclusiveOperation(Size, ldrOpc, strOpc);
497 getExclusiveOperation(Size, Ord, ldrOpc, strOpc);
490498
491499 MachineFunction *MF = BB->getParent();
492500 const BasicBlock *LLVM_BB = BB->getBasicBlock();
23762384 DAG.getConstant(LSB + Width - 1, MVT::i64));
23772385 }
23782386
2379 static SDValue PerformATOMIC_FENCECombine(SDNode *FenceNode,
2380 TargetLowering::DAGCombinerInfo &DCI) {
2381 // An atomic operation followed by an acquiring atomic fence can be reduced to
2382 // an acquiring load. The atomic operation provides a convenient pointer to
2383 // load from. If the original operation was a load anyway we can actually
2384 // combine the two operations into an acquiring load.
2385 SelectionDAG &DAG = DCI.DAG;
2386 SDValue AtomicOp = FenceNode->getOperand(0);
2387 AtomicSDNode *AtomicNode = dyn_cast(AtomicOp);
2388
2389 // A fence on its own can't be optimised
2390 if (!AtomicNode)
2391 return SDValue();
2392
2393 AtomicOrdering FenceOrder
2394 = static_cast(FenceNode->getConstantOperandVal(1));
2395 SynchronizationScope FenceScope
2396 = static_cast(FenceNode->getConstantOperandVal(2));
2397
2398 if (FenceOrder != Acquire || FenceScope != AtomicNode->getSynchScope())
2399 return SDValue();
2400
2401 // If the original operation was an ATOMIC_LOAD then we'll be replacing it, so
2402 // the chain we use should be its input, otherwise we'll put our store after
2403 // it so we use its output chain.
2404 SDValue Chain = AtomicNode->getOpcode() == ISD::ATOMIC_LOAD ?
2405 AtomicNode->getChain() : AtomicOp;
2406
2407 // We have an acquire fence with a handy atomic operation nearby, we can
2408 // convert the fence into a load-acquire, discarding the result.
2409 DebugLoc DL = FenceNode->getDebugLoc();
2410 SDValue Op = DAG.getAtomic(ISD::ATOMIC_LOAD, DL, AtomicNode->getMemoryVT(),
2411 AtomicNode->getValueType(0),
2412 Chain, // Chain
2413 AtomicOp.getOperand(1), // Pointer
2414 AtomicNode->getMemOperand(), Acquire,
2415 FenceScope);
2416
2417 if (AtomicNode->getOpcode() == ISD::ATOMIC_LOAD)
2418 DAG.ReplaceAllUsesWith(AtomicNode, Op.getNode());
2419
2420 return Op.getValue(1);
2421 }
2422
2423 static SDValue PerformATOMIC_STORECombine(SDNode *N,
2424 TargetLowering::DAGCombinerInfo &DCI) {
2425 // A releasing atomic fence followed by an atomic store can be combined into a
2426 // single store operation.
2427 SelectionDAG &DAG = DCI.DAG;
2428 AtomicSDNode *AtomicNode = cast(N);
2429 SDValue FenceOp = AtomicNode->getOperand(0);
2430
2431 if (FenceOp.getOpcode() != ISD::ATOMIC_FENCE)
2432 return SDValue();
2433
2434 AtomicOrdering FenceOrder
2435 = static_cast(FenceOp->getConstantOperandVal(1));
2436 SynchronizationScope FenceScope
2437 = static_cast(FenceOp->getConstantOperandVal(2));
2438
2439 if (FenceOrder != Release || FenceScope != AtomicNode->getSynchScope())
2440 return SDValue();
2441
2442 DebugLoc DL = AtomicNode->getDebugLoc();
2443 return DAG.getAtomic(ISD::ATOMIC_STORE, DL, AtomicNode->getMemoryVT(),
2444 FenceOp.getOperand(0), // Chain
2445 AtomicNode->getOperand(1), // Pointer
2446 AtomicNode->getOperand(2), // Value
2447 AtomicNode->getMemOperand(), Release,
2448 FenceScope);
2449 }
2450
24512387 /// For a true bitfield insert, the bits getting into that contiguous mask
24522388 /// should come from the low part of an existing value: they must be formed from
24532389 /// a compatible SHL operation (unless they're already low). This function
28032739 switch (N->getOpcode()) {
28042740 default: break;
28052741 case ISD::AND: return PerformANDCombine(N, DCI);
2806 case ISD::ATOMIC_FENCE: return PerformATOMIC_FENCECombine(N, DCI);
2807 case ISD::ATOMIC_STORE: return PerformATOMIC_STORECombine(N, DCI);
28082742 case ISD::OR: return PerformORCombine(N, DCI, Subtarget);
28092743 case ISD::SRA: return PerformSRACombine(N, DCI);
28102744 }
158158 // Atomic operation pseudo-instructions
159159 //===----------------------------------------------------------------------===//
160160
161 let usesCustomInserter = 1 in {
162 multiclass AtomicSizes {
163 def _I8 : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$incr),
164 [(set i32:$dst, (!cast(opname # "_8") i64:$ptr, i32:$incr))]>;
165 def _I16 : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$incr),
166 [(set i32:$dst, (!cast(opname # "_16") i64:$ptr, i32:$incr))]>;
167 def _I32 : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$incr),
168 [(set i32:$dst, (!cast(opname # "_32") i64:$ptr, i32:$incr))]>;
169 def _I64 : PseudoInst<(outs GPR64:$dst), (ins GPR64:$ptr, GPR64:$incr),
170 [(set i64:$dst, (!cast(opname # "_64") i64:$ptr, i64:$incr))]>;
171 }
172 }
173
174 defm ATOMIC_LOAD_ADD : AtomicSizes<"atomic_load_add">;
175 defm ATOMIC_LOAD_SUB : AtomicSizes<"atomic_load_sub">;
176 defm ATOMIC_LOAD_AND : AtomicSizes<"atomic_load_and">;
177 defm ATOMIC_LOAD_OR : AtomicSizes<"atomic_load_or">;
178 defm ATOMIC_LOAD_XOR : AtomicSizes<"atomic_load_xor">;
179 defm ATOMIC_LOAD_NAND : AtomicSizes<"atomic_load_nand">;
180 defm ATOMIC_SWAP : AtomicSizes<"atomic_swap">;
// These get selected from C++ code as a pretty much direct translation from the
// generic DAG nodes. The one exception is that the AtomicOrdering is added as
// an operand so that the eventual lowering can make use of it and choose
// acquire/release operations when required.

let usesCustomInserter = 1, hasCtrlDep = 1, mayLoad = 1, mayStore = 1 in {
multiclass AtomicSizes {
  def _I8 : PseudoInst<(outs GPR32:$dst),
                       (ins GPR64xsp:$ptr, GPR32:$incr, i32imm:$ordering), []>;
  def _I16 : PseudoInst<(outs GPR32:$dst),
                        (ins GPR64xsp:$ptr, GPR32:$incr, i32imm:$ordering), []>;
  def _I32 : PseudoInst<(outs GPR32:$dst),
                        (ins GPR64xsp:$ptr, GPR32:$incr, i32imm:$ordering), []>;
  def _I64 : PseudoInst<(outs GPR64:$dst),
                        (ins GPR64xsp:$ptr, GPR64:$incr, i32imm:$ordering), []>;
}
}

defm ATOMIC_LOAD_ADD  : AtomicSizes;
defm ATOMIC_LOAD_SUB  : AtomicSizes;
defm ATOMIC_LOAD_AND  : AtomicSizes;
defm ATOMIC_LOAD_OR   : AtomicSizes;
defm ATOMIC_LOAD_XOR  : AtomicSizes;
defm ATOMIC_LOAD_NAND : AtomicSizes;
defm ATOMIC_SWAP      : AtomicSizes;
181186 let Defs = [NZCV] in {
182187 // These operations need a CMP to calculate the correct value
183 defm ATOMIC_LOAD_MIN : AtomicSizes<"atomic_load_min">;
184 defm ATOMIC_LOAD_MAX : AtomicSizes<"atomic_load_max">;
185 defm ATOMIC_LOAD_UMIN : AtomicSizes<"atomic_load_umin">;
186 defm ATOMIC_LOAD_UMAX : AtomicSizes<"atomic_load_umax">;
187 }
188
189 let usesCustomInserter = 1, Defs = [NZCV] in {
190 def ATOMIC_CMP_SWAP_I8
191 : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$old, GPR32:$new),
192 [(set i32:$dst, (atomic_cmp_swap_8 i64:$ptr, i32:$old, i32:$new))]>;
193 def ATOMIC_CMP_SWAP_I16
194 : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$old, GPR32:$new),
195 [(set i32:$dst, (atomic_cmp_swap_16 i64:$ptr, i32:$old, i32:$new))]>;
196 def ATOMIC_CMP_SWAP_I32
197 : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$old, GPR32:$new),
198 [(set i32:$dst, (atomic_cmp_swap_32 i64:$ptr, i32:$old, i32:$new))]>;
199 def ATOMIC_CMP_SWAP_I64
200 : PseudoInst<(outs GPR64:$dst), (ins GPR64:$ptr, GPR64:$old, GPR64:$new),
201 [(set i64:$dst, (atomic_cmp_swap_64 i64:$ptr, i64:$old, i64:$new))]>;
202 }
188 defm ATOMIC_LOAD_MIN : AtomicSizes;
189 defm ATOMIC_LOAD_MAX : AtomicSizes;
190 defm ATOMIC_LOAD_UMIN : AtomicSizes;
191 defm ATOMIC_LOAD_UMAX : AtomicSizes;
192 }
193
// Compare-and-swap pseudo parameterised by data register class (GPR32/GPR64).
// Like the AtomicSizes pseudos, it carries the AtomicOrdering as an i32imm
// operand so the custom inserter can pick acquire/release exclusive ops.
class AtomicCmpSwap<RegisterClass GPRData>
  : PseudoInst<(outs GPRData:$dst),
               (ins GPR64xsp:$ptr, GPRData:$old, GPRData:$new,
                    i32imm:$ordering), []> {
  let usesCustomInserter = 1;
  let hasCtrlDep = 1;
  let mayLoad = 1;
  let mayStore = 1;
  let Defs = [NZCV]; // Expansion uses a CMP to test the loaded value.
}

def ATOMIC_CMP_SWAP_I8  : AtomicCmpSwap<GPR32>;
def ATOMIC_CMP_SWAP_I16 : AtomicCmpSwap<GPR32>;
def ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap<GPR32>;
def ATOMIC_CMP_SWAP_I64 : AtomicCmpSwap<GPR64>;
203209
204210 //===----------------------------------------------------------------------===//
205211 // Add-subtract (extended register) instructions
25782584
// Matches an atomic load whose ordering has acquire semantics (acquire or
// seq_cst), so it can be selected to an LDAR* instruction.
class acquiring_load<PatFrag base>
  : PatFrag<(ops node:$ptr), (base node:$ptr), [{
  AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
  return Ordering == Acquire || Ordering == SequentiallyConsistent;
}]>;
25832590
25842591 def atomic_load_acquire_8 : acquiring_load;
26092616
// Matches an atomic store whose ordering has release semantics (release or
// seq_cst), so it can be selected to an STLR* instruction.
class releasing_store<PatFrag base>
  : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{
  AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
  return Ordering == Release || Ordering == SequentiallyConsistent;
}]>;
26142622
26152623 def atomic_store_release_8 : releasing_store;
None ; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
0 ; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
11
22 define i32 @foo(i32* %var, i1 %cond) {
33 ; CHECK: foo:
88 store i32 %newval, i32* %var
99 br label %somewhere
1010 atomic_ver:
11 %val = atomicrmw add i32* %var, i32 -1 seq_cst
11 fence seq_cst
12 %val = atomicrmw add i32* %var, i32 -1 monotonic
13 fence seq_cst
1214 br label %somewhere
1315 ; CHECK: dmb
1416 ; CHECK: ldxr
77 define i8 @test_atomic_load_add_i8(i8 %offset) nounwind {
88 ; CHECK: test_atomic_load_add_i8:
99 %old = atomicrmw add i8* @var8, i8 %offset seq_cst
10 ; CHECK: dmb ish
11 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
12 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
13
14 ; CHECK: .LBB{{[0-9]+}}_1:
15 ; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
10 ; CHECK-NOT: dmb
11 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
12 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
13
14 ; CHECK: .LBB{{[0-9]+}}_1:
15 ; CHECK-NEXT: ldaxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
1616 ; w0 below is a reasonable guess but could change: it certainly comes into the
1717 ; function there.
1818 ; CHECK-NEXT: add [[NEW:w[0-9]+]], w[[OLD]], w0
19 ; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
20 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
21 ; CHECK: dmb ish
19 ; CHECK-NEXT: stlxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
20 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
21 ; CHECK-NOT: dmb
2222
2323 ; CHECK: mov x0, x[[OLD]]
2424 ret i8 %old
2626
2727 define i16 @test_atomic_load_add_i16(i16 %offset) nounwind {
2828 ; CHECK: test_atomic_load_add_i16:
29 %old = atomicrmw add i16* @var16, i16 %offset seq_cst
30 ; CHECK: dmb ish
29 %old = atomicrmw add i16* @var16, i16 %offset acquire
30 ; CHECK-NOT: dmb
3131 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
3232 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
3333
3434 ; CHECK: .LBB{{[0-9]+}}_1:
35 ; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
35 ; CHECK-NEXT: ldaxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
3636 ; w0 below is a reasonable guess but could change: it certainly comes into the
3737 ; function there.
3838 ; CHECK-NEXT: add [[NEW:w[0-9]+]], w[[OLD]], w0
3939 ; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
4040 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
41 ; CHECK: dmb ish
41 ; CHECK-NOT: dmb
4242
4343 ; CHECK: mov x0, x[[OLD]]
4444 ret i16 %old
4646
4747 define i32 @test_atomic_load_add_i32(i32 %offset) nounwind {
4848 ; CHECK: test_atomic_load_add_i32:
49 %old = atomicrmw add i32* @var32, i32 %offset seq_cst
50 ; CHECK: dmb ish
49 %old = atomicrmw add i32* @var32, i32 %offset release
50 ; CHECK-NOT: dmb
5151 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
5252 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
5353
5656 ; w0 below is a reasonable guess but could change: it certainly comes into the
5757 ; function there.
5858 ; CHECK-NEXT: add [[NEW:w[0-9]+]], w[[OLD]], w0
59 ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
60 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
61 ; CHECK: dmb ish
59 ; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
60 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
61 ; CHECK-NOT: dmb
6262
6363 ; CHECK: mov x0, x[[OLD]]
6464 ret i32 %old
6666
6767 define i64 @test_atomic_load_add_i64(i64 %offset) nounwind {
6868 ; CHECK: test_atomic_load_add_i64:
69 %old = atomicrmw add i64* @var64, i64 %offset seq_cst
70 ; CHECK: dmb ish
69 %old = atomicrmw add i64* @var64, i64 %offset monotonic
70 ; CHECK-NOT: dmb
7171 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
7272 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
7373
7878 ; CHECK-NEXT: add [[NEW:x[0-9]+]], x[[OLD]], x0
7979 ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
8080 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
81 ; CHECK: dmb ish
81 ; CHECK-NOT: dmb
8282
8383 ; CHECK: mov x0, x[[OLD]]
8484 ret i64 %old
8686
8787 define i8 @test_atomic_load_sub_i8(i8 %offset) nounwind {
8888 ; CHECK: test_atomic_load_sub_i8:
89 %old = atomicrmw sub i8* @var8, i8 %offset seq_cst
90 ; CHECK: dmb ish
89 %old = atomicrmw sub i8* @var8, i8 %offset monotonic
90 ; CHECK-NOT: dmb
9191 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
9292 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
9393
9898 ; CHECK-NEXT: sub [[NEW:w[0-9]+]], w[[OLD]], w0
9999 ; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
100100 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
101 ; CHECK: dmb ish
101 ; CHECK-NOT: dmb
102102
103103 ; CHECK: mov x0, x[[OLD]]
104104 ret i8 %old
106106
107107 define i16 @test_atomic_load_sub_i16(i16 %offset) nounwind {
108108 ; CHECK: test_atomic_load_sub_i16:
109 %old = atomicrmw sub i16* @var16, i16 %offset seq_cst
110 ; CHECK: dmb ish
109 %old = atomicrmw sub i16* @var16, i16 %offset release
110 ; CHECK-NOT: dmb
111111 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
112112 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
113113
116116 ; w0 below is a reasonable guess but could change: it certainly comes into the
117117 ; function there.
118118 ; CHECK-NEXT: sub [[NEW:w[0-9]+]], w[[OLD]], w0
119 ; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
120 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
121 ; CHECK: dmb ish
119 ; CHECK-NEXT: stlxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
120 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
121 ; CHECK-NOT: dmb
122122
123123 ; CHECK: mov x0, x[[OLD]]
124124 ret i16 %old
126126
127127 define i32 @test_atomic_load_sub_i32(i32 %offset) nounwind {
128128 ; CHECK: test_atomic_load_sub_i32:
129 %old = atomicrmw sub i32* @var32, i32 %offset seq_cst
130 ; CHECK: dmb ish
129 %old = atomicrmw sub i32* @var32, i32 %offset acquire
130 ; CHECK-NOT: dmb
131131 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
132132 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
133133
134134 ; CHECK: .LBB{{[0-9]+}}_1:
135 ; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
135 ; CHECK-NEXT: ldaxr w[[OLD:[0-9]+]], [x[[ADDR]]]
136136 ; w0 below is a reasonable guess but could change: it certainly comes into the
137137 ; function there.
138138 ; CHECK-NEXT: sub [[NEW:w[0-9]+]], w[[OLD]], w0
139139 ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
140140 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
141 ; CHECK: dmb ish
141 ; CHECK-NOT: dmb
142142
143143 ; CHECK: mov x0, x[[OLD]]
144144 ret i32 %old
147147 define i64 @test_atomic_load_sub_i64(i64 %offset) nounwind {
148148 ; CHECK: test_atomic_load_sub_i64:
149149 %old = atomicrmw sub i64* @var64, i64 %offset seq_cst
150 ; CHECK: dmb ish
150 ; CHECK-NOT: dmb
151151 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
152152 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
153153
154154 ; CHECK: .LBB{{[0-9]+}}_1:
155 ; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]]
155 ; CHECK-NEXT: ldaxr x[[OLD:[0-9]+]], [x[[ADDR]]]
156156 ; x0 below is a reasonable guess but could change: it certainly comes into the
157157 ; function there.
158158 ; CHECK-NEXT: sub [[NEW:x[0-9]+]], x[[OLD]], x0
159 ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
160 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
161 ; CHECK: dmb ish
159 ; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
160 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
161 ; CHECK-NOT: dmb
162162
163163 ; CHECK: mov x0, x[[OLD]]
164164 ret i64 %old
166166
167167 define i8 @test_atomic_load_and_i8(i8 %offset) nounwind {
168168 ; CHECK: test_atomic_load_and_i8:
169 %old = atomicrmw and i8* @var8, i8 %offset seq_cst
170 ; CHECK: dmb ish
169 %old = atomicrmw and i8* @var8, i8 %offset release
170 ; CHECK-NOT: dmb
171171 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
172172 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
173173
176176 ; w0 below is a reasonable guess but could change: it certainly comes into the
177177 ; function there.
178178 ; CHECK-NEXT: and [[NEW:w[0-9]+]], w[[OLD]], w0
179 ; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
180 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
181 ; CHECK: dmb ish
179 ; CHECK-NEXT: stlxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
180 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
181 ; CHECK-NOT: dmb
182182
183183 ; CHECK: mov x0, x[[OLD]]
184184 ret i8 %old
186186
187187 define i16 @test_atomic_load_and_i16(i16 %offset) nounwind {
188188 ; CHECK: test_atomic_load_and_i16:
189 %old = atomicrmw and i16* @var16, i16 %offset seq_cst
190 ; CHECK: dmb ish
189 %old = atomicrmw and i16* @var16, i16 %offset monotonic
190 ; CHECK-NOT: dmb
191191 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
192192 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
193193
198198 ; CHECK-NEXT: and [[NEW:w[0-9]+]], w[[OLD]], w0
199199 ; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
200200 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
201 ; CHECK: dmb ish
201 ; CHECK-NOT: dmb
202202
203203 ; CHECK: mov x0, x[[OLD]]
204204 ret i16 %old
207207 define i32 @test_atomic_load_and_i32(i32 %offset) nounwind {
208208 ; CHECK: test_atomic_load_and_i32:
209209 %old = atomicrmw and i32* @var32, i32 %offset seq_cst
210 ; CHECK: dmb ish
210 ; CHECK-NOT: dmb
211211 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
212212 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
213213
214214 ; CHECK: .LBB{{[0-9]+}}_1:
215 ; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
215 ; CHECK-NEXT: ldaxr w[[OLD:[0-9]+]], [x[[ADDR]]]
216216 ; w0 below is a reasonable guess but could change: it certainly comes into the
217217 ; function there.
218218 ; CHECK-NEXT: and [[NEW:w[0-9]+]], w[[OLD]], w0
219 ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
220 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
221 ; CHECK: dmb ish
219 ; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
220 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
221 ; CHECK-NOT: dmb
222222
223223 ; CHECK: mov x0, x[[OLD]]
224224 ret i32 %old
226226
227227 define i64 @test_atomic_load_and_i64(i64 %offset) nounwind {
228228 ; CHECK: test_atomic_load_and_i64:
229 %old = atomicrmw and i64* @var64, i64 %offset seq_cst
230 ; CHECK: dmb ish
229 %old = atomicrmw and i64* @var64, i64 %offset acquire
230 ; CHECK-NOT: dmb
231231 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
232232 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
233233
234234 ; CHECK: .LBB{{[0-9]+}}_1:
235 ; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]]
235 ; CHECK-NEXT: ldaxr x[[OLD:[0-9]+]], [x[[ADDR]]]
236236 ; x0 below is a reasonable guess but could change: it certainly comes into the
237237 ; function there.
238238 ; CHECK-NEXT: and [[NEW:x[0-9]+]], x[[OLD]], x0
239239 ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
240240 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
241 ; CHECK: dmb ish
241 ; CHECK-NOT: dmb
242242
243243 ; CHECK: mov x0, x[[OLD]]
244244 ret i64 %old
247247 define i8 @test_atomic_load_or_i8(i8 %offset) nounwind {
248248 ; CHECK: test_atomic_load_or_i8:
249249 %old = atomicrmw or i8* @var8, i8 %offset seq_cst
250 ; CHECK: dmb ish
251 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
252 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
253
254 ; CHECK: .LBB{{[0-9]+}}_1:
255 ; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
250 ; CHECK-NOT: dmb
251 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
252 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
253
254 ; CHECK: .LBB{{[0-9]+}}_1:
255 ; CHECK-NEXT: ldaxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
256256 ; w0 below is a reasonable guess but could change: it certainly comes into the
257257 ; function there.
258258 ; CHECK-NEXT: orr [[NEW:w[0-9]+]], w[[OLD]], w0
259 ; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
260 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
261 ; CHECK: dmb ish
259 ; CHECK-NEXT: stlxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
260 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
261 ; CHECK-NOT: dmb
262262
263263 ; CHECK: mov x0, x[[OLD]]
264264 ret i8 %old
266266
267267 define i16 @test_atomic_load_or_i16(i16 %offset) nounwind {
268268 ; CHECK: test_atomic_load_or_i16:
269 %old = atomicrmw or i16* @var16, i16 %offset seq_cst
270 ; CHECK: dmb ish
269 %old = atomicrmw or i16* @var16, i16 %offset monotonic
270 ; CHECK-NOT: dmb
271271 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
272272 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
273273
278278 ; CHECK-NEXT: orr [[NEW:w[0-9]+]], w[[OLD]], w0
279279 ; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
280280 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
281 ; CHECK: dmb ish
281 ; CHECK-NOT: dmb
282282
283283 ; CHECK: mov x0, x[[OLD]]
284284 ret i16 %old
286286
287287 define i32 @test_atomic_load_or_i32(i32 %offset) nounwind {
288288 ; CHECK: test_atomic_load_or_i32:
289 %old = atomicrmw or i32* @var32, i32 %offset seq_cst
290 ; CHECK: dmb ish
289 %old = atomicrmw or i32* @var32, i32 %offset acquire
290 ; CHECK-NOT: dmb
291291 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
292292 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
293293
294294 ; CHECK: .LBB{{[0-9]+}}_1:
295 ; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
295 ; CHECK-NEXT: ldaxr w[[OLD:[0-9]+]], [x[[ADDR]]]
296296 ; w0 below is a reasonable guess but could change: it certainly comes into the
297297 ; function there.
298298 ; CHECK-NEXT: orr [[NEW:w[0-9]+]], w[[OLD]], w0
299299 ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
300300 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
301 ; CHECK: dmb ish
301 ; CHECK-NOT: dmb
302302
303303 ; CHECK: mov x0, x[[OLD]]
304304 ret i32 %old
306306
307307 define i64 @test_atomic_load_or_i64(i64 %offset) nounwind {
308308 ; CHECK: test_atomic_load_or_i64:
309 %old = atomicrmw or i64* @var64, i64 %offset seq_cst
310 ; CHECK: dmb ish
309 %old = atomicrmw or i64* @var64, i64 %offset release
310 ; CHECK-NOT: dmb
311311 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
312312 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
313313
316316 ; x0 below is a reasonable guess but could change: it certainly comes into the
317317 ; function there.
318318 ; CHECK-NEXT: orr [[NEW:x[0-9]+]], x[[OLD]], x0
319 ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
320 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
321 ; CHECK: dmb ish
319 ; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
320 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
321 ; CHECK-NOT: dmb
322322
323323 ; CHECK: mov x0, x[[OLD]]
324324 ret i64 %old
326326
327327 define i8 @test_atomic_load_xor_i8(i8 %offset) nounwind {
328328 ; CHECK: test_atomic_load_xor_i8:
329 %old = atomicrmw xor i8* @var8, i8 %offset seq_cst
330 ; CHECK: dmb ish
331 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
332 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
333
334 ; CHECK: .LBB{{[0-9]+}}_1:
335 ; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
329 %old = atomicrmw xor i8* @var8, i8 %offset acquire
330 ; CHECK-NOT: dmb
331 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
332 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
333
334 ; CHECK: .LBB{{[0-9]+}}_1:
335 ; CHECK-NEXT: ldaxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
336336 ; w0 below is a reasonable guess but could change: it certainly comes into the
337337 ; function there.
338338 ; CHECK-NEXT: eor [[NEW:w[0-9]+]], w[[OLD]], w0
339339 ; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
340340 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
341 ; CHECK: dmb ish
341 ; CHECK-NOT: dmb
342342
343343 ; CHECK: mov x0, x[[OLD]]
344344 ret i8 %old
346346
347347 define i16 @test_atomic_load_xor_i16(i16 %offset) nounwind {
348348 ; CHECK: test_atomic_load_xor_i16:
349 %old = atomicrmw xor i16* @var16, i16 %offset seq_cst
350 ; CHECK: dmb ish
349 %old = atomicrmw xor i16* @var16, i16 %offset release
350 ; CHECK-NOT: dmb
351351 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
352352 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
353353
356356 ; w0 below is a reasonable guess but could change: it certainly comes into the
357357 ; function there.
358358 ; CHECK-NEXT: eor [[NEW:w[0-9]+]], w[[OLD]], w0
359 ; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
360 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
361 ; CHECK: dmb ish
359 ; CHECK-NEXT: stlxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
360 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
361 ; CHECK-NOT: dmb
362362
363363 ; CHECK: mov x0, x[[OLD]]
364364 ret i16 %old
367367 define i32 @test_atomic_load_xor_i32(i32 %offset) nounwind {
368368 ; CHECK: test_atomic_load_xor_i32:
369369 %old = atomicrmw xor i32* @var32, i32 %offset seq_cst
370 ; CHECK: dmb ish
370 ; CHECK-NOT: dmb
371371 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
372372 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
373373
374374 ; CHECK: .LBB{{[0-9]+}}_1:
375 ; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
375 ; CHECK-NEXT: ldaxr w[[OLD:[0-9]+]], [x[[ADDR]]]
376376 ; w0 below is a reasonable guess but could change: it certainly comes into the
377377 ; function there.
378378 ; CHECK-NEXT: eor [[NEW:w[0-9]+]], w[[OLD]], w0
379 ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
380 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
381 ; CHECK: dmb ish
379 ; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
380 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
381 ; CHECK-NOT: dmb
382382
383383 ; CHECK: mov x0, x[[OLD]]
384384 ret i32 %old
386386
387387 define i64 @test_atomic_load_xor_i64(i64 %offset) nounwind {
388388 ; CHECK: test_atomic_load_xor_i64:
389 %old = atomicrmw xor i64* @var64, i64 %offset seq_cst
390 ; CHECK: dmb ish
389 %old = atomicrmw xor i64* @var64, i64 %offset monotonic
390 ; CHECK-NOT: dmb
391391 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
392392 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
393393
398398 ; CHECK-NEXT: eor [[NEW:x[0-9]+]], x[[OLD]], x0
399399 ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
400400 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
401 ; CHECK: dmb ish
401 ; CHECK-NOT: dmb
402402
403403 ; CHECK: mov x0, x[[OLD]]
404404 ret i64 %old
406406
407407 define i8 @test_atomic_load_xchg_i8(i8 %offset) nounwind {
408408 ; CHECK: test_atomic_load_xchg_i8:
409 %old = atomicrmw xchg i8* @var8, i8 %offset seq_cst
410 ; CHECK: dmb ish
409 %old = atomicrmw xchg i8* @var8, i8 %offset monotonic
410 ; CHECK-NOT: dmb
411411 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
412412 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
413413
417417 ; function there.
418418 ; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], w0, [x[[ADDR]]]
419419 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
420 ; CHECK: dmb ish
420 ; CHECK-NOT: dmb
421421
422422 ; CHECK: mov x0, x[[OLD]]
423423 ret i8 %old
426426 define i16 @test_atomic_load_xchg_i16(i16 %offset) nounwind {
427427 ; CHECK: test_atomic_load_xchg_i16:
428428 %old = atomicrmw xchg i16* @var16, i16 %offset seq_cst
429 ; CHECK: dmb ish
429 ; CHECK-NOT: dmb
430430 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
431431 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
432432
433433 ; CHECK: .LBB{{[0-9]+}}_1:
434 ; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
435 ; w0 below is a reasonable guess but could change: it certainly comes into the
436 ; function there.
437 ; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], w0, [x[[ADDR]]]
438 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
439 ; CHECK: dmb ish
434 ; CHECK-NEXT: ldaxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
435 ; w0 below is a reasonable guess but could change: it certainly comes into the
436 ; function there.
437 ; CHECK-NEXT: stlxrh [[STATUS:w[0-9]+]], w0, [x[[ADDR]]]
438 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
439 ; CHECK-NOT: dmb
440440
441441 ; CHECK: mov x0, x[[OLD]]
442442 ret i16 %old
444444
445445 define i32 @test_atomic_load_xchg_i32(i32 %offset) nounwind {
446446 ; CHECK: test_atomic_load_xchg_i32:
447 %old = atomicrmw xchg i32* @var32, i32 %offset seq_cst
448 ; CHECK: dmb ish
447 %old = atomicrmw xchg i32* @var32, i32 %offset release
448 ; CHECK-NOT: dmb
449449 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
450450 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
451451
453453 ; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
454454 ; w0 below is a reasonable guess but could change: it certainly comes into the
455455 ; function there.
456 ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], w0, [x[[ADDR]]]
457 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
458 ; CHECK: dmb ish
456 ; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], w0, [x[[ADDR]]]
457 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
458 ; CHECK-NOT: dmb
459459
460460 ; CHECK: mov x0, x[[OLD]]
461461 ret i32 %old
463463
464464 define i64 @test_atomic_load_xchg_i64(i64 %offset) nounwind {
465465 ; CHECK: test_atomic_load_xchg_i64:
466 %old = atomicrmw xchg i64* @var64, i64 %offset seq_cst
467 ; CHECK: dmb ish
466 %old = atomicrmw xchg i64* @var64, i64 %offset acquire
467 ; CHECK-NOT: dmb
468468 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
469469 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
470470
471471 ; CHECK: .LBB{{[0-9]+}}_1:
472 ; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]]
472 ; CHECK-NEXT: ldaxr x[[OLD:[0-9]+]], [x[[ADDR]]]
473473 ; x0 below is a reasonable guess but could change: it certainly comes into the
474474 ; function there.
475475 ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], x0, [x[[ADDR]]]
476476 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
477 ; CHECK: dmb ish
477 ; CHECK-NOT: dmb
478478
479479 ; CHECK: mov x0, x[[OLD]]
480480 ret i64 %old
483483
484484 define i8 @test_atomic_load_min_i8(i8 %offset) nounwind {
485485 ; CHECK: test_atomic_load_min_i8:
486 %old = atomicrmw min i8* @var8, i8 %offset seq_cst
487 ; CHECK: dmb ish
488 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
489 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
490
491 ; CHECK: .LBB{{[0-9]+}}_1:
492 ; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
486 %old = atomicrmw min i8* @var8, i8 %offset acquire
487 ; CHECK-NOT: dmb
488 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
489 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
490
491 ; CHECK: .LBB{{[0-9]+}}_1:
492 ; CHECK-NEXT: ldaxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
493493 ; w0 below is a reasonable guess but could change: it certainly comes into the
494494 ; function there.
495495 ; CHECK-NEXT: cmp w0, w[[OLD]], sxtb
496496 ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt
497497 ; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
498498 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
499 ; CHECK: dmb ish
499 ; CHECK-NOT: dmb
500500
501501 ; CHECK: mov x0, x[[OLD]]
502502 ret i8 %old
504504
505505 define i16 @test_atomic_load_min_i16(i16 %offset) nounwind {
506506 ; CHECK: test_atomic_load_min_i16:
507 %old = atomicrmw min i16* @var16, i16 %offset seq_cst
508 ; CHECK: dmb ish
507 %old = atomicrmw min i16* @var16, i16 %offset release
508 ; CHECK-NOT: dmb
509509 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
510510 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
511511
515515 ; function there.
516516 ; CHECK-NEXT: cmp w0, w[[OLD]], sxth
517517 ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt
518 ; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
519 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
520 ; CHECK: dmb ish
518 ; CHECK-NEXT: stlxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
519 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
520 ; CHECK-NOT: dmb
521521
522522 ; CHECK: mov x0, x[[OLD]]
523523 ret i16 %old
525525
526526 define i32 @test_atomic_load_min_i32(i32 %offset) nounwind {
527527 ; CHECK: test_atomic_load_min_i32:
528 %old = atomicrmw min i32* @var32, i32 %offset seq_cst
529 ; CHECK: dmb ish
528 %old = atomicrmw min i32* @var32, i32 %offset monotonic
529 ; CHECK-NOT: dmb
530530 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
531531 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
532532
538538 ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt
539539 ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
540540 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
541 ; CHECK: dmb ish
541 ; CHECK-NOT: dmb
542542
543543 ; CHECK: mov x0, x[[OLD]]
544544 ret i32 %old
547547 define i64 @test_atomic_load_min_i64(i64 %offset) nounwind {
548548 ; CHECK: test_atomic_load_min_i64:
549549 %old = atomicrmw min i64* @var64, i64 %offset seq_cst
550 ; CHECK: dmb ish
550 ; CHECK-NOT: dmb
551551 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
552552 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
553553
554554 ; CHECK: .LBB{{[0-9]+}}_1:
555 ; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]]
555 ; CHECK-NEXT: ldaxr x[[OLD:[0-9]+]], [x[[ADDR]]]
556556 ; x0 below is a reasonable guess but could change: it certainly comes into the
557557 ; function there.
558558 ; CHECK-NEXT: cmp x0, x[[OLD]]
559559 ; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, gt
560 ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
561 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
562 ; CHECK: dmb ish
560 ; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
561 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
562 ; CHECK-NOT: dmb
563563
564564 ; CHECK: mov x0, x[[OLD]]
565565 ret i64 %old
568568 define i8 @test_atomic_load_max_i8(i8 %offset) nounwind {
569569 ; CHECK: test_atomic_load_max_i8:
570570 %old = atomicrmw max i8* @var8, i8 %offset seq_cst
571 ; CHECK: dmb ish
572 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
573 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
574
575 ; CHECK: .LBB{{[0-9]+}}_1:
576 ; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
571 ; CHECK-NOT: dmb
572 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
573 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
574
575 ; CHECK: .LBB{{[0-9]+}}_1:
576 ; CHECK-NEXT: ldaxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
577577 ; w0 below is a reasonable guess but could change: it certainly comes into the
578578 ; function there.
579579 ; CHECK-NEXT: cmp w0, w[[OLD]], sxtb
580580 ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lt
581 ; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
582 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
583 ; CHECK: dmb ish
581 ; CHECK-NEXT: stlxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
582 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
583 ; CHECK-NOT: dmb
584584
585585 ; CHECK: mov x0, x[[OLD]]
586586 ret i8 %old
588588
589589 define i16 @test_atomic_load_max_i16(i16 %offset) nounwind {
590590 ; CHECK: test_atomic_load_max_i16:
591 %old = atomicrmw max i16* @var16, i16 %offset seq_cst
592 ; CHECK: dmb ish
591 %old = atomicrmw max i16* @var16, i16 %offset acquire
592 ; CHECK-NOT: dmb
593593 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
594594 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
595595
596596 ; CHECK: .LBB{{[0-9]+}}_1:
597 ; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
597 ; CHECK-NEXT: ldaxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
598598 ; w0 below is a reasonable guess but could change: it certainly comes into the
599599 ; function there.
600600 ; CHECK-NEXT: cmp w0, w[[OLD]], sxth
601601 ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lt
602602 ; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
603603 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
604 ; CHECK: dmb ish
604 ; CHECK-NOT: dmb
605605
606606 ; CHECK: mov x0, x[[OLD]]
607607 ret i16 %old
609609
610610 define i32 @test_atomic_load_max_i32(i32 %offset) nounwind {
611611 ; CHECK: test_atomic_load_max_i32:
612 %old = atomicrmw max i32* @var32, i32 %offset seq_cst
613 ; CHECK: dmb ish
612 %old = atomicrmw max i32* @var32, i32 %offset release
613 ; CHECK-NOT: dmb
614614 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
615615 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
616616
620620 ; function there.
621621 ; CHECK-NEXT: cmp w0, w[[OLD]]
622622 ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lt
623 ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
624 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
625 ; CHECK: dmb ish
623 ; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
624 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
625 ; CHECK-NOT: dmb
626626
627627 ; CHECK: mov x0, x[[OLD]]
628628 ret i32 %old
630630
631631 define i64 @test_atomic_load_max_i64(i64 %offset) nounwind {
632632 ; CHECK: test_atomic_load_max_i64:
633 %old = atomicrmw max i64* @var64, i64 %offset seq_cst
634 ; CHECK: dmb ish
633 %old = atomicrmw max i64* @var64, i64 %offset monotonic
634 ; CHECK-NOT: dmb
635635 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
636636 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
637637
643643 ; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, lt
644644 ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
645645 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
646 ; CHECK: dmb ish
646 ; CHECK-NOT: dmb
647647
648648 ; CHECK: mov x0, x[[OLD]]
649649 ret i64 %old
651651
652652 define i8 @test_atomic_load_umin_i8(i8 %offset) nounwind {
653653 ; CHECK: test_atomic_load_umin_i8:
654 %old = atomicrmw umin i8* @var8, i8 %offset seq_cst
655 ; CHECK: dmb ish
654 %old = atomicrmw umin i8* @var8, i8 %offset monotonic
655 ; CHECK-NOT: dmb
656656 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
657657 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
658658
664664 ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi
665665 ; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
666666 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
667 ; CHECK: dmb ish
667 ; CHECK-NOT: dmb
668668
669669 ; CHECK: mov x0, x[[OLD]]
670670 ret i8 %old
672672
673673 define i16 @test_atomic_load_umin_i16(i16 %offset) nounwind {
674674 ; CHECK: test_atomic_load_umin_i16:
675 %old = atomicrmw umin i16* @var16, i16 %offset seq_cst
676 ; CHECK: dmb ish
675 %old = atomicrmw umin i16* @var16, i16 %offset acquire
676 ; CHECK-NOT: dmb
677677 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
678678 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
679679
680680 ; CHECK: .LBB{{[0-9]+}}_1:
681 ; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
681 ; CHECK-NEXT: ldaxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
682682 ; w0 below is a reasonable guess but could change: it certainly comes into the
683683 ; function there.
684684 ; CHECK-NEXT: cmp w0, w[[OLD]], uxth
685685 ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi
686686 ; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
687687 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
688 ; CHECK: dmb ish
688 ; CHECK-NOT: dmb
689689
690690 ; CHECK: mov x0, x[[OLD]]
691691 ret i16 %old
694694 define i32 @test_atomic_load_umin_i32(i32 %offset) nounwind {
695695 ; CHECK: test_atomic_load_umin_i32:
696696 %old = atomicrmw umin i32* @var32, i32 %offset seq_cst
697 ; CHECK: dmb ish
697 ; CHECK-NOT: dmb
698698 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
699699 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
700700
701701 ; CHECK: .LBB{{[0-9]+}}_1:
702 ; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
702 ; CHECK-NEXT: ldaxr w[[OLD:[0-9]+]], [x[[ADDR]]]
703703 ; w0 below is a reasonable guess but could change: it certainly comes into the
704704 ; function there.
705705 ; CHECK-NEXT: cmp w0, w[[OLD]]
706706 ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi
707 ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
708 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
709 ; CHECK: dmb ish
707 ; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
708 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
709 ; CHECK-NOT: dmb
710710
711711 ; CHECK: mov x0, x[[OLD]]
712712 ret i32 %old
714714
715715 define i64 @test_atomic_load_umin_i64(i64 %offset) nounwind {
716716 ; CHECK: test_atomic_load_umin_i64:
717 %old = atomicrmw umin i64* @var64, i64 %offset seq_cst
718 ; CHECK: dmb ish
717 %old = atomicrmw umin i64* @var64, i64 %offset acq_rel
718 ; CHECK-NOT: dmb
719719 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
720720 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
721721
722722 ; CHECK: .LBB{{[0-9]+}}_1:
723 ; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]]
723 ; CHECK-NEXT: ldaxr x[[OLD:[0-9]+]], [x[[ADDR]]]
724724 ; x0 below is a reasonable guess but could change: it certainly comes into the
725725 ; function there.
726726 ; CHECK-NEXT: cmp x0, x[[OLD]]
727727 ; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, hi
728 ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
729 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
730 ; CHECK: dmb ish
728 ; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
729 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
730 ; CHECK-NOT: dmb
731731
732732 ; CHECK: mov x0, x[[OLD]]
733733 ret i64 %old
735735
736736 define i8 @test_atomic_load_umax_i8(i8 %offset) nounwind {
737737 ; CHECK: test_atomic_load_umax_i8:
738 %old = atomicrmw umax i8* @var8, i8 %offset seq_cst
739 ; CHECK: dmb ish
740 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
741 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
742
743 ; CHECK: .LBB{{[0-9]+}}_1:
744 ; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
738 %old = atomicrmw umax i8* @var8, i8 %offset acq_rel
739 ; CHECK-NOT: dmb
740 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
741 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
742
743 ; CHECK: .LBB{{[0-9]+}}_1:
744 ; CHECK-NEXT: ldaxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
745745 ; w0 below is a reasonable guess but could change: it certainly comes into the
746746 ; function there.
747747 ; CHECK-NEXT: cmp w0, w[[OLD]], uxtb
748748 ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lo
749 ; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
750 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
751 ; CHECK: dmb ish
749 ; CHECK-NEXT: stlxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
750 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
751 ; CHECK-NOT: dmb
752752
753753 ; CHECK: mov x0, x[[OLD]]
754754 ret i8 %old
756756
757757 define i16 @test_atomic_load_umax_i16(i16 %offset) nounwind {
758758 ; CHECK: test_atomic_load_umax_i16:
759 %old = atomicrmw umax i16* @var16, i16 %offset seq_cst
760 ; CHECK: dmb ish
759 %old = atomicrmw umax i16* @var16, i16 %offset monotonic
760 ; CHECK-NOT: dmb
761761 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
762762 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
763763
769769 ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lo
770770 ; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
771771 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
772 ; CHECK: dmb ish
772 ; CHECK-NOT: dmb
773773
774774 ; CHECK: mov x0, x[[OLD]]
775775 ret i16 %old
778778 define i32 @test_atomic_load_umax_i32(i32 %offset) nounwind {
779779 ; CHECK: test_atomic_load_umax_i32:
780780 %old = atomicrmw umax i32* @var32, i32 %offset seq_cst
781 ; CHECK: dmb ish
781 ; CHECK-NOT: dmb
782782 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
783783 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
784784
785785 ; CHECK: .LBB{{[0-9]+}}_1:
786 ; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
786 ; CHECK-NEXT: ldaxr w[[OLD:[0-9]+]], [x[[ADDR]]]
787787 ; w0 below is a reasonable guess but could change: it certainly comes into the
788788 ; function there.
789789 ; CHECK-NEXT: cmp w0, w[[OLD]]
790790 ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lo
791 ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
792 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
793 ; CHECK: dmb ish
791 ; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
792 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
793 ; CHECK-NOT: dmb
794794
795795 ; CHECK: mov x0, x[[OLD]]
796796 ret i32 %old
798798
799799 define i64 @test_atomic_load_umax_i64(i64 %offset) nounwind {
800800 ; CHECK: test_atomic_load_umax_i64:
801 %old = atomicrmw umax i64* @var64, i64 %offset seq_cst
802 ; CHECK: dmb ish
801 %old = atomicrmw umax i64* @var64, i64 %offset release
802 ; CHECK-NOT: dmb
803803 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
804804 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
805805
809809 ; function there.
810810 ; CHECK-NEXT: cmp x0, x[[OLD]]
811811 ; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, lo
812 ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
813 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
814 ; CHECK: dmb ish
812 ; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
813 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
814 ; CHECK-NOT: dmb
815815
816816 ; CHECK: mov x0, x[[OLD]]
817817 ret i64 %old
819819
820820 define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
821821 ; CHECK: test_atomic_cmpxchg_i8:
822 %old = cmpxchg i8* @var8, i8 %wanted, i8 %new seq_cst
823 ; CHECK: dmb ish
822 %old = cmpxchg i8* @var8, i8 %wanted, i8 %new acquire
823 ; CHECK-NOT: dmb
824824 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
825825 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
826826
827827 ; CHECK: [[STARTAGAIN:.LBB[0-9]+_[0-9]+]]:
828 ; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
828 ; CHECK-NEXT: ldaxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
829829 ; w0 below is a reasonable guess but could change: it certainly comes into the
830830 ; function there.
831831 ; CHECK-NEXT: cmp w[[OLD]], w0
833833 ; As above, w1 is a reasonable guess.
834834 ; CHECK: stxrb [[STATUS:w[0-9]+]], w1, [x[[ADDR]]]
835835 ; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]]
836 ; CHECK: dmb ish
836 ; CHECK-NOT: dmb
837837
838838 ; CHECK: mov x0, x[[OLD]]
839839 ret i8 %old
842842 define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind {
843843 ; CHECK: test_atomic_cmpxchg_i16:
844844 %old = cmpxchg i16* @var16, i16 %wanted, i16 %new seq_cst
845 ; CHECK: dmb ish
845 ; CHECK-NOT: dmb
846846 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
847847 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
848848
849849 ; CHECK: [[STARTAGAIN:.LBB[0-9]+_[0-9]+]]:
850 ; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
850 ; CHECK-NEXT: ldaxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
851851 ; w0 below is a reasonable guess but could change: it certainly comes into the
852852 ; function there.
853853 ; CHECK-NEXT: cmp w[[OLD]], w0
854854 ; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]]
855855 ; As above, w1 is a reasonable guess.
856 ; CHECK: stxrh [[STATUS:w[0-9]+]], w1, [x[[ADDR]]]
856 ; CHECK: stlxrh [[STATUS:w[0-9]+]], w1, [x[[ADDR]]]
857857 ; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]]
858 ; CHECK: dmb ish
858 ; CHECK-NOT: dmb
859859
860860 ; CHECK: mov x0, x[[OLD]]
861861 ret i16 %old
863863
864864 define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind {
865865 ; CHECK: test_atomic_cmpxchg_i32:
866 %old = cmpxchg i32* @var32, i32 %wanted, i32 %new seq_cst
867 ; CHECK: dmb ish
866 %old = cmpxchg i32* @var32, i32 %wanted, i32 %new release
867 ; CHECK-NOT: dmb
868868 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
869869 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
870870
875875 ; CHECK-NEXT: cmp w[[OLD]], w0
876876 ; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]]
877877 ; As above, w1 is a reasonable guess.
878 ; CHECK: stxr [[STATUS:w[0-9]+]], w1, [x[[ADDR]]]
878 ; CHECK: stlxr [[STATUS:w[0-9]+]], w1, [x[[ADDR]]]
879879 ; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]]
880 ; CHECK: dmb ish
880 ; CHECK-NOT: dmb
881881
882882 ; CHECK: mov x0, x[[OLD]]
883883 ret i32 %old
885885
886886 define i64 @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind {
887887 ; CHECK: test_atomic_cmpxchg_i64:
888 %old = cmpxchg i64* @var64, i64 %wanted, i64 %new seq_cst
889 ; CHECK: dmb ish
888 %old = cmpxchg i64* @var64, i64 %wanted, i64 %new monotonic
889 ; CHECK-NOT: dmb
890890 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
891891 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
892892
899899 ; As above, w1 is a reasonable guess.
900900 ; CHECK: stxr [[STATUS:w[0-9]+]], x1, [x[[ADDR]]]
901901 ; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]]
902 ; CHECK: dmb ish
902 ; CHECK-NOT: dmb
903903
904904 ; CHECK: mov x0, x[[OLD]]
905905 ret i64 %old
932932 define i8 @test_atomic_load_acquire_i8() nounwind {
933933 ; CHECK: test_atomic_load_acquire_i8:
934934 %val = load atomic i8* @var8 acquire, align 1
935 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
936 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
937
935 ; CHECK-NOT: dmb
936 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
937 ; CHECK-NOT: dmb
938 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
939 ; CHECK-NOT: dmb
938940 ; CHECK: ldarb w0, [x[[ADDR]]]
941 ; CHECK-NOT: dmb
939942 ret i8 %val
940943 }
941944
942945 define i8 @test_atomic_load_seq_cst_i8() nounwind {
943946 ; CHECK: test_atomic_load_seq_cst_i8:
944947 %val = load atomic i8* @var8 seq_cst, align 1
945 ; CHECK: adrp x[[HIADDR:[0-9]+]], var8
946 ; CHECK: ldrb w0, [x[[HIADDR]], #:lo12:var8]
947 ; CHECK: dmb ish
948 ; CHECK-NOT: dmb
949 ; CHECK: adrp [[HIADDR:x[0-9]+]], var8
950 ; CHECK-NOT: dmb
951 ; CHECK: add x[[ADDR:[0-9]+]], [[HIADDR]], #:lo12:var8
952 ; CHECK-NOT: dmb
953 ; CHECK: ldarb w0, [x[[ADDR]]]
954 ; CHECK-NOT: dmb
948955 ret i8 %val
949956 }
950957
953960 %val = load atomic i16* @var16 monotonic, align 2
954961 ; CHECK-NOT: dmb
955962 ; CHECK: adrp x[[HIADDR:[0-9]+]], var16
963 ; CHECK-NOT: dmb
956964 ; CHECK: ldrh w0, [x[[HIADDR]], #:lo12:var16]
957965 ; CHECK-NOT: dmb
958966
975983 define i64 @test_atomic_load_seq_cst_i64() nounwind {
976984 ; CHECK: test_atomic_load_seq_cst_i64:
977985 %val = load atomic i64* @var64 seq_cst, align 8
978 ; CHECK: adrp x[[HIADDR:[0-9]+]], var64
979 ; CHECK: ldr x0, [x[[HIADDR]], #:lo12:var64]
980 ; CHECK: dmb ish
986 ; CHECK-NOT: dmb
987 ; CHECK: adrp [[HIADDR:x[0-9]+]], var64
988 ; CHECK-NOT: dmb
989 ; CHECK: add x[[ADDR:[0-9]+]], [[HIADDR]], #:lo12:var64
990 ; CHECK-NOT: dmb
991 ; CHECK: ldar x0, [x[[ADDR]]]
992 ; CHECK-NOT: dmb
981993 ret i64 %val
982994 }
983995
10041016 define void @test_atomic_store_release_i8(i8 %val) nounwind {
10051017 ; CHECK: test_atomic_store_release_i8:
10061018 store atomic i8 %val, i8* @var8 release, align 1
1019 ; CHECK-NOT: dmb
10071020 ; CHECK: adrp [[HIADDR:x[0-9]+]], var8
1021 ; CHECK-NOT: dmb
10081022 ; CHECK: add x[[ADDR:[0-9]+]], [[HIADDR]], #:lo12:var8
1023 ; CHECK-NOT: dmb
10091024 ; CHECK: stlrb w0, [x[[ADDR]]]
1010
1025 ; CHECK-NOT: dmb
10111026 ret void
10121027 }
10131028
10141029 define void @test_atomic_store_seq_cst_i8(i8 %val) nounwind {
10151030 ; CHECK: test_atomic_store_seq_cst_i8:
10161031 store atomic i8 %val, i8* @var8 seq_cst, align 1
1032 ; CHECK-NOT: dmb
10171033 ; CHECK: adrp [[HIADDR:x[0-9]+]], var8
1034 ; CHECK-NOT: dmb
10181035 ; CHECK: add x[[ADDR:[0-9]+]], [[HIADDR]], #:lo12:var8
1036 ; CHECK-NOT: dmb
10191037 ; CHECK: stlrb w0, [x[[ADDR]]]
1020 ; CHECK: dmb ish
1038 ; CHECK-NOT: dmb
10211039
10221040 ret void
10231041 }
10251043 define void @test_atomic_store_monotonic_i16(i16 %val) nounwind {
10261044 ; CHECK: test_atomic_store_monotonic_i16:
10271045 store atomic i16 %val, i16* @var16 monotonic, align 2
1046 ; CHECK-NOT: dmb
10281047 ; CHECK: adrp x[[HIADDR:[0-9]+]], var16
1048 ; CHECK-NOT: dmb
10291049 ; CHECK: strh w0, [x[[HIADDR]], #:lo12:var16]
1030
1050 ; CHECK-NOT: dmb
10311051 ret void
10321052 }
10331053
10381058 %addr = inttoptr i64 %addr_int to i32*
10391059
10401060 store atomic i32 %val, i32* %addr monotonic, align 4
1061 ; CHECK-NOT: dmb
10411062 ; CHECK: str w2, [x0, x1]
1063 ; CHECK-NOT: dmb
10421064
10431065 ret void
10441066 }
10461068 define void @test_atomic_store_release_i64(i64 %val) nounwind {
10471069 ; CHECK: test_atomic_store_release_i64:
10481070 store atomic i64 %val, i64* @var64 release, align 8
1071 ; CHECK-NOT: dmb
10491072 ; CHECK: adrp [[HIADDR:x[0-9]+]], var64
1073 ; CHECK-NOT: dmb
10501074 ; CHECK: add x[[ADDR:[0-9]+]], [[HIADDR]], #:lo12:var64
1075 ; CHECK-NOT: dmb
10511076 ; CHECK: stlr x0, [x[[ADDR]]]
1052
1077 ; CHECK-NOT: dmb
10531078 ret void
10541079 }