llvm.org GIT mirror llvm / d8777df
Merging r353308 and r353383: ------------------------------------------------------------------------ r353308 | tnorthover | 2019-02-06 16:26:35 +0100 (Wed, 06 Feb 2019) | 5 lines AArch64: enforce even/odd register pairs for CASP instructions. ARMv8.1a CASP instructions need the first of the pair to be an even register (otherwise the encoding is unallocated). We enforced this during assembly, but not CodeGen before. ------------------------------------------------------------------------ ------------------------------------------------------------------------ r353383 | tnorthover | 2019-02-07 11:35:34 +0100 (Thu, 07 Feb 2019) | 4 lines AArch64: implement copy for paired GPR registers. When doing 128-bit atomics using CASP we might need to copy a GPRPair to a different register, but that was unimplemented up to now. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_80@353822 91177308-0d34-0410-b5e6-96231b3b80d8 Hans Wennborg 1 year, 9 months ago
6 changed file(s) with 93 addition(s) and 6 deletion(s). Raw diff Collapse all Expand all
22912291 }
22922292 }
22932293
2294 void AArch64InstrInfo::copyGPRRegTuple(MachineBasicBlock &MBB,
2295 MachineBasicBlock::iterator I,
2296 DebugLoc DL, unsigned DestReg,
2297 unsigned SrcReg, bool KillSrc,
2298 unsigned Opcode, unsigned ZeroReg,
2299 llvm::ArrayRef Indices) const {
2300 const TargetRegisterInfo *TRI = &getRegisterInfo();
2301 unsigned NumRegs = Indices.size();
2302
2303 #ifndef NDEBUG
2304 uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
2305 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
2306 assert(DestEncoding % NumRegs == 0 && SrcEncoding % NumRegs == 0 &&
2307 "GPR reg sequences should not be able to overlap");
2308 #endif
2309
2310 for (unsigned SubReg = 0; SubReg != NumRegs; ++SubReg) {
2311 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
2312 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
2313 MIB.addReg(ZeroReg);
2314 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
2315 MIB.addImm(0);
2316 }
2317 }
2318
22942319 void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
22952320 MachineBasicBlock::iterator I,
22962321 const DebugLoc &DL, unsigned DestReg,
24272452 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
24282453 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
24292454 Indices);
2455 return;
2456 }
2457
2458 if (AArch64::XSeqPairsClassRegClass.contains(DestReg) &&
2459 AArch64::XSeqPairsClassRegClass.contains(SrcReg)) {
2460 static const unsigned Indices[] = {AArch64::sube64, AArch64::subo64};
2461 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRXrs,
2462 AArch64::XZR, Indices);
2463 return;
2464 }
2465
2466 if (AArch64::WSeqPairsClassRegClass.contains(DestReg) &&
2467 AArch64::WSeqPairsClassRegClass.contains(SrcReg)) {
2468 static const unsigned Indices[] = {AArch64::sube32, AArch64::subo32};
2469 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRWrs,
2470 AArch64::WZR, Indices);
24302471 return;
24312472 }
24322473
121121 const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
122122 bool KillSrc, unsigned Opcode,
123123 llvm::ArrayRef Indices) const;
124 void copyGPRRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
125 DebugLoc DL, unsigned DestReg, unsigned SrcReg,
126 bool KillSrc, unsigned Opcode, unsigned ZeroReg,
127 llvm::ArrayRef Indices) const;
124128 void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
125129 const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
126130 bool KillSrc) const override;
648648 // ARMv8.1a atomic CASP register operands
649649
650650
651 def WSeqPairs : RegisterTuples<[sube32, subo32],
652 [(rotl GPR32, 0), (rotl GPR32, 1)]>;
653 def XSeqPairs : RegisterTuples<[sube64, subo64],
654 [(rotl GPR64, 0), (rotl GPR64, 1)]>;
// Sequential GPR pairs for ARMv8.1a CASP operands. The 'decimate ..., 2'
// keeps every second candidate so each tuple starts on an even-numbered
// register; odd-first pairs have no allocated CASP encoding (see the
// decoder, which rejects RegNo & 0x1).
651 def WSeqPairs : RegisterTuples<[sube32, subo32],
652 [(decimate (rotl GPR32, 0), 2),
653 (decimate (rotl GPR32, 1), 2)]>;
654 def XSeqPairs : RegisterTuples<[sube64, subo64],
655 [(decimate (rotl GPR64, 0), 2),
656 (decimate (rotl GPR64, 1), 2)]>;
655657
656658 def WSeqPairsClass : RegisterClass<"AArch64", [untyped], 32,
657659 (add WSeqPairs)>{
17781778 if (RegNo & 0x1)
17791779 return Fail;
17801780
1781 unsigned Register = AArch64MCRegisterClasses[RegClassID].getRegister(RegNo);
1782 Inst.addOperand(MCOperand::createReg(Register));
1781 unsigned Reg = AArch64MCRegisterClasses[RegClassID].getRegister(RegNo / 2);
1782 Inst.addOperand(MCOperand::createReg(Reg));
17831783 return Success;
17841784 }
17851785
0 ; RUN: llc -mtriple arm64-apple-ios -mattr=+lse %s -o - | FileCheck %s
1
2 ; Only "even,even+1" pairs are valid for CASP instructions. Make sure LLVM
3 ; doesn't allocate odd ones and that it can copy them around properly. N.b. we
4 ; don't actually check that they're sequential because FileCheck can't; odd/even
5 ; will have to be good enough.
6 define void @test_atomic_cmpxchg_i128_register_shuffling(i128* %addr, i128 %desired, i128 %new) nounwind {
7 ; CHECK-LABEL: test_atomic_cmpxchg_i128_register_shuffling:
8 ; CHECK-DAG: mov [[DESIRED_LO:x[0-9]*[02468]]], x1
9 ; CHECK-DAG: mov [[DESIRED_HI:x[0-9]*[13579]]], x2
10 ; CHECK-DAG: mov [[NEW_LO:x[0-9]*[02468]]], x3
11 ; CHECK-DAG: mov [[NEW_HI:x[0-9]*[13579]]], x4
12 ; CHECK: caspal [[DESIRED_LO]], [[DESIRED_HI]], [[NEW_LO]], [[NEW_HI]], [x0]
13
; %res is intentionally left unused: the CHECK lines above only verify which
; registers the CASP operands land in, not the cmpxchg result.
14 %res = cmpxchg i128* %addr, i128 %desired, i128 %new seq_cst seq_cst
15 ret void
16 }
0 # RUN: llc -o - %s -mtriple=aarch64-- -mattr=+v8.1a -run-pass=postrapseudos | FileCheck %s
# Checks that postrapseudos expands a COPY of an X/W sequential register pair
# into one ORR-from-zero move per lane (see the two ORR CHECK lines in each
# function below).
1 ---
2 # CHECK-LABEL: name: copy_xseqpairs
3 name: copy_xseqpairs
4 body: |
5 bb.0:
6 ; CHECK: $x4_x5 = CASPALX $x4_x5, $x2_x3, $x0
7 ; CHECK: $x0 = ORRXrs $xzr, $x4, 0
8 ; CHECK: $x1 = ORRXrs $xzr, $x5, 0
9 $x4_x5 = CASPALX $x4_x5, $x2_x3, $x0
10 $x0_x1 = COPY $x4_x5
11 ...
12 ---
13 # CHECK-LABEL: name: copy_wseqpairs
14 name: copy_wseqpairs
15 body: |
16 bb.0:
17 ; CHECK: $w4_w5 = CASPALW $w4_w5, $w2_w3, $x0
18 ; CHECK: $w0 = ORRWrs $wzr, $w4, 0
19 ; CHECK: $w1 = ORRWrs $wzr, $w5, 0
20 $w4_w5 = CASPALW $w4_w5, $w2_w3, $x0
21 $w0_w1 = COPY $w4_w5
22 ...