[Sparc] Implement i64 load/store support for 32-bit sparc.

The LDD/STD instructions can load/store a 64-bit quantity from/to memory to/from a consecutive even/odd pair of (32-bit) registers. They are part of SparcV8, and also present in SparcV9 (although deprecated there, as you can store 64 bits in one register).

As recommended on llvmdev in the thread "How to enable use of 64bit load/store for 32bit architecture" from Apr 2015, I've modeled the 64-bit load/store operations as working on a v2i32 type, rather than making i64 a legal type but with only a few legal operations. The latter does not (currently) work, as there is much code in llvm which assumes that if i64 is legal, operations like "add" will actually work on it. The same assumption does not hold for v2i32 -- for vector types, it is workable to support only load/store, and expand everything else.

This patch:
- Adds a new register class, IntPair, for even/odd pairs of registers.
- Modifies the list of reserved registers, the stack spilling code, and register copying code to support the IntPair register class.
- Adds support in AsmParser. (Note that in asm text, you write the name of the first register of the pair only, so the parser has to morph the single register into the equivalent paired register.)
- Adds the new instructions themselves (LDD/STD/LDDA/STDA).
- Hooks up the instructions and registers as a vector type v2i32. Adds a custom legalizer to transform i64 loads/stores into v2i32 loads/stores and bitcasts, so that the new instructions can actually be generated, and marks all operations other than load/store on v2i32 as needing to be expanded.
- Copies the unfortunate SelectInlineAsm hack from ARMISelDAGToDAG. This hack undoes the transformation of i64 operands into two arbitrarily-allocated separate i32 registers in SelectionDAGBuilder, and instead passes them in a single IntPair. (Arbitrarily allocated registers are not useful; asm code expects to be receiving a pair, which can be passed to ldd/std.)

Also adds a bunch of test cases covering all the bugs I've added along the way.

Differential Revision: http://reviews.llvm.org/D8713

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@244484 91177308-0d34-0410-b5e6-96231b3b80d8

James Y Knight
18 changed file(s) with 844 addition(s) and 59 deletion(s).
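For reference, roughly the kind of source this change is about (an illustration, not part of the patch): a function like the one below performs a 64-bit load and store on a 32-bit Sparc target, and per the load_store_64bit test added further down it is now expected to compile to an ldd/std pair rather than two separate 32-bit loads and stores.

    // Illustrative only; mirrors the load_store_64bit test below.
    // Expected lowering (per that test): ldd [%o0], %o2 ... std %o4, [%o1]
    void copy_and_add(long long *x, long long *y) {
      *y = *x + 3;   // 64-bit load, 64-bit add (expanded), 64-bit store
    }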
139139 SP::ASR24, SP::ASR25, SP::ASR26, SP::ASR27,
140140 SP::ASR28, SP::ASR29, SP::ASR30, SP::ASR31};
141141
142 static unsigned IntPairRegs[] = {
143 Sparc::G0_G1, Sparc::G2_G3, Sparc::G4_G5, Sparc::G6_G7,
144 Sparc::O0_O1, Sparc::O2_O3, Sparc::O4_O5, Sparc::O6_O7,
145 Sparc::L0_L1, Sparc::L2_L3, Sparc::L4_L5, Sparc::L6_L7,
146 Sparc::I0_I1, Sparc::I2_I3, Sparc::I4_I5, Sparc::I6_I7};
147
142148 /// SparcOperand - Instances of this class represent a parsed Sparc machine
143149 /// instruction.
144150 class SparcOperand : public MCParsedAsmOperand {
146152 enum RegisterKind {
147153 rk_None,
148154 rk_IntReg,
155 rk_IntPairReg,
149156 rk_FloatReg,
150157 rk_DoubleReg,
151158 rk_QuadReg,
198205 bool isMem() const override { return isMEMrr() || isMEMri(); }
199206 bool isMEMrr() const { return Kind == k_MemoryReg; }
200207 bool isMEMri() const { return Kind == k_MemoryImm; }
208
209 bool isIntReg() const {
210 return (Kind == k_Register && Reg.Kind == rk_IntReg);
211 }
201212
202213 bool isFloatReg() const {
203214 return (Kind == k_Register && Reg.Kind == rk_FloatReg);
327338 Op->StartLoc = S;
328339 Op->EndLoc = E;
329340 return Op;
341 }
342
343 static bool MorphToIntPairReg(SparcOperand &Op) {
344 unsigned Reg = Op.getReg();
345 assert(Op.Reg.Kind == rk_IntReg);
346 unsigned regIdx = 32;
347 if (Reg >= Sparc::G0 && Reg <= Sparc::G7)
348 regIdx = Reg - Sparc::G0;
349 else if (Reg >= Sparc::O0 && Reg <= Sparc::O7)
350 regIdx = Reg - Sparc::O0 + 8;
351 else if (Reg >= Sparc::L0 && Reg <= Sparc::L7)
352 regIdx = Reg - Sparc::L0 + 16;
353 else if (Reg >= Sparc::I0 && Reg <= Sparc::I7)
354 regIdx = Reg - Sparc::I0 + 24;
355 if (regIdx % 2 || regIdx > 31)
356 return false;
357 Op.Reg.RegNum = IntPairRegs[regIdx / 2];
358 Op.Reg.Kind = rk_IntPairReg;
359 return true;
330360 }
331361
332362 static bool MorphToDoubleReg(SparcOperand &Op) {
10501080 break;
10511081 }
10521082 }
1083 if (Op.isIntReg() && Kind == MCK_IntPair) {
1084 if (SparcOperand::MorphToIntPairReg(Op))
1085 return MCTargetAsmParser::Match_Success;
1086 }
10531087 return Match_InvalidOperand;
10541088 }
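As a hedged aside on the parser change above: in assembly text only the first (even) register of a pair is written, so MorphToIntPairReg has to check that the named register really is an even one and then substitute the pair operand. The stand-alone helper below restates that rule; the function name is invented for illustration and is not part of the patch.

    // Flat GPR indices: %g0..%g7 -> 0..7, %o0..%o7 -> 8..15,
    // %l0..%l7 -> 16..23, %i0..%i7 -> 24..31.
    // Only an even first register can name a pair: "%o2" denotes O2_O3
    // (index 10 -> IntPairRegs[10 / 2]), while "%o3" must be rejected.
    static bool isValidPairBase(unsigned flatIdx) {
      return flatIdx <= 31 && (flatIdx % 2) == 0;
    }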
116116 SP::ASR24, SP::ASR25, SP::ASR26, SP::ASR27,
117117 SP::ASR28, SP::ASR29, SP::ASR30, SP::ASR31};
118118
119 static const uint16_t IntPairDecoderTable[] = {
120 SP::G0_G1, SP::G2_G3, SP::G4_G5, SP::G6_G7,
121 SP::O0_O1, SP::O2_O3, SP::O4_O5, SP::O6_O7,
122 SP::L0_L1, SP::L2_L3, SP::L4_L5, SP::L6_L7,
123 SP::I0_I1, SP::I2_I3, SP::I4_I5, SP::I6_I7,
124 };
125
119126 static DecodeStatus DecodeIntRegsRegisterClass(MCInst &Inst,
120127 unsigned RegNo,
121128 uint64_t Address,
195202 return MCDisassembler::Success;
196203 }
197204
205 static DecodeStatus DecodeIntPairRegisterClass(MCInst &Inst, unsigned RegNo,
206 uint64_t Address, const void *Decoder) {
207 DecodeStatus S = MCDisassembler::Success;
208
209 if (RegNo > 31)
210 return MCDisassembler::Fail;
211
212 if ((RegNo & 1))
213 S = MCDisassembler::SoftFail;
214
215 unsigned RegisterPair = IntPairDecoderTable[RegNo/2];
216 Inst.addOperand(MCOperand::createReg(RegisterPair));
217 return S;
218 }
198219
199220 static DecodeStatus DecodeLoadInt(MCInst &Inst, unsigned insn, uint64_t Address,
221 const void *Decoder);
222 static DecodeStatus DecodeLoadIntPair(MCInst &Inst, unsigned insn, uint64_t Address,
200223 const void *Decoder);
201224 static DecodeStatus DecodeLoadFP(MCInst &Inst, unsigned insn, uint64_t Address,
202225 const void *Decoder);
205228 static DecodeStatus DecodeLoadQFP(MCInst &Inst, unsigned insn, uint64_t Address,
206229 const void *Decoder);
207230 static DecodeStatus DecodeStoreInt(MCInst &Inst, unsigned insn,
231 uint64_t Address, const void *Decoder);
232 static DecodeStatus DecodeStoreIntPair(MCInst &Inst, unsigned insn,
208233 uint64_t Address, const void *Decoder);
209234 static DecodeStatus DecodeStoreFP(MCInst &Inst, unsigned insn,
210235 uint64_t Address, const void *Decoder);
325350 DecodeIntRegsRegisterClass);
326351 }
327352
353 static DecodeStatus DecodeLoadIntPair(MCInst &Inst, unsigned insn, uint64_t Address,
354 const void *Decoder) {
355 return DecodeMem(Inst, insn, Address, Decoder, true,
356 DecodeIntPairRegisterClass);
357 }
358
328359 static DecodeStatus DecodeLoadFP(MCInst &Inst, unsigned insn, uint64_t Address,
329360 const void *Decoder) {
330361 return DecodeMem(Inst, insn, Address, Decoder, true,
347378 uint64_t Address, const void *Decoder) {
348379 return DecodeMem(Inst, insn, Address, Decoder, false,
349380 DecodeIntRegsRegisterClass);
381 }
382
383 static DecodeStatus DecodeStoreIntPair(MCInst &Inst, unsigned insn,
384 uint64_t Address, const void *Decoder) {
385 return DecodeMem(Inst, insn, Address, Decoder, false,
386 DecodeIntPairRegisterClass);
350387 }
351388
352389 static DecodeStatus DecodeStoreFP(MCInst &Inst, unsigned insn, uint64_t Address,
2020 // i32 f32 arguments get passed in integer registers if there is space.
2121 CCIfType<[i32, f32], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>,
2222 // f64 arguments are split and passed through registers or through stack.
23 CCIfType<[f64], CCCustom<"CC_Sparc_Assign_f64">>,
23 CCIfType<[f64], CCCustom<"CC_Sparc_Assign_Split_64">>,
24 // As are v2i32 arguments (this would be the default behavior for
25 // v2i32 if it wasn't allocated to the IntPair register-class)
26 CCIfType<[v2i32], CCCustom<"CC_Sparc_Assign_Split_64">>,
27
2428
2529 // Alternatively, they are assigned to the stack in 4-byte aligned units.
2630 CCAssignToStack<4, 4>
2933 def RetCC_Sparc32 : CallingConv<[
3034 CCIfType<[i32], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>,
3135 CCIfType<[f32], CCAssignToReg<[F0, F1, F2, F3]>>,
32 CCIfType<[f64], CCAssignToReg<[D0, D1]>>
36 CCIfType<[f64], CCAssignToReg<[D0, D1]>>,
37 CCIfType<[v2i32], CCCustom<"CC_Sparc_Assign_Ret_Split_64">>
3338 ]>;
3439
3540
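A hedged illustration of what the split-64 calling-convention entries above mean in practice (example code not from the patch): a 64-bit value passed or returned on 32-bit Sparc is split into two i32 halves, high word first (Sparc is big-endian), which land in consecutive registers from the I0..I5 list or spill to the stack once registers run out.

    // Illustrative only: the i64 argument and return value are each split
    // into two 32-bit locations by CC_Sparc_Assign_Split_64 /
    // CC_Sparc_Assign_Ret_Split_64.
    long long pass_through(long long v) {
      return v;   // high half in the first register, low half in the next
    }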
212212 }
213213
214214 void SparcFrameLowering::remapRegsForLeafProc(MachineFunction &MF) const {
215
216215 MachineRegisterInfo &MRI = MF.getRegInfo();
217
218216 // Remap %i[0-7] to %o[0-7].
219217 for (unsigned reg = SP::I0; reg <= SP::I7; ++reg) {
220218 if (MRI.reg_nodbg_empty(reg))
221219 continue;
222 unsigned mapped_reg = (reg - SP::I0 + SP::O0);
220
221 unsigned mapped_reg = reg - SP::I0 + SP::O0;
223222 assert(MRI.reg_nodbg_empty(mapped_reg));
224223
225224 // Replace I register with O register.
226225 MRI.replaceRegWith(reg, mapped_reg);
226
227 // Also replace register pair super-registers.
228 if ((reg - SP::I0) % 2 == 0) {
229 unsigned preg = (reg - SP::I0) / 2 + SP::I0_I1;
230 unsigned mapped_preg = preg - SP::I0_I1 + SP::O0_O1;
231 MRI.replaceRegWith(preg, mapped_preg);
232 }
227233 }
228234
229235 // Rewrite MBB's Live-ins.
230236 for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
231237 MBB != E; ++MBB) {
238 for (unsigned reg = SP::I0_I1; reg <= SP::I6_I7; ++reg) {
239 if (!MBB->isLiveIn(reg))
240 continue;
241 MBB->removeLiveIn(reg);
242 MBB->addLiveIn(reg - SP::I0_I1 + SP::O0_O1);
243 }
232244 for (unsigned reg = SP::I0; reg <= SP::I7; ++reg) {
233245 if (!MBB->isLiveIn(reg))
234246 continue;
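To illustrate why remapRegsForLeafProc now also rewrites the pair super-registers (hedged example, not from the patch): in a leaf procedure the %i registers are renamed to %o registers, so any live IntPair such as I0_I1 has to become O0_O1 as well, along with the corresponding block live-ins. A minimal leaf routine of the following shape is the kind of function affected; the inline-asm test added below checks this renaming explicitly (std %o4 in the leaf case vs. std %i4 when frame-pointer elimination is disabled).

    // Illustrative only: no calls and no frame, so the leaf-proc
    // optimization applies and incoming arguments stay in %o registers.
    long long add_leaf(long long a, long long b) {
      return a + b;
    }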
1111 //===----------------------------------------------------------------------===//
1212
1313 #include "SparcTargetMachine.h"
14 #include "llvm/CodeGen/MachineRegisterInfo.h"
1415 #include "llvm/CodeGen/SelectionDAGISel.h"
1516 #include "llvm/IR/Intrinsics.h"
1617 #include "llvm/Support/Compiler.h"
6162
6263 private:
6364 SDNode* getGlobalBaseReg();
65 SDNode *SelectInlineAsm(SDNode *N);
6466 };
6567 } // end anonymous namespace
6668
140142 return true;
141143 }
142144
145
146 // Re-assemble i64 arguments split up in SelectionDAGBuilder's
147 // visitInlineAsm / GetRegistersForValue functions.
148 //
149 // Note: This function was copied from, and is essentially identical
150 // to ARMISelDAGToDAG::SelectInlineAsm. It is very unfortunate that
151 // such hacking-up is necessary; a rethink of how inline asm operands
152 // are handled may be in order to make doing this more sane.
153 //
154 // TODO: fix inline asm support so I can simply tell it that 'i64'
155 // inputs to asm need to be allocated to the IntPair register type,
156 // and have that work. Then, delete this function.
157 SDNode *SparcDAGToDAGISel::SelectInlineAsm(SDNode *N){
158   std::vector<SDValue> AsmNodeOperands;
159 unsigned Flag, Kind;
160 bool Changed = false;
161 unsigned NumOps = N->getNumOperands();
162
163   // Normally, i64 data is bound to two arbitrary GPRs for the "%r"
164 // constraint. However, some instructions (e.g. ldd/std) require
165 // (even/even+1) GPRs.
166
167 // So, here, we check for this case, and mutate the inlineasm to use
168 // a single IntPair register instead, which guarantees such even/odd
169 // placement.
170
171 SDLoc dl(N);
172 SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1)
173 : SDValue(nullptr,0);
174
175   SmallVector<bool, 8> OpChanged;
176 // Glue node will be appended late.
177 for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) {
178 SDValue op = N->getOperand(i);
179 AsmNodeOperands.push_back(op);
180
181 if (i < InlineAsm::Op_FirstOperand)
182 continue;
183
184     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
185 Flag = C->getZExtValue();
186 Kind = InlineAsm::getKind(Flag);
187 }
188 else
189 continue;
190
191 // Immediate operands to inline asm in the SelectionDAG are modeled with
192 // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
193 // the second is a constant with the value of the immediate. If we get here
194 // and we have a Kind_Imm, skip the next operand, and continue.
195 if (Kind == InlineAsm::Kind_Imm) {
196 SDValue op = N->getOperand(++i);
197 AsmNodeOperands.push_back(op);
198 continue;
199 }
200
201 unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
202 if (NumRegs)
203 OpChanged.push_back(false);
204
205 unsigned DefIdx = 0;
206 bool IsTiedToChangedOp = false;
207 // If it's a use that is tied with a previous def, it has no
208 // reg class constraint.
209 if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
210 IsTiedToChangedOp = OpChanged[DefIdx];
211
212 if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef
213 && Kind != InlineAsm::Kind_RegDefEarlyClobber)
214 continue;
215
216 unsigned RC;
217 bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
218 if ((!IsTiedToChangedOp && (!HasRC || RC != SP::IntRegsRegClassID))
219 || NumRegs != 2)
220 continue;
221
222 assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
223 SDValue V0 = N->getOperand(i+1);
224 SDValue V1 = N->getOperand(i+2);
225     unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg();
226     unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg();
227 SDValue PairedReg;
228 MachineRegisterInfo &MRI = MF->getRegInfo();
229
230 if (Kind == InlineAsm::Kind_RegDef ||
231 Kind == InlineAsm::Kind_RegDefEarlyClobber) {
232 // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
233 // the original GPRs.
234
235 unsigned GPVR = MRI.createVirtualRegister(&SP::IntPairRegClass);
236 PairedReg = CurDAG->getRegister(GPVR, MVT::v2i32);
237 SDValue Chain = SDValue(N,0);
238
239 SDNode *GU = N->getGluedUser();
240 SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::v2i32,
241 Chain.getValue(1));
242
243 // Extract values from a GPRPair reg and copy to the original GPR reg.
244 SDValue Sub0 = CurDAG->getTargetExtractSubreg(SP::sub_even, dl, MVT::i32,
245 RegCopy);
246 SDValue Sub1 = CurDAG->getTargetExtractSubreg(SP::sub_odd, dl, MVT::i32,
247 RegCopy);
248 SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
249 RegCopy.getValue(1));
250 SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));
251
252 // Update the original glue user.
253       std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
254 Ops.push_back(T1.getValue(1));
255 CurDAG->UpdateNodeOperands(GU, Ops);
256 }
257 else {
258 // For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a
259 // GPRPair and then pass the GPRPair to the inline asm.
260 SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];
261
262 // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
263 SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
264 Chain.getValue(1));
265 SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
266 T0.getValue(1));
267 SDValue Pair = SDValue(
268 CurDAG->getMachineNode(
269 TargetOpcode::REG_SEQUENCE, dl, MVT::v2i32,
270 {
271 CurDAG->getTargetConstant(SP::IntPairRegClassID, dl,
272 MVT::i32),
273 T0,
274 CurDAG->getTargetConstant(SP::sub_even, dl, MVT::i32),
275 T1,
276 CurDAG->getTargetConstant(SP::sub_odd, dl, MVT::i32),
277 }),
278 0);
279
280 // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
281 // i32 VRs of inline asm with it.
282 unsigned GPVR = MRI.createVirtualRegister(&SP::IntPairRegClass);
283 PairedReg = CurDAG->getRegister(GPVR, MVT::v2i32);
284 Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));
285
286 AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
287 Glue = Chain.getValue(1);
288 }
289
290 Changed = true;
291
292 if(PairedReg.getNode()) {
293 OpChanged[OpChanged.size() -1 ] = true;
294 Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/);
295 if (IsTiedToChangedOp)
296 Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx);
297 else
298 Flag = InlineAsm::getFlagWordForRegClass(Flag, SP::IntPairRegClassID);
299 // Replace the current flag.
300 AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant(
301 Flag, dl, MVT::i32);
302 // Add the new register node and skip the original two GPRs.
303 AsmNodeOperands.push_back(PairedReg);
304 // Skip the next two GPRs.
305 i += 2;
306 }
307 }
308
309 if (Glue.getNode())
310 AsmNodeOperands.push_back(Glue);
311 if (!Changed)
312 return nullptr;
313
314 SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N),
315 CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
316 New->setNodeId(-1);
317 return New.getNode();
318 }
319
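For context, a hedged source-level example (not part of the patch, but mirroring the test_constraint_r_i64 test added below) of the situation SelectInlineAsm handles: an i64 operand given to inline asm under an "r" constraint has to materialize in an even/odd register pair, because the asm body hands it straight to std.

    // Illustrative only: the i64 value must be allocated to an IntPair so
    // that %0 names the even register of the pair consumed by std.
    void store64(long long v, long long *p) {
      asm volatile("std %0, [%1]" : : "r"(v), "r"(p) : "memory");
    }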
143320 SDNode *SparcDAGToDAGISel::Select(SDNode *N) {
144321 SDLoc dl(N);
145322 if (N->isMachineOpcode()) {
149326
150327 switch (N->getOpcode()) {
151328 default: break;
329 case ISD::INLINEASM: {
330 SDNode *ResNode = SelectInlineAsm(N);
331 if (ResNode)
332 return ResNode;
333 break;
334 }
152335 case SPISD::GLOBAL_BASE_REG:
153336 return getGlobalBaseReg();
154337
4848 return true;
4949 }
5050
51 static bool CC_Sparc_Assign_f64(unsigned &ValNo, MVT &ValVT,
52 MVT &LocVT, CCValAssign::LocInfo &LocInfo,
53 ISD::ArgFlagsTy &ArgFlags, CCState &State)
51 static bool CC_Sparc_Assign_Split_64(unsigned &ValNo, MVT &ValVT,
52 MVT &LocVT, CCValAssign::LocInfo &LocInfo,
53 ISD::ArgFlagsTy &ArgFlags, CCState &State)
5454 {
5555 static const MCPhysReg RegList[] = {
5656 SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5
7676 return true;
7777 }
7878
79 static bool CC_Sparc_Assign_Ret_Split_64(unsigned &ValNo, MVT &ValVT,
80 MVT &LocVT, CCValAssign::LocInfo &LocInfo,
81 ISD::ArgFlagsTy &ArgFlags, CCState &State)
82 {
83 static const MCPhysReg RegList[] = {
84 SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5
85 };
86
87 // Try to get first reg.
88 if (unsigned Reg = State.AllocateReg(RegList))
89 State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
90 else
91 return false;
92
93 // Try to get second reg.
94 if (unsigned Reg = State.AllocateReg(RegList))
95 State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
96 else
97 return false;
98
99 return true;
100 }
101
79102 // Allocate a full-sized argument for the 64-bit ABI.
80103 static bool CC_Sparc64_Full(unsigned &ValNo, MVT &ValVT,
81104 MVT &LocVT, CCValAssign::LocInfo &LocInfo,
201224 RetOps.push_back(SDValue());
202225
203226 // Copy the result values into the output registers.
204 for (unsigned i = 0; i != RVLocs.size(); ++i) {
227 for (unsigned i = 0, realRVLocIdx = 0;
228 i != RVLocs.size();
229 ++i, ++realRVLocIdx) {
205230 CCValAssign &VA = RVLocs[i];
206231 assert(VA.isRegLoc() && "Can only return in registers!");
207232
208 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(),
209 OutVals[i], Flag);
233 SDValue Arg = OutVals[realRVLocIdx];
234
235 if (VA.needsCustom()) {
236 assert(VA.getLocVT() == MVT::v2i32);
237 // Legalize ret v2i32 -> ret 2 x i32 (Basically: do what would
238 // happen by default if this wasn't a legal type)
239
240 SDValue Part0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
241 Arg,
242 DAG.getConstant(0, DL, getVectorIdxTy(DAG.getDataLayout())));
243 SDValue Part1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
244 Arg,
245 DAG.getConstant(1, DL, getVectorIdxTy(DAG.getDataLayout())));
246
247 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Part0, Flag);
248 Flag = Chain.getValue(1);
249 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
250 VA = RVLocs[++i]; // skip ahead to next loc
251 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Part1,
252 Flag);
253 } else
254 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Arg, Flag);
210255
211256 // Guarantee that all emitted copies are stuck together with flags.
212257 Flag = Chain.getValue(1);
374419
375420 if (VA.isRegLoc()) {
376421 if (VA.needsCustom()) {
377 assert(VA.getLocVT() == MVT::f64);
422 assert(VA.getLocVT() == MVT::f64 || VA.getLocVT() == MVT::v2i32);
423
378424 unsigned VRegHi = RegInfo.createVirtualRegister(&SP::IntRegsRegClass);
379425 MF.getRegInfo().addLiveIn(VA.getLocReg(), VRegHi);
380426 SDValue HiVal = DAG.getCopyFromReg(Chain, dl, VRegHi, MVT::i32);
397443 }
398444 SDValue WholeValue =
399445 DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, LoVal, HiVal);
400 WholeValue = DAG.getNode(ISD::BITCAST, dl, MVT::f64, WholeValue);
446 WholeValue = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), WholeValue);
401447 InVals.push_back(WholeValue);
402448 continue;
403449 }
421467 auto PtrVT = getPointerTy(DAG.getDataLayout());
422468
423469 if (VA.needsCustom()) {
424 assert(VA.getValVT() == MVT::f64);
470       assert(VA.getValVT() == MVT::f64 || VA.getValVT() == MVT::v2i32);
425471 // If it is double-word aligned, just load.
426472 if (Offset % 8 == 0) {
427473 int FI = MF.getFrameInfo()->CreateFixedObject(8,
453499
454500 SDValue WholeValue =
455501 DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, LoVal, HiVal);
456 WholeValue = DAG.getNode(ISD::BITCAST, dl, MVT::f64, WholeValue);
502 WholeValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), WholeValue);
457503 InVals.push_back(WholeValue);
458504 continue;
459505 }
787833 }
788834
789835 if (VA.needsCustom()) {
790 assert(VA.getLocVT() == MVT::f64);
836 assert(VA.getLocVT() == MVT::f64 || VA.getLocVT() == MVT::v2i32);
791837
792838 if (VA.isMemLoc()) {
793839 unsigned Offset = VA.getLocMemOffset() + StackOffset;
803849 }
804850 }
805851
806 SDValue StackPtr = DAG.CreateStackTemporary(MVT::f64, MVT::i32);
807 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl,
808 Arg, StackPtr, MachinePointerInfo(),
809 false, false, 0);
810 // Sparc is big-endian, so the high part comes first.
811 SDValue Hi = DAG.getLoad(MVT::i32, dl, Store, StackPtr,
812 MachinePointerInfo(), false, false, false, 0);
813 // Increment the pointer to the other half.
814 StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
815 DAG.getIntPtrConstant(4, dl));
816 // Load the low part.
817 SDValue Lo = DAG.getLoad(MVT::i32, dl, Store, StackPtr,
818 MachinePointerInfo(), false, false, false, 0);
852 if (VA.getLocVT() == MVT::f64) {
853         // Move the float value from the float registers into the
854 // integer registers.
855
856 // TODO: this conversion is done in two steps, because
857 // f64->i64 conversion is done efficiently, and i64->v2i32 is
858 // basically a no-op. But f64->v2i32 is NOT done efficiently
859 // for some reason.
860 Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
861 Arg = DAG.getNode(ISD::BITCAST, dl, MVT::v2i32, Arg);
862 }
863
864 SDValue Part0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
865 Arg,
866 DAG.getConstant(0, dl, getVectorIdxTy(DAG.getDataLayout())));
867 SDValue Part1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
868 Arg,
869 DAG.getConstant(1, dl, getVectorIdxTy(DAG.getDataLayout())));
819870
820871 if (VA.isRegLoc()) {
821 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Hi));
872 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Part0));
822873 assert(i+1 != e);
823874 CCValAssign &NextVA = ArgLocs[++i];
824875 if (NextVA.isRegLoc()) {
825 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Lo));
876 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Part1));
826877 } else {
827 // Store the low part in stack.
878 // Store the second part in stack.
828879 unsigned Offset = NextVA.getLocMemOffset() + StackOffset;
829880 SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
830881 SDValue PtrOff = DAG.getIntPtrConstant(Offset, dl);
831882 PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
832 MemOpChains.push_back(DAG.getStore(Chain, dl, Lo, PtrOff,
883 MemOpChains.push_back(DAG.getStore(Chain, dl, Part1, PtrOff,
833884 MachinePointerInfo(),
834885 false, false, 0));
835886 }
836887 } else {
837888 unsigned Offset = VA.getLocMemOffset() + StackOffset;
838 // Store the high part.
889 // Store the first part.
839890 SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
840891 SDValue PtrOff = DAG.getIntPtrConstant(Offset, dl);
841892 PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
842 MemOpChains.push_back(DAG.getStore(Chain, dl, Hi, PtrOff,
893 MemOpChains.push_back(DAG.getStore(Chain, dl, Part0, PtrOff,
843894 MachinePointerInfo(),
844895 false, false, 0));
845 // Store the low part.
896 // Store the second part.
846897 PtrOff = DAG.getIntPtrConstant(Offset + 4, dl);
847898 PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
848 MemOpChains.push_back(DAG.getStore(Chain, dl, Lo, PtrOff,
899 MemOpChains.push_back(DAG.getStore(Chain, dl, Part1, PtrOff,
849900 MachinePointerInfo(),
850901 false, false, 0));
851902 }
13761427 addRegisterClass(MVT::f32, &SP::FPRegsRegClass);
13771428 addRegisterClass(MVT::f64, &SP::DFPRegsRegClass);
13781429 addRegisterClass(MVT::f128, &SP::QFPRegsRegClass);
1379 if (Subtarget->is64Bit())
1430 if (Subtarget->is64Bit()) {
13801431 addRegisterClass(MVT::i64, &SP::I64RegsRegClass);
1432 } else {
1433 // On 32bit sparc, we define a double-register 32bit register
1434 // class, as well. This is modeled in LLVM as a 2-vector of i32.
1435 addRegisterClass(MVT::v2i32, &SP::IntPairRegClass);
1436
1437 // ...but almost all operations must be expanded, so set that as
1438 // the default.
1439 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) {
1440 setOperationAction(Op, MVT::v2i32, Expand);
1441 }
1442 // Truncating/extending stores/loads are also not supported.
1443 for (MVT VT : MVT::integer_vector_valuetypes()) {
1444 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i32, Expand);
1445 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v2i32, Expand);
1446 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i32, Expand);
1447
1448 setLoadExtAction(ISD::SEXTLOAD, MVT::v2i32, VT, Expand);
1449 setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i32, VT, Expand);
1450 setLoadExtAction(ISD::EXTLOAD, MVT::v2i32, VT, Expand);
1451
1452 setTruncStoreAction(VT, MVT::v2i32, Expand);
1453 setTruncStoreAction(MVT::v2i32, VT, Expand);
1454 }
1455 // However, load and store *are* legal.
1456 setOperationAction(ISD::LOAD, MVT::v2i32, Legal);
1457 setOperationAction(ISD::STORE, MVT::v2i32, Legal);
1458 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Legal);
1459 setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Legal);
1460
1461 // And we need to promote i64 loads/stores into vector load/store
1462 setOperationAction(ISD::LOAD, MVT::i64, Custom);
1463 setOperationAction(ISD::STORE, MVT::i64, Custom);
1464
1465 // Sadly, this doesn't work:
1466 // AddPromotedToType(ISD::LOAD, MVT::i64, MVT::v2i32);
1467 // AddPromotedToType(ISD::STORE, MVT::i64, MVT::v2i32);
1468 }
13811469
13821470 // Turn FP extload into load/fextend
13831471 for (MVT VT : MVT::fp_valuetypes()) {
26032691 return DAG.getMergeValues(Ops, dl);
26042692 }
26052693
2694 static SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG)
2695 {
2696   LoadSDNode *LdNode = cast<LoadSDNode>(Op.getNode());
2697
2698 EVT MemVT = LdNode->getMemoryVT();
2699 if (MemVT == MVT::f128)
2700 return LowerF128Load(Op, DAG);
2701
2702 return Op;
2703 }
2704
26062705 // Lower a f128 store into two f64 stores.
26072706 static SDValue LowerF128Store(SDValue Op, SelectionDAG &DAG) {
26082707 SDLoc dl(Op);
26472746 return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
26482747 }
26492748
2749 static SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG)
2750 {
2751 SDLoc dl(Op);
2752   StoreSDNode *St = cast<StoreSDNode>(Op.getNode());
2753
2754 EVT MemVT = St->getMemoryVT();
2755 if (MemVT == MVT::f128)
2756 return LowerF128Store(Op, DAG);
2757
2758 if (MemVT == MVT::i64) {
2759 // Custom handling for i64 stores: turn it into a bitcast and a
2760 // v2i32 store.
2761 SDValue Val = DAG.getNode(ISD::BITCAST, dl, MVT::v2i32, St->getValue());
2762 SDValue Chain = DAG.getStore(
2763 St->getChain(), dl, Val, St->getBasePtr(), St->getPointerInfo(),
2764 St->isVolatile(), St->isNonTemporal(), St->getAlignment(),
2765 St->getAAInfo());
2766 return Chain;
2767 }
2768
2769 return SDValue();
2770 }
2771
26502772 static SDValue LowerFNEGorFABS(SDValue Op, SelectionDAG &DAG, bool isV9) {
26512773 assert((Op.getOpcode() == ISD::FNEG || Op.getOpcode() == ISD::FABS)
26522774 && "invalid opcode");
27842906 // Otherwise, expand with a fence.
27852907 return SDValue();
27862908 }
2787
27882909
27892910 SDValue SparcTargetLowering::
27902911 LowerOperation(SDValue Op, SelectionDAG &DAG) const {
28202941 case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG,
28212942 Subtarget);
28222943
2823 case ISD::LOAD: return LowerF128Load(Op, DAG);
2824 case ISD::STORE: return LowerF128Store(Op, DAG);
2944 case ISD::LOAD: return LowerLOAD(Op, DAG);
2945 case ISD::STORE: return LowerSTORE(Op, DAG);
28252946 case ISD::FADD: return LowerF128Op(Op, DAG,
28262947 getLibcallName(RTLIB::ADD_F128), 2);
28272948 case ISD::FSUB: return LowerF128Op(Op, DAG,
31513272 if (Constraint.size() == 1) {
31523273 switch (Constraint[0]) {
31533274 case 'r':
3154 return std::make_pair(0U, &SP::IntRegsRegClass);
3275 if (VT == MVT::v2i32)
3276 return std::make_pair(0U, &SP::IntPairRegClass);
3277 else
3278 return std::make_pair(0U, &SP::IntRegsRegClass);
31553279 }
3156 } else if (!Constraint.empty() && Constraint.size() <= 5
3280 } else if (!Constraint.empty() && Constraint.size() <= 5
31573281 && Constraint[0] == '{' && *(Constraint.end()-1) == '}') {
31583282 // constraint = '{r}'
31593283 // Remove the braces from around the name.
32293353 getLibcallName(libCall),
32303354 1));
32313355 return;
3232 }
3233 }
3356 case ISD::LOAD: {
3357     LoadSDNode *Ld = cast<LoadSDNode>(N);
3358 // Custom handling only for i64: turn i64 load into a v2i32 load,
3359 // and a bitcast.
3360 if (Ld->getValueType(0) != MVT::i64 || Ld->getMemoryVT() != MVT::i64)
3361 return;
3362
3363 SDLoc dl(N);
3364 SDValue LoadRes = DAG.getExtLoad(
3365 Ld->getExtensionType(), dl, MVT::v2i32,
3366 Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(),
3367 MVT::v2i32, Ld->isVolatile(), Ld->isNonTemporal(),
3368 Ld->isInvariant(), Ld->getAlignment(), Ld->getAAInfo());
3369
3370 SDValue Res = DAG.getNode(ISD::BITCAST, dl, MVT::i64, LoadRes);
3371 Results.push_back(Res);
3372 Results.push_back(LoadRes.getValue(1));
3373 return;
3374 }
3375 }
3376 }
166166 }
167167
168168 void ReplaceNodeResults(SDNode *N,
169   SmallVectorImpl<SDValue>& Results,
170 SelectionDAG &DAG) const override;
169   SmallVectorImpl<SDValue>& Results,
170 SelectionDAG &DAG) const override;
171171
172172 MachineBasicBlock *expandSelectCC(MachineInstr *MI, MachineBasicBlock *BB,
173173 unsigned BROpcode) const;
283283 unsigned numSubRegs = 0;
284284 unsigned movOpc = 0;
285285 const unsigned *subRegIdx = nullptr;
286
286 bool ExtraG0 = false;
287
288 const unsigned DW_SubRegsIdx[] = { SP::sub_even, SP::sub_odd };
287289 const unsigned DFP_FP_SubRegsIdx[] = { SP::sub_even, SP::sub_odd };
288290 const unsigned QFP_DFP_SubRegsIdx[] = { SP::sub_even64, SP::sub_odd64 };
289291 const unsigned QFP_FP_SubRegsIdx[] = { SP::sub_even, SP::sub_odd,
293295 if (SP::IntRegsRegClass.contains(DestReg, SrcReg))
294296 BuildMI(MBB, I, DL, get(SP::ORrr), DestReg).addReg(SP::G0)
295297 .addReg(SrcReg, getKillRegState(KillSrc));
296 else if (SP::FPRegsRegClass.contains(DestReg, SrcReg))
298 else if (SP::IntPairRegClass.contains(DestReg, SrcReg)) {
299 subRegIdx = DW_SubRegsIdx;
300 numSubRegs = 2;
301 movOpc = SP::ORrr;
302 ExtraG0 = true;
303 } else if (SP::FPRegsRegClass.contains(DestReg, SrcReg))
297304 BuildMI(MBB, I, DL, get(SP::FMOVS), DestReg)
298305 .addReg(SrcReg, getKillRegState(KillSrc));
299306 else if (SP::DFPRegsRegClass.contains(DestReg, SrcReg)) {
346353 unsigned Src = TRI->getSubReg(SrcReg, subRegIdx[i]);
347354 assert(Dst && Src && "Bad sub-register");
348355
349 MovMI = BuildMI(MBB, I, DL, get(movOpc), Dst).addReg(Src);
356 MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(movOpc), Dst);
357 if (ExtraG0)
358 MIB.addReg(SP::G0);
359 MIB.addReg(Src);
360 MovMI = MIB.getInstr();
350361 }
351362 // Add implicit super-register defs and kills to the last MovMI.
352363 MovMI->addRegisterDefined(DestReg, TRI);
371382 MFI.getObjectAlignment(FI));
372383
373384 // On the order of operands here: think "[FrameIdx + 0] = SrcReg".
374 if (RC == &SP::I64RegsRegClass)
385 if (RC == &SP::I64RegsRegClass)
375386 BuildMI(MBB, I, DL, get(SP::STXri)).addFrameIndex(FI).addImm(0)
376387 .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
377388 else if (RC == &SP::IntRegsRegClass)
378389 BuildMI(MBB, I, DL, get(SP::STri)).addFrameIndex(FI).addImm(0)
390 .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
391 else if (RC == &SP::IntPairRegClass)
392 BuildMI(MBB, I, DL, get(SP::STDri)).addFrameIndex(FI).addImm(0)
379393 .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
380394 else if (RC == &SP::FPRegsRegClass)
381395 BuildMI(MBB, I, DL, get(SP::STFri)).addFrameIndex(FI).addImm(0)
414428 else if (RC == &SP::IntRegsRegClass)
415429 BuildMI(MBB, I, DL, get(SP::LDri), DestReg).addFrameIndex(FI).addImm(0)
416430 .addMemOperand(MMO);
431 else if (RC == &SP::IntPairRegClass)
432 BuildMI(MBB, I, DL, get(SP::LDDri), DestReg).addFrameIndex(FI).addImm(0)
433 .addMemOperand(MMO);
417434 else if (RC == &SP::FPRegsRegClass)
418435 BuildMI(MBB, I, DL, get(SP::LDFri), DestReg).addFrameIndex(FI).addImm(0)
419436 .addMemOperand(MMO);
407407 defm LD : LoadA<"ld", 0b000000, 0b010000, load, IntRegs, i32>;
408408 }
409409
410 let DecoderMethod = "DecodeLoadIntPair" in
411 defm LDD : LoadA<"ldd", 0b000011, 0b010011, load, IntPair, v2i32>;
412
410413 // Section B.2 - Load Floating-point Instructions, p. 92
411414 let DecoderMethod = "DecodeLoadFP" in
412415 defm LDF : Load<"ld", 0b100000, load, FPRegs, f32>;
422425 defm STH : StoreA<"sth", 0b000110, 0b010110, truncstorei16, IntRegs, i32>;
423426 defm ST : StoreA<"st", 0b000100, 0b010100, store, IntRegs, i32>;
424427 }
428
429 let DecoderMethod = "DecodeStoreIntPair" in
430 defm STD : StoreA<"std", 0b000111, 0b010111, store, IntPair, v2i32>;
425431
426432 // Section B.5 - Store Floating-point Instructions, p. 97
427433 let DecoderMethod = "DecodeStoreFP" in
13261332 def : Pat<(atomic_store ADDRrr:$dst, i32:$val), (STrr ADDRrr:$dst, $val)>;
13271333 def : Pat<(atomic_store ADDRri:$dst, i32:$val), (STri ADDRri:$dst, $val)>;
13281334
1335 // extract_vector
1336 def : Pat<(vector_extract (v2i32 IntPair:$Rn), 0),
1337 (i32 (EXTRACT_SUBREG IntPair:$Rn, sub_even))>;
1338 def : Pat<(vector_extract (v2i32 IntPair:$Rn), 1),
1339 (i32 (EXTRACT_SUBREG IntPair:$Rn, sub_odd))>;
1340
1341 // build_vector
1342 def : Pat<(build_vector (i32 IntRegs:$a1), (i32 IntRegs:$a2)),
1343 (INSERT_SUBREG
1344 (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), (i32 IntRegs:$a1), sub_even),
1345 (i32 IntRegs:$a2), sub_odd)>;
1346
13291347
13301348 include "SparcInstr64Bit.td"
13311349 include "SparcInstrVIS.td"
7373 Reserved.set(SP::G0);
7474 Reserved.set(SP::G6);
7575 Reserved.set(SP::G7);
76
77 // Also reserve the register pair aliases covering the above
78 // registers, with the same conditions.
79 Reserved.set(SP::G0_G1);
80 if (ReserveAppRegisters)
81 Reserved.set(SP::G2_G3);
82 if (ReserveAppRegisters || !Subtarget.is64Bit())
83 Reserved.set(SP::G4_G5);
84
85 Reserved.set(SP::O6_O7);
86 Reserved.set(SP::I6_I7);
87 Reserved.set(SP::G6_G7);
7688
7789 // Unaliased double registers are not available in non-V9 targets.
7890 if (!Subtarget.isV9()) {
209221 unsigned SparcRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
210222 return SP::I6;
211223 }
212
3131 // Ri - 32-bit integer registers
3232 class Ri<bits<5> Enc, string n> : SparcReg<n, Enc>;
3333
34 // Rdi - pairs of 32-bit integer registers
35 class Rdi<bits<5> Enc, string n, list<Register> subregs> : SparcReg<n, Enc> {
36 let SubRegs = subregs;
37 let SubRegIndices = [sub_even, sub_odd];
38 let CoveredBySubRegs = 1;
39 }
3440 // Rf - 32-bit floating-point registers
3541 class Rf<bits<5> Enc, string n> : SparcReg<n, Enc>;
3642
216222 def Q14 : Rq<25, "F56", [D28, D29]>;
217223 def Q15 : Rq<29, "F60", [D30, D31]>;
218224
225 // Aliases of the integer registers used for LDD/STD double-word operations
226 def G0_G1 : Rdi<0, "G0", [G0, G1]>;
227 def G2_G3 : Rdi<2, "G2", [G2, G3]>;
228 def G4_G5 : Rdi<4, "G4", [G4, G5]>;
229 def G6_G7 : Rdi<6, "G6", [G6, G7]>;
230 def O0_O1 : Rdi<8, "O0", [O0, O1]>;
231 def O2_O3 : Rdi<10, "O2", [O2, O3]>;
232 def O4_O5 : Rdi<12, "O4", [O4, O5]>;
233 def O6_O7 : Rdi<14, "O6", [O6, O7]>;
234 def L0_L1 : Rdi<16, "L0", [L0, L1]>;
235 def L2_L3 : Rdi<18, "L2", [L2, L3]>;
236 def L4_L5 : Rdi<20, "L4", [L4, L5]>;
237 def L6_L7 : Rdi<22, "L6", [L6, L7]>;
238 def I0_I1 : Rdi<24, "I0", [I0, I1]>;
239 def I2_I3 : Rdi<26, "I2", [I2, I3]>;
240 def I4_I5 : Rdi<28, "I4", [I4, I5]>;
241 def I6_I7 : Rdi<30, "I6", [I6, I7]>;
242
219243 // Register classes.
220244 //
221245 // FIXME: the register order should be defined in terms of the preferred
230254 (sequence "L%u", 0, 7),
231255 (sequence "O%u", 0, 7))>;
232256
257 // Should be in the same order as IntRegs.
258 def IntPair : RegisterClass<"SP", [v2i32], 64,
259 (add I0_I1, I2_I3, I4_I5, I6_I7,
260 G0_G1, G2_G3, G4_G5, G6_G7,
261 L0_L1, L2_L3, L4_L5, L6_L7,
262 O0_O1, O2_O3, O4_O5, O6_O7)>;
263
233264 // Register class for 64-bit mode, with a 64-bit spill slot size.
234265 // These are the same as the 32-bit registers, so TableGen will consider this
235266 // to be a sub-class of IntRegs. That works out because requiring a 64-bit
6464 * Mon Feb 09 2003 Brian R. Gaeke
6565 - Initial working version of RPM spec file.
6666
67
8383 ret i64 %r
8484 }
8585
86 ; CHECK-LABEL: load_store_64bit:
87 ; CHECK: ldd [%o0], %o2
88 ; CHECK: addcc %o3, 3, %o5
89 ; CHECK: addxcc %o2, 0, %o4
90 ; CHECK: retl
91 ; CHECK: std %o4, [%o1]
92 define void @load_store_64bit(i64* %x, i64* %y) {
93 entry:
94 %0 = load i64, i64* %x
95 %add = add nsw i64 %0, 3
96 store i64 %add, i64* %y
97 ret void
98 }
None ; RUN: llc -march=sparc -no-integrated-as <%s | FileCheck %s
0 ; RUN: llc -march=sparc <%s | FileCheck %s
11
22 ; CHECK-LABEL: test_constraint_r
33 ; CHECK: add %o1, %o0, %o0
77 ret i32 %0
88 }
99
10 ; CHECK-LABEL: test_constraint_I
10 ; CHECK-LABEL: test_constraint_I:
1111 ; CHECK: add %o0, 1023, %o0
1212 define i32 @test_constraint_I(i32 %a) {
1313 entry:
1515 ret i32 %0
1616 }
1717
18 ; CHECK-LABEL: test_constraint_I_neg
18 ; CHECK-LABEL: test_constraint_I_neg:
1919 ; CHECK: add %o0, -4096, %o0
2020 define i32 @test_constraint_I_neg(i32 %a) {
2121 entry:
2323 ret i32 %0
2424 }
2525
26 ; CHECK-LABEL: test_constraint_I_largeimm
26 ; CHECK-LABEL: test_constraint_I_largeimm:
2727 ; CHECK: sethi 9, [[R0:%[gilo][0-7]]]
2828 ; CHECK: or [[R0]], 784, [[R1:%[gilo][0-7]]]
2929 ; CHECK: add %o0, [[R1]], %o0
3333 ret i32 %0
3434 }
3535
36 ; CHECK-LABEL: test_constraint_reg
36 ; CHECK-LABEL: test_constraint_reg:
3737 ; CHECK: ldda [%o1] 43, %g2
38 ; CHECK: ldda [%o1] 43, %g3
38 ; CHECK: ldda [%o1] 43, %g4
3939 define void @test_constraint_reg(i32 %s, i32* %ptr) {
4040 entry:
4141 %0 = tail call i64 asm sideeffect "ldda [$1] $2, $0", "={r2},r,n"(i32* %ptr, i32 43)
42 %1 = tail call i64 asm sideeffect "ldda [$1] $2, $0", "={g3},r,n"(i32* %ptr, i32 43)
42 %1 = tail call i64 asm sideeffect "ldda [$1] $2, $0", "={g4},r,n"(i32* %ptr, i32 43)
4343 ret void
4444 }
45
46 ;; Ensure that i64 args to asm are allocated to the IntPair register class.
47 ;; Also checks that register renaming for leaf proc works.
48 ; CHECK-LABEL: test_constraint_r_i64:
49 ; CHECK: mov %o0, %o5
50 ; CHECK: sra %o5, 31, %o4
51 ; CHECK: std %o4, [%o1]
52 define i32 @test_constraint_r_i64(i32 %foo, i64* %out, i32 %o) {
53 entry:
54 %conv = sext i32 %foo to i64
55 tail call void asm sideeffect "std $0, [$1]", "r,r,~{memory}"(i64 %conv, i64* %out)
56 ret i32 %o
57 }
58
59 ;; Same test without leaf-proc opt
60 ; CHECK-LABEL: test_constraint_r_i64_noleaf:
61 ; CHECK: mov %i0, %i5
62 ; CHECK: sra %i5, 31, %i4
63 ; CHECK: std %i4, [%i1]
64 define i32 @test_constraint_r_i64_noleaf(i32 %foo, i64* %out, i32 %o) #0 {
65 entry:
66 %conv = sext i32 %foo to i64
67 tail call void asm sideeffect "std $0, [$1]", "r,r,~{memory}"(i64 %conv, i64* %out)
68 ret i32 %o
69 }
70 attributes #0 = { "no-frame-pointer-elim"="true" }
71
72 ;; Ensures that tied in and out gets allocated properly.
73 ; CHECK-LABEL: test_i64_inout:
74 ; CHECK: sethi 0, %o2
75 ; CHECK: mov 5, %o3
76 ; CHECK: xor %o2, %g0, %o2
77 ; CHECK: mov %o2, %o0
78 ; CHECK: ret
79 define i64 @test_i64_inout() {
80 entry:
81 %0 = call i64 asm sideeffect "xor $1, %g0, $0", "=r,0,~{i1}"(i64 5);
82 ret i64 %0
83 }
0 ; RUN: llc -march=sparc < %s | FileCheck %s
1
2 @g = common global [32 x i32] zeroinitializer, align 16
3 @h = common global [16 x i64] zeroinitializer, align 16
4
5 ;; Ensures that we don't use registers which are supposed to be reserved.
6
7 ; CHECK-LABEL: use_all_i32_regs:
8 ; CHECK-NOT: %g0
9 ; CHECK-NOT: %g1
10 ; CHECK-NOT: %g5
11 ; CHECK-NOT: %g6
12 ; CHECK-NOT: %g7
13 ; CHECK-NOT: %o6
14 ; CHECK-NOT: %i6
15 ; CHECK-NOT: %i7
16 ; CHECK: ret
17 define void @use_all_i32_regs() {
18 entry:
19 %0 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 0), align 16
20 %1 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 1), align 4
21 %2 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 2), align 8
22 %3 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 3), align 4
23 %4 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 4), align 16
24 %5 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 5), align 4
25 %6 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 6), align 8
26 %7 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 7), align 4
27 %8 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 8), align 16
28 %9 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 9), align 4
29 %10 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 10), align 8
30 %11 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 11), align 4
31 %12 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 12), align 16
32 %13 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 13), align 4
33 %14 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 14), align 8
34 %15 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 15), align 4
35 %16 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 16), align 16
36 %17 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 17), align 4
37 %18 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 18), align 8
38 %19 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 19), align 4
39 %20 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 20), align 16
40 %21 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 21), align 4
41 %22 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 22), align 8
42 %23 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 23), align 4
43 %24 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 24), align 16
44 %25 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 25), align 4
45 %26 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 26), align 8
46 %27 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 27), align 4
47 %28 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 28), align 16
48 %29 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 29), align 4
49 %30 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 30), align 8
50 %31 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 31), align 4
51 store volatile i32 %1, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 0), align 16
52 store volatile i32 %2, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 1), align 4
53 store volatile i32 %3, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 2), align 8
54 store volatile i32 %4, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 3), align 4
55 store volatile i32 %5, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 4), align 16
56 store volatile i32 %6, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 5), align 4
57 store volatile i32 %7, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 6), align 8
58 store volatile i32 %8, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 7), align 4
59 store volatile i32 %9, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 8), align 16
60 store volatile i32 %10, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 9), align 4
61 store volatile i32 %11, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 10), align 8
62 store volatile i32 %12, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 11), align 4
63 store volatile i32 %13, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 12), align 16
64 store volatile i32 %14, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 13), align 4
65 store volatile i32 %15, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 14), align 8
66 store volatile i32 %16, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 15), align 4
67 store volatile i32 %17, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 16), align 16
68 store volatile i32 %18, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 17), align 4
69 store volatile i32 %19, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 18), align 8
70 store volatile i32 %20, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 19), align 4
71 store volatile i32 %21, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 20), align 16
72 store volatile i32 %22, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 21), align 4
73 store volatile i32 %23, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 22), align 8
74 store volatile i32 %24, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 23), align 4
75 store volatile i32 %25, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 24), align 16
76 store volatile i32 %26, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 25), align 4
77 store volatile i32 %27, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 26), align 8
78 store volatile i32 %28, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 27), align 4
79 store volatile i32 %29, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 28), align 16
80 store volatile i32 %30, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 29), align 4
81 store volatile i32 %31, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 30), align 8
82 store volatile i32 %0, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 31), align 4
83 ret void
84 }
85
86
87 ; CHECK-LABEL: use_all_i64_regs:
88 ; CHECK-NOT: %g0
89 ; CHECK-NOT: %g1
90 ; CHECK-NOT: %g4
91 ; CHECK-NOT: %g5
92 ; CHECK-NOT: %g6
93 ; CHECK-NOT: %g7
94 ; CHECK-NOT: %o6
95 ; CHECK-NOT: %o7
96 ; CHECK-NOT: %i6
97 ; CHECK-NOT: %i7
98 ; CHECK: ret
99 define void @use_all_i64_regs() {
100 entry:
101 %0 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 0), align 16
102 %1 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 1), align 4
103 %2 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 2), align 8
104 %3 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 3), align 4
105 %4 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 4), align 16
106 %5 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 5), align 4
107 %6 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 6), align 8
108 %7 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 7), align 4
109 %8 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 8), align 16
110 %9 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 9), align 4
111 %10 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 10), align 8
112 %11 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 11), align 4
113 %12 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 12), align 16
114 %13 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 13), align 4
115 %14 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 14), align 8
116 %15 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 15), align 4
117 store volatile i64 %1, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 0), align 16
118 store volatile i64 %2, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 1), align 4
119 store volatile i64 %3, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 2), align 8
120 store volatile i64 %4, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 3), align 4
121 store volatile i64 %5, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 4), align 16
122 store volatile i64 %6, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 5), align 4
123 store volatile i64 %7, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 6), align 8
124 store volatile i64 %8, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 7), align 4
125 store volatile i64 %9, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 8), align 16
126 store volatile i64 %10, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 9), align 4
127 store volatile i64 %11, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 10), align 8
128 store volatile i64 %12, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 11), align 4
129 store volatile i64 %13, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 12), align 16
130 store volatile i64 %14, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 13), align 4
131 store volatile i64 %15, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 14), align 8
132 store volatile i64 %0, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 15), align 4
133 ret void
134 }
0 ; RUN: llc -march=sparc < %s | FileCheck %s
1
2 ;; Ensure that spills and reloads work for various types on
3 ;; sparcv8.
4
5 ;; For i32/i64 tests, use an asm statement which clobbers most
6 ;; registers to ensure the spill will happen.
7
8 ; CHECK-LABEL: test_i32_spill:
9 ; CHECK: and %i0, %i1, %o0
10 ; CHECK: st %o0, [%fp+{{.+}}]
11 ; CHECK: add %o0, %o0, %g0
12 ; CHECK: ld [%fp+{{.+}}, %i0
13 define i32 @test_i32_spill(i32 %a, i32 %b) {
14 entry:
15 %r0 = and i32 %a, %b
16 ; The clobber list has all registers except g0/o0. (Only o0 is usable.)
17 %0 = call i32 asm sideeffect "add $0,$1,%g0", "=r,0,~{i0},~{i1},~{i2},~{i3},~{i4},~{i5},~{i6},~{i7},~{g1},~{g2},~{g3},~{g4},~{g5},~{g6},~{g7},~{l0},~{l1},~{l2},~{l3},~{l4},~{l5},~{l6},~{l7},~{o1},~{o2},~{o3},~{o4},~{o5},~{o6},~{o7}"(i32 %r0)
18 ret i32 %r0
19 }
20
21 ; CHECK-LABEL: test_i64_spill:
22 ; CHECK: and %i0, %i2, %o0
23 ; CHECK: and %i1, %i3, %o1
24 ; CHECK: std %o0, [%fp+{{.+}}]
25 ; CHECK: add %o0, %o0, %g0
26 ; CHECK: ldd [%fp+{{.+}}, %i0
27 define i64 @test_i64_spill(i64 %a, i64 %b) {
28 entry:
29 %r0 = and i64 %a, %b
30 ; The clobber list has all registers except g0,g1,o0,o1. (Only o0/o1 are a usable pair)
31 ; So, o0/o1 must be used.
32 %0 = call i64 asm sideeffect "add $0,$1,%g0", "=r,0,~{i0},~{i1},~{i2},~{i3},~{i4},~{i5},~{i6},~{i7},~{g2},~{g3},~{g4},~{g5},~{g6},~{g7},~{l0},~{l1},~{l2},~{l3},~{l4},~{l5},~{l6},~{l7},~{o2},~{o3},~{o4},~{o5},~{o7}"(i64 %r0)
33 ret i64 %r0
34 }
35
36 ;; For float/double tests, a call is a suitable clobber as *all* FPU
37 ;; registers are caller-save on sparcv8.
38
39 ; CHECK-LABEL: test_float_spill:
40 ; CHECK: fadds %f1, %f0, [[R:%[f][0-31]]]
41 ; CHECK: st [[R]], [%fp+{{.+}}]
42 ; CHECK: call
43 ; CHECK: ld [%fp+{{.+}}, %f0
44 declare float @foo_float(float)
45 define float @test_float_spill(float %a, float %b) {
46 entry:
47 %r0 = fadd float %a, %b
48 %0 = call float @foo_float(float %r0)
49 ret float %r0
50 }
51
52 ; CHECK-LABEL: test_double_spill:
53 ; CHECK: faddd %f2, %f0, [[R:%[f][0-31]]]
54 ; CHECK: std [[R]], [%fp+{{.+}}]
55 ; CHECK: call
56 ; CHECK: ldd [%fp+{{.+}}, %f0
57 declare double @foo_double(double)
58 define double @test_double_spill(double %a, double %b) {
59 entry:
60 %r0 = fadd double %a, %b
61 %0 = call double @foo_double(double %r0)
62 ret double %r0
63 }
220220
221221 # CHECK: swapa [%g1] 131, %o2
222222 0xd4 0xf8 0x50 0x60
223
224 # CHECK: ldd [%i0+%l6], %o2
225 0xd4 0x1e 0x00 0x16
226
227 # CHECK: ldd [%i0+32], %o2
228 0xd4 0x1e 0x20 0x20
229
230 # CHECK: ldd [%g1], %o2
231 0xd4 0x18 0x60 0x00
232
233 # CHECK: ldd [%g1], %o2
234 0xd4 0x18 0x40 0x00
235
236 # CHECK: std %o2, [%i0+%l6]
237 0xd4 0x3e 0x00 0x16
238
239 # CHECK: std %o2, [%i0+32]
240 0xd4 0x3e 0x20 0x20
241
242 # CHECK: std %o2, [%g1]
243 0xd4 0x38 0x60 0x00
244
245 # CHECK: std %o2, [%g1]
246 0xd4 0x38 0x40 0x00
4545 ! CHECK: lda [%i0+%l6] 131, %o2 ! encoding: [0xd4,0x86,0x10,0x76]
4646 lda [%i0 + %l6] 131, %o2
4747
48 ! CHECK: ldd [%i0+%l6], %o2 ! encoding: [0xd4,0x1e,0x00,0x16]
49 ldd [%i0 + %l6], %o2
50 ! CHECK: ldd [%i0+32], %o2 ! encoding: [0xd4,0x1e,0x20,0x20]
51 ldd [%i0 + 32], %o2
52 ! CHECK: ldd [%g1], %o2 ! encoding: [0xd4,0x18,0x40,0x00]
53 ldd [%g1], %o2
54 ! CHECK: ldda [%i0+%l6] 131, %o2 ! encoding: [0xd4,0x9e,0x10,0x76]
55 ldda [%i0 + %l6] 131, %o2
56
4857 ! CHECK: stb %o2, [%i0+%l6] ! encoding: [0xd4,0x2e,0x00,0x16]
4958 stb %o2, [%i0 + %l6]
5059 ! CHECK: stb %o2, [%i0+32] ! encoding: [0xd4,0x2e,0x20,0x20]
7281 ! CHECK: sta %o2, [%i0+%l6] 131 ! encoding: [0xd4,0xa6,0x10,0x76]
7382 sta %o2, [%i0 + %l6] 131
7483
84 ! CHECK: std %o2, [%i0+%l6] ! encoding: [0xd4,0x3e,0x00,0x16]
85 std %o2, [%i0 + %l6]
86 ! CHECK: std %o2, [%i0+32] ! encoding: [0xd4,0x3e,0x20,0x20]
87 std %o2, [%i0 + 32]
88 ! CHECK: std %o2, [%g1] ! encoding: [0xd4,0x38,0x40,0x00]
89 std %o2, [%g1]
90 ! CHECK: stda %o2, [%i0+%l6] 131 ! encoding: [0xd4,0xbe,0x10,0x76]
91 stda %o2, [%i0 + %l6] 131
92
7593 ! CHECK: flush %g1+%g2 ! encoding: [0x81,0xd8,0x40,0x02]
7694 flush %g1 + %g2
7795 ! CHECK: flush %g1+8 ! encoding: [0x81,0xd8,0x60,0x08]