commit b52bf6a
R600/SI: Prefer SALU instructions for bit shift operations

All shift operations will be selected as SALU instructions and then, if
necessary, lowered to VALU instructions in the SIFixSGPRCopies pass. This
allows us to do more operations on the SALU, which will improve performance
and is also required for implementing private memory using indirect
addressing, since the private memory pointers must stay in the scalar
registers.

This patch includes some fixes from Matt Arsenault.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194625 91177308-0d34-0410-b5e6-96231b3b80d8

Tom Stellard
17 changed files with 671 additions and 159 deletions.
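At a high level, the patch works in three steps: a new SALU bit in TSFlags marks every scalar-ALU instruction, the scalar shift patterns are given selection priority, and SIFixSGPRCopies rewrites a selected SALU instruction to its VALU twin when an operand turns out to live in a VGPR. Below is a minimal sketch of that last decision, written against the hooks this commit adds (isSALUInstr, isSALUOpSupportedOnVALU, moveToVALU, SIRegisterInfo::hasVGPRs). It is an illustration, not code from the patch; the real logic is in the SIFixSGPRCopies.cpp and SIInstrInfo.cpp hunks below.

static void lowerToVALUIfNeeded(MachineInstr &MI, const SIInstrInfo *TII,
                                const SIRegisterInfo *TRI,
                                const MachineRegisterInfo &MRI) {
  // Only SALU instructions with a known VALU equivalent are candidates.
  if (!TII->isSALUInstr(MI) || !TII->isSALUOpSupportedOnVALU(MI))
    return;

  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI.getOperand(i);
    if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
      continue;
    // The SALU cannot read VGPRs, so an instruction with a VGPR operand
    // (and, transitively, its users) is rewritten with the opcode from
    // getVALUOp().
    if (TRI->hasVGPRs(MRI.getRegClass(MO.getReg()))) {
      TII->moveToVALU(MI);
      return;
    }
  }
}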
1717 VOP1 = 1 << 5,
1818 VOP2 = 1 << 6,
1919 VOP3 = 1 << 7,
20 VOPC = 1 << 8
20 VOPC = 1 << 8,
21 SALU = 1 << 9
2122 };
2223 }
2324
6464 /// ultimately led to the creation of an illegal COPY.
6565 //===----------------------------------------------------------------------===//
6666
67 #define DEBUG_TYPE "sgpr-copies"
6768 #include "AMDGPU.h"
6869 #include "SIInstrInfo.h"
6970 #include "llvm/CodeGen/MachineFunctionPass.h"
71 #include "llvm/CodeGen/MachineInstrBuilder.h"
7072 #include "llvm/CodeGen/MachineRegisterInfo.h"
73 #include "llvm/Support/Debug.h"
7174 #include "llvm/Target/TargetMachine.h"
7275
7376 using namespace llvm;
7881
7982 private:
8083 static char ID;
81 const TargetRegisterClass *inferRegClass(const TargetRegisterInfo *TRI,
84 const TargetRegisterClass *inferRegClassFromUses(const SIRegisterInfo *TRI,
8285 const MachineRegisterInfo &MRI,
83 unsigned Reg) const;
86 unsigned Reg,
87 unsigned SubReg) const;
88 const TargetRegisterClass *inferRegClassFromDef(const SIRegisterInfo *TRI,
89 const MachineRegisterInfo &MRI,
90 unsigned Reg,
91 unsigned SubReg) const;
92 bool isVGPRToSGPRCopy(const MachineInstr &Copy, const SIRegisterInfo *TRI,
93 const MachineRegisterInfo &MRI) const;
8494
8595 public:
8696 SIFixSGPRCopies(TargetMachine &tm) : MachineFunctionPass(ID) { }
101111 return new SIFixSGPRCopies(tm);
102112 }
103113
104 /// This functions walks the use/def chains starting with the definition of
105 /// \p Reg until it finds an Instruction that isn't a COPY returns
106 /// the register class of that instruction.
107 const TargetRegisterClass *SIFixSGPRCopies::inferRegClass(
108 const TargetRegisterInfo *TRI,
114 static bool hasVGPROperands(const MachineInstr &MI, const SIRegisterInfo *TRI) {
115 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
116 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
117 if (!MI.getOperand(i).isReg() ||
118 !TargetRegisterInfo::isVirtualRegister(MI.getOperand(i).getReg()))
119 continue;
120
121 if (TRI->hasVGPRs(MRI.getRegClass(MI.getOperand(i).getReg())))
122 return true;
123 }
124 return false;
125 }
126
127 /// This function walks the use list of \p Reg until it finds an instruction
128 /// that isn't a COPY, and returns the register class of that instruction.
129 /// The returned class is narrowed to the \p SubReg sub-register class.
130 const TargetRegisterClass *SIFixSGPRCopies::inferRegClassFromUses(
131 const SIRegisterInfo *TRI,
109132 const MachineRegisterInfo &MRI,
110 unsigned Reg) const {
133 unsigned Reg,
134 unsigned SubReg) const {
111135 // The Reg parameter to the function must always be defined by either a PHI
112136 // or a COPY, therefore it cannot be a physical register.
113137 assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
114138 "Reg cannot be a physical register");
115139
116140 const TargetRegisterClass *RC = MRI.getRegClass(Reg);
141 RC = TRI->getSubRegClass(RC, SubReg);
117142 for (MachineRegisterInfo::use_iterator I = MRI.use_begin(Reg),
118143 E = MRI.use_end(); I != E; ++I) {
119144 switch (I->getOpcode()) {
120145 case AMDGPU::COPY:
121 RC = TRI->getCommonSubClass(RC, inferRegClass(TRI, MRI,
122 I->getOperand(0).getReg()));
146 RC = TRI->getCommonSubClass(RC, inferRegClassFromUses(TRI, MRI,
147 I->getOperand(0).getReg(),
148 I->getOperand(0).getSubReg()));
123149 break;
124150 }
125151 }
127153 return RC;
128154 }
129155
156 const TargetRegisterClass *SIFixSGPRCopies::inferRegClassFromDef(
157 const SIRegisterInfo *TRI,
158 const MachineRegisterInfo &MRI,
159 unsigned Reg,
160 unsigned SubReg) const {
161 if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
162 const TargetRegisterClass *RC = TRI->getPhysRegClass(Reg);
163 return TRI->getSubRegClass(RC, SubReg);
164 }
165 MachineInstr *Def = MRI.getVRegDef(Reg);
166 if (Def->getOpcode() != AMDGPU::COPY) {
167 return TRI->getSubRegClass(MRI.getRegClass(Reg), SubReg);
168 }
169
170 return inferRegClassFromDef(TRI, MRI, Def->getOperand(1).getReg(),
171 Def->getOperand(1).getSubReg());
172 }
173
174 bool SIFixSGPRCopies::isVGPRToSGPRCopy(const MachineInstr &Copy,
175 const SIRegisterInfo *TRI,
176 const MachineRegisterInfo &MRI) const {
177
178 unsigned DstReg = Copy.getOperand(0).getReg();
179 unsigned SrcReg = Copy.getOperand(1).getReg();
180 unsigned SrcSubReg = Copy.getOperand(1).getSubReg();
181 const TargetRegisterClass *DstRC = MRI.getRegClass(DstReg);
182
183 if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
184 DstRC == &AMDGPU::M0RegRegClass)
185 return false;
186
187 const TargetRegisterClass *SrcRC = TRI->getSubRegClass(
188 MRI.getRegClass(SrcReg), SrcSubReg);
189
190 return TRI->isSGPRClass(DstRC) &&
191 !TRI->getCommonSubClass(DstRC, SrcRC);
192 }
193
130194 bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
131195 MachineRegisterInfo &MRI = MF.getRegInfo();
132 const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
196 const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>(
197 MF.getTarget().getRegisterInfo());
198 const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
199 MF.getTarget().getInstrInfo());
133200 for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
134201 BI != BE; ++BI) {
135202
137204 for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
138205 I != E; ++I) {
139206 MachineInstr &MI = *I;
140 if (MI.getOpcode() != AMDGPU::PHI) {
141 continue;
142 }
143 unsigned Reg = MI.getOperand(0).getReg();
144 const TargetRegisterClass *RC = inferRegClass(TRI, MRI, Reg);
145 if (TRI->getCommonSubClass(RC, &AMDGPU::VReg_32RegClass)) {
146 MRI.constrainRegClass(Reg, &AMDGPU::VReg_32RegClass);
207 if (MI.getOpcode() == AMDGPU::COPY && isVGPRToSGPRCopy(MI, TRI, MRI)) {
208 DEBUG(dbgs() << "Fixing VGPR -> SGPR copy:\n");
209 DEBUG(MI.print(dbgs()));
210 TII->moveToVALU(MI);
211
212 }
213
214 switch (MI.getOpcode()) {
215 default: continue;
216 case AMDGPU::PHI: {
217 DEBUG(dbgs() << " Fixing PHI:\n");
218 DEBUG(MI.print(dbgs()));
219
220 for (unsigned i = 1; i < MI.getNumOperands(); i+=2) {
221 unsigned Reg = MI.getOperand(i).getReg();
222 const TargetRegisterClass *RC = inferRegClassFromDef(TRI, MRI, Reg,
223 MI.getOperand(0).getSubReg());
224 MRI.constrainRegClass(Reg, RC);
225 }
226 unsigned Reg = MI.getOperand(0).getReg();
227 const TargetRegisterClass *RC = inferRegClassFromUses(TRI, MRI, Reg,
228 MI.getOperand(0).getSubReg());
229 if (TRI->getCommonSubClass(RC, &AMDGPU::VReg_32RegClass)) {
230 MRI.constrainRegClass(Reg, &AMDGPU::VReg_32RegClass);
231 }
232
233 if (!TRI->isSGPRClass(MRI.getRegClass(Reg)))
234 break;
235
236 // If a PHI node defines an SGPR and any of its operands are VGPRs,
237 // then we need to move it to the VALU.
238 for (unsigned i = 1; i < MI.getNumOperands(); i+=2) {
239 unsigned Reg = MI.getOperand(i).getReg();
240 if (TRI->hasVGPRs(MRI.getRegClass(Reg))) {
241 TII->moveToVALU(MI);
242 break;
243 }
244 }
245
246 break;
247 }
248 case AMDGPU::REG_SEQUENCE: {
249 if (TRI->hasVGPRs(TII->getOpRegClass(MI, 0)) ||
250 !hasVGPROperands(MI, TRI))
251 continue;
252
253 DEBUG(dbgs() << "Fixing REG_SEQUENCE: \n");
254 DEBUG(MI.print(dbgs()));
255
256 TII->moveToVALU(MI);
257 TII->legalizeOperands(&MI);
258 break;
259 }
147260 }
148261 }
149262 }
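The PHI case above is the subtle one: a PHI that defines an SGPR is only legal if every incoming value is produced on the SALU, and inferRegClassFromDef() has to look through chains of COPYs to find out. A hedged before/after illustration, with invented virtual register names (this is not output from the pass):

// Before SIFixSGPRCopies: the PHI defines an SGPR, but the %bb.1 input
// is (through a COPY) the result of a VALU instruction in a VGPR.
//   %sreg2 = PHI %sreg0, <%bb.0>, %sreg1, <%bb.1>
// inferRegClassFromDef() sees through the COPY feeding %sreg1, the pass
// detects the VGPR source, and moveToVALU() rewrites the PHI, roughly:
//   %vreg2 = PHI %vreg0, <%bb.0>, %vreg1, <%bb.1>
// Users of the old SGPR result are revisited via the moveToVALU worklist.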
2222 field bits<1> VOP2 = 0;
2323 field bits<1> VOP3 = 0;
2424 field bits<1> VOPC = 0;
25 field bits<1> SALU = 0;
2526
2627 let TSFlags{0} = VM_CNT;
2728 let TSFlags{1} = EXP_CNT;
3233 let TSFlags{6} = VOP2;
3334 let TSFlags{7} = VOP3;
3435 let TSFlags{8} = VOPC;
36 let TSFlags{9} = SALU;
3537 }
3638
3739 class Enc32 <dag outs, dag ins, string asm, list<dag> pattern> :
6668 let mayLoad = 0;
6769 let mayStore = 0;
6870 let hasSideEffects = 0;
71 let SALU = 1;
6972 }
7073
7174 class SOP2 <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
8487 let mayLoad = 0;
8588 let mayStore = 0;
8689 let hasSideEffects = 0;
90 let SALU = 1;
8791 }
8892
8993 class SOPC <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
101105 let mayLoad = 0;
102106 let mayStore = 0;
103107 let hasSideEffects = 0;
108 let SALU = 1;
104109 }
105110
106111 class SOPK <bits<5> op, dag outs, dag ins, string asm, list<dag> pattern> :
117122 let mayLoad = 0;
118123 let mayStore = 0;
119124 let hasSideEffects = 0;
125 let SALU = 1;
120126 }
121127
122128 class SOPP <bits<7> op, dag ins, string asm, list<dag> pattern> : Enc32 <
134140 let mayLoad = 0;
135141 let mayStore = 0;
136142 let hasSideEffects = 0;
143 let SALU = 1;
137144 }
138145
139146 class SMRD <bits<5> op, bits<1> imm, dag outs, dag ins, string asm,
3030 return RI;
3131 }
3232
33 //===----------------------------------------------------------------------===//
34 // TargetInstrInfo callbacks
35 //===----------------------------------------------------------------------===//
36
3337 void
3438 SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
3539 MachineBasicBlock::iterator MI, DebugLoc DL,
184188 MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
185189 bool NewMI) const {
186190
187 if (MI->getNumOperands() < 3 || !MI->getOperand(1).isReg() ||
188 !MI->getOperand(2).isReg())
191 MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
192 if (MI->getNumOperands() < 3 || !MI->getOperand(1).isReg())
189193 return 0;
190194
191 MI = TargetInstrInfo::commuteInstruction(MI, NewMI);
195 // Cannot commute VOP2 if src0 is SGPR.
196 if (isVOP2(MI->getOpcode()) && MI->getOperand(1).isReg() &&
197 RI.isSGPRClass(MRI.getRegClass(MI->getOperand(1).getReg())))
198 return 0;
199
200 if (!MI->getOperand(2).isReg()) {
201 // XXX: Commute instructions with FPImm operands
202 if (NewMI || MI->getOperand(2).isFPImm() ||
203 (!isVOP2(MI->getOpcode()) && !isVOP3(MI->getOpcode()))) {
204 return 0;
205 }
206
207 // XXX: Commute VOP3 instructions with abs and neg set.
208 if (isVOP3(MI->getOpcode()) &&
209 (MI->getOperand(AMDGPU::getNamedOperandIdx(MI->getOpcode(),
210 AMDGPU::OpName::abs)).getImm() ||
211 MI->getOperand(AMDGPU::getNamedOperandIdx(MI->getOpcode(),
212 AMDGPU::OpName::neg)).getImm()))
213 return 0;
214
215 unsigned Reg = MI->getOperand(1).getReg();
216 MI->getOperand(1).ChangeToImmediate(MI->getOperand(2).getImm());
217 MI->getOperand(2).ChangeToRegister(Reg, false);
218 } else {
219 MI = TargetInstrInfo::commuteInstruction(MI, NewMI);
220 }
192221
193222 if (MI)
194223 MI->setDesc(get(commuteOpcode(MI->getOpcode())));
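The new branch above handles commuting when src1 holds an immediate, which TargetInstrInfo::commuteInstruction cannot do (it only swaps two register operands). Schematically, with an invented operand mix rather than a real trace:

// Before: operand 2 (src1) is an immediate, which VOP2 cannot encode.
//   %vdst = V_ADD_I32_e32 %vreg0, 15
// After the ChangeToImmediate/ChangeToRegister swap above:
//   %vdst = V_ADD_I32_e32 15, %vreg0   // a literal in src0 is legal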
241270
242271 bool SIInstrInfo::isVOPC(uint16_t Opcode) const {
243272 return get(Opcode).TSFlags & SIInstrFlags::VOPC;
273 }
274
275 bool SIInstrInfo::isSALUInstr(const MachineInstr &MI) const {
276 return get(MI.getOpcode()).TSFlags & SIInstrFlags::SALU;
244277 }
245278
246279 bool SIInstrInfo::isInlineConstant(const MachineOperand &MO) const {
309342 // Verify SRC1 for VOP2 and VOPC
310343 if (Src1Idx != -1 && (isVOP2(Opcode) || isVOPC(Opcode))) {
311344 const MachineOperand &Src1 = MI->getOperand(Src1Idx);
312 if (Src1.isImm()) {
345 if (Src1.isImm() || Src1.isFPImm()) {
313346 ErrInfo = "VOP[2C] src1 cannot be an immediate.";
314347 return false;
315348 }
331364 }
332365 }
333366 return true;
367 }
368
369 unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
370 switch (MI.getOpcode()) {
371 default: return AMDGPU::INSTRUCTION_LIST_END;
372 case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE;
373 case AMDGPU::COPY: return AMDGPU::COPY;
374 case AMDGPU::PHI: return AMDGPU::PHI;
375 case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32;
376 case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64;
377 case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32;
378 case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64;
379 case AMDGPU::S_LSHR_B32: return AMDGPU::V_LSHR_B32_e32;
380 case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64;
381 }
382 }
383
384 bool SIInstrInfo::isSALUOpSupportedOnVALU(const MachineInstr &MI) const {
385 return getVALUOp(MI) != AMDGPU::INSTRUCTION_LIST_END;
386 }
387
388 const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI,
389 unsigned OpNo) const {
390 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
391 const MCInstrDesc &Desc = get(MI.getOpcode());
392 if (MI.isVariadic() || OpNo >= Desc.getNumOperands() ||
393 Desc.OpInfo[OpNo].RegClass == -1)
394 return MRI.getRegClass(MI.getOperand(OpNo).getReg());
395
396 unsigned RCID = Desc.OpInfo[OpNo].RegClass;
397 return RI.getRegClass(RCID);
398 }
399
400 bool SIInstrInfo::canReadVGPR(const MachineInstr &MI, unsigned OpNo) const {
401 switch (MI.getOpcode()) {
402 case AMDGPU::COPY:
403 case AMDGPU::REG_SEQUENCE:
404 return RI.hasVGPRs(getOpRegClass(MI, 0));
405 default:
406 return RI.hasVGPRs(getOpRegClass(MI, OpNo));
407 }
408 }
409
410 void SIInstrInfo::legalizeOpWithMove(MachineInstr *MI, unsigned OpIdx) const {
411 MachineBasicBlock::iterator I = MI;
412 MachineOperand &MO = MI->getOperand(OpIdx);
413 MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
414 unsigned RCID = get(MI->getOpcode()).OpInfo[OpIdx].RegClass;
415 const TargetRegisterClass *RC = RI.getRegClass(RCID);
416 unsigned Opcode = AMDGPU::V_MOV_B32_e32;
417 if (MO.isReg()) {
418 Opcode = AMDGPU::COPY;
419 } else if (RI.isSGPRClass(RC)) {
420 Opcode = AMDGPU::S_MOV_B32;
421 }
422
423 unsigned Reg = MRI.createVirtualRegister(RI.getRegClass(RCID));
424 BuildMI(*MI->getParent(), I, MI->getParent()->findDebugLoc(I), get(Opcode),
425 Reg).addOperand(MO);
426 MO.ChangeToRegister(Reg, false);
427 }
428
429 void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
430 MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
431 int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
432 AMDGPU::OpName::src0);
433 int Src1Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
434 AMDGPU::OpName::src1);
435 int Src2Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
436 AMDGPU::OpName::src2);
437
438 // Legalize VOP2
439 if (isVOP2(MI->getOpcode()) && Src1Idx != -1) {
440 MachineOperand &Src1 = MI->getOperand(Src1Idx);
441 // Legalize VOP2 instructions where src1 is not a VGPR.
442 if (Src1.isImm() || Src1.isFPImm() ||
443 (Src1.isReg() && RI.isSGPRClass(MRI.getRegClass(Src1.getReg())))) {
444 if (MI->isCommutable()) {
445 if (commuteInstruction(MI))
446 return;
447 }
448 legalizeOpWithMove(MI, Src1Idx);
449 }
450 }
451
452 // Legalize VOP3
453 if (isVOP3(MI->getOpcode())) {
454 int VOP3Idx[3] = {Src0Idx, Src1Idx, Src2Idx};
455 unsigned SGPRReg = AMDGPU::NoRegister;
456 for (unsigned i = 0; i < 3; ++i) {
457 int Idx = VOP3Idx[i];
458 if (Idx == -1)
459 continue;
460 MachineOperand &MO = MI->getOperand(Idx);
461
462 if (MO.isReg()) {
463 if (!RI.isSGPRClass(MRI.getRegClass(MO.getReg())))
464 continue; // VGPRs are legal
465
466 if (SGPRReg == AMDGPU::NoRegister || SGPRReg == MO.getReg()) {
467 SGPRReg = MO.getReg();
468 // We can use one SGPR in each VOP3 instruction.
469 continue;
470 }
471 } else if (!isLiteralConstant(MO)) {
472 // If it is not a register and not a literal constant, then it must be
473 // an inline constant which is always legal.
474 continue;
475 }
476 // If we make it this far, then the operand is not legal and we must
477 // legalize it.
478 legalizeOpWithMove(MI, Idx);
479 }
480 }
481
482 // Legalize REG_SEQUENCE
483 // The register class of the operands must be the same type as the register
484 // class of the output.
485 if (MI->getOpcode() == AMDGPU::REG_SEQUENCE) {
486 const TargetRegisterClass *RC = NULL, *SRC = NULL, *VRC = NULL;
487 for (unsigned i = 1, e = MI->getNumOperands(); i != e; i+=2) {
488 if (!MI->getOperand(i).isReg() ||
489 !TargetRegisterInfo::isVirtualRegister(MI->getOperand(i).getReg()))
490 continue;
491 const TargetRegisterClass *OpRC =
492 MRI.getRegClass(MI->getOperand(i).getReg());
493 if (RI.hasVGPRs(OpRC)) {
494 VRC = OpRC;
495 } else {
496 SRC = OpRC;
497 }
498 }
499
500 // If any of the operands are VGPR registers, then they all must be,
501 // otherwise we will create illegal VGPR->SGPR copies when legalizing
502 // them.
503 if (VRC || !RI.isSGPRClass(getOpRegClass(*MI, 0))) {
504 if (!VRC) {
505 assert(SRC);
506 VRC = RI.getEquivalentVGPRClass(SRC);
507 }
508 RC = VRC;
509 } else {
510 RC = SRC;
511 }
512
513 // Update all the operands so they have the same type.
514 for (unsigned i = 1, e = MI->getNumOperands(); i != e; i+=2) {
515 if (!MI->getOperand(i).isReg() ||
516 !TargetRegisterInfo::isVirtualRegister(MI->getOperand(i).getReg()))
517 continue;
518 unsigned DstReg = MRI.createVirtualRegister(RC);
519 BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
520 get(AMDGPU::COPY), DstReg)
521 .addOperand(MI->getOperand(i));
522 MI->getOperand(i).setReg(DstReg);
523 }
524 }
525 }
526
527 void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
528 SmallVector<MachineInstr *, 128> Worklist;
529 Worklist.push_back(&TopInst);
530
531 while (!Worklist.empty()) {
532 MachineInstr *Inst = Worklist.pop_back_val();
533 unsigned NewOpcode = getVALUOp(*Inst);
534 if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END)
535 continue;
536
537 MachineRegisterInfo &MRI = Inst->getParent()->getParent()->getRegInfo();
538
539 // Use the new VALU Opcode.
540 const MCInstrDesc &NewDesc = get(NewOpcode);
541 Inst->setDesc(NewDesc);
542
543 // Add the implicit and explicit register definitions.
544 if (NewDesc.ImplicitUses) {
545 for (unsigned i = 0; NewDesc.ImplicitUses[i]; ++i) {
546 Inst->addOperand(MachineOperand::CreateReg(NewDesc.ImplicitUses[i],
547 false, true));
548 }
549 }
550
551 if (NewDesc.ImplicitDefs) {
552 for (unsigned i = 0; NewDesc.ImplicitDefs[i]; ++i) {
553 Inst->addOperand(MachineOperand::CreateReg(NewDesc.ImplicitDefs[i],
554 true, true));
555 }
556 }
557
558 legalizeOperands(Inst);
559
560 // Update the destination register class.
561 const TargetRegisterClass *NewDstRC = getOpRegClass(*Inst, 0);
562
563 switch (Inst->getOpcode()) {
564 // For target instructions, getOpRegClass just returns the virtual
565 // register class associated with the operand, so we need to find an
566 // equivalent VGPR register class in order to move the instruction to the
567 // VALU.
568 case AMDGPU::COPY:
569 case AMDGPU::PHI:
570 case AMDGPU::REG_SEQUENCE:
571 if (RI.hasVGPRs(NewDstRC))
572 continue;
573 NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
574 if (!NewDstRC)
575 continue;
576 break;
577 default:
578 break;
579 }
580
581 unsigned DstReg = Inst->getOperand(0).getReg();
582 unsigned NewDstReg = MRI.createVirtualRegister(NewDstRC);
583 MRI.replaceRegWith(DstReg, NewDstReg);
584
585 for (MachineRegisterInfo::use_iterator I = MRI.use_begin(NewDstReg),
586 E = MRI.use_end(); I != E; ++I) {
587 MachineInstr &UseMI = *I;
588 if (!canReadVGPR(UseMI, I.getOperandNo())) {
589 Worklist.push_back(&UseMI);
590 }
591 }
592 }
334593 }
335594
336595 //===----------------------------------------------------------------------===//
6161
6262 virtual int getIndirectIndexEnd(const MachineFunction &MF) const;
6363
64 bool isSALUInstr(const MachineInstr &MI) const;
65 unsigned getVALUOp(const MachineInstr &MI) const;
66 bool isSALUOpSupportedOnVALU(const MachineInstr &MI) const;
67
68 /// \brief Return the correct register class for \p OpNo. For target-specific
69 /// instructions, this will return the register class that has been defined
70 /// in tablegen. For generic instructions, like REG_SEQUENCE, it will return
71 /// the register class of its machine operand.
72 const TargetRegisterClass *getOpRegClass(const MachineInstr &MI,
73 unsigned OpNo) const;
75
76 /// \returns true if it is legal for the operand at index \p OpNo
77 /// to read a VGPR.
78 bool canReadVGPR(const MachineInstr &MI, unsigned OpNo) const;
79
80 /// \brief Legalize the \p OpIndex operand of this instruction by inserting
81 /// a MOV. For example:
82 /// ADD_I32_e32 VGPR0, 15
83 /// to
84 /// MOV VGPR1, 15
85 /// ADD_I32_e32 VGPR0, VGPR1
86 ///
87 /// If the operand being legalized is a register, then a COPY will be used
88 /// instead of MOV.
89 void legalizeOpWithMove(MachineInstr *MI, unsigned OpIdx) const;
90
91 /// \brief Legalize all operands in this instruction. This function may
92 /// create new instructions and insert them before \p MI.
93 void legalizeOperands(MachineInstr *MI) const;
94
95 /// \brief Replace this instruction's opcode with the equivalent VALU
96 /// opcode. This function will also move the users of \p MI to the
97 /// VALU if necessary.
98 void moveToVALU(MachineInstr &MI) const;
99
64100 virtual unsigned calculateIndirectAddress(unsigned RegIndex,
65101 unsigned Channel) const;
66102
171171 opName#" $dst, $src0, $src1", pattern
172172 >;
173173
174 class SOP2_SHIFT_64 <bits<7> op, string opName, list<dag> pattern> : SOP2 <
175 op, (outs SReg_64:$dst), (ins SSrc_64:$src0, SSrc_32:$src1),
176 opName#" $dst, $src0, $src1", pattern
177 >;
178
174180 class SOPC_32 <bits<7> op, string opName, list<dag> pattern> : SOPC <
175180 op, (outs SCCReg:$dst), (ins SSrc_32:$src0, SSrc_32:$src1),
176181 opName#" $dst, $src0, $src1", pattern
940940 >;
941941 defm V_ASHRREV_I32 : VOP2_32 <0x00000018, "V_ASHRREV_I32", [], "V_ASHR_I32">;
942942
943 let hasPostISelHook = 1 in {
944
943945 defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32",
944946 [(set i32:$dst, (shl i32:$src0, i32:$src1))]
945947 >;
948
949 }
946950 defm V_LSHLREV_B32 : VOP2_32 <0x0000001a, "V_LSHLREV_B32", [], "V_LSHL_B32">;
947951
948952 defm V_AND_B32 : VOP2_32 <0x0000001b, "V_AND_B32",
11711175 def S_NOR_B64 : SOP2_64 <0x0000001b, "S_NOR_B64", []>;
11721176 def S_XNOR_B32 : SOP2_32 <0x0000001c, "S_XNOR_B32", []>;
11731177 def S_XNOR_B64 : SOP2_64 <0x0000001d, "S_XNOR_B64", []>;
1174 def S_LSHL_B32 : SOP2_32 <0x0000001e, "S_LSHL_B32", []>;
1175 def S_LSHL_B64 : SOP2_64 <0x0000001f, "S_LSHL_B64", []>;
1176 def S_LSHR_B32 : SOP2_32 <0x00000020, "S_LSHR_B32", []>;
1177 def S_LSHR_B64 : SOP2_64 <0x00000021, "S_LSHR_B64", []>;
1178 def S_ASHR_I32 : SOP2_32 <0x00000022, "S_ASHR_I32", []>;
1179 def S_ASHR_I64 : SOP2_64 <0x00000023, "S_ASHR_I64", []>;
1178
1179 // Use added complexity so these patterns are preferred to the VALU patterns.
1180 let AddedComplexity = 1 in {
1181
1182 def S_LSHL_B32 : SOP2_32 <0x0000001e, "S_LSHL_B32",
1183 [(set i32:$dst, (shl i32:$src0, i32:$src1))]
1184 >;
1185 def S_LSHL_B64 : SOP2_SHIFT_64 <0x0000001f, "S_LSHL_B64",
1186 [(set i64:$dst, (shl i64:$src0, i32:$src1))]
1187 >;
1188 def S_LSHR_B32 : SOP2_32 <0x00000020, "S_LSHR_B32",
1189 [(set i32:$dst, (srl i32:$src0, i32:$src1))]
1190 >;
1191 def S_LSHR_B64 : SOP2_SHIFT_64 <0x00000021, "S_LSHR_B64",
1192 [(set i64:$dst, (srl i64:$src0, i32:$src1))]
1193 >;
1194 def S_ASHR_I32 : SOP2_32 <0x00000022, "S_ASHR_I32",
1195 [(set i32:$dst, (sra i32:$src0, i32:$src1))]
1196 >;
1197 def S_ASHR_I64 : SOP2_SHIFT_64 <0x00000023, "S_ASHR_I64",
1198 [(set i64:$dst, (sra i64:$src0, i32:$src1))]
1199 >;
1200
1201 } // End AddedComplexity = 1
1202
11801203 def S_BFM_B32 : SOP2_32 <0x00000024, "S_BFM_B32", []>;
11811204 def S_BFM_B64 : SOP2_64 <0x00000025, "S_BFM_B64", []>;
11821205 def S_MUL_I32 : SOP2_32 <0x00000026, "S_MUL_I32", []>;
7171 return NULL;
7272 }
7373
74 bool SIRegisterInfo::isSGPRClass(const TargetRegisterClass *RC) {
74 bool SIRegisterInfo::isSGPRClass(const TargetRegisterClass *RC) const {
7575 if (!RC) {
7676 return false;
7777 }
78 return RC == &AMDGPU::SReg_32RegClass ||
79 RC == &AMDGPU::SReg_64RegClass ||
80 RC == &AMDGPU::SReg_128RegClass ||
81 RC == &AMDGPU::SReg_256RegClass ||
82 RC == &AMDGPU::SReg_512RegClass;
78 return !hasVGPRs(RC);
8379 }
80
81 bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
82 return getCommonSubClass(&AMDGPU::VReg_32RegClass, RC) ||
83 getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) ||
84 getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) ||
85 getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) ||
86 getCommonSubClass(&AMDGPU::VReg_512RegClass, RC);
87 }
88
89 const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass(
90 const TargetRegisterClass *SRC) const {
91 if (hasVGPRs(SRC)) {
92 return SRC;
93 } else if (getCommonSubClass(SRC, &AMDGPU::SGPR_32RegClass)) {
94 return &AMDGPU::VReg_32RegClass;
95 } else if (getCommonSubClass(SRC, &AMDGPU::SGPR_64RegClass)) {
96 return &AMDGPU::VReg_64RegClass;
97 } else if (getCommonSubClass(SRC, &AMDGPU::SReg_128RegClass)) {
98 return &AMDGPU::VReg_128RegClass;
99 } else if (getCommonSubClass(SRC, &AMDGPU::SReg_256RegClass)) {
100 return &AMDGPU::VReg_256RegClass;
101 } else if (getCommonSubClass(SRC, &AMDGPU::SReg_512RegClass)) {
102 return &AMDGPU::VReg_512RegClass;
103 }
104 return NULL;
105 }
106
107 const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
108 const TargetRegisterClass *RC, unsigned SubIdx) const {
109 if (SubIdx == AMDGPU::NoSubRegister)
110 return RC;
111
112 // If this register has a sub-register, we can safely assume it is a 32-bit
113 // register, because all of SI's sub-registers are 32-bit.
114 if (isSGPRClass(RC)) {
115 return &AMDGPU::SGPR_32RegClass;
116 } else {
117 return &AMDGPU::VGPR_32RegClass;
118 }
119 }
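Since every SI sub-register index selects a single 32-bit lane, getSubRegClass() collapses any tuple class to one of two 32-bit classes. A small illustrative use follows; the assert is an assumption of this note, not code from the patch:

// Any sub-register of a 64-bit SGPR pair is a 32-bit scalar register.
const TargetRegisterClass *SubRC =
    TRI->getSubRegClass(&AMDGPU::SReg_64RegClass, AMDGPU::sub0);
assert(SubRC == &AMDGPU::SGPR_32RegClass && "SI sub-registers are 32-bit");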
4646 const TargetRegisterClass *getPhysRegClass(unsigned Reg) const;
4747
4848 /// \returns true if this class contains only SGPR registers
49 static bool isSGPRClass(const TargetRegisterClass *RC);
49 bool isSGPRClass(const TargetRegisterClass *RC) const;
50
51 /// \returns true if this class contains VGPR registers.
52 bool hasVGPRs(const TargetRegisterClass *RC) const;
53
54 /// \returns A VGPR reg class with the same width as \p SRC
55 const TargetRegisterClass *getEquivalentVGPRClass(
56 const TargetRegisterClass *SRC) const;
57
58 /// \returns The register class that is used for a sub-register of \p RC for
59 /// the given \p SubIdx. If \p SubIdx equals NoSubRegister, \p RC will
60 /// be returned.
61 const TargetRegisterClass *getSubRegClass(const TargetRegisterClass *RC,
62 unsigned SubIdx) const;
5063 };
5164
5265 } // End namespace llvm
0 ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
0 ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
1 ; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
12
2 ; CHECK: @fneg_v2
3 ; CHECK: -PV
4 ; CHECK: -PV
3 ; R600-CHECK-LABEL: @fneg
4 ; R600-CHECK: -PV
5 ; SI-CHECK-LABEL: @fneg
6 ; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 0, 0, 0, 1
7 define void @fneg(float addrspace(1)* %out, float %in) {
8 entry:
9 %0 = fsub float -0.000000e+00, %in
10 store float %0, float addrspace(1)* %out
11 ret void
12 }
13
14 ; R600-CHECK-LABEL: @fneg_v2
15 ; R600-CHECK: -PV
16 ; R600-CHECK: -PV
17 ; SI-CHECK-LABEL: @fneg_v2
18 ; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 0, 0, 0, 1
19 ; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 0, 0, 0, 1
520 define void @fneg_v2(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) {
621 entry:
722 %0 = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %in
3045 ; (fneg (f32 bitcast (i32 a))) => (f32 bitcast (xor (i32 a), 0x80000000))
3146 ; unless the target returns true for isNegFree()
3247
33 ; CHECK-NOT: XOR
34 ; CHECK: -KC0[2].Z
35
48 ; R600-CHECK-LABEL: @fneg_free
49 ; R600-CHECK-NOT: XOR
50 ; R600-CHECK: -KC0[2].Z
51 ; SI-CHECK-LABEL: @fneg_free
52 ; XXX: We could use V_ADD_F32_e64 with the negate bit here instead.
53 ; SI-CHECK: V_SUB_F32_e64 v{{[0-9]}}, 0.000000e+00, s{{[0-9]}}, 0, 0, 0, 0
3654 define void @fneg_free(float addrspace(1)* %out, i32 %in) {
3755 entry:
3856 %0 = bitcast i32 %in to float
66 ;===------------------------------------------------------------------------===;
77
88 ; Load an i8 value from the global address space.
9 ; R600-CHECK: @load_i8
9 ; R600-CHECK-LABEL: @load_i8
1010 ; R600-CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
1111
12 ; SI-CHECK: @load_i8
12 ; SI-CHECK-LABEL: @load_i8
1313 ; SI-CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}},
1414 define void @load_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
1515 %1 = load i8 addrspace(1)* %in
1818 ret void
1919 }
2020
21 ; R600-CHECK: @load_i8_sext
21 ; R600-CHECK-LABEL: @load_i8_sext
2222 ; R600-CHECK: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]]
2323 ; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
2424 ; R600-CHECK: 24
2525 ; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
2626 ; R600-CHECK: 24
27 ; SI-CHECK: @load_i8_sext
27 ; SI-CHECK-LABEL: @load_i8_sext
2828 ; SI-CHECK: BUFFER_LOAD_SBYTE
2929 define void @load_i8_sext(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
3030 entry:
3434 ret void
3535 }
3636
37 ; R600-CHECK: @load_v2i8
37 ; R600-CHECK-LABEL: @load_v2i8
3838 ; R600-CHECK: VTX_READ_8
3939 ; R600-CHECK: VTX_READ_8
40 ; SI-CHECK: @load_v2i8
40 ; SI-CHECK-LABEL: @load_v2i8
4141 ; SI-CHECK: BUFFER_LOAD_UBYTE
4242 ; SI-CHECK: BUFFER_LOAD_UBYTE
4343 define void @load_v2i8(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) {
4848 ret void
4949 }
5050
51 ; R600-CHECK: @load_v2i8_sext
51 ; R600-CHECK-LABEL: @load_v2i8_sext
5252 ; R600-CHECK-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
5353 ; R600-CHECK-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
5454 ; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]]
5959 ; R600-CHECK-DAG: 24
6060 ; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]]
6161 ; R600-CHECK-DAG: 24
62 ; SI-CHECK: @load_v2i8_sext
62 ; SI-CHECK-LABEL: @load_v2i8_sext
6363 ; SI-CHECK: BUFFER_LOAD_SBYTE
6464 ; SI-CHECK: BUFFER_LOAD_SBYTE
6565 define void @load_v2i8_sext(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) {
7070 ret void
7171 }
7272
73 ; R600-CHECK: @load_v4i8
73 ; R600-CHECK-LABEL: @load_v4i8
7474 ; R600-CHECK: VTX_READ_8
7575 ; R600-CHECK: VTX_READ_8
7676 ; R600-CHECK: VTX_READ_8
7777 ; R600-CHECK: VTX_READ_8
78 ; SI-CHECK: @load_v4i8
78 ; SI-CHECK-LABEL: @load_v4i8
7979 ; SI-CHECK: BUFFER_LOAD_UBYTE
8080 ; SI-CHECK: BUFFER_LOAD_UBYTE
8181 ; SI-CHECK: BUFFER_LOAD_UBYTE
8888 ret void
8989 }
9090
91 ; R600-CHECK: @load_v4i8_sext
91 ; R600-CHECK-LABEL: @load_v4i8_sext
9292 ; R600-CHECK-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
9393 ; R600-CHECK-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
9494 ; R600-CHECK-DAG: VTX_READ_8 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]]
109109 ; R600-CHECK-DAG: 24
110110 ; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_W_CHAN]]
111111 ; R600-CHECK-DAG: 24
112 ; SI-CHECK: @load_v4i8_sext
112 ; SI-CHECK-LABEL: @load_v4i8_sext
113113 ; SI-CHECK: BUFFER_LOAD_SBYTE
114114 ; SI-CHECK: BUFFER_LOAD_SBYTE
115115 ; SI-CHECK: BUFFER_LOAD_SBYTE
123123 }
124124
125125 ; Load an i16 value from the global address space.
126 ; R600-CHECK: @load_i16
126 ; R600-CHECK-LABEL: @load_i16
127127 ; R600-CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
128 ; SI-CHECK: @load_i16
128 ; SI-CHECK-LABEL: @load_i16
129129 ; SI-CHECK: BUFFER_LOAD_USHORT
130130 define void @load_i16(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
131131 entry:
135135 ret void
136136 }
137137
138 ; R600-CHECK: @load_i16_sext
138 ; R600-CHECK-LABEL: @load_i16_sext
139139 ; R600-CHECK: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]]
140140 ; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
141141 ; R600-CHECK: 16
142142 ; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
143143 ; R600-CHECK: 16
144 ; SI-CHECK: @load_i16_sext
144 ; SI-CHECK-LABEL: @load_i16_sext
145145 ; SI-CHECK: BUFFER_LOAD_SSHORT
146146 define void @load_i16_sext(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
147147 entry:
151151 ret void
152152 }
153153
154 ; R600-CHECK: @load_v2i16
154 ; R600-CHECK-LABEL: @load_v2i16
155155 ; R600-CHECK: VTX_READ_16
156156 ; R600-CHECK: VTX_READ_16
157 ; SI-CHECK: @load_v2i16
157 ; SI-CHECK-LABEL: @load_v2i16
158158 ; SI-CHECK: BUFFER_LOAD_USHORT
159159 ; SI-CHECK: BUFFER_LOAD_USHORT
160160 define void @load_v2i16(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
165165 ret void
166166 }
167167
168 ; R600-CHECK: @load_v2i16_sext
168 ; R600-CHECK-LABEL: @load_v2i16_sext
169169 ; R600-CHECK-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
170170 ; R600-CHECK-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
171171 ; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]]
176176 ; R600-CHECK-DAG: 16
177177 ; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]]
178178 ; R600-CHECK-DAG: 16
179 ; SI-CHECK: @load_v2i16_sext
179 ; SI-CHECK-LABEL: @load_v2i16_sext
180180 ; SI-CHECK: BUFFER_LOAD_SSHORT
181181 ; SI-CHECK: BUFFER_LOAD_SSHORT
182182 define void @load_v2i16_sext(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
187187 ret void
188188 }
189189
190 ; R600-CHECK: @load_v4i16
190 ; R600-CHECK-LABEL: @load_v4i16
191191 ; R600-CHECK: VTX_READ_16
192192 ; R600-CHECK: VTX_READ_16
193193 ; R600-CHECK: VTX_READ_16
194194 ; R600-CHECK: VTX_READ_16
195 ; SI-CHECK: @load_v4i16
195 ; SI-CHECK-LABEL: @load_v4i16
196196 ; SI-CHECK: BUFFER_LOAD_USHORT
197197 ; SI-CHECK: BUFFER_LOAD_USHORT
198198 ; SI-CHECK: BUFFER_LOAD_USHORT
205205 ret void
206206 }
207207
208 ; R600-CHECK: @load_v4i16_sext
208 ; R600-CHECK-LABEL: @load_v4i16_sext
209209 ; R600-CHECK-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
210210 ; R600-CHECK-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
211211 ; R600-CHECK-DAG: VTX_READ_16 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]]
226226 ; R600-CHECK-DAG: 16
227227 ; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_W_CHAN]]
228228 ; R600-CHECK-DAG: 16
229 ; SI-CHECK: @load_v4i16_sext
229 ; SI-CHECK-LABEL: @load_v4i16_sext
230230 ; SI-CHECK: BUFFER_LOAD_SSHORT
231231 ; SI-CHECK: BUFFER_LOAD_SSHORT
232232 ; SI-CHECK: BUFFER_LOAD_SSHORT
240240 }
241241
242242 ; load an i32 value from the global address space.
243 ; R600-CHECK: @load_i32
243 ; R600-CHECK-LABEL: @load_i32
244244 ; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
245245
246 ; SI-CHECK: @load_i32
246 ; SI-CHECK-LABEL: @load_i32
247247 ; SI-CHECK: BUFFER_LOAD_DWORD v{{[0-9]+}}
248248 define void @load_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
249249 entry:
253253 }
254254
255255 ; load a f32 value from the global address space.
256 ; R600-CHECK: @load_f32
256 ; R600-CHECK-LABEL: @load_f32
257257 ; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
258258
259 ; SI-CHECK: @load_f32
259 ; SI-CHECK-LABEL: @load_f32
260260 ; SI-CHECK: BUFFER_LOAD_DWORD v{{[0-9]+}}
261261 define void @load_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
262262 entry:
266266 }
267267
268268 ; load a v2f32 value from the global address space
269 ; R600-CHECK: @load_v2f32
269 ; R600-CHECK-LABEL: @load_v2f32
270270 ; R600-CHECK: VTX_READ_64
271271
272 ; SI-CHECK: @load_v2f32
272 ; SI-CHECK-LABEL: @load_v2f32
273273 ; SI-CHECK: BUFFER_LOAD_DWORDX2
274274 define void @load_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) {
275275 entry:
278278 ret void
279279 }
280280
281 ; R600-CHECK: @load_i64
281 ; R600-CHECK-LABEL: @load_i64
282282 ; R600-CHECK: MEM_RAT
283283 ; R600-CHECK: MEM_RAT
284284
285 ; SI-CHECK: @load_i64
285 ; SI-CHECK-LABEL: @load_i64
286286 ; SI-CHECK: BUFFER_LOAD_DWORDX2
287287 define void @load_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
288288 entry:
291291 ret void
292292 }
293293
294 ; R600-CHECK: @load_i64_sext
294 ; R600-CHECK-LABEL: @load_i64_sext
295295 ; R600-CHECK: MEM_RAT
296296 ; R600-CHECK: MEM_RAT
297297 ; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}}, literal.x
298298 ; R600-CHECK: 31
299 ; SI-CHECK: @load_i64_sext
299 ; SI-CHECK-LABEL: @load_i64_sext
300300 ; SI-CHECK: BUFFER_LOAD_DWORDX2 [[VAL:v\[[0-9]:[0-9]\]]]
301301 ; SI-CHECK: V_LSHL_B64 [[LSHL:v\[[0-9]:[0-9]\]]], [[VAL]], 32
302302 ; SI-CHECK: V_ASHR_I64 v{{\[[0-9]:[0-9]\]}}, [[LSHL]], 32
309309 ret void
310310 }
311311
312 ; R600-CHECK: @load_i64_zext
312 ; R600-CHECK-LABEL: @load_i64_zext
313313 ; R600-CHECK: MEM_RAT
314314 ; R600-CHECK: MEM_RAT
315315 define void @load_i64_zext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
325325 ;===------------------------------------------------------------------------===;
326326
327327 ; Load a sign-extended i8 value
328 ; R600-CHECK: @load_const_i8_sext
328 ; R600-CHECK-LABEL: @load_const_i8_sext
329329 ; R600-CHECK: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]]
330330 ; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
331331 ; R600-CHECK: 24
332332 ; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
333333 ; R600-CHECK: 24
334 ; SI-CHECK: @load_const_i8_sext
334 ; SI-CHECK-LABEL: @load_const_i8_sext
335335 ; SI-CHECK: BUFFER_LOAD_SBYTE v{{[0-9]+}},
336336 define void @load_const_i8_sext(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
337337 entry:
342342 }
343343
344344 ; Load an aligned i8 value
345 ; R600-CHECK: @load_const_i8_aligned
345 ; R600-CHECK-LABEL: @load_const_i8_aligned
346346 ; R600-CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
347 ; SI-CHECK: @load_const_i8_aligned
347 ; SI-CHECK-LABEL: @load_const_i8_aligned
348348 ; SI-CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}},
349349 define void @load_const_i8_aligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
350350 entry:
355355 }
356356
357357 ; Load an un-aligned i8 value
358 ; R600-CHECK: @load_const_i8_unaligned
358 ; R600-CHECK-LABEL: @load_const_i8_unaligned
359359 ; R600-CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
360 ; SI-CHECK: @load_const_i8_unaligned
360 ; SI-CHECK-LABEL: @load_const_i8_unaligned
361361 ; SI-CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}},
362362 define void @load_const_i8_unaligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
363363 entry:
369369 }
370370
371371 ; Load a sign-extended i16 value
372 ; R600-CHECK: @load_const_i16_sext
372 ; R600-CHECK-LABEL: @load_const_i16_sext
373373 ; R600-CHECK: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]]
374374 ; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
375375 ; R600-CHECK: 16
376376 ; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
377377 ; R600-CHECK: 16
378 ; SI-CHECK: @load_const_i16_sext
378 ; SI-CHECK-LABEL: @load_const_i16_sext
379379 ; SI-CHECK: BUFFER_LOAD_SSHORT
380380 define void @load_const_i16_sext(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
381381 entry:
386386 }
387387
388388 ; Load an aligned i16 value
389 ; R600-CHECK: @load_const_i16_aligned
389 ; R600-CHECK-LABEL: @load_const_i16_aligned
390390 ; R600-CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
391 ; SI-CHECK: @load_const_i16_aligned
391 ; SI-CHECK-LABEL: @load_const_i16_aligned
392392 ; SI-CHECK: BUFFER_LOAD_USHORT
393393 define void @load_const_i16_aligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
394394 entry:
399399 }
400400
401401 ; Load an un-aligned i16 value
402 ; R600-CHECK: @load_const_i16_unaligned
402 ; R600-CHECK-LABEL: @load_const_i16_unaligned
403403 ; R600-CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
404 ; SI-CHECK: @load_const_i16_unaligned
404 ; SI-CHECK-LABEL: @load_const_i16_unaligned
405405 ; SI-CHECK: BUFFER_LOAD_USHORT
406406 define void @load_const_i16_unaligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
407407 entry:
413413 }
414414
415415 ; Load an i32 value from the constant address space.
416 ; R600-CHECK: @load_const_addrspace_i32
416 ; R600-CHECK-LABEL: @load_const_addrspace_i32
417417 ; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
418418
419 ; SI-CHECK: @load_const_addrspace_i32
419 ; SI-CHECK-LABEL: @load_const_addrspace_i32
420420 ; SI-CHECK: S_LOAD_DWORD s{{[0-9]+}}
421421 define void @load_const_addrspace_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
422422 entry:
426426 }
427427
428428 ; Load a f32 value from the constant address space.
429 ; R600-CHECK: @load_const_addrspace_f32
429 ; R600-CHECK-LABEL: @load_const_addrspace_f32
430430 ; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
431431
432 ; SI-CHECK: @load_const_addrspace_f32
432 ; SI-CHECK-LABEL: @load_const_addrspace_f32
433433 ; SI-CHECK: S_LOAD_DWORD s{{[0-9]+}}
434434 define void @load_const_addrspace_f32(float addrspace(1)* %out, float addrspace(2)* %in) {
435435 %1 = load float addrspace(2)* %in
442442 ;===------------------------------------------------------------------------===;
443443
444444 ; Load an i8 value from the local address space.
445 ; R600-CHECK: @load_i8_local
446 ; R600-CHECK: LDS_UBYTE_READ_RET
447 ; SI-CHECK: @load_i8_local
445 ; R600-CHECK-LABEL: @load_i8_local
446 ; R600-CHECK: LDS_UBYTE_READ_RET
447 ; SI-CHECK-LABEL: @load_i8_local
448448 ; SI-CHECK-NOT: S_WQM_B64
449449 ; SI-CHECK: DS_READ_U8
450450 define void @load_i8_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
454454 ret void
455455 }
456456
457 ; R600-CHECK: @load_i8_sext_local
457 ; R600-CHECK-LABEL: @load_i8_sext_local
458458 ; R600-CHECK: LDS_UBYTE_READ_RET
459459 ; R600-CHECK: ASHR
460 ; SI-CHECK: @load_i8_sext_local
460 ; SI-CHECK-LABEL: @load_i8_sext_local
461461 ; SI-CHECK-NOT: S_WQM_B64
462462 ; SI-CHECK: DS_READ_I8
463463 define void @load_i8_sext_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
468468 ret void
469469 }
470470
471 ; R600-CHECK: @load_v2i8_local
472 ; R600-CHECK: LDS_UBYTE_READ_RET
473 ; R600-CHECK: LDS_UBYTE_READ_RET
474 ; SI-CHECK: @load_v2i8_local
471 ; R600-CHECK-LABEL: @load_v2i8_local
472 ; R600-CHECK: LDS_UBYTE_READ_RET
473 ; R600-CHECK: LDS_UBYTE_READ_RET
474 ; SI-CHECK-LABEL: @load_v2i8_local
475475 ; SI-CHECK-NOT: S_WQM_B64
476476 ; SI-CHECK: DS_READ_U8
477477 ; SI-CHECK: DS_READ_U8
483483 ret void
484484 }
485485
486 ; R600-CHECK: @load_v2i8_sext_local
486 ; R600-CHECK-LABEL: @load_v2i8_sext_local
487487 ; R600-CHECK-DAG: LDS_UBYTE_READ_RET
488488 ; R600-CHECK-DAG: LDS_UBYTE_READ_RET
489489 ; R600-CHECK-DAG: ASHR
490490 ; R600-CHECK-DAG: ASHR
491 ; SI-CHECK: @load_v2i8_sext_local
491 ; SI-CHECK-LABEL: @load_v2i8_sext_local
492492 ; SI-CHECK-NOT: S_WQM_B64
493493 ; SI-CHECK: DS_READ_I8
494494 ; SI-CHECK: DS_READ_I8
500500 ret void
501501 }
502502
503 ; R600-CHECK: @load_v4i8_local
504 ; R600-CHECK: LDS_UBYTE_READ_RET
505 ; R600-CHECK: LDS_UBYTE_READ_RET
506 ; R600-CHECK: LDS_UBYTE_READ_RET
507 ; R600-CHECK: LDS_UBYTE_READ_RET
508 ; SI-CHECK: @load_v4i8_local
503 ; R600-CHECK-LABEL: @load_v4i8_local
504 ; R600-CHECK: LDS_UBYTE_READ_RET
505 ; R600-CHECK: LDS_UBYTE_READ_RET
506 ; R600-CHECK: LDS_UBYTE_READ_RET
507 ; R600-CHECK: LDS_UBYTE_READ_RET
508 ; SI-CHECK-LABEL: @load_v4i8_local
509509 ; SI-CHECK-NOT: S_WQM_B64
510510 ; SI-CHECK: DS_READ_U8
511511 ; SI-CHECK: DS_READ_U8
519519 ret void
520520 }
521521
522 ; R600-CHECK: @load_v4i8_sext_local
522 ; R600-CHECK-LABEL: @load_v4i8_sext_local
523523 ; R600-CHECK-DAG: LDS_UBYTE_READ_RET
524524 ; R600-CHECK-DAG: LDS_UBYTE_READ_RET
525525 ; R600-CHECK-DAG: LDS_UBYTE_READ_RET
528528 ; R600-CHECK-DAG: ASHR
529529 ; R600-CHECK-DAG: ASHR
530530 ; R600-CHECK-DAG: ASHR
531 ; SI-CHECK: @load_v4i8_sext_local
531 ; SI-CHECK-LABEL: @load_v4i8_sext_local
532532 ; SI-CHECK-NOT: S_WQM_B64
533533 ; SI-CHECK: DS_READ_I8
534534 ; SI-CHECK: DS_READ_I8
543543 }
544544
545545 ; Load an i16 value from the local address space.
546 ; R600-CHECK: @load_i16_local
547 ; R600-CHECK: LDS_USHORT_READ_RET
548 ; SI-CHECK: @load_i16_local
546 ; R600-CHECK-LABEL: @load_i16_local
547 ; R600-CHECK: LDS_USHORT_READ_RET
548 ; SI-CHECK-LABEL: @load_i16_local
549549 ; SI-CHECK-NOT: S_WQM_B64
550550 ; SI-CHECK: DS_READ_U16
551551 define void @load_i16_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
556556 ret void
557557 }
558558
559 ; R600-CHECK: @load_i16_sext_local
559 ; R600-CHECK-LABEL: @load_i16_sext_local
560560 ; R600-CHECK: LDS_USHORT_READ_RET
561561 ; R600-CHECK: ASHR
562 ; SI-CHECK: @load_i16_sext_local
562 ; SI-CHECK-LABEL: @load_i16_sext_local
563563 ; SI-CHECK-NOT: S_WQM_B64
564564 ; SI-CHECK: DS_READ_I16
565565 define void @load_i16_sext_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
570570 ret void
571571 }
572572
573 ; R600-CHECK: @load_v2i16_local
574 ; R600-CHECK: LDS_USHORT_READ_RET
575 ; R600-CHECK: LDS_USHORT_READ_RET
576 ; SI-CHECK: @load_v2i16_local
573 ; R600-CHECK-LABEL: @load_v2i16_local
574 ; R600-CHECK: LDS_USHORT_READ_RET
575 ; R600-CHECK: LDS_USHORT_READ_RET
576 ; SI-CHECK-LABEL: @load_v2i16_local
577577 ; SI-CHECK-NOT: S_WQM_B64
578578 ; SI-CHECK: DS_READ_U16
579579 ; SI-CHECK: DS_READ_U16
585585 ret void
586586 }
587587
588 ; R600-CHECK: @load_v2i16_sext_local
588 ; R600-CHECK-LABEL: @load_v2i16_sext_local
589589 ; R600-CHECK-DAG: LDS_USHORT_READ_RET
590590 ; R600-CHECK-DAG: LDS_USHORT_READ_RET
591591 ; R600-CHECK-DAG: ASHR
592592 ; R600-CHECK-DAG: ASHR
593 ; SI-CHECK: @load_v2i16_sext_local
593 ; SI-CHECK-LABEL: @load_v2i16_sext_local
594594 ; SI-CHECK-NOT: S_WQM_B64
595595 ; SI-CHECK: DS_READ_I16
596596 ; SI-CHECK: DS_READ_I16
602602 ret void
603603 }
604604
605 ; R600-CHECK: @load_v4i16_local
606 ; R600-CHECK: LDS_USHORT_READ_RET
607 ; R600-CHECK: LDS_USHORT_READ_RET
608 ; R600-CHECK: LDS_USHORT_READ_RET
609 ; R600-CHECK: LDS_USHORT_READ_RET
610 ; SI-CHECK: @load_v4i16_local
605 ; R600-CHECK-LABEL: @load_v4i16_local
606 ; R600-CHECK: LDS_USHORT_READ_RET
607 ; R600-CHECK: LDS_USHORT_READ_RET
608 ; R600-CHECK: LDS_USHORT_READ_RET
609 ; R600-CHECK: LDS_USHORT_READ_RET
610 ; SI-CHECK-LABEL: @load_v4i16_local
611611 ; SI-CHECK-NOT: S_WQM_B64
612612 ; SI-CHECK: DS_READ_U16
613613 ; SI-CHECK: DS_READ_U16
621621 ret void
622622 }
623623
624 ; R600-CHECK: @load_v4i16_sext_local
624 ; R600-CHECK-LABEL: @load_v4i16_sext_local
625625 ; R600-CHECK-DAG: LDS_USHORT_READ_RET
626626 ; R600-CHECK-DAG: LDS_USHORT_READ_RET
627627 ; R600-CHECK-DAG: LDS_USHORT_READ_RET
630630 ; R600-CHECK-DAG: ASHR
631631 ; R600-CHECK-DAG: ASHR
632632 ; R600-CHECK-DAG: ASHR
633 ; SI-CHECK: @load_v4i16_sext_local
633 ; SI-CHECK-LABEL: @load_v4i16_sext_local
634634 ; SI-CHECK-NOT: S_WQM_B64
635635 ; SI-CHECK: DS_READ_I16
636636 ; SI-CHECK: DS_READ_I16
645645 }
646646
647647 ; load an i32 value from the local address space.
648 ; R600-CHECK: @load_i32_local
648 ; R600-CHECK-LABEL: @load_i32_local
649649 ; R600-CHECK: LDS_READ_RET
650 ; SI-CHECK: @load_i32_local
650 ; SI-CHECK-LABEL: @load_i32_local
651651 ; SI-CHECK-NOT: S_WQM_B64
652652 ; SI-CHECK: DS_READ_B32
653653 define void @load_i32_local(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
658658 }
659659
660660 ; load a f32 value from the local address space.
661 ; R600-CHECK: @load_f32_local
661 ; R600-CHECK-LABEL: @load_f32_local
662662 ; R600-CHECK: LDS_READ_RET
663 ; SI-CHECK: @load_f32_local
663 ; SI-CHECK-LABEL: @load_f32_local
664664 ; SI-CHECK: DS_READ_B32
665665 define void @load_f32_local(float addrspace(1)* %out, float addrspace(3)* %in) {
666666 entry:
670670 }
671671
672672 ; load a v2f32 value from the local address space
673 ; R600-CHECK: @load_v2f32_local
673 ; R600-CHECK-LABEL: @load_v2f32_local
674674 ; R600-CHECK: LDS_READ_RET
675675 ; R600-CHECK: LDS_READ_RET
676 ; SI-CHECK: @load_v2f32_local
676 ; SI-CHECK-LABEL: @load_v2f32_local
677677 ; SI-CHECK: DS_READ_B32
678678 ; SI-CHECK: DS_READ_B32
679679 define void @load_v2f32_local(<2 x float> addrspace(1)* %out, <2 x float> addrspace(3)* %in) {
0 ;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
11
2 ;CHECK: V_LSHL_B32_e64 v{{[0-9]}}, s{{[0-9]}}, 1
2 ;CHECK: S_LSHL_B32 s{{[0-9]}}, s{{[0-9]}}, 1
33
44 define void @test(i32 %p) {
55 %i = mul i32 %p, 2
0 ;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
11
2 ;CHECK: V_LSHR_B32_e64 {{v[0-9]}}, s{{[0-9]}}, 1
2 ;CHECK: S_LSHR_B32 s{{[0-9]}}, s{{[0-9]}}, 1
33
44 define void @test(i32 %p) {
55 %i = udiv i32 %p, 2
11 ; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG-CHECK
22 ; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
33
4 ; EG-CHECK: @u32_mad24
4 ; EG-CHECK-LABEL: @u32_mad24
55 ; EG-CHECK: MULADD_UINT24 {{[* ]*}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, KC0[2].W, KC0[3].X
6 ; SI-CHECK: @u32_mad24
6 ; SI-CHECK-LABEL: @u32_mad24
77 ; SI-CHECK: V_MAD_U32_U24
88
99 define void @u32_mad24(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
1818 ret void
1919 }
2020
21 ; EG-CHECK: @i16_mad24
21 ; EG-CHECK-LABEL: @i16_mad24
2222 ; EG-CHECK-DAG: VTX_READ_16 [[A:T[0-9]\.X]], T{{[0-9]}}.X, 40
2323 ; EG-CHECK-DAG: VTX_READ_16 [[B:T[0-9]\.X]], T{{[0-9]}}.X, 44
2424 ; EG-CHECK-DAG: VTX_READ_16 [[C:T[0-9]\.X]], T{{[0-9]}}.X, 48
2929 ; EG-CHECK: 16
3030 ; EG-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]], literal.x
3131 ; EG-CHECK: 16
32 ; SI-CHECK: @i16_mad24
32 ; SI-CHECK-LABEL: @i16_mad24
3333 ; SI-CHECK: V_MAD_U32_U24 [[MAD:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
3434 ; SI-CHECK: V_LSHLREV_B32_e32 [[LSHL:v[0-9]]], 16, [[MAD]]
3535 ; SI-CHECK: V_ASHRREV_I32_e32 v{{[0-9]}}, 16, [[LSHL]]
4343 ret void
4444 }
4545
46 ; EG-CHECK: @i8_mad24
46 ; EG-CHECK-LABEL: @i8_mad24
4747 ; EG-CHECK-DAG: VTX_READ_8 [[A:T[0-9]\.X]], T{{[0-9]}}.X, 40
4848 ; EG-CHECK-DAG: VTX_READ_8 [[B:T[0-9]\.X]], T{{[0-9]}}.X, 44
4949 ; EG-CHECK-DAG: VTX_READ_8 [[C:T[0-9]\.X]], T{{[0-9]}}.X, 48
5454 ; EG-CHECK: 24
5555 ; EG-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]], literal.x
5656 ; EG-CHECK: 24
57 ; SI-CHECK: @i8_mad24
57 ; SI-CHECK-LABEL: @i8_mad24
5858 ; SI-CHECK: V_MAD_U32_U24 [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
5959 ; SI-CHECK: V_LSHLREV_B32_e32 [[LSHL:v[0-9]]], 24, [[MUL]]
6060 ; SI-CHECK: V_ASHRREV_I32_e32 v{{[0-9]}}, 24, [[LSHL]]
11 ; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG-CHECK
22 ; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
33
4 ; EG-CHECK: @u32_mul24
4 ; EG-CHECK-LABEL: @u32_mul24
55 ; EG-CHECK: MUL_UINT24 {{[* ]*}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, KC0[2].W
6 ; SI-CHECK: @u32_mul24
6 ; SI-CHECK-LABEL: @u32_mul24
77 ; SI-CHECK: V_MUL_U32_U24
88
99 define void @u32_mul24(i32 addrspace(1)* %out, i32 %a, i32 %b) {
1717 ret void
1818 }
1919
20 ; EG-CHECK: @i16_mul24
20 ; EG-CHECK-LABEL: @i16_mul24
2121 ; EG-CHECK-DAG: VTX_READ_16 [[A:T[0-9]\.X]], T{{[0-9]}}.X, 40
2222 ; EG-CHECK-DAG: VTX_READ_16 [[B:T[0-9]\.X]], T{{[0-9]}}.X, 44
2323 ; The order of A and B does not matter.
2727 ; EG-CHECK: 16
2828 ; EG-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]], literal.x
2929 ; EG-CHECK: 16
30 ; SI-CHECK: @i16_mul24
30 ; SI-CHECK-LABEL: @i16_mul24
3131 ; SI-CHECK: V_MUL_U32_U24_e{{(32|64)}} [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
3232 ; SI-CHECK: V_LSHLREV_B32_e32 [[LSHL:v[0-9]]], 16, [[MUL]]
3333 ; SI-CHECK: V_ASHRREV_I32_e32 v{{[0-9]}}, 16, [[LSHL]]
4040 ret void
4141 }
4242
43 ; EG-CHECK: @i8_mul24
43 ; EG-CHECK-LABEL: @i8_mul24
4444 ; EG-CHECK-DAG: VTX_READ_8 [[A:T[0-9]\.X]], T{{[0-9]}}.X, 40
4545 ; EG-CHECK-DAG: VTX_READ_8 [[B:T[0-9]\.X]], T{{[0-9]}}.X, 44
4646 ; The order of A and B does not matter.
5050 ; EG-CHECK: 24
5151 ; EG-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]], literal.x
5252 ; EG-CHECK: 24
53 ; SI-CHECK: @i8_mul24
53 ; SI-CHECK-LABEL: @i8_mul24
5454 ; SI-CHECK: V_MUL_U32_U24_e{{(32|64)}} [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
5555 ; SI-CHECK: V_LSHLREV_B32_e32 [[LSHL:v[0-9]]], 24, [[MUL]]
5656 ; SI-CHECK: V_ASHRREV_I32_e32 v{{[0-9]}}, 24, [[LSHL]]
0 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s
11 ;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
22
3 ;EG-CHECK: @ashr_v2i32
3 ;EG-CHECK-LABEL: @ashr_v2i32
44 ;EG-CHECK: ASHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
55 ;EG-CHECK: ASHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
66
7 ;SI-CHECK: @ashr_v2i32
7 ;SI-CHECK-LABEL: @ashr_v2i32
88 ;SI-CHECK: V_ASHR_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
99 ;SI-CHECK: V_ASHR_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
1010
1717 ret void
1818 }
1919
20 ;EG-CHECK: @ashr_v4i32
20 ;EG-CHECK-LABEL: @ashr_v4i32
2121 ;EG-CHECK: ASHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
2222 ;EG-CHECK: ASHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
2323 ;EG-CHECK: ASHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
2424 ;EG-CHECK: ASHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
2525
26 ;SI-CHECK: @ashr_v4i32
26 ;SI-CHECK-LABEL: @ashr_v4i32
2727 ;SI-CHECK: V_ASHR_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
2828 ;SI-CHECK: V_ASHR_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
2929 ;SI-CHECK: V_ASHR_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
3838 ret void
3939 }
4040
41 ;EG-CHECK: @ashr_i64
41 ;EG-CHECK-LABEL: @ashr_i64
4242 ;EG-CHECK: ASHR
4343
44 ;SI-CHECK: @ashr_i64
44 ;SI-CHECK-LABEL: @ashr_i64
4545 ;SI-CHECK: V_ASHR_I64
4646 define void @ashr_i64(i64 addrspace(1)* %out, i32 %in) {
4747 entry:
1818 ; SI-LABEL: @trunc_shl_i64:
1919 ; SI: S_LOAD_DWORDX2
2020 ; SI: S_LOAD_DWORDX2 [[SREG:s\[[0-9]+:[0-9]+\]]]
21 ; SI: V_LSHL_B64 v{{\[}}[[LO_VREG:[0-9]+]]:{{[0-9]+\]}}, [[SREG]], 2
21 ; SI: S_LSHL_B64 s{{\[}}[[LO_SREG:[0-9]+]]:{{[0-9]+\]}}, [[SREG]], 2
22 ; SI: MOV_B32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG]]
2223 ; SI: BUFFER_STORE_DWORD v[[LO_VREG]],
2324 define void @trunc_shl_i64(i32 addrspace(1)* %out, i64 %a) {
2425 %b = shl i64 %a, 2