llvm.org GIT mirror llvm / bb25a01
R600: Non vector only instruction can be scheduled on trans unit git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189980 91177308-0d34-0410-b5e6-96231b3b80d8 Vincent Lejeune 6 years ago
28 changed file(s) with 299 addition(s) and 146 deletion(s). Raw diff Collapse all Expand all
476476 isConstCompatible(R600InstrInfo::BankSwizzle TransSwz,
477477 const std::vector > &TransOps,
478478 unsigned ConstCount) {
479 // TransALU can't read 3 constants
480 if (ConstCount > 2)
481 return false;
479482 for (unsigned i = 0, e = TransOps.size(); i < e; ++i) {
480483 const std::pair &Src = TransOps[i];
481484 unsigned Cycle = getTransSwizzle(TransSwz, i);
88 //
99 /// \file
1010 /// \brief R600 Machine Scheduler interface
11 // TODO: Scheduling is optimised for VLIW4 arch, modify it to support TRANS slot
1211 //
1312 //===----------------------------------------------------------------------===//
1413
2827 DAG = dag;
2928 TII = static_cast(DAG->TII);
3029 TRI = static_cast(DAG->TRI);
30 VLIW5 = !DAG->MF.getTarget().getSubtarget().hasCaymanISA();
3131 MRI = &DAG->MRI;
3232 CurInstKind = IDOther;
3333 CurEmitted = 0;
341341 }
342342 }
343343
344 SUnit *R600SchedStrategy::PopInst(std::vector &Q) {
344 SUnit *R600SchedStrategy::PopInst(std::vector &Q, bool AnyALU) {
345345 if (Q.empty())
346346 return NULL;
347347 for (std::vector::reverse_iterator It = Q.rbegin(), E = Q.rend();
348348 It != E; ++It) {
349349 SUnit *SU = *It;
350350 InstructionsGroupCandidate.push_back(SU->getInstr());
351 if (TII->fitsConstReadLimitations(InstructionsGroupCandidate)) {
351 if (TII->fitsConstReadLimitations(InstructionsGroupCandidate)
352 && (!AnyALU || !TII->isVectorOnly(SU->getInstr()))
353 ) {
352354 InstructionsGroupCandidate.pop_back();
353355 Q.erase((It + 1).base());
354356 return SU;
372374 DEBUG(dbgs() << "New Slot\n");
373375 assert (OccupedSlotsMask && "Slot wasn't filled");
374376 OccupedSlotsMask = 0;
377 // if (HwGen == AMDGPUSubtarget::NORTHERN_ISLANDS)
378 // OccupedSlotsMask |= 16;
375379 InstructionsGroupCandidate.clear();
376380 LoadAlu();
377381 }
408412 }
409413 }
410414
411 SUnit *R600SchedStrategy::AttemptFillSlot(unsigned Slot) {
415 SUnit *R600SchedStrategy::AttemptFillSlot(unsigned Slot, bool AnyAlu) {
412416 static const AluKind IndexToID[] = {AluT_X, AluT_Y, AluT_Z, AluT_W};
413 SUnit *SlotedSU = PopInst(AvailableAlus[IndexToID[Slot]]);
417 SUnit *SlotedSU = PopInst(AvailableAlus[IndexToID[Slot]], AnyAlu);
414418 if (SlotedSU)
415419 return SlotedSU;
416 SUnit *UnslotedSU = PopInst(AvailableAlus[AluAny]);
420 SUnit *UnslotedSU = PopInst(AvailableAlus[AluAny], AnyAlu);
417421 if (UnslotedSU)
418422 AssignSlot(UnslotedSU->getInstr(), Slot);
419423 return UnslotedSU;
433437 // Bottom up scheduling : predX must come first
434438 if (!AvailableAlus[AluPredX].empty()) {
435439 OccupedSlotsMask |= 31;
436 return PopInst(AvailableAlus[AluPredX]);
440 return PopInst(AvailableAlus[AluPredX], false);
437441 }
438442 // Flush physical reg copies (RA will discard them)
439443 if (!AvailableAlus[AluDiscarded].empty()) {
440444 OccupedSlotsMask |= 31;
441 return PopInst(AvailableAlus[AluDiscarded]);
445 return PopInst(AvailableAlus[AluDiscarded], false);
442446 }
443447 // If there is a T_XYZW alu available, use it
444448 if (!AvailableAlus[AluT_XYZW].empty()) {
445449 OccupedSlotsMask |= 15;
446 return PopInst(AvailableAlus[AluT_XYZW]);
450 return PopInst(AvailableAlus[AluT_XYZW], false);
447451 }
448452 }
449453 bool TransSlotOccuped = OccupedSlotsMask & 16;
450 if (!TransSlotOccuped) {
454 if (!TransSlotOccuped && VLIW5) {
451455 if (!AvailableAlus[AluTrans].empty()) {
452456 OccupedSlotsMask |= 16;
453 return PopInst(AvailableAlus[AluTrans]);
457 return PopInst(AvailableAlus[AluTrans], false);
458 }
459 SUnit *SU = AttemptFillSlot(3, true);
460 if (SU) {
461 OccupedSlotsMask |= 16;
462 return SU;
454463 }
455464 }
456465 for (int Chan = 3; Chan > -1; --Chan) {
457466 bool isOccupied = OccupedSlotsMask & (1 << Chan);
458467 if (!isOccupied) {
459 SUnit *SU = AttemptFillSlot(Chan);
468 SUnit *SU = AttemptFillSlot(Chan, false);
460469 if (SU) {
461470 OccupedSlotsMask |= (1 << Chan);
462471 InstructionsGroupCandidate.push_back(SU->getInstr());
8383
8484 private:
8585 std::vector InstructionsGroupCandidate;
86 bool VLIW5;
8687
8788 int getInstKind(SUnit *SU);
8889 bool regBelongsToClass(unsigned Reg, const TargetRegisterClass *RC) const;
8990 AluKind getAluKind(SUnit *SU) const;
9091 void LoadAlu();
9192 unsigned AvailablesAluCount() const;
92 SUnit *AttemptFillSlot (unsigned Slot);
93 SUnit *AttemptFillSlot (unsigned Slot, bool AnyAlu);
9394 void PrepareNextSlot();
94 SUnit *PopInst(std::vector &Q);
95 SUnit *PopInst(std::vector &Q, bool AnyALU);
9596
9697 void AssignSlot(MachineInstr *MI, unsigned Slot);
9798 SUnit* pickAlu();
5757 private:
5858 const R600InstrInfo *TII;
5959 const R600RegisterInfo &TRI;
60 bool VLIW5;
61 bool ConsideredInstUsesAlreadyWrittenVectorElement;
6062
6163 unsigned getSlot(const MachineInstr *MI) const {
6264 return TRI.getHWRegChan(MI->getOperand(0).getReg());
7375 MachineBasicBlock::instr_iterator BI = I.getInstrIterator();
7476 if (I->isBundle())
7577 BI++;
78 int LastDstChan = -1;
7679 do {
80 bool isTrans = false;
81 int BISlot = getSlot(BI);
82 if (LastDstChan >= BISlot)
83 isTrans = true;
84 LastDstChan = BISlot;
7785 if (TII->isPredicated(BI))
7886 continue;
7987 int OperandIdx = TII->getOperandIdx(BI->getOpcode(), AMDGPU::OpName::write);
8492 continue;
8593 }
8694 unsigned Dst = BI->getOperand(DstIdx).getReg();
87 if (TII->isTransOnly(BI)) {
95 if (isTrans || TII->isTransOnly(BI)) {
8896 Result[Dst] = AMDGPU::PS;
8997 continue;
9098 }
141149 MachineDominatorTree &MDT)
142150 : VLIWPacketizerList(MF, MLI, MDT, true),
143151 TII (static_cast(MF.getTarget().getInstrInfo())),
144 TRI(TII->getRegisterInfo()) { }
152 TRI(TII->getRegisterInfo()) {
153 VLIW5 = !MF.getTarget().getSubtarget().hasCaymanISA();
154 }
145155
146156 // initPacketizerState - initialize some internal flags.
147 void initPacketizerState() { }
157 void initPacketizerState() {
158 ConsideredInstUsesAlreadyWrittenVectorElement = false;
159 }
148160
149161 // ignorePseudoInstruction - Ignore bundling of pseudo instructions.
150162 bool ignorePseudoInstruction(MachineInstr *MI, MachineBasicBlock *MBB) {
171183 // together.
172184 bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
173185 MachineInstr *MII = SUI->getInstr(), *MIJ = SUJ->getInstr();
174 if (getSlot(MII) <= getSlot(MIJ) && !TII->isTransOnly(MII))
175 return false;
186 if (getSlot(MII) == getSlot(MIJ))
187 ConsideredInstUsesAlreadyWrittenVectorElement = true;
176188 // Do MII and MIJ share the same pred_sel ?
177189 int OpI = TII->getOperandIdx(MII->getOpcode(), AMDGPU::OpName::pred_sel),
178190 OpJ = TII->getOperandIdx(MIJ->getOpcode(), AMDGPU::OpName::pred_sel);
210222 std::vector &BS,
211223 bool &isTransSlot) {
212224 isTransSlot = TII->isTransOnly(MI);
225 assert (!isTransSlot || VLIW5);
226
227 // Is the dst reg sequence legal ?
228 if (!isTransSlot && !CurrentPacketMIs.empty()) {
229 if (getSlot(MI) <= getSlot(CurrentPacketMIs.back())) {
230 if (ConsideredInstUsesAlreadyWrittenVectorElement &&
231 !TII->isVectorOnly(MI) && VLIW5) {
232 isTransSlot = true;
233 DEBUG(dbgs() << "Considering as Trans Inst :"; MI->dump(););
234 }
235 else
236 return false;
237 }
238 }
213239
214240 // Are the Constants limitations met ?
215241 CurrentPacketMIs.push_back(MI);
277303 return It;
278304 }
279305 endPacket(MI->getParent(), MI);
306 if (TII->isTransOnly(MI))
307 return MI;
280308 return VLIWPacketizerList::addToPacket(MI);
281309 }
282310 };
1818 }
1919
2020 ;EG-CHECK: @test4
21 ;EG-CHECK: AND_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
22 ;EG-CHECK: AND_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
23 ;EG-CHECK: AND_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
24 ;EG-CHECK: AND_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
21 ;EG-CHECK: AND_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
22 ;EG-CHECK: AND_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
23 ;EG-CHECK: AND_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
24 ;EG-CHECK: AND_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
2525
2626 ;SI-CHECK: @test4
2727 ;SI-CHECK: V_AND_B32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}}
11 ; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s --check-prefix=SI-CHECK
22
33 ; R600-CHECK: @fadd_f32
4 ; R600-CHECK: ADD * T{{[0-9]+\.[XYZW]}}, KC0[2].Z, KC0[2].W
4 ; R600-CHECK: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, KC0[2].W
55 ; SI-CHECK: @fadd_f32
66 ; SI-CHECK: V_ADD_F32
77 define void @fadd_f32(float addrspace(1)* %out, float %a, float %b) {
1212 }
1313
1414 ; R600-CHECK: @fadd_v2f32
15 ; R600-CHECK-DAG: ADD * T{{[0-9]\.[XYZW]}}, KC0[3].X, KC0[3].Z
16 ; R600-CHECK-DAG: ADD * T{{[0-9]\.[XYZW]}}, KC0[2].W, KC0[3].Y
15 ; R600-CHECK-DAG: ADD {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[3].X, KC0[3].Z
16 ; R600-CHECK-DAG: ADD {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[2].W, KC0[3].Y
1717 ; SI-CHECK: @fadd_v2f32
1818 ; SI-CHECK: V_ADD_F32
1919 ; SI-CHECK: V_ADD_F32
2525 }
2626
2727 ; R600-CHECK: @fadd_v4f32
28 ; R600-CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
29 ; R600-CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
30 ; R600-CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
31 ; R600-CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
28 ; R600-CHECK: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
29 ; R600-CHECK: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
30 ; R600-CHECK: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
31 ; R600-CHECK: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
3232 ; SI-CHECK: @fadd_v4f32
3333 ; SI-CHECK: V_ADD_F32
3434 ; SI-CHECK: V_ADD_F32
11
22 ;Not checking arguments 2 and 3 to CNDE, because they may change between
33 ;registers and literal.x depending on what the optimizer does.
4 ;CHECK: CNDE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
4 ;CHECK: CNDE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
55
66 define void @test(i32 addrspace(1)* %out, float addrspace(1)* %in) {
77 entry:
0 ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
11
22 ; CHECK: @fcmp_sext
3 ; CHECK: SETE_DX10 * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
3 ; CHECK: SETE_DX10 T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
44
55 define void @fcmp_sext(i32 addrspace(1)* %out, float addrspace(1)* %in) {
66 entry:
77 ; R600-CHECK: @fdiv_v2f32
88 ; R600-CHECK-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Z
99 ; R600-CHECK-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Y
10 ; R600-CHECK-DAG: MUL_IEEE T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS
11 ; R600-CHECK-DAG: MUL_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[2].W, PS
10 ; R600-CHECK-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS
11 ; R600-CHECK-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, PS
1212 ; SI-CHECK: @fdiv_v2f32
1313 ; SI-CHECK-DAG: V_RCP_F32
1414 ; SI-CHECK-DAG: V_MUL_F32
2626 ; R600-CHECK-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
2727 ; R600-CHECK-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
2828 ; R600-CHECK-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
29 ; R600-CHECK-DAG: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
30 ; R600-CHECK-DAG: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
31 ; R600-CHECK-DAG: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
32 ; R600-CHECK-DAG: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
29 ; R600-CHECK-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
30 ; R600-CHECK-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
31 ; R600-CHECK-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
32 ; R600-CHECK-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
3333 ; SI-CHECK: @fdiv_v4f32
3434 ; SI-CHECK-DAG: V_RCP_F32
3535 ; SI-CHECK-DAG: V_MUL_F32
11 ; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s --check-prefix=SI-CHECK
22
33 ; R600-CHECK: @fmul_f32
4 ; R600-CHECK: MUL_IEEE * {{T[0-9]+\.[XYZW]}}, KC0[2].Z, KC0[2].W
4 ; R600-CHECK: MUL_IEEE {{\** *}}{{T[0-9]+\.[XYZW]}}, KC0[2].Z, KC0[2].W
55 ; SI-CHECK: @fmul_f32
66 ; SI-CHECK: V_MUL_F32
77 define void @fmul_f32(float addrspace(1)* %out, float %a, float %b) {
1616 declare void @llvm.AMDGPU.store.output(float, i32)
1717
1818 ; R600-CHECK: @fmul_v2f32
19 ; R600-CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW]}}
20 ; R600-CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW]}}
19 ; R600-CHECK: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}
20 ; R600-CHECK: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}
2121 ; SI-CHECK: @fmul_v2f32
2222 ; SI-CHECK: V_MUL_F32
2323 ; SI-CHECK: V_MUL_F32
2929 }
3030
3131 ; R600-CHECK: @fmul_v4f32
32 ; R600-CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
33 ; R600-CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
34 ; R600-CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
35 ; R600-CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
32 ; R600-CHECK: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
33 ; R600-CHECK: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
34 ; R600-CHECK: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
35 ; R600-CHECK: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
3636 ; SI-CHECK: @fmul_v4f32
3737 ; SI-CHECK: V_MUL_F32
3838 ; SI-CHECK: V_MUL_F32
1111
1212 ; CHECK: @fneg_v4
1313 ; CHECK: -PV
14 ; CHECK: -PV
14 ; CHECK: -T
1515 ; CHECK: -PV
1616 ; CHECK: -PV
1717 define void @fneg_v4(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) {
11 ; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s --check-prefix=SI-CHECK
22
33 ; R600-CHECK: @fp_to_sint_v2i32
4 ; R600-CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
5 ; R600-CHECK: FLT_TO_INT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
4 ; R600-CHECK: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
5 ; R600-CHECK: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
66 ; SI-CHECK: @fp_to_sint_v2i32
77 ; SI-CHECK: V_CVT_I32_F32_e32
88 ; SI-CHECK: V_CVT_I32_F32_e32
1313 }
1414
1515 ; R600-CHECK: @fp_to_sint_v4i32
16 ; R600-CHECK: FLT_TO_INT {{[* ]*}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
17 ; R600-CHECK: FLT_TO_INT {{[* ]*}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
18 ; R600-CHECK: FLT_TO_INT {{[* ]*}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
19 ; R600-CHECK: FLT_TO_INT {{[* ]*}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
16 ; R600-CHECK: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
17 ; R600-CHECK: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW]}}
18 ; R600-CHECK: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
19 ; R600-CHECK: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
2020 ; SI-CHECK: @fp_to_sint_v4i32
2121 ; SI-CHECK: V_CVT_I32_F32_e32
2222 ; SI-CHECK: V_CVT_I32_F32_e32
11 ; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s --check-prefix=SI-CHECK
22
33 ; R600-CHECK: @fp_to_uint_v2i32
4 ; R600-CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
5 ; R600-CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
4 ; R600-CHECK: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
5 ; R600-CHECK: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
66 ; SI-CHECK: @fp_to_uint_v2i32
77 ; SI-CHECK: V_CVT_U32_F32_e32
88 ; SI-CHECK: V_CVT_U32_F32_e32
1414 }
1515
1616 ; R600-CHECK: @fp_to_uint_v4i32
17 ; R600-CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
18 ; R600-CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
19 ; R600-CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
20 ; R600-CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
17 ; R600-CHECK: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
18 ; R600-CHECK: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
19 ; R600-CHECK: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
20 ; R600-CHECK: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
2121 ; SI-CHECK: @fp_to_uint_v4i32
2222 ; SI-CHECK: V_CVT_U32_F32_e32
2323 ; SI-CHECK: V_CVT_U32_F32_e32
11 ; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s --check-prefix=SI-CHECK
22
33 ; R600-CHECK: @fsub_f32
4 ; R600-CHECK: ADD * T{{[0-9]+\.[XYZW]}}, KC0[2].Z, -KC0[2].W
4 ; R600-CHECK: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, -KC0[2].W
55 ; SI-CHECK: @fsub_f32
66 ; SI-CHECK: V_SUB_F32
77 define void @fsub_f32(float addrspace(1)* %out, float %a, float %b) {
1616 declare void @llvm.AMDGPU.store.output(float, i32)
1717
1818 ; R600-CHECK: @fsub_v2f32
19 ; R600-CHECK-DAG: ADD * T{{[0-9]+\.[XYZW]}}, KC0[3].X, -KC0[3].Z
20 ; R600-CHECK-DAG: ADD * T{{[0-9]+\.[XYZW]}}, KC0[2].W, -KC0[3].Y
19 ; R600-CHECK-DAG: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, -KC0[3].Z
20 ; R600-CHECK-DAG: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, -KC0[3].Y
2121 ; SI-CHECK: @fsub_v2f32
2222 ; SI-CHECK: V_SUB_F32
2323 ; SI-CHECK: V_SUB_F32
2929 }
3030
3131 ; R600-CHECK: @fsub_v4f32
32 ; R600-CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}}
33 ; R600-CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}}
34 ; R600-CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}}
35 ; R600-CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}}
32 ; R600-CHECK: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}}
33 ; R600-CHECK: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}}
34 ; R600-CHECK: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}}
35 ; R600-CHECK: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}}
3636 ; SI-CHECK: @fsub_v4f32
3737 ; SI-CHECK: V_SUB_F32
3838 ; SI-CHECK: V_SUB_F32
22 ;Test that a select with reversed True/False values is correctly lowered
33 ;to a SETNE_INT. There should only be one SETNE_INT instruction.
44
5 ;CHECK: SETNE_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
5 ;CHECK: SETNE_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
66 ;CHECK-NOT: SETNE_INT
77
88 define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
66 ; ADD_INT literal.x KC0[2].Z, 5
77
88 ; CHECK: @i32_literal
9 ; CHECK: ADD_INT * T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x
9 ; CHECK: ADD_INT {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x
10 ; CHECK-NEXT: LSHR
1011 ; CHECK-NEXT: 5
1112 define void @i32_literal(i32 addrspace(1)* %out, i32 %in) {
1213 entry:
2223 ; ADD literal.x KC0[2].Z, 5.0
2324
2425 ; CHECK: @float_literal
25 ; CHECK: ADD * T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x
26 ; CHECK: ADD {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x
27 ; CHECK-NEXT: LSHR
2628 ; CHECK-NEXT: 1084227584(5.0
2729 define void @float_literal(float addrspace(1)* %out, float %in) {
2830 entry:
3335
3436 ; Make sure inline literals are folded into REG_SEQUENCE instructions.
3537 ; CHECK: @inline_literal_reg_sequence
36 ; CHECK: MOV T[[GPR:[0-9]]].X, 0.0
37 ; CHECK-NEXT: MOV T[[GPR]].Y, 0.0
38 ; CHECK-NEXT: MOV T[[GPR]].Z, 0.0
39 ; CHECK-NEXT: MOV * T[[GPR]].W, 0.0
38 ; CHECK: MOV {{\** *}}T[[GPR:[0-9]]].X, 0.0
39 ; CHECK-NEXT: MOV {{\** *}}T[[GPR]].Y, 0.0
40 ; CHECK-NEXT: MOV {{\** *}}T[[GPR]].Z, 0.0
41 ; CHECK-NEXT: MOV {{\** *}}T[[GPR]].W, 0.0
4042
4143 define void @inline_literal_reg_sequence(<4 x i32> addrspace(1)* %out) {
4244 entry:
11 ; RUN: llc < %s -march=r600 -mcpu=verde | FileCheck --check-prefix=SI-CHECK %s
22
33 ; R600-CHECK: @amdgpu_trunc
4 ; R600-CHECK: TRUNC * T{{[0-9]+\.[XYZW]}}, KC0[2].Z
4 ; R600-CHECK: TRUNC T{{[0-9]+\.[XYZW]}}, KC0[2].Z
55 ; SI-CHECK: @amdgpu_trunc
66 ; SI-CHECK: V_TRUNC_F32
77
1818 ; R600-CHECK: @rotl
1919 ; R600-CHECK: SUB_INT {{\** T[0-9]+\.[XYZW]}}, literal.x
2020 ; R600-CHECK-NEXT: 32
21 ; R600-CHECK: BIT_ALIGN_INT {{\** T[0-9]+\.[XYZW]}}, KC0[2].Z, KC0[2].Z, PV.{{[XYZW]}}
21 ; R600-CHECK: BIT_ALIGN_INT {{T[0-9]+\.[XYZW]}}, KC0[2].Z, KC0[2].Z, PV.{{[XYZW]}}
22
2223
2324 ; SI-CHECK: @rotl
2425 ; SI-CHECK: V_SUB_I32_e64 [[DST:VGPR[0-9]+]], 32, {{[SV]GPR[0-9]+}}
0 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
11
22 ;CHECK-NOT: SETE
3 ;CHECK: CNDE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1.0, literal.x,
4 ;CHECK-NEXT: {{[-0-9]+\(2.0}}
3 ;CHECK: CNDE {{\*?}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1.0, literal.x,
4 ;CHECK: 1073741824
55 define void @test(float addrspace(1)* %out, float addrspace(1)* %in) {
66 %1 = load float addrspace(1)* %in
77 %2 = fcmp oeq float %1, 0.0
0 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
11
22 ;CHECK-NOT: SETE_INT
3 ;CHECK: CNDE_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, literal.x,
3 ;CHECK: CNDE_INT {{\*?}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, literal.x,
44 ;CHECK-NEXT: 2
55 define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
66 %1 = load i32 addrspace(1)* %in
44 ; SET*DX10 instructions.
55
66 ; CHECK: @fcmp_une_select_fptosi
7 ; CHECK: SETNE_DX10 * T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
7 ; CHECK: SETNE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
8 ; CHECK-NEXT: LSHR
89 ; CHECK-NEXT: 1084227584(5.000000e+00)
910 define void @fcmp_une_select_fptosi(i32 addrspace(1)* %out, float %in) {
1011 entry:
1718 }
1819
1920 ; CHECK: @fcmp_une_select_i32
20 ; CHECK: SETNE_DX10 * T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
21 ; CHECK: SETNE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
22 ; CHECK-NEXT: LSHR
2123 ; CHECK-NEXT: 1084227584(5.000000e+00)
2224 define void @fcmp_une_select_i32(i32 addrspace(1)* %out, float %in) {
2325 entry:
2830 }
2931
3032 ; CHECK: @fcmp_ueq_select_fptosi
31 ; CHECK: SETE_DX10 * T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
33 ; CHECK: SETE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
34 ; CHECK-NEXT: LSHR
3235 ; CHECK-NEXT: 1084227584(5.000000e+00)
3336 define void @fcmp_ueq_select_fptosi(i32 addrspace(1)* %out, float %in) {
3437 entry:
4144 }
4245
4346 ; CHECK: @fcmp_ueq_select_i32
44 ; CHECK: SETE_DX10 * T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
47 ; CHECK: SETE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
48 ; CHECK-NEXT: LSHR
4549 ; CHECK-NEXT: 1084227584(5.000000e+00)
4650 define void @fcmp_ueq_select_i32(i32 addrspace(1)* %out, float %in) {
4751 entry:
5256 }
5357
5458 ; CHECK: @fcmp_ugt_select_fptosi
55 ; CHECK: SETGT_DX10 * T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
59 ; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
60 ; CHECK-NEXT: LSHR
5661 ; CHECK-NEXT: 1084227584(5.000000e+00)
5762 define void @fcmp_ugt_select_fptosi(i32 addrspace(1)* %out, float %in) {
5863 entry:
6570 }
6671
6772 ; CHECK: @fcmp_ugt_select_i32
68 ; CHECK: SETGT_DX10 * T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
73 ; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
74 ; CHECK-NEXT: LSHR
6975 ; CHECK-NEXT: 1084227584(5.000000e+00)
7076 define void @fcmp_ugt_select_i32(i32 addrspace(1)* %out, float %in) {
7177 entry:
7682 }
7783
7884 ; CHECK: @fcmp_uge_select_fptosi
79 ; CHECK: SETGE_DX10 * T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
85 ; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
86 ; CHECK-NEXT: LSHR
8087 ; CHECK-NEXT: 1084227584(5.000000e+00)
8188 define void @fcmp_uge_select_fptosi(i32 addrspace(1)* %out, float %in) {
8289 entry:
8996 }
9097
9198 ; CHECK: @fcmp_uge_select_i32
92 ; CHECK: SETGE_DX10 * T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
99 ; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
100 ; CHECK-NEXT: LSHR
93101 ; CHECK-NEXT: 1084227584(5.000000e+00)
94102 define void @fcmp_uge_select_i32(i32 addrspace(1)* %out, float %in) {
95103 entry:
100108 }
101109
102110 ; CHECK: @fcmp_ule_select_fptosi
103 ; CHECK: SETGE_DX10 * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
111 ; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
112 ; CHECK-NEXT: LSHR
104113 ; CHECK-NEXT: 1084227584(5.000000e+00)
105114 define void @fcmp_ule_select_fptosi(i32 addrspace(1)* %out, float %in) {
106115 entry:
113122 }
114123
115124 ; CHECK: @fcmp_ule_select_i32
116 ; CHECK: SETGE_DX10 * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
125 ; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
126 ; CHECK-NEXT: LSHR
117127 ; CHECK-NEXT: 1084227584(5.000000e+00)
118128 define void @fcmp_ule_select_i32(i32 addrspace(1)* %out, float %in) {
119129 entry:
124134 }
125135
126136 ; CHECK: @fcmp_ult_select_fptosi
127 ; CHECK: SETGT_DX10 * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
137 ; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
138 ; CHECK-NEXT: LSHR
128139 ; CHECK-NEXT: 1084227584(5.000000e+00)
129140 define void @fcmp_ult_select_fptosi(i32 addrspace(1)* %out, float %in) {
130141 entry:
137148 }
138149
139150 ; CHECK: @fcmp_ult_select_i32
140 ; CHECK: SETGT_DX10 * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
151 ; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
152 ; CHECK-NEXT: LSHR
141153 ; CHECK-NEXT: 1084227584(5.000000e+00)
142154 define void @fcmp_ult_select_i32(i32 addrspace(1)* %out, float %in) {
143155 entry:
99 ; EG-CHECK: @store_i8
1010 ; EG-CHECK: MEM_RAT MSKOR T[[RW_GPR:[0-9]]].XW, T{{[0-9]}}.X
1111 ; EG-CHECK: VTX_READ_8 [[VAL:T[0-9]\.X]], [[VAL]]
12 ; IG 0: Get the byte index
13 ; EG-CHECK: AND_INT * T{{[0-9]}}.[[BI_CHAN:[XYZW]]], KC0[2].Y, literal.x
12 ; IG 0: Get the byte index and truncate the value
13 ; EG-CHECK: AND_INT T{{[0-9]}}.[[BI_CHAN:[XYZW]]], KC0[2].Y, literal.x
14 ; EG-CHECK-NEXT: AND_INT * T{{[0-9]}}.[[TRUNC_CHAN:[XYZW]]], [[VAL]], literal.y
15 ; EG-CHECK-NEXT: 3(4.203895e-45), 255(3.573311e-43)
16 ; IG 1: Calculate the shift amount for the mask
17 ; EG-CHECK: LSHL * T{{[0-9]}}.[[SHIFT_CHAN:[XYZW]]], PV.[[BI_CHAN]], literal.x
1418 ; EG-CHECK-NEXT: 3
15 ; IG 1: Truncate the value and calculated the shift amount for the mask
16 ; EG-CHECK: AND_INT T{{[0-9]}}.[[TRUNC_CHAN:[XYZW]]], [[VAL]], literal.x
17 ; EG-CHECK: LSHL * T{{[0-9]}}.[[SHIFT_CHAN:[XYZW]]], PV.[[BI_CHAN]], literal.y
18 ; EG-CHECK: 255(3.573311e-43), 3
1919 ; IG 2: Shift the value and the mask
20 ; EG-CHECK: LSHL T[[RW_GPR]].X, PV.[[TRUNC_CHAN]], PV.[[SHIFT_CHAN]]
20 ; EG-CHECK: LSHL T[[RW_GPR]].X, T{{[0-9]}}.[[TRUNC_CHAN]], PV.[[SHIFT_CHAN]]
2121 ; EG-CHECK: LSHL * T[[RW_GPR]].W, literal.x, PV.[[SHIFT_CHAN]]
2222 ; EG-CHECK-NEXT: 255
2323 ; IG 3: Initialize the Y and Z channels to zero
3838 ; EG-CHECK: @store_i16
3939 ; EG-CHECK: MEM_RAT MSKOR T[[RW_GPR:[0-9]]].XW, T{{[0-9]}}.X
4040 ; EG-CHECK: VTX_READ_16 [[VAL:T[0-9]\.X]], [[VAL]]
41 ; IG 0: Get the byte index
42 ; EG-CHECK: AND_INT * T{{[0-9]}}.[[BI_CHAN:[XYZW]]], KC0[2].Y, literal.x
43 ; EG-CHECK-NEXT: 3
44 ; IG 1: Truncate the value and calculated the shift amount for the mask
45 ; EG-CHECK: AND_INT T{{[0-9]}}.[[TRUNC_CHAN:[XYZW]]], [[VAL]], literal.x
46 ; EG-CHECK: LSHL * T{{[0-9]}}.[[SHIFT_CHAN:[XYZW]]], PV.[[BI_CHAN]], literal.y
47 ; EG-CHECK: 65535(9.183409e-41), 3
41 ; IG 0: Get the byte index and truncate the value
42 ; EG-CHECK: AND_INT T{{[0-9]}}.[[BI_CHAN:[XYZW]]], KC0[2].Y, literal.x
43 ; EG-CHECK: AND_INT * T{{[0-9]}}.[[TRUNC_CHAN:[XYZW]]], [[VAL]], literal.y
44 ; EG-CHECK-NEXT: 3(4.203895e-45), 65535(9.183409e-41)
45 ; IG 1: Calculate the shift amount for the mask
46 ; EG-CHECK: LSHL * T{{[0-9]}}.[[SHIFT_CHAN:[XYZW]]], PV.[[BI_CHAN]], literal.x
47 ; EG-CHECK: 3
4848 ; IG 2: Shift the value and the mask
49 ; EG-CHECK: LSHL T[[RW_GPR]].X, PV.[[TRUNC_CHAN]], PV.[[SHIFT_CHAN]]
49 ; EG-CHECK: LSHL T[[RW_GPR]].X, T{{[0-9]}}.[[TRUNC_CHAN]], PV.[[SHIFT_CHAN]]
5050 ; EG-CHECK: LSHL * T[[RW_GPR]].W, literal.x, PV.[[SHIFT_CHAN]]
5151 ; EG-CHECK-NEXT: 65535
5252 ; IG 3: Initialize the Y and Z channels to zero
11 ;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck --check-prefix=SI-CHECK %s
22
33 ;EG-CHECK: @test2
4 ;EG-CHECK: SUB_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
5 ;EG-CHECK: SUB_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
4 ;EG-CHECK: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
5 ;EG-CHECK: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
66
77 ;SI-CHECK: @test2
88 ;SI-CHECK: V_SUB_I32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}}
1818 }
1919
2020 ;EG-CHECK: @test4
21 ;EG-CHECK: SUB_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
22 ;EG-CHECK: SUB_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
23 ;EG-CHECK: SUB_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
24 ;EG-CHECK: SUB_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
21 ;EG-CHECK: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
22 ;EG-CHECK: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
23 ;EG-CHECK: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
24 ;EG-CHECK: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
2525
2626 ;SI-CHECK: @test4
2727 ;SI-CHECK: V_SUB_I32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}}
22 ; These tests are for condition codes that are not supported by the hardware
33
44 ; CHECK: @slt
5 ; CHECK: SETGT_INT * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
5 ; CHECK: SETGT_INT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
6 ; CHECK-NEXT: LSHR
67 ; CHECK-NEXT: 5(7.006492e-45)
78 define void @slt(i32 addrspace(1)* %out, i32 %in) {
89 entry:
1314 }
1415
1516 ; CHECK: @ult_i32
16 ; CHECK: SETGT_UINT * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
17 ; CHECK: SETGT_UINT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
18 ; CHECK-NEXT: LSHR
1719 ; CHECK-NEXT: 5(7.006492e-45)
1820 define void @ult_i32(i32 addrspace(1)* %out, i32 %in) {
1921 entry:
2426 }
2527
2628 ; CHECK: @ult_float
27 ; CHECK: SETGT * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
29 ; CHECK: SETGT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
30 ; CHECK-NEXT: LSHR
2831 ; CHECK-NEXT: 1084227584(5.000000e+00)
2932 define void @ult_float(float addrspace(1)* %out, float %in) {
3033 entry:
3538 }
3639
3740 ; CHECK: @olt
38 ; CHECK: SETGT * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
41 ; CHECK: SETGT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
42 ; CHECK-NEXT: LSHR
3943 ;CHECK-NEXT: 1084227584(5.000000e+00)
4044 define void @olt(float addrspace(1)* %out, float %in) {
4145 entry:
4650 }
4751
4852 ; CHECK: @sle
49 ; CHECK: SETGT_INT * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
53 ; CHECK: SETGT_INT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
54 ; CHECK-NEXT: LSHR
5055 ; CHECK-NEXT: 6(8.407791e-45)
5156 define void @sle(i32 addrspace(1)* %out, i32 %in) {
5257 entry:
5762 }
5863
5964 ; CHECK: @ule_i32
60 ; CHECK: SETGT_UINT * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
65 ; CHECK: SETGT_UINT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
66 ; CHECK-NEXT: LSHR
6167 ; CHECK-NEXT: 6(8.407791e-45)
6268 define void @ule_i32(i32 addrspace(1)* %out, i32 %in) {
6369 entry:
6874 }
6975
7076 ; CHECK: @ule_float
71 ; CHECK: SETGE * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
77 ; CHECK: SETGE {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
78 ; CHECK-NEXT: LSHR
7279 ; CHECK-NEXT: 1084227584(5.000000e+00)
7380 define void @ule_float(float addrspace(1)* %out, float %in) {
7481 entry:
7986 }
8087
8188 ; CHECK: @ole
82 ; CHECK: SETGE * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
89 ; CHECK: SETGE {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
90 ; CHECK-NEXT: LSHR
8391 ; CHECK-NEXT:1084227584(5.000000e+00)
8492 define void @ole(float addrspace(1)* %out, float %in) {
8593 entry:
11 ;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI-CHECK %s
22
33 ;EG-CHECK: @test_select_v2i32
4 ;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
5 ;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
4 ;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
5 ;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
66
77 ;SI-CHECK: @test_select_v2i32
88 ;SI-CHECK: V_CNDMASK_B32_e64
1919 }
2020
2121 ;EG-CHECK: @test_select_v2f32
22 ;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
23 ;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
22 ;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
23 ;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
2424
2525 ;SI-CHECK: @test_select_v2f32
2626 ;SI-CHECK: V_CNDMASK_B32_e64
3737 }
3838
3939 ;EG-CHECK: @test_select_v4i32
40 ;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
41 ;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
42 ;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
43 ;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
40 ;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
41 ;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
42 ;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
43 ;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
4444
4545 ;SI-CHECK: @test_select_v4i32
4646 ;SI-CHECK: V_CNDMASK_B32_e64
5959 }
6060
6161 ;EG-CHECK: @test_select_v4f32
62 ;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
63 ;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
64 ;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
65 ;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
62 ;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
63 ;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
64 ;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
65 ;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
6666
6767 define void @test_select_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in0, <4 x float> addrspace(1)* %in1) {
6868 entry:
22
33 ; R600-CHECK: @ngroups_x
44 ; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
5 ; R600-CHECK: MOV * [[VAL]], KC0[0].X
5 ; R600-CHECK: MOV [[VAL]], KC0[0].X
66 ; SI-CHECK: @ngroups_x
77 ; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 0
88 ; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]]
1616
1717 ; R600-CHECK: @ngroups_y
1818 ; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
19 ; R600-CHECK: MOV * [[VAL]], KC0[0].Y
19 ; R600-CHECK: MOV [[VAL]], KC0[0].Y
2020 ; SI-CHECK: @ngroups_y
2121 ; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 1
2222 ; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]]
3030
3131 ; R600-CHECK: @ngroups_z
3232 ; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
33 ; R600-CHECK: MOV * [[VAL]], KC0[0].Z
33 ; R600-CHECK: MOV [[VAL]], KC0[0].Z
3434 ; SI-CHECK: @ngroups_z
3535 ; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 2
3636 ; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]]
4444
4545 ; R600-CHECK: @global_size_x
4646 ; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
47 ; R600-CHECK: MOV * [[VAL]], KC0[0].W
47 ; R600-CHECK: MOV [[VAL]], KC0[0].W
4848 ; SI-CHECK: @global_size_x
4949 ; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 3
5050 ; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]]
5858
5959 ; R600-CHECK: @global_size_y
6060 ; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
61 ; R600-CHECK: MOV * [[VAL]], KC0[1].X
61 ; R600-CHECK: MOV [[VAL]], KC0[1].X
6262 ; SI-CHECK: @global_size_y
6363 ; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 4
6464 ; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]]
7272
7373 ; R600-CHECK: @global_size_z
7474 ; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
75 ; R600-CHECK: MOV * [[VAL]], KC0[1].Y
75 ; R600-CHECK: MOV [[VAL]], KC0[1].Y
7676 ; SI-CHECK: @global_size_z
7777 ; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 5
7878 ; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]]
8686
8787 ; R600-CHECK: @local_size_x
8888 ; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
89 ; R600-CHECK: MOV * [[VAL]], KC0[1].Z
89 ; R600-CHECK: MOV [[VAL]], KC0[1].Z
9090 ; SI-CHECK: @local_size_x
9191 ; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 6
9292 ; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]]
100100
101101 ; R600-CHECK: @local_size_y
102102 ; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
103 ; R600-CHECK: MOV * [[VAL]], KC0[1].W
103 ; R600-CHECK: MOV [[VAL]], KC0[1].W
104104 ; SI-CHECK: @local_size_y
105105 ; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 7
106106 ; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]]
114114
115115 ; R600-CHECK: @local_size_z
116116 ; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
117 ; R600-CHECK: MOV * [[VAL]], KC0[2].X
117 ; R600-CHECK: MOV [[VAL]], KC0[2].X
118118 ; SI-CHECK: @local_size_z
119119 ; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 8
120120 ; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]]
0 ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
1
2 ; Every MULLO_INT must be the last instruction in its instruction group, so no MULLO_INT may be directly followed by its destination register. The register number is wrapped in {{...}} because FileCheck only treats text inside double braces as a regex.
3 ;CHECK: @fill3d
4 ;CHECK-NOT: MULLO_INT T{{[0-9]+}}
5
6 ; ModuleID = 'radeon'
7 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048-n32:64"
8 target triple = "r600--"
9
10 ; Function Attrs: nounwind
; Test kernel: stores the product of the three global sizes into %out at a
; linearised 3-D index, ((gx * global_size.y + gy) * global_size.z + gz),
; where g? = tgid.? * local_size.? + tidig.?. All arithmetic is i32 mul/add.
11 define void @fill3d(i32 addrspace(1)* nocapture %out) #0 {
12 entry:
; Value to store: global_size.x * global_size.y * global_size.z.
13 %x.i = tail call i32 @llvm.r600.read.global.size.x() #1
14 %y.i18 = tail call i32 @llvm.r600.read.global.size.y() #1
15 %mul = mul i32 %y.i18, %x.i
16 %z.i17 = tail call i32 @llvm.r600.read.global.size.z() #1
17 %mul3 = mul i32 %mul, %z.i17
; gx = tgid.x * local_size.x + tidig.x, then scaled by global_size.y.
18 %x.i.i = tail call i32 @llvm.r600.read.tgid.x() #1
19 %x.i12.i = tail call i32 @llvm.r600.read.local.size.x() #1
20 %mul26.i = mul i32 %x.i12.i, %x.i.i
21 %x.i4.i = tail call i32 @llvm.r600.read.tidig.x() #1
22 %add.i16 = add i32 %x.i4.i, %mul26.i
23 %mul7 = mul i32 %add.i16, %y.i18
; Add gy = tgid.y * local_size.y + tidig.y, then scale by global_size.z.
24 %y.i.i = tail call i32 @llvm.r600.read.tgid.y() #1
25 %y.i14.i = tail call i32 @llvm.r600.read.local.size.y() #1
26 %mul30.i = mul i32 %y.i14.i, %y.i.i
27 %y.i6.i = tail call i32 @llvm.r600.read.tidig.y() #1
28 %add.i14 = add i32 %mul30.i, %mul7
29 %mul819 = add i32 %add.i14, %y.i6.i
30 %add = mul i32 %mul819, %z.i17
; Add gz = tgid.z * local_size.z + tidig.z to form the final element index.
31 %z.i.i = tail call i32 @llvm.r600.read.tgid.z() #1
32 %z.i16.i = tail call i32 @llvm.r600.read.local.size.z() #1
33 %mul33.i = mul i32 %z.i16.i, %z.i.i
34 %z.i8.i = tail call i32 @llvm.r600.read.tidig.z() #1
35 %add.i = add i32 %z.i8.i, %mul33.i
36 %add13 = add i32 %add.i, %add
; Store the size product at %out[index].
37 %arrayidx = getelementptr inbounds i32 addrspace(1)* %out, i32 %add13
38 store i32 %mul3, i32 addrspace(1)* %arrayidx, align 4, !tbaa !3
39 ret void
40 }
41
42 ; Function Attrs: nounwind readnone
43 declare i32 @llvm.r600.read.tgid.x() #1
44
45 ; Function Attrs: nounwind readnone
46 declare i32 @llvm.r600.read.tgid.y() #1
47
48 ; Function Attrs: nounwind readnone
49 declare i32 @llvm.r600.read.tgid.z() #1
50
51 ; Function Attrs: nounwind readnone
52 declare i32 @llvm.r600.read.local.size.x() #1
53
54 ; Function Attrs: nounwind readnone
55 declare i32 @llvm.r600.read.local.size.y() #1
56
57 ; Function Attrs: nounwind readnone
58 declare i32 @llvm.r600.read.local.size.z() #1
59
60 ; Function Attrs: nounwind readnone
61 declare i32 @llvm.r600.read.tidig.x() #1
62
63 ; Function Attrs: nounwind readnone
64 declare i32 @llvm.r600.read.tidig.y() #1
65
66 ; Function Attrs: nounwind readnone
67 declare i32 @llvm.r600.read.tidig.z() #1
68
69 ; Function Attrs: nounwind readnone
70 declare i32 @llvm.r600.read.global.size.x() #1
71
72 ; Function Attrs: nounwind readnone
73 declare i32 @llvm.r600.read.global.size.y() #1
74
75 ; Function Attrs: nounwind readnone
76 declare i32 @llvm.r600.read.global.size.z() #1
77
78 attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
79 attributes #1 = { nounwind readnone }
80
81 !opencl.kernels = !{!0, !1, !2}
82
83 !0 = metadata !{null}
84 !1 = metadata !{null}
85 !2 = metadata !{void (i32 addrspace(1)*)* @fill3d}
86 !3 = metadata !{metadata !"int", metadata !4}
87 !4 = metadata !{metadata !"omnipotent char", metadata !5}
88 !5 = metadata !{metadata !"Simple C/C++ TBAA"}
11 ;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck --check-prefix=SI-CHECK %s
22
33 ;EG-CHECK: @xor_v2i32
4 ;EG-CHECK: XOR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
5 ;EG-CHECK: XOR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
4 ;EG-CHECK: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
5 ;EG-CHECK: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
66
77 ;SI-CHECK: @xor_v2i32
88 ;SI-CHECK: V_XOR_B32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}}
1818 }
1919
2020 ;EG-CHECK: @xor_v4i32
21 ;EG-CHECK: XOR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
22 ;EG-CHECK: XOR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
23 ;EG-CHECK: XOR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
24 ;EG-CHECK: XOR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
21 ;EG-CHECK: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
22 ;EG-CHECK: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
23 ;EG-CHECK: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
24 ;EG-CHECK: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
2525
2626 ;SI-CHECK: @xor_v4i32
2727 ;SI-CHECK: V_XOR_B32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}}
3838 }
3939
4040 ;EG-CHECK: @xor_i1
41 ;EG-CHECK: XOR_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], PV\.[XYZW]}}
41 ;EG-CHECK: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], PS}}
4242
4343 ;SI-CHECK: @xor_i1
4444 ;SI-CHECK: S_XOR_B64 {{SGPR[0-9]+_SGPR[0-9]+, SGPR[0-9]+_SGPR[0-9]+, SGPR[0-9]+_SGPR[0-9]+}}