llvm.org GIT mirror llvm / 033f871
Revert "ScheduleDAGInstrs: Rework schedule graph builder." This works mostly fine but breaks some stage 1 builders when compiling compiler-rt on i386. Revert for further investigation as I can't see an obvious cause/fix. This reverts commit r254577. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254586 91177308-0d34-0410-b5e6-96231b3b80d8 Matthias Braun 4 years ago
16 changed file(s) with 160 addition(s) and 281 deletion(s). Raw diff Collapse all Expand all
3232 /// An individual mapping from virtual register number to SUnit.
3333 struct VReg2SUnit {
3434 unsigned VirtReg;
35 LaneBitmask LaneMask;
3635 SUnit *SU;
3736
38 VReg2SUnit(unsigned VReg, LaneBitmask LaneMask, SUnit *SU)
39 : VirtReg(VReg), LaneMask(LaneMask), SU(SU) {}
37 VReg2SUnit(unsigned reg, SUnit *su): VirtReg(reg), SU(su) {}
4038
4139 unsigned getSparseSetIndex() const {
4240 return TargetRegisterInfo::virtReg2Index(VirtReg);
4341 }
44 };
45
46 /// Mapping from virtual register to SUnit including an operand index.
47 struct VReg2SUnitOperIdx : public VReg2SUnit {
48 unsigned OperandIndex;
49
50 VReg2SUnitOperIdx(unsigned VReg, LaneBitmask LaneMask,
51 unsigned OperandIndex, SUnit *SU)
52 : VReg2SUnit(VReg, LaneMask, SU), OperandIndex(OperandIndex) {}
5342 };
5443
5544 /// Record a physical register access.
7968 /// Track local uses of virtual registers. These uses are gathered by the DAG
8069 /// builder and may be consulted by the scheduler to avoid iterating an entire
8170 /// vreg use list.
82 typedef SparseMultiSet VReg2SUnitMultiMap;
83
84 typedef SparseMultiSet
85 VReg2SUnitOperIdxMultiMap;
71 typedef SparseMultiSet VReg2UseMap;
8672
8773 /// ScheduleDAGInstrs - A ScheduleDAG subclass for scheduling lists of
8874 /// MachineInstrs.
10894 /// it has taken responsibility for scheduling the terminator correctly.
10995 bool CanHandleTerminators;
11096
111 /// Whether lane masks should get tracked.
112 bool TrackLaneMasks;
113
11497 /// State specific to the current scheduling region.
11598 /// ------------------------------------------------
11699
133116 /// After calling BuildSchedGraph, each vreg used in the scheduling region
134117 /// is mapped to a set of SUnits. These include all local vreg uses, not
135118 /// just the uses for a singly defined vreg.
136 VReg2SUnitMultiMap VRegUses;
119 VReg2UseMap VRegUses;
137120
138121 /// State internal to DAG building.
139122 /// -------------------------------
145128 Reg2SUnitsMap Defs;
146129 Reg2SUnitsMap Uses;
147130
148 /// Tracks the last instruction(s) in this region defining each virtual
149 /// register. There may be multiple current definitions for a register with
150 /// disjunct lanemasks.
151 VReg2SUnitMultiMap CurrentVRegDefs;
152 /// Tracks the last instructions in this region using each virtual register.
153 VReg2SUnitOperIdxMultiMap CurrentVRegUses;
131 /// Track the last instruction in this region defining each virtual register.
132 VReg2SUnitMap VRegDefs;
154133
155134 /// PendingLoads - Remember where unknown loads are after the most recent
156135 /// unknown store, as we iterate. As with Defs and Uses, this is here
220199 /// input.
221200 void buildSchedGraph(AliasAnalysis *AA,
222201 RegPressureTracker *RPTracker = nullptr,
223 PressureDiffs *PDiffs = nullptr,
224 bool TrackLaneMasks = false);
202 PressureDiffs *PDiffs = nullptr);
225203
226204 /// addSchedBarrierDeps - Add dependencies from instructions in the current
227205 /// list of instructions being scheduled to scheduling barrier. We want to
268246 /// Other adjustments may be made to the instruction if necessary. Return
269247 /// true if the operand has been deleted, false if not.
270248 bool toggleKillFlag(MachineInstr *MI, MachineOperand &MO);
271
272 /// Returns a mask for which lanes get read/written by the given (register)
273 /// machine operand.
274 LaneBitmask getLaneMaskForMO(const MachineOperand &MO) const;
275
276 void collectVRegUses(SUnit *SU);
277249 };
278250
279251 /// newSUnit - Creates a new SUnit and return a ptr to it.
1212 //===----------------------------------------------------------------------===//
1313
1414 #include "llvm/CodeGen/ScheduleDAGInstrs.h"
15 #include "llvm/ADT/IntEqClasses.h"
1615 #include "llvm/ADT/MapVector.h"
1716 #include "llvm/ADT/SmallPtrSet.h"
1817 #include "llvm/ADT/SmallSet.h"
1918 #include "llvm/Analysis/AliasAnalysis.h"
2019 #include "llvm/Analysis/ValueTracking.h"
20 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
2121 #include "llvm/CodeGen/MachineFunctionPass.h"
2222 #include "llvm/CodeGen/MachineFrameInfo.h"
2323 #include "llvm/CodeGen/MachineInstrBuilder.h"
5454 bool RemoveKillFlags)
5555 : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()), LIS(LIS),
5656 RemoveKillFlags(RemoveKillFlags), CanHandleTerminators(false),
57 TrackLaneMasks(false), FirstDbgValue(nullptr) {
57 FirstDbgValue(nullptr) {
5858 DbgValues.clear();
5959
6060 const TargetSubtargetInfo &ST = mf.getSubtarget();
362362 }
363363 }
364364
365 LaneBitmask ScheduleDAGInstrs::getLaneMaskForMO(const MachineOperand &MO) const
366 {
367 unsigned Reg = MO.getReg();
368 // No point in tracking lanemasks if we don't have interesting subregisters.
369 const TargetRegisterClass &RC = *MRI.getRegClass(Reg);
370 if (!RC.HasDisjunctSubRegs)
371 return ~0u;
372
373 unsigned SubReg = MO.getSubReg();
374 if (SubReg == 0)
375 return RC.getLaneMask();
376 return TRI->getSubRegIndexLaneMask(SubReg);
377 }
378
379365 /// addVRegDefDeps - Add register output and data dependencies from this SUnit
380366 /// to instructions that occur later in the same scheduling region if they read
381367 /// from or write to the virtual register defined at OperIdx.
383369 /// TODO: Hoist loop induction variable increments. This has to be
384370 /// reevaluated. Generally, IV scheduling should be done before coalescing.
385371 void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
386 MachineInstr *MI = SU->getInstr();
387 MachineOperand &MO = MI->getOperand(OperIdx);
388 unsigned Reg = MO.getReg();
389
390 LaneBitmask DefLaneMask;
391 LaneBitmask KillLaneMask;
392 if (TrackLaneMasks) {
393 bool IsKill = MO.getSubReg() == 0 || MO.isUndef();
394 DefLaneMask = getLaneMaskForMO(MO);
395 // If we have a <read-undef> flag, none of the lane values comes from an
396 // earlier instruction.
397 KillLaneMask = IsKill ? ~0u : DefLaneMask;
398
399 // Clear undef flag, we'll re-add it later once we know which subregister
400 // Def is first.
401 MO.setIsUndef(false);
402 } else {
403 DefLaneMask = ~0u;
404 KillLaneMask = ~0u;
405 }
406
407 if (MO.isDead()) {
408 assert(CurrentVRegUses.find(Reg) == CurrentVRegUses.end() &&
409 "Dead defs should have no uses");
410 } else {
411 // Add data dependence to all uses we found so far.
412 const TargetSubtargetInfo &ST = MF.getSubtarget();
413 for (VReg2SUnitOperIdxMultiMap::iterator I = CurrentVRegUses.find(Reg),
414 E = CurrentVRegUses.end(); I != E; /*empty*/) {
415 LaneBitmask LaneMask = I->LaneMask;
416 // Ignore uses of other lanes.
417 if ((LaneMask & KillLaneMask) == 0) {
418 ++I;
419 continue;
420 }
421
422 if ((LaneMask & DefLaneMask) != 0) {
423 SUnit *UseSU = I->SU;
424 MachineInstr *Use = UseSU->getInstr();
425 SDep Dep(SU, SDep::Data, Reg);
426 Dep.setLatency(SchedModel.computeOperandLatency(MI, OperIdx, Use,
427 I->OperandIndex));
428 ST.adjustSchedDependency(SU, UseSU, Dep);
429 UseSU->addPred(Dep);
430 }
431
432 LaneMask &= ~KillLaneMask;
433 // If we found a Def for all lanes of this use, remove it from the list.
434 if (LaneMask != 0) {
435 I->LaneMask = LaneMask;
436 ++I;
437 } else
438 I = CurrentVRegUses.erase(I);
439 }
440 }
441
442 // Shortcut: Singly defined vregs do not have output/anti dependencies.
372 const MachineInstr *MI = SU->getInstr();
373 unsigned Reg = MI->getOperand(OperIdx).getReg();
374
375 // Singly defined vregs do not have output/anti dependencies.
376 // The current operand is a def, so we have at least one.
377 // Check here if there are any others...
443378 if (MRI.hasOneDef(Reg))
444379 return;
445380
446 // Add output dependence to the next nearest defs of this vreg.
381 // Add output dependence to the next nearest def of this vreg.
447382 //
448383 // Unless this definition is dead, the output dependence should be
449384 // transitively redundant with antidependencies from this definition's
450385 // uses. We're conservative for now until we have a way to guarantee the uses
451386 // are not eliminated sometime during scheduling. The output dependence edge
452387 // is also useful if output latency exceeds def-use latency.
453 LaneBitmask LaneMask = DefLaneMask;
454 for (VReg2SUnit &V2SU : make_range(CurrentVRegDefs.find(Reg),
455 CurrentVRegDefs.end())) {
456 // Ignore defs for other lanes.
457 if ((V2SU.LaneMask & LaneMask) == 0)
458 continue;
459 // Add an output dependence.
460 SUnit *DefSU = V2SU.SU;
461 // Ignore additional defs of the same lanes in one instruction. This can
462 // happen because lanemasks are shared for targets with too many
463 // subregisters. We also use some representation tricks/hacks where we
464 // add super-register defs/uses, to imply that although we only access parts
465 // of the reg we care about the full one.
466 if (DefSU == SU)
467 continue;
468 SDep Dep(SU, SDep::Output, Reg);
469 Dep.setLatency(
470 SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr()));
471 DefSU->addPred(Dep);
472
473 // Update current definition. This can get tricky if the def was about a
474 // bigger lanemask before. We then have to shrink it and create a new
475 // VReg2SUnit for the non-overlapping part.
476 LaneBitmask OverlapMask = V2SU.LaneMask & LaneMask;
477 LaneBitmask NonOverlapMask = V2SU.LaneMask & ~LaneMask;
478 if (NonOverlapMask != 0)
479 CurrentVRegDefs.insert(VReg2SUnit(Reg, NonOverlapMask, V2SU.SU));
480 V2SU.SU = SU;
481 V2SU.LaneMask = OverlapMask;
482 }
483 // If there was no CurrentVRegDefs entry for some lanes yet, create one.
484 if (LaneMask != 0)
485 CurrentVRegDefs.insert(VReg2SUnit(Reg, LaneMask, SU));
388 VReg2SUnitMap::iterator DefI = VRegDefs.find(Reg);
389 if (DefI == VRegDefs.end())
390 VRegDefs.insert(VReg2SUnit(Reg, SU));
391 else {
392 SUnit *DefSU = DefI->SU;
393 if (DefSU != SU && DefSU != &ExitSU) {
394 SDep Dep(SU, SDep::Output, Reg);
395 Dep.setLatency(
396 SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr()));
397 DefSU->addPred(Dep);
398 }
399 DefI->SU = SU;
400 }
486401 }
487402
488403 /// addVRegUseDeps - Add a register data dependency if the instruction that
492407 ///
493408 /// TODO: Handle ExitSU "uses" properly.
494409 void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
495 const MachineInstr *MI = SU->getInstr();
496 const MachineOperand &MO = MI->getOperand(OperIdx);
497 unsigned Reg = MO.getReg();
498
499 // Remember the use. Data dependencies will be added when we find the def.
500 LaneBitmask LaneMask = TrackLaneMasks ? getLaneMaskForMO(MO) : ~0u;
501 CurrentVRegUses.insert(VReg2SUnitOperIdx(Reg, LaneMask, OperIdx, SU));
502
503 // Add antidependences to the following defs of the vreg.
504 for (VReg2SUnit &V2SU : make_range(CurrentVRegDefs.find(Reg),
505 CurrentVRegDefs.end())) {
506 // Ignore defs for unrelated lanes.
507 LaneBitmask PrevDefLaneMask = V2SU.LaneMask;
508 if ((PrevDefLaneMask & LaneMask) == 0)
509 continue;
510 if (V2SU.SU == SU)
511 continue;
512
513 V2SU.SU->addPred(SDep(SU, SDep::Anti, Reg));
514 }
410 MachineInstr *MI = SU->getInstr();
411 unsigned Reg = MI->getOperand(OperIdx).getReg();
412
413 // Record this local VReg use.
414 VReg2UseMap::iterator UI = VRegUses.find(Reg);
415 for (; UI != VRegUses.end(); ++UI) {
416 if (UI->SU == SU)
417 break;
418 }
419 if (UI == VRegUses.end())
420 VRegUses.insert(VReg2SUnit(Reg, SU));
421
422 // Lookup this operand's reaching definition.
423 assert(LIS && "vreg dependencies requires LiveIntervals");
424 LiveQueryResult LRQ
425 = LIS->getInterval(Reg).Query(LIS->getInstructionIndex(MI));
426 VNInfo *VNI = LRQ.valueIn();
427
428 // VNI will be valid because MachineOperand::readsReg() is checked by caller.
429 assert(VNI && "No value to read by operand");
430 MachineInstr *Def = LIS->getInstructionFromIndex(VNI->def);
431 // Phis and other noninstructions (after coalescing) have a NULL Def.
432 if (Def) {
433 SUnit *DefSU = getSUnit(Def);
434 if (DefSU) {
435 // The reaching Def lives within this scheduling region.
436 // Create a data dependence.
437 SDep dep(DefSU, SDep::Data, Reg);
438 // Adjust the dependence latency using operand def/use information, then
439 // allow the target to perform its own adjustments.
440 int DefOp = Def->findRegisterDefOperandIdx(Reg);
441 dep.setLatency(SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx));
442
443 const TargetSubtargetInfo &ST = MF.getSubtarget();
444 ST.adjustSchedDependency(DefSU, SU, const_cast(dep));
445 SU->addPred(dep);
446 }
447 }
448
449 // Add antidependence to the following def of the vreg it uses.
450 VReg2SUnitMap::iterator DefI = VRegDefs.find(Reg);
451 if (DefI != VRegDefs.end() && DefI->SU != SU)
452 DefI->SU->addPred(SDep(SU, SDep::Anti, Reg));
515453 }
516454
517455 /// Return true if MI is an instruction we are unable to reason about
794732 }
795733 }
796734
797 void ScheduleDAGInstrs::collectVRegUses(SUnit *SU) {
798 const MachineInstr *MI = SU->getInstr();
799 for (const MachineOperand &MO : MI->operands()) {
800 if (!MO.isReg())
801 continue;
802 if (!MO.isUse() && (MO.getSubReg() == 0 || !TrackLaneMasks))
803 continue;
804
805 unsigned Reg = MO.getReg();
806 if (!TargetRegisterInfo::isVirtualRegister(Reg))
807 continue;
808
809 // Record this local VReg use.
810 VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg);
811 for (; UI != VRegUses.end(); ++UI) {
812 if (UI->SU == SU)
813 break;
814 }
815 if (UI == VRegUses.end())
816 VRegUses.insert(VReg2SUnit(Reg, 0, SU));
817 }
818 }
819
820735 /// If RegPressure is non-null, compute register pressure as a side effect. The
821736 /// DAG builder is an efficient place to do it because it already visits
822737 /// operands.
823738 void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
824739 RegPressureTracker *RPTracker,
825 PressureDiffs *PDiffs,
826 bool TrackLaneMasks) {
740 PressureDiffs *PDiffs) {
827741 const TargetSubtargetInfo &ST = MF.getSubtarget();
828742 bool UseAA = EnableAASchedMI.getNumOccurrences() > 0 ? EnableAASchedMI
829743 : ST.useAA();
830744 AliasAnalysis *AAForDep = UseAA ? AA : nullptr;
831745
832 this->TrackLaneMasks = TrackLaneMasks;
833746 MISUnitMap.clear();
834747 ScheduleDAG::clearDAG();
835748
863776 Defs.setUniverse(TRI->getNumRegs());
864777 Uses.setUniverse(TRI->getNumRegs());
865778
866 assert(CurrentVRegDefs.empty() && "nobody else should use CurrentVRegDefs");
867 assert(CurrentVRegUses.empty() && "nobody else should use CurrentVRegUses");
868 unsigned NumVirtRegs = MRI.getNumVirtRegs();
869 CurrentVRegDefs.setUniverse(NumVirtRegs);
870 CurrentVRegUses.setUniverse(NumVirtRegs);
871
779 assert(VRegDefs.empty() && "Only BuildSchedGraph may access VRegDefs");
872780 VRegUses.clear();
873 VRegUses.setUniverse(NumVirtRegs);
781 VRegDefs.setUniverse(MRI.getNumVirtRegs());
782 VRegUses.setUniverse(MRI.getNumVirtRegs());
874783
875784 // Model data dependencies between instructions being scheduled and the
876785 // ExitSU.
898807 RPTracker->recede(/*LiveUses=*/nullptr, PDiff);
899808 assert(RPTracker->getPos() == std::prev(MII) &&
900809 "RPTracker can't find MI");
901 collectVRegUses(SU);
902810 }
903811
904812 assert(
11481056
11491057 Defs.clear();
11501058 Uses.clear();
1151 CurrentVRegDefs.clear();
1152 CurrentVRegUses.clear();
1059 VRegDefs.clear();
11531060 PendingLoads.clear();
11541061 }
11551062
55
66 ; FUNC-LABEL: {{^}}width_2d:
77 ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
8 ; EG: MOV * [[VAL]], KC0[2].Z
8 ; EG: MOV [[VAL]], KC0[2].Z
99 define void @width_2d (%opencl.image2d_t addrspace(1)* %in,
1010 i32 addrspace(1)* %out) {
1111 entry:
1818
1919 ; FUNC-LABEL: {{^}}width_3d:
2020 ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
21 ; EG: MOV * [[VAL]], KC0[2].Z
21 ; EG: MOV [[VAL]], KC0[2].Z
2222 define void @width_3d (%opencl.image3d_t addrspace(1)* %in,
2323 i32 addrspace(1)* %out) {
2424 entry:
3535
3636 ; FUNC-LABEL: {{^}}height_2d:
3737 ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
38 ; EG: MOV * [[VAL]], KC0[2].W
38 ; EG: MOV [[VAL]], KC0[2].W
3939 define void @height_2d (%opencl.image2d_t addrspace(1)* %in,
4040 i32 addrspace(1)* %out) {
4141 entry:
4848
4949 ; FUNC-LABEL: {{^}}height_3d:
5050 ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
51 ; EG: MOV * [[VAL]], KC0[2].W
51 ; EG: MOV [[VAL]], KC0[2].W
5252 define void @height_3d (%opencl.image3d_t addrspace(1)* %in,
5353 i32 addrspace(1)* %out) {
5454 entry:
6565
6666 ; FUNC-LABEL: {{^}}depth_3d:
6767 ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
68 ; EG: MOV * [[VAL]], KC0[3].X
68 ; EG: MOV [[VAL]], KC0[3].X
6969 define void @depth_3d (%opencl.image3d_t addrspace(1)* %in,
7070 i32 addrspace(1)* %out) {
7171 entry:
8282
8383 ; FUNC-LABEL: {{^}}data_type_2d:
8484 ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
85 ; EG: MOV * [[VAL]], KC0[3].Y
85 ; EG: MOV [[VAL]], KC0[3].Y
8686 define void @data_type_2d (%opencl.image2d_t addrspace(1)* %in,
8787 i32 addrspace(1)* %out) {
8888 entry:
9595
9696 ; FUNC-LABEL: {{^}}data_type_3d:
9797 ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
98 ; EG: MOV * [[VAL]], KC0[3].Y
98 ; EG: MOV [[VAL]], KC0[3].Y
9999 define void @data_type_3d (%opencl.image3d_t addrspace(1)* %in,
100100 i32 addrspace(1)* %out) {
101101 entry:
112112
113113 ; FUNC-LABEL: {{^}}channel_order_2d:
114114 ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
115 ; EG: MOV * [[VAL]], KC0[3].Z
115 ; EG: MOV [[VAL]], KC0[3].Z
116116 define void @channel_order_2d (%opencl.image2d_t addrspace(1)* %in,
117117 i32 addrspace(1)* %out) {
118118 entry:
125125
126126 ; FUNC-LABEL: {{^}}channel_order_3d:
127127 ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
128 ; EG: MOV * [[VAL]], KC0[3].Z
128 ; EG: MOV [[VAL]], KC0[3].Z
129129 define void @channel_order_3d (%opencl.image3d_t addrspace(1)* %in,
130130 i32 addrspace(1)* %out) {
131131 entry:
144144 ;
145145 ; FUNC-LABEL: {{^}}image_arg_2nd:
146146 ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
147 ; EG: MOV * [[VAL]], KC0[4].Z
147 ; EG: MOV [[VAL]], KC0[4].Z
148148 define void @image_arg_2nd (%opencl.image3d_t addrspace(1)* %in1,
149149 i32 %x,
150150 %opencl.image2d_t addrspace(1)* %in2,
66 ; ADD_INT literal.x KC0[2].Z, 5
77
88 ; CHECK: {{^}}i32_literal:
9 ; CHECK: LSHR
10 ; CHECK-NEXT: ADD_INT * {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.y
9 ; CHECK: ADD_INT {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x
10 ; CHECK-NEXT: LSHR
1111 ; CHECK-NEXT: 5
1212 define void @i32_literal(i32 addrspace(1)* %out, i32 %in) {
1313 entry:
2323 ; ADD literal.x KC0[2].Z, 5.0
2424
2525 ; CHECK: {{^}}float_literal:
26 ; CHECK: LSHR
27 ; CHECK-NEXT: ADD * {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.y
26 ; CHECK: ADD {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x
27 ; CHECK-NEXT: LSHR
2828 ; CHECK-NEXT: 1084227584(5.0
2929 define void @float_literal(float addrspace(1)* %out, float %in) {
3030 entry:
33
44 ; FUNC-LABEL: {{^}}read_workdim:
55 ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
6 ; EG: MOV * [[VAL]], KC0[2].Z
6 ; EG: MOV [[VAL]], KC0[2].Z
77
88 ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xb
99 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2c
22 ; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=SI %s
33
44 ; R600: {{^}}amdgpu_trunc:
5 ; R600: TRUNC {{\*? *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
5 ; R600: TRUNC T{{[0-9]+\.[XYZW]}}, KC0[2].Z
66 ; SI: {{^}}amdgpu_trunc:
77 ; SI: v_trunc_f32
88
44
55 ; FUNC-LABEL: {{^}}local_size_x:
66 ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
7 ; EG: MOV * [[VAL]], KC0[1].Z
7 ; EG: MOV [[VAL]], KC0[1].Z
88
99 ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x6
1010 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x18
2222
2323 ; FUNC-LABEL: {{^}}local_size_y:
2424 ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
25 ; EG: MOV * [[VAL]], KC0[1].W
25 ; EG: MOV [[VAL]], KC0[1].W
2626
2727 ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x7
2828 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1c
3737
3838 ; FUNC-LABEL: {{^}}local_size_z:
3939 ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
40 ; EG: MOV * [[VAL]], KC0[2].X
40 ; EG: MOV [[VAL]], KC0[2].X
4141
4242 ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
4343 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x20
152152 }
153153
154154 ; FUNC-LABEL: {{^}}or_i1:
155 ; EG: OR_INT * {{\** *}}T{{[0-9]+\.[XYZW], PS, PV\.[XYZW]}}
155 ; EG: OR_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], PS}}
156156
157157 ; SI: s_or_b64 s[{{[0-9]+:[0-9]+}}], vcc, s[{{[0-9]+:[0-9]+}}]
158158 define void @or_i1(i32 addrspace(1)* %out, float addrspace(1)* %in0, float addrspace(1)* %in1) {
44 ; SET*DX10 instructions.
55
66 ; CHECK: {{^}}fcmp_une_select_fptosi:
7 ; CHECK: LSHR
8 ; CHECK-NEXT: SETNE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
7 ; CHECK: SETNE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
8 ; CHECK-NEXT: LSHR
99 ; CHECK-NEXT: 1084227584(5.000000e+00)
1010 define void @fcmp_une_select_fptosi(i32 addrspace(1)* %out, float %in) {
1111 entry:
1818 }
1919
2020 ; CHECK: {{^}}fcmp_une_select_i32:
21 ; CHECK: LSHR
22 ; CHECK-NEXT: SETNE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
21 ; CHECK: SETNE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
22 ; CHECK-NEXT: LSHR
2323 ; CHECK-NEXT: 1084227584(5.000000e+00)
2424 define void @fcmp_une_select_i32(i32 addrspace(1)* %out, float %in) {
2525 entry:
3030 }
3131
3232 ; CHECK: {{^}}fcmp_oeq_select_fptosi:
33 ; CHECK: LSHR
34 ; CHECK-NEXT: SETE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
33 ; CHECK: SETE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
34 ; CHECK-NEXT: LSHR
3535 ; CHECK-NEXT: 1084227584(5.000000e+00)
3636 define void @fcmp_oeq_select_fptosi(i32 addrspace(1)* %out, float %in) {
3737 entry:
4444 }
4545
4646 ; CHECK: {{^}}fcmp_oeq_select_i32:
47 ; CHECK: LSHR
48 ; CHECK-NEXT: SETE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
47 ; CHECK: SETE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
48 ; CHECK-NEXT: LSHR
4949 ; CHECK-NEXT: 1084227584(5.000000e+00)
5050 define void @fcmp_oeq_select_i32(i32 addrspace(1)* %out, float %in) {
5151 entry:
5656 }
5757
5858 ; CHECK: {{^}}fcmp_ogt_select_fptosi:
59 ; CHECK: LSHR
60 ; CHECK-NEXT: SETGT_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
59 ; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
60 ; CHECK-NEXT: LSHR
6161 ; CHECK-NEXT: 1084227584(5.000000e+00)
6262 define void @fcmp_ogt_select_fptosi(i32 addrspace(1)* %out, float %in) {
6363 entry:
7070 }
7171
7272 ; CHECK: {{^}}fcmp_ogt_select_i32:
73 ; CHECK: LSHR
74 ; CHECK-NEXT: SETGT_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
73 ; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
74 ; CHECK-NEXT: LSHR
7575 ; CHECK-NEXT: 1084227584(5.000000e+00)
7676 define void @fcmp_ogt_select_i32(i32 addrspace(1)* %out, float %in) {
7777 entry:
8282 }
8383
8484 ; CHECK: {{^}}fcmp_oge_select_fptosi:
85 ; CHECK: LSHR
86 ; CHECK-NEXT: SETGE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
85 ; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
86 ; CHECK-NEXT: LSHR
8787 ; CHECK-NEXT: 1084227584(5.000000e+00)
8888 define void @fcmp_oge_select_fptosi(i32 addrspace(1)* %out, float %in) {
8989 entry:
9696 }
9797
9898 ; CHECK: {{^}}fcmp_oge_select_i32:
99 ; CHECK: LSHR
100 ; CHECK-NEXT: SETGE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
99 ; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
100 ; CHECK-NEXT: LSHR
101101 ; CHECK-NEXT: 1084227584(5.000000e+00)
102102 define void @fcmp_oge_select_i32(i32 addrspace(1)* %out, float %in) {
103103 entry:
108108 }
109109
110110 ; CHECK: {{^}}fcmp_ole_select_fptosi:
111 ; CHECK: LSHR
112 ; CHECK-NEXT: SETGE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.y, KC0[2].Z,
111 ; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
112 ; CHECK-NEXT: LSHR
113113 ; CHECK-NEXT: 1084227584(5.000000e+00)
114114 define void @fcmp_ole_select_fptosi(i32 addrspace(1)* %out, float %in) {
115115 entry:
122122 }
123123
124124 ; CHECK: {{^}}fcmp_ole_select_i32:
125 ; CHECK: LSHR
126 ; CHECK-NEXT: SETGE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.y, KC0[2].Z,
125 ; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
126 ; CHECK-NEXT: LSHR
127127 ; CHECK-NEXT: 1084227584(5.000000e+00)
128128 define void @fcmp_ole_select_i32(i32 addrspace(1)* %out, float %in) {
129129 entry:
134134 }
135135
136136 ; CHECK: {{^}}fcmp_olt_select_fptosi:
137 ; CHECK: LSHR
138 ; CHECK-NEXT: SETGT_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.y, KC0[2].Z,
137 ; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
138 ; CHECK-NEXT: LSHR
139139 ; CHECK-NEXT: 1084227584(5.000000e+00)
140140 define void @fcmp_olt_select_fptosi(i32 addrspace(1)* %out, float %in) {
141141 entry:
148148 }
149149
150150 ; CHECK: {{^}}fcmp_olt_select_i32:
151 ; CHECK: LSHR
152 ; CHECK-NEXT: SETGT_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.y, KC0[2].Z,
151 ; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
152 ; CHECK-NEXT: LSHR
153153 ; CHECK-NEXT: 1084227584(5.000000e+00)
154154 define void @fcmp_olt_select_i32(i32 addrspace(1)* %out, float %in) {
155155 entry:
1111 ; SI: buffer_store_dword [[EXTRACT]],
1212
1313 ; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
14 ; EG: LSHR * [[ADDR]]
15 ; EG: BFE_INT * [[RES]], {{.*}}, 0.0, 1
14 ; EG: BFE_INT [[RES]], {{.*}}, 0.0, 1
15 ; EG-NEXT: LSHR * [[ADDR]]
1616 define void @sext_in_reg_i1_i32(i32 addrspace(1)* %out, i32 %in) {
1717 %shl = shl i32 %in, 31
1818 %sext = ashr i32 %shl, 31
5252 ret void
5353 }
5454
55 ;EG-LABEL: {{^}}shl_i64:
55 ;EG: {{^}}shl_i64:
5656 ;EG: SUB_INT {{\*? *}}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]]
5757 ;EG: LSHR {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPLO:T[0-9]+\.[XYZW]]], {{[[COMPSH]]|PV.[XYZW]}}
58 ;EG-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
59 ;EG-DAG: LSHR {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1
58 ;EG: LSHR {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1
59 ;EG_CHECK-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
6060 ;EG-DAG: LSHL {{\*? *}}[[HISMTMP:T[0-9]+\.[XYZW]]], [[OPHI:T[0-9]+\.[XYZW]]], [[SHIFT]]
61 ;EG-DAG: OR_INT {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], {{[[HISMTMP]]|PV.[XYZW]|PS}}, {{[[OVERF]]|PV.[XYZW]}}
62 ;EG-DAG: LSHL {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], [[OPLO]], {{PS|[[SHIFT]]|PV.[XYZW]}}
61 ;EG-DAG: OR_INT {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], {{[[HISMTMP]]|PV.[XYZW]}}, {{[[OVERF]]|PV.[XYZW]}}
62 ;EG-DAG: LSHL {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], [[OPLO]], {{PS|[[SHIFT]]}}
6363 ;EG-DAG: SETGT_UINT {{\*? *}}[[RESC:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
6464 ;EG-DAG: CNDE_INT {{\*? *}}[[RESLO:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]}}
6565 ;EG-DAG: CNDE_INT {{\*? *}}[[RESHI:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW], .*}}, 0.0
7979 ret void
8080 }
8181
82 ;EG-LABEL: {{^}}shl_v2i64:
82 ;EG: {{^}}shl_v2i64:
8383 ;EG-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]]
8484 ;EG-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]]
8585 ;EG-DAG: LSHR {{\*? *}}[[COMPSHA]]
6969 ;EG-LABEL: {{^}}ashr_i64_2:
7070 ;EG: SUB_INT {{\*? *}}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]]
7171 ;EG: LSHL {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPHI:T[0-9]+\.[XYZW]]], {{[[COMPSH]]|PV.[XYZW]}}
72 ;EG-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
73 ;EG-DAG: LSHL {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1
72 ;EG: LSHL {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1
73 ;EG_CHECK-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
7474 ;EG-DAG: LSHR {{\*? *}}[[LOSMTMP:T[0-9]+\.[XYZW]]], [[OPLO:T[0-9]+\.[XYZW]]], [[SHIFT]]
75 ;EG-DAG: OR_INT {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], {{[[LOSMTMP]]|PV.[XYZW]|PS}}, {{[[OVERF]]|PV.[XYZW]}}
76 ;EG-DAG: ASHR {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|PV.[XYZW]|[[SHIFT]]}}
75 ;EG-DAG: OR_INT {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], {{[[LOSMTMP]]|PV.[XYZW]}}, {{[[OVERF]]|PV.[XYZW]}}
76 ;EG-DAG: ASHR {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|[[SHIFT]]}}
7777 ;EG-DAG: ASHR {{\*? *}}[[LOBIG:T[0-9]+\.[XYZW]]], [[OPHI]], literal
7878 ;EG-DAG: ASHR {{\*? *}}[[HIBIG:T[0-9]+\.[XYZW]]], [[OPHI]], literal
7979 ;EG-DAG: SETGT_UINT {{\*? *}}[[RESC:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
6464
6565 ; EG: SUB_INT {{\*? *}}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]]
6666 ; EG: LSHL {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPHI:T[0-9]+\.[XYZW]]], {{[[COMPSH]]|PV.[XYZW]}}
67 ; EG: LSHL {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1
6768 ; EG-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
68 ; EG-DAG: LSHL {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1
6969 ; EG-DAG: LSHR {{\*? *}}[[LOSMTMP:T[0-9]+\.[XYZW]]], [[OPLO:T[0-9]+\.[XYZW]]], [[SHIFT]]
70 ; EG-DAG: OR_INT {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], {{[[LOSMTMP]]|PV.[XYZW]|PS}}, {{[[OVERF]]|PV.[XYZW]}}
71 ; EG-DAG: LSHR {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|[[SHIFT]]|PV\.[XYZW]}}
70 ; EG-DAG: OR_INT {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], {{[[LOSMTMP]]|PV.[XYZW]}}, {{[[OVERF]]|PV.[XYZW]}}
71 ; EG-DAG: LSHR {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|[[SHIFT]]}}
72 ; EG-DAG: LSHR {{\*? *}}[[LOBIG:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|[[SHIFT]]}}
7273 ; EG-DAG: SETGT_UINT {{\*? *}}[[RESC:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
73 ; EG-DAG: CNDE_INT {{\*? *}}[[RESLO:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]|PS}}
74 ; EG-DAG: LSHR {{\*? *}}[[LOBIG:T[0-9]+\.[XYZW]]], [[OPHI]], [[SHIFT]]
74 ; EG-DAG: CNDE_INT {{\*? *}}[[RESLO:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]}}
7575 ; EG-DAG: CNDE_INT {{\*? *}}[[RESHI:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW], .*}}, 0.0
7676 define void @lshr_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
7777 %b_ptr = getelementptr i64, i64 addrspace(1)* %in, i64 1
22 ; These tests are for condition codes that are not supported by the hardware
33
44 ; CHECK-LABEL: {{^}}slt:
5 ; CHECK: LSHR
6 ; CHECK-NEXT: SETGT_INT {{\** *}}T{{[0-9]+\.[XYZW]}}, {{literal\.[xy]}}, KC0[2].Z
5 ; CHECK: SETGT_INT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
6 ; CHECK-NEXT: LSHR
77 ; CHECK-NEXT: 5(7.006492e-45)
88 define void @slt(i32 addrspace(1)* %out, i32 %in) {
99 entry:
1414 }
1515
1616 ; CHECK-LABEL: {{^}}ult_i32:
17 ; CHECK: LSHR
18 ; CHECK-NEXT: SETGT_UINT {{\** *}}T{{[0-9]+\.[XYZW]}}, {{literal\.[xy]}}, KC0[2].Z
17 ; CHECK: SETGT_UINT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
18 ; CHECK-NEXT: LSHR
1919 ; CHECK-NEXT: 5(7.006492e-45)
2020 define void @ult_i32(i32 addrspace(1)* %out, i32 %in) {
2121 entry:
3939 }
4040
4141 ; CHECK-LABEL: {{^}}ult_float_native:
42 ; CHECK: LSHR
43 ; CHECK-NEXT: SETGE {{\*? *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, {{literal\.[xy]}}
42 ; CHECK: SETGE T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x
43 ; CHECK-NEXT: LSHR *
4444 ; CHECK-NEXT: 1084227584(5.000000e+00)
4545 define void @ult_float_native(float addrspace(1)* %out, float %in) {
4646 entry:
5151 }
5252
5353 ; CHECK-LABEL: {{^}}olt:
54 ; CHECK: LSHR
55 ; CHECK-NEXT: SETGT {{\*? *}}T{{[0-9]+\.[XYZW]}}, {{literal\.[xy]}}, KC0[2].Z
54 ; CHECK: SETGT T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
55 ; CHECK-NEXT: LSHR *
5656 ; CHECK-NEXT: 1084227584(5.000000e+00)
5757 define void @olt(float addrspace(1)* %out, float %in) {
5858 entry:
6363 }
6464
6565 ; CHECK-LABEL: {{^}}sle:
66 ; CHECK: LSHR
67 ; CHECK-NEXT: SETGT_INT {{\** *}}T{{[0-9]+\.[XYZW]}}, {{literal\.[xy]}}, KC0[2].Z
66 ; CHECK: SETGT_INT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
67 ; CHECK-NEXT: LSHR
6868 ; CHECK-NEXT: 6(8.407791e-45)
6969 define void @sle(i32 addrspace(1)* %out, i32 %in) {
7070 entry:
7575 }
7676
7777 ; CHECK-LABEL: {{^}}ule_i32:
78 ; CHECK: LSHR
79 ; CHECK-NEXT: SETGT_UINT {{\** *}}T{{[0-9]+\.[XYZW]}}, {{literal\.[xy]}}, KC0[2].Z
78 ; CHECK: SETGT_UINT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
79 ; CHECK-NEXT: LSHR
8080 ; CHECK-NEXT: 6(8.407791e-45)
8181 define void @ule_i32(i32 addrspace(1)* %out, i32 %in) {
8282 entry:
100100 }
101101
102102 ; CHECK-LABEL: {{^}}ule_float_native:
103 ; CHECK: LSHR
104 ; CHECK-NEXT: SETGT {{\*? *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, {{literal\.[xy]}}
103 ; CHECK: SETGT T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x
104 ; CHECK-NEXT: LSHR *
105105 ; CHECK-NEXT: 1084227584(5.000000e+00)
106106 define void @ule_float_native(float addrspace(1)* %out, float %in) {
107107 entry:
112112 }
113113
114114 ; CHECK-LABEL: {{^}}ole:
115 ; CHECK: LSHR
116 ; CHECK-NEXT: SETGE {{\*? *}}T{{[0-9]\.[XYZW]}}, {{literal\.[xy]}}, KC0[2].Z
115 ; CHECK: SETGE T{{[0-9]\.[XYZW]}}, literal.x, KC0[2].Z
116 ; CHECK-NEXT: LSHR *
117117 ; CHECK-NEXT:1084227584(5.000000e+00)
118118 define void @ole(float addrspace(1)* %out, float %in) {
119119 entry:
66
77 ; FUNC-LABEL: {{^}}ngroups_x:
88 ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
9 ; EG: MOV {{\*? *}}[[VAL]], KC0[0].X
9 ; EG: MOV [[VAL]], KC0[0].X
1010
1111 ; HSA: .amd_kernel_code_t
1212
3737
3838 ; FUNC-LABEL: {{^}}ngroups_y:
3939 ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
40 ; EG: MOV {{\*? *}}[[VAL]], KC0[0].Y
40 ; EG: MOV [[VAL]], KC0[0].Y
4141
4242 ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1
4343 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4
5252
5353 ; FUNC-LABEL: {{^}}ngroups_z:
5454 ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
55 ; EG: MOV {{\*? *}}[[VAL]], KC0[0].Z
55 ; EG: MOV [[VAL]], KC0[0].Z
5656
5757 ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2
5858 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
6767
6868 ; FUNC-LABEL: {{^}}global_size_x:
6969 ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
70 ; EG: MOV {{\*? *}}[[VAL]], KC0[0].W
70 ; EG: MOV [[VAL]], KC0[0].W
7171
7272 ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x3
7373 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xc
8282
8383 ; FUNC-LABEL: {{^}}global_size_y:
8484 ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
85 ; EG: MOV {{\*? *}}[[VAL]], KC0[1].X
85 ; EG: MOV [[VAL]], KC0[1].X
8686
8787 ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4
8888 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x10
9797
9898 ; FUNC-LABEL: {{^}}global_size_z:
9999 ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
100 ; EG: MOV {{\*? *}}[[VAL]], KC0[1].Y
100 ; EG: MOV [[VAL]], KC0[1].Y
101101
102102 ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x5
103103 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x14
3737 }
3838
3939 ; FUNC-LABEL: {{^}}xor_i1:
40 ; EG: XOR_INT {{\** *}}{{T[0-9]+\.[XYZW]}}, {{PS|PV\.[XYZW]}}, {{PS|PV\.[XYZW]}}
40 ; EG: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], PS}}
4141
4242 ; SI-DAG: v_cmp_le_f32_e32 [[CMP0:vcc]], 0, {{v[0-9]+}}
4343 ; SI-DAG: v_cmp_le_f32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]], 1.0, {{v[0-9]+}}