llvm.org GIT mirror llvm / fd11bc0
[CodeGen] Use MachineOperand::print in the MIRPrinter for MO_Register. Work towards the unification of MIR and debug output by refactoring the interfaces. For MachineOperand::print, keep a simple version that can be easily called from `dump()`, and a more complex one which will be called from both the MIRPrinter and MachineInstr::print. Add extra checks inside MachineOperand for detached operands (operands with getParent() == nullptr). https://reviews.llvm.org/D40836 * find . \( -name "*.mir" -o -name "*.cpp" -o -name "*.h" -o -name "*.ll" -o -name "*.s" \) -type f -print0 | xargs -0 sed -i '' -E 's/kill: ([^ ]+) ([^ ]+)<def> ([^ ]+)/kill: \1 def \2 \3/g' * find . \( -name "*.mir" -o -name "*.cpp" -o -name "*.h" -o -name "*.ll" -o -name "*.s" \) -type f -print0 | xargs -0 sed -i '' -E 's/kill: ([^ ]+) ([^ ]+) ([^ ]+)<def>/kill: \1 \2 def \3/g' * find . \( -name "*.mir" -o -name "*.cpp" -o -name "*.h" -o -name "*.ll" -o -name "*.s" \) -type f -print0 | xargs -0 sed -i '' -E 's/kill: def ([^ ]+) ([^ ]+) ([^ ]+)<def>/kill: def \1 \2 def \3/g' * find . \( -name "*.mir" -o -name "*.cpp" -o -name "*.h" -o -name "*.ll" -o -name "*.s" \) -type f -print0 | xargs -0 sed -i '' -E 's/<def>//g' * find . \( -name "*.mir" -o -name "*.cpp" -o -name "*.h" -o -name "*.ll" -o -name "*.s" \) -type f -print0 | xargs -0 sed -i '' -E 's/([^ ]+)<kill>/killed \1/g' * find . \( -name "*.mir" -o -name "*.cpp" -o -name "*.h" -o -name "*.ll" -o -name "*.s" \) -type f -print0 | xargs -0 sed -i '' -E 's/([^ ]+)<imp-use,kill>/implicit killed \1/g' * find . \( -name "*.mir" -o -name "*.cpp" -o -name "*.h" -o -name "*.ll" -o -name "*.s" \) -type f -print0 | xargs -0 sed -i '' -E 's/([^ ]+)<dead>/dead \1/g' * find . \( -name "*.mir" -o -name "*.cpp" -o -name "*.h" -o -name "*.ll" -o -name "*.s" \) -type f -print0 | xargs -0 sed -i '' -E 's/([^ ]+)<def[ ]*,[ ]*dead>/dead \1/g' * find . 
\( -name "*.mir" -o -name "*.cpp" -o -name "*.h" -o -name "*.ll" -o -name "*.s" \) -type f -print0 | xargs -0 sed -i '' -E 's/([^ ]+)<imp-def[ ]*,[ ]*dead>/implicit-def dead \1/g' * find . \( -name "*.mir" -o -name "*.cpp" -o -name "*.h" -o -name "*.ll" -o -name "*.s" \) -type f -print0 | xargs -0 sed -i '' -E 's/([^ ]+)<imp-def>/implicit-def \1/g' * find . \( -name "*.mir" -o -name "*.cpp" -o -name "*.h" -o -name "*.ll" -o -name "*.s" \) -type f -print0 | xargs -0 sed -i '' -E 's/([^ ]+)<imp-use>/implicit \1/g' * find . \( -name "*.mir" -o -name "*.cpp" -o -name "*.h" -o -name "*.ll" -o -name "*.s" \) -type f -print0 | xargs -0 sed -i '' -E 's/([^ ]+)<internal>/internal \1/g' * find . \( -name "*.mir" -o -name "*.cpp" -o -name "*.h" -o -name "*.ll" -o -name "*.s" \) -type f -print0 | xargs -0 sed -i '' -E 's/([^ ]+)<undef>/undef \1/g' git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@320022 91177308-0d34-0410-b5e6-96231b3b80d8 Francis Visoiu Mistrih 2 years ago
331 changed file(s) with 4207 addition(s) and 4122 deletion(s). Raw diff Collapse all Expand all
203203 const MDNode *Variable,
204204 const MDNode *Expr);
205205
206 /// Build and insert \p Res = G_FRAME_INDEX \p Idx
206 /// Build and insert \p Res = G_FRAME_INDEX \p Idx
207207 ///
208208 /// G_FRAME_INDEX materializes the address of an alloca value or other
209209 /// stack-based object.
214214 /// \return a MachineInstrBuilder for the newly created instruction.
215215 MachineInstrBuilder buildFrameIndex(unsigned Res, int Idx);
216216
217 /// Build and insert \p Res = G_GLOBAL_VALUE \p GV
217 /// Build and insert \p Res = G_GLOBAL_VALUE \p GV
218218 ///
219219 /// G_GLOBAL_VALUE materializes the address of the specified global
220220 /// into \p Res.
226226 /// \return a MachineInstrBuilder for the newly created instruction.
227227 MachineInstrBuilder buildGlobalValue(unsigned Res, const GlobalValue *GV);
228228
229 /// Build and insert \p Res = G_ADD \p Op0, \p Op1
229 /// Build and insert \p Res = G_ADD \p Op0, \p Op1
230230 ///
231231 /// G_ADD sets \p Res to the sum of integer parameters \p Op0 and \p Op1,
232232 /// truncated to their width.
244244 return buildAdd(Res, (getRegFromArg(UseArgs))...);
245245 }
246246
247 /// Build and insert \p Res = G_SUB \p Op0, \p Op1
247 /// Build and insert \p Res = G_SUB \p Op0, \p Op1
248248 ///
249249 /// G_SUB sets \p Res to the difference of integer parameters \p Op0 and \p Op1,
250250 /// truncated to their width.
257257 MachineInstrBuilder buildSub(unsigned Res, unsigned Op0,
258258 unsigned Op1);
259259
260 /// Build and insert \p Res = G_MUL \p Op0, \p Op1
260 /// Build and insert \p Res = G_MUL \p Op0, \p Op1
261261 ///
262262 /// G_MUL sets \p Res to the product of integer parameters \p Op0 and \p Op1,
263263 /// truncated to their width.
270270 MachineInstrBuilder buildMul(unsigned Res, unsigned Op0,
271271 unsigned Op1);
272272
273 /// Build and insert \p Res = G_GEP \p Op0, \p Op1
273 /// Build and insert \p Res = G_GEP \p Op0, \p Op1
274274 ///
275275 /// G_GEP adds \p Op1 bytes to the pointer specified by \p Op0,
276276 /// storing the resulting pointer in \p Res.
284284 MachineInstrBuilder buildGEP(unsigned Res, unsigned Op0,
285285 unsigned Op1);
286286
287 /// Materialize and insert \p Res = G_GEP \p Op0, (G_CONSTANT \p Value)
287 /// Materialize and insert \p Res = G_GEP \p Op0, (G_CONSTANT \p Value)
288288 ///
289289 /// G_GEP adds \p Value bytes to the pointer specified by \p Op0,
290290 /// storing the resulting pointer in \p Res. If \p Value is zero then no
304304 const LLT &ValueTy,
305305 uint64_t Value);
306306
307 /// Build and insert \p Res = G_PTR_MASK \p Op0, \p NumBits
307 /// Build and insert \p Res = G_PTR_MASK \p Op0, \p NumBits
308308 ///
309309 /// G_PTR_MASK clears the low bits of a pointer operand without destroying its
310310 /// pointer properties. This has the effect of rounding the address *down* to
320320 MachineInstrBuilder buildPtrMask(unsigned Res, unsigned Op0,
321321 uint32_t NumBits);
322322
323 /// Build and insert \p Res, \p CarryOut = G_UADDE \p Op0,
323 /// Build and insert \p Res, \p CarryOut = G_UADDE \p Op0,
324324 /// \p Op1, \p CarryIn
325325 ///
326326 /// G_UADDE sets \p Res to \p Op0 + \p Op1 + \p CarryIn (truncated to the bit
337337 MachineInstrBuilder buildUAdde(unsigned Res, unsigned CarryOut, unsigned Op0,
338338 unsigned Op1, unsigned CarryIn);
339339
340 /// Build and insert \p Res = G_AND \p Op0, \p Op1
340 /// Build and insert \p Res = G_AND \p Op0, \p Op1
341341 ///
342342 /// G_AND sets \p Res to the bitwise and of integer parameters \p Op0 and \p
343343 /// Op1.
354354 MachineInstrBuilder buildAnd(unsigned Res, unsigned Op0,
355355 unsigned Op1);
356356
357 /// Build and insert \p Res = G_OR \p Op0, \p Op1
357 /// Build and insert \p Res = G_OR \p Op0, \p Op1
358358 ///
359359 /// G_OR sets \p Res to the bitwise or of integer parameters \p Op0 and \p
360360 /// Op1.
366366 /// \return a MachineInstrBuilder for the newly created instruction.
367367 MachineInstrBuilder buildOr(unsigned Res, unsigned Op0, unsigned Op1);
368368
369 /// Build and insert \p Res = G_ANYEXT \p Op0
369 /// Build and insert \p Res = G_ANYEXT \p Op0
370370 ///
371371 /// G_ANYEXT produces a register of the specified width, with bits 0 to
372372 /// sizeof(\p Ty) * 8 set to \p Op. The remaining bits are unspecified
386386 return buildAnyExt(getDestFromArg(Res), getRegFromArg(Arg));
387387 }
388388
389 /// Build and insert \p Res = G_SEXT \p Op
389 /// Build and insert \p Res = G_SEXT \p Op
390390 ///
391391 /// G_SEXT produces a register of the specified width, with bits 0 to
392392 /// sizeof(\p Ty) * 8 set to \p Op. The remaining bits are duplicated from the
400400 /// \return The newly created instruction.
401401 MachineInstrBuilder buildSExt(unsigned Res, unsigned Op);
402402
403 /// Build and insert \p Res = G_ZEXT \p Op
403 /// Build and insert \p Res = G_ZEXT \p Op
404404 ///
405405 /// G_ZEXT produces a register of the specified width, with bits 0 to
406406 /// sizeof(\p Ty) * 8 set to \p Op. The remaining bits are 0. For a vector
414414 /// \return The newly created instruction.
415415 MachineInstrBuilder buildZExt(unsigned Res, unsigned Op);
416416
417 /// Build and insert \p Res = G_SEXT \p Op, \p Res = G_TRUNC \p Op, or
417 /// Build and insert \p Res = G_SEXT \p Op, \p Res = G_TRUNC \p Op, or
418418 /// \p Res = COPY \p Op depending on the differing sizes of \p Res and \p Op.
419419 /// ///
420420 /// \pre setBasicBlock or setMI must have been called.
424424 /// \return The newly created instruction.
425425 MachineInstrBuilder buildSExtOrTrunc(unsigned Res, unsigned Op);
426426
427 /// Build and insert \p Res = G_ZEXT \p Op, \p Res = G_TRUNC \p Op, or
427 /// Build and insert \p Res = G_ZEXT \p Op, \p Res = G_TRUNC \p Op, or
428428 /// \p Res = COPY \p Op depending on the differing sizes of \p Res and \p Op.
429429 /// ///
430430 /// \pre setBasicBlock or setMI must have been called.
434434 /// \return The newly created instruction.
435435 MachineInstrBuilder buildZExtOrTrunc(unsigned Res, unsigned Op);
436436
437 /// Build and insert \p Res = G_ANYEXT \p Op, \p Res = G_TRUNC \p Op, or
437 /// Build and insert \p Res = G_ANYEXT \p Op, \p Res = G_TRUNC \p Op, or
438438 /// \p Res = COPY \p Op depending on the differing sizes of \p Res and \p Op.
439439 /// ///
440440 /// \pre setBasicBlock or setMI must have been called.
448448 }
449449 MachineInstrBuilder buildAnyExtOrTrunc(unsigned Res, unsigned Op);
450450
451 /// Build and insert \p Res = \p ExtOpc, \p Res = G_TRUNC \p
451 /// Build and insert \p Res = \p ExtOpc, \p Res = G_TRUNC \p
452452 /// Op, or \p Res = COPY \p Op depending on the differing sizes of \p Res and
453453 /// \p Op.
454454 /// ///
533533 /// \return The newly created instruction.
534534 MachineInstrBuilder buildFConstant(unsigned Res, const ConstantFP &Val);
535535
536 /// Build and insert \p Res = COPY Op
536 /// Build and insert \p Res = COPY Op
537537 ///
538538 /// Register-to-register COPY sets \p Res to \p Op.
539539 ///
546546 return buildCopy(getDestFromArg(Res), getRegFromArg(Src));
547547 }
548548
549 /// Build and insert `Res = G_LOAD Addr, MMO`.
549 /// Build and insert `Res = G_LOAD Addr, MMO`.
550550 ///
551551 /// Loads the value stored at \p Addr. Puts the result in \p Res.
552552 ///
570570 MachineInstrBuilder buildStore(unsigned Val, unsigned Addr,
571571 MachineMemOperand &MMO);
572572
573 /// Build and insert `Res0, ... = G_EXTRACT Src, Idx0`.
573 /// Build and insert `Res0, ... = G_EXTRACT Src, Idx0`.
574574 ///
575575 /// \pre setBasicBlock or setMI must have been called.
576576 /// \pre \p Res and \p Src must be generic virtual registers.
597597 void buildSequence(unsigned Res, ArrayRef<unsigned> Ops,
598598 ArrayRef<uint64_t> Indices);
599599
600 /// Build and insert \p Res = G_MERGE_VALUES \p Op0, ...
600 /// Build and insert \p Res = G_MERGE_VALUES \p Op0, ...
601601 ///
602602 /// G_MERGE_VALUES combines the input elements contiguously into a larger
603603 /// register.
610610 /// \return a MachineInstrBuilder for the newly created instruction.
611611 MachineInstrBuilder buildMerge(unsigned Res, ArrayRef<unsigned> Ops);
612612
613 /// Build and insert \p Res0, ... = G_UNMERGE_VALUES \p Op
613 /// Build and insert \p Res0, ... = G_UNMERGE_VALUES \p Op
614614 ///
615615 /// G_UNMERGE_VALUES splits contiguous bits of the input into multiple
616616 ///
638638 MachineInstrBuilder buildIntrinsic(Intrinsic::ID ID, unsigned Res,
639639 bool HasSideEffects);
640640
641 /// Build and insert \p Res = G_FPTRUNC \p Op
641 /// Build and insert \p Res = G_FPTRUNC \p Op
642642 ///
643643 /// G_FPTRUNC converts a floating-point value into one with a smaller type.
644644 ///
650650 /// \return The newly created instruction.
651651 MachineInstrBuilder buildFPTrunc(unsigned Res, unsigned Op);
652652
653 /// Build and insert \p Res = G_TRUNC \p Op
653 /// Build and insert \p Res = G_TRUNC \p Op
654654 ///
655655 /// G_TRUNC extracts the low bits of a type. For a vector type each element is
656656 /// truncated independently before being packed into the destination.
710710 MachineInstrBuilder buildSelect(unsigned Res, unsigned Tst,
711711 unsigned Op0, unsigned Op1);
712712
713 /// Build and insert \p Res = G_INSERT_VECTOR_ELT \p Val,
713 /// Build and insert \p Res = G_INSERT_VECTOR_ELT \p Val,
714714 /// \p Elt, \p Idx
715715 ///
716716 /// \pre setBasicBlock or setMI must have been called.
723723 MachineInstrBuilder buildInsertVectorElement(unsigned Res, unsigned Val,
724724 unsigned Elt, unsigned Idx);
725725
726 /// Build and insert \p Res = G_EXTRACT_VECTOR_ELT \p Val, \p Idx
726 /// Build and insert \p Res = G_EXTRACT_VECTOR_ELT \p Val, \p Idx
727727 ///
728728 /// \pre setBasicBlock or setMI must have been called.
729729 /// \pre \p Res must be a generic virtual register with scalar type.
734734 MachineInstrBuilder buildExtractVectorElement(unsigned Res, unsigned Val,
735735 unsigned Idx);
736736
737 /// Build and insert `OldValRes = G_ATOMIC_CMPXCHG Addr, CmpVal, NewVal,
737 /// Build and insert `OldValRes = G_ATOMIC_CMPXCHG Addr, CmpVal, NewVal,
738738 /// MMO`.
739739 ///
740740 /// Atomically replace the value at \p Addr with \p NewVal if it is currently
1919 /// register.
2020 ///
2121 /// X86 Example:
22 /// %ymm0 = ...
23 /// %xmm0 = ... (Kills %xmm0, all %xmm0s sub-registers, and %ymm0)
22 /// %ymm0 = ...
23 /// %xmm0 = ... (Kills %xmm0, all %xmm0s sub-registers, and %ymm0)
2424 ///
25 /// %ymm0 = ...
26 /// %xmm0 = ..., %ymm0 (%ymm0 and all its sub-registers are alive)
25 /// %ymm0 = ...
26 /// %xmm0 = ..., implicit %ymm0 (%ymm0 and all its sub-registers are alive)
2727 //===----------------------------------------------------------------------===//
2828
2929 #ifndef LLVM_CODEGEN_LIVEPHYSREGS_H
701701 LQR_Unknown ///< Register liveness not decidable from local neighborhood.
702702 };
703703
704 /// Return whether (physical) register \p Reg has been <def>ined and not
705 /// <kill>ed as of just before \p Before.
704 /// Return whether (physical) register \p Reg has been defined and not
705 /// killed as of just before \p Before.
706706 ///
707707 /// Search is localised to a neighborhood of \p Neighborhood instructions
708708 /// before (searching for defs or kills) and \p Neighborhood instructions
4343 class ModuleSlotTracker;
4444 class raw_ostream;
4545 template <typename T> class SmallVectorImpl;
46 class SmallBitVector;
4647 class StringRef;
4748 class TargetInstrInfo;
4849 class TargetRegisterClass;
12191220
12201221 /// Debugging support
12211222 /// @{
1223 /// Determine the generic type to be printed (if needed) on uses and defs.
1224 LLT getTypeToPrint(unsigned OpIdx, SmallBitVector &PrintedTypes,
1225 const MachineRegisterInfo &MRI) const;
1226
1227 /// Return true when an instruction has tied register that can't be determined
1228 /// by the instruction's descriptor. This is useful for MIR printing, to
1229 /// determine whether we need to print the ties or not.
1230 bool hasComplexRegisterTies() const;
1231
12221232 /// Print this MI to \p OS.
12231233 /// Only print the defs and the opcode if \p SkipOpers is true.
12241234 /// Otherwise, also print operands if \p SkipDebugLoc is true.
149149 ///
150150 struct VirtRegInfo {
151151 /// Reads - One of the operands read the virtual register. This does not
152 /// include <undef> or <internal> use operands, see MO::readsReg().
152 /// include undef or internal use operands, see MO::readsReg().
153153 bool Reads;
154154
155155 /// Writes - One of the operands writes the virtual register.
1616 #include "llvm/ADT/DenseMap.h"
1717 #include "llvm/IR/Intrinsics.h"
1818 #include "llvm/Support/DataTypes.h"
19 #include "llvm/Support/LowLevelTypeImpl.h"
1920 #include <cassert>
2021
2122 namespace llvm {
115116 /// the same register. In that case, the instruction may depend on those
116117 /// operands reading the same dont-care value. For example:
117118 ///
118 /// %1 = XOR %2, %2
119 /// %1 = XOR undef %2, undef %2
119120 ///
120121 /// Any register can be used for %2, and its value doesn't matter, but
121122 /// the two operands must be the same register.
225226 ///
226227 void clearParent() { ParentMI = nullptr; }
227228
229 /// Print the MachineOperand to \p os.
230 /// Providing a valid \p TRI and \p IntrinsicInfo results in a more
231 /// target-specific printing. If \p TRI and \p IntrinsicInfo are null, the
232 /// function will try to pick it up from the parent.
228233 void print(raw_ostream &os, const TargetRegisterInfo *TRI = nullptr,
229234 const TargetIntrinsicInfo *IntrinsicInfo = nullptr) const;
230 void print(raw_ostream &os, ModuleSlotTracker &MST,
231 const TargetRegisterInfo *TRI = nullptr,
232 const TargetIntrinsicInfo *IntrinsicInfo = nullptr) const;
235
236 /// More complex way of printing a MachineOperand.
237 /// \param TypeToPrint specifies the generic type to be printed on uses and
238 /// defs. It can be determined using MachineInstr::getTypeToPrint.
239 /// \param PrintDef - whether we want to print `def` on an operand which
240 /// isDef. Sometimes, if the operand is printed before '=', we don't print
241 /// `def`.
242 /// \param ShouldPrintRegisterTies - whether we want to print register ties.
243 /// Sometimes they are easily determined by the instruction's descriptor
244 /// (MachineInstr::hasComplexRegisterTies can determine if it's needed).
245 /// \param TiedOperandIdx - if we need to print register ties this needs to
246 /// provide the index of the tied register. If not, it will be ignored.
247 /// \param TRI - provide more target-specific information to the printer.
248 /// Unlike the previous function, this one will not try and get the
249 /// information from its parent.
250 /// \param IntrinsicInfo - same as \p TRI.
251 void print(raw_ostream &os, ModuleSlotTracker &MST, LLT TypeToPrint,
252 bool PrintDef, bool ShouldPrintRegisterTies,
253 unsigned TiedOperandIdx, const TargetRegisterInfo *TRI,
254 const TargetIntrinsicInfo *IntrinsicInfo) const;
255
233256 void dump() const;
234257
235258 //===--------------------------------------------------------------------===//
830853 };
831854
832855 inline raw_ostream &operator<<(raw_ostream &OS, const MachineOperand &MO) {
833 MO.print(OS, nullptr);
856 MO.print(OS);
834857 return OS;
835858 }
836859
546546 /// Represents a predicate at the MachineFunction level. The control flow a
547547 /// MachineBranchPredicate represents is:
548548 ///
549 /// Reg = LHS `Predicate` RHS == ConditionDef
549 /// Reg = LHS `Predicate` RHS == ConditionDef
550550 /// if Reg then goto TrueDest else goto FalseDest
551551 ///
552552 struct MachineBranchPredicate {
14311431 /// For example, AVX instructions may copy part of a register operand into
14321432 /// the unused high bits of the destination register.
14331433 ///
1434 /// vcvtsi2sdq %rax, %xmm0, %xmm14
1434 /// vcvtsi2sdq %rax, undef %xmm0, %xmm14
14351435 ///
14361436 /// In the code above, vcvtsi2sdq copies %xmm0[127:64] into %xmm14 creating a
14371437 /// false dependence on any previous write to %xmm0.
11661166 /// registers on a \ref raw_ostream.
11671167 Printable printVRegOrUnit(unsigned VRegOrUnit, const TargetRegisterInfo *TRI);
11681168
1169 /// \brief Create Printable object to print register classes or register banks
1170 /// on a \ref raw_ostream.
1171 Printable printRegClassOrBank(unsigned Reg, const MachineRegisterInfo &RegInfo,
1172 const TargetRegisterInfo *TRI);
1173
11691174 } // end namespace llvm
11701175
11711176 #endif // LLVM_CODEGEN_TARGETREGISTERINFO_H
447447 // FIXME: The issue with predicated instruction is more complex. We are being
448448 // conservative here because the kill markers cannot be trusted after
449449 // if-conversion:
450 // %r6 = LDR %sp, %reg0, 92, pred:14, pred:%reg0; mem:LD4[FixedStack14]
450 // %r6 = LDR %sp, %reg0, 92, pred:14, pred:%reg0; mem:LD4[FixedStack14]
451451 // ...
452 // STR %r0, %r6, %reg0, 0, pred:0, pred:%cpsr; mem:ST4[%395]
453 // %r6 = LDR %sp, %reg0, 100, pred:0, pred:%cpsr; mem:LD4[FixedStack12]
454 // STR %r0, %r6, %reg0, 0, pred:14, pred:%reg0; mem:ST4[%396](align=8)
452 // STR %r0, killed %r6, %reg0, 0, pred:0, pred:%cpsr; mem:ST4[%395]
453 // %r6 = LDR %sp, %reg0, 100, pred:0, pred:%cpsr; mem:LD4[FixedStack12]
454 // STR %r0, killed %r6, %reg0, 0, pred:14, pred:%reg0; mem:ST4[%396](align=8)
455455 //
456456 // The first R6 kill is not really a kill since it's killed by a predicated
457457 // instruction which may not be executed. The second R6 def may or may not
814814 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
815815 const MachineOperand &Op = MI->getOperand(i);
816816 assert(Op.isReg() && "KILL instruction must have only register operands");
817 OS << ' '
818 << printReg(Op.getReg(),
819 AP.MF->getSubtarget().getRegisterInfo())
820 << (Op.isDef() ? "<def>" : "<kill>");
817 OS << ' ' << (Op.isDef() ? "def " : "killed ")
818 << printReg(Op.getReg(), AP.MF->getSubtarget().getRegisterInfo());
821819 }
822820 AP.OutStreamer->AddComment(OS.str());
823821 AP.OutStreamer->AddBlankLine();
19671967 //
19681968 // BB2:
19691969 // r1 = op2, ...
1970 // = op3, r1
1970 // = op3, killed r1
19711971 IsSafe = false;
19721972 break;
19731973 }
169169 // FIXME: The issue with predicated instruction is more complex. We are being
170170 // conservative here because the kill markers cannot be trusted after
171171 // if-conversion:
172 // %r6 = LDR %sp, %reg0, 92, pred:14, pred:%reg0; mem:LD4[FixedStack14]
172 // %r6 = LDR %sp, %reg0, 92, pred:14, pred:%reg0; mem:LD4[FixedStack14]
173173 // ...
174 // STR %r0, %r6, %reg0, 0, pred:0, pred:%cpsr; mem:ST4[%395]
175 // %r6 = LDR %sp, %reg0, 100, pred:0, pred:%cpsr; mem:LD4[FixedStack12]
176 // STR %r0, %r6, %reg0, 0, pred:14, pred:%reg0; mem:ST4[%396](align=8)
174 // STR %r0, killed %r6, %reg0, 0, pred:0, pred:%cpsr; mem:ST4[%395]
175 // %r6 = LDR %sp, %reg0, 100, pred:0, pred:%cpsr; mem:LD4[FixedStack12]
176 // STR %r0, killed %r6, %reg0, 0, pred:14, pred:%reg0; mem:ST4[%396](align=8)
177177 //
178178 // The first R6 kill is not really a kill since it's killed by a predicated
179179 // instruction which may not be executed. The second R6 def may or may not
103103 if (DstSubReg == InsReg) {
104104 // No need to insert an identity copy instruction.
105105 // Watch out for case like this:
106 // %rax = SUBREG_TO_REG 0, %eax, 3
106 // %rax = SUBREG_TO_REG 0, killed %eax, 3
107107 // We must leave %rax live.
108108 if (DstReg != InsReg) {
109109 MI->setDesc(TII->get(TargetOpcode::KILL));
420420 // test %rcx, %rcx
421421 // je _null_block
422422 // _non_null_block:
423 // %rdx = INST
423 // %rdx = INST
424424 // ...
425425 //
426426 // This restriction does not apply to the faulting load inst because in
359359 ///
360360 /// x = def
361361 /// spill x
362 /// y = use x
362 /// y = use killed x
363363 ///
364364 /// This hoist only helps when the copy kills its source.
365365 ///
699699 //
700700 // %eax = COPY %5
701701 // FOO %5 <--- MI, cancel kill because %eax is live.
702 // BAR %eax
702 // BAR killed %eax
703703 //
704704 // There should be no kill flag on FOO when %5 is rewritten as %eax.
705705 for (auto &RUP : RU) {
720720 // Example:
721721 // %1 = ... ; R32: %1
722722 // %2:high16 = ... ; R64: %2
723 // = read %2 ; R64: %2
723 // = read killed %2 ; R64: %2
724724 // = read %1 ; R32: %1
725725 // The flag is correct for %2, but the register allocator may
726726 // assign R0L to %1, and R0 to %2 because the low 32bits of R0
234234 // Otherwise, the last sub-register def implicitly defines this register.
235235 // e.g.
236236 // AH =
237 // AL = ... ,
237 // AL = ... implicit-def EAX, implicit killed AH
238238 // = AH
239239 // ...
240240 // = EAX
320320 // AH =
321321 //
322322 // = AX
323 // = AL, AX
323 // = AL, implicit killed AX
324324 // AX =
325325 //
326326 // Or whole register is defined, but not used at all.
327 // AX =
327 // dead AX =
328328 // ...
329329 // AX =
330330 //
331331 // Or whole register is defined, but only partly used.
332 // AX = AL
333 // = AL
332 // dead AX = implicit-def AL
333 // = killed AL
334334 // AX =
335335 MachineInstr *LastPartDef = nullptr;
336336 unsigned LastPartDefDist = 0;
363363 if (!PhysRegUse[Reg]) {
364364 // Partial uses. Mark register def dead and add implicit def of
365365 // sub-registers which are used.
366 // EAX = op AL
366 // dead EAX = op implicit-def AL
367367 // That is, EAX def is dead but AL def extends past it.
368368 PhysRegDef[Reg]->addRegisterDead(Reg, TRI, true);
369369 for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
163163 void printTargetFlags(const MachineOperand &Op);
164164 void print(const MachineInstr &MI, unsigned OpIdx,
165165 const TargetRegisterInfo *TRI, bool ShouldPrintRegisterTies,
166 LLT TypeToPrint, bool IsDef = false);
166 LLT TypeToPrint, bool PrintDef = true);
167167 void print(const LLVMContext &Context, const TargetInstrInfo &TII,
168168 const MachineMemOperand &Op);
169169 void printSyncScope(const LLVMContext &Context, SyncScope::ID SSID);
256256 OS << ')';
257257 }
258258
259 static void printRegClassOrBank(unsigned Reg, raw_ostream &OS,
260 const MachineRegisterInfo &RegInfo,
261 const TargetRegisterInfo *TRI) {
262 if (RegInfo.getRegClassOrNull(Reg))
263 OS << StringRef(TRI->getRegClassName(RegInfo.getRegClass(Reg))).lower();
264 else if (RegInfo.getRegBankOrNull(Reg))
265 OS << StringRef(RegInfo.getRegBankOrNull(Reg)->getName()).lower();
266 else {
267 OS << "_";
268 assert((RegInfo.def_empty(Reg) || RegInfo.getType(Reg).isValid()) &&
269 "Generic registers must have a valid type");
270 }
271 }
272
273259 static void printRegClassOrBank(unsigned Reg, yaml::StringValue &Dest,
274260 const MachineRegisterInfo &RegInfo,
275261 const TargetRegisterInfo *TRI) {
276262 raw_string_ostream OS(Dest.Value);
277 printRegClassOrBank(Reg, OS, RegInfo, TRI);
263 OS << printRegClassOrBank(Reg, RegInfo, TRI);
278264 }
279265
280266
288274 unsigned Reg = TargetRegisterInfo::index2VirtReg(I);
289275 yaml::VirtualRegisterDefinition VReg;
290276 VReg.ID = I;
291 printRegClassOrBank(Reg, VReg.Class, RegInfo, TRI);
277 ::printRegClassOrBank(Reg, VReg.Class, RegInfo, TRI);
292278 unsigned PreferredReg = RegInfo.getSimpleHint(Reg);
293279 if (PreferredReg)
294280 printRegMIR(PreferredReg, VReg.PreferredRegister, TRI);
660646 OS.indent(2) << "}\n";
661647 }
662648
663 /// Return true when an instruction has tied register that can't be determined
664 /// by the instruction's descriptor.
665 static bool hasComplexRegisterTies(const MachineInstr &MI) {
666 const MCInstrDesc &MCID = MI.getDesc();
667 for (unsigned I = 0, E = MI.getNumOperands(); I < E; ++I) {
668 const auto &Operand = MI.getOperand(I);
669 if (!Operand.isReg() || Operand.isDef())
670 // Ignore the defined registers as MCID marks only the uses as tied.
671 continue;
672 int ExpectedTiedIdx = MCID.getOperandConstraint(I, MCOI::TIED_TO);
673 int TiedIdx = Operand.isTied() ? int(MI.findTiedOperandIdx(I)) : -1;
674 if (ExpectedTiedIdx != TiedIdx)
675 return true;
676 }
677 return false;
678 }
679
680 static LLT getTypeToPrint(const MachineInstr &MI, unsigned OpIdx,
681 SmallBitVector &PrintedTypes,
682 const MachineRegisterInfo &MRI) {
683 const MachineOperand &Op = MI.getOperand(OpIdx);
684 if (!Op.isReg())
685 return LLT{};
686
687 if (MI.isVariadic() || OpIdx >= MI.getNumExplicitOperands())
688 return MRI.getType(Op.getReg());
689
690 auto &OpInfo = MI.getDesc().OpInfo[OpIdx];
691 if (!OpInfo.isGenericType())
692 return MRI.getType(Op.getReg());
693
694 if (PrintedTypes[OpInfo.getGenericTypeIndex()])
695 return LLT{};
696
697 PrintedTypes.set(OpInfo.getGenericTypeIndex());
698 return MRI.getType(Op.getReg());
699 }
700
701649 void MIPrinter::print(const MachineInstr &MI) {
702650 const auto *MF = MI.getMF();
703651 const auto &MRI = MF->getRegInfo();
710658 assert(MI.getNumOperands() == 1 && "Expected 1 operand in CFI instruction");
711659
712660 SmallBitVector PrintedTypes(8);
713 bool ShouldPrintRegisterTies = hasComplexRegisterTies(MI);
661 bool ShouldPrintRegisterTies = MI.hasComplexRegisterTies();
714662 unsigned I = 0, E = MI.getNumOperands();
715663 for (; I < E && MI.getOperand(I).isReg() && MI.getOperand(I).isDef() &&
716664 !MI.getOperand(I).isImplicit();
718666 if (I)
719667 OS << ", ";
720668 print(MI, I, TRI, ShouldPrintRegisterTies,
721 getTypeToPrint(MI, I, PrintedTypes, MRI),
722 /*IsDef=*/true);
669 MI.getTypeToPrint(I, PrintedTypes, MRI),
670 /*PrintDef=*/false);
723671 }
724672
725673 if (I)
735683 if (NeedComma)
736684 OS << ", ";
737685 print(MI, I, TRI, ShouldPrintRegisterTies,
738 getTypeToPrint(MI, I, PrintedTypes, MRI));
686 MI.getTypeToPrint(I, PrintedTypes, MRI));
739687 NeedComma = true;
740688 }
741689
901849 void MIPrinter::print(const MachineInstr &MI, unsigned OpIdx,
902850 const TargetRegisterInfo *TRI,
903851 bool ShouldPrintRegisterTies, LLT TypeToPrint,
904 bool IsDef) {
852 bool PrintDef) {
905853 const MachineOperand &Op = MI.getOperand(OpIdx);
906854 printTargetFlags(Op);
907855 switch (Op.getType()) {
908856 case MachineOperand::MO_Register: {
909 unsigned Reg = Op.getReg();
910 if (Op.isImplicit())
911 OS << (Op.isDef() ? "implicit-def " : "implicit ");
912 else if (!IsDef && Op.isDef())
913 // Print the 'def' flag only when the operand is defined after '='.
914 OS << "def ";
915 if (Op.isInternalRead())
916 OS << "internal ";
917 if (Op.isDead())
918 OS << "dead ";
919 if (Op.isKill())
920 OS << "killed ";
921 if (Op.isUndef())
922 OS << "undef ";
923 if (Op.isEarlyClobber())
924 OS << "early-clobber ";
925 if (Op.isDebug())
926 OS << "debug-use ";
927 OS << printReg(Reg, TRI);
928 // Print the sub register.
929 if (Op.getSubReg() != 0)
930 OS << '.' << TRI->getSubRegIndexName(Op.getSubReg());
931 if (TargetRegisterInfo::isVirtualRegister(Reg)) {
932 const MachineRegisterInfo &MRI = Op.getParent()->getMF()->getRegInfo();
933 if (IsDef || MRI.def_empty(Reg)) {
934 OS << ':';
935 printRegClassOrBank(Reg, OS, MRI, TRI);
936 }
937 }
857 unsigned TiedOperandIdx = 0;
938858 if (ShouldPrintRegisterTies && Op.isTied() && !Op.isDef())
939 OS << "(tied-def " << Op.getParent()->findTiedOperandIdx(OpIdx) << ")";
940 if (TypeToPrint.isValid())
941 OS << '(' << TypeToPrint << ')';
859 TiedOperandIdx = Op.getParent()->findTiedOperandIdx(OpIdx);
860 const TargetIntrinsicInfo *TII = MI.getMF()->getTarget().getIntrinsicInfo();
861 Op.print(OS, MST, TypeToPrint, PrintDef, ShouldPrintRegisterTies,
862 TiedOperandIdx, TRI, TII);
942863 break;
943864 }
944865 case MachineOperand::MO_Immediate:
622622 // Go through implicit defs of CSMI and MI, and clear the kill flags on
623623 // their uses in all the instructions between CSMI and MI.
624624 // We might have made some of the kill flags redundant, consider:
625 // subs ... %nzcv <- CSMI
626 // csinc ... %nzcv <- this kill flag isn't valid anymore
627 // subs ... %nzcv <- MI, to be eliminated
628 // csinc ... %nzcv
625 // subs ... implicit-def %nzcv <- CSMI
626 // csinc ... implicit killed %nzcv <- this kill flag isn't valid anymore
627 // subs ... implicit-def %nzcv <- MI, to be eliminated
628 // csinc ... implicit killed %nzcv
629629 // Since we eliminated MI, and reused a register imp-def'd by CSMI
630630 // (here %nzcv), that register, if it was killed before MI, should have
631631 // that kill flag removed, because its lifetime was extended.
225225
226226 // The two copies cancel out and the source of the first copy
227227 // hasn't been overridden, eliminate the second one. e.g.
228 // %ecx = COPY %eax
228 // %ecx = COPY %eax
229229 // ... nothing clobbered eax.
230 // %eax = COPY %ecx
230 // %eax = COPY %ecx
231231 // =>
232 // %ecx = COPY %eax
232 // %ecx = COPY %eax
233233 //
234234 // or
235235 //
236 // %ecx = COPY %eax
236 // %ecx = COPY %eax
237237 // ... nothing clobbered eax.
238 // %ecx = COPY %eax
238 // %ecx = COPY %eax
239239 // =>
240 // %ecx = COPY %eax
240 // %ecx = COPY %eax
241241 if (eraseIfRedundant(*MI, Def, Src) || eraseIfRedundant(*MI, Src, Def))
242242 continue;
243243
261261
262262 // If 'Def' is previously source of another copy, then this earlier copy's
263263 // source is no longer available. e.g.
264 // %xmm9 = copy %xmm2
264 // %xmm9 = copy %xmm2
265265 // ...
266 // %xmm2 = copy %xmm0
266 // %xmm2 = copy %xmm0
267267 // ...
268 // %xmm2 = copy %xmm9
268 // %xmm2 = copy %xmm9
269269 ClobberRegister(Def);
270270 for (const MachineOperand &MO : MI->implicit_operands()) {
271271 if (!MO.isReg() || !MO.isDef())
1717 #include "llvm/ADT/Hashing.h"
1818 #include "llvm/ADT/None.h"
1919 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/SmallBitVector.h"
2021 #include "llvm/ADT/SmallString.h"
2122 #include "llvm/ADT/SmallVector.h"
2223 #include "llvm/Analysis/AliasAnalysis.h"
741742 if (MO.isUse())
742743 Use |= !MO.isUndef();
743744 else if (MO.getSubReg() && !MO.isUndef())
744 // A partial <def,undef> doesn't count as reading the register.
745 // A partial def undef doesn't count as reading the register.
745746 PartDef = true;
746747 else
747748 FullDef = true;
11621163 }
11631164 }
11641165
1166 bool MachineInstr::hasComplexRegisterTies() const {
1167 const MCInstrDesc &MCID = getDesc();
1168 for (unsigned I = 0, E = getNumOperands(); I < E; ++I) {
1169 const auto &Operand = getOperand(I);
1170 if (!Operand.isReg() || Operand.isDef())
1171 // Ignore the defined registers as MCID marks only the uses as tied.
1172 continue;
1173 int ExpectedTiedIdx = MCID.getOperandConstraint(I, MCOI::TIED_TO);
1174 int TiedIdx = Operand.isTied() ? int(findTiedOperandIdx(I)) : -1;
1175 if (ExpectedTiedIdx != TiedIdx)
1176 return true;
1177 }
1178 return false;
1179 }
1180
1181 LLT MachineInstr::getTypeToPrint(unsigned OpIdx, SmallBitVector &PrintedTypes,
1182 const MachineRegisterInfo &MRI) const {
1183 const MachineOperand &Op = getOperand(OpIdx);
1184 if (!Op.isReg())
1185 return LLT{};
1186
1187 if (isVariadic() || OpIdx >= getNumExplicitOperands())
1188 return MRI.getType(Op.getReg());
1189
1190 auto &OpInfo = getDesc().OpInfo[OpIdx];
1191 if (!OpInfo.isGenericType())
1192 return MRI.getType(Op.getReg());
1193
1194 if (PrintedTypes[OpInfo.getGenericTypeIndex()])
1195 return LLT{};
1196
1197 PrintedTypes.set(OpInfo.getGenericTypeIndex());
1198 return MRI.getType(Op.getReg());
1199 }
1200
11651201 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
11661202 LLVM_DUMP_METHOD void MachineInstr::dump() const {
11671203 dbgs() << " ";
12031239 // Save a list of virtual registers.
12041240 SmallVector VirtRegs;
12051241
1242 SmallBitVector PrintedTypes(8);
1243 bool ShouldPrintRegisterTies = hasComplexRegisterTies();
1244 auto getTiedOperandIdx = [&](unsigned OpIdx) {
1245 if (!ShouldPrintRegisterTies)
1246 return 0U;
1247 const MachineOperand &MO = getOperand(OpIdx);
1248 if (MO.isReg() && MO.isTied() && !MO.isDef())
1249 return findTiedOperandIdx(OpIdx);
1250 return 0U;
1251 };
12061252 // Print explicitly defined operands on the left of an assignment syntax.
12071253 unsigned StartOp = 0, e = getNumOperands();
12081254 for (; StartOp < e && getOperand(StartOp).isReg() &&
1209 getOperand(StartOp).isDef() &&
1210 !getOperand(StartOp).isImplicit();
1255 getOperand(StartOp).isDef() && !getOperand(StartOp).isImplicit();
12111256 ++StartOp) {
1212 if (StartOp != 0) OS << ", ";
1213 getOperand(StartOp).print(OS, MST, TRI, IntrinsicInfo);
1257 if (StartOp != 0)
1258 OS << ", ";
1259 LLT TypeToPrint = MRI ? getTypeToPrint(StartOp, PrintedTypes, *MRI) : LLT{};
1260 unsigned TiedOperandIdx = getTiedOperandIdx(StartOp);
1261 getOperand(StartOp).print(OS, MST, TypeToPrint, /*PrintDef=*/false,
1262 ShouldPrintRegisterTies, TiedOperandIdx, TRI,
1263 IntrinsicInfo);
12141264 unsigned Reg = getOperand(StartOp).getReg();
1215 if (TargetRegisterInfo::isVirtualRegister(Reg)) {
1265 if (TargetRegisterInfo::isVirtualRegister(Reg))
12161266 VirtRegs.push_back(Reg);
1217 LLT Ty = MRI ? MRI->getType(Reg) : LLT{};
1218 if (Ty.isValid())
1219 OS << '(' << Ty << ')';
1220 }
12211267 }
12221268
12231269 if (StartOp != 0)
12401286 if (isInlineAsm() && e >= InlineAsm::MIOp_FirstOperand) {
12411287 // Print asm string.
12421288 OS << " ";
1243 getOperand(InlineAsm::MIOp_AsmString).print(OS, MST, TRI);
1289 const unsigned OpIdx = InlineAsm::MIOp_AsmString;
1290 LLT TypeToPrint = MRI ? getTypeToPrint(OpIdx, PrintedTypes, *MRI) : LLT{};
1291 unsigned TiedOperandIdx = getTiedOperandIdx(StartOp);
1292 getOperand(OpIdx).print(OS, MST, TypeToPrint, /*PrintDef=*/true,
1293 ShouldPrintRegisterTies, TiedOperandIdx, TRI,
1294 IntrinsicInfo);
12441295
12451296 // Print HasSideEffects, MayLoad, MayStore, IsAlignStack
12461297 unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
12831334 auto *DIV = dyn_cast(MO.getMetadata());
12841335 if (DIV && !DIV->getName().empty())
12851336 OS << "!\"" << DIV->getName() << '\"';
1286 else
1287 MO.print(OS, MST, TRI);
1337 else {
1338 LLT TypeToPrint = MRI ? getTypeToPrint(i, PrintedTypes, *MRI) : LLT{};
1339 unsigned TiedOperandIdx = getTiedOperandIdx(StartOp);
1340 MO.print(OS, MST, TypeToPrint, /*PrintDef=*/true,
1341 ShouldPrintRegisterTies, TiedOperandIdx, TRI, IntrinsicInfo);
1342 }
12881343 } else if (TRI && (isInsertSubreg() || isRegSequence() ||
12891344 (isSubregToReg() && i == 3)) && MO.isImm()) {
12901345 OS << TRI->getSubRegIndexName(MO.getImm());
13461401
13471402 // Compute the index of the next operand descriptor.
13481403 AsmDescOp += 1 + InlineAsm::getNumOperandRegisters(Flag);
1349 } else
1350 MO.print(OS, MST, TRI);
1404 } else {
1405 LLT TypeToPrint = MRI ? getTypeToPrint(i, PrintedTypes, *MRI) : LLT{};
1406 unsigned TiedOperandIdx = getTiedOperandIdx(StartOp);
1407 MO.print(OS, MST, TypeToPrint, /*PrintDef=*/true, ShouldPrintRegisterTies,
1408 TiedOperandIdx, TRI, IntrinsicInfo);
1409 }
13511410 }
13521411
13531412 bool HaveSemi = false;
1414 #include "llvm/Analysis/Loads.h"
1515 #include "llvm/CodeGen/MIRPrinter.h"
1616 #include "llvm/CodeGen/MachineRegisterInfo.h"
17 #include "llvm/Target/TargetIntrinsicInfo.h"
1817 #include "llvm/CodeGen/TargetRegisterInfo.h"
1918 #include "llvm/IR/Constants.h"
2019 #include "llvm/IR/ModuleSlotTracker.h"
20 #include "llvm/Target/TargetIntrinsicInfo.h"
21 #include "llvm/Target/TargetMachine.h"
2122
2223 using namespace llvm;
2324
332333 llvm_unreachable("Invalid machine operand type");
333334 }
334335
336 // Try to crawl up to the machine function and get TRI and IntrinsicInfo from
337 // it.
338 static void tryToGetTargetInfo(const MachineOperand &MO,
339 const TargetRegisterInfo *&TRI,
340 const TargetIntrinsicInfo *&IntrinsicInfo) {
341 if (const MachineInstr *MI = MO.getParent()) {
342 if (const MachineBasicBlock *MBB = MI->getParent()) {
343 if (const MachineFunction *MF = MBB->getParent()) {
344 TRI = MF->getSubtarget().getRegisterInfo();
345 IntrinsicInfo = MF->getTarget().getIntrinsicInfo();
346 }
347 }
348 }
349 }
350
335351 void MachineOperand::print(raw_ostream &OS, const TargetRegisterInfo *TRI,
336352 const TargetIntrinsicInfo *IntrinsicInfo) const {
353 tryToGetTargetInfo(*this, TRI, IntrinsicInfo);
337354 ModuleSlotTracker DummyMST(nullptr);
338 print(OS, DummyMST, TRI, IntrinsicInfo);
355 print(OS, DummyMST, LLT{}, /*PrintDef=*/false,
356 /*ShouldPrintRegisterTies=*/true,
357 /*TiedOperandIdx=*/0, TRI, IntrinsicInfo);
339358 }
340359
341360 void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
361 LLT TypeToPrint, bool PrintDef,
362 bool ShouldPrintRegisterTies,
363 unsigned TiedOperandIdx,
342364 const TargetRegisterInfo *TRI,
343365 const TargetIntrinsicInfo *IntrinsicInfo) const {
344366 switch (getType()) {
345 case MachineOperand::MO_Register:
346 OS << printReg(getReg(), TRI, getSubReg());
347
348 if (isDef() || isKill() || isDead() || isImplicit() || isUndef() ||
349 isInternalRead() || isEarlyClobber() || isTied()) {
350 OS << '<';
351 bool NeedComma = false;
352 if (isDef()) {
353 if (NeedComma)
354 OS << ',';
355 if (isEarlyClobber())
356 OS << "earlyclobber,";
357 if (isImplicit())
358 OS << "imp-";
359 OS << "def";
360 NeedComma = true;
361 // only makes sense when getSubReg() is set.
362 // Don't clutter the output otherwise.
363 if (isUndef() && getSubReg())
364 OS << ",read-undef";
365 } else if (isImplicit()) {
366 OS << "imp-use";
367 NeedComma = true;
367 case MachineOperand::MO_Register: {
368 unsigned Reg = getReg();
369 if (isImplicit())
370 OS << (isDef() ? "implicit-def " : "implicit ");
371 else if (PrintDef && isDef())
372 // Print the 'def' flag only when the operand is defined after '='.
373 OS << "def ";
374 if (isInternalRead())
375 OS << "internal ";
376 if (isDead())
377 OS << "dead ";
378 if (isKill())
379 OS << "killed ";
380 if (isUndef())
381 OS << "undef ";
382 if (isEarlyClobber())
383 OS << "early-clobber ";
384 if (isDebug())
385 OS << "debug-use ";
386 OS << printReg(Reg, TRI);
387 // Print the sub register.
388 if (unsigned SubReg = getSubReg()) {
389 if (TRI)
390 OS << '.' << TRI->getSubRegIndexName(SubReg);
391 else
392 OS << ".subreg" << SubReg;
393 }
394 // Print the register class / bank.
395 if (TargetRegisterInfo::isVirtualRegister(Reg)) {
396 if (const MachineInstr *MI = getParent()) {
397 if (const MachineBasicBlock *MBB = MI->getParent()) {
398 if (const MachineFunction *MF = MBB->getParent()) {
399 const MachineRegisterInfo &MRI = MF->getRegInfo();
400 if (!PrintDef || MRI.def_empty(Reg)) {
401 OS << ':';
402 OS << printRegClassOrBank(Reg, MRI, TRI);
403 }
404 }
405 }
368406 }
369
370 if (isKill()) {
371 if (NeedComma)
372 OS << ',';
373 OS << "kill";
374 NeedComma = true;
375 }
376 if (isDead()) {
377 if (NeedComma)
378 OS << ',';
379 OS << "dead";
380 NeedComma = true;
381 }
382 if (isUndef() && isUse()) {
383 if (NeedComma)
384 OS << ',';
385 OS << "undef";
386 NeedComma = true;
387 }
388 if (isInternalRead()) {
389 if (NeedComma)
390 OS << ',';
391 OS << "internal";
392 NeedComma = true;
393 }
394 if (isTied()) {
395 if (NeedComma)
396 OS << ',';
397 OS << "tied";
398 if (TiedTo != 15)
399 OS << unsigned(TiedTo - 1);
400 }
401 OS << '>';
402407 }
403 break;
408 // Print ties.
409 if (ShouldPrintRegisterTies && isTied() && !isDef())
410 OS << "(tied-def " << TiedOperandIdx << ")";
411 // Print types.
412 if (TypeToPrint.isValid())
413 OS << '(' << TypeToPrint << ')';
414 break;
415 }
404416 case MachineOperand::MO_Immediate:
405417 OS << getImm();
406418 break;
474486 OS << '>';
475487 break;
476488 case MachineOperand::MO_RegisterMask: {
477 unsigned NumRegsInMask = 0;
478 unsigned NumRegsEmitted = 0;
479489 OS << "
480 for (unsigned i = 0; i < TRI->getNumRegs(); ++i) {
481 unsigned MaskWord = i / 32;
482 unsigned MaskBit = i % 32;
483 if (getRegMask()[MaskWord] & (1 << MaskBit)) {
484 if (PrintRegMaskNumRegs < 0 ||
485 NumRegsEmitted <= static_cast(PrintRegMaskNumRegs)) {
486 OS << " " << printReg(i, TRI);
487 NumRegsEmitted++;
490 if (TRI) {
491 unsigned NumRegsInMask = 0;
492 unsigned NumRegsEmitted = 0;
493 for (unsigned i = 0; i < TRI->getNumRegs(); ++i) {
494 unsigned MaskWord = i / 32;
495 unsigned MaskBit = i % 32;
496 if (getRegMask()[MaskWord] & (1 << MaskBit)) {
497 if (PrintRegMaskNumRegs < 0 ||
498 NumRegsEmitted <= static_cast(PrintRegMaskNumRegs)) {
499 OS << " " << printReg(i, TRI);
500 NumRegsEmitted++;
501 }
502 NumRegsInMask++;
488503 }
489 NumRegsInMask++;
490504 }
505 if (NumRegsEmitted != NumRegsInMask)
506 OS << " and " << (NumRegsInMask - NumRegsEmitted) << " more...";
507 } else {
508 OS << " ...";
491509 }
492 if (NumRegsEmitted != NumRegsInMask)
493 OS << " and " << (NumRegsInMask - NumRegsEmitted) << " more...";
494510 OS << ">";
495511 break;
496512 }
245245 // %bb.1: derived from LLVM BB %bb4.preheader
246246 // Predecessors according to CFG: %bb.0
247247 // ...
248 // %reg16385 = DEC64_32r %reg16437, %eflags
248 // %reg16385 = DEC64_32r %reg16437, implicit-def dead %eflags
249249 // ...
250 // JE_4 <%bb.37>, %eflags
250 // JE_4 <%bb.37>, implicit %eflags
251251 // Successors according to CFG: %bb.37 %bb.2
252252 //
253253 // %bb.2: derived from LLVM BB %bb.nph
254254 // Predecessors according to CFG: %bb.0 %bb.1
255 // %reg16386 = PHI %reg16434, %bb.0, %reg16385, %bb.1
255 // %reg16386 = PHI %reg16434, %bb.0, %reg16385, %bb.1
256256 BreakPHIEdge = true;
257257 for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
258258 MachineInstr *UseInst = MO.getParent();
19601960 if (MOI->isDef()) {
19611961 if (Sub != 0) {
19621962 hasSubRegDef = true;
1963 // An operand %0:sub0 reads %0:sub1..n. Invert the lane
1963 // An operand %0:sub0 reads %0:sub1..n. Invert the lane
19641964 // mask for subregister defs. Read-undef defs will be handled by
19651965 // readsReg below.
19661966 SLM = ~SLM;
271271 // subreg of this register and given we don't track which
272272 // lanes are actually dead, we cannot insert a kill flag here.
273273 // Otherwise we may end up in a situation like this:
274 // ... = (MO) physreg:sub1, physreg
274 // ... = (MO) physreg:sub1, implicit killed physreg
275275 // ... <== Here we would allow later pass to reuse physreg:sub1
276276 // which is potentially wrong.
277277 // LR:sub0 = ...
674674 } else if (MO.isKill()) {
675675 // We must remove kill flags from uses of reloaded registers because the
676676 // register would be killed immediately, and there might be a second use:
677 // %foo = OR %x, %x
677 // %foo = OR killed %x, %x
678678 // This would cause a second reload of %x into a different register.
679679 DEBUG(dbgs() << "Clearing clean kill: " << MO << "\n");
680680 MO.setIsKill(false);
666666 // its other operand is coalesced to the copy dest register, see if we can
667667 // transform the copy into a noop by commuting the definition. For example,
668668 //
669 // A3 = op A2 B0
669 // A3 = op A2 killed B0
670670 // ...
671671 // B1 = A3 <- this copy
672672 // ...
674674 //
675675 // ==>
676676 //
677 // B2 = op B0 A2
677 // B2 = op B0 killed A2
678678 // ...
679679 // B1 = B2 <- now an identity copy
680680 // ...
767767 // ...
768768 // B = A
769769 // ...
770 // C = A
770 // C = killed A
771771 // ...
772772 // = B
773773
12531253 // Make sure that the subrange for resultant undef is removed
12541254 // For example:
12551255 // %1:sub1 = LOAD CONSTANT 1
1256 // %2 = COPY %1
1256 // %2 = COPY %1
12571257 // ==>
12581258 // %2:sub1 = LOAD CONSTANT 1
12591259 // ; Correct but need to remove the subrange for %2:sub0
12961296 // = somedef %1 ; %1 GR8
12971297 // =>
12981298 // %1 = somedef ; %1 GR8
1299 // ECX = remat ; CL
1299 // dead ECX = remat ; implicit-def CL
13001300 // = somedef %1 ; %1 GR8
13011301 // %1 will see the inteferences with CL but not with CH since
13021302 // no live-ranges would have been created for ECX.
13511351 // ProcessImpicitDefs may leave some copies of values, it only removes
13521352 // local variables. When we have a copy like:
13531353 //
1354 // %1 = COPY %2
1354 // %1 = COPY undef %2
13551355 //
13561356 // We delete the copy and remove the corresponding value number from %1.
13571357 // Any uses of that value number are marked as <undef>.
19261926 //
19271927 // %dst:ssub0 = FOO
19281928 // %src = BAR
1929 // %dst:ssub1 = COPY %src
1929 // %dst:ssub1 = COPY %src
19301930 //
19311931 // The live range of %src overlaps the %dst value defined by FOO, but
19321932 // merging %src into %dst:ssub1 is only going to clobber the ssub1 lane
19411941 // is live, but never read. This can happen because we don't compute
19421942 // individual live ranges per lane.
19431943 //
1944 // %dst = FOO
1944 // %dst = FOO
19451945 // %src = BAR
1946 // %dst:ssub1 = COPY %src
1946 // %dst:ssub1 = COPY %src
19471947 //
19481948 // This kind of interference is only resolved locally. If the clobbered
19491949 // lane value escapes the block, the join is aborted.
22862286 //
22872287 // This adds ssub1 to the set of valid lanes in %src:
22882288 //
2289 // %src:ssub1 = FOO
2289 // %src:ssub1 = FOO
22902290 //
22912291 // This leaves only ssub1 valid, making any other lanes undef:
22922292 //
24242424 //
24252425 // 1 %dst:ssub0 = FOO <-- OtherVNI
24262426 // 2 %src = BAR <-- VNI
2427 // 3 %dst:ssub1 = COPY %src <-- Eliminate this copy.
2428 // 4 BAZ %dst
2429 // 5 QUUX %src
2427 // 3 %dst:ssub1 = COPY killed %src <-- Eliminate this copy.
2428 // 4 BAZ killed %dst
2429 // 5 QUUX killed %src
24302430 //
24312431 // Here OtherVNI will map to itself in [1;2), but to VNI in [2;5). CR_Replace
24322432 // handles this complex value mapping.
24362436 // If the other live range is killed by DefMI and the live ranges are still
24372437 // overlapping, it must be because we're looking at an early clobber def:
24382438 //
2439 // %dst = ASM %src
2439 // %dst = ASM killed %src
24402440 //
24412441 // In this case, it is illegal to merge the two live ranges since the early
24422442 // clobber def would clobber %src before it was read.
26812681 if (!Def.isBlock()) {
26822682 if (changeInstrs) {
26832683 // Remove <def,read-undef> flags. This def is now a partial redef.
2684 // Also remove <def,dead> flags since the joined live range will
2684 // Also remove dead flags since the joined live range will
26852685 // continue past this instruction.
26862686 for (MachineOperand &MO :
26872687 Indexes->getInstructionFromIndex(Def)->operands()) {
212212 continue;
213213 if (!isRegUsed(Reg)) {
214214 // Check if it's partial live: e.g.
215 // D0 = insert_subreg D0, S0
215 // D0 = insert_subreg undef D0, S0
216216 // ... D0
217217 // The problem is the insert_subreg could be eliminated. The use of
218218 // D0 is using a partially undef value. This is not *incorrect* since
13781378 // for a partially defined original register. For example:
13791379 // %0:subreg_hireg = ...
13801380 // ...
1381 // %1 = COPY %0
1381 // %1 = COPY %0
13821382 if (S.empty())
13831383 continue;
13841384 SubLRC.reset(&VRM.getMachineFunction(), LIS.getSlotIndexes(), &MDT,
143143 });
144144 }
145145
146 Printable printRegClassOrBank(unsigned Reg, const MachineRegisterInfo &RegInfo,
147 const TargetRegisterInfo *TRI) {
148 return Printable([Reg, &RegInfo, TRI](raw_ostream &OS) {
149 if (RegInfo.getRegClassOrNull(Reg))
150 OS << StringRef(TRI->getRegClassName(RegInfo.getRegClass(Reg))).lower();
151 else if (RegInfo.getRegBankOrNull(Reg))
152 OS << StringRef(RegInfo.getRegBankOrNull(Reg)->getName()).lower();
153 else {
154 OS << "_";
155 assert((RegInfo.def_empty(Reg) || RegInfo.getType(Reg).isValid()) &&
156 "Generic registers must have a valid type");
157 }
158 });
159 }
160
146161 } // end namespace llvm
147162
148163 /// getAllocatableClass - Return the maximal subclass of the given register
457457 /// For example, in this code:
458458 ///
459459 /// %reg1034 = copy %reg1024
460 /// %reg1035 = copy %reg1025
461 /// %reg1036 = add %reg1034, %reg1035
460 /// %reg1035 = copy killed %reg1025
461 /// %reg1036 = add killed %reg1034, killed %reg1035
462462 ///
463463 /// %reg1034 is not considered to be killed, since it is copied from a
464464 /// register which is not killed. Treating it as not killed lets the
590590 // general, we want no uses between this instruction and the definition of
591591 // the two-address register.
592592 // e.g.
593 // %reg1028 = EXTRACT_SUBREG %reg1027, 1
594 // %reg1029 = MOV8rr %reg1028
595 // %reg1029 = SHR8ri %reg1029, 7, %eflags
596 // insert => %reg1030 = MOV8rr %reg1028
597 // %reg1030 = ADD8rr %reg1028, %reg1029, %eflags
593 // %reg1028 = EXTRACT_SUBREG killed %reg1027, 1
594 // %reg1029 = MOV8rr %reg1028
595 // %reg1029 = SHR8ri %reg1029, 7, implicit dead %eflags
596 // insert => %reg1030 = MOV8rr %reg1028
597 // %reg1030 = ADD8rr killed %reg1028, killed %reg1029, implicit dead %eflags
598598 // In this case, it might not be possible to coalesce the second MOV8rr
599599 // instruction if the first one is coalesced. So it would be profitable to
600600 // commute it:
601 // %reg1028 = EXTRACT_SUBREG %reg1027, 1
602 // %reg1029 = MOV8rr %reg1028
603 // %reg1029 = SHR8ri %reg1029, 7, %eflags
604 // insert => %reg1030 = MOV8rr %reg1029
605 // %reg1030 = ADD8rr %reg1029, %reg1028, %eflags
601 // %reg1028 = EXTRACT_SUBREG killed %reg1027, 1
602 // %reg1029 = MOV8rr %reg1028
603 // %reg1029 = SHR8ri %reg1029, 7, implicit dead %eflags
604 // insert => %reg1030 = MOV8rr %reg1029
605 // %reg1030 = ADD8rr killed %reg1029, killed %reg1028, implicit dead %eflags
606606
607607 if (!isPlainlyKilled(MI, regC, LIS))
608608 return false;
609609
610610 // Ok, we have something like:
611 // %reg1030 = ADD8rr %reg1028, %reg1029, %eflags
611 // %reg1030 = ADD8rr killed %reg1028, killed %reg1029, implicit dead %eflags
612612 // let's see if it's worth commuting it.
613613
614614 // Look for situations like this:
615 // %reg1024 = MOV r1
616 // %reg1025 = MOV r0
617 // %reg1026 = ADD %reg1024, %reg1025
615 // %reg1024 = MOV r1
616 // %reg1025 = MOV r0
617 // %reg1026 = ADD %reg1024, %reg1025
618618 // r0 = MOV %reg1026
619619 // Commute the ADD to hopefully eliminate an otherwise unavoidable copy.
620620 unsigned ToRegA = getMappedReg(regA, DstRegMap);
712712 bool
713713 TwoAddressInstructionPass::isProfitableToConv3Addr(unsigned RegA,unsigned RegB){
714714 // Look for situations like this:
715 // %reg1024 = MOV r1
716 // %reg1025 = MOV r0
717 // %reg1026 = ADD %reg1024, %reg1025
715 // %reg1024 = MOV r1
716 // %reg1025 = MOV r0
717 // %reg1026 = ADD %reg1024, %reg1025
718718 // r2 = MOV %reg1026
719719 // Turn ADD into a 3-address instruction to avoid a copy.
720720 unsigned FromRegB = getMappedReg(RegB, SrcRegMap);
14651465
14661466 assert(SrcReg && SrcMO.isUse() && "two address instruction invalid");
14671467
1468 // Deal with <undef> uses immediately - simply rewrite the src operand.
1468 // Deal with undef uses immediately - simply rewrite the src operand.
14691469 if (SrcMO.isUndef() && !DstMO.getSubReg()) {
14701470 // Constrain the DstReg register class if required.
14711471 if (TargetRegisterInfo::isVirtualRegister(DstReg))
17771777 ///
17781778 /// Becomes:
17791779 ///
1780 /// %dst:ssub0 = COPY %v1
1781 /// %dst:ssub1 = COPY %v2
1780 /// undef %dst:ssub0 = COPY %v1
1781 /// %dst:ssub1 = COPY %v2
17821782 void TwoAddressInstructionPass::
17831783 eliminateRegSequence(MachineBasicBlock::iterator &MBBI) {
17841784 MachineInstr &MI = *MBBI;
18021802 MachineOperand &UseMO = MI.getOperand(i);
18031803 unsigned SrcReg = UseMO.getReg();
18041804 unsigned SubIdx = MI.getOperand(i+1).getImm();
1805 // Nothing needs to be inserted for <undef> operands.
1805 // Nothing needs to be inserted for undef operands.
18061806 if (UseMO.isUndef())
18071807 continue;
18081808
18241824 .addReg(DstReg, RegState::Define, SubIdx)
18251825 .add(UseMO);
18261826
1827 // The first def needs an <undef> flag because there is no live register
1827 // The first def needs an undef flag because there is no live register
18281828 // before it.
18291829 if (!DefEmitted) {
18301830 CopyMI->getOperand(0).setIsUndef(true);
379379 ++NumIdCopies;
380380
381381 // Copies like:
382 // %r0 = COPY %r0
383 // %al = COPY %al, %eax
382 // %r0 = COPY undef %r0
383 // %al = COPY %al, implicit-def %eax
384384 // give us additional liveness information: The target (super-)register
385385 // must not be valid before this point. Replace the COPY with a KILL
386386 // instruction to maintain this information.
487487 if (SubReg != 0) {
488488 if (NoSubRegLiveness) {
489489 // A virtual register kill refers to the whole register, so we may
490 // have to add <imp-use,kill> operands for the super-register. A
490 // have to add implicit killed operands for the super-register. A
491491 // partial redef always kills and redefines the super-register.
492492 if ((MO.readsReg() && (MO.isDef() || MO.isKill())) ||
493493 (MO.isDef() && subRegLiveThrough(*MI, PhysReg)))
512512 }
513513 }
514514
515 // The <def,undef> and <def,internal> flags only make sense for
515 // The def undef and def internal flags only make sense for
516516 // sub-register defs, and we are substituting a full physreg. An
517 // <imp-use,kill> operand from the SuperKills list will represent the
517 // implicit killed operand from the SuperKills list will represent the
518518 // partial read of the super-register.
519519 if (MO.isDef()) {
520520 MO.setIsUndef(false);
160160 /// A Chain is a sequence of instructions that are linked together by
161161 /// an accumulation operand. For example:
162162 ///
163 /// fmul d0, ?
164 /// fmla d1, ?, ?, d0
165 /// fmla d2, ?, ?, d1
163 /// fmul def d0, ?
164 /// fmla def d1, ?, ?, killed d0
165 /// fmla def d2, ?, ?, killed d1
166166 ///
167167 /// There may be other instructions interleaved in the sequence that
168168 /// do not belong to the chain. These other instructions must not use
28002800 LiveIntervals *LIS) const {
28012801 // This is a bit of a hack. Consider this instruction:
28022802 //
2803 // %0 = COPY %sp; GPR64all:%0
2803 // %0 = COPY %sp; GPR64all:%0
28042804 //
28052805 // We explicitly chose GPR64all for the virtual register so such a copy might
28062806 // be eliminated by RegisterCoalescer. However, that may not be possible, and
28292829 // Handle the case where a copy is being spilled or filled but the source
28302830 // and destination register class don't match. For example:
28312831 //
2832 // %0 = COPY %xzr; GPR64common:%0
2832 // %0 = COPY %xzr; GPR64common:%0
28332833 //
28342834 // In this case we can still safely fold away the COPY and generate the
28352835 // following spill code:
28392839 // This also eliminates spilled cross register class COPYs (e.g. between x and
28402840 // d regs) of the same size. For example:
28412841 //
2842 // %0 = COPY %1; GPR64:%0, FPR64:%1
2842 // %0 = COPY %1; GPR64:%0, FPR64:%1
28432843 //
28442844 // will be filled as
28452845 //
829829 if (SExtIdx != -1) {
830830 // Generate the sign extension for the proper result of the ldp.
831831 // I.e., with X1, that would be:
832 // %w1 = KILL %w1, %x1
833 // %x1 = SBFMXri %x1, 0, 31
832 // %w1 = KILL %w1, implicit-def %x1
833 // %x1 = SBFMXri killed %x1, 0, 31
834834 MachineOperand &DstMO = MIB->getOperand(SExtIdx);
835835 // Right now, DstMO has the extended register, since it comes from an
836836 // extended opcode.
14491449 unsigned *ReplaceReg) {
14501450 DEBUG(dbgs() << "Shrink PHI: ");
14511451 DEBUG(PHI.dump());
1452 DEBUG(dbgs() << " to " << printReg(getPHIDestReg(PHI), TRI)
1453 << " = PHI(");
1452 DEBUG(dbgs() << " to " << printReg(getPHIDestReg(PHI), TRI) << " = PHI(");
14541453
14551454 bool Replaced = false;
14561455 unsigned NumInputs = getPHINumInputs(PHI);
15061505 SmallVector &PHIRegionIndices) {
15071506 DEBUG(dbgs() << "Replace PHI: ");
15081507 DEBUG(PHI.dump());
1509 DEBUG(dbgs() << " with " << printReg(getPHIDestReg(PHI), TRI)
1510 << " = PHI(");
1508 DEBUG(dbgs() << " with " << printReg(getPHIDestReg(PHI), TRI) << " = PHI(");
15111509
15121510 bool HasExternalEdge = false;
15131511 unsigned NumInputs = getPHINumInputs(PHI);
15651563 DEBUG(dbgs() << " register " << printReg(CombinedSourceReg, TRI) << "\n");
15661564 PHI.eraseFromParent();
15671565 } else {
1568 DEBUG(dbgs() << printReg(getPHIDestReg(PHI), TRI) << " = PHI(");
1566 DEBUG(dbgs() << printReg(getPHIDestReg(PHI), TRI) << " = PHI(");
15691567 MachineBasicBlock *MBB = PHI.getParent();
15701568 MachineInstrBuilder MIB =
15711569 BuildMI(*MBB, PHI, PHI.getDebugLoc(), TII->get(TargetOpcode::PHI),
17501748 return;
17511749 }
17521750 DEBUG(dbgs() << "Merge PHI (" << printMBBReference(*MergeBB)
1753 << "): " << printReg(DestRegister, TRI) << " = PHI("
1751 << "): " << printReg(DestRegister, TRI) << " = PHI("
17541752 << printReg(IfSourceRegister, TRI) << ", "
17551753 << printMBBReference(*IfBB) << printReg(CodeSourceRegister, TRI)
17561754 << ", " << printMBBReference(*CodeBB) << ")\n");
21462144 const DebugLoc &DL = Entry->findDebugLoc(Entry->begin());
21472145 MachineInstrBuilder MIB = BuildMI(*Entry, Entry->instr_begin(), DL,
21482146 TII->get(TargetOpcode::PHI), DestReg);
2149 DEBUG(dbgs() << "Entry PHI " << printReg(DestReg, TRI) << " = PHI(");
2147 DEBUG(dbgs() << "Entry PHI " << printReg(DestReg, TRI) << " = PHI(");
21502148
21512149 unsigned CurrentBackedgeReg = 0;
21522150
21712169 BackedgePHI.addMBB((*SRI).second);
21722170 CurrentBackedgeReg = NewBackedgeReg;
21732171 DEBUG(dbgs() << "Inserting backedge PHI: "
2174 << printReg(NewBackedgeReg, TRI) << " = PHI("
2172 << printReg(NewBackedgeReg, TRI) << " = PHI("
21752173 << printReg(CurrentBackedgeReg, TRI) << ", "
21762174 << printMBBReference(*getPHIPred(*PHIDefInstr, 0))
21772175 << ", "
24402438 MachineInstrBuilder MIB =
24412439 BuildMI(*EntrySucc, EntrySucc->instr_begin(), PHI.getDebugLoc(),
24422440 TII->get(TargetOpcode::PHI), NewDestReg);
2443 DEBUG(dbgs() << "Split Entry PHI " << printReg(NewDestReg, TRI)
2444 << " = PHI(");
2441 DEBUG(dbgs() << "Split Entry PHI " << printReg(NewDestReg, TRI) << " = PHI(");
24452442 MIB.addReg(PHISource);
24462443 MIB.addMBB(Entry);
24472444 DEBUG(dbgs() << printReg(PHISource, TRI) << ", "
143143 // to be caused by ALU instructions in the next instruction group that wrote
144144 // to the $src_gpr registers of the VTX_READ.
145145 // e.g.
146 // %t3_x = VTX_READ_PARAM_32_eg %t2_x, 24
147 // %t2_x = MOV %zero
146 // %t3_x = VTX_READ_PARAM_32_eg killed %t2_x, 24
147 // %t2_x = MOV %zero
148148 //Adding this constraint prevents this from happening.
149149 let Constraints = "$src_gpr.ptr = $dst_gpr";
150150 }
211211 // to be caused by ALU instructions in the next instruction group that wrote
212212 // to the $src_gpr registers of the VTX_READ.
213213 // e.g.
214 // %t3_x = VTX_READ_PARAM_32_eg %t2_x, 24
215 // %t2_x = MOV %zero
214 // %t3_x = VTX_READ_PARAM_32_eg killed %t2_x, 24
215 // %t2_x = MOV %zero
216216 //Adding this constraint prevents this from happening.
217217 let Constraints = "$src_gpr.ptr = $dst_gpr";
218218 }
1111 /// common data and/or have enough undef subreg using swizzle abilities.
1212 ///
1313 /// For instance let's consider the following pseudo code :
14 /// %5 = REG_SEQ %1, sub0, %2, sub1, %3, sub2, undef, sub3
14 /// %5 = REG_SEQ %1, sub0, %2, sub1, %3, sub2, undef, sub3
1515 /// ...
16 /// %7 = REG_SEQ %1, sub0, %3, sub1, undef, sub2, %4, sub3
16 /// %7 = REG_SEQ %1, sub0, %3, sub1, undef, sub2, %4, sub3
1717 /// (swizzable Inst) %7, SwizzleMask : sub0, sub1, sub2, sub3
1818 ///
1919 /// is turned into :
20 /// %5 = REG_SEQ %1, sub0, %2, sub1, %3, sub2, undef, sub3
20 /// %5 = REG_SEQ %1, sub0, %2, sub1, %3, sub2, undef, sub3
2121 /// ...
22 /// %7 = INSERT_SUBREG %4, sub3
22 /// %7 = INSERT_SUBREG %4, sub3
2323 /// (swizzable Inst) %7, SwizzleMask : sub0, sub2, sub1, sub3
2424 ///
2525 /// This allow regalloc to reduce register pressure for vector registers and
1616 /// %vgpr0 = V_MOV_B32_e32 0.0
1717 /// if (...) {
1818 /// %vgpr1 = ...
19 /// %vgpr2 = WWM %vgpr1
20 /// ... = %vgpr2
19 /// %vgpr2 = WWM killed %vgpr1
20 /// ... = killed %vgpr2
2121 /// %vgpr0 = V_MOV_B32_e32 1.0
2222 /// }
2323 /// ... = %vgpr0
970970 // Prevent folding operands backwards in the function. For example,
971971 // the COPY opcode must not be replaced by 1 in this example:
972972 //
973 // %3 = COPY %vgpr0; VGPR_32:%3
973 // %3 = COPY %vgpr0; VGPR_32:%3
974974 // ...
975 // %vgpr0 = V_MOV_B32_e32 1, %exec
975 // %vgpr0 = V_MOV_B32_e32 1, implicit %exec
976976 MachineOperand &Dst = MI.getOperand(0);
977977 if (Dst.isReg() &&
978978 !TargetRegisterInfo::isVirtualRegister(Dst.getReg()))
479479 }
480480
481481 // If this is not immediate then it can be copy of immediate value, e.g.:
482 // %1 = S_MOV_B32 255;
482 // %1 = S_MOV_B32 255;
483483 if (Op.isReg()) {
484484 for (const MachineOperand &Def : MRI->def_operands(Op.getReg())) {
485485 if (!isSameReg(Op, Def))
14461446 DEBUG(dbgs() << "widening: " << MI);
14471447 MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
14481448
1449 // Get rid of the old <imp-def> of DstRegD. Leave it if it defines a Q-reg
1449 // Get rid of the old implicit-def of DstRegD. Leave it if it defines a Q-reg
14501450 // or some other super-register.
14511451 int ImpDefIdx = MI.findRegisterDefOperandIdx(DstRegD);
14521452 if (ImpDefIdx != -1)
16491649 }
16501650
16511651 for (unsigned i = 3, e = MI0.getNumOperands(); i != e; ++i) {
1652 // %12 = PICLDR %11, 0, pred:14, pred:%noreg
1652 // %12 = PICLDR %11, 0, pred:14, pred:%noreg
16531653 const MachineOperand &MO0 = MI0.getOperand(i);
16541654 const MachineOperand &MO1 = MI1.getOperand(i);
16551655 if (!MO0.isIdenticalTo(MO1))
46674667 NewMIB = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::VEXTd32),
46684668 DDst);
46694669
4670 // On the first instruction, both DSrc and DDst may be if present.
4670 // On the first instruction, both DSrc and DDst may be undef if present.
46714671 // Specifically when the original instruction didn't have them as an
46724672 // implicit use.
46734673 unsigned CurReg = SrcLane == 1 && DstLane == 1 ? DSrc : DDst;
46874687 MIB.addReg(DDst, RegState::Define);
46884688
46894689 // On the second instruction, DDst has definitely been defined above, so
4690 // it is not . DSrc, if present, can be as above.
4690 // it is not undef. DSrc, if present, can be undef as above.
46914691 CurReg = SrcLane == 1 && DstLane == 0 ? DSrc : DDst;
46924692 CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI);
46934693 MIB.addReg(CurReg, getUndefRegState(CurUndef));
47704770
47714771 // We must be able to clobber the whole D-reg.
47724772 if (TargetRegisterInfo::isVirtualRegister(Reg)) {
4773 // Virtual register must be a foo:ssub_0 operand.
4773 // Virtual register must be a def undef foo:ssub_0 operand.
47744774 if (!MO.getSubReg() || MI.readsVirtualRegister(Reg))
47754775 return 0;
47764776 } else if (ARM::SPRRegClass.contains(Reg)) {
921921 // .Lloadcmp:
922922 // ldrexd rDestLo, rDestHi, [rAddr]
923923 // cmp rDestLo, rDesiredLo
924 // sbcs rTempReg, rDestHi, rDesiredHi
924 // sbcs dead rTempReg, rDestHi, rDesiredHi
925925 // bne .Ldone
926926 unsigned LDREXD = IsThumb ? ARM::t2LDREXD : ARM::LDREXD;
927927 MachineInstrBuilder MIB;
91679167 // operand is still set to noreg. If needed, set the optional operand's
91689168 // register to CPSR, and remove the redundant implicit def.
91699169 //
9170 // e.g. ADCS (..., CPSR) -> ADC (... opt:CPSR).
9170 // e.g. ADCS (..., implicit-def CPSR) -> ADC (... opt:def CPSR).
91719171
91729172 // Rename pseudo opcodes.
91739173 unsigned NewOpc = convertAddSubFlagsOpcode(MI.getOpcode());
16961696 if (OddReg == EvenReg && EvenDeadKill) {
16971697 // If the two source operands are the same, the kill marker is
16981698 // probably on the first one. e.g.
1699 // t2STRDi8 %r5, %r5, %r9, 0, 14, %reg0
1699 // t2STRDi8 killed %r5, %r5, killed %r9, 0, 14, %reg0
17001700 EvenDeadKill = false;
17011701 OddDeadKill = true;
17021702 }
572572 return;
573573 } else {
574574 // The PHI node looks like:
575 // %2 = PHI %0, <%bb.1>, %1, <%bb.3>
575 // %2 = PHI %0, <%bb.1>, %1, <%bb.3>
576576 // Trace each incoming definition, e.g., (%0, %bb.1) and (%1, %bb.3)
577577 // The AND operation can be removed if both %0 in %bb.1 and %1 in
578578 // %bb.3 are defined with a load matching the MaskN.
367367 }
368368 }
369369 // Defs and clobbers can overlap, e.g.
370 // %d0 = COPY %5, %r0, %r1
370 // dead %d0 = COPY %5, implicit-def %r0, implicit-def %r1
371371 for (RegisterRef R : Defs)
372372 Clobbers.erase(R);
373373
186186
187187 // Mapping: vreg -> cell
188188 // The keys are registers _without_ subregisters. This won't allow
189 // definitions in the form of "vreg:subreg = ...". Such definitions
189 // definitions in the form of "vreg:subreg = ...". Such definitions
190190 // would be questionable from the point of view of SSA, since the "vreg"
191191 // could not be initialized in its entirety (specifically, an instruction
192192 // defining the "other part" of "vreg" would also count as a definition
19761976 {
19771977 const MachineOperand &VO = MI.getOperand(1);
19781978 // The operand of CONST32 can be a blockaddress, e.g.
1979 // %0 = CONST32
1979 // %0 = CONST32
19801980 // Do this check for all instructions for safety.
19811981 if (!VO.isImm())
19821982 return false;
31463146 BrI.setDesc(JD);
31473147 while (BrI.getNumOperands() > 0)
31483148 BrI.RemoveOperand(0);
3149 // This ensures that all implicit operands (e.g. %r31, etc)
3149 // This ensures that all implicit operands (e.g. implicit-def %r31, etc)
31503150 // are present in the rewritten branch.
31513151 for (auto &Op : NI->operands())
31523152 BrI.addOperand(Op);
350350 // kill flag for a register (a removeRegisterKilled() analogous to
351351 // addRegisterKilled) that handles aliased register correctly.
352352 // * or has a killed aliased register use of I1's use reg
353 // %d4 = A2_tfrpi 16
354 // %r6 = A2_tfr %r9
355 // %r8 = KILL %r8, %d4
353 // %d4 = A2_tfrpi 16
354 // %r6 = A2_tfr %r9
355 // %r8 = KILL %r8, implicit killed %d4
356356 // If we want to move R6 = across the KILL instruction we would have
357 // to remove the %d4 operand. For now, we are
357 // to remove the implicit killed %d4 operand. For now, we are
358358 // conservative and disallow the move.
359359 // we can't move I1 across it.
360360 if (MI.isDebugValue()) {
2424 //
2525 // Example:
2626 //
27 // %40 = L2_loadrub_io %39, 1
28 // %41 = S2_tstbit_i %40, 0
29 // J2_jumpt %41, <%bb.5>, %pc
30 // J2_jump <%bb.4>, %pc
27 // %40 = L2_loadrub_io killed %39, 1
28 // %41 = S2_tstbit_i killed %40, 0
29 // J2_jumpt killed %41, <%bb.5>, implicit dead %pc
30 // J2_jump <%bb.4>, implicit dead %pc
3131 // Successors according to CFG: %bb.4(62) %bb.5(62)
3232 //
3333 // %bb.4: derived from LLVM BB %if.then
3434 // Predecessors according to CFG: %bb.3
35 // %11 = A2_addp %6, %10
35 // %11 = A2_addp %6, %10
3636 // S2_storerd_io %32, 16, %11
3737 // Successors according to CFG: %bb.5
3838 //
3939 // %bb.5: derived from LLVM BB %if.end
4040 // Predecessors according to CFG: %bb.3 %bb.4
41 // %12 = PHI %6, <%bb.3>, %11, <%bb.4>
42 // %13 = A2_addp %7, %12
43 // %42 = C2_cmpeqi %9, 10
44 // J2_jumpf %42, <%bb.3>, %pc
45 // J2_jump <%bb.6>, %pc>
41 // %12 = PHI %6, <%bb.3>, %11, <%bb.4>
42 // %13 = A2_addp %7, %12
43 // %42 = C2_cmpeqi %9, 10
44 // J2_jumpf killed %42, <%bb.3>, implicit dead %pc
45 // J2_jump <%bb.6>, implicit dead %pc
4646 // Successors according to CFG: %bb.6(4) %bb.3(124)
4747 //
4848 // would become:
4949 //
50 // %40 = L2_loadrub_io %39, 1
51 // %41 = S2_tstbit_i %40, 0
52 // spec-> %11 = A2_addp %6, %10
50 // %40 = L2_loadrub_io killed %39, 1
51 // %41 = S2_tstbit_i killed %40, 0
52 // spec-> %11 = A2_addp %6, %10
5353 // pred-> S2_pstorerdf_io %41, %32, 16, %11
54 // %46 = PS_pselect %41, %6, %11
55 // %13 = A2_addp %7, %46
56 // %42 = C2_cmpeqi %9, 10
57 // J2_jumpf %42, <%bb.3>, %pc
58 // J2_jump <%bb.6>, %pc
54 // %46 = PS_pselect %41, %6, %11
55 // %13 = A2_addp %7, %46
56 // %42 = C2_cmpeqi %9, 10
57 // J2_jumpf killed %42, <%bb.3>, implicit dead %pc
58 // J2_jump <%bb.6>, implicit dead %pc
5959 // Successors according to CFG: %bb.6 %bb.3
6060
6161 #include "Hexagon.h"
2727 // definitions are predicable, then in the second step, the conditional
2828 // transfers will then be rewritten as predicated instructions. E.g.
2929 // %0 = A2_or %1, %2
30 // %3 = A2_tfrt %99, %0
30 // %3 = A2_tfrt %99, killed %0
3131 // will be rewritten as
3232 // %3 = A2_port %99, %1, %2
3333 //
3434 // This replacement has two variants: "up" and "down". Consider this case:
3535 // %0 = A2_or %1, %2
3636 // ... [intervening instructions] ...
37 // %3 = A2_tfrt %99, %0
37 // %3 = A2_tfrt %99, killed %0
3838 // variant "up":
3939 // %3 = A2_port %99, %1, %2
4040 // ... [intervening instructions, %0->vreg3] ...
6464 // will see both instructions as actual definitions, and will mark the
6565 // first one as dead. The definition is not actually dead, and this
6666 // situation will need to be fixed. For example:
67 // %1 = A2_tfrt ... ; marked as dead
68 // %1 = A2_tfrf ...
67 // dead %1 = A2_tfrt ... ; marked as dead
68 // %1 = A2_tfrf ...
6969 //
7070 // Since any of the individual predicated transfers may end up getting
7171 // removed (in case it is an identity copy), some pre-existing def may
7272 // be marked as dead after live interval recomputation:
73 // %1 = ... ; marked as dead
73 // dead %1 = ... ; marked as dead
7474 // ...
75 // %1 = A2_tfrf ... ; if A2_tfrt is removed
75 // %1 = A2_tfrf ... ; if A2_tfrt is removed
7676 // This case happens if %1 was used as a source in A2_tfrt, which means
7777 // that it is actually live at the A2_tfrf, and so the now dead definition
7878 // of %1 will need to be updated to non-dead at some point.
17191719 MachineOperand &MO = PredDef->getOperand(i);
17201720 if (MO.isReg()) {
17211721 // Skip all implicit references. In one case there was:
1722 // %140 = FCMPUGT32_rr %138, %139, %usr
1722 // %140 = FCMPUGT32_rr %138, %139, implicit %usr
17231723 if (MO.isImplicit())
17241724 continue;
17251725 if (MO.isUse()) {
16141614 }
16151615
16161616 // Inspired by this pair:
1617 // %r13 = L2_loadri_io %r29, 136; mem:LD4[FixedStack0]
1618 // S2_storeri_io %r29, 132, %r1; flags: mem:ST4[FixedStack1]
1617 // %r13 = L2_loadri_io %r29, 136; mem:LD4[FixedStack0]
1618 // S2_storeri_io %r29, 132, killed %r1; flags: mem:ST4[FixedStack1]
16191619 // Currently AA considers the addresses in these instructions to be aliasing.
16201620 bool HexagonInstrInfo::areMemAccessesTriviallyDisjoint(
16211621 MachineInstr &MIa, MachineInstr &MIb, AliasAnalysis *AA) const {
35143514 case Hexagon::EH_RETURN_JMPR:
35153515 case Hexagon::PS_jmpret:
35163516 // jumpr r31
3517 // Actual form JMPR %pc, %r31, %r0.
3517 // Actual form JMPR implicit-def %pc, implicit %r31, implicit internal %r0
35183518 DstReg = MI.getOperand(0).getReg();
35193519 if (Hexagon::IntRegsRegClass.contains(DstReg) && (Hexagon::R31 == DstReg))
35203520 return HexagonII::HSIG_L2;
37043704 case Hexagon::C2_cmovenewif:
37053705 // if ([!]P0[.new]) Rd = #0
37063706 // Actual form:
3707 // %r16 = C2_cmovenewit %p0, 0, %r16;
3707 // %r16 = C2_cmovenewit internal %p0, 0, implicit undef %r16;
37083708 DstReg = MI.getOperand(0).getReg();
37093709 SrcReg = MI.getOperand(1).getReg();
37103710 if (isIntRegForSubInst(DstReg) &&
128128 // using -- if (QRI->isSubRegister(feederReg, cmpReg1) logic
129129 // before the callsite of this function
130130 // But we can not as it comes in the following fashion.
131 // %d0 = Hexagon_S2_lsr_r_p %d0, %r2
132 // %r0 = KILL %r0, %d0
133 // %p0 = CMPEQri %r0, 0
131 // %d0 = Hexagon_S2_lsr_r_p killed %d0, killed %r2
132 // %r0 = KILL %r0, implicit killed %d0
133 // %p0 = CMPEQri killed %r0, 0
134134 // Hence, we need to check if it's a KILL instruction.
135135 if (II->getOpcode() == TargetOpcode::KILL)
136136 return false;
195195 // to new value jump. If they are in the path, bail out.
196196 // KILL sets kill flag on the opcode. It also sets up a
197197 // single register, out of pair.
198 // %d0 = S2_lsr_r_p %d0, %r2
199 // %r0 = KILL %r0, %d0
200 // %p0 = C2_cmpeqi %r0, 0
198 // %d0 = S2_lsr_r_p killed %d0, killed %r2
199 // %r0 = KILL %r0, implicit killed %d0
200 // %p0 = C2_cmpeqi killed %r0, 0
201201 // PHI can be anything after RA.
202202 // COPY can rematerialize things in between feeder, compare and nvj.
203203 if (MII->getOpcode() == TargetOpcode::KILL ||
77 // This peephole pass optimizes in the following cases.
88 // 1. Optimizes redundant sign extends for the following case
99 // Transform the following pattern
10 // %170 = SXTW %166
10 // %170 = SXTW %166
1111 // ...
12 // %176 = COPY %170:isub_lo
12 // %176 = COPY %170:isub_lo
1313 //
1414 // Into
15 // %176 = COPY %166
15 // %176 = COPY %166
1616 //
1717 // 2. Optimizes redundant negation of predicates.
18 // %15 = CMPGTrr %6, %2
18 // %15 = CMPGTrr %6, %2
1919 // ...
20 // %16 = NOT_p %15
20 // %16 = NOT_p killed %15
2121 // ...
22 // JMP_c %16, <%bb.1>, %pc
22 // JMP_c killed %16, <%bb.1>, implicit dead %pc
2323 //
2424 // Into
25 // %15 = CMPGTrr %6, %2;
25 // %15 = CMPGTrr %6, %2;
2626 // ...
27 // JMP_cNot %15, <%bb.1>, %pc;
27 // JMP_cNot killed %15, <%bb.1>, implicit dead %pc;
2828 //
2929 // Note: The peephole pass makes the instructions like
30 // %170 = SXTW %166 or %16 = NOT_p %15
30 // %170 = SXTW %166 or %16 = NOT_p killed %15
3131 // redundant and relies on some form of dead removal instructions, like
3232 // DCE or DIE to actually eliminate them.
3333
131131 NextI = std::next(I);
132132 MachineInstr &MI = *I;
133133 // Look for sign extends:
134 // %170 = SXTW %166
134 // %170 = SXTW %166
135135 if (!DisableOptSZExt && MI.getOpcode() == Hexagon::A2_sxtw) {
136136 assert(MI.getNumOperands() == 2);
137137 MachineOperand &Dst = MI.getOperand(0);
142142 if (TargetRegisterInfo::isVirtualRegister(DstReg) &&
143143 TargetRegisterInfo::isVirtualRegister(SrcReg)) {
144144 // Map the following:
145 // %170 = SXTW %166
145 // %170 = SXTW %166
146146 // PeepholeMap[170] = %166
147147 PeepholeMap[DstReg] = SrcReg;
148148 }
149149 }
150150
151 // Look for %170 = COMBINE_ir_V4 (0, %169)
151 // Look for %170 = COMBINE_ir_V4 (0, %169)
152152 // %170:DoublRegs, %169:IntRegs
153153 if (!DisableOptExtTo64 && MI.getOpcode() == Hexagon::A4_combineir) {
154154 assert(MI.getNumOperands() == 3);
191191 if (TargetRegisterInfo::isVirtualRegister(DstReg) &&
192192 TargetRegisterInfo::isVirtualRegister(SrcReg)) {
193193 // Map the following:
194 // %170 = NOT_xx %166
194 // %170 = NOT_xx %166
195195 // PeepholeMap[170] = %166
196196 PeepholeMap[DstReg] = SrcReg;
197197 }
198198 }
199199
200200 // Look for copy:
201 // %176 = COPY %170:isub_lo
201 // %176 = COPY %170:isub_lo
202202 if (!DisableOptSZExt && MI.isCopy()) {
203203 assert(MI.getNumOperands() == 2);
204204 MachineOperand &Dst = MI.getOperand(0);
771771
772772 // If data definition is because of implicit definition of the register,
773773 // do not newify the store. Eg.
774 // %r9 = ZXTH %r12, %d6, %r12
775 // S2_storerh_io %r8, 2, %r12; mem:ST2[%scevgep343]
774 // %r9 = ZXTH %r12, implicit %d6, implicit-def %r12
775 // S2_storerh_io %r8, 2, killed %r12; mem:ST2[%scevgep343]
776776 for (auto &MO : PacketMI.operands()) {
777777 if (MO.isRegMask() && MO.clobbersPhysReg(DepReg))
778778 return false;
786786 // Handle imp-use of super reg case. There is a target independent side
787787 // change that should prevent this situation but I am handling it for
788788 // just-in-case. For example, we cannot newify R2 in the following case:
789 // %r3 = A2_tfrsi 0;
790 // S2_storeri_io %r0, 0, %r2, %d1;
789 // %r3 = A2_tfrsi 0;
790 // S2_storeri_io killed %r0, 0, killed %r2, implicit killed %d1;
791791 for (auto &MO : MI.operands()) {
792792 if (MO.isReg() && MO.isUse() && MO.isImplicit() && MO.getReg() == DepReg)
793793 return false;
891891 // Go through the packet instructions and search for an anti dependency between
892892 // them and DepReg from MI. Consider this case:
893893 // Trying to add
894 // a) %r1 = TFRI_cdNotPt %p3, 2
894 // a) %r1 = TFRI_cdNotPt %p3, 2
895895 // to this packet:
896896 // {
897 // b) %p0 = C2_or %p3, %p0
898 // c) %p3 = C2_tfrrp %r23
899 // d) %r1 = C2_cmovenewit %p3, 4
897 // b) %p0 = C2_or killed %p3, killed %p0
898 // c) %p3 = C2_tfrrp %r23
899 // d) %r1 = C2_cmovenewit %p3, 4
900900 // }
901901 // The P3 from a) and d) will be complements after
902902 // a)'s P3 is converted to .new form
961961
962962 // One corner case deals with the following scenario:
963963 // Trying to add
964 // a) %r24 = A2_tfrt %p0, %r25
964 // a) %r24 = A2_tfrt %p0, %r25
965965 // to this packet:
966966 // {
967 // b) %r25 = A2_tfrf %p0, %r24
968 // c) %p0 = C2_cmpeqi %r26, 1
967 // b) %r25 = A2_tfrf %p0, %r24
968 // c) %p0 = C2_cmpeqi %r26, 1
969969 // }
970970 //
971971 // On general check a) and b) are complements, but presence of c) will
15421542
15431543 // There are certain anti-dependencies that cannot be ignored.
15441544 // Specifically:
1545 // J2_call ... %r0 ; SUJ
1545 // J2_call ... implicit-def %r0 ; SUJ
15461546 // R0 = ... ; SUI
15471547 // Those cannot be packetized together, since the call will observe
15481548 // the effect of the assignment to R0.
271271 case Hexagon::J2_jumpr:
272272 case Hexagon::PS_jmpret:
273273 // jumpr r31
274 // Actual form JMPR %pc, %r31, %r0.
274 // Actual form JMPR implicit-def %pc, implicit %r31, implicit internal %r0.
275275 DstReg = MCI.getOperand(0).getReg();
276276 if (Hexagon::R31 == DstReg)
277277 return HexagonII::HSIG_L2;
470470 case Hexagon::C2_cmovenewif:
471471 // if ([!]P0[.new]) Rd = #0
472472 // Actual form:
473 // %r16 = C2_cmovenewit %p0, 0, %r16;
473 // %r16 = C2_cmovenewit internal %p0, 0, implicit undef %r16;
474474 DstReg = MCI.getOperand(0).getReg(); // Rd
475475 PredReg = MCI.getOperand(1).getReg(); // P0
476476 if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) &&
112112
113113 if (!HexagonMCInstrInfo::bundleSize(MCB)) {
114114 // There once was a bundle:
115 // BUNDLE %d2, %r4, %r5, %d7, ...
116 // * %d2 = IMPLICIT_DEF; flags:
117 // * %d7 = IMPLICIT_DEF; flags:
115 // BUNDLE implicit-def %d2, implicit-def %r4, implicit-def %r5,
116 // implicit-def %d7, ...
117 // * %d2 = IMPLICIT_DEF; flags:
118 // * %d7 = IMPLICIT_DEF; flags:
118119 // After the IMPLICIT_DEFs were removed by the asm printer, the bundle
119120 // became empty.
120121 DEBUG(dbgs() << "Skipping empty bundle");
136137
137138 if (!HexagonMCInstrInfo::bundleSize(MCB)) {
138139 // There once was a bundle:
139 // BUNDLE %d2, %r4, %r5, %d7, ...
140 // * %d2 = IMPLICIT_DEF; flags:
141 // * %d7 = IMPLICIT_DEF; flags:
140 // BUNDLE implicit-def %d2, implicit-def %r4, implicit-def %r5,
141 // implicit-def %d7, ...
142 // * %d2 = IMPLICIT_DEF; flags:
143 // * %d7 = IMPLICIT_DEF; flags:
142144 // After the IMPLICIT_DEFs were removed by the asm printer, the bundle
143145 // became empty.
144146 DEBUG(dbgs() << "Skipping empty bundle");
182182 // This is typically used to prevent keeping registers artificially live
183183 // in cases when they are defined via predicated instructions. For example:
184184 // r0 = add-if-true cond, r10, r11 (1)
185 // r0 = add-if-false cond, r12, r13, r0 (2)
185 // r0 = add-if-false cond, r12, r13, implicit r0 (2)
186186 // ... = r0 (3)
187187 // Before (1), r0 is not intended to be live, and the use of r0 in (3) is
188188 // not meant to be reached by any def preceding (1). However, since the
479479 MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), get(NewOpc));
480480
481481 // For MIPSR6 JI*C requires an immediate 0 as an operand, JIALC(64) an
482 // immediate 0 as an operand and requires the removal of it's %ra
482 // immediate 0 as an operand and requires the removal of it's implicit-def %ra
483483 // implicit operand as copying the implicit operations of the instruction we're
484484 // looking at will give us the correct flags.
485485 if (NewOpc == Mips::JIC || NewOpc == Mips::JIALC || NewOpc == Mips::JIC64 ||
2121 // This peephole pass optimizes these cases, for example
2222 //
2323 // It will transform the following pattern
24 // %0 = LEA_ADDRi64 %VRFrame, 4
25 // %1 = cvta_to_local_yes_64 %0
24 // %0 = LEA_ADDRi64 %VRFrame, 4
25 // %1 = cvta_to_local_yes_64 %0
2626 //
2727 // into
28 // %1 = LEA_ADDRi64 %VRFrameLocal, 4
28 // %1 = LEA_ADDRi64 %VRFrameLocal, 4
2929 //
3030 // %VRFrameLocal is the virtual register name of %SPL
3131 //
6161 /// %bb.0: derived from LLVM BB %entry
6262 /// Live Ins: %f1 %f3 %x6
6363 ///
64 /// %0 = COPY %f1; F8RC:%0
65 /// %5 = CMPLWI %4, 0; CRRC:%5 GPRC:%4
66 /// %8 = LXSDX %zero8, %7, %rm;
64 /// %0 = COPY %f1; F8RC:%0
65 /// %5 = CMPLWI killed %4, 0; CRRC:%5 GPRC:%4
66 /// %8 = LXSDX %zero8, killed %7, implicit %rm;
6767 /// mem:LD8[ConstantPool] F8RC:%8 G8RC:%7
6868 /// BCC 76, %5, <%bb.2>; CRRC:%5
6969 /// Successors according to CFG: %bb.1(?%) %bb.2(?%)
7474 ///
7575 /// %bb.2: derived from LLVM BB %entry
7676 /// Predecessors according to CFG: %bb.0 %bb.1
77 /// %9 = PHI %8, <%bb.1>, %0, <%bb.0>;
77 /// %9 = PHI %8, <%bb.1>, %0, <%bb.0>;
7878 /// F8RC:%9,%8,%0
7979 ///
8080 /// BCC 76, %5, <%bb.4>; CRRC:%5
8686 ///
8787 /// %bb.4: derived from LLVM BB %entry
8888 /// Predecessors according to CFG: %bb.2 %bb.3
89 /// %13 = PHI %12, <%bb.3>, %2, <%bb.2>;
89 /// %13 = PHI %12, <%bb.3>, %2, <%bb.2>;
9090 /// F8RC:%13,%12,%2
9191 ///
92 /// BLR8 %lr8, %rm, %f1
92 /// BLR8 implicit %lr8, implicit %rm, implicit %f1
9393 ///
9494 /// When this pattern is detected, branch coalescing will try to collapse
9595 /// it by moving code in %bb.2 to %bb.0 and/or %bb.4 and removing %bb.3.
9999 /// %bb.0: derived from LLVM BB %entry
100100 /// Live Ins: %f1 %f3 %x6
101101 ///
102 /// %0 = COPY %f1; F8RC:%0
103 /// %5 = CMPLWI %4, 0; CRRC:%5 GPRC:%4
104 /// %8 = LXSDX %zero8, %7, %rm;
102 /// %0 = COPY %f1; F8RC:%0
103 /// %5 = CMPLWI killed %4, 0; CRRC:%5 GPRC:%4
104 /// %8 = LXSDX %zero8, killed %7, implicit %rm;
105105 /// mem:LD8[ConstantPool] F8RC:%8 G8RC:%7
106106 ///
107107 /// BCC 76, %5, <%bb.4>; CRRC:%5
114114 ///
115115 /// %bb.4: derived from LLVM BB %entry
116116 /// Predecessors according to CFG: %bb.0 %bb.1
117 /// %9 = PHI %8, <%bb.1>, %0, <%bb.0>;
117 /// %9 = PHI %8, <%bb.1>, %0, <%bb.0>;
118118 /// F8RC:%9,%8,%0
119 /// %13 = PHI %12, <%bb.1>, %2, <%bb.0>;
119 /// %13 = PHI %12, <%bb.1>, %2, <%bb.0>;
120120 /// F8RC:%13,%12,%2
121121 ///
122 /// BLR8 %lr8, %rm, %f1
122 /// BLR8 implicit %lr8, implicit %rm, implicit %f1
123123 ///
124124 /// Branch Coalescing does not split blocks, it moves everything in the same
125125 /// direction ensuring it does not break use/definition semantics.
23142314
23152315 // For a method return value, we check the ZExt/SExt flags in attribute.
23162316 // We assume the following code sequence for method call.
2317 // ADJCALLSTACKDOWN 32, %r1, %r1
2317 // ADJCALLSTACKDOWN 32, implicit dead %r1, implicit %r1
23182318 // BL8_NOP ,...
2319 // ADJCALLSTACKUP 32, 0, %r1, %r1
2320 // %5 = COPY %x3; G8RC:%5
2319 // ADJCALLSTACKUP 32, 0, implicit dead %r1, implicit %r1
2320 // %5 = COPY %x3; G8RC:%5
23212321 if (SrcReg == PPC::X3) {
23222322 const MachineBasicBlock *MBB = MI.getParent();
23232323 MachineBasicBlock::const_instr_iterator II =
584584 // We can eliminate RLDICL (e.g. for zero-extension)
585585 // if all bits to clear are already zero in the input.
586586 // This code assume following code sequence for zero-extension.
587 // %6 = COPY %5:sub_32; (optional)
588 // %8 = IMPLICIT_DEF;
587 // %6 = COPY %5:sub_32; (optional)
588 // %8 = IMPLICIT_DEF;
589589 // %7 = INSERT_SUBREG %8, %6, sub_32;
590590 if (!EnableZExtElimination) break;
591591
684684 DEBUG(dbgs() << "Optimizing LI to ADDI: ");
685685 DEBUG(LiMI->dump());
686686
687 // There could be repeated registers in the PHI, e.g: %1 =
687 // There could be repeated registers in the PHI, e.g: %1 =
688688 // PHI %6, <%bb.2>, %8, <%bb.3>, %8, <%bb.6>; So if we've
689689 // already replaced the def instruction, skip.
690690 if (LiMI->getOpcode() == PPC::ADDI || LiMI->getOpcode() == PPC::ADDI8)
7878 }
7979
8080 // We're looking for a sequence like this:
81 // %f0 = LFD 0, %x3, %qf0; mem:LD8[%a](tbaa=!2)
82 // %qf1 = QVESPLATI %qf0, 0, %rm
81 // %f0 = LFD 0, killed %x3, implicit-def %qf0; mem:LD8[%a](tbaa=!2)
82 // %qf1 = QVESPLATI killed %qf0, 0, implicit %rm
8383
8484 for (auto SI = Splats.begin(); SI != Splats.end();) {
8585 MachineInstr *SMI = *SI;
8989 // This pass is run after register coalescing, and so we're looking for
9090 // a situation like this:
9191 // ...
92 // %5 = COPY %9; VSLRC:%5,%9
92 // %5 = COPY %9; VSLRC:%5,%9
9393 // %5 = XSMADDADP %5, %17, %16,
94 // %rm; VSLRC:%5,%17,%16
94 // implicit %rm; VSLRC:%5,%17,%16
9595 // ...
9696 // %9 = XSMADDADP %9, %17, %19,
97 // %rm; VSLRC:%9,%17,%19
97 // implicit %rm; VSLRC:%9,%17,%19
9898 // ...
9999 // Where we can eliminate the copy by changing from the A-type to the
100100 // M-type instruction. Specifically, for this example, this means:
101101 // %5 = XSMADDADP %5, %17, %16,
102 // %rm; VSLRC:%5,%17,%16
102 // implicit %rm; VSLRC:%5,%17,%16
103103 // is replaced by:
104104 // %16 = XSMADDMDP %16, %18, %9,
105 // %rm; VSLRC:%16,%18,%9
106 // and we remove: %5 = COPY %9; VSLRC:%5,%9
105 // implicit %rm; VSLRC:%16,%18,%9
106 // and we remove: %5 = COPY %9; VSLRC:%5,%9
107107
108108 SlotIndex FMAIdx = LIS->getInstructionIndex(MI);
109109
149149 // walking the MIs we may as well test liveness here.
150150 //
151151 // FIXME: There is a case that occurs in practice, like this:
152 // %9 = COPY %f1; VSSRC:%9
152 // %9 = COPY %f1; VSSRC:%9
153153 // ...
154 // %6 = COPY %9; VSSRC:%6,%9
155 // %7 = COPY %9; VSSRC:%7,%9
154 // %6 = COPY %9; VSSRC:%6,%9
155 // %7 = COPY %9; VSSRC:%7,%9
156156 // %9 = XSMADDASP %9, %1, %4; VSSRC:
157157 // %6 = XSMADDASP %6, %1, %2; VSSRC:
158158 // %7 = XSMADDASP %7, %1, %3; VSSRC:
435435 // Also do a forward search to handle cases where an instruction after the
436436 // compare can be converted like
437437 //
438 // LTEBRCompare %f0s, %f0s, %cc LTEBRCompare %f0s, %f0s, %cc
439 // %f2s = LER %f0s
438 // LTEBRCompare %f0s, %f0s, implicit-def %cc LTEBRCompare %f0s, %f0s,
439 // implicit-def %cc %f2s = LER %f0s
440440 //
441441 MBBI = Compare, MBBE = MBB.end();
442442 while (++MBBI != MBBE) {
102102
103103 Before regalloc, we have:
104104
105 %reg1025 = IMUL32rri8 %reg1024, 45, %eflags
105 %reg1025 = IMUL32rri8 %reg1024, 45, implicit-def %eflags
106106 JMP mbb
107107 Successors according to CFG: 0x203afb0 (#3)
108108
109109 bb1: 0x203af60, LLVM BB @0x1e02310, ID#2:
110110 Predecessors according to CFG: 0x203aec0 (#0)
111 %reg1026 = IMUL32rri8 %reg1024, 78, %eflags
111 %reg1026 = IMUL32rri8 %reg1024, 78, implicit-def %eflags
112112 Successors according to CFG: 0x203afb0 (#3)
113113
114114 bb2: 0x203afb0, LLVM BB @0x1e02340, ID#3:
115115 Predecessors according to CFG: 0x203af10 (#1) 0x203af60 (#2)
116 %reg1027 = PHI %reg1025, mbb,
116 %reg1027 = PHI %reg1025, mbb,
117117 %reg1026, mbb
118 %reg1029 = MOVZX64rr32 %reg1027
118 %reg1029 = MOVZX64rr32 %reg1027
119119
120120 so we'd have to know that IMUL32rri8 leaves the high word zero extended and to
121121 be able to recognize the zero extend. This could also presumably be implemented
190190 /// %bb.2: derived from LLVM BB %if.then
191191 /// Live Ins: %rdi
192192 /// Predecessors according to CFG: %bb.0
193 /// %ax = MOV16rm %rdi, 1, %noreg, 0, %noreg, %eax;
193 /// %ax = MOV16rm killed %rdi, 1, %noreg, 0, %noreg, implicit-def %eax;
194194 /// mem:LD2[%p]
195 /// No %eax
195 /// No implicit %eax
196196 /// Successors according to CFG: %bb.3(?%)
197197 ///
198198 /// %bb.3: derived from LLVM BB %if.end
199199 /// Live Ins: %eax Only %ax is actually live
200200 /// Predecessors according to CFG: %bb.2 %bb.1
201 /// %ax = KILL %ax, %eax
201 /// %ax = KILL %ax, implicit killed %eax
202202 /// RET 0, %ax
203203 static bool isLive(const MachineInstr &MI,
204204 const LivePhysRegs &LiveRegs,
44684468 unsigned leaInReg2 = 0;
44694469 MachineInstr *InsMI2 = nullptr;
44704470 if (Src == Src2) {
4471 // ADD16rr %reg1028, %reg1028
4471 // ADD16rr killed %reg1028, %reg1028
44724472 // just a single insert_subreg.
44734473 addRegReg(MIB, leaInReg, true, leaInReg, false);
44744474 } else {
76327632 /// This is used for mapping:
76337633 /// %xmm4 = V_SET0
76347634 /// to:
7635 /// %xmm4 = PXORrr %xmm4, %xmm4
7635 /// %xmm4 = PXORrr undef %xmm4, undef %xmm4
76367636 ///
76377637 static bool Expand2AddrUndef(MachineInstrBuilder &MIB,
76387638 const MCInstrDesc &Desc) {
81968196 ///
81978197 /// This catches the VCVTSI2SD family of instructions:
81988198 ///
8199 /// vcvtsi2sdq %rax, %xmm0, %xmm14
8199 /// vcvtsi2sdq %rax, undef %xmm0, %xmm14
82008200 ///
82018201 /// We should be careful *not* to catch VXOR idioms which are presumably
82028202 /// handled specially in the pipeline:
82038203 ///
8204 /// vxorps %xmm1, %xmm1, %xmm1
8204 /// vxorps undef %xmm1, undef %xmm1, %xmm1
82058205 ///
82068206 /// Like getPartialRegUpdateClearance, this makes a strong assumption that the
82078207 /// high bits that are passed-through are not live.
1089410894 // FIXME: There are instructions which are being manually built without
1089510895 // explicit uses/defs so we also have to check the MCInstrDesc. We should be
1089610896 // able to remove the extra checks once those are fixed up. For example,
10897 // sometimes we might get something like %rax = POP64r 1. This won't be
10897 // sometimes we might get something like %rax = POP64r 1. This won't be
1089810898 // caught by modifiesRegister or readsRegister even though the instruction
1089910899 // really ought to be formed so that modifiesRegister/readsRegister would
1090010900 // catch it.
234234 // If the call has no RegMask, skip it as well. It usually happens on
235235 // helper function calls (such as '_chkstk', '_ftol2') where standard
236236 // calling convention is not used (RegMask is not used to mark register
237 // clobbered and register usage (def/imp-def/use) is well-defined and
237 // clobbered and register usage (def/implicit-def/use) is well-defined and
238238 // explicitly specified.
239239 if (IsCall && !callHasRegMask(MI))
240240 continue;
4242 ; The key problem here is that we may fail to create an MBB referenced by a
4343 ; PHI. If so, we cannot complete the G_PHI and mustn't try or bad things
4444 ; happen.
45 ; FALLBACK-WITH-REPORT-ERR: remark: :0:0: cannot select: G_STORE %6, %2; mem:ST4[%addr] GPR:%6,%2 (in function: pending_phis)
45 ; FALLBACK-WITH-REPORT-ERR: remark: :0:0: cannot select: G_STORE %6(s32), %2(p0); mem:ST4[%addr] GPR:%6,%2 (in function: pending_phis)
4646 ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for pending_phis
4747 ; FALLBACK-WITH-REPORT-OUT-LABEL: pending_phis:
4848 define i32 @pending_phis(i1 %tst, i32 %val, i32* %addr) {
6262 }
6363
6464 ; General legalizer inability to handle types whose size wasn't a power of 2.
65 ; FALLBACK-WITH-REPORT-ERR: remark: :0:0: unable to legalize instruction: G_STORE %1, %0; mem:ST6[%addr](align=8) (in function: odd_type)
65 ; FALLBACK-WITH-REPORT-ERR: remark: :0:0: unable to legalize instruction: G_STORE %1(s42), %0(p0); mem:ST6[%addr](align=8) (in function: odd_type)
6666 ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for odd_type
6767 ; FALLBACK-WITH-REPORT-OUT-LABEL: odd_type:
6868 define void @odd_type(i42* %addr) {
7171 ret void
7272 }
7373
74 ; FALLBACK-WITH-REPORT-ERR: remark: :0:0: unable to legalize instruction: G_STORE %1, %0; mem:ST28[%addr](align=32) (in function: odd_vector)
74 ; FALLBACK-WITH-REPORT-ERR: remark: :0:0: unable to legalize instruction: G_STORE %1(<7 x s32>), %0(p0); mem:ST28[%addr](align=32) (in function: odd_vector)
7575 ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for odd_vector
7676 ; FALLBACK-WITH-REPORT-OUT-LABEL: odd_vector:
7777 define void @odd_vector(<7 x i32>* %addr) {
9090 }
9191
9292 ; Just to make sure we don't accidentally emit a normal load/store.
93 ; FALLBACK-WITH-REPORT-ERR: remark: :0:0: cannot select: %2(s64) = G_LOAD %0; mem:LD8[%addr] GPR:%2,%0 (in function: atomic_ops)
93 ; FALLBACK-WITH-REPORT-ERR: remark: :0:0: cannot select: %2:gpr(s64) = G_LOAD %0(p0); mem:LD8[%addr] GPR:%2,%0 (in function: atomic_ops)
9494 ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for atomic_ops
9595 ; FALLBACK-WITH-REPORT-LABEL: atomic_ops:
9696 define i64 @atomic_ops(i64* %addr) {
131131 }
132132
133133 ; Check that we fallback on invoke translation failures.
134 ; FALLBACK-WITH-REPORT-ERR: remark: :0:0: unable to legalize instruction: %0(s128) = G_FCONSTANT quad 2
134 ; FALLBACK-WITH-REPORT-ERR: remark: :0:0: unable to legalize instruction: %0:_(s128) = G_FCONSTANT quad 2
135135 ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for test_quad_dump
136136 ; FALLBACK-WITH-REPORT-OUT-LABEL: test_quad_dump:
137137 define fp128 @test_quad_dump() {
138138 ret fp128 0xL00000000000000004000000000000000
139139 }
140140
141 ; FALLBACK-WITH-REPORT-ERR: remark: :0:0: unable to legalize instruction: %0(p0) = G_EXTRACT_VECTOR_ELT %1, %2; (in function: vector_of_pointers_extractelement)
141 ; FALLBACK-WITH-REPORT-ERR: remark: :0:0: unable to legalize instruction: %0:_(p0) = G_EXTRACT_VECTOR_ELT %1(<2 x p0>), %2(s32); (in function: vector_of_pointers_extractelement)
142142 ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for vector_of_pointers_extractelement
143143 ; FALLBACK-WITH-REPORT-OUT-LABEL: vector_of_pointers_extractelement:
144144 @var = global <2 x i16*> zeroinitializer
155155 br label %block
156156 }
157157
158 ; FALLBACK-WITH-REPORT-ERR: remark: :0:0: unable to legalize instruction: G_STORE %0, %4; mem:ST16[undef] (in function: vector_of_pointers_insertelement)
158 ; FALLBACK-WITH-REPORT-ERR: remark: :0:0: unable to legalize instruction: G_STORE %0(<2 x p0>), %4(p0); mem:ST16[undef] (in function: vector_of_pointers_insertelement)
159159 ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for vector_of_pointers_insertelement
160160 ; FALLBACK-WITH-REPORT-OUT-LABEL: vector_of_pointers_insertelement:
161161 define void @vector_of_pointers_insertelement() {
171171 br label %block
172172 }
173173
174 ; FALLBACK-WITH-REPORT-ERR: remark: :0:0: unable to legalize instruction: G_STORE %1, %3; mem:ST12[undef](align=4) (in function: nonpow2_insertvalue_narrowing)
174 ; FALLBACK-WITH-REPORT-ERR: remark: :0:0: unable to legalize instruction: G_STORE %1(s96), %3(p0); mem:ST12[undef](align=4) (in function: nonpow2_insertvalue_narrowing)
175175 ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for nonpow2_insertvalue_narrowing
176176 ; FALLBACK-WITH-REPORT-OUT-LABEL: nonpow2_insertvalue_narrowing:
177177 %struct96 = type { float, float, float }
212212 ret void
213213 }
214214
215 ; FALLBACK-WITH-REPORT-ERR: remark: :0:0: unable to legalize instruction: G_STORE %3, %0; mem:ST12[%c](align=16) (in function: nonpow2_store_narrowing
215 ; FALLBACK-WITH-REPORT-ERR: remark: :0:0: unable to legalize instruction: G_STORE %3(s96), %0(p0); mem:ST12[%c](align=16) (in function: nonpow2_store_narrowing
216216 ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for nonpow2_store_narrowing
217217 ; FALLBACK-WITH-REPORT-OUT-LABEL: nonpow2_store_narrowing:
218218 define void @nonpow2_store_narrowing(i96* %c) {
222222 ret void
223223 }
224224
225 ; FALLBACK-WITH-REPORT-ERR: remark: :0:0: unable to legalize instruction: G_STORE %0, %1; mem:ST12[undef](align=16) (in function: nonpow2_constant_narrowing)
225 ; FALLBACK-WITH-REPORT-ERR: remark: :0:0: unable to legalize instruction: G_STORE %0(s96), %1(p0); mem:ST12[undef](align=16) (in function: nonpow2_constant_narrowing)
226226 ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for nonpow2_constant_narrowing
227227 ; FALLBACK-WITH-REPORT-OUT-LABEL: nonpow2_constant_narrowing:
228228 define void @nonpow2_constant_narrowing() {
232232
233233 ; Currently can't handle vector lengths that aren't an exact multiple of
234234 ; natively supported vector lengths. Test that the fall-back works for those.
235 ; FALLBACK-WITH-REPORT-ERR-G_IMPLICIT_DEF-LEGALIZABLE: (FIXME: this is what is expected once we can legalize non-pow-of-2 G_IMPLICIT_DEF) remark: :0:0: unable to legalize instruction: %1(<7 x s64>) = G_ADD %0, %0; (in function: nonpow2_vector_add_fewerelements
236 ; FALLBACK-WITH-REPORT-ERR: remark: :0:0: unable to legalize instruction: %2(s64) = G_EXTRACT_VECTOR_ELT %1, %3; (in function: nonpow2_vector_add_fewerelements)
235 ; FALLBACK-WITH-REPORT-ERR-G_IMPLICIT_DEF-LEGALIZABLE: (FIXME: this is what is expected once we can legalize non-pow-of-2 G_IMPLICIT_DEF) remark: :0:0: unable to legalize instruction: %1(<7 x s64>) = G_ADD %0, %0; (in function: nonpow2_vector_add_fewerelements
236 ; FALLBACK-WITH-REPORT-ERR: remark: :0:0: unable to legalize instruction: %2:_(s64) = G_EXTRACT_VECTOR_ELT %1(<7 x s64>), %3(s64); (in function: nonpow2_vector_add_fewerelements)
237237 ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for nonpow2_vector_add_fewerelements
238238 ; FALLBACK-WITH-REPORT-OUT-LABEL: nonpow2_vector_add_fewerelements:
239239 define void @nonpow2_vector_add_fewerelements() {
88 ...
99 ---
1010 # CHECK: *** Bad machine code: Generic virtual register must have a bank in a RegBankSelected function ***
11 # CHECK: instruction: %0(s64) = COPY
12 # CHECK: operand 0: %0
11 # CHECK: instruction: %0:_(s64) = COPY
12 # CHECK: operand 0: %0
1313 name: test
1414 regBankSelected: true
1515 registers:
2121 %0 = COPY %x0
2222
2323 ; CHECK: *** Bad machine code: Unexpected generic instruction in a Selected function ***
24 ; CHECK: instruction: %1 = G_ADD
24 ; CHECK: instruction: %1:gpr64 = G_ADD
2525 %1 = G_ADD %0, %0
2626
2727 ; CHECK: *** Bad machine code: Generic virtual register invalid in a Selected function ***
28 ; CHECK: instruction: %2(s64) = COPY
29 ; CHECK: operand 0: %2
28 ; CHECK: instruction: %2:gpr(s64) = COPY
29 ; CHECK: operand 0: %2
3030 %2(s64) = COPY %x0
3131 ...
295295
296296 ; Check that we correctly deal with repeated operands.
297297 ; The following testcase creates:
298 ; %d1 = FADDDrr %d0, %d0
298 ; %d1 = FADDDrr killed %d0, %d0
299299 ; We'll get a crash if we naively look at the first operand, remove it
300300 ; from the substitution list then look at the second operand.
301301
0 ; RUN: llc < %s -mtriple=arm64-apple-ios -verify-machineinstrs | FileCheck %s
11
22 ; LdStOpt bug created illegal instruction:
3 ; %d1, %d2 = LDPSi %x0, 1
3 ; %d1, %d2 = LDPSi %x0, 1
44 ; rdar://11512047
55
66 %0 = type opaque
1010 ; CHECK: Before post-MI-sched:
1111 ; CHECK-LABEL: # Machine code for function test1:
1212 ; CHECK: SU(2): STRWui %wzr
13 ; CHECK: SU(3): %x21, %x20 = LDPXi %sp
13 ; CHECK: SU(3): %x21, %x20 = LDPXi %sp
1414 ; CHECK: Predecessors:
1515 ; CHECK-NEXT: SU(0): Out
1616 ; CHECK-NEXT: SU(0): Out
22 ; Check that the dead register definition pass is considering implicit defs.
33 ; When rematerializing through truncates, the coalescer may produce instructions
44 ; with dead defs, but live implicit-defs of subregs:
5 ; E.g. %x1 = MOVi64imm 2, %w1; %x1:GPR64, %w1:GPR32
5 ; E.g. dead %x1 = MOVi64imm 2, implicit-def %w1; %x1:GPR64, %w1:GPR32
66 ; These instructions are live, and their definitions should not be rewritten.
77 ;
88 ;
33
44 ; CHECK-SSA-LABEL: Machine code for function t1
55
6 ; CHECK-SSA: [[QUOTREG:%[0-9]+]] = SDIVWr
7 ; CHECK-SSA-NOT: [[QUOTREG]] =
8 ; CHECK-SSA: {{%[0-9]+}} = MSUBWrrr [[QUOTREG]]
6 ; CHECK-SSA: [[QUOTREG:%[0-9]+]]:gpr32 = SDIVWr
7 ; CHECK-SSA-NOT: [[QUOTREG]] =
8 ; CHECK-SSA: {{%[0-9]+}}:gpr32 = MSUBWrrr killed [[QUOTREG]]
99
1010 ; CHECK-SSA-LABEL: Machine code for function t2
1111
55 ; CHECK: ********** MI Scheduling **********
66 ; CHECK-LABEL: ldr_int:%bb.0
77 ; CHECK: Cluster ld/st SU(1) - SU(2)
8 ; CHECK: SU(1): %{{[0-9]+}} = LDRWui
9 ; CHECK: SU(2): %{{[0-9]+}} = LDRWui
8 ; CHECK: SU(1): %{{[0-9]+}}:gpr32 = LDRWui
9 ; CHECK: SU(2): %{{[0-9]+}}:gpr32 = LDRWui
1010 ; EXYNOS: ********** MI Scheduling **********
1111 ; EXYNOS-LABEL: ldr_int:%bb.0
1212 ; EXYNOS: Cluster ld/st SU(1) - SU(2)
13 ; EXYNOS: SU(1): %{{[0-9]+}} = LDRWui
14 ; EXYNOS: SU(2): %{{[0-9]+}} = LDRWui
13 ; EXYNOS: SU(1): %{{[0-9]+}}:gpr32 = LDRWui
14 ; EXYNOS: SU(2): %{{[0-9]+}}:gpr32 = LDRWui
1515 define i32 @ldr_int(i32* %a) nounwind {
1616 %p1 = getelementptr inbounds i32, i32* %a, i32 1
1717 %tmp1 = load i32, i32* %p1, align 2
2525 ; CHECK: ********** MI Scheduling **********
2626 ; CHECK-LABEL: ldp_sext_int:%bb.0
2727 ; CHECK: Cluster ld/st SU(1) - SU(2)
28 ; CHECK: SU(1): %{{[0-9]+}} = LDRSWui
29 ; CHECK: SU(2): %{{[0-9]+}} = LDRSWui
28 ; CHECK: SU(1): %{{[0-9]+}}:gpr64 = LDRSWui
29 ; CHECK: SU(2): %{{[0-9]+}}:gpr64 = LDRSWui
3030 ; EXYNOS: ********** MI Scheduling **********
3131 ; EXYNOS-LABEL: ldp_sext_int:%bb.0
3232 ; EXYNOS: Cluster ld/st SU(1) - SU(2)
33 ; EXYNOS: SU(1): %{{[0-9]+}} = LDRSWui
34 ; EXYNOS: SU(2): %{{[0-9]+}} = LDRSWui
33 ; EXYNOS: SU(1): %{{[0-9]+}}:gpr64 = LDRSWui
34 ; EXYNOS: SU(2): %{{[0-9]+}}:gpr64 = LDRSWui
3535 define i64 @ldp_sext_int(i32* %p) nounwind {
3636 %tmp = load i32, i32* %p, align 4
3737 %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
4646 ; CHECK: ********** MI Scheduling **********
4747 ; CHECK-LABEL: ldur_int:%bb.0
4848 ; CHECK: Cluster ld/st SU(2) - SU(1)
49 ; CHECK: SU(1): %{{[0-9]+}} = LDURWi
50 ; CHECK: SU(2): %{{[0-9]+}} = LDURWi
49 ; CHECK: SU(1): %{{[0-9]+}}:gpr32 = LDURWi
50 ; CHECK: SU(2): %{{[0-9]+}}:gpr32 = LDURWi
5151 ; EXYNOS: ********** MI Scheduling **********
5252 ; EXYNOS-LABEL: ldur_int:%bb.0
5353 ; EXYNOS: Cluster ld/st SU(2) - SU(1)
54 ; EXYNOS: SU(1): %{{[0-9]+}} = LDURWi
55 ; EXYNOS: SU(2): %{{[0-9]+}} = LDURWi
54 ; EXYNOS: SU(1): %{{[0-9]+}}:gpr32 = LDURWi
55 ; EXYNOS: SU(2): %{{[0-9]+}}:gpr32 = LDURWi
5656 define i32 @ldur_int(i32* %a) nounwind {
5757 %p1 = getelementptr inbounds i32, i32* %a, i32 -1
5858 %tmp1 = load i32, i32* %p1, align 2
6666 ; CHECK: ********** MI Scheduling **********
6767 ; CHECK-LABEL: ldp_half_sext_zext_int:%bb.0
6868 ; CHECK: Cluster ld/st SU(3) - SU(4)
69 ; CHECK: SU(3): %{{[0-9]+}} = LDRSWui
70 ; CHECK: SU(4): %{{[0-9]+}}:sub_32 = LDRWui
69 ; CHECK: SU(3): %{{[0-9]+}}:gpr64 = LDRSWui
70 ; CHECK: SU(4): undef %{{[0-9]+}}.sub_32:gpr64 = LDRWui
7171 ; EXYNOS: ********** MI Scheduling **********
7272 ; EXYNOS-LABEL: ldp_half_sext_zext_int:%bb.0
7373 ; EXYNOS: Cluster ld/st SU(3) - SU(4)
74 ; EXYNOS: SU(3): %{{[0-9]+}} = LDRSWui
75 ; EXYNOS: SU(4): %{{[0-9]+}}:sub_32 = LDRWui
74 ; EXYNOS: SU(3): %{{[0-9]+}}:gpr64 = LDRSWui
75 ; EXYNOS: SU(4): undef %{{[0-9]+}}.sub_32:gpr64 = LDRWui
7676 define i64 @ldp_half_sext_zext_int(i64* %q, i32* %p) nounwind {
7777 %tmp0 = load i64, i64* %q, align 4
7878 %tmp = load i32, i32* %p, align 4
8989 ; CHECK: ********** MI Scheduling **********
9090 ; CHECK-LABEL: ldp_half_zext_sext_int:%bb.0
9191 ; CHECK: Cluster ld/st SU(3) - SU(4)
92 ; CHECK: SU(3): %{{[0-9]+}}:sub_32 = LDRWui
93 ; CHECK: SU(4): %{{[0-9]+}} = LDRSWui
92 ; CHECK: SU(3): undef %{{[0-9]+}}.sub_32:gpr64 = LDRWui
93 ; CHECK: SU(4): %{{[0-9]+}}:gpr64 = LDRSWui
9494 ; EXYNOS: ********** MI Scheduling **********
9595 ; EXYNOS-LABEL: ldp_half_zext_sext_int:%bb.0
9696 ; EXYNOS: Cluster ld/st SU(3) - SU(4)
97 ; EXYNOS: SU(3): %{{[0-9]+}}:sub_32 = LDRWui
98 ; EXYNOS: SU(4): %{{[0-9]+}} = LDRSWui
97 ; EXYNOS: SU(3): undef %{{[0-9]+}}.sub_32:gpr64 = LDRWui
98 ; EXYNOS: SU(4): %{{[0-9]+}}:gpr64 = LDRSWui
9999 define i64 @ldp_half_zext_sext_int(i64* %q, i32* %p) nounwind {
100100 %tmp0 = load i64, i64* %q, align 4
101101 %tmp = load i32, i32* %p, align 4
112112 ; CHECK: ********** MI Scheduling **********
113113 ; CHECK-LABEL: ldr_int_volatile:%bb.0
114114 ; CHECK-NOT: Cluster ld/st
115 ; CHECK: SU(1): %{{[0-9]+}} = LDRWui
116 ; CHECK: SU(2): %{{[0-9]+}} = LDRWui
115 ; CHECK: SU(1): %{{[0-9]+}}:gpr32 = LDRWui
116 ; CHECK: SU(2): %{{[0-9]+}}:gpr32 = LDRWui
117117 ; EXYNOS: ********** MI Scheduling **********
118118 ; EXYNOS-LABEL: ldr_int_volatile:%bb.0
119119 ; EXYNOS-NOT: Cluster ld/st
120 ; EXYNOS: SU(1): %{{[0-9]+}} = LDRWui
121 ; EXYNOS: SU(2): %{{[0-9]+}} = LDRWui
120 ; EXYNOS: SU(1): %{{[0-9]+}}:gpr32 = LDRWui
121 ; EXYNOS: SU(2): %{{[0-9]+}}:gpr32 = LDRWui
122122 define i32 @ldr_int_volatile(i32* %a) nounwind {
123123 %p1 = getelementptr inbounds i32, i32* %a, i32 1
124124 %tmp1 = load volatile i32, i32* %p1, align 2
132132 ; CHECK: ********** MI Scheduling **********
133133 ; CHECK-LABEL: ldq_cluster:%bb.0
134134 ; CHECK: Cluster ld/st SU(1) - SU(3)
135 ; CHECK: SU(1): %{{[0-9]+}} = LDRQui
136 ; CHECK: SU(3): %{{[0-9]+}} = LDRQui
135 ; CHECK: SU(1): %{{[0-9]+}}:fpr128 = LDRQui
136 ; CHECK: SU(3): %{{[0-9]+}}:fpr128 = LDRQui
137137 ; EXYNOS: ********** MI Scheduling **********
138138 ; EXYNOS-LABEL: ldq_cluster:%bb.0
139139 ; EXYNOS-NOT: Cluster ld/st
55 ;
66 ; CHECK: ********** MI Scheduling **********
77 ; CHECK: shiftable
8 ; CHECK: SU(2): %2 = SUBXri %1, 20, 0
8 ; CHECK: SU(2): %2:gpr64common = SUBXri %1, 20, 0
99 ; CHECK: Successors:
1010 ; CHECK-NEXT: SU(4): Data Latency=1 Reg=%2
1111 ; CHECK-NEXT: SU(3): Data Latency=2 Reg=%2
44 ;
55 ; CHECK: ********** MI Scheduling **********
66 ; CHECK: misched_bug:%bb.0 entry
7 ; CHECK: SU(2): %2 = LDRWui %0, 1; mem:LD4[%ptr1_plus1] GPR32:%2 GPR64common:%0
7 ; CHECK: SU(2): %2:gpr32 = LDRWui %0, 1; mem:LD4[%ptr1_plus1] GPR32:%2 GPR64common:%0
88 ; CHECK: Successors:
99 ; CHECK-NEXT: SU(5): Data Latency=4 Reg=%2
1010 ; CHECK-NEXT: SU(4): Ord Latency=0
1212 ; CHECK: Successors:
1313 ; CHECK: SU(4): Ord Latency=0
1414 ; CHECK: SU(4): STRWui %wzr, %1, 0; mem:ST4[%ptr2] GPR64common:%1
15 ; CHECK: SU(5): %w0 = COPY %2; GPR32:%2
15 ; CHECK: SU(5): %w0 = COPY %2; GPR32:%2
1616 ; CHECK: ** ScheduleDAGMI::schedule picking next node
1717 define i32 @misched_bug(i32* %ptr1, i32* %ptr2) {
1818 entry:
77 ; Check that no scheduling dependencies are created between the paired loads and the store during post-RA MI scheduling.
88 ;
99 ; CHECK-LABEL: # Machine code for function foo:
10 ; CHECK: SU(2): %w{{[0-9]+}}, %w{{[0-9]+}} = LDPWi
10 ; CHECK: SU(2): %w{{[0-9]+}}, %w{{[0-9]+}} = LDPWi
1111 ; CHECK: Successors:
1212 ; CHECK-NOT: ch SU(4)
1313 ; CHECK: SU(3)
2121 body: |
2222 bb.0:
2323 ; CHECK: Adding MCLOH_AdrpAdrp:
24 ; CHECK-NEXT: %x1 = ADRP
25 ; CHECK-NEXT: %x1 = ADRP >
24 ; CHECK-NEXT: %x1 = ADRP >
25 ; CHECK-NEXT: %x1 = ADRP
2626 ; CHECK-NEXT: Adding MCLOH_AdrpAdrp:
27 ; CHECK-NEXT: %x1 = ADRP
28 ; CHECK-NEXT: %x1 = ADRP >
27 ; CHECK-NEXT: %x1 = ADRP >
28 ; CHECK-NEXT: %x1 = ADRP
2929 ; CHECK-NEXT: Adding MCLOH_AdrpAdrp:
30 ; CHECK-NEXT: %x0 = ADRP
31 ; CHECK-NEXT: %x0 = ADRP >
30 ; CHECK-NEXT: %x0 = ADRP >
31 ; CHECK-NEXT: %x0 = ADRP
3232 %x0 = ADRP target-flags(aarch64-page) @g0
3333 %x0 = ADRP target-flags(aarch64-page) @g1
3434 %x1 = ADRP target-flags(aarch64-page) @g2
3737
3838 bb.1:
3939 ; CHECK-NEXT: Adding MCLOH_AdrpAdd:
40 ; CHECK-NEXT: %x20 = ADRP
41 ; CHECK-NEXT: %x3 = ADDXri %x20,
40 ; CHECK-NEXT: %x20 = ADRP
41 ; CHECK-NEXT: %x3 = ADDXri %x20,
4242 ; CHECK-NEXT: Adding MCLOH_AdrpAdd:
43 ; CHECK-NEXT: %x1 = ADRP
44 ; CHECK-NEXT: %x1 = ADDXri %x1,
43 ; CHECK-NEXT: %x1 = ADRP
44 ; CHECK-NEXT: %x1 = ADDXri %x1,
4545 %x1 = ADRP target-flags(aarch64-page) @g0
4646 %x9 = SUBXri undef %x11, 5, 0 ; should not affect MCLOH formation
4747 %x1 = ADDXri %x1, target-flags(aarch64-pageoff) @g0, 0
7272
7373 bb.5:
7474 ; CHECK-NEXT: Adding MCLOH_AdrpLdr:
75 ; CHECK-NEXT: %x5 = ADRP
76 ; CHECK-NEXT: %s6 = LDRSui %x5,
75 ; CHECK-NEXT: %x5 = ADRP
76 ; CHECK-NEXT: %s6 = LDRSui %x5,
7777 ; CHECK-NEXT: Adding MCLOH_AdrpLdr:
78 ; CHECK-NEXT: %x4 = ADRP
79 ; CHECK-NEXT: %x4 = LDRXui %x4,
78 ; CHECK-NEXT: %x4 = ADRP
79 ; CHECK-NEXT: %x4 = LDRXui %x4,
8080 %x4 = ADRP target-flags(aarch64-page) @g2
8181 %x4 = LDRXui %x4, target-flags(aarch64-pageoff) @g2
8282 %x5 = ADRP target-flags(aarch64-page) @g2
8484
8585 bb.6:
8686 ; CHECK-NEXT: Adding MCLOH_AdrpLdrGot:
87 ; CHECK-NEXT: %x5 = ADRP
88 ; CHECK-NEXT: %x6 = LDRXui %x5,
87 ; CHECK-NEXT: %x5 = ADRP
88 ; CHECK-NEXT: %x6 = LDRXui %x5,
8989 ; CHECK-NEXT: Adding MCLOH_AdrpLdrGot:
90 ; CHECK-NEXT: %x4 = ADRP
91 ; CHECK-NEXT: %x4 = LDRXui %x4,
90 ; CHECK-NEXT: %x4 = ADRP
91 ; CHECK-NEXT: %x4 = LDRXui %x4,
9292 %x4 = ADRP target-flags(aarch64-page, aarch64-got) @g2
9393 %x4 = LDRXui %x4, target-flags(aarch64-pageoff, aarch64-got) @g2
9494 %x5 = ADRP target-flags(aarch64-page, aarch64-got) @g2
103103
104104 bb.8:
105105 ; CHECK-NEXT: Adding MCLOH_AdrpAddLdr:
106 ; CHECK-NEXT: %x7 = ADRP [TF=1]
107 ; CHECK-NEXT: %x8 = ADDXri %x7,
108 ; CHECK-NEXT: %d1 = LDRDui %x8, 8
106 ; CHECK-NEXT: %x7 = ADRP [TF=1]
107 ; CHECK-NEXT: %x8 = ADDXri %x7,
108 ; CHECK-NEXT: %d1 = LDRDui %x8, 8
109109 %x7 = ADRP target-flags(aarch64-page) @g3
110110 %x8 = ADDXri %x7, target-flags(aarch64-pageoff) @g3, 0
111111 %d1 = LDRDui %x8, 8
112112
113113 bb.9:
114114 ; CHECK-NEXT: Adding MCLOH_AdrpAdd:
115 ; CHECK-NEXT: %x3 = ADRP
116 ; CHECK-NEXT: %x3 = ADDXri %x3,
115 ; CHECK-NEXT: %x3 = ADRP
116 ; CHECK-NEXT: %x3 = ADDXri %x3,
117117 ; CHECK-NEXT: Adding MCLOH_AdrpAdd:
118 ; CHECK-NEXT: %x5 = ADRP
119 ; CHECK-NEXT: %x2 = ADDXri %x5,
118 ; CHECK-NEXT: %x5 = ADRP
119 ; CHECK-NEXT: %x2 = ADDXri %x5,
120120 ; CHECK-NEXT: Adding MCLOH_AdrpAddStr:
121 ; CHECK-NEXT: %x1 = ADRP
122 ; CHECK-NEXT: %x1 = ADDXri %x1,
121 ; CHECK-NEXT: %x1 = ADRP
122 ; CHECK-NEXT: %x1 = ADDXri %x1,
123123 ; CHECK-NEXT: STRXui %xzr, %x1, 16
124124 %x1 = ADRP target-flags(aarch64-page) @g3
125125 %x1 = ADDXri %x1, target-flags(aarch64-pageoff) @g3, 0
137137
138138 bb.10:
139139 ; CHECK-NEXT: Adding MCLOH_AdrpLdr:
140 ; CHECK-NEXT: %x2 = ADRP
141 ; CHECK-NEXT: %x2 = LDRXui %x2,
140 ; CHECK-NEXT: %x2 = ADRP
141 ; CHECK-NEXT: %x2 = LDRXui %x2,
142142 ; CHECK-NEXT: Adding MCLOH_AdrpLdrGotLdr:
143 ; CHECK-NEXT: %x1 = ADRP
144 ; CHECK-NEXT: %x1 = LDRXui %x1,
145 ; CHECK-NEXT: %x1 = LDRXui %x1, 24
143 ; CHECK-NEXT: %x1 = ADRP
144 ; CHECK-NEXT: %x1 = LDRXui %x1,
145 ; CHECK-NEXT: %x1 = LDRXui %x1, 24
146146 %x1 = ADRP target-flags(aarch64-page, aarch64-got) @g4
147147 %x1 = LDRXui %x1, target-flags(aarch64-pageoff, aarch64-got) @g4
148148 %x1 = LDRXui %x1, 24
153153
154154 bb.11:
155155 ; CHECK-NEXT: Adding MCLOH_AdrpLdr
156 ; CHECK-NEXT: %x5 = ADRP
157 ; CHECK-NEXT: %x5 = LDRXui %x5,
156 ; CHECK-NEXT: %x5 = ADRP
157 ; CHECK-NEXT: %x5 = LDRXui %x5,
158158 ; CHECK-NEXT: Adding MCLOH_AdrpLdrGotStr:
159 ; CHECK-NEXT: %x1 = ADRP
160 ; CHECK-NEXT: %x1 = LDRXui %x1,
159 ; CHECK-NEXT: %x1 = ADRP
160 ; CHECK-NEXT: %x1 = LDRXui %x1,
161161 ; CHECK-NEXT: STRXui %xzr, %x1, 32
162162 %x1 = ADRP target-flags(aarch64-page, aarch64-got) @g4
163163 %x1 = LDRXui %x1, target-flags(aarch64-pageoff, aarch64-got) @g4
170170 bb.12:
171171 ; CHECK-NOT: MCLOH_AdrpAdrp
172172 ; CHECK: Adding MCLOH_AdrpAddLdr
173 ; %x9 = ADRP
174 ; %x9 = ADDXri %x9,
175 ; %x5 = LDRXui %x9, 0
173 ; %x9 = ADRP
174 ; %x9 = ADDXri %x9,
175 ; %x5 = LDRXui %x9, 0
176176 %x9 = ADRP target-flags(aarch64-page, aarch64-got) @g4
177177 %x9 = ADDXri %x9, target-flags(aarch64-pageoff, aarch64-got) @g4, 0
178178 %x5 = LDRXui %x9, 0
11
22 ; This file checks for a bug in the MachineCopyPropagation pass. The last COPY will be
33 ; incorrectly removed if the machine instructions are as follows:
4 ; %q5_q6 = COPY %q2_q3
5 ; %d5 =
6 ; %d3 =
7 ; %d3 = COPY %d6
4 ; %q5_q6 = COPY %q2_q3
5 ; %d5 =
6 ; %d3 =
7 ; %d3 = COPY %d6
88 ; This is caused by a bug in function SourceNoLongerAvailable(), which fails to
9 ; remove the relationship of D6 and "%q5_q6 = COPY %q2_q3".
9 ; remove the relationship of D6 and "%q5_q6 = COPY %q2_q3".
1010
1111 @failed = internal unnamed_addr global i1 false
1212
1313 ; CHECK-LABEL: foo:
1414 ; CHECK: ld2
15 ; CHECK-NOT: // kill: D{{[0-9]+}} D{{[0-9]+}}
15 ; CHECK-NOT: // kill: def D{{[0-9]+}} killed D{{[0-9]+}}
1616 define void @foo(<2 x i32> %shuffle251, <8 x i8> %vtbl1.i, i8* %t2, <2 x i32> %vrsubhn_v2.i1364) {
1717 entry:
1818 %val0 = alloca [2 x i64], align 8
66 # Check that the instructions are not dependent on each other, even though
77 # they all read/write to the zero register.
88 # CHECK-LABEL: MI Scheduling
9 # CHECK: SU(0): %wzr = SUBSWri %w1, 0, 0, %nzcv
9 # CHECK: SU(0): dead %wzr = SUBSWri %w1, 0, 0, implicit-def dead %nzcv
1010 # CHECK: # succs left : 0
1111 # CHECK-NOT: Successors:
12 # CHECK: SU(1): %w2 = COPY %wzr
12 # CHECK: SU(1): %w2 = COPY %wzr
1313 # CHECK: # succs left : 0
1414 # CHECK-NOT: Successors:
15 # CHECK: SU(2): %wzr = SUBSWri %w3, 0, 0, %nzcv
15 # CHECK: SU(2): dead %wzr = SUBSWri %w3, 0, 0, implicit-def dead %nzcv
1616 # CHECK: # succs left : 0
1717 # CHECK-NOT: Successors:
18 # CHECK: SU(3): %w4 = COPY %wzr
18 # CHECK: SU(3): %w4 = COPY %wzr
1919 # CHECK: # succs left : 0
2020 # CHECK-NOT: Successors:
2121 name: func
2525 ; CHECK: fi#-2: {{.*}} fixed, at location [SP+8]
2626 ; CHECK: fi#-1: {{.*}} fixed, at location [SP]
2727
28 ; CHECK: [[VRA:%.*]] = LDRXui
29 ; CHECK: [[VRB:%.*]] = LDRXui >
28 ; CHECK: [[VRA:%.*]]:gpr64 = LDRXui >
29 ; CHECK: [[VRB:%.*]]:gpr64 = LDRXui
3030 ; CHECK: STRXui %{{.*}},
3131 ; CHECK: STRXui [[VRB]],
3232
3333 ; Make sure that there is a dependence edge between fi#-2 and fi#-4.
3434 ; Without this edge the scheduler would be free to move the store across the load.
3535
36 ; CHECK: SU({{.*}}): [[VRB]] = LDRXui
36 ; CHECK: SU({{.*}}): [[VRB]]:gpr64 = LDRXui
3737 ; CHECK-NOT: SU
3838 ; CHECK: Successors:
3939 ; CHECK: SU([[DEPSTOREB:.*]]): Ord Latency=0
44 ; NOOPT: s_load_dwordx2 s[4:5]
55
66 ; FIXME: Why is the SGPR4_SGPR5 reference being removed from DBG_VALUE?
7 ; NOOPT: ; kill: %sgpr8_sgpr9 %sgpr4_sgpr5
7 ; NOOPT: ; kill: def %sgpr8_sgpr9 killed %sgpr4_sgpr5
88 ; NOOPT-NEXT: ;DEBUG_VALUE: test_debug_value:globalptr_arg <- undef
99
1010 ; GCN: flat_store_dword
33 # Check there is no SReg_32 pressure created by DS_* instructions because of M0 use
44
55 # CHECK: ScheduleDAGMILive::schedule starting
6 # CHECK: SU({{.*}} = DS_READ_B32 {{.*}} %m0, %exec
6 # CHECK: SU({{.*}} = DS_READ_B32 {{.*}} implicit %m0, implicit %exec
77 # CHECK: Pressure Diff : {{$}}
88 # CHECK: SU({{.*}} DS_WRITE_B32
99
33
44 ; This tests the fast register allocator's handling of partial redefines:
55 ;
6 ; %reg1028:dsub_0, %reg1028:dsub_1 = VLD1q64 %reg1025...
7 ; %reg1030:dsub_1 = COPY %reg1028:dsub_0
6 ; %reg1028:dsub_0, %reg1028:dsub_1 = VLD1q64 %reg1025...
7 ; %reg1030:dsub_1 = COPY killed %reg1028:dsub_0
88 ;
99 ; %reg1028 gets allocated %Q0, and if %reg1030 is reloaded for the partial
1010 ; redef, it cannot also get %Q0.
88 ;
99 ; The early-clobber instruction is an str:
1010 ;
11 ; %12 = t2STR_PRE %6, %12, 32, pred:14, pred:%noreg
11 ; early-clobber %12 = t2STR_PRE %6, %12, 32, pred:14, pred:%noreg
1212 ;
1313 ; This tests that shrinkToUses handles the EC redef correctly.
1414
33 define void @vst(i8* %m, [4 x i64] %v) {
44 entry:
55 ; CHECK: vst:
6 ; CHECK: VST1d64Q %r{{[0-9]+}}, 8, %d{{[0-9]+}}, pred:14, pred:%noreg, %q{{[0-9]+}}_q{{[0-9]+}}
6 ; CHECK: VST1d64Q killed %r{{[0-9]+}}, 8, %d{{[0-9]+}}, pred:14, pred:%noreg, implicit killed %q{{[0-9]+}}_q{{[0-9]+}}
77
88 %v0 = extractvalue [4 x i64] %v, 0
99 %v1 = extractvalue [4 x i64] %v, 1
3636 %struct.__neon_int8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }
3737 define <8 x i8> @vtbx4(<8 x i8>* %A, %struct.__neon_int8x8x4_t* %B, <8 x i8>* %C) nounwind {
3838 ; CHECK: vtbx4:
39 ; CHECK: VTBX4 {{.*}}, pred:14, pred:%noreg, %q{{[0-9]+}}_q{{[0-9]+}}
39 ; CHECK: VTBX4 {{.*}}, pred:14, pred:%noreg, implicit %q{{[0-9]+}}_q{{[0-9]+}}
4040 %tmp1 = load <8 x i8>, <8 x i8>* %A
4141 %tmp2 = load %struct.__neon_int8x8x4_t, %struct.__neon_int8x8x4_t* %B
4242 %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0
88 ret void
99 }
1010
11 ; CHECK: tBL pred:14, pred:%noreg, , %lr, %sp, %r4, %r4, %r12, %cpsr
11 ; CHECK: tBL pred:14, pred:%noreg, , implicit-def %lr, implicit %sp, implicit killed %r4, implicit-def %r4, implicit-def dead %r12, implicit-def dead %cpsr
1212
6060
6161 ; CHECK: insert_elem
6262 ; This test has a sub-register copy with a kill flag:
63 ; %6:ssub_3 = COPY %6:ssub_2; QPR_VFP2:%6
63 ; %6:ssub_3 = COPY killed %6:ssub_2; QPR_VFP2:%6
6464 ; The rewriter must do something sensible with that, or the scavenger crashes.
6565 define void @insert_elem() nounwind {
6666 entry:
77 %struct.gs_color_s = type { i16, i16, i16, i16, i8, i8 }
88
99 ; In this case, the if converter was cloning the return instruction so that we had
10 ; r2 = ...
10 ; r2 = ...
1111 ; return [pred] r2
1212 ; ldr
1313 ; return
3232 ; This case was a crasher in constrainLocalCopy.
3333 ; The problem was the t2LDR_PRE defining both the global and local lrg.
3434 ; CHECK-LABEL: *** Final schedule for %bb.5 ***
35 ; CHECK: %[[R4:[0-9]+]], %[[R1:[0-9]+]] = t2LDR_PRE %[[R1]]
36 ; CHECK: %{{[0-9]+}} = COPY %[[R1]]
37 ; CHECK: %{{[0-9]+}} = COPY %[[R4]]
35 ; CHECK: %[[R4:[0-9]+]]:gpr, %[[R1:[0-9]+]]:gpr = t2LDR_PRE %[[R1]]
36 ; CHECK: %{{[0-9]+}}:gpr = COPY %[[R1]]
37 ; CHECK: %{{[0-9]+}}:gpr = COPY %[[R4]]
3838 ; CHECK-LABEL: MACHINEINSTRS
3939 %struct.rtx_def = type { [4 x i8], [1 x %union.rtunion_def] }
4040 %union.rtunion_def = type { i64 }
3636 }
3737 #
3838 # CHECK: ********** MI Scheduling **********
39 # CHECK: SU(2): %2 = t2MOVi32imm ; rGPR:%2
39 # CHECK: SU(2): %2:rgpr = t2MOVi32imm ; rGPR:%2
4040 # CHECK_A9: Latency : 2
4141 # CHECK_SWIFT: Latency : 2
4242 # CHECK_R52: Latency : 2
4343 #
44 # CHECK: SU(3): %3 = t2LDRi12 %2, 0, pred:14, pred:%noreg; mem:LD4[@g1](dereferenceable) rGPR:%3,%2
44 # CHECK: SU(3): %3:rgpr = t2LDRi12 %2, 0, pred:14, pred:%noreg; mem:LD4[@g1](dereferenceable) rGPR:%3,%2
4545 # CHECK_A9: Latency : 1
4646 # CHECK_SWIFT: Latency : 3
4747 # CHECK_R52: Latency : 4
4848<