llvm.org GIT mirror llvm / ab6a729
[AsmPrinter] Remove hidden flag -print-schedule. This patch removes hidden codegen flag -print-schedule effectively reverting the logic originally committed as r300311 (https://llvm.org/viewvc/llvm-project?view=revision&revision=300311). Flag -print-schedule was originally introduced by r300311 to address PR32216 (https://bugs.llvm.org/show_bug.cgi?id=32216). That bug was about adding "Better testing of schedule model instruction latencies/throughputs". These days, we can use llvm-mca to test scheduling models. So there is no longer a need for flag -print-schedule in LLVM. The main use case for PR32216 is now addressed by llvm-mca. Flag -print-schedule is mainly used for debugging purposes, and it is only actually used by x86 specific tests. We already have extensive (latency and throughput) tests under "test/tools/llvm-mca" for X86 processor models. That means, most (if not all) existing -print-schedule tests for X86 are redundant. When flag -print-schedule was first added to LLVM, several files had to be modified; a few APIs gained new arguments (see for example method MCAsmStreamer::EmitInstruction), and MCSubtargetInfo/TargetSubtargetInfo gained a couple of getSchedInfoStr() methods. Method getSchedInfoStr() had to originally work for both MCInst and MachineInstr. The original implementation of getSchedInfoStr() introduced a subtle layering violation (reported as PR37160 and then fixed/worked-around by r330615). In retrospect, that new API could have been designed more optimally. We can always query MCSchedModel to get the latency and throughput. More importantly, the "sched-info" string should not have been generated by the subtarget. Note, r317782 fixed an issue where "print-schedule" didn't work very well in the presence of inline assembly. That commit is also reverted by this change. 
Differential Revision: https://reviews.llvm.org/D57244 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@353043 91177308-0d34-0410-b5e6-96231b3b80d8 Andrea Di Biagio 8 months ago
79 changed file(s) with 2488 addition(s) and 129455 deletion(s). Raw diff Collapse all Expand all
120120 using GOTEquivUsePair = std::pair;
121121 MapVector GlobalGOTEquivs;
122122
123 /// Enable print [latency:throughput] in output.
124 bool EnablePrintSchedInfo = false;
125
126123 private:
127124 MCSymbol *CurrentFnBegin = nullptr;
128125 MCSymbol *CurrentFnEnd = nullptr;
188188 /// TargetLowering preference). It does not yet disable the postRA scheduler.
189189 virtual bool enableMachineScheduler() const;
190190
191 /// Support printing of [latency:throughput] comment in output .S file.
192 virtual bool supportPrintSchedInfo() const { return false; }
193
194191 /// True if the machine scheduler should disable the TLI preference
195192 /// for preRA scheduling with the source level scheduler.
196193 virtual bool enableMachineSchedDefaultSched() const { return true; }
284281 /// possible.
285282 virtual bool enableSubRegLiveness() const { return false; }
286283
287 /// Returns string representation of scheduler comment
288 std::string getSchedInfoStr(const MachineInstr &MI) const;
289 std::string getSchedInfoStr(MCInst const &MCI) const override;
290
291284 /// This is called after a .mir file was loaded.
292285 virtual void mirFileLoaded(MachineFunction &MF) const;
293286 };
114114 void EmitSLEB128Value(const MCExpr *Value) override;
115115 void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) override;
116116 void ChangeSection(MCSection *Section, const MCExpr *Subsection) override;
117 void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
118 bool = false) override;
117 void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override;
119118
120119 /// Emit an instruction to a special fragment, because this instruction
121120 /// can change its size during relaxation.
128128 /// Flag tracking whether any errors have been encountered.
129129 bool HadError = false;
130130
131 /// Enable print [latency:throughput] in output file.
132 bool EnablePrintSchedInfo = false;
133
134131 bool ShowParsedOperands = false;
135132
136133 public:
163160
164161 bool getShowParsedOperands() const { return ShowParsedOperands; }
165162 void setShowParsedOperands(bool Value) { ShowParsedOperands = Value; }
166
167 void setEnablePrintSchedInfo(bool Value) { EnablePrintSchedInfo = Value; }
168 bool shouldPrintSchedInfo() const { return EnablePrintSchedInfo; }
169163
170164 /// Run the parser on the input source buffer.
171165 virtual bool Run(bool NoInitialTextSection, bool NoFinalize = false) = 0;
951951 virtual void EmitAddrsigSym(const MCSymbol *Sym) {}
952952
953953 /// Emit the given \p Instruction into the current section.
954 /// PrintSchedInfo == true then schedul comment should be added to output
955 virtual void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
956 bool PrintSchedInfo = false);
954 virtual void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI);
957955
958956 /// Set the bundle alignment mode from now on in the section.
959957 /// The argument is the power of 2 to which the alignment is set. The
179179 auto Found = std::lower_bound(ProcDesc.begin(), ProcDesc.end(), CPU);
180180 return Found != ProcDesc.end() && StringRef(Found->Key) == CPU;
181181 }
182
183 /// Returns string representation of scheduler comment
184 virtual std::string getSchedInfoStr(MCInst const &MCI) const {
185 return {};
186 }
187182 };
188183
189184 } // end namespace llvm
5858 #include "llvm/CodeGen/TargetLowering.h"
5959 #include "llvm/CodeGen/TargetOpcodes.h"
6060 #include "llvm/CodeGen/TargetRegisterInfo.h"
61 #include "llvm/CodeGen/TargetSubtargetInfo.h"
6261 #include "llvm/IR/BasicBlock.h"
6362 #include "llvm/IR/Comdat.h"
6463 #include "llvm/IR/Constant.h"
141140
142141 STATISTIC(EmittedInsts, "Number of machine instrs printed");
143142
144 static cl::opt
145 PrintSchedule("print-schedule", cl::Hidden, cl::init(false),
146 cl::desc("Print 'sched: [latency:throughput]' in .s output"));
147
148143 char AsmPrinter::ID = 0;
149144
150145 using gcp_map_type = DenseMap>;
745740 }
746741
747742 /// emitComments - Pretty-print comments for instructions.
748 /// It returns true iff the sched comment was emitted.
749 /// Otherwise it returns false.
750 static bool emitComments(const MachineInstr &MI, raw_ostream &CommentOS,
751 AsmPrinter *AP) {
743 static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
752744 const MachineFunction *MF = MI.getMF();
753745 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
754746
756748 int FI;
757749
758750 const MachineFrameInfo &MFI = MF->getFrameInfo();
759 bool Commented = false;
760751
761752 auto getSize =
762753 [&MFI](const SmallVectorImpl &Accesses) {
776767 if (TII->isLoadFromStackSlotPostFE(MI, FI)) {
777768 if (MFI.isSpillSlotObjectIndex(FI)) {
778769 MMO = *MI.memoperands_begin();
779 CommentOS << MMO->getSize() << "-byte Reload";
780 Commented = true;
770 CommentOS << MMO->getSize() << "-byte Reload\n";
781771 }
782772 } else if (TII->hasLoadFromStackSlot(MI, Accesses)) {
783 if (auto Size = getSize(Accesses)) {
784 CommentOS << Size << "-byte Folded Reload";
785 Commented = true;
786 }
773 if (auto Size = getSize(Accesses))
774 CommentOS << Size << "-byte Folded Reload\n";
787775 } else if (TII->isStoreToStackSlotPostFE(MI, FI)) {
788776 if (MFI.isSpillSlotObjectIndex(FI)) {
789777 MMO = *MI.memoperands_begin();
790 CommentOS << MMO->getSize() << "-byte Spill";
791 Commented = true;
778 CommentOS << MMO->getSize() << "-byte Spill\n";
792779 }
793780 } else if (TII->hasStoreToStackSlot(MI, Accesses)) {
794 if (auto Size = getSize(Accesses)) {
795 CommentOS << Size << "-byte Folded Spill";
796 Commented = true;
797 }
781 if (auto Size = getSize(Accesses))
782 CommentOS << Size << "-byte Folded Spill\n";
798783 }
799784
800785 // Check for spill-induced copies
801 if (MI.getAsmPrinterFlag(MachineInstr::ReloadReuse)) {
802 Commented = true;
803 CommentOS << " Reload Reuse";
804 }
805
806 if (Commented) {
807 if (AP->EnablePrintSchedInfo) {
808 // If any comment was added above and we need sched info comment then add
809 // this new comment just after the above comment w/o "\n" between them.
810 CommentOS << " " << MF->getSubtarget().getSchedInfoStr(MI) << "\n";
811 return true;
812 }
813 CommentOS << "\n";
814 }
815 return false;
786 if (MI.getAsmPrinterFlag(MachineInstr::ReloadReuse))
787 CommentOS << " Reload Reuse\n";
816788 }
817789
818790 /// emitImplicitDef - This method emits the specified machine instruction
11001072 }
11011073 }
11021074
1103 if (isVerbose() && emitComments(MI, OutStreamer->GetCommentOS(), this)) {
1104 MachineInstr *MIP = const_cast(&MI);
1105 MIP->setAsmPrinterFlag(MachineInstr::NoSchedComment);
1106 }
1075 if (isVerbose())
1076 emitComments(MI, OutStreamer->GetCommentOS());
11071077
11081078 switch (MI.getOpcode()) {
11091079 case TargetOpcode::CFI_INSTRUCTION:
16351605 }
16361606
16371607 ORE = &getAnalysis().getORE();
1638
1639 const TargetSubtargetInfo &STI = MF.getSubtarget();
1640 EnablePrintSchedInfo = PrintSchedule.getNumOccurrences()
1641 ? PrintSchedule
1642 : STI.supportPrintSchedInfo();
16431608 }
16441609
16451610 namespace {
1717 #include "llvm/CodeGen/MachineModuleInfo.h"
1818 #include "llvm/CodeGen/TargetInstrInfo.h"
1919 #include "llvm/CodeGen/TargetRegisterInfo.h"
20 #include "llvm/CodeGen/TargetSubtargetInfo.h"
2120 #include "llvm/IR/Constants.h"
2221 #include "llvm/IR/DataLayout.h"
2322 #include "llvm/IR/InlineAsm.h"
153152 " we don't have an asm parser for this target\n");
154153 Parser->setAssemblerDialect(Dialect);
155154 Parser->setTargetParser(*TAP.get());
156 Parser->setEnablePrintSchedInfo(EnablePrintSchedInfo);
157155 // Enable lexing Masm binary and hex integer literals in intel inline
158156 // assembly.
159157 if (Dialect == InlineAsm::AD_Intel)
557557 continue;
558558
559559 LLVM_DEBUG(if (dump_intrs) {
560 dbgs() << "\tFor the Pattern (" << (int)P << ") these instructions could be removed\n";
561 for (auto const *InstrPtr : DelInstrs) {
562 dbgs() << "\t\t" << STI->getSchedInfoStr(*InstrPtr) << ": ";
560 dbgs() << "\tFor the Pattern (" << (int)P
561 << ") these instructions could be removed\n";
562 for (auto const *InstrPtr : DelInstrs)
563563 InstrPtr->print(dbgs(), false, false, false, TII);
564 }
565564 dbgs() << "\tThese instructions could replace the removed ones\n";
566 for (auto const *InstrPtr : InsInstrs) {
567 dbgs() << "\t\t" << STI->getSchedInfoStr(*InstrPtr) << ": ";
565 for (auto const *InstrPtr : InsInstrs)
568566 InstrPtr->print(dbgs(), false, false, false, TII);
569 }
570567 });
571568
572569 bool SubstituteAlways = false;
1010 //===----------------------------------------------------------------------===//
1111
1212 #include "llvm/CodeGen/TargetSubtargetInfo.h"
13 #include "llvm/ADT/Optional.h"
14 #include "llvm/CodeGen/MachineInstr.h"
15 #include "llvm/CodeGen/TargetInstrInfo.h"
16 #include "llvm/CodeGen/TargetSchedule.h"
17 #include "llvm/MC/MCInst.h"
18 #include "llvm/Support/Format.h"
19 #include "llvm/Support/raw_ostream.h"
20 #include
2113
2214 using namespace llvm;
2315
6557 return false;
6658 }
6759
68 static std::string createSchedInfoStr(unsigned Latency, double RThroughput) {
69 static const char *SchedPrefix = " sched: [";
70 std::string Comment;
71 raw_string_ostream CS(Comment);
72 if (RThroughput != 0.0)
73 CS << SchedPrefix << Latency << format(":%2.2f", RThroughput)
74 << "]";
75 else
76 CS << SchedPrefix << Latency << ":?]";
77 CS.flush();
78 return Comment;
79 }
80
81 /// Returns string representation of scheduler comment
82 std::string TargetSubtargetInfo::getSchedInfoStr(const MachineInstr &MI) const {
83 if (MI.isPseudo() || MI.isTerminator())
84 return std::string();
85 // We don't cache TSchedModel because it depends on TargetInstrInfo
86 // that could be changed during the compilation
87 TargetSchedModel TSchedModel;
88 TSchedModel.init(this);
89 unsigned Latency = TSchedModel.computeInstrLatency(&MI);
90
91 // Add extra latency due to forwarding delays.
92 const MCSchedClassDesc &SCDesc = *TSchedModel.resolveSchedClass(&MI);
93 Latency +=
94 MCSchedModel::getForwardingDelayCycles(getReadAdvanceEntries(SCDesc));
95
96 double RThroughput = TSchedModel.computeReciprocalThroughput(&MI);
97 return createSchedInfoStr(Latency, RThroughput);
98 }
99
100 /// Returns string representation of scheduler comment
101 std::string TargetSubtargetInfo::getSchedInfoStr(MCInst const &MCI) const {
102 // We don't cache TSchedModel because it depends on TargetInstrInfo
103 // that could be changed during the compilation
104 TargetSchedModel TSchedModel;
105 TSchedModel.init(this);
106 unsigned Latency;
107 if (TSchedModel.hasInstrSchedModel()) {
108 Latency = TSchedModel.computeInstrLatency(MCI);
109 // Add extra latency due to forwarding delays.
110 const MCSchedModel &SM = *TSchedModel.getMCSchedModel();
111 unsigned SClassID = getInstrInfo()->get(MCI.getOpcode()).getSchedClass();
112 while (SM.getSchedClassDesc(SClassID)->isVariant())
113 SClassID = resolveVariantSchedClass(SClassID, &MCI, SM.ProcID);
114 const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SClassID);
115 Latency +=
116 MCSchedModel::getForwardingDelayCycles(getReadAdvanceEntries(SCDesc));
117 } else if (TSchedModel.hasInstrItineraries()) {
118 auto *ItinData = TSchedModel.getInstrItineraries();
119 Latency = ItinData->getStageLatency(
120 getInstrInfo()->get(MCI.getOpcode()).getSchedClass());
121 } else
122 return std::string();
123 double RThroughput = TSchedModel.computeReciprocalThroughput(MCI);
124 return createSchedInfoStr(Latency, RThroughput);
125 }
126
127 void TargetSubtargetInfo::mirFileLoaded(MachineFunction &MF) const {
128 }
60 void TargetSubtargetInfo::mirFileLoaded(MachineFunction &MF) const { }
106106 void AddComment(const Twine &T, bool EOL = true) override;
107107
108108 /// Add a comment showing the encoding of an instruction.
109 /// If PrintSchedInfo is true, then the comment sched:[x:y] will be added to
110 /// the output if supported by the target.
111 void AddEncodingComment(const MCInst &Inst, const MCSubtargetInfo &,
112 bool PrintSchedInfo);
109 void AddEncodingComment(const MCInst &Inst, const MCSubtargetInfo &);
113110
114111 /// Return a raw_ostream that comments can be written to.
115112 /// Unlike AddComment, you are required to terminate comments with \n if you
310307 void emitCGProfileEntry(const MCSymbolRefExpr *From,
311308 const MCSymbolRefExpr *To, uint64_t Count) override;
312309
313 void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
314 bool PrintSchedInfo) override;
310 void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override;
315311
316312 void EmitBundleAlignMode(unsigned AlignPow2) override;
317313 void EmitBundleLock(bool AlignToEnd) override;
17381734 }
17391735
17401736 void MCAsmStreamer::AddEncodingComment(const MCInst &Inst,
1741 const MCSubtargetInfo &STI,
1742 bool PrintSchedInfo) {
1737 const MCSubtargetInfo &STI) {
17431738 raw_ostream &OS = GetCommentOS();
17441739 SmallString<256> Code;
17451740 SmallVector Fixups;
18181813 }
18191814 }
18201815 }
1821 OS << "]";
1822 // If we are not going to add fixup or schedule comments after this point
1823 // then we have to end the current comment line with "\n".
1824 if (Fixups.size() || !PrintSchedInfo)
1825 OS << "\n";
1816 OS << "]\n";
18261817
18271818 for (unsigned i = 0, e = Fixups.size(); i != e; ++i) {
18281819 MCFixup &F = Fixups[i];
18341825 }
18351826
18361827 void MCAsmStreamer::EmitInstruction(const MCInst &Inst,
1837 const MCSubtargetInfo &STI,
1838 bool PrintSchedInfo) {
1828 const MCSubtargetInfo &STI) {
18391829 assert(getCurrentSectionOnly() &&
18401830 "Cannot emit contents before setting section!");
18411831
18421832 // Show the encoding in a comment if we have a code emitter.
1843 AddEncodingComment(Inst, STI, PrintSchedInfo);
1833 AddEncodingComment(Inst, STI);
18441834
18451835 // Show the MCInst if enabled.
18461836 if (ShowInst) {
1847 if (PrintSchedInfo)
1848 GetCommentOS() << "\n";
18491837 Inst.dump_pretty(GetCommentOS(), InstPrinter.get(), "\n ");
18501838 GetCommentOS() << "\n";
18511839 }
18541842 getTargetStreamer()->prettyPrintAsm(*InstPrinter, OS, Inst, STI);
18551843 else
18561844 InstPrinter->printInst(&Inst, OS, "", STI);
1857
1858 if (PrintSchedInfo) {
1859 std::string SI = STI.getSchedInfoStr(Inst);
1860 if (!SI.empty())
1861 GetCommentOS() << SI;
1862 }
18631845
18641846 StringRef Comments = CommentToEmit;
18651847 if (Comments.size() && Comments.back() != '\n')
313313 }
314314
315315 void MCObjectStreamer::EmitInstruction(const MCInst &Inst,
316 const MCSubtargetInfo &STI, bool) {
316 const MCSubtargetInfo &STI) {
317317 getAssembler().getBackend().handleCodePaddingInstructionBegin(Inst);
318318 EmitInstructionImpl(Inst, STI);
319319 getAssembler().getBackend().handleCodePaddingInstructionEnd(Inst);
951951 }
952952 }
953953
954 void MCStreamer::EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
955 bool) {
954 void MCStreamer::EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &) {
956955 // Scan for values.
957956 for (unsigned i = Inst.getNumOperands(); i--;)
958957 if (Inst.getOperand(i).isExpr())
8181 RecordStreamer::const_iterator RecordStreamer::end() { return Symbols.end(); }
8282
8383 void RecordStreamer::EmitInstruction(const MCInst &Inst,
84 const MCSubtargetInfo &STI, bool) {
84 const MCSubtargetInfo &STI) {
8585 MCStreamer::EmitInstruction(Inst, STI);
8686 }
8787
4545 public:
4646 RecordStreamer(MCContext &Context, const Module &M);
4747
48 void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
49 bool) override;
48 void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override;
5049 void EmitLabel(MCSymbol *Symbol, SMLoc Loc = SMLoc()) override;
5150 void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) override;
5251 bool EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) override;
101101 /// This function is the one used to emit instruction data into the ELF
102102 /// streamer. We override it to add the appropriate mapping symbol if
103103 /// necessary.
104 void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
105 bool) override {
104 void EmitInstruction(const MCInst &Inst,
105 const MCSubtargetInfo &STI) override {
106106 EmitA64MappingSymbol();
107107 MCELFStreamer::EmitInstruction(Inst, STI);
108108 }
483483 /// This function is the one used to emit instruction data into the ELF
484484 /// streamer. We override it to add the appropriate mapping symbol if
485485 /// necessary.
486 void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
487 bool) override {
486 void EmitInstruction(const MCInst &Inst,
487 const MCSubtargetInfo &STI) override {
488488 if (IsThumb)
489489 EmitThumbMappingSymbol();
490490 else
5858 MCII(createHexagonMCInstrInfo()) {}
5959
6060 void HexagonMCELFStreamer::EmitInstruction(const MCInst &MCB,
61 const MCSubtargetInfo &STI, bool) {
61 const MCSubtargetInfo &STI) {
6262 assert(MCB.getOpcode() == Hexagon::BUNDLE);
6363 assert(HexagonMCInstrInfo::bundleSize(MCB) <= HEXAGON_PACKET_SIZE);
6464 assert(HexagonMCInstrInfo::bundleSize(MCB) > 0);
2929 std::unique_ptr Emitter,
3030 MCAssembler *Assembler);
3131
32 void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
33 bool) override;
32 void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override;
3433 void EmitSymbol(const MCInst &Inst);
3534 void HexagonMCEmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
3635 unsigned ByteAlignment,
3333 }
3434
3535 void MipsELFStreamer::EmitInstruction(const MCInst &Inst,
36 const MCSubtargetInfo &STI, bool) {
36 const MCSubtargetInfo &STI) {
3737 MCELFStreamer::EmitInstruction(Inst, STI);
3838
3939 MCContext &Context = getContext();
4040 /// \p Inst is actually emitted. For example, we can inspect the operands and
4141 /// gather sufficient information that allows us to reason about the register
4242 /// usage for the translation unit.
43 void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
44 bool = false) override;
43 void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override;
4544
4645 /// Overriding this function allows us to record all labels that should be
4746 /// marked as microMIPS. Based on this data marking is done in
142142 public:
143143 /// This function is the one used to emit instruction data into the ELF
144144 /// streamer. We override it to mask dangerous instructions.
145 void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
146 bool) override {
145 void EmitInstruction(const MCInst &Inst,
146 const MCSubtargetInfo &STI) override {
147147 // Sandbox indirect jumps.
148148 if (isIndirectJump(Inst)) {
149149 if (PendingCall)
193193 // X86AsmInstrumentation implementation:
194194 void InstrumentAndEmitInstruction(const MCInst &Inst, OperandVector &Operands,
195195 MCContext &Ctx, const MCInstrInfo &MII,
196 MCStreamer &Out,
197 /* unused */ bool) override {
196 MCStreamer &Out) override {
198197 InstrumentMOVS(Inst, Operands, Ctx, MII, Out);
199198 if (RepPrefix)
200199 EmitInstruction(Out, MCInstBuilder(X86::REP_PREFIX));
10421041
10431042 void X86AsmInstrumentation::InstrumentAndEmitInstruction(
10441043 const MCInst &Inst, OperandVector &Operands, MCContext &Ctx,
1045 const MCInstrInfo &MII, MCStreamer &Out, bool PrintSchedInfoEnabled) {
1046 EmitInstruction(Out, Inst, PrintSchedInfoEnabled);
1047 }
1048
1049 void X86AsmInstrumentation::EmitInstruction(MCStreamer &Out, const MCInst &Inst,
1050 bool PrintSchedInfoEnabled) {
1051 Out.EmitInstruction(Inst, *STI, PrintSchedInfoEnabled);
1044 const MCInstrInfo &MII, MCStreamer &Out) {
1045 EmitInstruction(Out, Inst);
1046 }
1047
1048 void X86AsmInstrumentation::EmitInstruction(MCStreamer &Out,
1049 const MCInst &Inst) {
1050 Out.EmitInstruction(Inst, *STI);
10521051 }
10531052
10541053 unsigned X86AsmInstrumentation::GetFrameRegGeneric(const MCContext &Ctx,
4040 virtual void InstrumentAndEmitInstruction(
4141 const MCInst &Inst,
4242 SmallVectorImpl> &Operands,
43 MCContext &Ctx, const MCInstrInfo &MII, MCStreamer &Out,
44 bool PrintSchedInfoEnabled);
43 MCContext &Ctx, const MCInstrInfo &MII, MCStreamer &Out);
4544
4645 protected:
4746 friend X86AsmInstrumentation *
5352
5453 unsigned GetFrameRegGeneric(const MCContext &Ctx, MCStreamer &Out);
5554
56 void EmitInstruction(MCStreamer &Out, const MCInst &Inst,
57 bool PrintSchedInfoEnabled = false);
55 void EmitInstruction(MCStreamer &Out, const MCInst &Inst);
5856
5957 const MCSubtargetInfo *&STI;
6058
28632863 void X86AsmParser::EmitInstruction(MCInst &Inst, OperandVector &Operands,
28642864 MCStreamer &Out) {
28652865 Instrumentation->InstrumentAndEmitInstruction(
2866 Inst, Operands, getContext(), MII, Out,
2867 getParser().shouldPrintSchedInfo());
2866 Inst, Operands, getContext(), MII, Out);
28682867 }
28692868
28702869 bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
13021302 OS << ']';
13031303 --i; // For loop increments element #.
13041304 }
1305 OS << '\n';
13051306
13061307 // We successfully added a comment to this instruction.
13071308 return true;
5858 IP_HAS_REPEAT_NE = 4,
5959 IP_HAS_REPEAT = 8,
6060 IP_HAS_LOCK = 16,
61 NO_SCHED_INFO = 32, // Don't add sched comment to the current instr because
62 // it was already added
63 IP_HAS_NOTRACK = 64
61 IP_HAS_NOTRACK = 32
6462 };
6563 } // end namespace X86;
6664
9999 }
100100
101101 void X86AsmPrinter::EmitAndCountInstruction(MCInst &Inst) {
102 OutStreamer->EmitInstruction(Inst, getSubtargetInfo(),
103 EnablePrintSchedInfo &&
104 !(Inst.getFlags() & X86::NO_SCHED_INFO));
102 OutStreamer->EmitInstruction(Inst, getSubtargetInfo());
105103 SMShadowTracker.count(Inst, getSubtargetInfo(), CodeEmitter.get());
106104 }
107105
18591857 SmallVector Mask;
18601858 DecodePSHUFBMask(C, Width, Mask);
18611859 if (!Mask.empty())
1862 OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask),
1863 !EnablePrintSchedInfo);
1860 OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask));
18641861 }
18651862 break;
18661863 }
19321929 SmallVector Mask;
19331930 DecodeVPERMILPMask(C, ElSize, Width, Mask);
19341931 if (!Mask.empty())
1935 OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask),
1936 !EnablePrintSchedInfo);
1932 OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask));
19371933 }
19381934 break;
19391935 }
19641960 SmallVector Mask;
19651961 DecodeVPERMIL2PMask(C, (unsigned)CtrlOp.getImm(), ElSize, Width, Mask);
19661962 if (!Mask.empty())
1967 OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask),
1968 !EnablePrintSchedInfo);
1963 OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask));
19691964 }
19701965 break;
19711966 }
19821977 SmallVector Mask;
19831978 DecodeVPPERMMask(C, Width, Mask);
19841979 if (!Mask.empty())
1985 OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask),
1986 !EnablePrintSchedInfo);
1980 OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask));
19871981 }
19881982 break;
19891983 }
20001994 CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
20011995 if (auto *CF = dyn_cast(C)) {
20021996 CS << "0x" << CF->getValueAPF().bitcastToAPInt().toString(16, false);
2003 OutStreamer->AddComment(CS.str(), !EnablePrintSchedInfo);
1997 OutStreamer->AddComment(CS.str());
20041998 }
20051999 }
20062000 break;
20972091 }
20982092 }
20992093 CS << "]";
2100 OutStreamer->AddComment(CS.str(), !EnablePrintSchedInfo);
2094 OutStreamer->AddComment(CS.str());
21012095 } else if (auto *CV = dyn_cast(C)) {
21022096 CS << "<";
21032097 for (int l = 0; l != NumLanes; ++l) {
21092103 }
21102104 }
21112105 CS << ">";
2112 OutStreamer->AddComment(CS.str(), !EnablePrintSchedInfo);
2106 OutStreamer->AddComment(CS.str());
21132107 }
21142108 }
21152109 break;
21962190 printConstant(C, CS);
21972191 }
21982192 CS << "]";
2199 OutStreamer->AddComment(CS.str(), !EnablePrintSchedInfo);
2193 OutStreamer->AddComment(CS.str());
22002194 }
22012195 }
22022196
22032197 MCInst TmpInst;
22042198 MCInstLowering.Lower(MI, TmpInst);
2205 if (MI->getAsmPrinterFlag(MachineInstr::NoSchedComment))
2206 TmpInst.setFlags(TmpInst.getFlags() | X86::NO_SCHED_INFO);
22072199
22082200 // Stackmap shadows cannot include branch targets, so we can count the bytes
22092201 // in a call towards the shadow, but must ensure that the no thread returns
832832 /// Enable the MachineScheduler pass for all X86 subtargets.
833833 bool enableMachineScheduler() const override { return true; }
834834
835 // TODO: Update the regression tests and return true.
836 bool supportPrintSchedInfo() const override { return false; }
837
838835 bool enableEarlyIfConversion() const override;
839836
840837 AntiDepBreakMode getAntiDepBreakMode() const override {
+0
-394
test/CodeGen/X86/3dnow-schedule.ll less more
None ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+3dnowa | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
2
3 define void @test_femms() optsize {
4 ; CHECK-LABEL: test_femms:
5 ; CHECK: # %bb.0:
6 ; CHECK-NEXT: femms # sched: [31:10.33]
7 ; CHECK-NEXT: retq # sched: [1:1.00]
8 call void @llvm.x86.mmx.femms()
9 ret void
10 }
11 declare void @llvm.x86.mmx.femms() nounwind readnone
12
13 define i64 @test_pavgusb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
14 ; CHECK-LABEL: test_pavgusb:
15 ; CHECK: # %bb.0:
16 ; CHECK-NEXT: pavgusb %mm1, %mm0 # sched: [3:1.00]
17 ; CHECK-NEXT: pavgusb (%rdi), %mm0 # sched: [8:1.00]
18 ; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00]
19 ; CHECK-NEXT: retq # sched: [1:1.00]
20 %1 = call x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx %a0, x86_mmx %a1)
21 %2 = load x86_mmx, x86_mmx *%a2, align 8
22 %3 = call x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx %1, x86_mmx %2)
23 %4 = bitcast x86_mmx %3 to i64
24 ret i64 %4
25 }
26 declare x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx, x86_mmx) nounwind readnone
27
28 define i64 @test_pf2id(x86_mmx* %a0) optsize {
29 ; CHECK-LABEL: test_pf2id:
30 ; CHECK: # %bb.0:
31 ; CHECK-NEXT: pf2id (%rdi), %mm0 # sched: [9:1.00]
32 ; CHECK-NEXT: pf2id %mm0, %mm0 # sched: [3:1.00]
33 ; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00]
34 ; CHECK-NEXT: retq # sched: [1:1.00]
35 %1 = load x86_mmx, x86_mmx *%a0, align 8
36 %2 = call x86_mmx @llvm.x86.3dnow.pf2id(x86_mmx %1)
37 %3 = call x86_mmx @llvm.x86.3dnow.pf2id(x86_mmx %2)
38 %4 = bitcast x86_mmx %3 to i64
39 ret i64 %4
40 }
41 declare x86_mmx @llvm.x86.3dnow.pf2id(x86_mmx) nounwind readnone
42
43 define i64 @test_pf2iw(x86_mmx* %a0) optsize {
44 ; CHECK-LABEL: test_pf2iw:
45 ; CHECK: # %bb.0:
46 ; CHECK-NEXT: pf2iw (%rdi), %mm0 # sched: [9:1.00]
47 ; CHECK-NEXT: pf2iw %mm0, %mm0 # sched: [3:1.00]
48 ; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00]
49 ; CHECK-NEXT: retq # sched: [1:1.00]
50 %1 = load x86_mmx, x86_mmx *%a0, align 8
51 %2 = call x86_mmx @llvm.x86.3dnowa.pf2iw(x86_mmx %1)
52 %3 = call x86_mmx @llvm.x86.3dnowa.pf2iw(x86_mmx %2)
53 %4 = bitcast x86_mmx %3 to i64
54 ret i64 %4
55 }
56 declare x86_mmx @llvm.x86.3dnowa.pf2iw(x86_mmx) nounwind readnone
57
58 define i64 @test_pfacc(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
59 ; CHECK-LABEL: test_pfacc:
60 ; CHECK: # %bb.0:
61 ; CHECK-NEXT: pfacc %mm1, %mm0 # sched: [3:1.00]
62 ; CHECK-NEXT: pfacc (%rdi), %mm0 # sched: [9:1.00]
63 ; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00]
64 ; CHECK-NEXT: retq # sched: [1:1.00]
65 %1 = call x86_mmx @llvm.x86.3dnow.pfacc(x86_mmx %a0, x86_mmx %a1)
66 %2 = load x86_mmx, x86_mmx *%a2, align 8
67 %3 = call x86_mmx @llvm.x86.3dnow.pfacc(x86_mmx %1, x86_mmx %2)
68 %4 = bitcast x86_mmx %3 to i64
69 ret i64 %4
70 }
71 declare x86_mmx @llvm.x86.3dnow.pfacc(x86_mmx, x86_mmx) nounwind readnone
72
73 define i64 @test_pfadd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
74 ; CHECK-LABEL: test_pfadd:
75 ; CHECK: # %bb.0:
76 ; CHECK-NEXT: pfadd %mm1, %mm0 # sched: [3:1.00]
77 ; CHECK-NEXT: pfadd (%rdi), %mm0 # sched: [9:1.00]
78 ; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00]
79 ; CHECK-NEXT: retq # sched: [1:1.00]
80 %1 = call x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx %a0, x86_mmx %a1)
81 %2 = load x86_mmx, x86_mmx *%a2, align 8
82 %3 = call x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx %1, x86_mmx %2)
83 %4 = bitcast x86_mmx %3 to i64
84 ret i64 %4
85 }
86 declare x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx, x86_mmx) nounwind readnone
87
88 define i64 @test_pfcmpeq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
89 ; CHECK-LABEL: test_pfcmpeq:
90 ; CHECK: # %bb.0:
91 ; CHECK-NEXT: pfcmpeq %mm1, %mm0 # sched: [3:1.00]
92 ; CHECK-NEXT: pfcmpeq (%rdi), %mm0 # sched: [9:1.00]
93 ; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00]
94 ; CHECK-NEXT: retq # sched: [1:1.00]
95 %1 = call x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx %a0, x86_mmx %a1)
96 %2 = load x86_mmx, x86_mmx *%a2, align 8
97 %3 = call x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx %1, x86_mmx %2)
98 %4 = bitcast x86_mmx %3 to i64
99 ret i64 %4
100 }
101 declare x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx, x86_mmx) nounwind readnone
102
103 define i64 @test_pfcmpge(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
104 ; CHECK-LABEL: test_pfcmpge:
105 ; CHECK: # %bb.0:
106 ; CHECK-NEXT: pfcmpge %mm1, %mm0 # sched: [3:1.00]
107 ; CHECK-NEXT: pfcmpge (%rdi), %mm0 # sched: [9:1.00]
108 ; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00]
109 ; CHECK-NEXT: retq # sched: [1:1.00]
110 %1 = call x86_mmx @llvm.x86.3dnow.pfcmpge(x86_mmx %a0, x86_mmx %a1)
111 %2 = load x86_mmx, x86_mmx *%a2, align 8
112 %3 = call x86_mmx @llvm.x86.3dnow.pfcmpge(x86_mmx %1, x86_mmx %2)
113 %4 = bitcast x86_mmx %3 to i64
114 ret i64 %4
115 }
116 declare x86_mmx @llvm.x86.3dnow.pfcmpge(x86_mmx, x86_mmx) nounwind readnone
117
118 define i64 @test_pfcmpgt(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
119 ; CHECK-LABEL: test_pfcmpgt:
120 ; CHECK: # %bb.0:
121 ; CHECK-NEXT: pfcmpgt %mm1, %mm0 # sched: [3:1.00]
122 ; CHECK-NEXT: pfcmpgt (%rdi), %mm0 # sched: [9:1.00]
123 ; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00]
124 ; CHECK-NEXT: retq # sched: [1:1.00]
125 %1 = call x86_mmx @llvm.x86.3dnow.pfcmpgt(x86_mmx %a0, x86_mmx %a1)
126 %2 = load x86_mmx, x86_mmx *%a2, align 8
127 %3 = call x86_mmx @llvm.x86.3dnow.pfcmpgt(x86_mmx %1, x86_mmx %2)
128 %4 = bitcast x86_mmx %3 to i64
129 ret i64 %4
130 }
131 declare x86_mmx @llvm.x86.3dnow.pfcmpgt(x86_mmx, x86_mmx) nounwind readnone
132
133 define i64 @test_pfmax(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
134 ; CHECK-LABEL: test_pfmax:
135 ; CHECK: # %bb.0:
136 ; CHECK-NEXT: pfmax %mm1, %mm0 # sched: [3:1.00]
137 ; CHECK-NEXT: pfmax (%rdi), %mm0 # sched: [9:1.00]
138 ; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00]
139 ; CHECK-NEXT: retq # sched: [1:1.00]
140 %1 = call x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx %a0, x86_mmx %a1)
141 %2 = load x86_mmx, x86_mmx *%a2, align 8
142 %3 = call x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx %1, x86_mmx %2)
143 %4 = bitcast x86_mmx %3 to i64
144 ret i64 %4
145 }
146 declare x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx, x86_mmx) nounwind readnone
147
148 define i64 @test_pfmin(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
149 ; CHECK-LABEL: test_pfmin:
150 ; CHECK: # %bb.0:
151 ; CHECK-NEXT: pfmin %mm1, %mm0 # sched: [3:1.00]
152 ; CHECK-NEXT: pfmin (%rdi), %mm0 # sched: [9:1.00]
153 ; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00]
154 ; CHECK-NEXT: retq # sched: [1:1.00]
155 %1 = call x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx %a0, x86_mmx %a1)
156 %2 = load x86_mmx, x86_mmx *%a2, align 8
157 %3 = call x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx %1, x86_mmx %2)
158 %4 = bitcast x86_mmx %3 to i64
159 ret i64 %4
160 }
161 declare x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx, x86_mmx) nounwind readnone
162
163 define i64 @test_pfmul(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
164 ; CHECK-LABEL: test_pfmul:
165 ; CHECK: # %bb.0:
166 ; CHECK-NEXT: pfmul %mm1, %mm0 # sched: [3:1.00]
167 ; CHECK-NEXT: pfmul (%rdi), %mm0 # sched: [9:1.00]
168 ; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00]
169 ; CHECK-NEXT: retq # sched: [1:1.00]
170 %1 = call x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx %a0, x86_mmx %a1)
171 %2 = load x86_mmx, x86_mmx *%a2, align 8
172 %3 = call x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx %1, x86_mmx %2)
173 %4 = bitcast x86_mmx %3 to i64
174 ret i64 %4
175 }
176 declare x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx, x86_mmx) nounwind readnone
177
178 define i64 @test_pfnacc(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
179 ; CHECK-LABEL: test_pfnacc:
180 ; CHECK: # %bb.0:
181 ; CHECK-NEXT: pfnacc %mm1, %mm0 # sched: [3:1.00]
182 ; CHECK-NEXT: pfnacc (%rdi), %mm0 # sched: [9:1.00]
183 ; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00]
184 ; CHECK-NEXT: retq # sched: [1:1.00]
185 %1 = call x86_mmx @llvm.x86.3dnowa.pfnacc(x86_mmx %a0, x86_mmx %a1)
186 %2 = load x86_mmx, x86_mmx *%a2, align 8
187 %3 = call x86_mmx @llvm.x86.3dnowa.pfnacc(x86_mmx %1, x86_mmx %2)
188 %4 = bitcast x86_mmx %3 to i64
189 ret i64 %4
190 }
191 declare x86_mmx @llvm.x86.3dnowa.pfnacc(x86_mmx, x86_mmx) nounwind readnone
192
193 define i64 @test_pfpnacc(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
194 ; CHECK-LABEL: test_pfpnacc:
195 ; CHECK: # %bb.0:
196 ; CHECK-NEXT: pfpnacc %mm1, %mm0 # sched: [3:1.00]
197 ; CHECK-NEXT: pfpnacc (%rdi), %mm0 # sched: [9:1.00]
198 ; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00]
199 ; CHECK-NEXT: retq # sched: [1:1.00]
200 %1 = call x86_mmx @llvm.x86.3dnowa.pfpnacc(x86_mmx %a0, x86_mmx %a1)
201 %2 = load x86_mmx, x86_mmx *%a2, align 8
202 %3 = call x86_mmx @llvm.x86.3dnowa.pfpnacc(x86_mmx %1, x86_mmx %2)
203 %4 = bitcast x86_mmx %3 to i64
204 ret i64 %4
205 }
206 declare x86_mmx @llvm.x86.3dnowa.pfpnacc(x86_mmx, x86_mmx) nounwind readnone
207
208 define i64 @test_pfrcp(x86_mmx* %a0) optsize {
209 ; CHECK-LABEL: test_pfrcp:
210 ; CHECK: # %bb.0:
211 ; CHECK-NEXT: pfrcp (%rdi), %mm0 # sched: [9:1.00]
212 ; CHECK-NEXT: pfrcp %mm0, %mm0 # sched: [3:1.00]
213 ; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00]
214 ; CHECK-NEXT: retq # sched: [1:1.00]
215 %1 = load x86_mmx, x86_mmx *%a0, align 8
216 %2 = call x86_mmx @llvm.x86.3dnow.pfrcp(x86_mmx %1)
217 %3 = call x86_mmx @llvm.x86.3dnow.pfrcp(x86_mmx %2)
218 %4 = bitcast x86_mmx %3 to i64
219 ret i64 %4
220 }
221 declare x86_mmx @llvm.x86.3dnow.pfrcp(x86_mmx) nounwind readnone
222
223 define i64 @test_pfrcpit1(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
224 ; CHECK-LABEL: test_pfrcpit1:
225 ; CHECK: # %bb.0:
226 ; CHECK-NEXT: pfrcpit1 %mm1, %mm0 # sched: [3:1.00]
227 ; CHECK-NEXT: pfrcpit1 (%rdi), %mm0 # sched: [9:1.00]
228 ; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00]
229 ; CHECK-NEXT: retq # sched: [1:1.00]
230 %1 = call x86_mmx @llvm.x86.3dnow.pfrcpit1(x86_mmx %a0, x86_mmx %a1)
231 %2 = load x86_mmx, x86_mmx *%a2, align 8
232 %3 = call x86_mmx @llvm.x86.3dnow.pfrcpit1(x86_mmx %1, x86_mmx %2)
233 %4 = bitcast x86_mmx %3 to i64
234 ret i64 %4
235 }
236 declare x86_mmx @llvm.x86.3dnow.pfrcpit1(x86_mmx, x86_mmx) nounwind readnone
237
238 define i64 @test_pfrcpit2(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
239 ; CHECK-LABEL: test_pfrcpit2:
240 ; CHECK: # %bb.0:
241 ; CHECK-NEXT: pfrcpit2 %mm1, %mm0 # sched: [3:1.00]
242 ; CHECK-NEXT: pfrcpit2 (%rdi), %mm0 # sched: [9:1.00]
243 ; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00]
244 ; CHECK-NEXT: retq # sched: [1:1.00]
245 %1 = call x86_mmx @llvm.x86.3dnow.pfrcpit2(x86_mmx %a0, x86_mmx %a1)
246 %2 = load x86_mmx, x86_mmx *%a2, align 8
247 %3 = call x86_mmx @llvm.x86.3dnow.pfrcpit2(x86_mmx %1, x86_mmx %2)
248 %4 = bitcast x86_mmx %3 to i64
249 ret i64 %4
250 }
251 declare x86_mmx @llvm.x86.3dnow.pfrcpit2(x86_mmx, x86_mmx) nounwind readnone
252
253 define i64 @test_pfrsqit1(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
254 ; CHECK-LABEL: test_pfrsqit1:
255 ; CHECK: # %bb.0:
256 ; CHECK-NEXT: pfrsqit1 %mm1, %mm0 # sched: [3:1.00]
257 ; CHECK-NEXT: pfrsqit1 (%rdi), %mm0 # sched: [9:1.00]
258 ; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00]
259 ; CHECK-NEXT: retq # sched: [1:1.00]
260 %1 = call x86_mmx @llvm.x86.3dnow.pfrsqit1(x86_mmx %a0, x86_mmx %a1)
261 %2 = load x86_mmx, x86_mmx *%a2, align 8
262 %3 = call x86_mmx @llvm.x86.3dnow.pfrsqit1(x86_mmx %1, x86_mmx %2)
263 %4 = bitcast x86_mmx %3 to i64
264 ret i64 %4
265 }
266 declare x86_mmx @llvm.x86.3dnow.pfrsqit1(x86_mmx, x86_mmx) nounwind readnone
267
268 define i64 @test_pfrsqrt(x86_mmx* %a0) optsize {
269 ; CHECK-LABEL: test_pfrsqrt:
270 ; CHECK: # %bb.0:
271 ; CHECK-NEXT: pfrsqrt (%rdi), %mm0 # sched: [9:1.00]
272 ; CHECK-NEXT: pfrsqrt %mm0, %mm0 # sched: [3:1.00]
273 ; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00]
274 ; CHECK-NEXT: retq # sched: [1:1.00]
275 %1 = load x86_mmx, x86_mmx *%a0, align 8
276 %2 = call x86_mmx @llvm.x86.3dnow.pfrsqrt(x86_mmx %1)
277 %3 = call x86_mmx @llvm.x86.3dnow.pfrsqrt(x86_mmx %2)
278 %4 = bitcast x86_mmx %3 to i64
279 ret i64 %4
280 }
281 declare x86_mmx @llvm.x86.3dnow.pfrsqrt(x86_mmx) nounwind readnone
282
283 define i64 @test_pfsub(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
284 ; CHECK-LABEL: test_pfsub:
285 ; CHECK: # %bb.0:
286 ; CHECK-NEXT: pfsub %mm1, %mm0 # sched: [3:1.00]
287 ; CHECK-NEXT: pfsub (%rdi), %mm0 # sched: [9:1.00]
288 ; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00]
289 ; CHECK-NEXT: retq # sched: [1:1.00]
290 %1 = call x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx %a0, x86_mmx %a1)
291 %2 = load x86_mmx, x86_mmx *%a2, align 8
292 %3 = call x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx %1, x86_mmx %2)
293 %4 = bitcast x86_mmx %3 to i64
294 ret i64 %4
295 }
296 declare x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx, x86_mmx) nounwind readnone
297
298 define i64 @test_pfsubr(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
299 ; CHECK-LABEL: test_pfsubr:
300 ; CHECK: # %bb.0:
301 ; CHECK-NEXT: pfsubr %mm1, %mm0 # sched: [3:1.00]
302 ; CHECK-NEXT: pfsubr (%rdi), %mm0 # sched: [9:1.00]
303 ; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00]
304 ; CHECK-NEXT: retq # sched: [1:1.00]
305 %1 = call x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx %a0, x86_mmx %a1)
306 %2 = load x86_mmx, x86_mmx *%a2, align 8
307 %3 = call x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx %1, x86_mmx %2)
308 %4 = bitcast x86_mmx %3 to i64
309 ret i64 %4
310 }
311 declare x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx, x86_mmx) nounwind readnone
312
313 define i64 @test_pi2fd(x86_mmx* %a0) optsize {
314 ; CHECK-LABEL: test_pi2fd:
315 ; CHECK: # %bb.0:
316 ; CHECK-NEXT: pi2fd (%rdi), %mm0 # sched: [9:1.00]
317 ; CHECK-NEXT: pi2fd %mm0, %mm0 # sched: [3:1.00]
318 ; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00]
319 ; CHECK-NEXT: retq # sched: [1:1.00]
320 %1 = load x86_mmx, x86_mmx *%a0, align 8
321 %2 = call x86_mmx @llvm.x86.3dnow.pi2fd(x86_mmx %1)
322 %3 = call x86_mmx @llvm.x86.3dnow.pi2fd(x86_mmx %2)
323 %4 = bitcast x86_mmx %3 to i64
324 ret i64 %4
325 }
326 declare x86_mmx @llvm.x86.3dnow.pi2fd(x86_mmx) nounwind readnone
327
328 define i64 @test_pi2fw(x86_mmx* %a0) optsize {
329 ; CHECK-LABEL: test_pi2fw:
330 ; CHECK: # %bb.0:
331 ; CHECK-NEXT: pi2fw (%rdi), %mm0 # sched: [9:1.00]
332 ; CHECK-NEXT: pi2fw %mm0, %mm0 # sched: [3:1.00]
333 ; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00]
334 ; CHECK-NEXT: retq # sched: [1:1.00]
335 %1 = load x86_mmx, x86_mmx *%a0, align 8
336 %2 = call x86_mmx @llvm.x86.3dnowa.pi2fw(x86_mmx %1)
337 %3 = call x86_mmx @llvm.x86.3dnowa.pi2fw(x86_mmx %2)
338 %4 = bitcast x86_mmx %3 to i64
339 ret i64 %4
340 }
341 declare x86_mmx @llvm.x86.3dnowa.pi2fw(x86_mmx) nounwind readnone
342
343 define i64 @test_pmulhrw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
344 ; CHECK-LABEL: test_pmulhrw:
345 ; CHECK: # %bb.0:
346 ; CHECK-NEXT: pmulhrw %mm1, %mm0 # sched: [5:1.00]
347 ; CHECK-NEXT: pmulhrw (%rdi), %mm0 # sched: [10:1.00]
348 ; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00]
349 ; CHECK-NEXT: retq # sched: [1:1.00]
350 %1 = call x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx %a0, x86_mmx %a1)
351 %2 = load x86_mmx, x86_mmx *%a2, align 8
352 %3 = call x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx %1, x86_mmx %2)
353 %4 = bitcast x86_mmx %3 to i64
354 ret i64 %4
355 }
356 declare x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx, x86_mmx) nounwind readnone
357
358 define void @test_prefetch(i8* %a0) optsize {
359 ; CHECK-LABEL: test_prefetch:
360 ; CHECK: # %bb.0:
361 ; CHECK-NEXT: #APP
362 ; CHECK-NEXT: prefetch (%rdi) # sched: [5:0.50]
363 ; CHECK-NEXT: #NO_APP
364 ; CHECK-NEXT: retq # sched: [1:1.00]
365 tail call void asm sideeffect "prefetch $0", "*m"(i8 *%a0) nounwind
366 ret void
367 }
368
369 define void @test_prefetchw(i8* %a0) optsize {
370 ; CHECK-LABEL: test_prefetchw:
371 ; CHECK: # %bb.0:
372 ; CHECK-NEXT: #APP
373 ; CHECK-NEXT: prefetchw (%rdi) # sched: [5:0.50]
374 ; CHECK-NEXT: #NO_APP
375 ; CHECK-NEXT: retq # sched: [1:1.00]
376 tail call void asm sideeffect "prefetchw $0", "*m"(i8 *%a0) nounwind
377 ret void
378 }
379
380 define i64 @test_pswapd(x86_mmx* %a0) optsize {
381 ; CHECK-LABEL: test_pswapd:
382 ; CHECK: # %bb.0:
383 ; CHECK-NEXT: pswapd (%rdi), %mm0 # mm0 = mem[1,0] sched: [6:1.00]
384 ; CHECK-NEXT: pswapd %mm0, %mm0 # mm0 = mm0[1,0] sched: [1:1.00]
385 ; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00]
386 ; CHECK-NEXT: retq # sched: [1:1.00]
387 %1 = load x86_mmx, x86_mmx *%a0, align 8
388 %2 = call x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx %1)
389 %3 = call x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx %2)
390 %4 = bitcast x86_mmx %3 to i64
391 ret i64 %4
392 }
393 declare x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx) nounwind readnone
+0
-114
test/CodeGen/X86/adx-schedule.ll less more
None ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+adx | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE --check-prefix=SKL
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE --check-prefix=SKX
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=KNL
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
7
8 define void @test_adcx(i32 %a0, i32* %a1, i64 %a2, i64* %a3) optsize {
9 ; GENERIC-LABEL: test_adcx:
10 ; GENERIC: # %bb.0:
11 ; GENERIC-NEXT: #APP
12 ; GENERIC-NEXT: adcxl %edi, %edi # sched: [2:0.67]
13 ; GENERIC-NEXT: adcxq %rdx, %rdx # sched: [2:0.67]
14 ; GENERIC-NEXT: adcxl (%rsi), %edi # sched: [7:0.67]
15 ; GENERIC-NEXT: adcxq (%rcx), %rdx # sched: [7:0.67]
16 ; GENERIC-NEXT: #NO_APP
17 ; GENERIC-NEXT: retq # sched: [1:1.00]
18 ;
19 ; BROADWELL-LABEL: test_adcx:
20 ; BROADWELL: # %bb.0:
21 ; BROADWELL-NEXT: #APP
22 ; BROADWELL-NEXT: adcxl %edi, %edi # sched: [1:0.50]
23 ; BROADWELL-NEXT: adcxq %rdx, %rdx # sched: [1:0.50]
24 ; BROADWELL-NEXT: adcxl (%rsi), %edi # sched: [6:0.50]
25 ; BROADWELL-NEXT: adcxq (%rcx), %rdx # sched: [6:0.50]
26 ; BROADWELL-NEXT: #NO_APP
27 ; BROADWELL-NEXT: retq # sched: [7:1.00]
28 ;
29 ; SKYLAKE-LABEL: test_adcx:
30 ; SKYLAKE: # %bb.0:
31 ; SKYLAKE-NEXT: #APP
32 ; SKYLAKE-NEXT: adcxl %edi, %edi # sched: [1:0.50]
33 ; SKYLAKE-NEXT: adcxq %rdx, %rdx # sched: [1:0.50]
34 ; SKYLAKE-NEXT: adcxl (%rsi), %edi # sched: [6:0.50]
35 ; SKYLAKE-NEXT: adcxq (%rcx), %rdx # sched: [6:0.50]
36 ; SKYLAKE-NEXT: #NO_APP
37 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
38 ;
39 ; KNL-LABEL: test_adcx:
40 ; KNL: # %bb.0:
41 ; KNL-NEXT: #APP
42 ; KNL-NEXT: adcxl %edi, %edi # sched: [2:0.50]
43 ; KNL-NEXT: adcxq %rdx, %rdx # sched: [2:0.50]
44 ; KNL-NEXT: adcxl (%rsi), %edi # sched: [7:0.50]
45 ; KNL-NEXT: adcxq (%rcx), %rdx # sched: [7:0.50]
46 ; KNL-NEXT: #NO_APP
47 ; KNL-NEXT: retq # sched: [7:1.00]
48 ;
49 ; ZNVER1-LABEL: test_adcx:
50 ; ZNVER1: # %bb.0:
51 ; ZNVER1-NEXT: #APP
52 ; ZNVER1-NEXT: adcxl %edi, %edi # sched: [1:0.25]
53 ; ZNVER1-NEXT: adcxq %rdx, %rdx # sched: [1:0.25]
54 ; ZNVER1-NEXT: adcxl (%rsi), %edi # sched: [5:0.50]
55 ; ZNVER1-NEXT: adcxq (%rcx), %rdx # sched: [5:0.50]
56 ; ZNVER1-NEXT: #NO_APP
57 ; ZNVER1-NEXT: retq # sched: [1:0.50]
58 tail call void asm "adcx $0, $0 \0A\09 adcx $2, $2 \0A\09 adcx $1, $0 \0A\09 adcx $3, $2", "r,*m,r,*m"(i32 %a0, i32* %a1, i64 %a2, i64* %a3) nounwind
59 ret void
60 }
61 define void @test_adox(i32 %a0, i32* %a1, i64 %a2, i64* %a3) optsize {
62 ; GENERIC-LABEL: test_adox:
63 ; GENERIC: # %bb.0:
64 ; GENERIC-NEXT: #APP
65 ; GENERIC-NEXT: adoxl %edi, %edi # sched: [2:0.67]
66 ; GENERIC-NEXT: adoxq %rdx, %rdx # sched: [2:0.67]
67 ; GENERIC-NEXT: adoxl (%rsi), %edi # sched: [7:0.67]
68 ; GENERIC-NEXT: adoxq (%rcx), %rdx # sched: [7:0.67]
69 ; GENERIC-NEXT: #NO_APP
70 ; GENERIC-NEXT: retq # sched: [1:1.00]
71 ;
72 ; BROADWELL-LABEL: test_adox:
73 ; BROADWELL: # %bb.0:
74 ; BROADWELL-NEXT: #APP
75 ; BROADWELL-NEXT: adoxl %edi, %edi # sched: [1:0.50]
76 ; BROADWELL-NEXT: adoxq %rdx, %rdx # sched: [1:0.50]
77 ; BROADWELL-NEXT: adoxl (%rsi), %edi # sched: [6:0.50]
78 ; BROADWELL-NEXT: adoxq (%rcx), %rdx # sched: [6:0.50]
79 ; BROADWELL-NEXT: #NO_APP
80 ; BROADWELL-NEXT: retq # sched: [7:1.00]
81 ;
82 ; SKYLAKE-LABEL: test_adox:
83 ; SKYLAKE: # %bb.0:
84 ; SKYLAKE-NEXT: #APP
85 ; SKYLAKE-NEXT: adoxl %edi, %edi # sched: [1:0.50]
86 ; SKYLAKE-NEXT: adoxq %rdx, %rdx # sched: [1:0.50]
87 ; SKYLAKE-NEXT: adoxl (%rsi), %edi # sched: [6:0.50]
88 ; SKYLAKE-NEXT: adoxq (%rcx), %rdx # sched: [6:0.50]
89 ; SKYLAKE-NEXT: #NO_APP
90 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
91 ;
92 ; KNL-LABEL: test_adox:
93 ; KNL: # %bb.0:
94 ; KNL-NEXT: #APP
95 ; KNL-NEXT: adoxl %edi, %edi # sched: [2:0.50]
96 ; KNL-NEXT: adoxq %rdx, %rdx # sched: [2:0.50]
97 ; KNL-NEXT: adoxl (%rsi), %edi # sched: [7:0.50]
98 ; KNL-NEXT: adoxq (%rcx), %rdx # sched: [7:0.50]
99 ; KNL-NEXT: #NO_APP
100 ; KNL-NEXT: retq # sched: [7:1.00]
101 ;
102 ; ZNVER1-LABEL: test_adox:
103 ; ZNVER1: # %bb.0:
104 ; ZNVER1-NEXT: #APP
105 ; ZNVER1-NEXT: adoxl %edi, %edi # sched: [1:0.25]
106 ; ZNVER1-NEXT: adoxq %rdx, %rdx # sched: [1:0.25]
107 ; ZNVER1-NEXT: adoxl (%rsi), %edi # sched: [5:0.50]
108 ; ZNVER1-NEXT: adoxq (%rcx), %rdx # sched: [5:0.50]
109 ; ZNVER1-NEXT: #NO_APP
110 ; ZNVER1-NEXT: retq # sched: [1:0.50]
111 tail call void asm "adox $0, $0 \0A\09 adox $2, $2 \0A\09 adox $1, $0 \0A\09 adox $3, $2", "r,*m,r,*m"(i32 %a0, i32* %a1, i64 %a2, i64* %a3) nounwind
112 ret void
113 }
+0
-751
test/CodeGen/X86/aes-schedule.ll less more
None ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+aes | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm -mattr=+aes | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=goldmont -mattr=+aes | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=+aes,-avx | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=+aes,-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=+aes,-avx | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=+aes,-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY
8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=+aes,-avx | FileCheck %s --check-prefixes=CHECK,HASWELL-SSE
9 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=+aes,-avx2 | FileCheck %s --check-prefixes=CHECK,HASWELL
10 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=+aes,-avx | FileCheck %s --check-prefixes=CHECK,BROADWELL-SSE
11 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=+aes,-avx2 | FileCheck %s --check-prefixes=CHECK,BROADWELL
12 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=+aes,-avx | FileCheck %s --check-prefixes=CHECK,SKYLAKE-SSE
13 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=+aes,-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE
14 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=+aes,-avx | FileCheck %s --check-prefixes=CHECK,SKX-SSE
15 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=+aes,-avx2 | FileCheck %s --check-prefixes=CHECK,SKX
16 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 -mattr=+aes,-avx | FileCheck %s --check-prefixes=CHECK,BDVER2-SSE
17 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 -mattr=+aes,-avx2 | FileCheck %s --check-prefixes=CHECK,BDVER2
18 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=+aes,-avx | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE
19 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=+aes,-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2
20 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=+aes,-avx | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE
21 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=+aes,-avx2 | FileCheck %s --check-prefixes=CHECK,ZNVER1
22
23 define <2 x i64> @test_aesdec(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
24 ; GENERIC-LABEL: test_aesdec:
25 ; GENERIC: # %bb.0:
26 ; GENERIC-NEXT: aesdec %xmm1, %xmm0 # sched: [7:1.00]
27 ; GENERIC-NEXT: aesdec (%rdi), %xmm0 # sched: [13:1.00]
28 ; GENERIC-NEXT: retq # sched: [1:1.00]
29 ;
30 ; SLM-LABEL: test_aesdec:
31 ; SLM: # %bb.0:
32 ; SLM-NEXT: aesdec %xmm1, %xmm0 # sched: [8:5.00]
33 ; SLM-NEXT: aesdec (%rdi), %xmm0 # sched: [8:5.00]
34 ; SLM-NEXT: retq # sched: [4:1.00]
35 ;
36 ; SANDY-SSE-LABEL: test_aesdec:
37 ; SANDY-SSE: # %bb.0:
38 ; SANDY-SSE-NEXT: aesdec %xmm1, %xmm0 # sched: [7:1.00]
39 ; SANDY-SSE-NEXT: aesdec (%rdi), %xmm0 # sched: [13:1.00]
40 ; SANDY-SSE-NEXT: retq # sched: [1:1.00]
41 ;
42 ; SANDY-LABEL: test_aesdec:
43 ; SANDY: # %bb.0:
44 ; SANDY-NEXT: vaesdec %xmm1, %xmm0, %xmm0 # sched: [7:1.00]
45 ; SANDY-NEXT: vaesdec (%rdi), %xmm0, %xmm0 # sched: [13:1.00]
46 ; SANDY-NEXT: retq # sched: [1:1.00]
47 ;
48 ; HASWELL-SSE-LABEL: test_aesdec:
49 ; HASWELL-SSE: # %bb.0:
50 ; HASWELL-SSE-NEXT: aesdec %xmm1, %xmm0 # sched: [7:1.00]
51 ; HASWELL-SSE-NEXT: aesdec (%rdi), %xmm0 # sched: [13:1.00]
52 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
53 ;
54 ; HASWELL-LABEL: test_aesdec:
55 ; HASWELL: # %bb.0:
56 ; HASWELL-NEXT: vaesdec %xmm1, %xmm0, %xmm0 # sched: [7:1.00]
57 ; HASWELL-NEXT: vaesdec (%rdi), %xmm0, %xmm0 # sched: [13:1.00]
58 ; HASWELL-NEXT: retq # sched: [7:1.00]
59 ;
60 ; BROADWELL-SSE-LABEL: test_aesdec:
61 ; BROADWELL-SSE: # %bb.0:
62 ; BROADWELL-SSE-NEXT: aesdec %xmm1, %xmm0 # sched: [7:1.00]
63 ; BROADWELL-SSE-NEXT: aesdec (%rdi), %xmm0 # sched: [12:1.00]
64 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
65 ;
66 ; BROADWELL-LABEL: test_aesdec:
67 ; BROADWELL: # %bb.0:
68 ; BROADWELL-NEXT: vaesdec %xmm1, %xmm0, %xmm0 # sched: [7:1.00]
69 ; BROADWELL-NEXT: vaesdec (%rdi), %xmm0, %xmm0 # sched: [12:1.00]
70 ; BROADWELL-NEXT: retq # sched: [7:1.00]
71 ;
72 ; SKYLAKE-SSE-LABEL: test_aesdec:
73 ; SKYLAKE-SSE: # %bb.0:
74 ; SKYLAKE-SSE-NEXT: aesdec %xmm1, %xmm0 # sched: [4:1.00]
75 ; SKYLAKE-SSE-NEXT: aesdec (%rdi), %xmm0 # sched: [10:1.00]
76 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
77 ;
78 ; SKYLAKE-LABEL: test_aesdec:
79 ; SKYLAKE: # %bb.0:
80 ; SKYLAKE-NEXT: vaesdec %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
81 ; SKYLAKE-NEXT: vaesdec (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
82 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
83 ;
84 ; SKX-SSE-LABEL: test_aesdec:
85 ; SKX-SSE: # %bb.0:
86 ; SKX-SSE-NEXT: aesdec %xmm1, %xmm0 # sched: [4:1.00]
87 ; SKX-SSE-NEXT: aesdec (%rdi), %xmm0 # sched: [10:1.00]
88 ; SKX-SSE-NEXT: retq # sched: [7:1.00]
89 ;
90 ; SKX-LABEL: test_aesdec:
91 ; SKX: # %bb.0:
92 ; SKX-NEXT: vaesdec %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
93 ; SKX-NEXT: vaesdec (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
94 ; SKX-NEXT: retq # sched: [7:1.00]
95 ;
96 ; BDVER2-SSE-LABEL: test_aesdec:
97 ; BDVER2-SSE: # %bb.0:
98 ; BDVER2-SSE-NEXT: aesdec %xmm1, %xmm0 # sched: [9:1.00]
99 ; BDVER2-SSE-NEXT: aesdec (%rdi), %xmm0 # sched: [14:1.00]
100 ; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
101 ;
102 ; BDVER2-LABEL: test_aesdec:
103 ; BDVER2: # %bb.0:
104 ; BDVER2-NEXT: vaesdec %xmm1, %xmm0, %xmm0 # sched: [9:1.00]
105 ; BDVER2-NEXT: vaesdec (%rdi), %xmm0, %xmm0 # sched: [14:1.00]
106 ; BDVER2-NEXT: retq # sched: [5:1.00]
107 ;
108 ; BTVER2-SSE-LABEL: test_aesdec:
109 ; BTVER2-SSE: # %bb.0:
110 ; BTVER2-SSE-NEXT: aesdec %xmm1, %xmm0 # sched: [3:1.00]
111 ; BTVER2-SSE-NEXT: aesdec (%rdi), %xmm0 # sched: [8:1.00]
112 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
113 ;
114 ; BTVER2-LABEL: test_aesdec:
115 ; BTVER2: # %bb.0:
116 ; BTVER2-NEXT: vaesdec %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
117 ; BTVER2-NEXT: vaesdec (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
118 ; BTVER2-NEXT: retq # sched: [4:1.00]
119 ;
120 ; ZNVER1-SSE-LABEL: test_aesdec:
121 ; ZNVER1-SSE: # %bb.0:
122 ; ZNVER1-SSE-NEXT: aesdec %xmm1, %xmm0 # sched: [4:0.50]
123 ; ZNVER1-SSE-NEXT: aesdec (%rdi), %xmm0 # sched: [11:0.50]
124 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
125 ;
126 ; ZNVER1-LABEL: test_aesdec:
127 ; ZNVER1: # %bb.0:
128 ; ZNVER1-NEXT: vaesdec %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
129 ; ZNVER1-NEXT: vaesdec (%rdi), %xmm0, %xmm0 # sched: [11:0.50]
130 ; ZNVER1-NEXT: retq # sched: [1:0.50]
131 %1 = load <2 x i64>, <2 x i64> *%a2, align 16
132 %2 = call <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64> %a0, <2 x i64> %a1)
133 %3 = call <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64> %2, <2 x i64> %1)
134 ret <2 x i64> %3
135 }
136 declare <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64>, <2 x i64>)
137
138 define <2 x i64> @test_aesdeclast(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
139 ; GENERIC-LABEL: test_aesdeclast:
140 ; GENERIC: # %bb.0:
141 ; GENERIC-NEXT: aesdeclast %xmm1, %xmm0 # sched: [7:1.00]
142 ; GENERIC-NEXT: aesdeclast (%rdi), %xmm0 # sched: [13:1.00]
143 ; GENERIC-NEXT: retq # sched: [1:1.00]
144 ;
145 ; SLM-LABEL: test_aesdeclast:
146 ; SLM: # %bb.0:
147 ; SLM-NEXT: aesdeclast %xmm1, %xmm0 # sched: [8:5.00]
148 ; SLM-NEXT: aesdeclast (%rdi), %xmm0 # sched: [8:5.00]
149 ; SLM-NEXT: retq # sched: [4:1.00]
150 ;
151 ; SANDY-SSE-LABEL: test_aesdeclast:
152 ; SANDY-SSE: # %bb.0:
153 ; SANDY-SSE-NEXT: aesdeclast %xmm1, %xmm0 # sched: [7:1.00]
154 ; SANDY-SSE-NEXT: aesdeclast (%rdi), %xmm0 # sched: [13:1.00]
155 ; SANDY-SSE-NEXT: retq # sched: [1:1.00]
156 ;
157 ; SANDY-LABEL: test_aesdeclast:
158 ; SANDY: # %bb.0:
159 ; SANDY-NEXT: vaesdeclast %xmm1, %xmm0, %xmm0 # sched: [7:1.00]
160 ; SANDY-NEXT: vaesdeclast (%rdi), %xmm0, %xmm0 # sched: [13:1.00]
161 ; SANDY-NEXT: retq # sched: [1:1.00]
162 ;
163 ; HASWELL-SSE-LABEL: test_aesdeclast:
164 ; HASWELL-SSE: # %bb.0:
165 ; HASWELL-SSE-NEXT: aesdeclast %xmm1, %xmm0 # sched: [7:1.00]
166 ; HASWELL-SSE-NEXT: aesdeclast (%rdi), %xmm0 # sched: [13:1.00]
167 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
168 ;
169 ; HASWELL-LABEL: test_aesdeclast:
170 ; HASWELL: # %bb.0:
171 ; HASWELL-NEXT: vaesdeclast %xmm1, %xmm0, %xmm0 # sched: [7:1.00]
172 ; HASWELL-NEXT: vaesdeclast (%rdi), %xmm0, %xmm0 # sched: [13:1.00]
173 ; HASWELL-NEXT: retq # sched: [7:1.00]
174 ;
175 ; BROADWELL-SSE-LABEL: test_aesdeclast:
176 ; BROADWELL-SSE: # %bb.0:
177 ; BROADWELL-SSE-NEXT: aesdeclast %xmm1, %xmm0 # sched: [7:1.00]
178 ; BROADWELL-SSE-NEXT: aesdeclast (%rdi), %xmm0 # sched: [12:1.00]
179 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
180 ;
181 ; BROADWELL-LABEL: test_aesdeclast:
182 ; BROADWELL: # %bb.0:
183 ; BROADWELL-NEXT: vaesdeclast %xmm1, %xmm0, %xmm0 # sched: [7:1.00]
184 ; BROADWELL-NEXT: vaesdeclast (%rdi), %xmm0, %xmm0 # sched: [12:1.00]
185 ; BROADWELL-NEXT: retq # sched: [7:1.00]
186 ;
187 ; SKYLAKE-SSE-LABEL: test_aesdeclast:
188 ; SKYLAKE-SSE: # %bb.0:
189 ; SKYLAKE-SSE-NEXT: aesdeclast %xmm1, %xmm0 # sched: [4:1.00]
190 ; SKYLAKE-SSE-NEXT: aesdeclast (%rdi), %xmm0 # sched: [10:1.00]
191 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
192 ;
193 ; SKYLAKE-LABEL: test_aesdeclast:
194 ; SKYLAKE: # %bb.0:
195 ; SKYLAKE-NEXT: vaesdeclast %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
196 ; SKYLAKE-NEXT: vaesdeclast (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
197 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
198 ;
199 ; SKX-SSE-LABEL: test_aesdeclast:
200 ; SKX-SSE: # %bb.0:
201 ; SKX-SSE-NEXT: aesdeclast %xmm1, %xmm0 # sched: [4:1.00]
202 ; SKX-SSE-NEXT: aesdeclast (%rdi), %xmm0 # sched: [10:1.00]
203 ; SKX-SSE-NEXT: retq # sched: [7:1.00]
204 ;
205 ; SKX-LABEL: test_aesdeclast:
206 ; SKX: # %bb.0:
207 ; SKX-NEXT: vaesdeclast %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
208 ; SKX-NEXT: vaesdeclast (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
209 ; SKX-NEXT: retq # sched: [7:1.00]
210 ;
211 ; BDVER2-SSE-LABEL: test_aesdeclast:
212 ; BDVER2-SSE: # %bb.0:
213 ; BDVER2-SSE-NEXT: aesdeclast %xmm1, %xmm0 # sched: [9:1.00]
214 ; BDVER2-SSE-NEXT: aesdeclast (%rdi), %xmm0 # sched: [14:1.00]
215 ; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
216 ;
217 ; BDVER2-LABEL: test_aesdeclast:
218 ; BDVER2: # %bb.0:
219 ; BDVER2-NEXT: vaesdeclast %xmm1, %xmm0, %xmm0 # sched: [9:1.00]
220 ; BDVER2-NEXT: vaesdeclast (%rdi), %xmm0, %xmm0 # sched: [14:1.00]
221 ; BDVER2-NEXT: retq # sched: [5:1.00]
222 ;
223 ; BTVER2-SSE-LABEL: test_aesdeclast:
224 ; BTVER2-SSE: # %bb.0:
225 ; BTVER2-SSE-NEXT: aesdeclast %xmm1, %xmm0 # sched: [3:1.00]
226 ; BTVER2-SSE-NEXT: aesdeclast (%rdi), %xmm0 # sched: [8:1.00]
227 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
228 ;
229 ; BTVER2-LABEL: test_aesdeclast:
230 ; BTVER2: # %bb.0:
231 ; BTVER2-NEXT: vaesdeclast %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
232 ; BTVER2-NEXT: vaesdeclast (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
233 ; BTVER2-NEXT: retq # sched: [4:1.00]
234 ;
235 ; ZNVER1-SSE-LABEL: test_aesdeclast:
236 ; ZNVER1-SSE: # %bb.0:
237 ; ZNVER1-SSE-NEXT: aesdeclast %xmm1, %xmm0 # sched: [4:0.50]
238 ; ZNVER1-SSE-NEXT: aesdeclast (%rdi), %xmm0 # sched: [11:0.50]
239 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
240 ;
241 ; ZNVER1-LABEL: test_aesdeclast:
242 ; ZNVER1: # %bb.0:
243 ; ZNVER1-NEXT: vaesdeclast %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
244 ; ZNVER1-NEXT: vaesdeclast (%rdi), %xmm0, %xmm0 # sched: [11:0.50]
245 ; ZNVER1-NEXT: retq # sched: [1:0.50]
246 %1 = load <2 x i64>, <2 x i64> *%a2, align 16
247 %2 = call <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64> %a0, <2 x i64> %a1)
248 %3 = call <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64> %2, <2 x i64> %1)
249 ret <2 x i64> %3
250 }
251 declare <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64>, <2 x i64>)
252
253 define <2 x i64> @test_aesenc(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
254 ; GENERIC-LABEL: test_aesenc:
255 ; GENERIC: # %bb.0:
256 ; GENERIC-NEXT: aesenc %xmm1, %xmm0 # sched: [7:1.00]
257 ; GENERIC-NEXT: aesenc (%rdi), %xmm0 # sched: [13:1.00]
258 ; GENERIC-NEXT: retq # sched: [1:1.00]
259 ;
260 ; SLM-LABEL: test_aesenc:
261 ; SLM: # %bb.0:
262 ; SLM-NEXT: aesenc %xmm1, %xmm0 # sched: [8:5.00]
263 ; SLM-NEXT: aesenc (%rdi), %xmm0 # sched: [8:5.00]
264 ; SLM-NEXT: retq # sched: [4:1.00]
265 ;
266 ; SANDY-SSE-LABEL: test_aesenc:
267 ; SANDY-SSE: # %bb.0:
268 ; SANDY-SSE-NEXT: aesenc %xmm1, %xmm0 # sched: [7:1.00]
269 ; SANDY-SSE-NEXT: aesenc (%rdi), %xmm0 # sched: [13:1.00]
270 ; SANDY-SSE-NEXT: retq # sched: [1:1.00]
271 ;
272 ; SANDY-LABEL: test_aesenc:
273 ; SANDY: # %bb.0:
274 ; SANDY-NEXT: vaesenc %xmm1, %xmm0, %xmm0 # sched: [7:1.00]
275 ; SANDY-NEXT: vaesenc (%rdi), %xmm0, %xmm0 # sched: [13:1.00]
276 ; SANDY-NEXT: retq # sched: [1:1.00]
277 ;
278 ; HASWELL-SSE-LABEL: test_aesenc:
279 ; HASWELL-SSE: # %bb.0:
280 ; HASWELL-SSE-NEXT: aesenc %xmm1, %xmm0 # sched: [7:1.00]
281 ; HASWELL-SSE-NEXT: aesenc (%rdi), %xmm0 # sched: [13:1.00]
282 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
283 ;
284 ; HASWELL-LABEL: test_aesenc:
285 ; HASWELL: # %bb.0:
286 ; HASWELL-NEXT: vaesenc %xmm1, %xmm0, %xmm0 # sched: [7:1.00]
287 ; HASWELL-NEXT: vaesenc (%rdi), %xmm0, %xmm0 # sched: [13:1.00]
288 ; HASWELL-NEXT: retq # sched: [7:1.00]
289 ;
290 ; BROADWELL-SSE-LABEL: test_aesenc:
291 ; BROADWELL-SSE: # %bb.0:
292 ; BROADWELL-SSE-NEXT: aesenc %xmm1, %xmm0 # sched: [7:1.00]
293 ; BROADWELL-SSE-NEXT: aesenc (%rdi), %xmm0 # sched: [12:1.00]
294 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
295 ;
296 ; BROADWELL-LABEL: test_aesenc:
297 ; BROADWELL: # %bb.0:
298 ; BROADWELL-NEXT: vaesenc %xmm1, %xmm0, %xmm0 # sched: [7:1.00]
299 ; BROADWELL-NEXT: vaesenc (%rdi), %xmm0, %xmm0 # sched: [12:1.00]
300 ; BROADWELL-NEXT: retq # sched: [7:1.00]
301 ;
302 ; SKYLAKE-SSE-LABEL: test_aesenc:
303 ; SKYLAKE-SSE: # %bb.0:
304 ; SKYLAKE-SSE-NEXT: aesenc %xmm1, %xmm0 # sched: [4:1.00]
305 ; SKYLAKE-SSE-NEXT: aesenc (%rdi), %xmm0 # sched: [10:1.00]
306 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
307 ;
308 ; SKYLAKE-LABEL: test_aesenc:
309 ; SKYLAKE: # %bb.0:
310 ; SKYLAKE-NEXT: vaesenc %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
311 ; SKYLAKE-NEXT: vaesenc (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
312 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
313 ;
314 ; SKX-SSE-LABEL: test_aesenc:
315 ; SKX-SSE: # %bb.0:
316 ; SKX-SSE-NEXT: aesenc %xmm1, %xmm0 # sched: [4:1.00]
317 ; SKX-SSE-NEXT: aesenc (%rdi), %xmm0 # sched: [10:1.00]
318 ; SKX-SSE-NEXT: retq # sched: [7:1.00]
319 ;
320 ; SKX-LABEL: test_aesenc:
321 ; SKX: # %bb.0:
322 ; SKX-NEXT: vaesenc %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
323 ; SKX-NEXT: vaesenc (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
324 ; SKX-NEXT: retq # sched: [7:1.00]
325 ;
326 ; BDVER2-SSE-LABEL: test_aesenc:
327 ; BDVER2-SSE: # %bb.0:
328 ; BDVER2-SSE-NEXT: aesenc %xmm1, %xmm0 # sched: [9:1.00]
329 ; BDVER2-SSE-NEXT: aesenc (%rdi), %xmm0 # sched: [14:1.00]
330 ; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
331 ;
332 ; BDVER2-LABEL: test_aesenc:
333 ; BDVER2: # %bb.0:
334 ; BDVER2-NEXT: vaesenc %xmm1, %xmm0, %xmm0 # sched: [9:1.00]
335 ; BDVER2-NEXT: vaesenc (%rdi), %xmm0, %xmm0 # sched: [14:1.00]
336 ; BDVER2-NEXT: retq # sched: [5:1.00]
337 ;
338 ; BTVER2-SSE-LABEL: test_aesenc:
339 ; BTVER2-SSE: # %bb.0:
340 ; BTVER2-SSE-NEXT: aesenc %xmm1, %xmm0 # sched: [3:1.00]
341 ; BTVER2-SSE-NEXT: aesenc (%rdi), %xmm0 # sched: [8:1.00]
342 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
343 ;
344 ; BTVER2-LABEL: test_aesenc:
345 ; BTVER2: # %bb.0:
346 ; BTVER2-NEXT: vaesenc %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
347 ; BTVER2-NEXT: vaesenc (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
348 ; BTVER2-NEXT: retq # sched: [4:1.00]
349 ;
350 ; ZNVER1-SSE-LABEL: test_aesenc:
351 ; ZNVER1-SSE: # %bb.0:
352 ; ZNVER1-SSE-NEXT: aesenc %xmm1, %xmm0 # sched: [4:0.50]
353 ; ZNVER1-SSE-NEXT: aesenc (%rdi), %xmm0 # sched: [11:0.50]
354 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
355 ;
356 ; ZNVER1-LABEL: test_aesenc:
357 ; ZNVER1: # %bb.0:
358 ; ZNVER1-NEXT: vaesenc %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
359 ; ZNVER1-NEXT: vaesenc (%rdi), %xmm0, %xmm0 # sched: [11:0.50]
360 ; ZNVER1-NEXT: retq # sched: [1:0.50]
361 %1 = load <2 x i64>, <2 x i64> *%a2, align 16
362 %2 = call <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64> %a0, <2 x i64> %a1)
363 %3 = call <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64> %2, <2 x i64> %1)
364 ret <2 x i64> %3
365 }
366 declare <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64>, <2 x i64>)
367
368 define <2 x i64> @test_aesenclast(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
369 ; GENERIC-LABEL: test_aesenclast:
370 ; GENERIC: # %bb.0:
371 ; GENERIC-NEXT: aesenclast %xmm1, %xmm0 # sched: [7:1.00]
372 ; GENERIC-NEXT: aesenclast (%rdi), %xmm0 # sched: [13:1.00]
373 ; GENERIC-NEXT: retq # sched: [1:1.00]
374 ;
375 ; SLM-LABEL: test_aesenclast:
376 ; SLM: # %bb.0:
377 ; SLM-NEXT: aesenclast %xmm1, %xmm0 # sched: [8:5.00]
378 ; SLM-NEXT: aesenclast (%rdi), %xmm0 # sched: [8:5.00]
379 ; SLM-NEXT: retq # sched: [4:1.00]
380 ;
381 ; SANDY-SSE-LABEL: test_aesenclast:
382 ; SANDY-SSE: # %bb.0:
383 ; SANDY-SSE-NEXT: aesenclast %xmm1, %xmm0 # sched: [7:1.00]
384 ; SANDY-SSE-NEXT: aesenclast (%rdi), %xmm0 # sched: [13:1.00]
385 ; SANDY-SSE-NEXT: retq # sched: [1:1.00]
386 ;
387 ; SANDY-LABEL: test_aesenclast:
388 ; SANDY: # %bb.0:
389 ; SANDY-NEXT: vaesenclast %xmm1, %xmm0, %xmm0 # sched: [7:1.00]
390 ; SANDY-NEXT: vaesenclast (%rdi), %xmm0, %xmm0 # sched: [13:1.00]
391 ; SANDY-NEXT: retq # sched: [1:1.00]
392 ;
393 ; HASWELL-SSE-LABEL: test_aesenclast:
394 ; HASWELL-SSE: # %bb.0:
395 ; HASWELL-SSE-NEXT: aesenclast %xmm1, %xmm0 # sched: [7:1.00]
396 ; HASWELL-SSE-NEXT: aesenclast (%rdi), %xmm0 # sched: [13:1.00]
397 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
398 ;
399 ; HASWELL-LABEL: test_aesenclast:
400 ; HASWELL: # %bb.0:
401 ; HASWELL-NEXT: vaesenclast %xmm1, %xmm0, %xmm0 # sched: [7:1.00]
402 ; HASWELL-NEXT: vaesenclast (%rdi), %xmm0, %xmm0 # sched: [13:1.00]
403 ; HASWELL-NEXT: retq # sched: [7:1.00]
404 ;
405 ; BROADWELL-SSE-LABEL: test_aesenclast:
406 ; BROADWELL-SSE: # %bb.0:
407 ; BROADWELL-SSE-NEXT: aesenclast %xmm1, %xmm0 # sched: [7:1.00]
408 ; BROADWELL-SSE-NEXT: aesenclast (%rdi), %xmm0 # sched: [12:1.00]
409 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
410 ;
411 ; BROADWELL-LABEL: test_aesenclast:
412 ; BROADWELL: # %bb.0:
413 ; BROADWELL-NEXT: vaesenclast %xmm1, %xmm0, %xmm0 # sched: [7:1.00]
414 ; BROADWELL-NEXT: vaesenclast (%rdi), %xmm0, %xmm0 # sched: [12:1.00]
415 ; BROADWELL-NEXT: retq # sched: [7:1.00]
416 ;
417 ; SKYLAKE-SSE-LABEL: test_aesenclast:
418 ; SKYLAKE-SSE: # %bb.0:
419 ; SKYLAKE-SSE-NEXT: aesenclast %xmm1, %xmm0 # sched: [4:1.00]
420 ; SKYLAKE-SSE-NEXT: aesenclast (%rdi), %xmm0 # sched: [10:1.00]
421 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
422 ;
423 ; SKYLAKE-LABEL: test_aesenclast:
424 ; SKYLAKE: # %bb.0:
425 ; SKYLAKE-NEXT: vaesenclast %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
426 ; SKYLAKE-NEXT: vaesenclast (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
427 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
428 ;
429 ; SKX-SSE-LABEL: test_aesenclast:
430 ; SKX-SSE: # %bb.0:
431 ; SKX-SSE-NEXT: aesenclast %xmm1, %xmm0 # sched: [4:1.00]
432 ; SKX-SSE-NEXT: aesenclast (%rdi), %xmm0 # sched: [10:1.00]
433 ; SKX-SSE-NEXT: retq # sched: [7:1.00]
434 ;
435 ; SKX-LABEL: test_aesenclast:
436 ; SKX: # %bb.0:
437 ; SKX-NEXT: vaesenclast %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
438 ; SKX-NEXT: vaesenclast (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
439 ; SKX-NEXT: retq # sched: [7:1.00]
440 ;
441 ; BDVER2-SSE-LABEL: test_aesenclast:
442 ; BDVER2-SSE: # %bb.0:
443 ; BDVER2-SSE-NEXT: aesenclast %xmm1, %xmm0 # sched: [9:1.00]
444 ; BDVER2-SSE-NEXT: aesenclast (%rdi), %xmm0 # sched: [14:1.00]
445 ; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
446 ;
447 ; BDVER2-LABEL: test_aesenclast:
448 ; BDVER2: # %bb.0:
449 ; BDVER2-NEXT: vaesenclast %xmm1, %xmm0, %xmm0 # sched: [9:1.00]
450 ; BDVER2-NEXT: vaesenclast (%rdi), %xmm0, %xmm0 # sched: [14:1.00]
451 ; BDVER2-NEXT: retq # sched: [5:1.00]
452 ;
453 ; BTVER2-SSE-LABEL: test_aesenclast:
454 ; BTVER2-SSE: # %bb.0:
455 ; BTVER2-SSE-NEXT: aesenclast %xmm1, %xmm0 # sched: [3:1.00]
456 ; BTVER2-SSE-NEXT: aesenclast (%rdi), %xmm0 # sched: [8:1.00]
457 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
458 ;
459 ; BTVER2-LABEL: test_aesenclast:
460 ; BTVER2: # %bb.0:
461 ; BTVER2-NEXT: vaesenclast %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
462 ; BTVER2-NEXT: vaesenclast (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
463 ; BTVER2-NEXT: retq # sched: [4:1.00]
464 ;
465 ; ZNVER1-SSE-LABEL: test_aesenclast:
466 ; ZNVER1-SSE: # %bb.0:
467 ; ZNVER1-SSE-NEXT: aesenclast %xmm1, %xmm0 # sched: [4:0.50]
468 ; ZNVER1-SSE-NEXT: aesenclast (%rdi), %xmm0 # sched: [11:0.50]
469 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
470 ;
471 ; ZNVER1-LABEL: test_aesenclast:
472 ; ZNVER1: # %bb.0:
473 ; ZNVER1-NEXT: vaesenclast %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
474 ; ZNVER1-NEXT: vaesenclast (%rdi), %xmm0, %xmm0 # sched: [11:0.50]
475 ; ZNVER1-NEXT: retq # sched: [1:0.50]
476 %1 = load <2 x i64>, <2 x i64> *%a2, align 16
477 %2 = call <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64> %a0, <2 x i64> %a1)
478 %3 = call <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64> %2, <2 x i64> %1)
479 ret <2 x i64> %3
480 }
481 declare <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64>, <2 x i64>)
482
483 define <2 x i64> @test_aesimc(<2 x i64> %a0, <2 x i64> *%a1) {
484 ; GENERIC-LABEL: test_aesimc:
485 ; GENERIC: # %bb.0:
486 ; GENERIC-NEXT: aesimc %xmm0, %xmm1 # sched: [12:2.00]
487 ; GENERIC-NEXT: aesimc (%rdi), %xmm0 # sched: [18:2.00]
488 ; GENERIC-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
489 ; GENERIC-NEXT: retq # sched: [1:1.00]
490 ;
491 ; SLM-LABEL: test_aesimc:
492 ; SLM: # %bb.0:
493 ; SLM-NEXT: aesimc %xmm0, %xmm1 # sched: [8:5.00]
494 ; SLM-NEXT: aesimc (%rdi), %xmm0 # sched: [8:5.00]
495 ; SLM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50]
496 ; SLM-NEXT: retq # sched: [4:1.00]
497 ;
498 ; SANDY-SSE-LABEL: test_aesimc:
499 ; SANDY-SSE: # %bb.0:
500 ; SANDY-SSE-NEXT: aesimc %xmm0, %xmm1 # sched: [12:2.00]
501 ; SANDY-SSE-NEXT: aesimc (%rdi), %xmm0 # sched: [18:2.00]
502 ; SANDY-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
503 ; SANDY-SSE-NEXT: retq # sched: [1:1.00]
504 ;
505 ; SANDY-LABEL: test_aesimc:
506 ; SANDY: # %bb.0:
507 ; SANDY-NEXT: vaesimc %xmm0, %xmm0 # sched: [12:2.00]
508 ; SANDY-NEXT: vaesimc (%rdi), %xmm1 # sched: [18:2.00]
509 ; SANDY-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
510 ; SANDY-NEXT: retq # sched: [1:1.00]
511 ;
512 ; HASWELL-SSE-LABEL: test_aesimc:
513 ; HASWELL-SSE: # %bb.0:
514 ; HASWELL-SSE-NEXT: aesimc %xmm0, %xmm1 # sched: [14:2.00]
515 ; HASWELL-SSE-NEXT: aesimc (%rdi), %xmm0 # sched: [20:2.00]
516 ; HASWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
517 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
518 ;
519 ; HASWELL-LABEL: test_aesimc:
520 ; HASWELL: # %bb.0:
521 ; HASWELL-NEXT: vaesimc %xmm0, %xmm0 # sched: [14:2.00]
522 ; HASWELL-NEXT: vaesimc (%rdi), %xmm1 # sched: [20:2.00]
523 ; HASWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
524 ; HASWELL-NEXT: retq # sched: [7:1.00]
525 ;
526 ; BROADWELL-SSE-LABEL: test_aesimc:
527 ; BROADWELL-SSE: # %bb.0:
528 ; BROADWELL-SSE-NEXT: aesimc (%rdi), %xmm1 # sched: [19:2.00]
529 ; BROADWELL-SSE-NEXT: aesimc %xmm0, %xmm0 # sched: [14:2.00]
530 ; BROADWELL-SSE-NEXT: por %xmm0, %xmm1 # sched: [1:0.33]
531 ; BROADWELL-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33]
532 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
533 ;
534 ; BROADWELL-LABEL: test_aesimc:
535 ; BROADWELL: # %bb.0:
536 ; BROADWELL-NEXT: vaesimc (%rdi), %xmm1 # sched: [19:2.00]
537 ; BROADWELL-NEXT: vaesimc %xmm0, %xmm0 # sched: [14:2.00]
538 ; BROADWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
539 ; BROADWELL-NEXT: retq # sched: [7:1.00]
540 ;
541 ; SKYLAKE-SSE-LABEL: test_aesimc:
542 ; SKYLAKE-SSE: # %bb.0:
543 ; SKYLAKE-SSE-NEXT: aesimc %xmm0, %xmm1 # sched: [8:2.00]
544 ; SKYLAKE-SSE-NEXT: aesimc (%rdi), %xmm0 # sched: [14:2.00]
545 ; SKYLAKE-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
546 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
547 ;
548 ; SKYLAKE-LABEL: test_aesimc:
549 ; SKYLAKE: # %bb.0:
550 ; SKYLAKE-NEXT: vaesimc %xmm0, %xmm0 # sched: [8:2.00]
551 ; SKYLAKE-NEXT: vaesimc (%rdi), %xmm1 # sched: [14:2.00]
552 ; SKYLAKE-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
553 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
554 ;
555 ; SKX-SSE-LABEL: test_aesimc:
556 ; SKX-SSE: # %bb.0:
557 ; SKX-SSE-NEXT: aesimc %xmm0, %xmm1 # sched: [8:2.00]
558 ; SKX-SSE-NEXT: aesimc (%rdi), %xmm0 # sched: [14:2.00]
559 ; SKX-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
560 ; SKX-SSE-NEXT: retq # sched: [7:1.00]
561 ;
562 ; SKX-LABEL: test_aesimc:
563 ; SKX: # %bb.0:
564 ; SKX-NEXT: vaesimc %xmm0, %xmm0 # sched: [8:2.00]
565 ; SKX-NEXT: vaesimc (%rdi), %xmm1 # sched: [14:2.00]
566 ; SKX-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
567 ; SKX-NEXT: retq # sched: [7:1.00]
568 ;
569 ; BDVER2-SSE-LABEL: test_aesimc:
570 ; BDVER2-SSE: # %bb.0:
571 ; BDVER2-SSE-NEXT: aesimc %xmm0, %xmm1 # sched: [5:1.00]
572 ; BDVER2-SSE-NEXT: aesimc (%rdi), %xmm0 # sched: [10:1.00]
573 ; BDVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [2:0.50]
574 ; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
575 ;
576 ; BDVER2-LABEL: test_aesimc:
577 ; BDVER2: # %bb.0:
578 ; BDVER2-NEXT: vaesimc (%rdi), %xmm1 # sched: [10:1.00]
579 ; BDVER2-NEXT: vaesimc %xmm0, %xmm0 # sched: [5:1.00]
580 ; BDVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
581 ; BDVER2-NEXT: retq # sched: [5:1.00]
582 ;
583 ; BTVER2-SSE-LABEL: test_aesimc:
584 ; BTVER2-SSE: # %bb.0:
585 ; BTVER2-SSE-NEXT: aesimc %xmm0, %xmm1 # sched: [2:1.00]
586 ; BTVER2-SSE-NEXT: aesimc (%rdi), %xmm0 # sched: [7:1.00]
587 ; BTVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.50]
588 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
589 ;
590 ; BTVER2-LABEL: test_aesimc:
591 ; BTVER2: # %bb.0:
592 ; BTVER2-NEXT: vaesimc (%rdi), %xmm1 # sched: [7:1.00]
593 ; BTVER2-NEXT: vaesimc %xmm0, %xmm0 # sched: [2:1.00]
594 ; BTVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
595 ; BTVER2-NEXT: retq # sched: [4:1.00]
596 ;
597 ; ZNVER1-SSE-LABEL: test_aesimc:
598 ; ZNVER1-SSE: # %bb.0:
599 ; ZNVER1-SSE-NEXT: aesimc %xmm0, %xmm1 # sched: [4:0.50]
600 ; ZNVER1-SSE-NEXT: aesimc (%rdi), %xmm0 # sched: [11:0.50]
601 ; ZNVER1-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.25]
602 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
603 ;
604 ; ZNVER1-LABEL: test_aesimc:
605 ; ZNVER1: # %bb.0:
606 ; ZNVER1-NEXT: vaesimc (%rdi), %xmm1 # sched: [11:0.50]
607 ; ZNVER1-NEXT: vaesimc %xmm0, %xmm0 # sched: [4:0.50]
608 ; ZNVER1-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
609 ; ZNVER1-NEXT: retq # sched: [1:0.50]
610 %1 = load <2 x i64>, <2 x i64> *%a1, align 16
611 %2 = call <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64> %a0)
612 %3 = call <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64> %1)
613 %4 = or <2 x i64> %2, %3
614 ret <2 x i64> %4
615 }
616 declare <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64>)
617
618 define <2 x i64> @test_aeskeygenassist(<2 x i64> %a0, <2 x i64> *%a1) {
619 ; GENERIC-LABEL: test_aeskeygenassist:
620 ; GENERIC: # %bb.0:
621 ; GENERIC-NEXT: aeskeygenassist $7, %xmm0, %xmm1 # sched: [8:3.67]
622 ; GENERIC-NEXT: aeskeygenassist $7, (%rdi), %xmm0 # sched: [8:3.33]
623 ; GENERIC-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
624 ; GENERIC-NEXT: retq # sched: [1:1.00]
625 ;
626 ; SLM-LABEL: test_aeskeygenassist:
627 ; SLM: # %bb.0:
628 ; SLM-NEXT: aeskeygenassist $7, %xmm0, %xmm1 # sched: [8:5.00]
629 ; SLM-NEXT: aeskeygenassist $7, (%rdi), %xmm0 # sched: [8:5.00]
630 ; SLM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50]
631 ; SLM-NEXT: retq # sched: [4:1.00]
632 ;
633 ; SANDY-SSE-LABEL: test_aeskeygenassist:
634 ; SANDY-SSE: # %bb.0:
635 ; SANDY-SSE-NEXT: aeskeygenassist $7, %xmm0, %xmm1 # sched: [8:3.67]
636 ; SANDY-SSE-NEXT: aeskeygenassist $7, (%rdi), %xmm0 # sched: [8:3.33]
637 ; SANDY-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
638 ; SANDY-SSE-NEXT: retq # sched: [1:1.00]
639 ;
640 ; SANDY-LABEL: test_aeskeygenassist:
641 ; SANDY: # %bb.0:
642 ; SANDY-NEXT: vaeskeygenassist $7, %xmm0, %xmm0 # sched: [8:3.67]
643 ; SANDY-NEXT: vaeskeygenassist $7, (%rdi), %xmm1 # sched: [8:3.33]
644 ; SANDY-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
645 ; SANDY-NEXT: retq # sched: [1:1.00]
646 ;
647 ; HASWELL-SSE-LABEL: test_aeskeygenassist:
648 ; HASWELL-SSE: # %bb.0:
649 ; HASWELL-SSE-NEXT: aeskeygenassist $7, %xmm0, %xmm1 # sched: [29:7.00]
650 ; HASWELL-SSE-NEXT: aeskeygenassist $7, (%rdi), %xmm0 # sched: [34:7.00]
651 ; HASWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
652 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
653 ;
654 ; HASWELL-LABEL: test_aeskeygenassist:
655 ; HASWELL: # %bb.0:
656 ; HASWELL-NEXT: vaeskeygenassist $7, %xmm0, %xmm0 # sched: [29:7.00]
657 ; HASWELL-NEXT: vaeskeygenassist $7, (%rdi), %xmm1 # sched: [34:7.00]
658 ; HASWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
659 ; HASWELL-NEXT: retq # sched: [7:1.00]
660 ;
661 ; BROADWELL-SSE-LABEL: test_aeskeygenassist:
662 ; BROADWELL-SSE: # %bb.0:
663 ; BROADWELL-SSE-NEXT: aeskeygenassist $7, %xmm0, %xmm1 # sched: [29:7.00]
664 ; BROADWELL-SSE-NEXT: aeskeygenassist $7, (%rdi), %xmm0 # sched: [33:7.00]
665 ; BROADWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
666 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
667 ;
668 ; BROADWELL-LABEL: test_aeskeygenassist:
669 ; BROADWELL: # %bb.0:
670 ; BROADWELL-NEXT: vaeskeygenassist $7, %xmm0, %xmm0 # sched: [29:7.00]
671 ; BROADWELL-NEXT: vaeskeygenassist $7, (%rdi), %xmm1 # sched: [33:7.00]
672 ; BROADWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
673 ; BROADWELL-NEXT: retq # sched: [7:1.00]
674 ;
675 ; SKYLAKE-SSE-LABEL: test_aeskeygenassist:
676 ; SKYLAKE-SSE: # %bb.0:
677 ; SKYLAKE-SSE-NEXT: aeskeygenassist $7, %xmm0, %xmm1 # sched: [20:6.00]
678 ; SKYLAKE-SSE-NEXT: aeskeygenassist $7, (%rdi), %xmm0 # sched: [25:6.00]
679 ; SKYLAKE-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
680 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
681 ;
682 ; SKYLAKE-LABEL: test_aeskeygenassist:
683 ; SKYLAKE: # %bb.0:
684 ; SKYLAKE-NEXT: vaeskeygenassist $7, %xmm0, %xmm0 # sched: [20:6.00]
685 ; SKYLAKE-NEXT: vaeskeygenassist $7, (%rdi), %xmm1 # sched: [25:6.00]
686 ; SKYLAKE-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
687 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
688 ;
689 ; SKX-SSE-LABEL: test_aeskeygenassist:
690 ; SKX-SSE: # %bb.0:
691 ; SKX-SSE-NEXT: aeskeygenassist $7, %xmm0, %xmm1 # sched: [20:6.00]
692 ; SKX-SSE-NEXT: aeskeygenassist $7, (%rdi), %xmm0 # sched: [25:6.00]
693 ; SKX-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
694 ; SKX-SSE-NEXT: retq # sched: [7:1.00]
695 ;
696 ; SKX-LABEL: test_aeskeygenassist:
697 ; SKX: # %bb.0:
698 ; SKX-NEXT: vaeskeygenassist $7, %xmm0, %xmm0 # sched: [20:6.00]
699 ; SKX-NEXT: vaeskeygenassist $7, (%rdi), %xmm1 # sched: [25:6.00]
700 ; SKX-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
701 ; SKX-NEXT: retq # sched: [7:1.00]
702 ;
703 ; BDVER2-SSE-LABEL: test_aeskeygenassist:
704 ; BDVER2-SSE: # %bb.0:
705 ; BDVER2-SSE-NEXT: aeskeygenassist $7, %xmm0, %xmm1 # sched: [5:1.00]
706 ; BDVER2-SSE-NEXT: aeskeygenassist $7, (%rdi), %xmm0 # sched: [10:1.00]
707 ; BDVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [2:0.50]
708 ; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
709 ;
710 ; BDVER2-LABEL: test_aeskeygenassist:
711 ; BDVER2: # %bb.0:
712 ; BDVER2-NEXT: vaeskeygenassist $7, (%rdi), %xmm1 # sched: [10:1.00]
713 ; BDVER2-NEXT: vaeskeygenassist $7, %xmm0, %xmm0 # sched: [5:1.00]
714 ; BDVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
715 ; BDVER2-NEXT: retq # sched: [5:1.00]
716 ;
717 ; BTVER2-SSE-LABEL: test_aeskeygenassist:
718 ; BTVER2-SSE: # %bb.0:
719 ; BTVER2-SSE-NEXT: aeskeygenassist $7, %xmm0, %xmm1 # sched: [2:1.00]
720 ; BTVER2-SSE-NEXT: aeskeygenassist $7, (%rdi), %xmm0 # sched: [7:1.00]
721 ; BTVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.50]
722 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
723 ;
724 ; BTVER2-LABEL: test_aeskeygenassist:
725 ; BTVER2: # %bb.0:
726 ; BTVER2-NEXT: vaeskeygenassist $7, (%rdi), %xmm1 # sched: [7:1.00]
727 ; BTVER2-NEXT: vaeskeygenassist $7, %xmm0, %xmm0 # sched: [2:1.00]
728 ; BTVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
729 ; BTVER2-NEXT: retq # sched: [4:1.00]
730 ;
731 ; ZNVER1-SSE-LABEL: test_aeskeygenassist:
732 ; ZNVER1-SSE: # %bb.0:
733 ; ZNVER1-SSE-NEXT: aeskeygenassist $7, %xmm0, %xmm1 # sched: [4:0.50]
734 ; ZNVER1-SSE-NEXT: aeskeygenassist $7, (%rdi), %xmm0 # sched: [11:0.50]
735 ; ZNVER1-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.25]
736 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
737 ;
738 ; ZNVER1-LABEL: test_aeskeygenassist:
739 ; ZNVER1: # %bb.0:
740 ; ZNVER1-NEXT: vaeskeygenassist $7, (%rdi), %xmm1 # sched: [11:0.50]
741 ; ZNVER1-NEXT: vaeskeygenassist $7, %xmm0, %xmm0 # sched: [4:0.50]
742 ; ZNVER1-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
743 ; ZNVER1-NEXT: retq # sched: [1:0.50]
744 %1 = load <2 x i64>, <2 x i64> *%a1, align 16
745 %2 = call <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64> %a0, i8 7)
746 %3 = call <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64> %1, i8 7)
747 %4 = or <2 x i64> %2, %3
748 ret <2 x i64> %4
749 }
750 declare <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64>, i8)
+0
-6120
test/CodeGen/X86/avx-schedule.ll less more
0 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 -mattr=-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2
9 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
10 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
11
12 define <4 x double> @test_addpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
13 ; GENERIC-LABEL: test_addpd:
14 ; GENERIC: # %bb.0:
15 ; GENERIC-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
16 ; GENERIC-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
17 ; GENERIC-NEXT: retq # sched: [1:1.00]
18 ;
19 ; SANDY-LABEL: test_addpd:
20 ; SANDY: # %bb.0:
21 ; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
22 ; SANDY-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
23 ; SANDY-NEXT: retq # sched: [1:1.00]
24 ;
25 ; HASWELL-LABEL: test_addpd:
26 ; HASWELL: # %bb.0:
27 ; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
28 ; HASWELL-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
29 ; HASWELL-NEXT: retq # sched: [7:1.00]
30 ;
31 ; BROADWELL-LABEL: test_addpd:
32 ; BROADWELL: # %bb.0:
33 ; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
34 ; BROADWELL-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
35 ; BROADWELL-NEXT: retq # sched: [7:1.00]
36 ;
37 ; SKYLAKE-LABEL: test_addpd:
38 ; SKYLAKE: # %bb.0:
39 ; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
40 ; SKYLAKE-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
41 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
42 ;
43 ; SKX-LABEL: test_addpd:
44 ; SKX: # %bb.0:
45 ; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
46 ; SKX-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
47 ; SKX-NEXT: retq # sched: [7:1.00]
48 ;
49 ; BDVER2-LABEL: test_addpd:
50 ; BDVER2: # %bb.0:
51 ; BDVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
52 ; BDVER2-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
53 ; BDVER2-NEXT: retq # sched: [5:1.00]
54 ;
55 ; BTVER2-LABEL: test_addpd:
56 ; BTVER2: # %bb.0:
57 ; BTVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
58 ; BTVER2-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
59 ; BTVER2-NEXT: retq # sched: [4:1.00]
60 ;
61 ; ZNVER1-LABEL: test_addpd:
62 ; ZNVER1: # %bb.0:
63 ; ZNVER1-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
64 ; ZNVER1-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
65 ; ZNVER1-NEXT: retq # sched: [1:0.50]
66 %1 = fadd <4 x double> %a0, %a1
67 %2 = load <4 x double>, <4 x double> *%a2, align 32
68 %3 = fadd <4 x double> %1, %2
69 ret <4 x double> %3
70 }
71
72 define <8 x float> @test_addps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
73 ; GENERIC-LABEL: test_addps:
74 ; GENERIC: # %bb.0:
75 ; GENERIC-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
76 ; GENERIC-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
77 ; GENERIC-NEXT: retq # sched: [1:1.00]
78 ;
79 ; SANDY-LABEL: test_addps:
80 ; SANDY: # %bb.0:
81 ; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
82 ; SANDY-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
83 ; SANDY-NEXT: retq # sched: [1:1.00]
84 ;
85 ; HASWELL-LABEL: test_addps:
86 ; HASWELL: # %bb.0:
87 ; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
88 ; HASWELL-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
89 ; HASWELL-NEXT: retq # sched: [7:1.00]
90 ;
91 ; BROADWELL-LABEL: test_addps:
92 ; BROADWELL: # %bb.0:
93 ; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
94 ; BROADWELL-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
95 ; BROADWELL-NEXT: retq # sched: [7:1.00]
96 ;
97 ; SKYLAKE-LABEL: test_addps:
98 ; SKYLAKE: # %bb.0:
99 ; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
100 ; SKYLAKE-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
101 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
102 ;
103 ; SKX-LABEL: test_addps:
104 ; SKX: # %bb.0:
105 ; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
106 ; SKX-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
107 ; SKX-NEXT: retq # sched: [7:1.00]
108 ;
109 ; BDVER2-LABEL: test_addps:
110 ; BDVER2: # %bb.0:
111 ; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
112 ; BDVER2-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
113 ; BDVER2-NEXT: retq # sched: [5:1.00]
114 ;
115 ; BTVER2-LABEL: test_addps:
116 ; BTVER2: # %bb.0:
117 ; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
118 ; BTVER2-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
119 ; BTVER2-NEXT: retq # sched: [4:1.00]
120 ;
121 ; ZNVER1-LABEL: test_addps:
122 ; ZNVER1: # %bb.0:
123 ; ZNVER1-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
124 ; ZNVER1-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
125 ; ZNVER1-NEXT: retq # sched: [1:0.50]
126 %1 = fadd <8 x float> %a0, %a1
127 %2 = load <8 x float>, <8 x float> *%a2, align 32
128 %3 = fadd <8 x float> %1, %2
129 ret <8 x float> %3
130 }
131
132 define <4 x double> @test_addsubpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
133 ; GENERIC-LABEL: test_addsubpd:
134 ; GENERIC: # %bb.0:
135 ; GENERIC-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
136 ; GENERIC-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
137 ; GENERIC-NEXT: retq # sched: [1:1.00]
138 ;
139 ; SANDY-LABEL: test_addsubpd:
140 ; SANDY: # %bb.0:
141 ; SANDY-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
142 ; SANDY-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
143 ; SANDY-NEXT: retq # sched: [1:1.00]
144 ;
145 ; HASWELL-LABEL: test_addsubpd:
146 ; HASWELL: # %bb.0:
147 ; HASWELL-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
148 ; HASWELL-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
149 ; HASWELL-NEXT: retq # sched: [7:1.00]
150 ;
151 ; BROADWELL-LABEL: test_addsubpd:
152 ; BROADWELL: # %bb.0:
153 ; BROADWELL-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
154 ; BROADWELL-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
155 ; BROADWELL-NEXT: retq # sched: [7:1.00]
156 ;
157 ; SKYLAKE-LABEL: test_addsubpd:
158 ; SKYLAKE: # %bb.0:
159 ; SKYLAKE-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
160 ; SKYLAKE-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
161 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
162 ;
163 ; SKX-LABEL: test_addsubpd:
164 ; SKX: # %bb.0:
165 ; SKX-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
166 ; SKX-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
167 ; SKX-NEXT: retq # sched: [7:1.00]
168 ;
169 ; BDVER2-LABEL: test_addsubpd:
170 ; BDVER2: # %bb.0:
171 ; BDVER2-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
172 ; BDVER2-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
173 ; BDVER2-NEXT: retq # sched: [5:1.00]
174 ;
175 ; BTVER2-LABEL: test_addsubpd:
176 ; BTVER2: # %bb.0:
177 ; BTVER2-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
178 ; BTVER2-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
179 ; BTVER2-NEXT: retq # sched: [4:1.00]
180 ;
181 ; ZNVER1-LABEL: test_addsubpd:
182 ; ZNVER1: # %bb.0:
183 ; ZNVER1-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
184 ; ZNVER1-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
185 ; ZNVER1-NEXT: retq # sched: [1:0.50]
186 %1 = call <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %a0, <4 x double> %a1)
187 %2 = load <4 x double>, <4 x double> *%a2, align 32
188 %3 = call <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %1, <4 x double> %2)
189 ret <4 x double> %3
190 }
191 declare <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double>, <4 x double>) nounwind readnone
192
193 define <8 x float> @test_addsubps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
194 ; GENERIC-LABEL: test_addsubps:
195 ; GENERIC: # %bb.0:
196 ; GENERIC-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
197 ; GENERIC-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
198 ; GENERIC-NEXT: retq # sched: [1:1.00]
199 ;
200 ; SANDY-LABEL: test_addsubps:
201 ; SANDY: # %bb.0:
202 ; SANDY-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
203 ; SANDY-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
204 ; SANDY-NEXT: retq # sched: [1:1.00]
205 ;
206 ; HASWELL-LABEL: test_addsubps:
207 ; HASWELL: # %bb.0:
208 ; HASWELL-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
209 ; HASWELL-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
210 ; HASWELL-NEXT: retq # sched: [7:1.00]
211 ;
212 ; BROADWELL-LABEL: test_addsubps:
213 ; BROADWELL: # %bb.0:
214 ; BROADWELL-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
215 ; BROADWELL-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
216 ; BROADWELL-NEXT: retq # sched: [7:1.00]
217 ;
218 ; SKYLAKE-LABEL: test_addsubps:
219 ; SKYLAKE: # %bb.0:
220 ; SKYLAKE-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
221 ; SKYLAKE-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
222 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
223 ;
224 ; SKX-LABEL: test_addsubps:
225 ; SKX: # %bb.0:
226 ; SKX-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
227 ; SKX-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
228 ; SKX-NEXT: retq # sched: [7:1.00]
229 ;
230 ; BDVER2-LABEL: test_addsubps:
231 ; BDVER2: # %bb.0:
232 ; BDVER2-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
233 ; BDVER2-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
234 ; BDVER2-NEXT: retq # sched: [5:1.00]
235 ;
236 ; BTVER2-LABEL: test_addsubps:
237 ; BTVER2: # %bb.0:
238 ; BTVER2-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
239 ; BTVER2-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
240 ; BTVER2-NEXT: retq # sched: [4:1.00]
241 ;
242 ; ZNVER1-LABEL: test_addsubps:
243 ; ZNVER1: # %bb.0:
244 ; ZNVER1-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
245 ; ZNVER1-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
246 ; ZNVER1-NEXT: retq # sched: [1:0.50]
247 %1 = call <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %a0, <8 x float> %a1)
248 %2 = load <8 x float>, <8 x float> *%a2, align 32
249 %3 = call <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %1, <8 x float> %2)
250 ret <8 x float> %3
251 }
252 declare <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float>, <8 x float>) nounwind readnone
253
; Scheduling test for the 256-bit vandnpd. The IR computes (~%a0 & %a1) and then
; ANDs a vector loaded from %a2 with the complement of that result, so the check
; lines expect one register-form and one memory-form vandnpd followed by vaddpd.
; The "# sched: [a:b]" suffix is the per-instruction scheduling annotation
; (presumably latency : reciprocal throughput for the CPU the prefix names --
; confirm against the run lines at the top of the file).
; The all-ones xor operands below were missing from this copy of the test and
; are restored here; xor with -1 is the IR spelling of bitwise NOT.
254 define <4 x double> @test_andnotpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
255 ; GENERIC-LABEL: test_andnotpd:
256 ; GENERIC: # %bb.0:
257 ; GENERIC-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
258 ; GENERIC-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
259 ; GENERIC-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
260 ; GENERIC-NEXT: retq # sched: [1:1.00]
261 ;
262 ; SANDY-LABEL: test_andnotpd:
263 ; SANDY: # %bb.0:
264 ; SANDY-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
265 ; SANDY-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
266 ; SANDY-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
267 ; SANDY-NEXT: retq # sched: [1:1.00]
268 ;
269 ; HASWELL-LABEL: test_andnotpd:
270 ; HASWELL: # %bb.0:
271 ; HASWELL-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
272 ; HASWELL-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
273 ; HASWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
274 ; HASWELL-NEXT: retq # sched: [7:1.00]
275 ;
276 ; BROADWELL-LABEL: test_andnotpd:
277 ; BROADWELL: # %bb.0:
278 ; BROADWELL-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
279 ; BROADWELL-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
280 ; BROADWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
281 ; BROADWELL-NEXT: retq # sched: [7:1.00]
282 ;
283 ; SKYLAKE-LABEL: test_andnotpd:
284 ; SKYLAKE: # %bb.0:
285 ; SKYLAKE-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
286 ; SKYLAKE-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
287 ; SKYLAKE-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
288 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
289 ;
290 ; SKX-LABEL: test_andnotpd:
291 ; SKX: # %bb.0:
292 ; SKX-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
293 ; SKX-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
294 ; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
295 ; SKX-NEXT: retq # sched: [7:1.00]
296 ;
297 ; BDVER2-LABEL: test_andnotpd:
298 ; BDVER2: # %bb.0:
299 ; BDVER2-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
300 ; BDVER2-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
301 ; BDVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [5:2.00]
302 ; BDVER2-NEXT: retq # sched: [5:1.00]
303 ;
304 ; BTVER2-LABEL: test_andnotpd:
305 ; BTVER2: # %bb.0:
306 ; BTVER2-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
307 ; BTVER2-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [6:2.00]
308 ; BTVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:2.00]
309 ; BTVER2-NEXT: retq # sched: [4:1.00]
310 ;
311 ; ZNVER1-LABEL: test_andnotpd:
312 ; ZNVER1: # %bb.0:
313 ; ZNVER1-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
314 ; ZNVER1-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
315 ; ZNVER1-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
316 ; ZNVER1-NEXT: retq # sched: [1:0.50]
317 %1 = bitcast <4 x double> %a0 to <4 x i64>
318 %2 = bitcast <4 x double> %a1 to <4 x i64>
319 %3 = xor <4 x i64> %1, <i64 -1, i64 -1, i64 -1, i64 -1> ; bitwise NOT of %a0
320 %4 = and <4 x i64> %3, %2 ; ~%a0 & %a1 (register form)
321 %5 = load <4 x double>, <4 x double> *%a2, align 32
322 %6 = bitcast <4 x double> %5 to <4 x i64>
323 %7 = xor <4 x i64> %4, <i64 -1, i64 -1, i64 -1, i64 -1> ; bitwise NOT of the first result
324 %8 = and <4 x i64> %6, %7 ; loaded value & ~%4 (memory form)
325 %9 = bitcast <4 x i64> %8 to <4 x double>
326 %10 = fadd <4 x double> %a1, %9 ; NOTE(review): presumably keeps the logic ops in the FP domain -- confirm
327 ret <4 x double> %10
328 }
329
; Scheduling test for the 256-bit vandnps. The IR computes (~%a0 & %a1) and then
; ANDs a vector loaded from %a2 with the complement of that result, so the check
; lines expect one register-form and one memory-form vandnps followed by vaddps.
; The "# sched: [a:b]" suffix is the per-instruction scheduling annotation
; (presumably latency : reciprocal throughput for the CPU the prefix names --
; confirm against the run lines at the top of the file).
; The all-ones xor operands below were missing from this copy of the test and
; are restored here; xor with -1 is the IR spelling of bitwise NOT.
330 define <8 x float> @test_andnotps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
331 ; GENERIC-LABEL: test_andnotps:
332 ; GENERIC: # %bb.0:
333 ; GENERIC-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
334 ; GENERIC-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
335 ; GENERIC-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
336 ; GENERIC-NEXT: retq # sched: [1:1.00]
337 ;
338 ; SANDY-LABEL: test_andnotps:
339 ; SANDY: # %bb.0:
340 ; SANDY-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
341 ; SANDY-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
342 ; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
343 ; SANDY-NEXT: retq # sched: [1:1.00]
344 ;
345 ; HASWELL-LABEL: test_andnotps:
346 ; HASWELL: # %bb.0:
347 ; HASWELL-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
348 ; HASWELL-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
349 ; HASWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
350 ; HASWELL-NEXT: retq # sched: [7:1.00]
351 ;
352 ; BROADWELL-LABEL: test_andnotps:
353 ; BROADWELL: # %bb.0:
354 ; BROADWELL-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
355 ; BROADWELL-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
356 ; BROADWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
357 ; BROADWELL-NEXT: retq # sched: [7:1.00]
358 ;
359 ; SKYLAKE-LABEL: test_andnotps:
360 ; SKYLAKE: # %bb.0:
361 ; SKYLAKE-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
362 ; SKYLAKE-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
363 ; SKYLAKE-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
364 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
365 ;
366 ; SKX-LABEL: test_andnotps:
367 ; SKX: # %bb.0:
368 ; SKX-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
369 ; SKX-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
370 ; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
371 ; SKX-NEXT: retq # sched: [7:1.00]
372 ;
373 ; BDVER2-LABEL: test_andnotps:
374 ; BDVER2: # %bb.0:
375 ; BDVER2-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
376 ; BDVER2-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
377 ; BDVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [5:2.00]
378 ; BDVER2-NEXT: retq # sched: [5:1.00]
379 ;
380 ; BTVER2-LABEL: test_andnotps:
381 ; BTVER2: # %bb.0:
382 ; BTVER2-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
383 ; BTVER2-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [6:2.00]
384 ; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00]
385 ; BTVER2-NEXT: retq # sched: [4:1.00]
386 ;
387 ; ZNVER1-LABEL: test_andnotps:
388 ; ZNVER1: # %bb.0:
389 ; ZNVER1-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
390 ; ZNVER1-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
391 ; ZNVER1-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
392 ; ZNVER1-NEXT: retq # sched: [1:0.50]
393 %1 = bitcast <8 x float> %a0 to <4 x i64>
394 %2 = bitcast <8 x float> %a1 to <4 x i64>
395 %3 = xor <4 x i64> %1, <i64 -1, i64 -1, i64 -1, i64 -1> ; bitwise NOT of %a0
396 %4 = and <4 x i64> %3, %2 ; ~%a0 & %a1 (register form)
397 %5 = load <8 x float>, <8 x float> *%a2, align 32
398 %6 = bitcast <8 x float> %5 to <4 x i64>
399 %7 = xor <4 x i64> %4, <i64 -1, i64 -1, i64 -1, i64 -1> ; bitwise NOT of the first result
400 %8 = and <4 x i64> %6, %7 ; loaded value & ~%4 (memory form)
401 %9 = bitcast <4 x i64> %8 to <8 x float>
402 %10 = fadd <8 x float> %a1, %9 ; NOTE(review): presumably keeps the logic ops in the FP domain -- confirm
403 ret <8 x float> %10
404 }
405
; Scheduling test for the 256-bit vandpd. The IR ANDs %a0 with %a1, then ANDs
; that result with a vector loaded from %a2, so the check lines expect one
; register-form and one memory-form vandpd followed by vaddpd.
; The "# sched: [a:b]" suffix is the per-instruction scheduling annotation
; (presumably latency : reciprocal throughput for the CPU the prefix names --
; confirm against the run lines at the top of the file).
406 define <4 x double> @test_andpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
407 ; GENERIC-LABEL: test_andpd:
408 ; GENERIC: # %bb.0:
409 ; GENERIC-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
410 ; GENERIC-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
411 ; GENERIC-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
412 ; GENERIC-NEXT: retq # sched: [1:1.00]
413 ;
414 ; SANDY-LABEL: test_andpd:
415 ; SANDY: # %bb.0:
416 ; SANDY-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
417 ; SANDY-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
418 ; SANDY-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
419 ; SANDY-NEXT: retq # sched: [1:1.00]
420 ;
421 ; HASWELL-LABEL: test_andpd:
422 ; HASWELL: # %bb.0:
423 ; HASWELL-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
424 ; HASWELL-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
425 ; HASWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
426 ; HASWELL-NEXT: retq # sched: [7:1.00]
427 ;
428 ; BROADWELL-LABEL: test_andpd:
429 ; BROADWELL: # %bb.0:
430 ; BROADWELL-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
431 ; BROADWELL-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
432 ; BROADWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
433 ; BROADWELL-NEXT: retq # sched: [7:1.00]
434 ;
435 ; SKYLAKE-LABEL: test_andpd:
436 ; SKYLAKE: # %bb.0:
437 ; SKYLAKE-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
438 ; SKYLAKE-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
439 ; SKYLAKE-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
440 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
441 ;
442 ; SKX-LABEL: test_andpd:
443 ; SKX: # %bb.0:
444 ; SKX-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
445 ; SKX-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
446 ; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
447 ; SKX-NEXT: retq # sched: [7:1.00]
448 ;
449 ; BDVER2-LABEL: test_andpd:
450 ; BDVER2: # %bb.0:
451 ; BDVER2-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
452 ; BDVER2-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
453 ; BDVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [5:2.00]
454 ; BDVER2-NEXT: retq # sched: [5:1.00]
455 ;
456 ; BTVER2-LABEL: test_andpd:
457 ; BTVER2: # %bb.0:
458 ; BTVER2-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
459 ; BTVER2-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [6:2.00]
460 ; BTVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:2.00]
461 ; BTVER2-NEXT: retq # sched: [4:1.00]
462 ;
463 ; ZNVER1-LABEL: test_andpd:
464 ; ZNVER1: # %bb.0:
465 ; ZNVER1-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
466 ; ZNVER1-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
467 ; ZNVER1-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
468 ; ZNVER1-NEXT: retq # sched: [1:0.50]
469 %1 = bitcast <4 x double> %a0 to <4 x i64>
470 %2 = bitcast <4 x double> %a1 to <4 x i64>
471 %3 = and <4 x i64> %1, %2 ; %a0 & %a1 (register form)
472 %4 = load <4 x double>, <4 x double> *%a2, align 32
473 %5 = bitcast <4 x double> %4 to <4 x i64>
474 %6 = and <4 x i64> %3, %5 ; first result & loaded value (memory form)
475 %7 = bitcast <4 x i64> %6 to <4 x double>
476 %8 = fadd <4 x double> %a1, %7 ; NOTE(review): presumably keeps the logic ops in the FP domain -- confirm
477 ret <4 x double> %8
478 }
479
; Scheduling test for the 256-bit vandps. The IR ANDs %a0 with %a1, then ANDs
; that result with a vector loaded from %a2, so the check lines expect one
; register-form and one memory-form vandps followed by vaddps.
; The "# sched: [a:b]" suffix is the per-instruction scheduling annotation
; (presumably latency : reciprocal throughput for the CPU the prefix names --
; confirm against the run lines at the top of the file).
480 define <8 x float> @test_andps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
481 ; GENERIC-LABEL: test_andps:
482 ; GENERIC: # %bb.0:
483 ; GENERIC-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
484 ; GENERIC-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
485 ; GENERIC-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
486 ; GENERIC-NEXT: retq # sched: [1:1.00]
487 ;
488 ; SANDY-LABEL: test_andps:
489 ; SANDY: # %bb.0:
490 ; SANDY-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
491 ; SANDY-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
492 ; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
493 ; SANDY-NEXT: retq # sched: [1:1.00]
494 ;
495 ; HASWELL-LABEL: test_andps:
496 ; HASWELL: # %bb.0:
497 ; HASWELL-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
498 ; HASWELL-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
499 ; HASWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
500 ; HASWELL-NEXT: retq # sched: [7:1.00]
501 ;
502 ; BROADWELL-LABEL: test_andps:
503 ; BROADWELL: # %bb.0:
504 ; BROADWELL-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
505 ; BROADWELL-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
506 ; BROADWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
507 ; BROADWELL-NEXT: retq # sched: [7:1.00]
508 ;
509 ; SKYLAKE-LABEL: test_andps:
510 ; SKYLAKE: # %bb.0:
511 ; SKYLAKE-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
512 ; SKYLAKE-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
513 ; SKYLAKE-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
514 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
515 ;
516 ; SKX-LABEL: test_andps:
517 ; SKX: # %bb.0:
518 ; SKX-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
519 ; SKX-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
520 ; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
521 ; SKX-NEXT: retq # sched: [7:1.00]
522 ;
523 ; BDVER2-LABEL: test_andps:
524 ; BDVER2: # %bb.0:
525 ; BDVER2-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
526 ; BDVER2-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
527 ; BDVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [5:2.00]
528 ; BDVER2-NEXT: retq # sched: [5:1.00]
529 ;
530 ; BTVER2-LABEL: test_andps:
531 ; BTVER2: # %bb.0:
532 ; BTVER2-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
533 ; BTVER2-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [6:2.00]
534 ; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00]
535 ; BTVER2-NEXT: retq # sched: [4:1.00]
536 ;
537 ; ZNVER1-LABEL: test_andps:
538 ; ZNVER1: # %bb.0:
539 ; ZNVER1-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
540 ; ZNVER1-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
541 ; ZNVER1-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
542 ; ZNVER1-NEXT: retq # sched: [1:0.50]
543 %1 = bitcast <8 x float> %a0 to <4 x i64>
544 %2 = bitcast <8 x float> %a1 to <4 x i64>
545 %3 = and <4 x i64> %1, %2 ; %a0 & %a1 (register form)
546 %4 = load <8 x float>, <8 x float> *%a2, align 32
547 %5 = bitcast <8 x float> %4 to <4 x i64>
548 %6 = and <4 x i64> %3, %5 ; first result & loaded value (memory form)
549 %7 = bitcast <4 x i64> %6 to <8 x float>
550 %8 = fadd <8 x float> %a1, %7 ; NOTE(review): presumably keeps the logic ops in the FP domain -- confirm
551 ret <8 x float> %8
552 }
553
; Scheduling test for the 256-bit vblendpd. Two shufflevector blends (one of two
; registers, one of a register with a vector loaded from %a2) are expected to
; select the register and memory forms of vblendpd, followed by vaddpd.
; The "sched: [a:b]" suffix is the per-instruction scheduling annotation
; (presumably latency : reciprocal throughput for the CPU the prefix names --
; confirm against the run lines at the top of the file).
; The shuffle masks below were missing from this copy of the test; they are
; restored from the lane selections spelled out in the check lines
; (indices 0-3 pick from the first operand, 4-7 from the second).
554 define <4 x double> @test_blendpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
555 ; GENERIC-LABEL: test_blendpd:
556 ; GENERIC: # %bb.0:
557 ; GENERIC-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.50]
558 ; GENERIC-NEXT: vblendpd {{.*#+}} ymm1 = ymm0[0,1],mem[2,3] sched: [8:0.50]
559 ; GENERIC-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
560 ; GENERIC-NEXT: retq # sched: [1:1.00]
561 ;
562 ; SANDY-LABEL: test_blendpd:
563 ; SANDY: # %bb.0:
564 ; SANDY-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.50]
565 ; SANDY-NEXT: vblendpd {{.*#+}} ymm1 = ymm0[0,1],mem[2,3] sched: [8:0.50]
566 ; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
567 ; SANDY-NEXT: retq # sched: [1:1.00]
568 ;
569 ; HASWELL-LABEL: test_blendpd:
570 ; HASWELL: # %bb.0:
571 ; HASWELL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.33]
572 ; HASWELL-NEXT: vblendpd {{.*#+}} ymm1 = ymm0[0,1],mem[2,3] sched: [8:0.50]
573 ; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
574 ; HASWELL-NEXT: retq # sched: [7:1.00]
575 ;
576 ; BROADWELL-LABEL: test_blendpd:
577 ; BROADWELL: # %bb.0:
578 ; BROADWELL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.33]
579 ; BROADWELL-NEXT: vblendpd {{.*#+}} ymm1 = ymm0[0,1],mem[2,3] sched: [7:0.50]
580 ; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
581 ; BROADWELL-NEXT: retq # sched: [7:1.00]
582 ;
583 ; SKYLAKE-LABEL: test_blendpd:
584 ; SKYLAKE: # %bb.0:
585 ; SKYLAKE-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.33]
586 ; SKYLAKE-NEXT: vblendpd {{.*#+}} ymm1 = ymm0[0,1],mem[2,3] sched: [8:0.50]
587 ; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
588 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
589 ;
590 ; SKX-LABEL: test_blendpd:
591 ; SKX: # %bb.0:
592 ; SKX-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.33]
593 ; SKX-NEXT: vblendpd {{.*#+}} ymm1 = ymm0[0,1],mem[2,3] sched: [8:0.50]
594 ; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
595 ; SKX-NEXT: retq # sched: [7:1.00]
596 ;
597 ; BDVER2-LABEL: test_blendpd:
598 ; BDVER2: # %bb.0:
599 ; BDVER2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [2:1.00]
600 ; BDVER2-NEXT: vblendpd {{.*#+}} ymm1 = ymm0[0,1],mem[2,3] sched: [7:1.00]
601 ; BDVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
602 ; BDVER2-NEXT: retq # sched: [5:1.00]
603 ;
604 ; BTVER2-LABEL: test_blendpd:
605 ; BTVER2: # %bb.0:
606 ; BTVER2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:1.00]
607 ; BTVER2-NEXT: vblendpd {{.*#+}} ymm1 = ymm0[0,1],mem[2,3] sched: [6:2.00]
608 ; BTVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
609 ; BTVER2-NEXT: retq # sched: [4:1.00]
610 ;
611 ; ZNVER1-LABEL: test_blendpd:
612 ; ZNVER1: # %bb.0:
613 ; ZNVER1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.50]
614 ; ZNVER1-NEXT: vblendpd {{.*#+}} ymm1 = ymm0[0,1],mem[2,3] sched: [8:0.50]
615 ; ZNVER1-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
616 ; ZNVER1-NEXT: retq # sched: [1:0.50]
617 %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 0, i32 5, i32 6, i32 3> ; a0[0],a1[1,2],a0[3]
618 %2 = load <4 x double>, <4 x double> *%a2, align 32
619 %3 = shufflevector <4 x double> %1, <4 x double> %2, <4 x i32> <i32 0, i32 1, i32 6, i32 7> ; %1[0,1],mem[2,3]
620 %4 = fadd <4 x double> %1, %3 ; uses both blend results so neither is dead
621 ret <4 x double> %4
622 }
623
; Scheduling test for the 256-bit vblendps. Two shufflevector blends (one of two
; registers, one of %a1 with a vector loaded from %a2) are expected to select
; the register and memory forms of vblendps, followed by vaddps.
; The "sched: [a:b]" suffix is the per-instruction scheduling annotation
; (presumably latency : reciprocal throughput for the CPU the prefix names --
; confirm against the run lines at the top of the file).
; The shuffle masks below were missing from this copy of the test; they are
; restored from the lane selections spelled out in the check lines
; (indices 0-7 pick from the first operand, 8-15 from the second).
624 define <8 x float> @test_blendps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
625 ; GENERIC-LABEL: test_blendps:
626 ; GENERIC: # %bb.0:
627 ; GENERIC-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.50]
628 ; GENERIC-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],mem[2],ymm1[3],mem[4,5,6],ymm1[7] sched: [8:0.50]
629 ; GENERIC-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
630 ; GENERIC-NEXT: retq # sched: [1:1.00]
631 ;
632 ; SANDY-LABEL: test_blendps:
633 ; SANDY: # %bb.0:
634 ; SANDY-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.50]
635 ; SANDY-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],mem[2],ymm1[3],mem[4,5,6],ymm1[7] sched: [8:0.50]
636 ; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
637 ; SANDY-NEXT: retq # sched: [1:1.00]
638 ;
639 ; HASWELL-LABEL: test_blendps:
640 ; HASWELL: # %bb.0:
641 ; HASWELL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.33]
642 ; HASWELL-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],mem[2],ymm1[3],mem[4,5,6],ymm1[7] sched: [8:0.50]
643 ; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
644 ; HASWELL-NEXT: retq # sched: [7:1.00]
645 ;
646 ; BROADWELL-LABEL: test_blendps:
647 ; BROADWELL: # %bb.0:
648 ; BROADWELL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.33]
649 ; BROADWELL-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],mem[2],ymm1[3],mem[4,5,6],ymm1[7] sched: [7:0.50]
650 ; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
651 ; BROADWELL-NEXT: retq # sched: [7:1.00]
652 ;
653 ; SKYLAKE-LABEL: test_blendps:
654 ; SKYLAKE: # %bb.0:
655 ; SKYLAKE-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.33]
656 ; SKYLAKE-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],mem[2],ymm1[3],mem[4,5,6],ymm1[7] sched: [8:0.50]
657 ; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
658 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
659 ;
660 ; SKX-LABEL: test_blendps:
661 ; SKX: # %bb.0:
662 ; SKX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.33]
663 ; SKX-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],mem[2],ymm1[3],mem[4,5,6],ymm1[7] sched: [8:0.50]
664 ; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
665 ; SKX-NEXT: retq # sched: [7:1.00]
666 ;
667 ; BDVER2-LABEL: test_blendps:
668 ; BDVER2: # %bb.0:
669 ; BDVER2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [2:1.00]
670 ; BDVER2-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],mem[2],ymm1[3],mem[4,5,6],ymm1[7] sched: [7:1.00]
671 ; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
672 ; BDVER2-NEXT: retq # sched: [5:1.00]
673 ;
674 ; BTVER2-LABEL: test_blendps:
675 ; BTVER2: # %bb.0:
676 ; BTVER2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:1.00]
677 ; BTVER2-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],mem[2],ymm1[3],mem[4,5,6],ymm1[7] sched: [6:2.00]
678 ; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
679 ; BTVER2-NEXT: retq # sched: [4:1.00]
680 ;
681 ; ZNVER1-LABEL: test_blendps:
682 ; ZNVER1: # %bb.0:
683 ; ZNVER1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.50]
684 ; ZNVER1-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],mem[2],ymm1[3],mem[4,5,6],ymm1[7] sched: [8:0.50]
685 ; ZNVER1-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
686 ; ZNVER1-NEXT: retq # sched: [1:0.50]
687 %1 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 5, i32 6, i32 7> ; a0[0],a1[1,2],a0[3..7]
688 %2 = load <8 x float>, <8 x float> *%a2, align 32
689 %3 = shufflevector <8 x float> %a1, <8 x float> %2, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 13, i32 14, i32 7> ; a1[0,1],mem[2],a1[3],mem[4,5,6],a1[7]
690 %4 = fadd <8 x float> %1, %3 ; uses both blend results so neither is dead
691 ret <8 x float> %4
692 }
693
; Scheduling test for the 256-bit vblendvpd. The blendv.pd.256 intrinsic is
; called twice with %a2 as the selector: once on two register operands and once
; with a vector loaded from %a3, so the check lines expect the register form
; followed by the memory form.
; The "# sched: [a:b]" suffix is the per-instruction scheduling annotation
; (presumably latency : reciprocal throughput for the CPU the prefix names --
; confirm against the run lines at the top of the file).
694 define <4 x double> @test_blendvpd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) {
695 ; GENERIC-LABEL: test_blendvpd:
696 ; GENERIC: # %bb.0:
697 ; GENERIC-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
698 ; GENERIC-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
699 ; GENERIC-NEXT: retq # sched: [1:1.00]
700 ;
701 ; SANDY-LABEL: test_blendvpd:
702 ; SANDY: # %bb.0:
703 ; SANDY-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
704 ; SANDY-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
705 ; SANDY-NEXT: retq # sched: [1:1.00]
706 ;
707 ; HASWELL-LABEL: test_blendvpd:
708 ; HASWELL: # %bb.0:
709 ; HASWELL-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
710 ; HASWELL-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
711 ; HASWELL-NEXT: retq # sched: [7:1.00]
712 ;
713 ; BROADWELL-LABEL: test_blendvpd:
714 ; BROADWELL: # %bb.0:
715 ; BROADWELL-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
716 ; BROADWELL-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
717 ; BROADWELL-NEXT: retq # sched: [7:1.00]
718 ;
719 ; SKYLAKE-LABEL: test_blendvpd:
720 ; SKYLAKE: # %bb.0:
721 ; SKYLAKE-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:0.67]
722 ; SKYLAKE-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:0.67]
723 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
724 ;
725 ; SKX-LABEL: test_blendvpd:
726 ; SKX: # %bb.0:
727 ; SKX-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:0.67]
728 ; SKX-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:0.67]
729 ; SKX-NEXT: retq # sched: [7:1.00]
730 ;
731 ; BDVER2-LABEL: test_blendvpd:
732 ; BDVER2: # %bb.0:
733 ; BDVER2-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:3.00]
734 ; BDVER2-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [7:3.00]
735 ; BDVER2-NEXT: retq # sched: [5:1.00]
736 ;
737 ; BTVER2-LABEL: test_blendvpd:
738 ; BTVER2: # %bb.0:
739 ; BTVER2-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [3:3.00]
740 ; BTVER2-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:3.00]
741 ; BTVER2-NEXT: retq # sched: [4:1.00]
742 ;
743 ; ZNVER1-LABEL: test_blendvpd:
744 ; ZNVER1: # %bb.0:
745 ; ZNVER1-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
746 ; ZNVER1-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
747 ; ZNVER1-NEXT: retq # sched: [1:0.50]
748 %1 = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) ; register form
749 %2 = load <4 x double>, <4 x double> *%a3, align 32
750 %3 = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %1, <4 x double> %2, <4 x double> %a2) ; memory form: second source is the loaded vector
751 ret <4 x double> %3
752 }
; Declaration of the intrinsic called by the test above.
753 declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
754
; Scheduling test for the 256-bit vblendvps. The blendv.ps.256 intrinsic is
; called twice with %a2 as the selector: once on two register operands and once
; with a vector loaded from %a3, so the check lines expect the register form
; followed by the memory form.
; The "# sched: [a:b]" suffix is the per-instruction scheduling annotation
; (presumably latency : reciprocal throughput for the CPU the prefix names --
; confirm against the run lines at the top of the file).
755 define <8 x float> @test_blendvps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) {
756 ; GENERIC-LABEL: test_blendvps:
757 ; GENERIC: # %bb.0:
758 ; GENERIC-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
759 ; GENERIC-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
760 ; GENERIC-NEXT: retq # sched: [1:1.00]
761 ;
762 ; SANDY-LABEL: test_blendvps:
763 ; SANDY: # %bb.0:
764 ; SANDY-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
765 ; SANDY-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
766 ; SANDY-NEXT: retq # sched: [1:1.00]
767 ;
768 ; HASWELL-LABEL: test_blendvps:
769 ; HASWELL: # %bb.0:
770 ; HASWELL-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
771 ; HASWELL-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
772 ; HASWELL-NEXT: retq # sched: [7:1.00]
773 ;
774 ; BROADWELL-LABEL: test_blendvps:
775 ; BROADWELL: # %bb.0:
776 ; BROADWELL-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
777 ; BROADWELL-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
778 ; BROADWELL-NEXT: retq # sched: [7:1.00]
779 ;
780 ; SKYLAKE-LABEL: test_blendvps:
781 ; SKYLAKE: # %bb.0:
782 ; SKYLAKE-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:0.67]
783 ; SKYLAKE-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:0.67]
784 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
785 ;
786 ; SKX-LABEL: test_blendvps:
787 ; SKX: # %bb.0:
788 ; SKX-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:0.67]
789 ; SKX-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:0.67]
790 ; SKX-NEXT: retq # sched: [7:1.00]
791 ;
792 ; BDVER2-LABEL: test_blendvps:
793 ; BDVER2: # %bb.0:
794 ; BDVER2-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:3.00]
795 ; BDVER2-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [7:3.00]
796 ; BDVER2-NEXT: retq # sched: [5:1.00]
797 ;
798 ; BTVER2-LABEL: test_blendvps:
799 ; BTVER2: # %bb.0:
800 ; BTVER2-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [3:3.00]
801 ; BTVER2-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:3.00]
802 ; BTVER2-NEXT: retq # sched: [4:1.00]
803 ;
804 ; ZNVER1-LABEL: test_blendvps:
805 ; ZNVER1: # %bb.0:
806 ; ZNVER1-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
807 ; ZNVER1-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
808 ; ZNVER1-NEXT: retq # sched: [1:0.50]
809 %1 = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) ; register form
810 %2 = load <8 x float>, <8 x float> *%a3, align 32
811 %3 = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %1, <8 x float> %2, <8 x float> %a2) ; memory form: second source is the loaded vector
812 ret <8 x float> %3
813 }
; Declaration of the intrinsic called by the test above.
814 declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
815
; Scheduling test for vbroadcastf128. A 128-bit <4 x float> is loaded from %a0
; and repeated into both halves of a 256-bit result, which the check lines
; expect to be selected as a single vbroadcastf128 from memory.
; The "sched: [a:b]" suffix is the per-instruction scheduling annotation
; (presumably latency : reciprocal throughput for the CPU the prefix names --
; confirm against the run lines at the top of the file).
; The shuffle mask below was missing from this copy of the test; it is restored
; from the "mem[0,1,0,1]" lane description in the check lines (the loaded
; four floats repeated twice).
816 define <8 x float> @test_broadcastf128(<4 x float> *%a0) {
817 ; GENERIC-LABEL: test_broadcastf128:
818 ; GENERIC: # %bb.0:
819 ; GENERIC-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [7:1.00]
820 ; GENERIC-NEXT: retq # sched: [1:1.00]
821 ;
822 ; SANDY-LABEL: test_broadcastf128:
823 ; SANDY: # %bb.0:
824 ; SANDY-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [7:1.00]
825 ; SANDY-NEXT: retq # sched: [1:1.00]
826 ;
827 ; HASWELL-LABEL: test_broadcastf128:
828 ; HASWELL: # %bb.0:
829 ; HASWELL-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [7:0.50]
830 ; HASWELL-NEXT: retq # sched: [7:1.00]
831 ;
832 ; BROADWELL-LABEL: test_broadcastf128:
833 ; BROADWELL: # %bb.0:
834 ; BROADWELL-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [6:0.50]
835 ; BROADWELL-NEXT: retq # sched: [7:1.00]
836 ;
837 ; SKYLAKE-LABEL: test_broadcastf128:
838 ; SKYLAKE: # %bb.0:
839 ; SKYLAKE-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [7:0.50]
840 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
841 ;
842 ; SKX-LABEL: test_broadcastf128:
843 ; SKX: # %bb.0:
844 ; SKX-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [7:0.50]
845 ; SKX-NEXT: retq # sched: [7:1.00]
846 ;
847 ; BDVER2-LABEL: test_broadcastf128:
848 ; BDVER2: # %bb.0:
849 ; BDVER2-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [7:0.50]
850 ; BDVER2-NEXT: retq # sched: [5:1.00]
851 ;
852 ; BTVER2-LABEL: test_broadcastf128:
853 ; BTVER2: # %bb.0:
854 ; BTVER2-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [6:2.00]
855 ; BTVER2-NEXT: retq # sched: [4:1.00]
856 ;
857 ; ZNVER1-LABEL: test_broadcastf128:
858 ; ZNVER1: # %bb.0:
859 ; ZNVER1-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [8:0.50]
860 ; ZNVER1-NEXT: retq # sched: [1:0.50]
861 %1 = load <4 x float>, <4 x float> *%a0, align 32
862 %2 = shufflevector <4 x float> %1, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> ; repeat the four loaded lanes in both halves
863 ret <8 x float> %2
864 }
865
; Scheduling test for the 256-bit vbroadcastsd. A double loaded from %a0 is
; inserted into lane 0 and splatted with an all-zero shuffle mask, which the
; check lines expect to be selected as a single vbroadcastsd from memory.
; The "# sched: [a:b]" suffix is the per-instruction scheduling annotation
; (presumably latency : reciprocal throughput for the CPU the prefix names --
; confirm against the run lines at the top of the file).
866 define <4 x double> @test_broadcastsd_ymm(double *%a0) {
867 ; GENERIC-LABEL: test_broadcastsd_ymm:
868 ; GENERIC: # %bb.0:
869 ; GENERIC-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [7:0.50]
870 ; GENERIC-NEXT: retq # sched: [1:1.00]
871 ;
872 ; SANDY-LABEL: test_broadcastsd_ymm:
873 ; SANDY: # %bb.0:
874 ; SANDY-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [7:0.50]
875 ; SANDY-NEXT: retq # sched: [1:1.00]
876 ;
877 ; HASWELL-LABEL: test_broadcastsd_ymm:
878 ; HASWELL: # %bb.0:
879 ; HASWELL-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [7:0.50]
880 ; HASWELL-NEXT: retq # sched: [7:1.00]
881 ;
882 ; BROADWELL-LABEL: test_broadcastsd_ymm:
883 ; BROADWELL: # %bb.0:
884 ; BROADWELL-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [6:0.50]
885 ; BROADWELL-NEXT: retq # sched: [7:1.00]
886 ;
887 ; SKYLAKE-LABEL: test_broadcastsd_ymm:
888 ; SKYLAKE: # %bb.0:
889 ; SKYLAKE-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [7:0.50]
890 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
891 ;
892 ; SKX-LABEL: test_broadcastsd_ymm:
893 ; SKX: # %bb.0:
894 ; SKX-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [7:0.50]
895 ; SKX-NEXT: retq # sched: [7:1.00]
896 ;
897 ; BDVER2-LABEL: test_broadcastsd_ymm:
898 ; BDVER2: # %bb.0:
899 ; BDVER2-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [6:2.00]
900 ; BDVER2-NEXT: retq # sched: [5:1.00]
901 ;
902 ; BTVER2-LABEL: test_broadcastsd_ymm:
903 ; BTVER2: # %bb.0:
904 ; BTVER2-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [6:2.00]
905 ; BTVER2-NEXT: retq # sched: [4:1.00]
906 ;
907 ; ZNVER1-LABEL: test_broadcastsd_ymm:
908 ; ZNVER1: # %bb.0:
909 ; ZNVER1-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [8:0.50]
910 ; ZNVER1-NEXT: retq # sched: [1:0.50]
911 %1 = load double, double *%a0, align 8
912 %2 = insertelement <4 x double> undef, double %1, i32 0 ; scalar goes into lane 0
913 %3 = shufflevector <4 x double> %2, <4 x double> undef, <4 x i32> zeroinitializer ; all-zero mask copies lane 0 to every lane
914 ret <4 x double> %3
915 }
916
; Scheduling test for the 128-bit vbroadcastss. A float loaded from %a0 is
; inserted into lane 0 and splatted with an all-zero shuffle mask, which the
; check lines expect to be selected as a single vbroadcastss from memory into
; an xmm register.
; The "# sched: [a:b]" suffix is the per-instruction scheduling annotation
; (presumably latency : reciprocal throughput for the CPU the prefix names --
; confirm against the run lines at the top of the file).
917 define <4 x float> @test_broadcastss(float *%a0) {
918 ; GENERIC-LABEL: test_broadcastss:
919 ; GENERIC: # %bb.0:
920 ; GENERIC-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [6:0.50]
921 ; GENERIC-NEXT: retq # sched: [1:1.00]
922 ;
923 ; SANDY-LABEL: test_broadcastss:
924 ; SANDY: # %bb.0:
925 ; SANDY-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [6:0.50]
926 ; SANDY-NEXT: retq # sched: [1:1.00]
927 ;
928 ; HASWELL-LABEL: test_broadcastss:
929 ; HASWELL: # %bb.0:
930 ; HASWELL-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [6:0.50]
931 ; HASWELL-NEXT: retq # sched: [7:1.00]
932 ;
933 ; BROADWELL-LABEL: test_broadcastss:
934 ; BROADWELL: # %bb.0:
935 ; BROADWELL-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [5:0.50]
936 ; BROADWELL-NEXT: retq # sched: [7:1.00]
937 ;
938 ; SKYLAKE-LABEL: test_broadcastss:
939 ; SKYLAKE: # %bb.0:
940 ; SKYLAKE-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [6:0.50]
941 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
942 ;
943 ; SKX-LABEL: test_broadcastss:
944 ; SKX: # %bb.0:
945 ; SKX-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [6:0.50]
946 ; SKX-NEXT: retq # sched: [7:1.00]
947 ;
948 ; BDVER2-LABEL: test_broadcastss:
949 ; BDVER2: # %bb.0:
950 ; BDVER2-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [7:0.50]
951 ; BDVER2-NEXT: retq # sched: [5:1.00]
952 ;
953 ; BTVER2-LABEL: test_broadcastss:
954 ; BTVER2: # %bb.0:
955 ; BTVER2-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [6:1.00]
956 ; BTVER2-NEXT: retq # sched: [4:1.00]
957 ;
958 ; ZNVER1-LABEL: test_broadcastss:
959 ; ZNVER1: # %bb.0:
960 ; ZNVER1-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [8:0.50]
961 ; ZNVER1-NEXT: retq # sched: [1:0.50]
962 %1 = load float, float *%a0, align 4
963 %2 = insertelement <4 x float> undef, float %1, i32 0 ; scalar goes into lane 0
964 %3 = shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> zeroinitializer ; all-zero mask copies lane 0 to every lane
965 ret <4 x float> %3
966 }
967
; Scheduling test for the 256-bit vbroadcastss. A float loaded from %a0 is
; inserted into lane 0 and splatted with an all-zero shuffle mask, which the
; check lines expect to be selected as a single vbroadcastss from memory into
; a ymm register.
; The "# sched: [a:b]" suffix is the per-instruction scheduling annotation
; (presumably latency : reciprocal throughput for the CPU the prefix names --
; confirm against the run lines at the top of the file).
968 define <8 x float> @test_broadcastss_ymm(float *%a0) {
969 ; GENERIC-LABEL: test_broadcastss_ymm:
970 ; GENERIC: # %bb.0:
971 ; GENERIC-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [7:0.50]
972 ; GENERIC-NEXT: retq # sched: [1:1.00]
973 ;
974 ; SANDY-LABEL: test_broadcastss_ymm:
975 ; SANDY: # %bb.0:
976 ; SANDY-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [7:0.50]
977 ; SANDY-NEXT: retq # sched: [1:1.00]
978 ;
979 ; HASWELL-LABEL: test_broadcastss_ymm:
980 ; HASWELL: # %bb.0:
981 ; HASWELL-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [7:0.50]
982 ; HASWELL-NEXT: retq # sched: [7:1.00]
983 ;
984 ; BROADWELL-LABEL: test_broadcastss_ymm:
985 ; BROADWELL: # %bb.0:
986 ; BROADWELL-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [6:0.50]
987 ; BROADWELL-NEXT: retq # sched: [7:1.00]
988 ;
989 ; SKYLAKE-LABEL: test_broadcastss_ymm:
990 ; SKYLAKE: # %bb.0:
991 ; SKYLAKE-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [7:0.50]
992 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
993 ;
994 ; SKX-LABEL: test_broadcastss_ymm:
995 ; SKX: # %bb.0:
996 ; SKX-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [7:0.50]
997 ; SKX-NEXT: retq # sched: [7:1.00]
998 ;
999 ; BDVER2-LABEL: test_broadcastss_ymm:
1000 ; BDVER2: # %bb.0:
1001 ; BDVER2-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [6:2.00]
1002 ; BDVER2-NEXT: retq # sched: [5:1.00]
1003 ;
1004 ; BTVER2-LABEL: test_broadcastss_ymm:
1005 ; BTVER2: # %bb.0:
1006 ; BTVER2-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [6:2.00]
1007 ; BTVER2-NEXT: retq # sched: [4:1.00]
1008 ;
1009 ; ZNVER1-LABEL: test_broadcastss_ymm:
1010 ; ZNVER1: # %bb.0:
1011 ; ZNVER1-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [8:0.50]
1012 ; ZNVER1-NEXT: retq # sched: [1:0.50]
1013 %1 = load float, float *%a0, align 4
1014 %2 = insertelement <8 x float> undef, float %1, i32 0 ; scalar goes into lane 0
1015 %3 = shufflevector <8 x float> %2, <8 x float> undef, <8 x i32> zeroinitializer ; all-zero mask copies lane 0 to every lane
1016 ret <8 x float> %3
1017 }
1018
; test_cmppd: ordered-equal compares of <4 x double> %a0 against %a1 and against
; a vector loaded from %a2 (align 32). Both i1 results are sign-extended to i64
; lanes, OR'ed together and bitcast back to <4 x double>.
; The assertion lines below expect, for every prefix, a register-form vcmpeqpd,
; a memory-form vcmpeqpd and a vorpd, each followed by a "# sched" annotation.
; NOTE(review): the prefixes are presumably bound to -mcpu values by RUN lines
; outside this view, and the bracketed pair is presumably latency and
; throughput from the scheduling model -- confirm against the file header.
1019 define <4 x double> @test_cmppd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
1020 ; GENERIC-LABEL: test_cmppd:
1021 ; GENERIC: # %bb.0:
1022 ; GENERIC-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
1023 ; GENERIC-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
1024 ; GENERIC-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:1.00]
1025 ; GENERIC-NEXT: retq # sched: [1:1.00]
1026 ;
1027 ; SANDY-LABEL: test_cmppd:
1028 ; SANDY: # %bb.0:
1029 ; SANDY-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
1030 ; SANDY-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
1031 ; SANDY-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:1.00]
1032 ; SANDY-NEXT: retq # sched: [1:1.00]
1033 ;
1034 ; HASWELL-LABEL: test_cmppd:
1035 ; HASWELL: # %bb.0:
1036 ; HASWELL-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
1037 ; HASWELL-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
1038 ; HASWELL-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:1.00]
1039 ; HASWELL-NEXT: retq # sched: [7:1.00]
1040 ;
1041 ; BROADWELL-LABEL: test_cmppd:
1042 ; BROADWELL: # %bb.0:
1043 ; BROADWELL-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
1044 ; BROADWELL-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
1045 ; BROADWELL-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:1.00]
1046 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1047 ;
1048 ; SKYLAKE-LABEL: test_cmppd:
1049 ; SKYLAKE: # %bb.0:
1050 ; SKYLAKE-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [4:0.50]
1051 ; SKYLAKE-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
1052 ; SKYLAKE-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
1053 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1054 ;
1055 ; SKX-LABEL: test_cmppd:
1056 ; SKX: # %bb.0:
1057 ; SKX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [4:0.50]
1058 ; SKX-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
1059 ; SKX-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
1060 ; SKX-NEXT: retq # sched: [7:1.00]
1061 ;
1062 ; BDVER2-LABEL: test_cmppd:
1063 ; BDVER2: # %bb.0:
1064 ; BDVER2-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [2:2.00]
1065 ; BDVER2-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [7:2.00]
1066 ; BDVER2-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [2:1.00]
1067 ; BDVER2-NEXT: retq # sched: [5:1.00]
1068 ;
1069 ; BTVER2-LABEL: test_cmppd:
1070 ; BTVER2: # %bb.0:
1071 ; BTVER2-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [2:2.00]
1072 ; BTVER2-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [7:2.00]
1073 ; BTVER2-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:1.00]
1074 ; BTVER2-NEXT: retq # sched: [4:1.00]
1075 ;
1076 ; ZNVER1-LABEL: test_cmppd:
1077 ; ZNVER1: # %bb.0:
1078 ; ZNVER1-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
1079 ; ZNVER1-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
1080 ; ZNVER1-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:0.25]
1081 ; ZNVER1-NEXT: retq # sched: [1:0.50]
1082 %1 = fcmp oeq <4 x double> %a0, %a1
1083 %2 = load <4 x double>, <4 x double> *%a2, align 32
1084 %3 = fcmp oeq <4 x double> %a0, %2
1085 %4 = sext <4 x i1> %1 to <4 x i64>
1086 %5 = sext <4 x i1> %3 to <4 x i64>
1087 %6 = or <4 x i64> %4, %5
1088 %7 = bitcast <4 x i64> %6 to <4 x double>
1089 ret <4 x double> %7
1090 }
1091
; test_cmpps: ordered-equal compares of <8 x float> %a0 against %a1 and against
; a vector loaded from %a2 (align 32). Both i1 results are sign-extended to i32
; lanes, OR'ed together and bitcast back to <8 x float>.
; The assertion lines below expect, for every prefix, a register-form vcmpeqps,
; a memory-form vcmpeqps and a vorps, each followed by a "# sched" annotation.
; NOTE(review): prefixes are presumably bound to -mcpu values by RUN lines
; outside this view -- confirm against the file header.
1092 define <8 x float> @test_cmpps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
1093 ; GENERIC-LABEL: test_cmpps:
1094 ; GENERIC: # %bb.0:
1095 ; GENERIC-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
1096 ; GENERIC-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
1097 ; GENERIC-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:1.00]
1098 ; GENERIC-NEXT: retq # sched: [1:1.00]
1099 ;
1100 ; SANDY-LABEL: test_cmpps:
1101 ; SANDY: # %bb.0:
1102 ; SANDY-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
1103 ; SANDY-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
1104 ; SANDY-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:1.00]
1105 ; SANDY-NEXT: retq # sched: [1:1.00]
1106 ;
1107 ; HASWELL-LABEL: test_cmpps:
1108 ; HASWELL: # %bb.0:
1109 ; HASWELL-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
1110 ; HASWELL-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
1111 ; HASWELL-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:1.00]
1112 ; HASWELL-NEXT: retq # sched: [7:1.00]
1113 ;
1114 ; BROADWELL-LABEL: test_cmpps:
1115 ; BROADWELL: # %bb.0:
1116 ; BROADWELL-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
1117 ; BROADWELL-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
1118 ; BROADWELL-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:1.00]
1119 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1120 ;
1121 ; SKYLAKE-LABEL: test_cmpps:
1122 ; SKYLAKE: # %bb.0:
1123 ; SKYLAKE-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [4:0.50]
1124 ; SKYLAKE-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
1125 ; SKYLAKE-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
1126 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1127 ;
1128 ; SKX-LABEL: test_cmpps:
1129 ; SKX: # %bb.0:
1130 ; SKX-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [4:0.50]
1131 ; SKX-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
1132 ; SKX-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
1133 ; SKX-NEXT: retq # sched: [7:1.00]
1134 ;
1135 ; BDVER2-LABEL: test_cmpps:
1136 ; BDVER2: # %bb.0:
1137 ; BDVER2-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [2:2.00]
1138 ; BDVER2-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [7:2.00]
1139 ; BDVER2-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [2:1.00]
1140 ; BDVER2-NEXT: retq # sched: [5:1.00]
1141 ;
1142 ; BTVER2-LABEL: test_cmpps:
1143 ; BTVER2: # %bb.0:
1144 ; BTVER2-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [2:2.00]
1145 ; BTVER2-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [7:2.00]
1146 ; BTVER2-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:1.00]
1147 ; BTVER2-NEXT: retq # sched: [4:1.00]
1148 ;
1149 ; ZNVER1-LABEL: test_cmpps:
1150 ; ZNVER1: # %bb.0:
1151 ; ZNVER1-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
1152 ; ZNVER1-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
1153 ; ZNVER1-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:0.25]
1154 ; ZNVER1-NEXT: retq # sched: [1:0.50]
1155 %1 = fcmp oeq <8 x float> %a0, %a1
1156 %2 = load <8 x float>, <8 x float> *%a2, align 32
1157 %3 = fcmp oeq <8 x float> %a0, %2
1158 %4 = sext <8 x i1> %1 to <8 x i32>
1159 %5 = sext <8 x i1> %3 to <8 x i32>
1160 %6 = or <8 x i32> %4, %5
1161 %7 = bitcast <8 x i32> %6 to <8 x float>
1162 ret <8 x float> %7
1163 }
1164
; test_cvtdq2pd: signed <4 x i32> to <4 x double> conversion of the register
; argument %a0 and of a vector loaded from %a1 (align 16); the two converted
; values are added with fadd so that both conversions stay live.
; The assertion lines below expect a register-form and a memory-form vcvtdq2pd
; followed by vaddpd. The order of the two conversions differs between prefixes
; (BROADWELL, BDVER2, BTVER2 and ZNVER1 list the memory form first).
1165 define <4 x double> @test_cvtdq2pd(<4 x i32> %a0, <4 x i32> *%a1) {
1166 ; GENERIC-LABEL: test_cvtdq2pd:
1167 ; GENERIC: # %bb.0:
1168 ; GENERIC-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [4:1.00]
1169 ; GENERIC-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [10:1.00]
1170 ; GENERIC-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1171 ; GENERIC-NEXT: retq # sched: [1:1.00]
1172 ;
1173 ; SANDY-LABEL: test_cvtdq2pd:
1174 ; SANDY: # %bb.0:
1175 ; SANDY-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [4:1.00]
1176 ; SANDY-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [10:1.00]
1177 ; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1178 ; SANDY-NEXT: retq # sched: [1:1.00]
1179 ;
1180 ; HASWELL-LABEL: test_cvtdq2pd:
1181 ; HASWELL: # %bb.0:
1182 ; HASWELL-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [6:1.00]
1183 ; HASWELL-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [12:1.00]
1184 ; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1185 ; HASWELL-NEXT: retq # sched: [7:1.00]
1186 ;
1187 ; BROADWELL-LABEL: test_cvtdq2pd:
1188 ; BROADWELL: # %bb.0:
1189 ; BROADWELL-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [11:1.00]
1190 ; BROADWELL-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [6:1.00]
1191 ; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1192 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1193 ;
1194 ; SKYLAKE-LABEL: test_cvtdq2pd:
1195 ; SKYLAKE: # %bb.0:
1196 ; SKYLAKE-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [7:1.00]
1197 ; SKYLAKE-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [13:1.00]
1198 ; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
1199 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1200 ;
1201 ; SKX-LABEL: test_cvtdq2pd:
1202 ; SKX: # %bb.0:
1203 ; SKX-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [7:1.00]
1204 ; SKX-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [13:1.00]
1205 ; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
1206 ; SKX-NEXT: retq # sched: [7:1.00]
1207 ;
1208 ; BDVER2-LABEL: test_cvtdq2pd:
1209 ; BDVER2: # %bb.0:
1210 ; BDVER2-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [13:2.00]
1211 ; BDVER2-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [8:2.00]
1212 ; BDVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
1213 ; BDVER2-NEXT: retq # sched: [5:1.00]
1214 ;
1215 ; BTVER2-LABEL: test_cvtdq2pd:
1216 ; BTVER2: # %bb.0:
1217 ; BTVER2-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [8:2.00]
1218 ; BTVER2-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [3:2.00]
1219 ; BTVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
1220 ; BTVER2-NEXT: retq # sched: [4:1.00]
1221 ;
1222 ; ZNVER1-LABEL: test_cvtdq2pd:
1223 ; ZNVER1: # %bb.0:
1224 ; ZNVER1-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [12:1.00]
1225 ; ZNVER1-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [5:1.00]
1226 ; ZNVER1-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1227 ; ZNVER1-NEXT: retq # sched: [1:0.50]
1228 %1 = sitofp <4 x i32> %a0 to <4 x double>
1229 %2 = load <4 x i32>, <4 x i32> *%a1, align 16
1230 %3 = sitofp <4 x i32> %2 to <4 x double>
1231 %4 = fadd <4 x double> %1, %3
1232 ret <4 x double> %4
1233 }
1234
; test_cvtdq2ps: signed <8 x i32> to <8 x float> conversion of the register
; argument %a0 and of a vector loaded from %a1; the two converted values are
; added with fadd so that both conversions stay live.
; The 256-bit load is declared with align 16 only. The SANDY assertions expect
; it as a 128-bit vmovaps plus vinsertf128 from 16(%rdi) feeding a register-form
; vcvtdq2ps, while every other prefix expects the load folded into
; vcvtdq2ps (%rdi).
; NOTE(review): the under-alignment looks deliberate (it is what the SANDY
; expectations exercise) -- do not "fix" it to align 32 without regenerating
; the assertions.
1235 define <8 x float> @test_cvtdq2ps(<8 x i32> %a0, <8 x i32> *%a1) {
1236 ; GENERIC-LABEL: test_cvtdq2ps:
1237 ; GENERIC: # %bb.0:
1238 ; GENERIC-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00]
1239 ; GENERIC-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [10:1.00]
1240 ; GENERIC-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1241 ; GENERIC-NEXT: retq # sched: [1:1.00]
1242 ;
1243 ; SANDY-LABEL: test_cvtdq2ps:
1244 ; SANDY: # %bb.0:
1245 ; SANDY-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00]
1246 ; SANDY-NEXT: vmovaps (%rdi), %xmm1 # sched: [6:0.50]
1247 ; SANDY-NEXT: vinsertf128 $1, 16(%rdi), %ymm1, %ymm1 # sched: [7:0.50]
1248 ; SANDY-NEXT: vcvtdq2ps %ymm1, %ymm1 # sched: [3:1.00]
1249 ; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1250 ; SANDY-NEXT: retq # sched: [1:1.00]
1251 ;
1252 ; HASWELL-LABEL: test_cvtdq2ps:
1253 ; HASWELL: # %bb.0:
1254 ; HASWELL-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00]
1255 ; HASWELL-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [10:1.00]
1256 ; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1257 ; HASWELL-NEXT: retq # sched: [7:1.00]
1258 ;
1259 ; BROADWELL-LABEL: test_cvtdq2ps:
1260 ; BROADWELL: # %bb.0:
1261 ; BROADWELL-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00]
1262 ; BROADWELL-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [9:1.00]
1263 ; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1264 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1265 ;
1266 ; SKYLAKE-LABEL: test_cvtdq2ps:
1267 ; SKYLAKE: # %bb.0:
1268 ; SKYLAKE-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [4:0.50]
1269 ; SKYLAKE-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [11:0.50]
1270 ; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
1271 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1272 ;
1273 ; SKX-LABEL: test_cvtdq2ps:
1274 ; SKX: # %bb.0:
1275 ; SKX-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [4:0.50]
1276 ; SKX-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [11:0.50]
1277 ; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
1278 ; SKX-NEXT: retq # sched: [7:1.00]
1279 ;
1280 ; BDVER2-LABEL: test_cvtdq2ps:
1281 ; BDVER2: # %bb.0:
1282 ; BDVER2-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [9:2.00]
1283 ; BDVER2-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [4:2.00]
1284 ; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
1285 ; BDVER2-NEXT: retq # sched: [5:1.00]
1286 ;
1287 ; BTVER2-LABEL: test_cvtdq2ps:
1288 ; BTVER2: # %bb.0:
1289 ; BTVER2-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [8:2.00]
1290 ; BTVER2-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [3:2.00]
1291 ; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
1292 ; BTVER2-NEXT: retq # sched: [4:1.00]
1293 ;
1294 ; ZNVER1-LABEL: test_cvtdq2ps:
1295 ; ZNVER1: # %bb.0:
1296 ; ZNVER1-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [12:1.00]
1297 ; ZNVER1-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [5:1.00]
1298 ; ZNVER1-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1299 ; ZNVER1-NEXT: retq # sched: [1:0.50]
1300 %1 = sitofp <8 x i32> %a0 to <8 x float>
1301 %2 = load <8 x i32>, <8 x i32> *%a1, align 16
1302 %3 = sitofp <8 x i32> %2 to <8 x float>
1303 %4 = fadd <8 x float> %1, %3
1304 ret <8 x float> %4
1305 }
1306
; test_cvtpd2dq: converts <4 x double> to <4 x i32> through the
; llvm.x86.avx.cvt.pd2dq.256 intrinsic, once for the register argument %a0 and
; once for a vector loaded from %a1 (align 32), then concatenates the two
; <4 x i32> results into a single <8 x i32> return value.
; The assertion lines below expect a register-form vcvtpd2dq, a memory-form
; vcvtpd2dqy and a vinsertf128 $1 that places the second result in the upper
; 128 bits. The order of the two conversions differs between prefixes.
1307 define <8 x i32> @test_cvtpd2dq(<4 x double> %a0, <4 x double> *%a1) {
1308 ; GENERIC-LABEL: test_cvtpd2dq:
1309 ; GENERIC: # %bb.0:
1310 ; GENERIC-NEXT: vcvtpd2dq %ymm0, %xmm0 # sched: [4:1.00]
1311 ; GENERIC-NEXT: vcvtpd2dqy (%rdi), %xmm1 # sched: [11:1.00]
1312 ; GENERIC-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
1313 ; GENERIC-NEXT: retq # sched: [1:1.00]
1314 ;
1315 ; SANDY-LABEL: test_cvtpd2dq:
1316 ; SANDY: # %bb.0:
1317 ; SANDY-NEXT: vcvtpd2dq %ymm0, %xmm0 # sched: [4:1.00]
1318 ; SANDY-NEXT: vcvtpd2dqy (%rdi), %xmm1 # sched: [11:1.00]
1319 ; SANDY-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
1320 ; SANDY-NEXT: retq # sched: [1:1.00]
1321 ;
1322 ; HASWELL-LABEL: test_cvtpd2dq:
1323 ; HASWELL: # %bb.0:
1324 ; HASWELL-NEXT: vcvtpd2dq %ymm0, %xmm0 # sched: [6:1.00]
1325 ; HASWELL-NEXT: vcvtpd2dqy (%rdi), %xmm1 # sched: [8:1.00]
1326 ; HASWELL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
1327 ; HASWELL-NEXT: retq # sched: [7:1.00]
1328 ;
1329 ; BROADWELL-LABEL: test_cvtpd2dq:
1330 ; BROADWELL: # %bb.0:
1331 ; BROADWELL-NEXT: vcvtpd2dq %ymm0, %xmm0 # sched: [6:1.00]
1332 ; BROADWELL-NEXT: vcvtpd2dqy (%rdi), %xmm1 # sched: [8:1.00]
1333 ; BROADWELL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
1334 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1335 ;
1336 ; SKYLAKE-LABEL: test_cvtpd2dq:
1337 ; SKYLAKE: # %bb.0:
1338 ; SKYLAKE-NEXT: vcvtpd2dq %ymm0, %xmm0 # sched: [7:1.00]
1339 ; SKYLAKE-NEXT: vcvtpd2dqy (%rdi), %xmm1 # sched: [8:1.00]
1340 ; SKYLAKE-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
1341 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1342 ;
1343 ; SKX-LABEL: test_cvtpd2dq:
1344 ; SKX: # %bb.0:
1345 ; SKX-NEXT: vcvtpd2dq %ymm0, %xmm0 # sched: [7:1.00]
1346 ; SKX-NEXT: vcvtpd2dqy (%rdi), %xmm1 # sched: [8:0.50]
1347 ; SKX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
1348 ; SKX-NEXT: retq # sched: [7:1.00]
1349 ;
1350 ; BDVER2-LABEL: test_cvtpd2dq:
1351 ; BDVER2: # %bb.0:
1352 ; BDVER2-NEXT: vcvtpd2dqy (%rdi), %xmm1 # sched: [13:2.00]
1353 ; BDVER2-NEXT: vcvtpd2dq %ymm0, %xmm0 # sched: [8:2.00]
1354 ; BDVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [2:0.50]
1355 ; BDVER2-NEXT: retq # sched: [5:1.00]
1356 ;
1357 ; BTVER2-LABEL: test_cvtpd2dq:
1358 ; BTVER2: # %bb.0:
1359 ; BTVER2-NEXT: vcvtpd2dqy (%rdi), %xmm1 # sched: [11:2.00]
1360 ; BTVER2-NEXT: vcvtpd2dq %ymm0, %xmm0 # sched: [6:2.00]
1361 ; BTVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
1362 ; BTVER2-NEXT: retq # sched: [4:1.00]
1363 ;
1364 ; ZNVER1-LABEL: test_cvtpd2dq:
1365 ; ZNVER1: # %bb.0:
1366 ; ZNVER1-NEXT: vcvtpd2dqy (%rdi), %xmm1 # sched: [12:1.00]
1367 ; ZNVER1-NEXT: vcvtpd2dq %ymm0, %xmm0 # sched: [5:1.00]
1368 ; ZNVER1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [2:0.67]
1369 ; ZNVER1-NEXT: retq # sched: [1:0.50]
1370 %1 = call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> %a0)
1371 %2 = load <4 x double>, <4 x double> *%a1, align 32
1372 %3 = call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> %2)
; Identity mask 0..7 concatenates %1 (low half) with %3 (high half); shufflevector
; requires this third operand, and it is the shape the vinsertf128 $1 lines assert.
1373 %4 = shufflevector <4 x i32> %1, <4 x i32> %3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1374 ret <8 x i32> %4
1375 }
; Declaration of the intrinsic called by @test_cvtpd2dq: takes <4 x double>,
; returns <4 x i32>, marked nounwind readnone.
1376 declare <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double>) nounwind readnone
1377
; test_cvttpd2dq: converts <4 x double> to <4 x i32> with plain fptosi, once
; for the register argument %a0 and once for a vector loaded from %a1
; (align 32), then concatenates the two <4 x i32> results into a single
; <8 x i32> return value.
; The assertion lines below expect a register-form vcvttpd2dq, a memory-form
; vcvttpd2dqy and a vinsertf128 $1 that places the second result in the upper
; 128 bits. The order of the two conversions differs between prefixes.
1378 define <8 x i32> @test_cvttpd2dq(<4 x double> %a0, <4 x double> *%a1) {
1379 ; GENERIC-LABEL: test_cvttpd2dq:
1380 ; GENERIC: # %bb.0:
1381 ; GENERIC-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [4:1.00]
1382 ; GENERIC-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [11:1.00]
1383 ; GENERIC-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
1384 ; GENERIC-NEXT: retq # sched: [1:1.00]
1385 ;
1386 ; SANDY-LABEL: test_cvttpd2dq:
1387 ; SANDY: # %bb.0:
1388 ; SANDY-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [4:1.00]
1389 ; SANDY-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [11:1.00]
1390 ; SANDY-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
1391 ; SANDY-NEXT: retq # sched: [1:1.00]
1392 ;
1393 ; HASWELL-LABEL: test_cvttpd2dq:
1394 ; HASWELL: # %bb.0:
1395 ; HASWELL-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [6:1.00]
1396 ; HASWELL-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [8:1.00]
1397 ; HASWELL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
1398 ; HASWELL-NEXT: retq # sched: [7:1.00]
1399 ;
1400 ; BROADWELL-LABEL: test_cvttpd2dq:
1401 ; BROADWELL: # %bb.0:
1402 ; BROADWELL-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [6:1.00]
1403 ; BROADWELL-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [8:1.00]
1404 ; BROADWELL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
1405 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1406 ;
1407 ; SKYLAKE-LABEL: test_cvttpd2dq:
1408 ; SKYLAKE: # %bb.0:
1409 ; SKYLAKE-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [7:1.00]
1410 ; SKYLAKE-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [8:1.00]
1411 ; SKYLAKE-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
1412 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1413 ;
1414 ; SKX-LABEL: test_cvttpd2dq:
1415 ; SKX: # %bb.0:
1416 ; SKX-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [7:1.00]
1417 ; SKX-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [8:0.50]
1418 ; SKX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
1419 ; SKX-NEXT: retq # sched: [7:1.00]
1420 ;
1421 ; BDVER2-LABEL: test_cvttpd2dq:
1422 ; BDVER2: # %bb.0:
1423 ; BDVER2-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [13:2.00]
1424 ; BDVER2-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [8:2.00]
1425 ; BDVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [2:0.50]
1426 ; BDVER2-NEXT: retq # sched: [5:1.00]
1427 ;
1428 ; BTVER2-LABEL: test_cvttpd2dq:
1429 ; BTVER2: # %bb.0:
1430 ; BTVER2-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [11:2.00]
1431 ; BTVER2-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [6:2.00]
1432 ; BTVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
1433 ; BTVER2-NEXT: retq # sched: [4:1.00]
1434 ;
1435 ; ZNVER1-LABEL: test_cvttpd2dq:
1436 ; ZNVER1: # %bb.0:
1437 ; ZNVER1-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [12:1.00]
1438 ; ZNVER1-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [5:1.00]
1439 ; ZNVER1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [2:0.67]
1440 ; ZNVER1-NEXT: retq # sched: [1:0.50]
1441 %1 = fptosi <4 x double> %a0 to <4 x i32>
1442 %2 = load <4 x double>, <4 x double> *%a1, align 32
1443 %3 = fptosi <4 x double> %2 to <4 x i32>
; Identity mask 0..7 concatenates %1 (low half) with %3 (high half); shufflevector
; requires this third operand, and it is the shape the vinsertf128 $1 lines assert.
1444 %4 = shufflevector <4 x i32> %1, <4 x i32> %3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1445 ret <8 x i32> %4
1446 }
1447
; test_cvtpd2ps: truncates <4 x double> to <4 x float> with fptrunc, once for
; the register argument %a0 and once for a vector loaded from %a1 (align 32),
; then concatenates the two <4 x float> results into a single <8 x float>
; return value.
; The assertion lines below expect a register-form vcvtpd2ps, a memory-form
; vcvtpd2psy and a vinsertf128 $1 that places the second result in the upper
; 128 bits. The order of the two conversions differs between prefixes.
1448 define <8 x float> @test_cvtpd2ps(<4 x double> %a0, <4 x double> *%a1) {
1449 ; GENERIC-LABEL: test_cvtpd2ps:
1450 ; GENERIC: # %bb.0:
1451 ; GENERIC-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [4:1.00]
1452 ; GENERIC-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [11:1.00]
1453 ; GENERIC-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
1454 ; GENERIC-NEXT: retq # sched: [1:1.00]
1455 ;
1456 ; SANDY-LABEL: test_cvtpd2ps:
1457 ; SANDY: # %bb.0:
1458 ; SANDY-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [4:1.00]
1459 ; SANDY-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [11:1.00]
1460 ; SANDY-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
1461 ; SANDY-NEXT: retq # sched: [1:1.00]
1462 ;
1463 ; HASWELL-LABEL: test_cvtpd2ps:
1464 ; HASWELL: # %bb.0:
1465 ; HASWELL-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [6:1.00]
1466 ; HASWELL-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [8:1.00]
1467 ; HASWELL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
1468 ; HASWELL-NEXT: retq # sched: [7:1.00]
1469 ;
1470 ; BROADWELL-LABEL: test_cvtpd2ps:
1471 ; BROADWELL: # %bb.0:
1472 ; BROADWELL-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [6:1.00]
1473 ; BROADWELL-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [8:1.00]
1474 ; BROADWELL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
1475 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1476 ;
1477 ; SKYLAKE-LABEL: test_cvtpd2ps:
1478 ; SKYLAKE: # %bb.0:
1479 ; SKYLAKE-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [7:1.00]
1480 ; SKYLAKE-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [8:1.00]
1481 ; SKYLAKE-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
1482 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1483 ;
1484 ; SKX-LABEL: test_cvtpd2ps:
1485 ; SKX: # %bb.0:
1486 ; SKX-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [7:1.00]
1487 ; SKX-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [8:1.00]
1488 ; SKX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
1489 ; SKX-NEXT: retq # sched: [7:1.00]
1490 ;
1491 ; BDVER2-LABEL: test_cvtpd2ps:
1492 ; BDVER2: # %bb.0:
1493 ; BDVER2-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [13:2.00]
1494 ; BDVER2-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [8:2.00]
1495 ; BDVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [2:0.50]
1496 ; BDVER2-NEXT: retq # sched: [5:1.00]
1497 ;
1498 ; BTVER2-LABEL: test_cvtpd2ps:
1499 ; BTVER2: # %bb.0:
1500 ; BTVER2-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [11:2.00]
1501 ; BTVER2-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [6:2.00]
1502 ; BTVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
1503 ; BTVER2-NEXT: retq # sched: [4:1.00]
1504 ;
1505 ; ZNVER1-LABEL: test_cvtpd2ps:
1506 ; ZNVER1: # %bb.0:
1507 ; ZNVER1-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [11:1.00]
1508 ; ZNVER1-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [5:1.00]
1509 ; ZNVER1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [2:0.67]
1510 ; ZNVER1-NEXT: retq # sched: [1:0.50]
1511 %1 = fptrunc <4 x double> %a0 to <4 x float>
1512 %2 = load <4 x double>, <4 x double> *%a1, align 32
1513 %3 = fptrunc <4 x double> %2 to <4 x float>
; Identity mask 0..7 concatenates %1 (low half) with %3 (high half); shufflevector
; requires this third operand, and it is the shape the vinsertf128 $1 lines assert.
1514 %4 = shufflevector <4 x float> %1, <4 x float> %3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1515 ret <8 x float> %4
1516 }
1517
1518 define <8 x i32> @test_cvtps2dq(<8 x float> %a0, <8 x float> *%a1) {
1519 ; GENERIC-LABEL: test_cvtps2dq:
1520 ; GENERIC: # %bb.0:
1521 ; GENERIC-NEXT: vcvtps2dq %ymm0, %ymm0 # sched: [3:1.00]
1522 ; GENERIC-NEXT: vcvtps2dq (%rdi), %ymm1 # sched: [10:1.00]
1523 ; GENERIC-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
1524 ; GENERIC-NEXT: retq # sched: [1:1.00]
1525 ;
1526 ; SANDY-LABEL: test_cvtps2dq:
1527 ; SANDY: # %bb.0:
1528 ; SANDY-NEXT: vcvtps2dq %ymm0, %ymm0 # sched: [3:1.00]
1529 ; SANDY-NEXT: vcvtps2dq (%rdi), %ymm1 # sched: [10:1.00]
1530 <