commit d3c452a: [X86] Clean up whitespace as well as minor coding style
Author: Michael Liao
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@223339 91177308-0d34-0410-b5e6-96231b3b80d8
33 changed files with 405 additions and 411 deletions. In the excerpts below, removed lines are prefixed with "-", added lines with "+", unchanged context is unprefixed, and "..." marks collapsed context; pairs that look identical differ only in trailing whitespace.
  typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
  SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
  SmallVector<ICToken, 4> PostfixStack;
-
+
public:
  int64_t popOperand() {
    assert (!PostfixStack.empty() && "Poped an empty stack!");
...
           "Unexpected operand!");
    PostfixStack.push_back(std::make_pair(Op, Val));
  }
-
+
  void popOperator() { InfixOperatorStack.pop_back(); }
  void pushOperator(InfixCalculatorTok Op) {
    // Push the new operator if the stack is empty.
...
      InfixOperatorStack.push_back(Op);
      return;
    }
-
+
    // Push the new operator if it has a higher precedence than the operator
    // on the top of the stack or the operator on the top of the stack is a
    // left parentheses.
...
      InfixOperatorStack.push_back(Op);
      return;
    }
-
+
    // The operator on the top of the stack has higher precedence than the
    // new operator.
    unsigned ParenCount = 0;
...
      // Nothing to process.
      if (InfixOperatorStack.empty())
        break;
-
+
      Idx = InfixOperatorStack.size() - 1;
      StackOp = InfixOperatorStack[Idx];
      if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
        break;
-
+
      // If we have an even parentheses count and we see a left parentheses,
      // then stop processing.
      if (!ParenCount && StackOp == IC_LPAREN)
        break;
-
+
      if (StackOp == IC_RPAREN) {
        ++ParenCount;
        InfixOperatorStack.pop_back();
...
      if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
        PostfixStack.push_back(std::make_pair(StackOp, 0));
    }
-
+
    if (PostfixStack.empty())
      return 0;
-
+
    SmallVector<ICToken, 16> OperandStack;
    for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
      ICToken Op = PostfixStack[i];
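The pushOperator/popOperator logic above is the classic operator-precedence (shunting-yard) scheme: an incoming operator first pops every stacked operator that binds at least as tightly, emitting each to the postfix list, and the postfix list is then evaluated with an operand stack. A minimal standalone sketch of the same idea follows; Tok, Prec, and evalInfix are illustrative stand-ins, and the parenthesis tracking the real class does with ParenCount is omitted.

#include <cassert>
#include <cstdint>
#include <vector>

// Simplified token set and precedence table (hypothetical, not the IC_* enums).
enum Tok { T_PLUS, T_MINUS, T_STAR, T_SLASH, T_IMM };
static const int Prec[] = {1, 1, 2, 2, 0};

struct Token { Tok Kind; int64_t Val; };

int64_t evalInfix(const std::vector<Token> &In) {
  std::vector<Token> Postfix;
  std::vector<Tok> Ops;
  for (const Token &T : In) {
    if (T.Kind == T_IMM) { Postfix.push_back(T); continue; }
    // Pop operators that bind at least as tightly as T, then push T.
    while (!Ops.empty() && Prec[Ops.back()] >= Prec[T.Kind]) {
      Postfix.push_back({Ops.back(), 0});
      Ops.pop_back();
    }
    Ops.push_back(T.Kind);
  }
  while (!Ops.empty()) { Postfix.push_back({Ops.back(), 0}); Ops.pop_back(); }

  // Evaluate the postfix form with an operand stack.
  std::vector<int64_t> Vals;
  for (const Token &T : Postfix) {
    if (T.Kind == T_IMM) { Vals.push_back(T.Val); continue; }
    assert(Vals.size() >= 2 && "operator needs two operands");
    int64_t R = Vals.back(); Vals.pop_back();
    int64_t L = Vals.back(); Vals.pop_back();
    switch (T.Kind) {
    case T_PLUS:  Vals.push_back(L + R); break;
    case T_MINUS: Vals.push_back(L - R); break;
    case T_STAR:  Vals.push_back(L * R); break;
    default:      Vals.push_back(L / R); break;
    }
  }
  return Vals.back(); // e.g. 2 + 3 * 4 evaluates to 14
}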
    State(IES_PLUS), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), TmpReg(0),
    Scale(1), Imm(imm), Sym(nullptr), StopOnLBrac(stoponlbrac),
    AddImmPrefix(addimmprefix) { Info.clear(); }
-
+
  unsigned getBaseReg() { return BaseReg; }
  unsigned getIndexReg() { return IndexReg; }
  unsigned getScale() { return Scale; }
...
      (*I).Kind = AOK_Delete;
  }
  const char *SymLocPtr = SymName.data();
- // Skip everything before the symbol.
+ // Skip everything before the symbol.
  if (unsigned Len = SymLocPtr - StartInBrac.getPointer()) {
    assert(Len > 0 && "Expected a non-negative length.");
    AsmRewrites->push_back(AsmRewrite(AOK_Skip, StartInBrac, Len));
...
  // identifier. Don't try an parse it as a register.
  if (Tok.getString().startswith("."))
    break;
-
+
  // If we're parsing an immediate expression, we don't expect a '['.
  if (SM.getStopOnLBrac() && getLexer().getKind() == AsmToken::LBrac)
    break;
...
  MCSymbol *Sym =
      getContext().GetDirectionalLocalSymbol(IntVal, IDVal == "b");
  MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
- const MCExpr *Val =
+ const MCExpr *Val =
      MCSymbolRefExpr::Create(Sym, Variant, getContext());
  if (IDVal == "b" && Sym->isUndefined())
    return Error(Loc, "invalid reference to undefined symbol");
...
  const MCExpr *NewDisp;
  if (ParseIntelDotOperator(Disp, NewDisp))
    return nullptr;
-
+
  End = Tok.getEndLoc();
  Parser.Lex(); // Eat the field.
  Disp = NewDisp;

#define debug(s) DEBUG(Debug(__FILE__, __LINE__, s));

- namespace llvm {
-
+ namespace llvm {
+
// Fill-ins to make the compiler happy. These constants are never actually
// assigned; they are just filler to make an automatically-generated switch
// statement work.
...
static void logger(void* arg, const char* log) {
  if (!arg)
    return;
-
+
  raw_ostream &vStream = *(static_cast<raw_ostream *>(arg));
  vStream << log << "\n";
- }
-
+ }
+
//
// Public interface for the disassembler
//
...
}

/// tryAddingSymbolicOperand - trys to add a symbolic operand in place of the
- /// immediate Value in the MCInst.
+ /// immediate Value in the MCInst.
///
/// @param Value - The immediate Value, has had any PC adjustment made by
/// the caller.
...
/// If the getOpInfo() function was set when setupForSymbolicDisassembly() was
/// called then that function is called to get any symbolic information for the
/// immediate in the instruction using the Address, Offset and Width. If that
- /// returns non-zero then the symbolic information it returns is used to create
+ /// returns non-zero then the symbolic information it returns is used to create
/// an MCExpr and that is added as an operand to the MCInst. If getOpInfo()
/// returns zero and isBranch is true then a symbol look up for immediate Value
/// is done and if a symbol is found an MCExpr is created with that, else
...
/// if it adds an operand to the MCInst and false otherwise.
static bool tryAddingSymbolicOperand(int64_t Value, bool isBranch,
                                     uint64_t Address, uint64_t Offset,
-                                    uint64_t Width, MCInst &MI,
-                                    const MCDisassembler *Dis) {
+                                    uint64_t Width, MCInst &MI,
+                                    const MCDisassembler *Dis) {
  return Dis->tryAddingSymbolicOperand(MI, Value, Address, isBranch,
                                       Offset, Width);
}
...
/// These can often be addresses in a literal pool. The Address of the
/// instruction and its immediate Value are used to determine the address
/// being referenced in the literal pool entry. The SymbolLookUp call back will
- /// return a pointer to a literal 'C' string if the referenced address is an
+ /// return a pointer to a literal 'C' string if the referenced address is an
/// address into a section with 'C' string literals.
static void tryAddingPcLoadReferenceComment(uint64_t Address, uint64_t Value,
                                            const void *Decoder) {
...
static void translateImmediate(MCInst &mcInst, uint64_t immediate,
                               const OperandSpecifier &operand,
                               InternalInstruction &insn,
-                              const MCDisassembler *Dis) {
+                              const MCDisassembler *Dis) {
  // Sign-extend the immediate if necessary.

  OperandType type = (OperandType)operand.type;
...
    debug("A R/M register operand may not have a SIB byte");
    return true;
  }
-
+
  switch (insn.eaBase) {
  default:
    debug("Unexpected EA base register");
...
    ALL_REGS
#undef ENTRY
  }
-
+
  return false;
}

...
/// from.
/// @return - 0 on success; nonzero otherwise
static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
-                             const MCDisassembler *Dis) {
+                             const MCDisassembler *Dis) {
  // Addresses in an MCInst are represented as five operands:
- //   1. basereg      (register)  The R/M base, or (if there is a SIB) the
+ //   1. basereg      (register)  The R/M base, or (if there is a SIB) the
  //                               SIB base
- //   2. scaleamount  (immediate) 1, or (if there is a SIB) the specified
+ //   2. scaleamount  (immediate) 1, or (if there is a SIB) the specified
  //                               scale amount
  //   3. indexreg     (register)  x86_registerNONE, or (if there is a SIB)
- //                               the index (which is multiplied by the
+ //                               the index (which is multiplied by the
  //                               scale amount)
  //   4. displacement (immediate) 0, or the displacement if there is one
  //   5. segmentreg   (register)  x86_registerNONE for now, but could be set
  //                               if we have segment overrides
-
+
  MCOperand baseReg;
  MCOperand scaleAmount;
  MCOperand indexReg;
  MCOperand displacement;
  MCOperand segmentReg;
  uint64_t pcrel = 0;
-
+
  if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
    if (insn.sibBase != SIB_BASE_NONE) {
      switch (insn.sibBase) {
...
        (insn.addressSize == 8 ? SIB_INDEX_RAX:SIB_INDEX_EAX);
      SIBIndex IndexBase = IndexIs512 ? SIB_INDEX_ZMM0 :
                           IndexIs256 ? SIB_INDEX_YMM0 : SIB_INDEX_XMM0;
-     insn.sibIndex = (SIBIndex)(IndexBase +
+     insn.sibIndex = (SIBIndex)(IndexBase +
                        (insn.sibIndex == SIB_INDEX_NONE ? 4 : IndexOffset));
    }

...
    } else {
      indexReg = MCOperand::CreateReg(0);
    }
-
+
    scaleAmount = MCOperand::CreateImm(insn.sibScale);
  } else {
    switch (insn.eaBase) {
...
      }
      else
        baseReg = MCOperand::CreateReg(0);
-
+
      indexReg = MCOperand::CreateReg(0);
      break;
    case EA_BASE_BX_SI:
...
    // placeholders to keep the compiler happy.
#define ENTRY(x) \
    case EA_BASE_##x: \
-     baseReg = MCOperand::CreateReg(X86::x); break;
+     baseReg = MCOperand::CreateReg(X86::x); break;
    ALL_EA_BASES
#undef ENTRY
#define ENTRY(x) case EA_REG_##x:
...
      return true;
    }
    }
-
+
    scaleAmount = MCOperand::CreateImm(1);
  }
-
+
  displacement = MCOperand::CreateImm(insn.displacement);

  segmentReg = MCOperand::CreateReg(segmentRegnums[insn.segmentOverride]);
-
+
  mcInst.addOperand(baseReg);
  mcInst.addOperand(scaleAmount);
  mcInst.addOperand(indexReg);
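For a concrete reading of the five-operand convention documented above, here is a hedged sketch of how the operands for the memory reference [rbx + 4*rcx + 16] with no segment override would be appended; addMemOperands is a hypothetical helper, and it assumes the X86 target's generated register enum plus the MCOperand::CreateReg/CreateImm factories the hunk itself uses.

#include "llvm/MC/MCInst.h"

// Illustrative only: the five operands for [rbx + 4*rcx + 16], appended in
// the order the comment above documents (base, scale, index, disp, segment).
static void addMemOperands(llvm::MCInst &MI) {
  using llvm::MCOperand;
  MI.addOperand(MCOperand::CreateReg(llvm::X86::RBX)); // 1. base register
  MI.addOperand(MCOperand::CreateImm(4));              // 2. scale amount
  MI.addOperand(MCOperand::CreateReg(llvm::X86::RCX)); // 3. index register
  MI.addOperand(MCOperand::CreateImm(16));             // 4. displacement
  MI.addOperand(MCOperand::CreateReg(0));              // 5. segment register (none)
}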
/// from.
/// @return - 0 on success; nonzero otherwise
static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
-                       InternalInstruction &insn, const MCDisassembler *Dis) {
+                       InternalInstruction &insn, const MCDisassembler *Dis) {
  switch (operand.type) {
  default:
    debug("Unexpected type for a R/M operand");
...
    return translateRMMemory(mcInst, insn, Dis);
  }
}
-
+
/// translateFPRegister - Translates a stack position on the FPU stack to its
/// LLVM form, and appends it to an MCInst.
///
...
  return false;
}

- /// translateOperand - Translates an operand stored in an internal instruction
+ /// translateOperand - Translates an operand stored in an internal instruction
/// to LLVM's format and appends it to an MCInst.
///
/// @param mcInst - The MCInst to append to.
...
/// @return - false on success; true otherwise.
static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,
                             InternalInstruction &insn,
-                            const MCDisassembler *Dis) {
+                            const MCDisassembler *Dis) {
  switch (operand.encoding) {
  default:
    debug("Unhandled operand encoding during translation");
...
                       insn, Dis);
  }
}
-
+
/// translateInstruction - Translates an internal instruction and all its
/// operands to an MCInst.
///
...
/// @return - false on success; true otherwise.
static bool translateInstruction(MCInst &mcInst,
                                 InternalInstruction &insn,
-                                const MCDisassembler *Dis) {
+                                const MCDisassembler *Dis) {
  if (!insn.spec) {
    debug("Instruction has no specification");
    return true;
  }
-
+
  mcInst.setOpcode(insn.instructionID);
  // If when reading the prefix bytes we determined the overlapping 0xf2 or 0xf3
  // prefix bytes should be disassembled as xrelease and xacquire then set the
...
    else if(mcInst.getOpcode() == X86::REPNE_PREFIX)
      mcInst.setOpcode(X86::XACQUIRE_PREFIX);
  }
-
+
  insn.numImmediatesTranslated = 0;
-
+
  for (const auto &Op : insn.operands) {
    if (Op.encoding != ENCODING_NONE) {
      if (translateOperand(mcInst, Op, insn, Dis)) {
...
      }
    }
  }
-
+
  return false;
}

...
  return new X86Disassembler::X86GenericDisassembler(STI, Ctx, std::move(MII));
}

- extern "C" void LLVMInitializeX86Disassembler() {
+ extern "C" void LLVMInitializeX86Disassembler() {
  // Register the disassembler.
- TargetRegistry::RegisterMCDisassembler(TheX86_32Target,
+ TargetRegistry::RegisterMCDisassembler(TheX86_32Target,
                                         createX86Disassembler);
  TargetRegistry::RegisterMCDisassembler(TheX86_64Target,
                                         createX86Disassembler);
  int64_t Address;
  if (BranchTarget && BranchTarget->EvaluateAsAbsolute(Address)) {
    O << formatHex((uint64_t)Address);
- }
- else {
+ } else {
    // Otherwise, just print the expression.
    O << *Op.getExpr();
  }
...
  void printopaquemem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
    printMemReference(MI, OpNo, O);
  }
-
+
  void printi8mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
    printMemReference(MI, OpNo, O);
  }
...
private:
  bool HasCustomInstComment;
};
-
+
}

#endif
...
  const MCOperand &IndexReg = MI->getOperand(Op+X86::AddrIndexReg);
  const MCOperand &DispSpec = MI->getOperand(Op+X86::AddrDisp);
  const MCOperand &SegReg = MI->getOperand(Op+X86::AddrSegmentReg);
-
+
  // If this has a segment register, print it.
  if (SegReg.getReg()) {
    printOperand(MI, Op+X86::AddrSegmentReg, O);
    O << ':';
  }
-
+
  O << '[';
-
+
  bool NeedPlus = false;
  if (BaseReg.getReg()) {
    printOperand(MI, Op+X86::AddrBaseReg, O);
    NeedPlus = true;
  }
-
+
  if (IndexReg.getReg()) {
    if (NeedPlus) O << " + ";
    if (ScaleVal != 1)
...
      O << formatImm(DispVal);
    }
  }
-
+
  O << ']';
}

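The printMemReference above emits Intel-syntax addresses of the form segment:[base + scale*index + disp]. A self-contained approximation of that formatting follows; formatIntelMem is a hypothetical stand-in, and segment prefixes and the printer's negative-displacement handling are left out.

#include <string>

// Approximates the Intel-syntax memory-reference layout: pieces are joined
// with " + ", the scale is printed only when it is not 1, and a bare
// displacement is printed when there is no base or index.
static std::string formatIntelMem(const char *Base, int Scale,
                                  const char *Index, long Disp) {
  std::string S = "[";
  bool NeedPlus = false;
  if (Base) { S += Base; NeedPlus = true; }
  if (Index) {
    if (NeedPlus) S += " + ";
    if (Scale != 1) S += std::to_string(Scale) + "*";
    S += Index; NeedPlus = true;
  }
  if (Disp || !NeedPlus) {
    if (NeedPlus) S += " + ";
    S += std::to_string(Disp);
  }
  return S + "]"; // e.g. formatIntelMem("rax", 4, "rbx", 16) -> "[rax + 4*rbx + 16]"
}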
  O << "opaque ptr ";
  printMemReference(MI, OpNo, O);
}
-
+
void printi8mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
  O << "byte ptr ";
  printMemReference(MI, OpNo, O);
...
    printMemOffset(MI, OpNo, O);
  }
};
-
+
}

#endif
...
// Defines a new offset for the CFA. E.g.
//
// With frame:
- //
+ //
//  pushq %rbp
// L0:
//  .cfi_def_cfa_offset 16
...
          (RegNo > X86::ZMM15 && RegNo <= X86::ZMM31));
}

-
+
inline bool isX86_64NonExtLowByteReg(unsigned reg) {
  return (reg == X86::SPL || reg == X86::BPL ||
          reg == X86::SIL || reg == X86::DIL);
...
  //  MemAddr, src1(VEX_4V), src2(ModR/M)
  //  MemAddr, src1(ModR/M), imm8
  //
- if (X86II::isX86_64ExtendedReg(MI.getOperand(MemOperand +
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(MemOperand +
                                               X86::AddrBaseReg).getReg()))
    VEX_B = 0x0;
  if (X86II::isX86_64ExtendedReg(MI.getOperand(MemOperand +
...
      EVEX_rc = MI.getOperand(RcOperand).getImm() & 0x3;
    }
    EncodeRC = true;
- }
+ }
  break;
  case X86II::MRMDestReg:
    // MRMDestReg instructions forms:
...
enum {
  X86_64 = 0, X86_32_DarwinEH = 1, X86_32_Generic = 2
};
- }
-
+ }
+
/// N86 namespace - Native X86 register numbers
///
namespace N86 {
...
  if (A->isUndefined() || B->isUndefined()) {
    StringRef Name = A->isUndefined() ? A->getName() : B->getName();
    Asm.getContext().FatalError(Fixup.getLoc(),
-     "unsupported relocation with subtraction expression, symbol '" +
+     "unsupported relocation with subtraction expression, symbol '" +
      Name + "' can not be undefined in a subtraction expression");
  }

...
  if (A_Base) {
    Index = A_Base->getIndex();
    IsExtern = 1;
- }
- else {
+ } else {
    Index = A_SD.getFragment()->getParent()->getOrdinal() + 1;
    IsExtern = 0;
  }
...
  if (B_Base) {
    Index = B_Base->getIndex();
    IsExtern = 1;
- }
- else {
+ } else {
    Index = B_SD.getFragment()->getParent()->getOrdinal() + 1;
    IsExtern = 0;
  }
...

Target llvm::TheX86_32Target, llvm::TheX86_64Target;

- extern "C" void LLVMInitializeX86TargetInfo() {
+ extern "C" void LLVMInitializeX86TargetInfo() {
  RegisterTarget<Triple::x86, /*HasJIT=*/true>
    X(TheX86_32Target, "x86", "32-bit X86: Pentium-Pro and above");

...
                              FeatureSlowSHLD]>;
def : Proc<"athlon-tbird",   [Feature3DNowA, FeatureSlowBTMem,
                              FeatureSlowSHLD]>;
- def : Proc<"athlon-4",      [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem,
+ def : Proc<"athlon-4",      [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem,
                              FeatureSlowSHLD]>;
def : Proc<"athlon-xp",      [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem,
                              FeatureSlowSHLD]>;
...
// be good for modern chips without enabling instruction set encodings past the
// basic SSE2 and 64-bit ones. It disables slow things from any mainstream and
// modern 64-bit x86 chip, and enables features that are generally beneficial.
- //
+ //
// We currently use the Sandy Bridge model as the default scheduling model as
// we use it across Nehalem, Westmere, Sandy Bridge, and Ivy Bridge which
// covers a huge swath of x86 processors. If there are specific scheduling

unsigned X86MaterializeInt(const ConstantInt *CI, MVT VT);
unsigned X86MaterializeFP(const ConstantFP *CFP, MVT VT);
- unsigned X86MaterializeGV(const GlobalValue *GV,MVT VT);
+ unsigned X86MaterializeGV(const GlobalValue *GV, MVT VT);
unsigned fastMaterializeConstant(const Constant *C) override;

unsigned fastMaterializeAlloca(const AllocaInst *C) override;
...

  // Ok, we need to do a load from a stub. If we've already loaded from
  // this stub, reuse the loaded pointer, otherwise emit the load now.
- DenseMap<const Value*, unsigned>::iterator I = LocalValueMap.find(V);
+ DenseMap<const Value *, unsigned>::iterator I = LocalValueMap.find(V);
  unsigned LoadReg;
  if (I != LocalValueMap.end() && I->second != 0) {
    LoadReg = I->second;
...
  case Instruction::Alloca: {
    // Do static allocas.
    const AllocaInst *A = cast<AllocaInst>(V);
-   DenseMap<const AllocaInst*, int>::iterator SI =
+   DenseMap<const AllocaInst *, int>::iterator SI =
      FuncInfo.StaticAllocaMap.find(A);
    if (SI != FuncInfo.StaticAllocaMap.end()) {
      AM.BaseType = X86AddressMode::FrameIndexBase;
...

  unsigned Alignment = S->getAlignment();
  unsigned ABIAlignment = DL.getABITypeAlignment(Val->getType());
- if (Alignment == 0) // Ensure that codegen never sees alignment 0
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0
    Alignment = ABIAlignment;
  bool Aligned = Alignment >= ABIAlignment;

...

  // Make the copy.
  unsigned DstReg = VA.getLocReg();
- const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg);
+ const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
  // Avoid a cross-class copy. This is very unlikely.
  if (!SrcRC->contains(DstReg))
    return false;
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
-         DstReg).addReg(SrcReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+         TII.get(TargetOpcode::COPY), DstReg).addReg(SrcReg);

  // Add register to return instruction.
  RetRegs.push_back(VA.getLocReg());
...
  assert(Reg &&
         "SRetReturnReg should have been set in LowerFormalArguments()!");
  unsigned RetReg = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
-         RetReg).addReg(Reg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+         TII.get(TargetOpcode::COPY), RetReg).addReg(Reg);
  RetRegs.push_back(RetReg);
}

// Now emit the RET.
MachineInstrBuilder MIB =
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Subtarget->is64Bit() ? X86::RETQ : X86::RETL));
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+         TII.get(Subtarget->is64Bit() ? X86::RETQ : X86::RETL));
for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
  MIB.addReg(RetRegs[i], RegState::Implicit);
return true;
  updateValueMap(I, ResultReg);
  return true;
}
-

bool X86FastISel::X86SelectBranch(const Instruction *I) {
  // Unconditional branches are selected by tablegen-generated code.
...
          TII.get(X86::MOV32r0), Zero32);

// Copy the zero into the appropriate sub/super/identical physical
- // register. Unfortunately the operations needed are not uniform enough to
- // fit neatly into the table above.
+ // register. Unfortunately the operations needed are not uniform enough
+ // to fit neatly into the table above.
if (VT.SimpleTy == MVT::i16) {
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
          TII.get(Copy), TypeEntry.HighInReg)
...
EVT CmpVT = TLI.getValueType(CmpLHS->getType());
// Emit a compare of the LHS and RHS, setting the flags.
if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT))
- return false;
+ return false;

if (SETFOpc) {
  unsigned FlagReg1 = createResultReg(&X86::GR8RegClass);
...

if (I->getType() != CI->getOperand(0)->getType() ||
    !((Subtarget->hasSSE1() && RetVT == MVT::f32) ||
-     (Subtarget->hasSSE2() && RetVT == MVT::f64) ))
+     (Subtarget->hasSSE2() && RetVT == MVT::f64)))
  return false;

const Value *CmpLHS = CI->getOperand(0);
...
const TargetRegisterClass *CopyRC =
  (SrcVT == MVT::i16) ? &X86::GR16_ABCDRegClass : &X86::GR32_ABCDRegClass;
unsigned CopyReg = createResultReg(CopyRC);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
-         CopyReg).addReg(InputReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+         TII.get(TargetOpcode::COPY), CopyReg).addReg(InputReg);
InputReg = CopyReg;
}

...
  VT = MVT::i32;
else if (Len >= 2)
  VT = MVT::i16;
- else {
+ else
  VT = MVT::i8;
- }

unsigned Reg;
bool RV = X86FastEmitLoad(VT, SrcAM, nullptr, Reg);
...

if (!Subtarget->is64Bit())
  return false;
-
+
// Only handle simple cases. i.e. Up to 6 i32/i64 scalar arguments.
unsigned GPRCnt = 0;
unsigned FPRCnt = 0;
...
if (!X86SelectAddress(Ptr, AM))
  return false;

- const X86InstrInfo &XII = (const X86InstrInfo&)TII;
+ const X86InstrInfo &XII = (const X86InstrInfo &)TII;

unsigned Size = DL.getTypeAllocSize(LI->getType());
unsigned Alignment = LI->getAlignment();
      X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta);

bool UseStackProbe = (STI.isOSWindows() && !STI.isTargetMacho());
-
+
// If this is x86-64 and the Red Zone is not disabled, if we are a leaf
// function, and use up to 128 bytes of stack space, don't have a frame
// pointer, calls, or dynamic alloca then we do not need to adjust the
...
// Calculate required stack adjustment.
uint64_t FrameSize = StackSize - SlotSize;
// If required, include space for extra hidden slot for stashing base pointer.
- if (X86FI->getRestoreBasePointer())
+ if (X86FI->getRestoreBasePointer())
  FrameSize += SlotSize;
if (RegInfo->needsStackRealignment(MF)) {
  // Callee-saved registers are pushed on stack before the stack
...
int X86FrameLowering::getFrameIndexOffsetFromSP(const MachineFunction &MF, int FI) const {
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  // Does not include any dynamic realign.
- const uint64_t StackSize = MFI->getStackSize();
+ const uint64_t StackSize = MFI->getStackSize();
  {
#ifndef NDEBUG
    const X86RegisterInfo *RegInfo =
...
    // refer to arguments to the function which are stored in the *callers*
    // frame). As a result, THE RESULT OF THIS CALL IS MEANINGLESS FOR CSRs
    // AND FixedObjects IFF needsStackRealignment or hasVarSizedObject.
-
+
    assert(!RegInfo->hasBasePointer(MF) && "we don't handle this case");

    // We don't handle tail calls, and shouldn't be seeing them
      SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Glue, InFlag),0);
  } else {
    // Zero out the high part, effectively zero extending the input.
-   SDValue ClrNode = SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, NVT), 0);
+   SDValue ClrNode = SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, NVT), 0);
    switch (NVT.SimpleTy) {
    case MVT::i16:
      ClrNode =
...

// Bypass expensive divides on Atom when compiling with O2.
if (TM.getOptLevel() >= CodeGenOpt::Default) {
- if (Subtarget->hasSlowDivide32())
+ if (Subtarget->hasSlowDivide32())
    addBypassSlowDiv(32, 8);
  if (Subtarget->hasSlowDivide64() && Subtarget->is64Bit())
    addBypassSlowDiv(64, 16);
  ValToCopy = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), ValToCopy);

assert(VA.getLocInfo() != CCValAssign::FPExt &&
-      "Unexpected FP-extend for return value.");
+      "Unexpected FP-extend for return value.");

// If this is x86-64, and we disabled SSE, we can't return FP values,
// or SSE or MMX vectors.
...
// In PIC we need an extra register to formulate the address computation
// for the callee.
unsigned MaxInRegs =
- (DAG.getTarget().getRelocationModel() == Reloc::PIC_) ? 2 : 3;
+ (DAG.getTarget().getRelocationModel() == Reloc::PIC_) ? 2 : 3;

for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
  CCValAssign &VA = ArgLocs[i];
...
for (unsigned i = EltIdx + 1; i < 4 && CanFold; ++i) {
  if (Zeroable[i])
    continue;
-
+
  SDValue Current = Op->getOperand(i);
  SDValue SrcVector = Current->getOperand(0);
  if (!V1.getNode())
...
    AllContants = false;
    NonConstIdx = idx;
    NumNonConsts++;
- }
- else {
+ } else {
    NumConsts++;
    if (cast<ConstantSDNode>(In)->getZExtValue())
      Immediate |= (1ULL << idx);
...
                          MVT::getIntegerVT(VT.getSizeInBits()));
  DstVec = DAG.getNode(ISD::BITCAST, dl, VT, VecAsImm);
}
- else
+ else
  DstVec = DAG.getUNDEF(VT);
return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DstVec,
                   Op.getOperand(NonConstIdx),
...

/// \brief Return true if \p N implements a horizontal binop and return the
/// operands for the horizontal binop into V0 and V1.
- ///
+ ///
/// This is a helper function of PerformBUILD_VECTORCombine.
/// This function checks that the build_vector \p N in input implements a
/// horizontal operation. Parameter \p Opcode defines the kind of horizontal
...
assert(BaseIdx * 2 <= LastIdx && "Invalid Indices in input!");
assert(VT.isVector() && VT.getVectorNumElements() >= LastIdx &&
       "Invalid Vector in input!");
-
+
bool IsCommutable = (Opcode == ISD::ADD || Opcode == ISD::FADD);
bool CanFold = true;
unsigned ExpectedVExtractIdx = BaseIdx;
...
}

/// \brief Emit a sequence of two 128-bit horizontal add/sub followed by
- /// a concat_vector.
+ /// a concat_vector.
///
/// This is a helper function of PerformBUILD_VECTORCombine.
/// This function expects two 256-bit vectors called V0 and V1.
/// At first, each vector is split into two separate 128-bit vectors.
/// Then, the resulting 128-bit vectors are used to implement two
- /// horizontal binary operations.
+ /// horizontal binary operations.
///
/// The kind of horizontal binary operation is defined by \p X86Opcode.
///
...
  // Try to match an SSE3 float HADD/HSUB.
  if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, NumElts, InVec0, InVec1))
    return DAG.getNode(X86ISD::FHADD, DL, VT, InVec0, InVec1);
-
+
  if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, NumElts, InVec0, InVec1))
    return DAG.getNode(X86ISD::FHSUB, DL, VT, InVec0, InVec1);
} else if ((VT == MVT::v4i32 || VT == MVT::v8i16) && Subtarget->hasSSSE3()) {
  // Try to match an SSSE3 integer HADD/HSUB.
  if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, NumElts, InVec0, InVec1))
    return DAG.getNode(X86ISD::HADD, DL, VT, InVec0, InVec1);
-
+
  if (isHorizontalBinOp(BV, ISD::SUB, DAG, 0, NumElts, InVec0, InVec1))
    return DAG.getNode(X86ISD::HSUB, DL, VT, InVec0, InVec1);
}
-
+
if (!Subtarget->hasAVX())
  return SDValue();

...
// Do this only if the target has AVX2.
if (Subtarget->hasAVX2())
  return DAG.getNode(X86Opcode, DL, VT, InVec0, InVec1);
-
+
// Do not try to expand this build_vector into a pair of horizontal
// add/sub if we can emit a pair of scalar add/sub.
if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half)
...
/// does not check for the profitability of lowering either as PALIGNR or
/// PSRLDQ/PSLLDQ/POR, only whether the mask is valid to lower in that form.
/// This matches shuffle vectors that look like:
- ///
+ ///
///   v8i16 [11, 12, 13, 14, 15, 0, 1, 2]
- ///
+ ///
/// Essentially it concatenates V1 and V2, shifts right by some number of
/// elements, and takes the low elements as the result. Note that while this is
/// specified as a *right shift* because x86 is little-endian, it is a *left
...
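A quick worked check of the mask shown in that comment: with Mask = [11, 12, 13, 14, 15, 0, 1, 2] on v8i16, every element equals (11 + i) mod 16, that is, a single rotation of the 16-element concatenation that wraps from V2 back into V1, which is exactly the byte rotation PALIGNR supplies. A minimal sketch of that check (isRotationMask is hypothetical; the real matcher also tolerates undef lanes and validates which input feeds each half):

// Returns true if Mask is a single rotation of the 16-element
// concatenation of the two v8i16 inputs; Start receives the rotation.
static bool isRotationMask(const int Mask[8], int &Start) {
  Start = Mask[0];
  for (int i = 0; i != 8; ++i)
    if (Mask[i] != (Start + i) % 16)
      return false;
  return true; // [11,12,13,14,15,0,1,2] gives Start == 11
}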
1278412783
1278512784 /// Insert one bit to mask vector, like v16i1 or v8i1.
1278612785 /// AVX-512 feature.
12787 SDValue
12786 SDValue
1278812787 X86TargetLowering::InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG) const {
1278912788 SDLoc dl(Op);
1279012789 SDValue Vec = Op.getOperand(0);
1279712796 // insert element and then truncate the result.
1279812797 MVT ExtVecVT = (VecVT == MVT::v8i1 ? MVT::v8i64 : MVT::v16i32);
1279912798 MVT ExtEltVT = (VecVT == MVT::v8i1 ? MVT::i64 : MVT::i32);
12800 SDValue ExtOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ExtVecVT,
12799 SDValue ExtOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ExtVecVT,
1280112800 DAG.getNode(ISD::ZERO_EXTEND, dl, ExtVecVT, Vec),
1280212801 DAG.getNode(ISD::ZERO_EXTEND, dl, ExtEltVT, Elt), Idx);
1280312802 return DAG.getNode(ISD::TRUNCATE, dl, VecVT, ExtOp);
1354513544 }
1354613545 return SDValue();
1354713546 }
13548
13547
1354913548 assert(SrcVT <= MVT::i64 && SrcVT >= MVT::i16 &&
1355013549 "Unknown SINT_TO_FP to lower!");
1355113550
1419214191 In = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, In);
1419314192 InVT = ExtVT;
1419414193 }
14195
14194
1419614195 SDValue Cst = DAG.getTargetConstant(1, InVT.getVectorElementType());
1419714196 const Constant *C = (dyn_cast(Cst))->getConstantIntValue();
1419814197 SDValue CP = DAG.getConstantPool(C, getPointerTy());
1438614385 EltVT = VT.getVectorElementType();
1438714386 NumElts = VT.getVectorNumElements();
1438814387 }
14389
14388
1439014389 unsigned EltBits = EltVT.getSizeInBits();
1439114390 LLVMContext *Context = DAG.getContext();
1439214391 // For FABS, mask is 0x7f...; for FNEG, mask is 0x80...
1441314412 return DAG.getNode(ISD::BITCAST, dl, VT,
1441414413 DAG.getNode(BitOp, dl, VecVT, Operand, MaskCasted));
1441514414 }
14416
14415
1441714416 // If not vector, then scalar.
1441814417 unsigned BitOp = IsFABS ? X86ISD::FAND : IsFNABS ? X86ISD::FOR : X86ISD::FXOR;
1441914418 SDValue Operand = IsFNABS ? Op0.getOperand(0) : Op0;
1487014869 if (Op0.getValueType() == MVT::i1)
1487114870 llvm_unreachable("Unexpected comparison operation for MVT::i1 operands");
1487214871 }
14873
14872
1487414873 if ((Op0.getValueType() == MVT::i8 || Op0.getValueType() == MVT::i16 ||
1487514874 Op0.getValueType() == MVT::i32 || Op0.getValueType() == MVT::i64)) {
14876 // Do the comparison at i32 if it's smaller, besides the Atom case.
14877 // This avoids subregister aliasing issues. Keep the smaller reference
14878 // if we're optimizing for size, however, as that'll allow better folding
14875 // Do the comparison at i32 if it's smaller, besides the Atom case.
14876 // This avoids subregister aliasing issues. Keep the smaller reference
14877 // if we're optimizing for size, however, as that'll allow better folding
1487914878 // of memory operations.
1488014879 if (Op0.getValueType() != MVT::i32 && Op0.getValueType() != MVT::i64 &&
1488114880 !DAG.getMachineFunction().getFunction()->getAttributes().hasAttribute(
1493314932 return SDValue();
1493414933
1493514934 EVT VT = Op.getValueType();
14936
14935
1493714936 // SSE1 has rsqrtss and rsqrtps.
1493814937 // TODO: Add support for AVX512 (v16f32).
1493914938 // It is likely not profitable to do this for f64 because a double-precision
1496114960 // significant digits in the divisor.
1496214961 if (!Subtarget->useReciprocalEst())
1496314962 return SDValue();
14964
14963
1496514964 EVT VT = Op.getValueType();
14966
14965
1496714966 // SSE1 has rcpss and rcpps. AVX adds a 256-bit variant for rcpps.
1496814967 // TODO: Add support for AVX512 (v16f32).
1496914968 // It is likely not profitable to do this for f64 because a double-precision
1578215781
1578315782 ((Subtarget->hasDQI() && Subtarget->hasVLX() &&
1578415783 VT.getSizeInBits() <= 256 && VTElt.getSizeInBits() >= 32)) ||
15785
15784
1578615785 ((Subtarget->hasDQI() && VT.is512BitVector() &&
1578715786 VTElt.getSizeInBits() >= 32))))
1578815787 return DAG.getNode(X86ISD::VSEXT, dl, VT, In);
15789
15788
1579015789 unsigned int NumElts = VT.getVectorNumElements();
1579115790
1579215791 if (NumElts != 8 && NumElts != 16)
1688016879 return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2,
1688116880 RoundingMode),
1688216881 Mask, Src0, Subtarget, DAG);
16883 }
16882 }
1688416883 case INTR_TYPE_2OP_MASK: {
1688516884 return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Op.getOperand(1),
1688616885 Op.getOperand(2)),
1688716886 Op.getOperand(4), Op.getOperand(3), Subtarget, DAG);
16888 }
16887 }
1688916888 case CMP_MASK:
1689016889 case CMP_MASK_CC: {
1689116890 // Comparison intrinsics with masks.
1748917488 switch(IntrData->Type) {
1749017489 default:
1749117490 llvm_unreachable("Unknown Intrinsic Type");
17492 break;
17491 break;
1749317492 case RDSEED:
1749417493 case RDRAND: {
1749517494 // Emit the node with the right value type.
1859918598 // If possible, lower this packed shift into a vector multiply instead of
1860018599 // expanding it into a sequence of scalar shifts.
1860118600 // Do this only if the vector shift count is a constant build_vector.
18602 if (Op.getOpcode() == ISD::SHL &&
18601 if (Op.getOpcode() == ISD::SHL &&
1860318602 (VT == MVT::v8i16 || VT == MVT::v4i32 ||
1860418603 (Subtarget->hasInt256() && VT == MVT::v16i16)) &&
1860518604 ISD::isBuildVectorOfConstantSDNodes(Amt.getNode())) {
1869118690 CanBeSimplified = Amt2 == Amt->getOperand(j);
1869218691 }
1869318692 }
18694
18693
1869518694 if (CanBeSimplified && isa(Amt1) &&
1869618695 isa(Amt2)) {
1869718696 // Replace this node with two shifts followed by a MOVSS/MOVSD.
1869818697 EVT CastVT = MVT::v4i32;
18699 SDValue Splat1 =
18698 SDValue Splat1 =
1870018699 DAG.getConstant(cast(Amt1)->getAPIntValue(), VT);
1870118700 SDValue Shift1 = DAG.getNode(Op->getOpcode(), dl, VT, R, Splat1);
18702 SDValue Splat2 =
18701 SDValue Splat2 =
1870318702 DAG.getConstant(cast(Amt2)->getAPIntValue(), VT);
1870418703 SDValue Shift2 = DAG.getNode(Op->getOpcode(), dl, VT, R, Splat2);
1870518704 if (TargetOpcode == X86ISD::MOVSD)
2100821007 .addReg(restoreDstReg).addMBB(restoreMBB);
2100921008
2101021009 // restoreMBB:
21011 if (RegInfo->hasBasePointer(*MF)) {
21010 if (RegInfo->hasBasePointer(*MF)) {
2101221011 const X86Subtarget &STI = MF->getTarget().getSubtarget();
2101321012 const bool Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64();
2101421013 X86MachineFunctionInfo *X86FI = MF->getInfo();
2109321092
2109421093 // Replace 213-type (isel default) FMA3 instructions with 231-type for
2109521094 // accumulator loops. Writing back to the accumulator allows the coalescer
21096 // to remove extra copies in the loop.
21095 // to remove extra copies in the loop.
2109721096 MachineBasicBlock *
2109821097 X86TargetLowering::emitFMA3Instr(MachineInstr *MI,
2109921098 MachineBasicBlock *MBB) const {
2237922378 EVT SVT = BC0.getValueType();
2238022379 unsigned Opcode = BC0.getOpcode();
2238122380 unsigned NumElts = VT.getVectorNumElements();
22382
22381
2238322382 if (BC0.hasOneUse() && SVT.isVector() &&
2238422383 SVT.getVectorNumElements() * 2 == NumElts &&
2238522384 TLI.isOperationLegal(Opcode, VT)) {
2364523644 // fold (blend A, B, allOnes) -> B
2364623645 if (ISD::isBuildVectorAllOnes(Mask.getNode()))
2364723646 return Op1;
23648
23647
2364923648 // Simplify the case where the mask is a constant i32 value.
2365023649 if (ConstantSDNode *C = dyn_cast(Mask)) {
2365123650 if (C->isNullValue())
2631326312 // "load" ports instead of the dedicated "store" port.
2631426313 // E.g., on Haswell:
2631526314 // vmovaps %ymm1, (%r8, %rdi) can use port 2 or 3.
26316 // vmovaps %ymm1, (%r8) can use port 2, 3, or 7.
26315 // vmovaps %ymm1, (%r8) can use port 2, 3, or 7.
2631726316 if (isLegalAddressingMode(AM, Ty))
2631826317 // Scale represents reg2 * scale, thus account for 1
2631926318 // as soon as we use a second register.
int NumEltsInVT = !if (!eq (NumElts, 1),
  !if (!eq (EltVT.Size, 32), 4,
  !if (!eq (EltVT.Size, 64), 2, NumElts)), NumElts);
-
+
string VTName = "v" # NumEltsInVT # EltVT;

// The vector VT.
...
def Zrr : AVX5128I
          !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
          []>, EVEX, EVEX_V512;
- def Zkrr : AVX5128I
+ def Zkrr : AVX5128I
           (ins KRC:$mask, SrcRC:$src),
-          !strconcat(OpcodeStr,
+          !strconcat(OpcodeStr,
                      "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
           []>, EVEX, EVEX_V512, EVEX_KZ;
}
...
defm VPBROADCASTDr : avx512_int_broadcast_reg<0x7C, "vpbroadcastd", GR32, VK16WM>;
defm VPBROADCASTQr : avx512_int_broadcast_reg<0x7C, "vpbroadcastq", GR64, VK8WM>,
                     VEX_W;
-
+
def : Pat <(v16i32 (X86vzext VK16WM:$mask)),
           (VPBROADCASTDrZkrr VK16WM:$mask, (i32 (MOV32ri 0x1)))>;

...
          (OpVT (X86VBroadcast (SrcVT VR128X:$src))))]>, EVEX;
def krr : AVX5128I
          VR128X:$src),
-         !strconcat(OpcodeStr,
+         !strconcat(OpcodeStr,
                     "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
          [(set DstRC:$dst,
            (OpVT (X86VBroadcastm KRC:$mask, (SrcVT VR128X:$src))))]>,
...
let mayLoad = 1 in {
  def rm : AVX5128I
           !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-          [(set DstRC:$dst,
+          [(set DstRC:$dst,
             (OpVT (X86VBroadcast (ld_frag addr:$src))))]>, EVEX;
  def krm : AVX5128I
            x86memop:$src),
-           !strconcat(OpcodeStr,
+           !strconcat(OpcodeStr,
                       "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
-           [(set DstRC:$dst, (OpVT (X86VBroadcastm KRC:$mask,
+           [(set DstRC:$dst, (OpVT (X86VBroadcastm KRC:$mask,
              (ld_frag addr:$src))))]>, EVEX, EVEX_KZ;
}
}
...
          (VBROADCASTSSZr VR128X:$src)>;
def : Pat<(v8f64 (int_x86_avx512_vbroadcast_sd_pd_512 (v2f64 VR128X:$src))),
          (VBROADCASTSDZr VR128X:$src)>;
-
+
// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
def : Pat<(v16f32 (X86VBroadcast FR32X:$src)),
...

let Predicates = [HasAVX512] in {
def : Pat<(v8i32 (X86VBroadcastm (v8i1 VK8WM:$mask), (loadi32 addr:$src))),
-         (EXTRACT_SUBREG
+         (EXTRACT_SUBREG
            (v16i32 (VPBROADCASTDZkrm (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
                                      addr:$src)), sub_ymm)>;
}
...
def Zrr : AVX512XS8I
          !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
          []>, EVEX, EVEX_V512;
-
+
let Predicates = [HasCDI, HasVLX] in {
  def Z128rr : AVX512XS8I
               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
...
          (VPERMILPDZri VR512:$src1, imm:$imm)>;

// -- VPERM - register form --
- multiclass avx512_perm<bits<8> opc, string OpcodeStr, RegisterClass RC,
+ multiclass avx512_perm<bits<8> opc, string OpcodeStr, RegisterClass RC,
                         PatFrag mem_frag, X86MemOperand x86memop, ValueType OpVT> {

def rr : AVX5128I
...

defm VPERMDZ  : avx512_perm<0x36, "vpermd",  VR512, memopv16i32, i512mem,
                            v16i32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
- defm VPERMQZ  : avx512_perm<0x36, "vpermq",  VR512, memopv8i64,  i512mem,
+ defm VPERMQZ  : avx512_perm<0x36, "vpermq",  VR512, memopv8i64,  i512mem,
                            v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
let ExeDomain = SSEPackedSingle in
defm VPERMPSZ : avx512_perm<0x16, "vpermps", VR512, memopv16f32, f512mem,
                            v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
let ExeDomain = SSEPackedDouble in
- defm VPERMPDZ : avx512_perm<0x16, "vpermpd", VR512, memopv8f64,  f512mem,
+ defm VPERMPDZ : avx512_perm<0x16, "vpermpd", VR512, memopv8f64,  f512mem,
                            v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

// -- VPERM2I - 3 source operands form --
}

let ExeDomain = SSEPackedSingle in
- defm VBLENDMPSZ : avx512_blendmask<0x65, "vblendmps",
+ defm VBLENDMPSZ : avx512_blendmask<0x65, "vblendmps",
                    VK16WM, VR512, f512mem,
-                   memopv16f32, vselect, v16f32>,
+                   memopv16f32, vselect, v16f32>,
                    EVEX_CD8<32, CD8VF>, EVEX_V512;
let ExeDomain = SSEPackedDouble in
- defm VBLENDMPDZ : avx512_blendmask<0x65, "vblendmpd",
+ defm VBLENDMPDZ : avx512_blendmask<0x65, "vblendmpd",
                    VK8WM, VR512, f512mem,
-                   memopv8f64, vselect, v8f64>,
+                   memopv8f64, vselect, v8f64>,
                    VEX_W, EVEX_CD8<64, CD8VF>, EVEX_V512;

def : Pat<(v16f32 (int_x86_avx512_mask_blend_ps_512 (v16f32 VR512:$src1),
...
          (VBLENDMPDZrr (COPY_TO_REGCLASS GR8:$mask, VK8WM),
                        VR512:$src1, VR512:$src2)>;

- defm VPBLENDMDZ : avx512_blendmask<0x64, "vpblendmd",
-                   VK16WM, VR512, f512mem,
-                   memopv16i32, vselect, v16i32>,
+ defm VPBLENDMDZ : avx512_blendmask<0x64, "vpblendmd",
+                   VK16WM, VR512, f512mem,
+                   memopv16i32, vselect, v16i32>,
                    EVEX_CD8<32, CD8VF>, EVEX_V512;

- defm VPBLENDMQZ : avx512_blendmask<0x64, "vpblendmq",
-                   VK8WM, VR512, f512mem,
-                   memopv8i64, vselect, v8i64>,
+ defm VPBLENDMQZ : avx512_blendmask<0x64, "vpblendmq",
+                   VK8WM, VR512, f512mem,
+                   memopv8i64, vselect, v8i64>,
                    VEX_W, EVEX_CD8<64, CD8VF>, EVEX_V512;

def : Pat<(v16i32 (int_x86_avx512_mask_blend_d_512 (v16i32 VR512:$src1),
...
let Predicates = [HasAVX512] in {
def : Pat<(v8f32 (vselect (v8i1 VK8WM:$mask), (v8f32 VR256X:$src1),
                          (v8f32 VR256X:$src2))),
-         (EXTRACT_SUBREG
-           (v16f32 (VBLENDMPSZrr (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
+         (EXTRACT_SUBREG
+           (v16f32 (VBLENDMPSZrr (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
              (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
              (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>;

def : Pat<(v8i32 (vselect (v8i1 VK8WM:$mask), (v8i32 VR256X:$src1),
                          (v8i32 VR256X:$src2))),
-         (EXTRACT_SUBREG
-           (v16i32 (VPBLENDMDZrr (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
+         (EXTRACT_SUBREG
+           (v16i32 (VPBLENDMDZrr (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
              (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
              (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>;
}
...
                              FROUND_NO_EXC)),
          (COPY_TO_REGCLASS (VCMPPSZrrib VR512:$src1, VR512:$src2,
                             (I8Imm imm:$cc)), GR16)>;
-
+
def : Pat<(i8 (int_x86_avx512_mask_cmp_pd_512 (v8f64 VR512:$src1),
               (v8f64 VR512:$src2), imm:$cc, (i8 -1),
               FROUND_NO_EXC)),
//===----------------------------------------------------------------------===//
// AVX-512 MOVSS, MOVSD
//===----------------------------------------------------------------------===//

- multiclass avx512_move_scalar
+ multiclass avx512_move_scalar
                              SDNode OpNode, ValueType vt,
                              X86MemOperand x86memop, PatFrag mem_pat> {
let hasSideEffects = 0 in {
- def rr : SI<0x10, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src1, RC:$src2),
+ def rr : SI<0x10, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src1, RC:$src2),
            !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
            [(set VR128X:$dst, (vt (OpNode VR128X:$src1,
                                           (scalar_to_vector RC:$src2))))],
...
// Move low f32 and clear high bits.
def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
          (SUBREG_TO_REG (i32 0),
-          (VMOVSSZrr (v4f32 (V_SET0)),
+          (VMOVSSZrr (v4f32 (V_SET0)),
             (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)), sub_xmm)>;
def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
          (SUBREG_TO_REG (i32 0),
...
def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
                                  (ins VR128X:$src),
                                  "vmovq\t{$src, $dst|$dst, $src}",
-                                 [(set VR128X:$dst, (v2i64 (X86vzmovl
+                                 [(set VR128X:$dst, (v2i64 (X86vzmovl
                                    (v2i64 VR128X:$src))))],
                                  IIC_SSE_MOVQ_RR>, EVEX, VEX_W;

...
          (VMOV64toPQIZrr GR64:$src)>;
def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
          (VMOVDI2PDIZrr GR32:$src)>;
-
+
def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
          (VMOVDI2PDIZrm addr:$src)>;
def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
...
def rr : AVX512BI
         (ins RC:$src1, RC:$src2),
         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-        [(set RC:$dst, (OpVT (OpNode (OpVT RC:$src1), (OpVT RC:$src2))))],
+        [(set RC:$dst, (OpVT (OpNode (OpVT RC:$src1), (OpVT RC:$src2))))],
         IIC_SSE_UNPCK>, EVEX_4V;
def rm : AVX512BI
         (ins RC:$src1, x86memop:$src2),
...
//

multiclass avx512_pshuf_imm<bits<8> opc, string OpcodeStr, RegisterClass RC,
-                           SDNode OpNode, PatFrag mem_frag,
+                           SDNode OpNode, PatFrag mem_frag,
                            X86MemOperand x86memop, ValueType OpVT> {
def ri : AVX512Ii8
         (ins RC:$src1, i8imm:$src2),
...
//===----------------------------------------------------------------------===//
// AVX-512  VPTESTM instructions
//===----------------------------------------------------------------------===//

- multiclass avx512_vptest<bits<8> opc, string OpcodeStr, RegisterClass KRC,
-                          RegisterClass RC, X86MemOperand x86memop, PatFrag memop_frag,
+ multiclass avx512_vptest<bits<8> opc, string OpcodeStr, RegisterClass KRC,
+                          RegisterClass RC, X86MemOperand x86memop, PatFrag memop_frag,
                           SDNode OpNode, ValueType vt> {
def rr : AVX512PI
-        (outs KRC:$dst), (ins RC:$src1, RC:$src2),
+        (outs KRC:$dst), (ins RC:$src1, RC:$src2),
         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
         [(set KRC:$dst, (OpNode (vt RC:$src1), (vt RC:$src2)))],
         SSEPackedInt>, EVEX_4V;
def rm : AVX512PI
-        (outs KRC:$dst), (ins RC:$src1, x86memop:$src2),
+        (outs KRC:$dst), (ins RC:$src1, x86memop:$src2),
         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-        [(set KRC:$dst, (OpNode (vt RC:$src1),
+        [(set KRC:$dst, (OpNode (vt RC:$src1),
           (bitconvert (memop_frag addr:$src2))))], SSEPackedInt>, EVEX_4V;
}

...
//===----------------------------------------------------------------------===//
// AVX-512 Shift instructions
//===----------------------------------------------------------------------===//
multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
-                           string OpcodeStr, SDNode OpNode, X86VectorVTInfo _> {
+                           string OpcodeStr, SDNode OpNode, X86VectorVTInfo _> {
defm ri : AVX512_maskable
          (ins _.RC:$src1, i8imm:$src2), OpcodeStr,
          "$src2, $src1", "$src1, $src2",
...
  defm Z : avx512_shift_rrm, EVEX_V512;
}

- multiclass avx512_varshift_types<bits<8> opcd, bits<8> opcq, string OpcodeStr,
+ multiclass avx512_varshift_types<bits<8> opcd, bits<8> opcq, string OpcodeStr,
                                   SDNode OpNode> {
- defm D : avx512_varshift_sizes
-          v16i32_info>, EVEX_CD8<32, CD8VQ>;
- defm Q : avx512_varshift_sizes
+ defm D : avx512_varshift_sizes
+          v16i32_info>, EVEX_CD8<32, CD8VQ>;
+ defm Q : avx512_varshift_sizes
           v8i64_info>, EVEX_CD8<64, CD8VQ>, VEX_W;
}

          EVEX_4V;
}

- defm VPSLLVDZ : avx512_var_shift<0x47, "vpsllvd", shl, VR512, v16i32,
+ defm VPSLLVDZ : avx512_var_shift<0x47, "vpsllvd", shl, VR512, v16i32,
                                   i512mem, memopv16i32>, EVEX_V512,
                                   EVEX_CD8<32, CD8VF>;
- defm VPSLLVQZ : avx512_var_shift<0x47, "vpsllvq", shl, VR512, v8i64,
+ defm VPSLLVQZ : avx512_var_shift<0x47, "vpsllvq", shl, VR512, v8i64,
                                   i512mem, memopv8i64>, EVEX_V512, VEX_W,
                                   EVEX_CD8<64, CD8VF>;
- defm VPSRLVDZ : avx512_var_shift<0x45, "vpsrlvd", srl, VR512, v16i32,
+ defm VPSRLVDZ : avx512_var_shift<0x45, "vpsrlvd", srl, VR512, v16i32,
                                   i512mem, memopv16i32>, EVEX_V512,
                                   EVEX_CD8<32, CD8VF>;
- defm VPSRLVQZ : avx512_var_shift<0x45, "vpsrlvq", srl, VR512, v8i64,
+ defm VPSRLVQZ : avx512_var_shift<0x45, "vpsrlvq", srl, VR512, v8i64,
                                   i512mem, memopv8i64>, EVEX_V512, VEX_W,
                                   EVEX_CD8<64, CD8VF>;
- defm VPSRAVDZ : avx512_var_shift<0x46, "vpsravd", sra, VR512, v16i32,
+ defm VPSRAVDZ : avx512_var_shift<0x46, "vpsravd", sra, VR512, v16i32,
                                   i512mem, memopv16i32>, EVEX_V512,
                                   EVEX_CD8<32, CD8VF>;
- defm VPSRAVQZ : avx512_var_shift<0x46, "vpsravq", sra, VR512, v8i64,
+ defm VPSRAVQZ : avx512_var_shift<0x46, "vpsravq", sra, VR512, v8i64,
                                   i512mem, memopv8i64>, EVEX_V512, VEX_W,
                                   EVEX_CD8<64, CD8VF>;

...
//===----------------------------------------------------------------------===//
// AVX-512 - MOVDDUP
//===----------------------------------------------------------------------===//

- multiclass avx512_movddup
+ multiclass avx512_movddup
                            X86MemOperand x86memop, PatFrag memop_frag> {
def rr : AVX512PDI<0x12, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
...

// Scalar FMA
let Constraints = "$src1 = $dst" in {
- multiclass avx512_fma3s_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
-                            RegisterClass RC, ValueType OpVT,
-                            X86MemOperand x86memop, Operand memop,
+ multiclass avx512_fma3s_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                            RegisterClass RC, ValueType OpVT,
+                            X86MemOperand x86memop, Operand memop,
                             PatFrag mem_frag> {
let isCommutable = 1 in
def r : AVX512FMA3
...
def : Pat<(f32 (fround FR64X:$src)), (VCVTSD2SSZrr FR64X:$src, FR64X:$src)>,
          Requires<[HasAVX512]>;

- multiclass avx512_vcvt_fp_with_rc<bits<8> opc, string asm, RegisterClass SrcRC,
-                                   RegisterClass DstRC, SDNode OpNode, PatFrag mem_frag,
+ multiclass avx512_vcvt_fp_with_rc<bits<8> opc, string asm, RegisterClass SrcRC,
+                                   RegisterClass DstRC, SDNode OpNode, PatFrag mem_frag,
                                    X86MemOperand x86memop, ValueType OpVT, ValueType InVT,
                                    Domain d> {
let hasSideEffects = 0 in {
...
          EVEX_CD8<32, CD8VH>;
def : Pat<(v8f64 (extloadv8f32 addr:$src)),
          (VCVTPS2PDZrm addr:$src)>;
-
+
def : Pat<(v8f32 (int_x86_avx512_mask_cvtpd2ps_512 (v8f64 VR512:$src),
                  (bc_v8f32(v8i32 immAllZerosV)), (i8 -1), (i32 FROUND_CURRENT))),
          (VCVTPD2PSZrr VR512:$src)>;
...
          EVEX_CD8<32, CD8VF>;

defm VCVTTPD2DQZ : avx512_vcvt_fp<0xE6, "vcvttpd2dq", VR512, VR256X, fp_to_sint,
-                                 memopv8f64, f512mem, v8i32, v8f64,
+                                 memopv8f64, f512mem, v8i32, v8f64,
                                  SSEPackedDouble>, EVEX_V512, PD, VEX_W,
                                  EVEX_CD8<64, CD8VF>;

...
                                  memopv8f64, f512mem, v8i32, v8f64,
                                  SSEPackedDouble>, EVEX_V512, PS, VEX_W,
                                  EVEX_CD8<64, CD8VF>;
-
+
// cvttpd2udq (src, 0, mask-all-ones, sae-current)
def : Pat<(v8i32 (int_x86_avx512_mask_cvttpd2udq_512 (v8f64 VR512:$src),
                  (v8i32 immAllZerosV), (i8 -1), FROUND_CURRENT)),
...
                                  memopv4i64, f256mem, v8f64, v8i32,
                                  SSEPackedDouble>, EVEX_V512, XS,
                                  EVEX_CD8<32, CD8VH>;
-
+
defm VCVTUDQ2PSZ : avx512_vcvt_fp_with_rc<0x7A, "vcvtudq2ps", VR512, VR512, uint_to_fp,
                                          memopv16i32, f512mem, v16f32, v16i32,
                                          SSEPackedSingle>, EVEX_V512, XD,
                                          EVEX_CD8<32, CD8VF>;

def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))),
-         (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
+         (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
            (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>;
-
+
def : Pat<(v4i32 (fp_to_uint (v4f32 VR128X:$src1))),
          (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
            (v16f32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))), sub_xmm)>;
...
def : Pat<(v8f32 (uint_to_fp (v8i32 VR256X:$src1))),
          (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
            (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>;
-
+
def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))),
          (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
            (v16i32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))), sub_xmm)>;
40314031 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
40324032 }
40334033 }
4034
4034
40354035 /// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd
40364036 multiclass avx512_fp14_s opc, string OpcodeStr, RegisterClass RC,
40374037 X86MemOperand x86memop> {
42094209 }
42104210
42114211 let Predicates = [HasERI], hasSideEffects = 0 in {
4212
4212
42134213 defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28>, EVEX, EVEX_V512, T8PD;
42144214 defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28>, EVEX, EVEX_V512, T8PD;
42154215 defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2>, EVEX, EVEX_V512, T8PD;
42484248 (ins VR128X:$src1, VR128X:$src2),
42494249 !strconcat(OpcodeStr,
42504250 "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4251 [(set VR128X:$dst,
4251 [(set VR128X:$dst,
42524252 (F32Int VR128X:$src1, VR128X:$src2))],
42534253 itins_s.rr>, XS, EVEX_4V;
42544254 let mayLoad = 1 in {
42624262 (ins VR128X:$src1, ssmem:$src2),
42634263 !strconcat(OpcodeStr,
42644264 "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4265 [(set VR128X:$dst,
4265 [(set VR128X:$dst,
42664266 (F32Int VR128X:$src1, sse_load_f32:$src2))],
42674267 itins_s.rm>, XS, EVEX_4V, EVEX_CD8<32, CD8VT1>;
42684268 }
42764276 (ins VR128X:$src1, VR128X:$src2),
42774277 !strconcat(OpcodeStr,
42784278 "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4279 [(set VR128X:$dst,
4279 [(set VR128X:$dst,
42804280 (F64Int VR128X:$src1, VR128X:$src2))],
42814281 itins_s.rr>, XD, EVEX_4V, VEX_W;
42824282 let mayLoad = 1 in {
42904290 (ins VR128X:$src1, sdmem:$src2),
42914291 !strconcat(OpcodeStr,
42924292 "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4293 [(set VR128X:$dst,
4294 (F64Int VR128X:$src1, sse_load_f64:$src2))]>,
4293 [(set VR128X:$dst,
4294 (F64Int VR128X:$src1, sse_load_f64:$src2))]>,
42954295 XD, EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>;
42964296 }
42974297 }
43234323
43244324 defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", fsqrt>;
43254325
4326 defm VSQRT : avx512_sqrt_scalar<0x51, "sqrt",
4327 int_x86_avx512_sqrt_ss, int_x86_avx512_sqrt_sd,
4326 defm VSQRT : avx512_sqrt_scalar<0x51, "sqrt",
4327 int_x86_avx512_sqrt_ss, int_x86_avx512_sqrt_sd,
43284328 SSE_SQRTSS, SSE_SQRTSD>;
43294329
43304330 let Predicates = [HasAVX512] in {
43344334 def : Pat<(v8f64 (int_x86_avx512_sqrt_pd_512 (v8f64 VR512:$src1),
43354335 (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1), FROUND_CURRENT)),
43364336 (VSQRTPDZr VR512:$src1)>;
4337
4337
43384338 def : Pat<(f32 (fsqrt FR32X:$src)),
43394339 (VSQRTSSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
43404340 def : Pat<(f32 (fsqrt (load addr:$src))),
44434443 (ins VR128X:$src1, ssmem:$src2, i32i8imm:$src3),
44444444 !strconcat(OpcodeStr,
44454445 "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
4446 [(set VR128X:$dst, (F32Int VR128X:$src1,
4446 [(set VR128X:$dst, (F32Int VR128X:$src1,
44474447 sse_load_f32:$src2, imm:$src3))]>,
44484448 EVEX_CD8<32, CD8VT1>;
44494449
45354535
45364536 defm VRNDSCALESS : avx512_rndscale_scalar<0x0A, "vrndscaless", ssmem, FR32X,
45374537 SSEPackedSingle>, EVEX_CD8<32, CD8VT1>;
4538
4538
45394539 defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd", sdmem, FR64X,
45404540 SSEPackedDouble>, EVEX_CD8<64, CD8VT1>;
45414541
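A hedged user-level counterpart of the scalar round-scale definitions (assumes the AVX-512F _mm_roundscale_ss intrinsic; immediate 0x01 selects round-toward-negative-infinity with scale 0, i.e. floor of lane 0):

    #include <immintrin.h>

    // vrndscaless: floor the low lane of b, pass the upper lanes of a through.
    __m128 floor_lane0(__m128 a, __m128 b) {
      return _mm_roundscale_ss(a, b, 0x01);
    }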
46164616 []>, EVEX, EVEX_K;
46174617
46184618 }
4619 defm VPMOVQB : avx512_trunc_sat<0x32, "vpmovqb", VR128X, VR512, VK8WM,
4619 defm VPMOVQB : avx512_trunc_sat<0x32, "vpmovqb", VR128X, VR512, VK8WM,
46204620 i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>;
46214621 defm VPMOVSQB : avx512_trunc_sat<0x22, "vpmovsqb", VR128X, VR512, VK8WM,
46224622 i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>;
47644764 defm VGATHERQPSZ : avx512_gather<0x93, "vgatherqps", VK8WM, VR256X, vz64mem>,
47654765 EVEX_V512, EVEX_CD8<32, CD8VT1>;
47664766 }
4767
4767
47684768 defm VPGATHERDQZ : avx512_gather<0x90, "vpgatherdq", VK8WM, VR512, vy64xmem>,
47694769 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
47704770 defm VPGATHERDDZ : avx512_gather<0x90, "vpgatherdd", VK16WM, VR512, vz32mem>,
48294829
48304830 defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
48314831 VK8WM, vz64mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
4832
4832
48334833 defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
48344834 VK16WM, vz32mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
48354835
50035003 (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
50045004 (VPABSQZrr VR512:$src)>;
50055005
5006 multiclass avx512_conflict<bits<8> opc, string OpcodeStr,
5006 multiclass avx512_conflict<bits<8> opc, string OpcodeStr,
50075007 RegisterClass RC, RegisterClass KRC,
50085008 X86MemOperand x86memop,
50095009 X86MemOperand x86scalar_mop, string BrdcstStr> {
50365036 ", ${dst} {${mask}} {z}|${dst} {${mask}} {z}, ${src}",
50375037 BrdcstStr, "}"),
50385038 []>, EVEX, EVEX_KZ, EVEX_B;
5039
5039
50405040 let Constraints = "$src1 = $dst" in {
50415041 def rrk : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
50425042 (ins RC:$src1, KRC:$mask, RC:$src2),
51295129 !strconcat(OpcodeStr##Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
51305130 [(set Vec.RC:$dst, (Vec.VT (X86vsext Vec.KRC:$src)))]>, EVEX;
51315131 }
5132
5132
51335133 multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
51345134 string OpcodeStr, Predicate prd> {
51355135 let Predicates = [prd] in
51515151 defm NAME##Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, OpcodeStr,
51525152 HasDQI>, VEX_W;
51535153 }
5154
5154
51555155 defm VPMOVM2 : avx512_convert_mask_to_vector<"vpmovm2">;
12791279 // When testing the result of EXTRACT_SUBREG sub_8bit_hi, make sure the
12801280 // register class is constrained to GR8_NOREX. This pseudo is explicitly
12811281 // marked side-effect free, since it doesn't have an isel pattern like
1282 // other test instructions.
1282 // other test instructions.
12831283 let isPseudo = 1, hasSideEffects = 0 in
12841284 def TEST8ri_NOREX : I<0, Pseudo, (outs), (ins GR8_NOREX:$src, i8imm:$mask),
12851285 "", [], IIC_BIN_NONMEM>, Sched<[WriteALU]>;
1616 // FPStack specific DAG Nodes.
1717 //===----------------------------------------------------------------------===//
1818
19 def SDTX86FpGet2 : SDTypeProfile<2, 0, [SDTCisVT<0, f80>,
19 def SDTX86FpGet2 : SDTypeProfile<2, 0, [SDTCisVT<0, f80>,
2020 SDTCisVT<1, f80>]>;
2121 def SDTX86Fld : SDTypeProfile<1, 2, [SDTCisFP<0>,
22 SDTCisPtrTy<1>,
22 SDTCisPtrTy<1>,
2323 SDTCisVT<2, OtherVT>]>;
2424 def SDTX86Fst : SDTypeProfile<0, 3, [SDTCisFP<0>,
25 SDTCisPtrTy<1>,
25 SDTCisPtrTy<1>,
2626 SDTCisVT<2, OtherVT>]>;
2727 def SDTX86Fild : SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisPtrTy<1>,
2828 SDTCisVT<2, OtherVT>]>;
9797 // All FP Stack operations are represented with four instructions here. The
9898 // first three instructions, generated by the instruction selector, use "RFP32",
9999 // "RFP64" or "RFP80" registers: traditional register files to reference 32-bit,
100 // 64-bit or 80-bit floating point values. These sizes apply to the values,
100 // 64-bit or 80-bit floating point values. These sizes apply to the values,
101101 // not the registers, which are always 80 bits; RFP32, RFP64 and RFP80 can be
102102 // copied to each other without losing information. These instructions are all
103103 // pseudo instructions and use the "_Fp" suffix.
106106 // The second instruction is defined with FPI, which is the actual instruction
107107 // emitted by the assembler. These use "RST" registers, although frequently
108108 // the actual register(s) used are implicit. These are always 80 bits.
109 // The FP stackifier pass converts one to the other after register allocation
109 // The FP stackifier pass converts one to the other after register allocation
110110 // occurs.
111111 //
112112 // Note that the FpI instruction should have instruction selection info (e.g.
138138 // These instructions cannot address 80-bit memory.
139139 multiclass FPBinary<SDNode OpNode, Format fp, string asmstring> {
140140 // ST(0) = ST(0) + [mem]
141 def _Fp32m : FpIf32<(outs RFP32:$dst),
141 def _Fp32m : FpIf32<(outs RFP32:$dst),
142142 (ins RFP32:$src1, f32mem:$src2), OneArgFPRW,
143 [(set RFP32:$dst,
143 [(set RFP32:$dst,
144144 (OpNode RFP32:$src1, (loadf32 addr:$src2)))]>;
145 def _Fp64m : FpIf64<(outs RFP64:$dst),
145 def _Fp64m : FpIf64<(outs RFP64:$dst),
146146 (ins RFP64:$src1, f64mem:$src2), OneArgFPRW,
147 [(set RFP64:$dst,
147 [(set RFP64:$dst,
148148 (OpNode RFP64:$src1, (loadf64 addr:$src2)))]>;
149 def _Fp64m32: FpIf64<(outs RFP64:$dst),
149 def _Fp64m32: FpIf64<(outs RFP64:$dst),
150150 (ins RFP64:$src1, f32mem:$src2), OneArgFPRW,
151 [(set RFP64:$dst,
151 [(set RFP64:$dst,
152152 (OpNode RFP64:$src1, (f64 (extloadf32 addr:$src2))))]>;
153 def _Fp80m32: FpI_<(outs RFP80:$dst),
153 def _Fp80m32: FpI_<(outs RFP80:$dst),
154154 (ins RFP80:$src1, f32mem:$src2), OneArgFPRW,
155 [(set RFP80:$dst,
155 [(set RFP80:$dst,
156156 (OpNode RFP80:$src1, (f80 (extloadf32 addr:$src2))))]>;
157 def _Fp80m64: FpI_<(outs RFP80:$dst),
157 def _Fp80m64: FpI_<(outs RFP80:$dst),
158158 (ins RFP80:$src1, f64mem:$src2), OneArgFPRW,
159 [(set RFP80:$dst,
159 [(set RFP80:$dst,
160160 (OpNode RFP80:$src1, (f80 (extloadf64 addr:$src2))))]>;
161 def _F32m : FPI<0xD8, fp, (outs), (ins f32mem:$src),
162 !strconcat("f", asmstring, "{s}\t$src")> {
163 let mayLoad = 1;
164 }
165 def _F64m : FPI<0xDC, fp, (outs), (ins f64mem:$src),
166 !strconcat("f", asmstring, "{l}\t$src")> {
167 let mayLoad = 1;
161 def _F32m : FPI<0xD8, fp, (outs), (ins f32mem:$src),
162 !strconcat("f", asmstring, "{s}\t$src")> {
163 let mayLoad = 1;
164 }
165 def _F64m : FPI<0xDC, fp, (outs), (ins f64mem:$src),
166 !strconcat("f", asmstring, "{l}\t$src")> {
167 let mayLoad = 1;
168168 }
169169 // ST(0) = ST(0) + [memint]
170 def _FpI16m32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, i16mem:$src2),
170 def _FpI16m32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, i16mem:$src2),
171171 OneArgFPRW,
172172 [(set RFP32:$dst, (OpNode RFP32:$src1,
173173 (X86fild addr:$src2, i16)))]>;
174 def _FpI32m32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, i32mem:$src2),
174 def _FpI32m32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, i32mem:$src2),
175175 OneArgFPRW,
176176 [(set RFP32:$dst, (OpNode RFP32:$src1,
177177 (X86fild addr:$src2, i32)))]>;
178 def _FpI16m64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, i16mem:$src2),
178 def _FpI16m64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, i16mem:$src2),
179179 OneArgFPRW,
180180 [(set RFP64:$dst, (OpNode RFP64:$src1,
181181 (X86fild addr:$src2, i16)))]>;
182 def _FpI32m64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, i32mem:$src2),
182 def _FpI32m64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, i32mem:$src2),
183183 OneArgFPRW,
184184 [(set RFP64:$dst, (OpNode RFP64:$src1,
185185 (X86fild addr:$src2, i32)))]>;
186 def _FpI16m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i16mem:$src2),
186 def _FpI16m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i16mem:$src2),
187187 OneArgFPRW,
188188 [(set RFP80:$dst, (OpNode RFP80:$src1,
189189 (X86fild addr:$src2, i16)))]>;
190 def _FpI32m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i32mem:$src2),
190 def _FpI32m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i32mem:$src2),
191191 OneArgFPRW,
192192 [(set RFP80:$dst, (OpNode RFP80:$src1,
193193 (X86fild addr:$src2, i32)))]>;
194 def _FI16m : FPI<0xDE, fp, (outs), (ins i16mem:$src),
195 !strconcat("fi", asmstring, "{s}\t$src")> {
196 let mayLoad = 1;
197 }
198 def _FI32m : FPI<0xDA, fp, (outs), (ins i32mem:$src),
199 !strconcat("fi", asmstring, "{l}\t$src")> {
200 let mayLoad = 1;
194 def _FI16m : FPI<0xDE, fp, (outs), (ins i16mem:$src),
195 !strconcat("fi", asmstring, "{s}\t$src")> {
196 let mayLoad = 1;
197 }
198 def _FI32m : FPI<0xDA, fp, (outs), (ins i32mem:$src),
199 !strconcat("fi", asmstring, "{l}\t$src")> {
200 let mayLoad = 1;
201201 }
202202 }
203203
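FPBinary therefore emits both pseudo (_Fp*) forms carrying isel patterns and raw assembler (FPI) forms with mayLoad set. A standalone sketch of source that matches the _Fp64m pattern (assumes x87 codegen, e.g. 32-bit x86 with SSE disabled):

    // The load folds into the add, matching
    // (OpNode RFP64:$src1, (loadf64 addr:$src2)); the FP stackifier later
    // rewrites the selected ADD_Fp64m into a real faddl.
    double add_from_mem(double a, const double *p) {
      return a + *p;
    }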
499499 IIC_FST>;
500500 def ISTT_FP32m : FPI<0xDB, MRM1m, (outs), (ins i32mem:$dst), "fisttp{l}\t$dst",
501501 IIC_FST>;
502 def ISTT_FP64m : FPI<0xDD, MRM1m, (outs), (ins i64mem:$dst),
502 def ISTT_FP64m : FPI<0xDD, MRM1m, (outs), (ins i64mem:$dst),
503503 "fisttp{ll}\t$dst", IIC_FST>;
504504 }
505505
635635 def FXSAVE : I<0xAE, MRM0m, (outs opaque512mem:$dst), (ins),
636636 "fxsave\t$dst", [], IIC_FXSAVE>, TB;
637637 def FXSAVE64 : RI<0xAE, MRM0m, (outs opaque512mem:$dst), (ins),
638 "fxsave{q|64}\t$dst", [], IIC_FXSAVE>, TB,
638 "fxsave{q|64}\t$dst", [], IIC_FXSAVE>, TB,
639639 Requires<[In64BitMode]>;
640640 def FXRSTOR : I<0xAE, MRM1m, (outs), (ins opaque512mem:$src),
641641 "fxrstor\t$src", [], IIC_FXRSTOR>, TB;
655655
656656 // Required for CALL which return f32 / f64 / f80 values.
657657 def : Pat<(X86fst RFP32:$src, addr:$op, f32), (ST_Fp32m addr:$op, RFP32:$src)>;
658 def : Pat<(X86fst RFP64:$src, addr:$op, f32), (ST_Fp64m32 addr:$op,
658 def : Pat<(X86fst RFP64:$src, addr:$op, f32), (ST_Fp64m32 addr:$op,
659659 RFP64:$src)>;
660660 def : Pat<(X86fst RFP64:$src, addr:$op, f64), (ST_Fp64m addr:$op, RFP64:$src)>;
661 def : Pat<(X86fst RFP80:$src, addr:$op, f32), (ST_Fp80m32 addr:$op,
661 def : Pat<(X86fst RFP80:$src, addr:$op, f32), (ST_Fp80m32 addr:$op,
662662 RFP80:$src)>;
663 def : Pat<(X86fst RFP80:$src, addr:$op, f64), (ST_Fp80m64 addr:$op,
663 def : Pat<(X86fst RFP80:$src, addr:$op, f64), (ST_Fp80m64 addr:$op,
664664 RFP80:$src)>;
665665 def : Pat<(X86fst RFP80:$src, addr:$op, f80), (ST_FpP80m addr:$op,
666666 RFP80:$src)>;
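These X86fst patterns let a value returned on the FP stack be spilled at each legal width. A standalone illustration (assumes a target where long double is the 80-bit x87 type, e.g. x86 Linux):

    extern "C" long double make_value();

    // The store matches (X86fst RFP80:$src, addr:$op, f80) -> ST_FpP80m,
    // i.e. an fstpt to memory after the call returns in ST(0).
    void spill(long double *out) { *out = make_value(); }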
326326 let Pattern = pattern;
327327 let CodeSize = 3;
328328 }
329 class Ii8<bits<8> o, Format f, dag outs, dag ins, string asm,
329 class Ii8<bits<8> o, Format f, dag outs, dag ins, string asm,
330330 list<dag> pattern, InstrItinClass itin = NoItinerary,
331331 Domain d = GenericDomain>
332332 : X86Inst<o, f, Imm8, outs, ins, asm, itin, d> {
333333 let Pattern = pattern;
334334 let CodeSize = 3;
335335 }
336 class Ii8PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
336 class Ii8PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
337337 list<dag> pattern, InstrItinClass itin = NoItinerary>
338338 : X86Inst<o, f, Imm8PCRel, outs, ins, asm, itin> {
339339 let Pattern = pattern;
340340 let CodeSize = 3;
341341 }
342 class Ii16<bits<8> o, Format f, dag outs, dag ins, string asm,
342 class Ii16<bits<8> o, Format f, dag outs, dag ins, string asm,
343343 list<dag> pattern, InstrItinClass itin = NoItinerary>
344344 : X86Inst<o, f, Imm16, outs, ins, asm, itin> {
345345 let Pattern = pattern;
346346 let CodeSize = 3;
347347 }
348 class Ii32<bits<8> o, Format f, dag outs, dag ins, string asm,
348 class Ii32<bits<8> o, Format f, dag outs, dag ins, string asm,
349349 list<dag> pattern, InstrItinClass itin = NoItinerary>
350350 : X86Inst<o, f, Imm32, outs, ins, asm, itin> {
351351 let Pattern = pattern;
358358 let CodeSize = 3;
359359 }
360360
361 class Ii16PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
361 class Ii16PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
362362 list<dag> pattern, InstrItinClass itin = NoItinerary>
363363 : X86Inst<o, f, Imm16PCRel, outs, ins, asm, itin> {
364364 let Pattern = pattern;
365365 let CodeSize = 3;
366366 }
367367
368 class Ii32PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
368 class Ii32PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
369369 list<dag> pattern, InstrItinClass itin = NoItinerary>
370370 : X86Inst<o, f, Imm32PCRel, outs, ins, asm, itin> {
371371 let Pattern = pattern;
392392 // Iseg16 - 16-bit segment selector, 16-bit offset
393393 // Iseg32 - 16-bit segment selector, 32-bit offset
394394
395 class Iseg16<bits<8> o, Format f, dag outs, dag ins, string asm,
395 class Iseg16<bits<8> o, Format f, dag outs, dag ins, string asm,
396396 list<dag> pattern, InstrItinClass itin = NoItinerary>
397397 : X86Inst<o, f, Imm16, outs, ins, asm, itin> {
398398 let Pattern = pattern;
399399 let CodeSize = 3;
400400 }
401401
402 class Iseg32<bits<8> o, Format f, dag outs, dag ins, string asm,
402 class Iseg32<bits<8> o, Format f, dag outs, dag ins, string asm,
403403 list<dag> pattern, InstrItinClass itin = NoItinerary>
404404 : X86Inst<o, f, Imm32, outs, ins, asm, itin> {
405405 let Pattern = pattern;
477477 }
478478
479479 // SSE1 Instruction Templates:
480 //
480 //
481481 // SSI - SSE1 instructions with XS prefix.
482482 // PSI - SSE1 instructions with PS prefix.
483483 // PSIi8 - SSE1 instructions with ImmT == Imm8 and PS prefix.
508508 Requires<[HasAVX]>;
509509
510510 // SSE2 Instruction Templates:
511 //
511 //
512512 // SDI - SSE2 instructions with XD prefix.
513513 // SDIi8 - SSE2 instructions with ImmT == Imm8 and XD prefix.
514514 // S2SI - SSE2 instructions with XS prefix.
572572 : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE2]>;
573573
574574 // SSE3 Instruction Templates:
575 //
575 //
576576 // S3I - SSE3 instructions with PD prefixes.
577577 // S3SI - SSE3 instructions with XS prefix.
578578 // S3DI - SSE3 instructions with XD prefix.
579579
580 class S3SI<bits<8> o, Format F, dag outs, dag ins, string asm,
580 class S3SI<bits<8> o, Format F, dag outs, dag ins, string asm,
581581 list<dag> pattern, InstrItinClass itin = NoItinerary>
582582 : I<o, F, outs, ins, asm, pattern, itin, SSEPackedSingle>, XS,
583583 Requires<[UseSSE3]>;
584 class S3DI<bits<8> o, Format F, dag outs, dag ins, string asm,
584 class S3DI<bits<8> o, Format F, dag outs, dag ins, string asm,
585585 list<dag> pattern, InstrItinClass itin = NoItinerary>
586586 : I<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, XD,
587587 Requires<[UseSSE3]>;
592592
593593
594594 // SSSE3 Instruction Templates:
595 //
595 //
596596 // SS38I - SSSE3 instructions with T8 prefix.
597597 // SS3AI - SSSE3 instructions with TA prefix.
598598 // MMXSS38I - SSSE3 instructions with T8 prefix and MMX operands.
620620 Requires<[HasSSSE3]>;
621621
622622 // SSE4.1 Instruction Templates:
623 //
623 //
624624 // SS48I - SSE 4.1 instructions with T8 prefix.
625625 // SS41AIi8 - SSE 4.1 instructions with TA prefix and ImmT == Imm8.
626626 //
634634 Requires<[UseSSE41]>;
635635
636636 // SSE4.2 Instruction Templates:
637 //
637 //
638638 // SS428I - SSE 4.2 instructions with T8 prefix.
639639 class SS428I<bits<8> o, Format F, dag outs, dag ins, string asm,
640640 list<dag> pattern, InstrItinClass itin = NoItinerary>
867867 // MMXIi8 - MMX instructions with ImmT == Imm8 and PS prefix.
868868 // MMXID - MMX instructions with XD prefix.
869869 // MMXIS - MMX instructions with XS prefix.
870 class MMXI<bits<8> o, Format F, dag outs, dag ins, string asm,
870 class MMXI<bits<8> o, Format F, dag outs, dag ins, string asm,
871871 list<dag> pattern, InstrItinClass itin = NoItinerary>
872872 : I<o, F, outs, ins, asm, pattern, itin>, PS, Requires<[HasMMX]>;
873 class MMXI32<bits<8> o, Format F, dag outs, dag ins, string asm,
873 class MMXI32<bits<8> o, Format F, dag outs, dag ins, string asm,
874874 list<dag> pattern, InstrItinClass itin = NoItinerary>
875875 : I<o, F, outs, ins, asm, pattern, itin>, PS, Requires<[HasMMX,Not64BitMode]>;
876 class MMXI64<bits<8> o, Format F, dag outs, dag ins, string asm,
876 class MMXI64<bits<8> o, Format F, dag outs, dag ins, string asm,
877877 list<dag> pattern, InstrItinClass itin = NoItinerary>
878878 : I<o, F, outs, ins, asm, pattern, itin>, PS, Requires<[HasMMX,In64BitMode]>;
879 class MMXRI<bits<8> o, Format F, dag outs, dag ins, string asm,
879 class MMXRI<bits<8> o, Format F, dag outs, dag ins, string asm,
880880 list<dag> pattern, InstrItinClass itin = NoItinerary>
881881 : I<o, F, outs, ins, asm, pattern, itin>, PS, REX_W, Requires<[HasMMX]>;
882 class MMX2I<bits<8> o, Format F, dag outs, dag ins, string asm,
882 class MMX2I<bits<8> o, Format F, dag outs, dag ins, string asm,
883883 list<dag> pattern, InstrItinClass itin = NoItinerary>
884884 : I<o, F, outs, ins, asm, pattern, itin>, PD, Requires<[HasMMX]>;
885 class MMXIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
885 class MMXIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
886886 list<dag> pattern, InstrItinClass itin = NoItinerary>
887887 : Ii8<o, F, outs, ins, asm, pattern, itin>, PS, Requires<[HasMMX]>;
888 class MMXID<bits<8> o, Format F, dag outs, dag ins, string asm,
888 class MMXID<bits<8> o, Format F, dag outs, dag ins, string asm,
889889 list<dag> pattern, InstrItinClass itin = NoItinerary>
890890 : Ii8<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[HasMMX]>;
891 class MMXIS<bits<8> o, Format F, dag outs, dag ins, string asm,
891 class MMXIS<bits<8> o, Format F, dag outs, dag ins, string asm,
892892 list<dag> pattern, InstrItinClass itin = NoItinerary>
893893 : Ii8<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[HasMMX]>;
40984098 case X86::TEST8ri_NOREX:
40994099 MI->setDesc(get(X86::TEST8ri));
41004100 return true;
4101 case X86::KSET0B:
4101 case X86::KSET0B:
41024102 case X86::KSET0W: return Expand2AddrUndef(MIB, get(X86::KXORWrr));
41034103 case X86::KSET1B:
41044104 case X86::KSET1W: return Expand2AddrUndef(MIB, get(X86::KXNORWrr));
187187 def X86rdtscp : SDNode<"X86ISD::RDTSCP_DAG", SDTX86Void,
188188 [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
189189 def X86rdpmc : SDNode<"X86ISD::RDPMC_DAG", SDTX86Void,
190 [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
190 [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
191191
192192 def X86Wrapper : SDNode<"X86ISD::Wrapper", SDTX86Wrapper>;
193193 def X86WrapperRIP : SDNode<"X86ISD::WrapperRIP", SDTX86Wrapper>;
19811981
19821982 let Predicates = [HasLZCNT] in {
19831983 def : Pat<(X86cmov (ctlz GR16:$src), (i16 16), (X86_COND_E),
1984 (X86cmp GR16:$src, (i16 0))),
1984 (X86cmp GR16:$src, (i16 0))),
19851985 (LZCNT16rr GR16:$src)>;
19861986 def : Pat<(X86cmov (ctlz GR32:$src), (i32 32), (X86_COND_E),
19871987 (X86cmp GR32:$src, (i32 0))),
20002000 (LZCNT64rr GR64:$src)>;
20012001
20022002 def : Pat<(X86cmov (ctlz (loadi16 addr:$src)), (i16 16), (X86_COND_E),
2003 (X86cmp (loadi16 addr:$src), (i16 0))),
2003 (X86cmp (loadi16 addr:$src), (i16 0))),
20042004 (LZCNT16rm addr:$src)>;
20052005 def : Pat<(X86cmov (ctlz (loadi32 addr:$src)), (i32 32), (X86_COND_E),
2006 (X86cmp (loadi32 addr:$src), (i32 0))),
2006 (X86cmp (loadi32 addr:$src), (i32 0))),
20072007 (LZCNT32rm addr:$src)>;
20082008 def : Pat<(X86cmov (ctlz (loadi64 addr:$src)), (i64 64), (X86_COND_E),
2009 (X86cmp (loadi64 addr:$src), (i64 0))),
2009 (X86cmp (loadi64 addr:$src), (i64 0))),
20102010 (LZCNT64rm addr:$src)>;
20112011 def : Pat<(X86cmov (i16 16), (ctlz (loadi16 addr:$src)), (X86_COND_E),
2012 (X86cmp (loadi16 addr:$src), (i16 0))),
2012 (X86cmp (loadi16 addr:$src), (i16 0))),
20132013 (LZCNT16rm addr:$src)>;
20142014 def : Pat<(X86cmov (i32 32), (ctlz (loadi32 addr:$src)), (X86_COND_E),
2015 (X86cmp (loadi32 addr:$src), (i32 0))),
2015 (X86cmp (loadi32 addr:$src), (i32 0))),
20162016 (LZCNT32rm addr:$src)>;
20172017 def : Pat<(X86cmov (i64 64), (ctlz (loadi64 addr:$src)), (X86_COND_E),
2018 (X86cmp (loadi64 addr:$src), (i64 0))),
2018 (X86cmp (loadi64 addr:$src), (i64 0))),
20192019 (LZCNT64rm addr:$src)>;
20202020 }
20212021
21162116 (TZCNT64rr GR64:$src)>;
21172117
21182118 def : Pat<(X86cmov (cttz (loadi16 addr:$src)), (i16 16), (X86_COND_E),
2119 (X86cmp (loadi16 addr:$src), (i16 0))),
2119 (X86cmp (loadi16 addr:$src), (i16 0))),
21202120 (TZCNT16rm addr:$src)>;
21212121 def : Pat<(X86cmov (cttz (loadi32 addr:$src)), (i32 32), (X86_COND_E),
2122 (X86cmp (loadi32 addr:$src), (i32 0))),
2122 (X86cmp (loadi32 addr:$src), (i32 0))),
21232123 (TZCNT32rm addr:$src)>;
21242124 def : Pat<(X86cmov (cttz (loadi64 addr:$src)), (i64 64), (X86_COND_E),
2125 (X86cmp (loadi64 addr:$src), (i64 0))),
2125 (X86cmp (loadi64 addr:$src), (i64 0))),
21262126 (TZCNT64rm addr:$src)>;
21272127 def : Pat<(X86cmov (i16 16), (cttz (loadi16 addr:$src)), (X86_COND_E),
2128 (X86cmp (loadi16 addr:$src), (i16 0))),
2128 (X86cmp (loadi16 addr:$src), (i16 0))),
21292129 (TZCNT16rm addr:$src)>;
21302130 def : Pat<(X86cmov (i32 32), (cttz (loadi32 addr:$src)), (X86_COND_E),
2131 (X86cmp (loadi32 addr:$src), (i32 0))),
2131 (X86cmp (loadi32 addr:$src), (i32 0))),
21322132 (TZCNT32rm addr:$src)>;
21332133 def : Pat<(X86cmov (i64 64), (cttz (loadi64 addr:$src)), (X86_COND_E),
2134 (X86cmp (loadi64 addr:$src), (i64 0))),
2134 (X86cmp (loadi64 addr:$src), (i64 0))),
21352135 (TZCNT64rm addr:$src)>;
21362136 }
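The cmov patterns in the LZCNT and TZCNT blocks above encode that lzcnt/tzcnt already produce the operand width for a zero input, so the explicit zero guard is redundant. A standalone equivalent (assumes -mlzcnt and -mbmi respectively):

    #include <cstdint>

    // Folds to a single lzcnt (LZCNT32rr/LZCNT32rm) under -mlzcnt.
    unsigned clz32(uint32_t x) { return x ? __builtin_clz(x) : 32; }

    // Folds to a single tzcnt (TZCNT32rr/TZCNT32rm) under -mbmi.
    unsigned ctz32(uint32_t x) { return x ? __builtin_ctz(x) : 32; }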
21372137
170170 multiclass ssse3_palign_mm<string asm, Intrinsic IntId> {
171171 def R64irr : MMXSS3AI<0x0F, MRMSrcReg, (outs VR64:$dst),
172172 (ins VR64:$src1, VR64:$src2, i8imm:$src3),
173 !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
173 !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
174174 [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2, (i8 imm:$src3)))]>,
175175 Sched<[WriteShuffle]>;
176176 def R64irm : MMXSS3AI<0x0F, MRMSrcMem, (outs VR64:$dst),
219219 // Data Transfer Instructions
220220 def MMX_MOVD64rr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src),
221221 "movd\t{$src, $dst|$dst, $src}",
222 [(set VR64:$dst,
222 [(set VR64:$dst,
223223 (x86mmx (scalar_to_vector GR32:$src)))],
224224 IIC_MMX_MOV_MM_RM>, Sched<[WriteMove]>;
225225 let canFoldAsLoad = 1 in
253253 let SchedRW = [WriteMove] in {
254254 def MMX_MOVD64from64rr : MMXRI<0x7E, MRMDestReg,
255255 (outs GR64:$dst), (ins VR64:$src),
256 "movd\t{$src, $dst|$dst, $src}",
256 "movd\t{$src, $dst|$dst, $src}",
257257 [(set GR64:$dst,
258258 (bitconvert VR64:$src))], IIC_MMX_MOV_REG_MM>;
259259 let hasSideEffects = 0 in
485485 MMX_INTALU_ITINS>;
486486
487487 // -- Unpack Instructions
488 defm MMX_PUNPCKHBW : MMXI_binop_rm_int<0x68, "punpckhbw",
488 defm MMX_PUNPCKHBW : MMXI_binop_rm_int<0x68, "punpckhbw",
489489 int_x86_mmx_punpckhbw,
490490 MMX_UNPCK_H_ITINS>;
491 defm MMX_PUNPCKHWD : MMXI_binop_rm_int<0x69, "punpckhwd",
491 defm MMX_PUNPCKHWD : MMXI_binop_rm_int<0x69, "punpckhwd",
492492 int_x86_mmx_punpckhwd,
493493 MMX_UNPCK_H_ITINS>;
494 defm MMX_PUNPCKHDQ : MMXI_binop_rm_int<0x6A, "punpckhdq",
494 defm MMX_PUNPCKHDQ : MMXI_binop_rm_int<0x6A, "punpckhdq",
495495 int_x86_mmx_punpckhdq,
496496 MMX_UNPCK_H_ITINS>;
497 defm MMX_PUNPCKLBW : MMXI_binop_rm_int<0x60, "punpcklbw",
497 defm MMX_PUNPCKLBW : MMXI_binop_rm_int<0x60, "punpcklbw",
498498 int_x86_mmx_punpcklbw,
499499 MMX_UNPCK_L_ITINS>;
500 defm MMX_PUNPCKLWD : MMXI_binop_rm_int<0x61, "punpcklwd",
500 defm MMX_PUNPCKLWD : MMXI_binop_rm_int<0x61, "punpcklwd",
501501 int_x86_mmx_punpcklwd,
502502 MMX_UNPCK_L_ITINS>;
503503 defm MMX_PUNPCKLDQ : MMXI_binop_rm_int<0x62, "punpckldq",
565565 IIC_MMX_PEXTR>, Sched<[WriteShuffle]>;
566566 let Constraints = "$src1 = $dst" in {
567567 def MMX_PINSRWirri : MMXIi8<0xC4, MRMSrcReg,
568 (outs VR64:$dst),
568 (outs VR64:$dst),
569569 (ins VR64:$src1, GR32orGR64:$src2, i32i8imm:$src3),
570570 "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
571571 [(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1,
30983098 // previously we generated:
30993099 // addss %xmm0, %xmm1
31003100 // movss %xmm1, %xmm0
3101 //
3101 //
31023102 // we now generate:
31033103 // addss %xmm1, %xmm0
31043104
33203320 // previously we generated:
33213321 // addps %xmm0, %xmm1
33223322 // movss %xmm1, %xmm0
3323 //
3323 //
33243324 // we now generate:
33253325 // addss %xmm1, %xmm0
33263326
33283328 def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst),
33293329 (fadd (v4f32 VR128:$dst), (v4f32 VR128:$src)))),
33303330 (ADDSSrr_Int v4f32:$dst, v4f32:$src)>;
3331 def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst),
3331 def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst),
33323332 (fsub (v4f32 VR128:$dst), (v4f32 VR128:$src)))),
33333333 (SUBSSrr_Int v4f32:$dst, v4f32:$src)>;
33343334 def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst),
33353335 (fmul (v4f32 VR128:$dst), (v4f32 VR128:$src)))),
33363336 (MULSSrr_Int v4f32:$dst, v4f32:$src)>;
3337 def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst),
3337 def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst),
33383338 (fdiv (v4f32 VR128:$dst), (v4f32 VR128:$src)))),
33393339 (DIVSSrr_Int v4f32:$dst, v4f32:$src)>;
33403340 }
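In intrinsics terms, the improvement described in the comments above looks like this (SSE1 assumed; the same holds for the sub/mul/div entries):

    #include <xmmintrin.h>

    // Matches (X86Movss $dst, (fadd $dst, $src)) -> ADDSSrr_Int:
    // one addss, no trailing movss.
    __m128 add_lane0(__m128 dst, __m128 src) {
      return _mm_add_ss(dst, src);
    }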
33633363 def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst),
33643364 (fadd (v4f32 VR128:$dst), (v4f32 VR128:$src)), (i8 1))),
33653365 (ADDSSrr_Int v4f32:$dst, v4f32:$src)>;
3366 def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst),
3366 def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst),
33673367 (fsub (v4f32 VR128:$dst), (v4f32 VR128:$src)), (i8 1))),
33683368 (SUBSSrr_Int v4f32:$dst, v4f32:$src)>;
33693369 def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst),
33703370 (fmul (v4f32 VR128:$dst), (v4f32 VR128:$src)), (i8 1))),
33713371 (MULSSrr_Int v4f32:$dst, v4f32:$src)>;
3372 def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst),
3372 def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst),
33733373 (fdiv (v4f32 VR128:$dst), (v4f32 VR128:$src)), (i8 1))),
33743374 (DIVSSrr_Int v4f32:$dst, v4f32:$src)>;
33753375
34343434 def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst),
34353435 (fadd (v4f32 VR128:$dst), (v4f32 VR128:$src)), (i8 1))),
34363436 (VADDSSrr_Int v4f32:$dst, v4f32:$src)>;
3437 def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst),
3437 def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst),
34383438 (fsub (v4f32 VR128:$dst), (v4f32 VR128:$src)), (i8 1))),
34393439 (VSUBSSrr_Int v4f32:$dst, v4f32:$src)>;
34403440 def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst),
34413441 (fmul (v4f32 VR128:$dst), (v4f32 VR128:$src)), (i8 1))),
34423442 (VMULSSrr_Int v4f32:$dst, v4f32:$src)>;
3443 def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst),
3443 def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst),
34443444 (fdiv (v4f32 VR128:$dst), (v4f32 VR128:$src)), (i8 1))),
34453445 (VDIVSSrr_Int v4f32:$dst, v4f32:$src)>;
34463446
39793979 let SchedRW = [WriteNop] in {
39803980 // Pause. This "instruction" is encoded as "rep; nop", so even though it
39813981 // was introduced with SSE2, it's backward compatible.
3982 def PAUSE : I<0x90, RawFrm, (outs), (ins),
3983 "pause", [(int_x86_sse2_pause)], IIC_SSE_PAUSE>,
3982 def PAUSE : I<0x90, RawFrm, (outs), (ins),
3983 "pause", [(int_x86_sse2_pause)], IIC_SSE_PAUSE>,
39843984 OBXS, Requires<[HasSSE2]>;
39853985 }
39863986
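Because pause encodes as rep; nop, the usual spin-wait idiom degrades gracefully to a plain nop on pre-SSE2 parts; a standalone sketch:

    #include <immintrin.h>
    #include <atomic>

    // _mm_pause lowers to the PAUSE definition above.
    void spin_until(const std::atomic<bool> &flag) {
      while (!flag.load(std::memory_order_acquire))
        _mm_pause();
    }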
64276427 def : Pat<(v4i64 (X86vsext (v4i32 (bitconvert (v2i64 (load addr:$src)))))),
64286428 (VPMOVSXDQYrm addr:$src)>;
64296429
6430 def : Pat<(v8i32 (X86vsext (v16i8 (bitconvert (v2i64
6430 def : Pat<(v8i32 (X86vsext (v16i8 (bitconvert (v2i64
64316431 (scalar_to_vector (loadi64 addr:$src))))))),
64326432 (VPMOVSXBDYrm addr:$src)>;
6433 def : Pat<(v8i32 (X86vsext (v16i8 (bitconvert (v2f64
6433 def : Pat<(v8i32 (X86vsext (v16i8 (bitconvert (v2f64
64346434 (scalar_to_vector (loadf64 addr:$src))))))),
64356435 (VPMOVSXBDYrm addr:$src)>;
64366436
6437 def : Pat<(v4i64 (X86vsext (v8i16 (bitconvert (v2i64
6437 def : Pat<(v4i64 (X86vsext (v8i16 (bitconvert (v2i64
64386438 (scalar_to_vector (loadi64 addr:$src))))))),
64396439 (VPMOVSXWQYrm addr:$src)>;
6440 def : Pat<(v4i64 (X86vsext (v8i16 (bitconvert (v2f64
6440 def : Pat<(v4i64 (X86vsext (v8i16 (bitconvert (v2f64
64416441 (scalar_to_vector (loadf64 addr:$src))))))),
64426442 (VPMOVSXWQYrm addr:$src)>;
64436443
6444 def : Pat<(v4i64 (X86vsext (v16i8 (bitconvert (v4i32
6444 def : Pat<(v4i64 (X86vsext (v16i8 (bitconvert (v4i32
64456445 (scalar_to_vector (loadi32 addr:$src))))))),
64466446 (VPMOVSXBQYrm addr:$src)>;
64476447 }
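These patterns fold a narrow scalar load directly into the sign-extension instruction. Roughly the following, in intrinsics (assumes AVX2 for the ymm pmovsx forms):

    #include <immintrin.h>

    // An 8-byte load feeding a byte->dword sign extension matches
    // (X86vsext (bitconvert (scalar_to_vector (loadi64 ...)))) and selects
    // VPMOVSXBDYrm, i.e. the load is folded into vpmovsxbd.
    __m256i widen_s8_to_s32(const void *p) {
      return _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i *)p));
    }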
288288 "sar{w}\t{%cl, $dst|$dst, cl}",
289289 [(store (sra (loadi16 addr:$dst), CL), addr:$dst)],
290290 IIC_SR>, OpSize16;
291 def SAR32mCL : I<0xD3, MRM7m, (outs), (ins i32mem:$dst),
291 def SAR32mCL : I<0xD3, MRM7m, (outs), (ins i32mem:$dst),
292292 "sar{l}\t{%cl, $dst|$dst, cl}",
293293 [(store (sra (loadi32 addr:$dst), CL), addr:$dst)],
294294 IIC_SR>, OpSize32;
295 def SAR64mCL : RI<0xD3, MRM7m, (outs), (ins i64mem:$dst),
295 def SAR64mCL : RI<0xD3, MRM7m, (outs), (ins i64mem:$dst),
296296 "sar{q}\t{%cl, $dst|$dst, cl}",
297297 [(store (sra (loadi64 addr:$dst), CL), addr:$dst)],
298298 IIC_SR>;
346346 let Uses = [CL] in
347347 def RCL8rCL : I<0xD2, MRM2r, (outs GR8:$dst), (ins GR8:$src1),
348348 "rcl{b}\t{%cl, $dst|$dst, cl}", [], IIC_SR>;
349
349
350350 def RCL16r1 : I<0xD1, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
351351 "rcl{w}\t$dst", [], IIC_SR>, OpSize16;
352352 def RCL16ri : Ii8<0xC1, MRM2r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt),
380380 let Uses = [CL] in
381381 def RCR8rCL : I<0xD2, MRM3r, (outs GR8:$dst), (ins GR8:$src1),
382382 "rcr{b}\t{%cl, $dst|$dst, cl}", [], IIC_SR>;
383
383
384384 def RCR16r1 : I<0xD1, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
385385 "rcr{w}\t$dst", [], IIC_SR>, OpSize16;
386386 def RCR16ri : Ii8<0xC1, MRM3r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt),
396396 let Uses = [CL] in
397397 def RCR32rCL : I<0xD3, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
398398 "rcr{l}\t{%cl, $dst|$dst, cl}", [], IIC_SR>, OpSize32;
399
399
400400 def RCR64r1 : RI<0xD1, MRM3r, (outs GR64:$dst), (ins GR64:$src1),
401401 "rcr{q}\t$dst", [], IIC_SR>;
402402 def RCR64ri : RIi8<0xC1, MRM3r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$cnt),
492492 "rol{l}\t{$src2, $dst|$dst, $src2}",
493493 [(set GR32:$dst, (rotl GR32:$src1, (i8 imm:$src2)))],
494494 IIC_SR>, OpSize32;
495 def ROL64ri : RIi8<0xC1, MRM0r, (outs GR64:$dst),
495 def ROL64ri : RIi8<0xC1, MRM0r, (outs GR64:$dst),
496496 (ins GR64:$src1, i8imm:$src2),
497497 "rol{q}\t{$src2, $dst|$dst, $src2}",
498498 [(set GR64:$dst, (rotl GR64:$src1, (i8 imm:$src2)))],
599599 "ror{l}\t{$src2, $dst|$dst, $src2}",
600600 [(set GR32:$dst, (rotr GR32:$src1, (i8 imm:$src2)))],
601601 IIC_SR>, OpSize32;
602 def ROR64ri : RIi8<0xC1, MRM1r, (outs GR64:$dst),
602 def ROR64ri : RIi8<0xC1, MRM1r, (outs GR64:$dst),
603603 (ins GR64:$src1, i8imm:$src2),
604604 "ror{q}\t{$src2, $dst|$dst, $src2}",
605605 [(set GR64:$dst, (rotr GR64:$src1, (i8 imm:$src2)))],
634634 "ror{w}\t{%cl, $dst|$dst, cl}",
635635 [(store (rotr (loadi16 addr:$dst), CL), addr:$dst)],
636636 IIC_SR>, OpSize16;
637 def ROR32mCL : I<0xD3, MRM1m, (outs), (ins i32mem:$dst),
637 def ROR32mCL : I<0xD3, MRM1m, (outs), (ins i32mem:$dst),
638638 "ror{l}\t{%cl, $dst|$dst, cl}",
639639 [(store (rotr (loadi32 addr:$dst), CL), addr:$dst)],
640640 IIC_SR>, OpSize32;
641 def ROR64mCL : RI<0xD3, MRM1m, (outs), (ins i64mem:$dst),
641 def ROR64mCL : RI<0xD3, MRM1m, (outs), (ins i64mem:$dst),
642642 "ror{q}\t{%cl, $dst|$dst, cl}",
643643 [(store (rotr (loadi64 addr:$dst), CL), addr:$dst)],
644644 IIC_SR>;
687687 let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in {
688688
689689 let Uses = [CL] in {
690 def SHLD16rrCL : I<0xA5, MRMDestReg, (outs GR16:$dst),
690 def SHLD16rrCL : I<0xA5, MRMDestReg, (outs GR16:$dst),
691691 (ins GR16:$src1, GR16:$src2),
692692 "shld{w}\t{%cl, $src2, $dst|$dst, $src2, cl}",
693693 [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2, CL))],
694694 IIC_SHD16_REG_CL>,
695695 TB, OpSize16;
696 def SHRD16rrCL : I<0xAD, MRMDestReg, (outs GR16:$dst),
696 def SHRD16rrCL : I<0xAD, MRMDestReg, (outs GR16:$dst),
697697 (ins GR16:$src1, GR16:$src2),
698698 "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, cl}",
699699 [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2, CL))],
700700 IIC_SHD16_REG_CL>,
701701 TB, OpSize16;
702 def SHLD32rrCL : I<0xA5, MRMDestReg, (outs GR32:$dst),
702 def SHLD32rrCL : I<0xA5, MRMDestReg, (outs GR32:$dst),
703703 (ins GR32:$src1, GR32:$src2),
704704 "shld{l}\t{%cl, $src2, $dst|$dst, $src2, cl}",
705705 [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2, CL))],
709709 "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, cl}",
710710 [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2, CL))],
711711 IIC_SHD32_REG_CL>, TB, OpSize32;
712 def SHLD64rrCL : RI<0xA5, MRMDestReg, (outs GR64:$dst),
712 def SHLD64rrCL : RI<0xA5, MRMDestReg, (outs GR64:$dst),
713713 (ins GR64:$src1, GR64:$src2),
714714 "shld{q}\t{%cl, $src2, $dst|$dst, $src2, cl}",
715715 [(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2, CL))],
716 IIC_SHD64_REG_CL>,
716 IIC_SHD64_REG_CL>,
717717 TB;
718 def SHRD64rrCL : RI<0xAD, MRMDestReg, (outs GR64:$dst),
718 def SHRD64rrCL : RI<0xAD, MRMDestReg, (outs GR64:$dst),
719719 (ins GR64:$src1, GR64:$src2),
720720 "shrd{q}\t{%cl, $src2, $dst|$dst, $src2, cl}",
721721 [(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2, CL))],
722 IIC_SHD64_REG_CL>,
722 IIC_SHD64_REG_CL>,
723723 TB;
724724 }
725725
726726 let isCommutable = 1 in { // These instructions commute to each other.
727727 def SHLD16rri8 : Ii8<0xA4, MRMDestReg,
728 (outs GR16:$dst),
728 (outs GR16:$dst),
729729 (ins GR16:$src1, GR16:$src2, i8imm:$src3),
730730 "shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
731731 [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2,
732732 (i8 imm:$src3)))], IIC_SHD16_REG_IM>,
733733 TB, OpSize16;
734734 def SHRD16rri8 : Ii8<0xAC, MRMDestReg,
735 (outs GR16:$dst),
735 (outs GR16:$dst),
736736 (ins GR16:$src1, GR16:$src2, i8imm:$src3),
737737 "shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
738738 [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2,
739739 (i8 imm:$src3)))], IIC_SHD16_REG_IM>,
740740 TB, OpSize16;
741741 def SHLD32rri8 : Ii8<0xA4, MRMDestReg,
742 (outs GR32:$dst),
742 (outs GR32:$dst),
743743 (ins GR32:$src1, GR32:$src2, i8imm:$src3),
744744 "shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
745745 [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2,
746746 (i8 imm:$src3)))], IIC_SHD32_REG_IM>,
747747 TB, OpSize32;
748748 def SHRD32rri8 : Ii8<0xAC, MRMDestReg,
749 (outs GR32:$dst),
749 (outs GR32:$dst),
750750 (ins GR32:$src1, GR32:$src2, i8imm:$src3),
751751 "shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
752752 [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2,
753753 (i8 imm:$src3)))], IIC_SHD32_REG_IM>,
754754 TB, OpSize32;
755755 def SHLD64rri8 : RIi8<0xA4, MRMDestReg,
756 (outs GR64:$dst),
756 (outs GR64:$dst),
757757 (ins GR64:$src1, GR64:$src2, i8imm:$src3),
758758 "shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
759759 [(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2,
760760 (i8 imm:$src3)))], IIC_SHD64_REG_IM>,
761761 TB;
762762 def SHRD64rri8 : RIi8<0xAC, MRMDestReg,
763 (outs GR64:$dst),
763 (outs GR64:$dst),
764764 (ins GR64:$src1, GR64:$src2, i8imm:$src3),
765765 "shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
766766 [(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2,
788788 "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, cl}",
789789 [(store (X86shrd (loadi32 addr:$dst), GR32:$src2, CL),
790790 addr:$dst)], IIC_SHD32_MEM_CL>, TB, OpSize32;
791
791
792792 def SHLD64mrCL : RI<0xA5, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
793793 "shld{q}\t{%cl, $src2, $dst|$dst, $src2, cl}",
794794 [(store (X86shld (loadi64 addr:$dst), GR64:$src2, CL),
806806 (i8 imm:$src3)), addr:$dst)],
807807 IIC_SHD16_MEM_IM>,
808808 TB, OpSize16;
809 def SHRD16mri8 : Ii8<0xAC, MRMDestMem,
809 def SHRD16mri8 : Ii8<0xAC, MRMDestMem,
810810 (outs), (ins i16mem:$dst, GR16:$src2, i8imm:$src3),
811811 "shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
812812 [(store (X86shrd (loadi16 addr:$dst), GR16:$src2,
821821 (i8 imm:$src3)), addr:$dst)],
822822 IIC_SHD32_MEM_IM>,
823823 TB, OpSize32;
824 def SHRD32mri8 : Ii8<0xAC, MRMDestMem,
824 def SHRD32mri8 : Ii8<0xAC, MRMDestMem,
825825 (outs), (ins i32mem:$dst, GR32:$src2, i8imm:$src3),
826826 "shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
827827 [(store (X86shrd (loadi32 addr:$dst), GR32:$src2,
836836 (i8 imm:$src3)), addr:$dst)],
837837 IIC_SHD64_MEM_IM>,
838838 TB;
839 def SHRD64mri8 : RIi8<0xAC, MRMDestMem,
839 def SHRD64mri8 : RIi8<0xAC, MRMDestMem,
840840 (outs), (ins i64mem:$dst, GR64:$src2, i8imm:$src3),
841841 "shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
842842 [(store (X86shrd (loadi64 addr:$dst), GR64:$src2,
206206 let SchedRW = [WriteSystem] in {
207207 def SWAPGS : I<0x01, MRM_F8, (outs), (ins), "swapgs", [], IIC_SWAPGS>, TB;
208208
209 def LAR16rm : I<0x02, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
209 def LAR16rm : I<0x02, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
210210 "lar{w}\t{$src, $dst|$dst, $src}", [], IIC_LAR_RM>, TB,
211211 OpSize16;
212212 def LAR16rr : I<0x02, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
214214 OpSize16;
215215
216216 // The i16mem operand in LAR32rm and the GR32 operand in LAR32rr are not typos.
217 def LAR32rm : I<0x02, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
217 def LAR32rm : I<0x02, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
218218 "lar{l}\t{$src, $dst|$dst, $src}", [], IIC_LAR_RM>, TB,
219219 OpSize32;
220220 def LAR32rr : I<0x02, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
221221 "lar{l}\t{$src, $dst|$dst, $src}", [], IIC_LAR_RR>, TB,
222222 OpSize32;
223223 // The i16mem operand in LAR64rm and the GR32 operand in LAR64rr are not typos.
224 def LAR64rm : RI<0x02, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
224 def LAR64rm : RI<0x02, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
225225 "lar{q}\t{$src, $dst|$dst, $src}", [], IIC_LAR_RM>, TB;
226226 def LAR64rr : RI<0x02, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src),
227227 "lar{q}\t{$src, $dst|$dst, $src}", [], IIC_LAR_RR>, TB;
239239 "lsl{l}\t{$src, $dst|$dst, $src}", [], IIC_LSL_RR>, TB,
240240 OpSize32;
241241 def LSL64rm : RI<0x03, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
242 "lsl{q}\t{$src, $dst|$dst, $src}", [], IIC_LSL_RM>, TB;
242 "lsl{q}\t{$src, $dst|$dst, $src}", [], IIC_LSL_RM>, TB;
243243 def LSL64rr : RI<0x03, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
244244 "lsl{q}\t{$src, $dst|$dst, $src}", [], IIC_LSL_RR>, TB;
245245
259259 "ltr{w}\t$src", [], IIC_LTR>, TB;
260260 def LTRm : I<0x00, MRM3m, (outs), (ins i16mem:$src),
261261 "ltr{w}\t$src", [], IIC_LTR>, TB;
262
262
263263 def PUSHCS16 : I<0x0E, RawFrm, (outs), (ins),
264264 "push{w}\t{%cs|cs}", [], IIC_PUSH_SR>,
265265 OpSize16, Requires<[Not64BitMode]>;
346346 "lds{w}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, OpSize16;
347347 def LDS32rm : I<0xc5, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
348348 "lds{l}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, OpSize32;
349
349
350350 def LSS16rm : I<0xb2, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
351351 "lss{w}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB, OpSize16;
352352 def LSS32rm : I<0xb2, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
353353 "lss{l}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB, OpSize32;
354354 def LSS64rm : RI<0xb2, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src),
355355 "lss{q}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB;
356
356
357357 def LES16rm : I<0xc4, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
358358 "les{w}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, OpSize16;
359359 def LES32rm : I<0xc4, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
360360 "les{l}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, OpSize32;
361
361
362362 def LFS16rm : I<0xb4, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
363363 "lfs{w}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB, OpSize16;
364364 def LFS32rm : I<0xb4, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
365365 "lfs{l}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB, OpSize32;
366366 def LFS64rm : RI<0xb4, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src),
367367 "lfs{q}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB;
368
368
369369 def LGS16rm : I<0xb5, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
370370 "lgs{w}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB, OpSize16;
371371 def LGS32rm : I<0xb5, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
372372 "lgs{l}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB, OpSize32;
373
373
374374 def LGS64rm : RI<0xb5, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src),
375375 "lgs{q}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB;
376376
407407 "sldt{w}\t$dst", [], IIC_SLDT>, TB;
408408 def SLDT32r : I<0x00, MRM0r, (outs GR32:$dst), (ins),
409409 "sldt{l}\t$dst", [], IIC_SLDT>, OpSize32, TB;
410
410
411411 // LLDT is not interpreted specially in 64-bit mode because there is no sign
412412 // extension.
413413 def SLDT64r : RI<0x00, MRM0r, (outs GR64:$dst), (ins),
443443 def RDPMC : I<0x33, RawFrm, (outs), (ins), "rdpmc", [(X86rdpmc)], IIC_RDPMC>,
444444 TB;
445445
446 def SMSW16r : I<0x01, MRM4r, (outs GR16:$dst), (ins),
446 def SMSW16r : I<0x01, MRM4r, (outs GR16:$dst), (ins),
447447 "smsw{w}\t$dst", [], IIC_SMSW>, OpSize16, TB;
448 def SMSW32r : I<0x01, MRM4r, (outs GR32:$dst), (ins),
448 def SMSW32r : I<0x01, MRM4r, (outs GR32:$dst), (ins),
449449 "smsw{l}\t$dst", [], IIC_SMSW>, OpSize32, TB;
450450 // no m form encodable; use SMSW16m
451 def SMSW64r : RI<0x01, MRM4r, (outs GR64:$dst), (ins),
451 def SMSW64r : RI<0x01, MRM4r, (outs GR64:$dst), (ins),
452452 "smsw{q}\t$dst", [], IIC_SMSW>, TB;
453453
454454 // For memory operands, there is only a 16-bit form
1919 INTR_NO_TYPE,
2020 GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, ADX,
2121 INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP,
22 CMP_MASK, CMP_MASK_CC, VSHIFT, VSHIFT_MASK, COMI,
22 CMP_MASK, CMP_MASK_CC, VSHIFT, VSHIFT_MASK, COMI,
2323 INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK, INTR_TYPE_SCALAR_MASK_RM
2424 };
2525
5050 X86_INTRINSIC_DATA(addcarry_u64, ADX, X86ISD::ADC, 0),
5151 X86_INTRINSIC_DATA(addcarryx_u32, ADX, X86ISD::ADC, 0),
5252 X86_INTRINSIC_DATA(addcarryx_u64, ADX, X86ISD::ADC, 0),
53
53
5454 X86_INTRINSIC_DATA(avx512_gather_dpd_512, GATHER, X86::VGATHERDPDZrm, 0),
5555 X86_INTRINSIC_DATA(avx512_gather_dpi_512, GATHER, X86::VPGATHERDDZrm, 0),
5656 X86_INTRINSIC_DATA(avx512_gather_dpq_512, GATHER, X86::VPGATHERDQZrm, 0),
5959 X86_INTRINSIC_DATA(avx512_gather_qpi_512, GATHER, X86::VPGATHERQDZrm, 0),
6060 X86_INTRINSIC_DATA(avx512_gather_qpq_512, GATHER, X86::VPGATHERQQZrm, 0),
6161 X86_INTRINSIC_DATA(avx512_gather_qps_512, GATHER, X86::VGATHERQPSZrm, 0),
62
62
6363 X86_INTRINSIC_DATA(avx512_gatherpf_dpd_512, PREFETCH,
6464 X86::VGATHERPF0DPDm, X86::VGATHERPF1DPDm),
6565 X86_INTRINSIC_DATA(avx512_gatherpf_dps_512, PREFETCH,
6868 X86::VGATHERPF0QPDm, X86::VGATHERPF1QPDm),
6969 X86_INTRINSIC_DATA(avx512_gatherpf_qps_512, PREFETCH,
7070 X86::VGATHERPF0QPSm, X86::VGATHERPF1QPSm),
71
71
7272 X86_INTRINSIC_DATA(avx512_scatter_dpd_512, SCATTER, X86::VSCATTERDPDZmr, 0),
7373 X86_INTRINSIC_DATA(avx512_scatter_dpi_512, SCATTER, X86::VPSCATTERDDZmr, 0),
7474 X86_INTRINSIC_DATA(avx512_scatter_dpq_512, SCATTER, X86::VPSCATTERDQZmr, 0),
7777 X86_INTRINSIC_DATA(avx512_scatter_qpi_512, SCATTER, X86::VPSCATTERQDZmr, 0),
7878 X86_INTRINSIC_DATA(avx512_scatter_qpq_512, SCATTER, X86::VPSCATTERQQZmr, 0),
7979 X86_INTRINSIC_DATA(avx512_scatter_qps_512, SCATTER, X86::VSCATTERQPSZmr, 0),
80
80
8181 X86_INTRINSIC_DATA(avx512_scatterpf_dpd_512, PREFETCH,
8282 X86::VSCATTERPF0DPDm, X86::VSCATTERPF1DPDm),
8383 X86_INTRINSIC_DATA(avx512_scatterpf_dps_512, PREFETCH,
8686 X86::VSCATTERPF0QPDm, X86::VSCATTERPF1QPDm),
8787 X86_INTRINSIC_DATA(avx512_scatterpf_qps_512, PREFETCH,
8888 X86::VSCATTERPF0QPSm, X86::VSCATTERPF1QPSm),
89
89
9090 X86_INTRINSIC_DATA(rdpmc, RDPMC, X86ISD::RDPMC_DAG, 0),
9191 X86_INTRINSIC_DATA(rdrand_16, RDRAND, X86ISD::RDRAND, 0),
9292 X86_INTRINSIC_DATA(rdrand_32, RDRAND, X86ISD::RDRAND, 0),
9696 X86_INTRINSIC_DATA(rdseed_64, RDSEED, X86ISD::RDSEED, 0),
9797 X86_INTRINSIC_DATA(rdtsc, RDTSC, X86ISD::RDTSC_DAG, 0),
9898 X86_INTRINSIC_DATA(rdtscp, RDTSC, X86ISD::RDTSCP_DAG, 0),
99
99
100100 X86_INTRINSIC_DATA(subborrow_u32, ADX, X86ISD::SBB, 0),
101101 X86_INTRINSIC_DATA(subborrow_u64, ADX, X86ISD::SBB, 0),
102102 X86_INTRINSIC_DATA(xtest, XTEST, X86ISD::XTEST, 0),
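Each X86_INTRINSIC_DATA row maps a frontend intrinsic onto an ISD node or machine opcode; from user code the rdtsc row, for instance, is reached via the compiler builtin (GCC/Clang __rdtsc assumed):

    #include <x86intrin.h>
    #include <cstdint>

    // @llvm.x86.rdtsc -> X86ISD::RDTSC_DAG per the table above.
    uint64_t cycles() { return __rdtsc(); }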
389389 Inst.addOperand(Seg);
390390 }
391391
392 static unsigned getRetOpcode(const X86Subtarget &Subtarget)
393 {
394 return Subtarget.is64Bit() ? X86::RETQ : X86::RETL;
392 static unsigned getRetOpcode(const X86Subtarget &Subtarget) {
393 return Subtarget.is64Bit() ? X86::RETQ : X86::RETL;
395394 }
396395
397396 void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
855854
856855 // Record our statepoint node in the same section used by STACKMAP
857856 // and PATCHPOINT
858 SM.recordStatepoint(MI);
857 SM.recordStatepoint(MI);
859858 }
860859
861860
10841083 case TargetOpcode::STATEPOINT:
10851084 return LowerSTATEPOINT(OutStreamer, SM, *MI, Subtarget->is64Bit(), TM,
10861085 getSubtargetInfo(), MCInstLowering);
1087
1086
10881087 case TargetOpcode::STACKMAP:
10891088 return LowerSTACKMAP(*MI);
10901089
1919 const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
2020 MF->getSubtarget().getRegisterInfo());
2121 unsigned SlotSize = RegInfo->getSlotSize();
22 for (const MCPhysReg *CSR =
22 for (const MCPhysReg *CSR =
2323 RegInfo->X86RegisterInfo::getCalleeSavedRegs(MF);
2424 unsigned Reg = *CSR;
2525 ++CSR)
3030 /// contains stack pointer re-alignment code which requires FP.
3131 bool ForceFramePointer;
3232
33 /// RestoreBasePointerOffset - Non-zero if the function has a base pointer
34 /// and makes a call to llvm.eh.sjlj.setjmp. When non-zero, the value is a
35 /// displacement from the frame pointer to a slot where the base pointer
36 /// is stashed.
33 /// RestoreBasePointerOffset - Non-zero if the function has a base pointer
34 /// and makes a call to llvm.eh.sjlj.setjmp. When non-zero, the value is a
35 /// displacement from the frame pointer to a slot where the base pointer
36 /// is stashed.
3737 signed char RestoreBasePointerOffset;
38
38
3939 /// CalleeSavedFrameSize - Size of the callee-saved register portion of the
4040 /// stack frame in bytes.
4141 unsigned CalleeSavedFrameSize;
160160
161161 /// True if unaligned 32-byte memory accesses are slow.
162162 bool IsUAMem32Slow;
163
163
164164 /// HasVectorUAMem - True if SIMD operations can have unaligned memory
165165 /// operands. This may require setting a feature bit in the processor.
166166 bool HasVectorUAMem;
207207 /// For this to be profitable, the cost of FDIV must be
208208 /// substantially higher than normal FP ops like FADD and FMUL.
209209 bool UseReciprocalEst;
210
210
211211 /// Processor has AVX-512 PreFetch Instructions
212212 bool HasPFI;
213213
351351 { ISD::SHL, MVT::v8i16, 8*10 }, // Scalarized.
352352 { ISD::SHL, MVT::v4i32, 2*5 }, // We optimized this using mul.
353353 { ISD::SHL, MVT::v2i64, 2*10 }, // Scalarized.
354 { ISD::SHL, MVT::v4i64, 4*10 }, // Scalarized.
354 { ISD::SHL, MVT::v4i64, 4*10 }, // Scalarized.
355355
356356 { ISD::SRL, MVT::v16i8, 16*10 }, // Scalarized.
357357 { ISD::SRL, MVT::v8i16, 8*10 }, // Scalarized.
524524 {ISD::VECTOR_SHUFFLE, MVT::v8i16, 3}, // pshufb + pshufb + or
525525 {ISD::VECTOR_SHUFFLE, MVT::v16i8, 3} // pshufb + pshufb + or
526526 };
527
527
528528 if (ST->hasSSSE3()) {
529529 int Idx = CostTableLookup(SSSE3AltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
530530 if (Idx != -1)
537537
538538 {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2}, // shufps + pshufd
539539 {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2}, // shufps + pshufd
540
540
541541 // This is expanded into a long sequence of four extract + four insert.
542542 {ISD::VECTOR_SHUFFLE, MVT::v8i16, 8}, // 4 x pextrw + 4 pinsrw.
543543
545545 {ISD::VECTOR_SHUFFLE, MVT::v16i8, 48}
546546 };
547547
548 // Fall-back (SSE3 and SSE2).
548 // Fall-back (SSE3 and SSE2).
549549 int Idx = CostTableLookup(SSEAltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
550550 if (Idx != -1)
551551 return LT.first * SSEAltShuffleTbl[Idx].Cost;
929929
930930 unsigned X86TTI::getReductionCost(unsigned Opcode, Type *ValTy,
931931 bool IsPairwise) const {
932
932
933933 std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy);
934
934
935935 MVT MTy = LT.second;
936
936
937937 int ISD = TLI->InstructionOpcodeToISD(Opcode);
938938 assert(ISD && "Invalid opcode");
939
940 // We use the Intel Architecture Code Analyzer (IACA) to measure the throughput
941 // and use that as the cost.
942
939
940 // We use the Intel Architecture Code Analyzer (IACA) to measure the throughput
941 // and use that as the cost.
942
943943 static const CostTblEntry<MVT::SimpleValueType> SSE42CostTblPairWise[] = {
944944 { ISD::FADD, MVT::v2f64, 2 },
945945 { ISD::FADD, MVT::v4f32, 4 },
947947 { ISD::ADD, MVT::v4i32, 3 }, // The data reported by the IACA tool is "3.5".
948948 { ISD::ADD, MVT::v8i16, 5 },
949949 };
950
950
951951 static const CostTblEntry<MVT::SimpleValueType> AVX1CostTblPairWise[] = {
952952 { ISD::FADD, MVT::v4f32, 4 },
953953 { ISD::FADD, MVT::v4f64, 5 },
966966 { ISD::ADD, MVT::v4i32, 3 }, // The data reported by the IACA tool is "3.3".
967967 { ISD::ADD, MVT::v8i16, 4 }, // The data reported by the IACA tool is "4.3".
968968 };
969
969
970970 static const CostTblEntry<MVT::SimpleValueType> AVX1CostTblNoPairWise[] = {
971971 { ISD::FADD, MVT::v4f32, 3 },
972972 { ISD::FADD, MVT::v4f64, 3 },
977977 { ISD::ADD, MVT::v8i16, 4 },
978978 { ISD::ADD, MVT::v8i32, 5 },
979979 };
980
980
981981 if (IsPairwise) {
982982 if (ST->hasAVX()) {
983983 int Idx = CostTableLookup(AVX1CostTblPairWise, ISD, MTy);
984984 if (Idx != -1)
985985 return LT.first * AVX1CostTblPairWise[Idx].Cost;
986986 }
987
987
988988 if (ST->hasSSE42()) {
989989 int Idx = CostTableLookup(SSE42CostTblPairWise, ISD, MTy);
990990 if (Idx != -1)
996996 if (Idx != -1)
997997 return LT.first * AVX1CostTblNoPairWise[Idx].Cost;
998998 }
999
999
10001000 if (ST->hasSSE42()) {
10011001 int Idx = CostTableLookup(SSE42CostTblNoPairWise, ISD, MTy);
10021002 if (Idx != -1)