llvm.org GIT mirror llvm / 5631544
[X86] Add a pass to convert instruction chains between domains. The pass scans the function to find instruction chains that define registers in the same domain (closures). It then calculates the cost of converting the closure to another domain. If found profitable, the instructions are converted to instructions in the other domain and the register classes are changed accordingly. This commit adds the pass infrastructure and a simple conversion from the GPR domain to the Mask domain. Differential Revision: https://reviews.llvm.org/D37251 Change-Id: Ic2cf1d76598110401168326d411128ae2580a604 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@316288 91177308-0d34-0410-b5e6-96231b3b80d8 Guy Blank 2 years ago
12 changed file(s) with 3277 addition(s) and 666 deletion(s). Raw diff Collapse all Expand all
2424 X86CallFrameOptimization.cpp
2525 X86CallLowering.cpp
2626 X86CmovConversion.cpp
27 X86DomainReassignment.cpp
2728 X86ExpandPseudo.cpp
2829 X86FastISel.cpp
2930 X86FixupBWInsts.cpp
9191 /// the upper portions of registers, and to save code size.
9292 FunctionPass *createX86FixupBWInsts();
9393
94 /// Return a Machine IR pass that reassigns instruction chains from one domain
95 /// to another, when profitable.
96 FunctionPass *createX86DomainReassignmentPass();
97
9498 void initializeFixupBWInstPassPass(PassRegistry &);
9599
96100 /// This pass replaces EVEX encoded of AVX-512 instructions by VEX
0 //===--- X86DomainReassignment.cpp - Selectively switch register classes---===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass attempts to find instruction chains (closures) in one domain,
10 // and convert them to equivalent instructions in a different domain,
11 // if profitable.
12 //
13 //===----------------------------------------------------------------------===//
14
15 #include "X86.h"
16 #include "X86InstrInfo.h"
17 #include "X86Subtarget.h"
18 #include "llvm/ADT/DenseMap.h"
19 #include "llvm/ADT/DenseMapInfo.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/SmallSet.h"
22 #include "llvm/ADT/SmallVector.h"
23 #include "llvm/ADT/Statistic.h"
24 #include "llvm/CodeGen/MachineFunctionPass.h"
25 #include "llvm/CodeGen/MachineInstrBuilder.h"
26 #include "llvm/CodeGen/MachineRegisterInfo.h"
27 #include "llvm/Support/Debug.h"
28 #include "llvm/Target/TargetRegisterInfo.h"
#include <memory>
29
30 using namespace llvm;
31
32 namespace llvm {
33 void initializeX86DomainReassignmentPass(PassRegistry &);
34 }
35
36 #define DEBUG_TYPE "x86-domain-reassignment"
37
38 STATISTIC(NumClosuresConverted, "Number of closures converted by the pass");
39
40 static cl::opt DisableX86DomainReassignment(
41 "disable-x86-domain-reassignment", cl::Hidden,
42 cl::desc("X86: Disable Virtual Register Reassignment."), cl::init(false));
43
44 namespace {
/// Register domains distinguished by this pass.
enum RegDomain {
  NoDomain = -1, // Initial value of a closure's domain, before any edge sets it.
  GPRDomain,     // Register classes accepted by isGPR().
  MaskDomain,    // Register classes accepted by isMask().
  OtherDomain    // Everything else.
};
46
47 static bool isGPR(const TargetRegisterClass *RC) {
48 return X86::GR64RegClass.hasSubClassEq(RC) ||
49 X86::GR32RegClass.hasSubClassEq(RC) ||
50 X86::GR16RegClass.hasSubClassEq(RC) ||
51 X86::GR8RegClass.hasSubClassEq(RC);
52 }
53
54 static bool isMask(const TargetRegisterClass *RC,
55 const TargetRegisterInfo *TRI) {
56 return X86::VK16RegClass.hasSubClassEq(RC);
57 }
58
59 static RegDomain getDomain(const TargetRegisterClass *RC,
60 const TargetRegisterInfo *TRI) {
61 if (isGPR(RC))
62 return GPRDomain;
63 if (isMask(RC, TRI))
64 return MaskDomain;
65 return OtherDomain;
66 }
67
68 /// Return a register class equivalent to \p SrcRC, in \p Domain.
69 static const TargetRegisterClass *getDstRC(const TargetRegisterClass *SrcRC,
70 RegDomain Domain) {
71 assert(Domain == MaskDomain && "add domain");
72 if (SrcRC == &X86::GR8RegClass)
73 return &X86::VK8RegClass;
74 if (SrcRC == &X86::GR16RegClass)
75 return &X86::VK16RegClass;
76 if (SrcRC == &X86::GR32RegClass)
77 return &X86::VK32RegClass;
78 if (SrcRC == &X86::GR64RegClass)
79 return &X86::VK64RegClass;
80 llvm_unreachable("add register class");
81 return nullptr;
82 }
83
84 /// Abstract Instruction Converter class.
85 class InstrConverterBase {
86 protected:
87 unsigned SrcOpcode;
88
89 public:
90 InstrConverterBase(unsigned SrcOpcode) : SrcOpcode(SrcOpcode) {}
91
92 virtual ~InstrConverterBase() {}
93
94 /// \returns true if \p MI is legal to convert.
95 virtual bool isLegal(const MachineInstr *MI,
96 const TargetInstrInfo *TII) const {
97 assert(MI->getOpcode() == SrcOpcode &&
98 "Wrong instruction passed to converter");
99 return true;
100 }
101
102 /// Applies conversion to \p MI.
103 ///
104 /// \returns true if \p MI is no longer need, and can be deleted.
105 virtual bool convertInstr(MachineInstr *MI, const TargetInstrInfo *TII,
106 MachineRegisterInfo *MRI) const = 0;
107
108 /// \returns the cost increment incurred by converting \p MI.
109 virtual double getExtraCost(const MachineInstr *MI,
110 MachineRegisterInfo *MRI) const = 0;
111 };
112
113 /// An Instruction Converter which ignores the given instruction.
114 /// For example, PHI instructions can be safely ignored since only the registers
115 /// need to change.
116 class InstrIgnore : public InstrConverterBase {
117 public:
118 InstrIgnore(unsigned SrcOpcode) : InstrConverterBase(SrcOpcode) {}
119
120 bool convertInstr(MachineInstr *MI, const TargetInstrInfo *TII,
121 MachineRegisterInfo *MRI) const override {
122 assert(isLegal(MI, TII) && "Cannot convert instruction");
123 return false;
124 }
125
126 double getExtraCost(const MachineInstr *MI,
127 MachineRegisterInfo *MRI) const override {
128 return 0;
129 }
130 };
131
132 /// An Instruction Converter which replaces an instruction with another.
133 class InstrReplacer : public InstrConverterBase {
134 public:
135 /// Opcode of the destination instruction.
136 unsigned DstOpcode;
137
138 InstrReplacer(unsigned SrcOpcode, unsigned DstOpcode)
139 : InstrConverterBase(SrcOpcode), DstOpcode(DstOpcode) {}
140
141 bool isLegal(const MachineInstr *MI,
142 const TargetInstrInfo *TII) const override {
143 if (!InstrConverterBase::isLegal(MI, TII))
144 return false;
145 // It's illegal to replace an instruction that implicitly defines a register
146 // with an instruction that doesn't, unless that register dead.
147 for (auto &MO : MI->implicit_operands())
148 if (MO.isReg() && MO.isDef() && !MO.isDead() &&
149 !TII->get(DstOpcode).hasImplicitDefOfPhysReg(MO.getReg()))
150 return false;
151 return true;
152 }
153
154 bool convertInstr(MachineInstr *MI, const TargetInstrInfo *TII,
155 MachineRegisterInfo *MRI) const override {
156 assert(isLegal(MI, TII) && "Cannot convert instruction");
157 MachineInstrBuilder Bld =
158 BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(DstOpcode));
159 // Transfer explicit operands from original instruction. Implicit operands
160 // are handled by BuildMI.
161 for (auto &Op : MI->explicit_operands())
162 Bld.add(Op);
163 return true;
164 }
165
166 double getExtraCost(const MachineInstr *MI,
167 MachineRegisterInfo *MRI) const override {
168 // Assuming instructions have the same cost.
169 return 0;
170 }
171 };
172
173 /// An Instruction Converter which replaces an instruction with another, and
174 /// adds a COPY from the new instruction's destination to the old one's.
175 class InstrReplacerDstCOPY : public InstrConverterBase {
176 public:
177 unsigned DstOpcode;
178
179 InstrReplacerDstCOPY(unsigned SrcOpcode, unsigned DstOpcode)
180 : InstrConverterBase(SrcOpcode), DstOpcode(DstOpcode) {}
181
182 bool convertInstr(MachineInstr *MI, const TargetInstrInfo *TII,
183 MachineRegisterInfo *MRI) const override {
184 assert(isLegal(MI, TII) && "Cannot convert instruction");
185 MachineBasicBlock *MBB = MI->getParent();
186 auto &DL = MI->getDebugLoc();
187
188 unsigned Reg = MRI->createVirtualRegister(
189 TII->getRegClass(TII->get(DstOpcode), 0, MRI->getTargetRegisterInfo(),
190 *MBB->getParent()));
191 MachineInstrBuilder Bld = BuildMI(*MBB, MI, DL, TII->get(DstOpcode), Reg);
192 for (unsigned Idx = 1, End = MI->getNumOperands(); Idx < End; ++Idx)
193 Bld.add(MI->getOperand(Idx));
194
195 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::COPY))
196 .add(MI->getOperand(0))
197 .addReg(Reg);
198
199 return true;
200 }
201
202 double getExtraCost(const MachineInstr *MI,
203 MachineRegisterInfo *MRI) const override {
204 // Assuming instructions have the same cost, and that COPY is in the same
205 // domain so it will be eliminated.
206 return 0;
207 }
208 };
209
210 /// An Instruction Converter for replacing COPY instructions.
211 class InstrCOPYReplacer : public InstrReplacer {
212 public:
213 RegDomain DstDomain;
214
215 InstrCOPYReplacer(unsigned SrcOpcode, RegDomain DstDomain, unsigned DstOpcode)
216 : InstrReplacer(SrcOpcode, DstOpcode), DstDomain(DstDomain) {}
217
218 double getExtraCost(const MachineInstr *MI,
219 MachineRegisterInfo *MRI) const override {
220 assert(MI->getOpcode() == TargetOpcode::COPY && "Expected a COPY");
221
222 for (auto &MO : MI->operands()) {
223 // Physical registers will not be converted. Assume that converting the
224 // COPY to the destination domain will eventually result in a actual
225 // instruction.
226 if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
227 return 1;
228
229 RegDomain OpDomain = getDomain(MRI->getRegClass(MO.getReg()),
230 MRI->getTargetRegisterInfo());
231 // Converting a cross domain COPY to a same domain COPY should eliminate
232 // an insturction
233 if (OpDomain == DstDomain)
234 return -1;
235 }
236 return 0;
237 }
238 };
239
240 /// An Instruction Converter which replaces an instruction with a COPY.
241 class InstrReplaceWithCopy : public InstrConverterBase {
242 public:
243 // Source instruction operand Index, to be used as the COPY source.
244 unsigned SrcOpIdx;
245
246 InstrReplaceWithCopy(unsigned SrcOpcode, unsigned SrcOpIdx)
247 : InstrConverterBase(SrcOpcode), SrcOpIdx(SrcOpIdx) {}
248
249 bool convertInstr(MachineInstr *MI, const TargetInstrInfo *TII,
250 MachineRegisterInfo *MRI) const override {
251 assert(isLegal(MI, TII) && "Cannot convert instruction");
252 BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
253 TII->get(TargetOpcode::COPY))
254 .add({MI->getOperand(0), MI->getOperand(SrcOpIdx)});
255 return true;
256 }
257
258 double getExtraCost(const MachineInstr *MI,
259 MachineRegisterInfo *MRI) const override {
260 return 0;
261 }
262 };
263
264 /// An Instruction Converter which completely deletes an instruction.
265 /// For example, IMPLICIT_DEF instructions can be deleted when converting from
266 /// GPR to mask.
267 class InstrDeleter : public InstrConverterBase {
268 public:
269 InstrDeleter(unsigned SrcOpcode) : InstrConverterBase(SrcOpcode) {}
270
271 bool convertInstr(MachineInstr *MI, const TargetInstrInfo *TII,
272 MachineRegisterInfo *MRI) const override {
273 assert(isLegal(MI, TII) && "Cannot convert instruction");
274 return true;
275 }
276
277 double getExtraCost(const MachineInstr *MI,
278 MachineRegisterInfo *MRI) const override {
279 return 0;
280 }
281 };
282
283 // Key type to be used by the Instruction Converters map.
284 // A converter is identified by
285 typedef std::pair InstrConverterBaseKeyTy;
286
287 typedef DenseMap
288 InstrConverterBaseMap;
289
290 /// A closure is a set of virtual register representing all of the edges in
291 /// the closure, as well as all of the instructions connected by those edges.
292 ///
293 /// A closure may encompass virtual registers in the same register bank that
294 /// have different widths. For example, it may contain 32-bit GPRs as well as
295 /// 64-bit GPRs.
296 ///
297 /// A closure that computes an address (i.e. defines a virtual register that is
298 /// used in a memory operand) excludes the instructions that contain memory
299 /// operands using the address. Such an instruction will be included in a
300 /// different closure that manipulates the loaded or stored value.
301 class Closure {
302 private:
303 const TargetInstrInfo *TII;
304 MachineRegisterInfo *MRI;
305
306 /// Virtual registers in the closure.
307 DenseSet Edges;
308
309 /// Instructions in the closure.
310 SmallVector Instrs;
311
312 /// A map of available Instruction Converters.
313 const InstrConverterBaseMap &Converters;
314
315 /// The register domain of this closure.
316 RegDomain Domain;
317
318 /// Domains which this closure can legally be reassigned to.
319 SmallVector LegalDstDomains;
320
321 SmallVector getLegalDstDomains() const {
322 return LegalDstDomains;
323 }
324
325 /// Enqueue \p Reg to be considered for addition to the closure.
326 void visitRegister(unsigned Reg, SmallVectorImpl &Worklist);
327
328 /// Add \p MI to this closure.
329 void encloseInstr(MachineInstr *MI);
330
331 /// Calculate the total cost of reassigning the closure to \p Domain.
332 double calculateCost(RegDomain Domain) const;
333
334 /// All edges that are included in some closure.
335 DenseSet &EnclosedEdges;
336
337 /// All instructions that are included in some closure.
338 DenseMap &EnclosedInstrs;
339
340 public:
341 Closure(const TargetInstrInfo *TII, MachineRegisterInfo *MRI,
342 const InstrConverterBaseMap &Converters,
343 const SmallVector &LegalDstDomains,
344 DenseSet &EnclosedEdges,
345 DenseMap &EnclosedInstrs)
346 : TII(TII), MRI(MRI), Converters(Converters), Domain(NoDomain),
347 LegalDstDomains(LegalDstDomains), EnclosedEdges(EnclosedEdges),
348 EnclosedInstrs(EnclosedInstrs) {}
349
350 /// Starting from \Reg, expand the closure as much as possible.
351 void buildClosure(unsigned E);
352
353 /// /returns true if it is profitable to reassign the closure to \p Domain.
354 bool isReassignmentProfitable(RegDomain Domain) const;
355
356 /// Reassign the closure to \p Domain.
357 void Reassign(RegDomain Domain) const;
358
359 /// Mark this closure as illegal for reassignment to all domains.
360 void setAllIllegal() { LegalDstDomains.clear(); }
361
362 /// \returns true if this closure has domains which are legal to reassign to.
363 bool hasLegalDstDomain() const { return !LegalDstDomains.empty(); }
364
365 /// \returns true if is legal to reassign this closure to domain \p RD.
366 bool isLegal(RegDomain RD) const { return is_contained(LegalDstDomains, RD); }
367
368 bool empty() const { return Edges.empty(); }
369 };
370
371 class X86DomainReassignment : public MachineFunctionPass {
372 public:
373 static char ID;
374
375 X86DomainReassignment() : MachineFunctionPass(ID) {
376 initializeX86DomainReassignmentPass(*PassRegistry::getPassRegistry());
377 }
378
379 bool runOnMachineFunction(MachineFunction &MF) override;
380
381 void getAnalysisUsage(AnalysisUsage &AU) const override {
382 AU.setPreservesCFG();
383 MachineFunctionPass::getAnalysisUsage(AU);
384 }
385
386 StringRef getPassName() const override {
387 return "X86 Domain Reassignment Pass";
388 }
389
390 private:
391 const X86Subtarget *STI;
392 MachineRegisterInfo *MRI;
393 const X86InstrInfo *TII;
394
395 /// A map of available Instruction Converters.
396 InstrConverterBaseMap Converters;
397
398 /// Initialize Converters map.
399 void initConverters();
400 };
401
402 char X86DomainReassignment::ID = 0;
403
404 } // End anonymous namespace.
405
406 void Closure::visitRegister(unsigned Reg, SmallVectorImpl &Worklist) {
407 if (EnclosedEdges.count(Reg))
408 return;
409
410 if (!TargetRegisterInfo::isVirtualRegister(Reg))
411 return;
412
413 if (!MRI->hasOneDef(Reg))
414 return;
415
416 RegDomain RD = getDomain(MRI->getRegClass(Reg), MRI->getTargetRegisterInfo());
417 // First edge in closure sets the domain.
418 if (Domain == NoDomain)
419 Domain = RD;
420
421 if (Domain != RD)
422 return;
423
424 Worklist.push_back(Reg);
425 }
426
427 void Closure::encloseInstr(MachineInstr *MI) {
428 auto I = EnclosedInstrs.find(MI);
429 if (I != EnclosedInstrs.end()) {
430 if (I->second != this)
431 // Instruction already belongs to another closure, avoid conflicts between
432 // closure and mark this closure as illegal.
433 setAllIllegal();
434 return;
435 }
436
437 EnclosedInstrs[MI] = this;
438 Instrs.push_back(MI);
439
440 // Mark closure as illegal for reassignment to domains, if there is no
441 // converter for the instruction or if the converter cannot convert the
442 // instruction.
443 erase_if(LegalDstDomains, [&](RegDomain D) {
444 InstrConverterBase *IC = Converters.lookup({D, MI->getOpcode()});
445 return !IC || !IC->isLegal(MI, TII);
446 });
447 }
448
449 double Closure::calculateCost(RegDomain DstDomain) const {
450 assert(isLegal(DstDomain) && "Cannot calculate cost for illegal closure");
451
452 double Cost = 0.0;
453 for (auto MI : Instrs)
454 Cost +=
455 Converters.lookup({DstDomain, MI->getOpcode()})->getExtraCost(MI, MRI);
456 return Cost;
457 }
458
459 bool Closure::isReassignmentProfitable(RegDomain Domain) const {
460 return calculateCost(Domain) < 0.0;
461 }
462
463 void Closure::Reassign(RegDomain Domain) const {
464 assert(isLegal(Domain) && "Cannot convert illegal closure");
465
466 // Iterate all instructions in the closure, convert each one using the
467 // appropriate converter.
468 SmallVector ToErase;
469 for (auto MI : Instrs)
470 if (Converters.lookup({Domain, MI->getOpcode()})
471 ->convertInstr(MI, TII, MRI))
472 ToErase.push_back(MI);
473
474 // Iterate all registers in the closure, replace them with registers in the
475 // destination domain.
476 for (unsigned Reg : Edges) {
477 MRI->setRegClass(Reg, getDstRC(MRI->getRegClass(Reg), Domain));
478 for (auto &MO : MRI->use_operands(Reg)) {
479 if (MO.isReg())
480 // Remove all subregister references as they are not valid in the
481 // destination domain.
482 MO.setSubReg(0);
483 }
484 }
485
486 for (auto MI : ToErase)
487 MI->eraseFromParent();
488 }
489
490 /// \returns true when \p Reg is used as part of an address calculation in \p
491 /// MI.
492 static bool usedAsAddr(const MachineInstr &MI, unsigned Reg,
493 const TargetInstrInfo *TII) {
494 if (!MI.mayLoadOrStore())
495 return false;
496
497 const MCInstrDesc &Desc = TII->get(MI.getOpcode());
498 int MemOpStart = X86II::getMemoryOperandNo(Desc.TSFlags);
499 if (MemOpStart == -1)
500 return false;
501
502 MemOpStart += X86II::getOperandBias(Desc);
503 for (unsigned MemOpIdx = MemOpStart,
504 MemOpEnd = MemOpStart + X86::AddrNumOperands;
505 MemOpIdx < MemOpEnd; ++MemOpIdx) {
506 auto &Op = MI.getOperand(MemOpIdx);
507 if (Op.isReg() && Op.getReg() == Reg)
508 return true;
509 }
510 return false;
511 }
512
513 void Closure::buildClosure(unsigned Reg) {
514 SmallVector Worklist;
515 visitRegister(Reg, Worklist);
516 while (!Worklist.empty()) {
517 unsigned CurReg = Worklist.pop_back_val();
518
519 // Register already in this closure.
520 if (!Edges.insert(CurReg).second)
521 continue;
522
523 MachineInstr *DefMI = MRI->getVRegDef(CurReg);
524 encloseInstr(DefMI);
525
526 // Add register used by the defining MI to the worklist.
527 // Do not add registers which are used in address calculation, they will be
528 // added to a different closure.
529 int OpEnd = DefMI->getNumOperands();
530 const MCInstrDesc &Desc = DefMI->getDesc();
531 int MemOp = X86II::getMemoryOperandNo(Desc.TSFlags);
532 if (MemOp != -1)
533 MemOp += X86II::getOperandBias(Desc);
534 for (int OpIdx = 0; OpIdx < OpEnd; ++OpIdx) {
535 if (OpIdx == MemOp) {
536 // skip address calculation.
537 OpIdx += (X86::AddrNumOperands - 1);
538 continue;
539 }
540 auto &Op = DefMI->getOperand(OpIdx);
541 if (!Op.isReg() || !Op.isUse())
542 continue;
543 visitRegister(Op.getReg(), Worklist);
544 }
545
546 // Expand closure through register uses.
547 for (auto &UseMI : MRI->use_nodbg_instructions(CurReg)) {
548 // We would like to avoid converting closures which calculare addresses,
549 // as this should remain in GPRs.
550 if (usedAsAddr(UseMI, CurReg, TII)) {
551 setAllIllegal();
552 continue;
553 }
554 encloseInstr(&UseMI);
555
556 for (auto &DefOp : UseMI.defs()) {
557 if (!DefOp.isReg())
558 continue;
559
560 unsigned DefReg = DefOp.getReg();
561 if (!TargetRegisterInfo::isVirtualRegister(DefReg)) {
562 setAllIllegal();
563 continue;
564 }
565 visitRegister(DefReg, Worklist);
566 }
567 }
568 }
569 }
570
571 void X86DomainReassignment::initConverters() {
572 Converters[{MaskDomain, TargetOpcode::PHI}] =
573 new InstrIgnore(TargetOpcode::PHI);
574
575 Converters[{MaskDomain, TargetOpcode::IMPLICIT_DEF}] =
576 new InstrDeleter(TargetOpcode::IMPLICIT_DEF);
577
578 Converters[{MaskDomain, TargetOpcode::INSERT_SUBREG}] =
579 new InstrReplaceWithCopy(TargetOpcode::INSERT_SUBREG, 2);
580
581 Converters[{MaskDomain, TargetOpcode::COPY}] =
582 new InstrCOPYReplacer(TargetOpcode::COPY, MaskDomain, TargetOpcode::COPY);
583
584 auto createReplacerDstCOPY = [&](unsigned From, unsigned To) {
585 Converters[{MaskDomain, From}] = new InstrReplacerDstCOPY(From, To);
586 };
587
588 createReplacerDstCOPY(X86::MOVZX32rm16, X86::KMOVWkm);
589 createReplacerDstCOPY(X86::MOVZX64rm16, X86::KMOVWkm);
590
591 createReplacerDstCOPY(X86::MOVZX32rr16, X86::KMOVWkk);
592 createReplacerDstCOPY(X86::MOVZX64rr16, X86::KMOVWkk);
593
594 if (STI->hasDQI()) {
595 createReplacerDstCOPY(X86::MOVZX16rm8, X86::KMOVBkm);
596 createReplacerDstCOPY(X86::MOVZX32rm8, X86::KMOVBkm);
597 createReplacerDstCOPY(X86::MOVZX64rm8, X86::KMOVBkm);
598
599 createReplacerDstCOPY(X86::MOVZX16rr8, X86::KMOVBkk);
600 createReplacerDstCOPY(X86::MOVZX32rr8, X86::KMOVBkk);
601 createReplacerDstCOPY(X86::MOVZX64rr8, X86::KMOVBkk);
602 }
603
604 auto createReplacer = [&](unsigned From, unsigned To) {
605 Converters[{MaskDomain, From}] = new InstrReplacer(From, To);
606 };
607
608 createReplacer(X86::MOV16rm, X86::KMOVWkm);
609 createReplacer(X86::MOV16mr, X86::KMOVWmk);
610 createReplacer(X86::MOV16rr, X86::KMOVWkk);
611 createReplacer(X86::SHR16ri, X86::KSHIFTRWri);
612 createReplacer(X86::SHL16ri, X86::KSHIFTLWri);
613 createReplacer(X86::NOT16r, X86::KNOTWrr);
614 createReplacer(X86::OR16rr, X86::KORWrr);
615 createReplacer(X86::AND16rr, X86::KANDWrr);
616 createReplacer(X86::XOR16rr, X86::KXORWrr);
617
618 if (STI->hasBWI()) {
619 createReplacer(X86::MOV32rm, X86::KMOVDkm);
620 createReplacer(X86::MOV64rm, X86::KMOVQkm);
621
622 createReplacer(X86::MOV32mr, X86::KMOVDmk);
623 createReplacer(X86::MOV64mr, X86::KMOVQmk);
624
625 createReplacer(X86::MOV32rr, X86::KMOVDkk);
626 createReplacer(X86::MOV64rr, X86::KMOVQkk);
627
628 createReplacer(X86::SHR32ri, X86::KSHIFTRDri);
629 createReplacer(X86::SHR64ri, X86::KSHIFTRQri);
630
631 createReplacer(X86::SHL32ri, X86::KSHIFTLDri);
632 createReplacer(X86::SHL64ri, X86::KSHIFTLQri);
633
634 createReplacer(X86::ADD32rr, X86::KADDDrr);
635 createReplacer(X86::ADD64rr, X86::KADDQrr);
636
637 createReplacer(X86::NOT32r, X86::KNOTDrr);
638 createReplacer(X86::NOT64r, X86::KNOTQrr);
639
640 createReplacer(X86::OR32rr, X86::KORDrr);
641 createReplacer(X86::OR64rr, X86::KORQrr);
642
643 createReplacer(X86::AND32rr, X86::KANDDrr);
644 createReplacer(X86::AND64rr, X86::KANDQrr);
645
646 createReplacer(X86::ANDN32rr, X86::KANDNDrr);
647 createReplacer(X86::ANDN64rr, X86::KANDNQrr);
648
649 createReplacer(X86::XOR32rr, X86::KXORDrr);
650 createReplacer(X86::XOR64rr, X86::KXORQrr);
651
652 createReplacer(X86::TEST32rr, X86::KTESTDrr);
653 createReplacer(X86::TEST64rr, X86::KTESTQrr);
654 }
655
656 if (STI->hasDQI()) {
657 createReplacer(X86::ADD8rr, X86::KADDBrr);
658 createReplacer(X86::ADD16rr, X86::KADDWrr);
659
660 createReplacer(X86::AND8rr, X86::KANDBrr);
661
662 createReplacer(X86::MOV8rm, X86::KMOVBkm);
663 createReplacer(X86::MOV8mr, X86::KMOVBmk);
664 createReplacer(X86::MOV8rr, X86::KMOVBkk);
665
666 createReplacer(X86::NOT8r, X86::KNOTBrr);
667
668 createReplacer(X86::OR8rr, X86::KORBrr);
669
670 createReplacer(X86::SHR8ri, X86::KSHIFTRBri);
671 createReplacer(X86::SHL8ri, X86::KSHIFTLBri);
672
673 createReplacer(X86::TEST8rr, X86::KTESTBrr);
674 createReplacer(X86::TEST16rr, X86::KTESTWrr);
675
676 createReplacer(X86::XOR8rr, X86::KXORBrr);
677 }
678 }
679
680 bool X86DomainReassignment::runOnMachineFunction(MachineFunction &MF) {
681 if (skipFunction(*MF.getFunction()))
682 return false;
683 if (DisableX86DomainReassignment)
684 return false;
685
686 DEBUG(dbgs() << "***** Machine Function before Domain Reassignment *****\n");
687 DEBUG(MF.print(dbgs()));
688
689 STI = &MF.getSubtarget();
690 // GPR->K is the only transformation currently supported, bail out early if no
691 // AVX512.
692 if (!STI->hasAVX512())
693 return false;
694
695 MRI = &MF.getRegInfo();
696 assert(MRI->isSSA() && "Expected MIR to be in SSA form");
697
698 TII = STI->getInstrInfo();
699 initConverters();
700 bool Changed = false;
701
702 DenseSet EnclosedEdges;
703 DenseMap EnclosedInstrs;
704
705 std::vector Closures;
706
707 // Go over all virtual registers and calculate a closure.
708 for (unsigned Idx = 0; Idx < MRI->getNumVirtRegs(); ++Idx) {
709 unsigned Reg = TargetRegisterInfo::index2VirtReg(Idx);
710
711 // GPR only current source domain supported.
712 if (!isGPR(MRI->getRegClass(Reg)))
713 continue;
714
715 // Register already in closure.
716 if (EnclosedEdges.count(Reg))
717 continue;
718
719 // Calculate closure starting with Reg.
720 Closure C(TII, MRI, Converters, {MaskDomain}, EnclosedEdges,
721 EnclosedInstrs);
722 C.buildClosure(Reg);
723
724 // Collect all closures that can potentially be converted.
725 if (!C.empty() && C.isLegal(MaskDomain))
726 Closures.push_back(std::move(C));
727 }
728
729 for (Closure &C : Closures)
730 if (C.isReassignmentProfitable(MaskDomain)) {
731 C.Reassign(MaskDomain);
732 ++NumClosuresConverted;
733 Changed = true;
734 }
735
736 for (auto I : Converters)
737 delete I.second;
738
739 DEBUG(dbgs() << "***** Machine Function after Domain Reassignment *****\n");
740 DEBUG(MF.print(dbgs()));
741
742 return Changed;
743 }
744
745 INITIALIZE_PASS(X86DomainReassignment, "x86-domain-reassignment",
746 "X86 Domain Reassignment Pass", false, false);
747
748 /// Returns an instance of the Domain Reassignment pass.
749 FunctionPass *llvm::createX86DomainReassignmentPass() {
750 return new X86DomainReassignment();
751 }
5959 void initializeFixupLEAPassPass(PassRegistry &);
6060 void initializeX86CmovConverterPassPass(PassRegistry &);
6161 void initializeX86ExecutionDepsFixPass(PassRegistry &);
62 void initializeX86DomainReassignmentPass(PassRegistry &);
6263
6364 } // end namespace llvm
6465
7576 initializeFixupLEAPassPass(PR);
7677 initializeX86CmovConverterPassPass(PR);
7778 initializeX86ExecutionDepsFixPass(PR);
79 initializeX86DomainReassignmentPass(PR);
7880 }
7981
8082 static std::unique_ptr createTLOF(const Triple &TT) {
313315 bool addGlobalInstructionSelect() override;
314316 bool addILPOpts() override;
315317 bool addPreISel() override;
318 void addMachineSSAOptimization() override;
316319 void addPreRegAlloc() override;
317320 void addPostRegAlloc() override;
318321 void addPreEmitPass() override;
406409
407410 addPass(createX86WinAllocaExpander());
408411 }
// Adds the X86 domain reassignment pass first, then defers to
// TargetPassConfig::addMachineSSAOptimization() for the remaining passes.
412 void X86PassConfig::addMachineSSAOptimization() {
413 addPass(createX86DomainReassignmentPass());
414 TargetPassConfig::addMachineSSAOptimization();
415 }
409416
410417 void X86PassConfig::addPostRegAlloc() {
411418 addPass(createX86FloatingPointStackifierPass());
323323 ;
324324 ; SKX-LABEL: test16:
325325 ; SKX: ## BB#0:
326 ; SKX-NEXT: movb (%rdi), %al
327 ; SKX-NEXT: kmovd %esi, %k0
328 ; SKX-NEXT: kmovd %eax, %k1
329 ; SKX-NEXT: vpmovm2d %k1, %zmm0
330 ; SKX-NEXT: vpmovm2d %k0, %zmm1
326 ; SKX-NEXT: kmovb (%rdi), %k0
327 ; SKX-NEXT: kmovd %esi, %k1
328 ; SKX-NEXT: vpmovm2d %k0, %zmm0
329 ; SKX-NEXT: vpmovm2d %k1, %zmm1
331330 ; SKX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,3,4,5,6,7,8,9,16,11,12,13,14,15]
332331 ; SKX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
333332 ; SKX-NEXT: vpmovd2m %zmm2, %k0
361360 ;
362361 ; SKX-LABEL: test17:
363362 ; SKX: ## BB#0:
364 ; SKX-NEXT: movb (%rdi), %al
365 ; SKX-NEXT: kmovd %esi, %k0
366 ; SKX-NEXT: kmovd %eax, %k1
367 ; SKX-NEXT: vpmovm2q %k1, %zmm0
368 ; SKX-NEXT: vpmovm2q %k0, %zmm1
363 ; SKX-NEXT: kmovb (%rdi), %k0
364 ; SKX-NEXT: kmovd %esi, %k1
365 ; SKX-NEXT: vpmovm2q %k0, %zmm0
366 ; SKX-NEXT: vpmovm2q %k1, %zmm1
369367 ; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,8,5,6,7]
370368 ; SKX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
371369 ; SKX-NEXT: vpmovq2m %zmm2, %k0
77 ; X32-LABEL: test_mm512_mask_set1_epi32:
88 ; X32: # BB#0: # %entry
99 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
10 ; X32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
11 ; X32-NEXT: kmovw %ecx, %k1
10 ; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
1211 ; X32-NEXT: vpbroadcastd %eax, %zmm0 {%k1}
1312 ; X32-NEXT: retl
1413 ;
3130 ; X32-LABEL: test_mm512_maskz_set1_epi32:
3231 ; X32: # BB#0: # %entry
3332 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
34 ; X32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
35 ; X32-NEXT: kmovw %ecx, %k1
33 ; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
3634 ; X32-NEXT: vpbroadcastd %eax, %zmm0 {%k1} {z}
3735 ; X32-NEXT: retl
3836 ;
126124 define <8 x i64> @test_mm512_mask_broadcastd_epi32(<8 x i64> %a0, i16 %a1, <2 x i64> %a2) {
127125 ; X32-LABEL: test_mm512_mask_broadcastd_epi32:
128126 ; X32: # BB#0:
129 ; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
130 ; X32-NEXT: kmovw %eax, %k1
127 ; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
131128 ; X32-NEXT: vpbroadcastd %xmm1, %zmm0 {%k1}
132129 ; X32-NEXT: retl
133130 ;
148145 define <8 x i64> @test_mm512_maskz_broadcastd_epi32(i16 %a0, <2 x i64> %a1) {
149146 ; X32-LABEL: test_mm512_maskz_broadcastd_epi32:
150147 ; X32: # BB#0:
151 ; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
152 ; X32-NEXT: kmovw %eax, %k1
148 ; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
153149 ; X32-NEXT: vpbroadcastd %xmm0, %zmm0 {%k1} {z}
154150 ; X32-NEXT: retl
155151 ;
287283 define <16 x float> @test_mm512_mask_broadcastss_ps(<16 x float> %a0, i16 %a1, <4 x float> %a2) {
288284 ; X32-LABEL: test_mm512_mask_broadcastss_ps:
289285 ; X32: # BB#0:
290 ; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
291 ; X32-NEXT: kmovw %eax, %k1
286 ; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
292287 ; X32-NEXT: vbroadcastss %xmm1, %zmm0 {%k1}
293288 ; X32-NEXT: retl
294289 ;
306301 define <16 x float> @test_mm512_maskz_broadcastss_ps(i16 %a0, <4 x float> %a1) {
307302 ; X32-LABEL: test_mm512_maskz_broadcastss_ps:
308303 ; X32: # BB#0:
309 ; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
310 ; X32-NEXT: kmovw %eax, %k1
304 ; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
311305 ; X32-NEXT: vbroadcastss %xmm0, %zmm0 {%k1} {z}
312306 ; X32-NEXT: retl
313307 ;
391385 define <16 x float> @test_mm512_mask_movehdup_ps(<16 x float> %a0, i16 %a1, <16 x float> %a2) {
392386 ; X32-LABEL: test_mm512_mask_movehdup_ps:
393387 ; X32: # BB#0:
394 ; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
395 ; X32-NEXT: kmovw %eax, %k1
388 ; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
396389 ; X32-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} = zmm1[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
397390 ; X32-NEXT: retl
398391 ;
410403 define <16 x float> @test_mm512_maskz_movehdup_ps(i16 %a0, <16 x float> %a1) {
411404 ; X32-LABEL: test_mm512_maskz_movehdup_ps:
412405 ; X32: # BB#0:
413 ; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
414 ; X32-NEXT: kmovw %eax, %k1
406 ; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
415407 ; X32-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
416408 ; X32-NEXT: retl
417409 ;
443435 define <16 x float> @test_mm512_mask_moveldup_ps(<16 x float> %a0, i16 %a1, <16 x float> %a2) {
444436 ; X32-LABEL: test_mm512_mask_moveldup_ps:
445437 ; X32: # BB#0:
446 ; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
447 ; X32-NEXT: kmovw %eax, %k1
438 ; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
448439 ; X32-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} = zmm1[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
449440 ; X32-NEXT: retl
450441 ;
462453 define <16 x float> @test_mm512_maskz_moveldup_ps(i16 %a0, <16 x float> %a1) {
463454 ; X32-LABEL: test_mm512_maskz_moveldup_ps:
464455 ; X32: # BB#0:
465 ; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
466 ; X32-NEXT: kmovw %eax, %k1
456 ; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
467457 ; X32-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
468458 ; X32-NEXT: retl
469459 ;
547537 define <16 x float> @test_mm512_mask_permute_ps(<16 x float> %a0, i16 %a1, <16 x float> %a2) {
548538 ; X32-LABEL: test_mm512_mask_permute_ps:
549539 ; X32: # BB#0:
550 ; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
551 ; X32-NEXT: kmovw %eax, %k1
540 ; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
552541 ; X32-NEXT: vpermilps {{.*#+}} zmm0 {%k1} = zmm1[2,0,0,0,6,4,4,4,10,8,8,8,14,12,12,12]
553542 ; X32-NEXT: retl
554543 ;
566555 define <16 x float> @test_mm512_maskz_permute_ps(i16 %a0, <16 x float> %a1) {
567556 ; X32-LABEL: test_mm512_maskz_permute_ps:
568557 ; X32: # BB#0:
569 ; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
570 ; X32-NEXT: kmovw %eax, %k1
558 ; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
571559 ; X32-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[2,0,0,0,6,4,4,4,10,8,8,8,14,12,12,12]
572560 ; X32-NEXT: retl
573561 ;
705693 define <8 x i64> @test_mm512_mask_shuffle_epi32(<8 x i64> %a0, i16 %a1, <8 x i64> %a2) {
706694 ; X32-LABEL: test_mm512_mask_shuffle_epi32:
707695 ; X32: # BB#0:
708 ; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
709 ; X32-NEXT: kmovw %eax, %k1
696 ; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
710697 ; X32-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = zmm1[1,0,0,0,5,4,4,4,9,8,8,8,13,12,12,12]
711698 ; X32-NEXT: retl
712699 ;
727714 define <8 x i64> @test_mm512_maskz_shuffle_epi32(i16 %a0, <8 x i64> %a1) {
728715 ; X32-LABEL: test_mm512_maskz_shuffle_epi32:
729716 ; X32: # BB#0:
730 ; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
731 ; X32-NEXT: kmovw %eax, %k1
717 ; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
732718 ; X32-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[1,0,0,0,5,4,4,4,9,8,8,8,13,12,12,12]
733719 ; X32-NEXT: retl
734720 ;
817803 define <8 x i64> @test_mm512_mask_unpackhi_epi32(<8 x i64> %a0, i16 %a1, <8 x i64> %a2, <8 x i64> %a3) {
818804 ; X32-LABEL: test_mm512_mask_unpackhi_epi32:
819805 ; X32: # BB#0:
820 ; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
821 ; X32-NEXT: kmovw %eax, %k1
806 ; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
822807 ; X32-NEXT: vpunpckhdq {{.*#+}} zmm0 {%k1} = zmm1[2],zmm2[2],zmm1[3],zmm2[3],zmm1[6],zmm2[6],zmm1[7],zmm2[7],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[14],zmm2[14],zmm1[15],zmm2[15]
823808 ; X32-NEXT: retl
824809 ;
840825 define <8 x i64> @test_mm512_maskz_unpackhi_epi32(i16 %a0, <8 x i64> %a1, <8 x i64> %a2) {
841826 ; X32-LABEL: test_mm512_maskz_unpackhi_epi32:
842827 ; X32: # BB#0:
843 ; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
844 ; X32-NEXT: kmovw %eax, %k1
828 ; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
845829 ; X32-NEXT: vpunpckhdq {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
846830 ; X32-NEXT: retl
847831 ;
980964 define <16 x float> @test_mm512_mask_unpackhi_ps(<16 x float> %a0, i16 %a1, <16 x float> %a2, <16 x float> %a3) {
981965 ; X32-LABEL: test_mm512_mask_unpackhi_ps:
982966 ; X32: # BB#0:
983 ; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
984 ; X32-NEXT: kmovw %eax, %k1
967 ; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
985968 ; X32-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} = zmm1[2],zmm2[2],zmm1[3],zmm2[3],zmm1[6],zmm2[6],zmm1[7],zmm2[7],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[14],zmm2[14],zmm1[15],zmm2[15]
986969 ; X32-NEXT: retl
987970 ;
999982 define <16 x float> @test_mm512_maskz_unpackhi_ps(i16 %a0, <16 x float> %a1, <16 x float> %a2) {
1000983 ; X32-LABEL: test_mm512_maskz_unpackhi_ps:
1001984 ; X32: # BB#0:
1002 ; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
1003 ; X32-NEXT: kmovw %eax, %k1
985 ; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
1004986 ; X32-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
1005987 ; X32-NEXT: retl
1006988 ;
10351017 define <8 x i64> @test_mm512_mask_unpacklo_epi32(<8 x i64> %a0, i16 %a1, <8 x i64> %a2, <8 x i64> %a3) {
10361018 ; X32-LABEL: test_mm512_mask_unpacklo_epi32:
10371019 ; X32: # BB#0:
1038 ; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
1039 ; X32-NEXT: kmovw %eax, %k1
1020 ; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
10401021 ; X32-NEXT: vpunpckldq {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[1],zmm2[1],zmm1[4],zmm2[4],zmm1[5],zmm2[5],zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[12],zmm2[12],zmm1[13],zmm2[13]
10411022 ; X32-NEXT: retl
10421023 ;
10581039 define <8 x i64> @test_mm512_maskz_unpacklo_epi32(i16 %a0, <8 x i64> %a1, <8 x i64> %a2) {
10591040 ; X32-LABEL: test_mm512_maskz_unpacklo_epi32:
10601041 ; X32: # BB#0:
1061 ; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
1062 ; X32-NEXT: kmovw %eax, %k1
1042 ; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
10631043 ; X32-NEXT: vpunpckldq {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
10641044 ; X32-NEXT: retl
10651045 ;
11981178 define <16 x float> @test_mm512_mask_unpacklo_ps(<16 x float> %a0, i16 %a1, <16 x float> %a2, <16 x float> %a3) {
11991179 ; X32-LABEL: test_mm512_mask_unpacklo_ps:
12001180 ; X32: # BB#0:
1201 ; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
1202 ; X32-NEXT: kmovw %eax, %k1
1181 ; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
12031182 ; X32-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[1],zmm2[1],zmm1[4],zmm2[4],zmm1[5],zmm2[5],zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[12],zmm2[12],zmm1[13],zmm2[13]
12041183 ; X32-NEXT: retl
12051184 ;
12171196 define <16 x float> @test_mm512_maskz_unpacklo_ps(i16 %a0, <16 x float> %a1, <16 x float> %a2) {
12181197 ; X32-LABEL: test_mm512_maskz_unpacklo_ps:
12191198 ; X32: # BB#0:
1220 ; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
1221 ; X32-NEXT: kmovw %eax, %k1
1199 ; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
12221200 ; X32-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
12231201 ; X32-NEXT: retl
12241202 ;
11591159 ; KNL-LABEL: test18:
11601160 ; KNL: ## BB#0:
11611161 ; KNL-NEXT: kmovw %edi, %k1
1162 ; KNL-NEXT: kmovw %esi, %k0
1163 ; KNL-NEXT: kshiftlw $7, %k0, %k2
1162 ; KNL-NEXT: kmovw %esi, %k2
1163 ; KNL-NEXT: kshiftlw $7, %k2, %k0
1164 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1165 ; KNL-NEXT: kshiftlw $6, %k2, %k2
11641166 ; KNL-NEXT: kshiftrw $15, %k2, %k2
1165 ; KNL-NEXT: kmovw %k2, %eax
1166 ; KNL-NEXT: kshiftlw $6, %k0, %k0
1167 ; KNL-NEXT: kshiftrw $15, %k0, %k0
1168 ; KNL-NEXT: kmovw %k0, %ecx
11691167 ; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1170 ; KNL-NEXT: kmovw %ecx, %k1
1171 ; KNL-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
1168 ; KNL-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
11721169 ; KNL-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,5,8,7]
11731170 ; KNL-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
11741171 ; KNL-NEXT: vpsllq $63, %zmm2, %zmm0
1175 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
1176 ; KNL-NEXT: kshiftlw $1, %k0, %k0
1177 ; KNL-NEXT: kshiftrw $1, %k0, %k0
1178 ; KNL-NEXT: kmovw %eax, %k1
1179 ; KNL-NEXT: kshiftlw $7, %k1, %k1
1180 ; KNL-NEXT: korw %k1, %k0, %k1
1172 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
1173 ; KNL-NEXT: kshiftlw $1, %k1, %k1
1174 ; KNL-NEXT: kshiftrw $1, %k1, %k1
1175 ; KNL-NEXT: kshiftlw $7, %k0, %k0
1176 ; KNL-NEXT: korw %k0, %k1, %k1
11811177 ; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
11821178 ; KNL-NEXT: vpmovqw %zmm0, %xmm0
11831179 ; KNL-NEXT: vzeroupper
11851181 ;
11861182 ; SKX-LABEL: test18:
11871183 ; SKX: ## BB#0:
1188 ; SKX-NEXT: kmovd %edi, %k0
1189 ; SKX-NEXT: kmovd %esi, %k1
1190 ; SKX-NEXT: kshiftlw $7, %k1, %k2
1184 ; SKX-NEXT: kmovd %edi, %k1
1185 ; SKX-NEXT: kmovd %esi, %k2
1186 ; SKX-NEXT: kshiftlw $7, %k2, %k0
1187 ; SKX-NEXT: kshiftrw $15, %k0, %k0
1188 ; SKX-NEXT: kshiftlw $6, %k2, %k2
11911189 ; SKX-NEXT: kshiftrw $15, %k2, %k2
1192 ; SKX-NEXT: kmovd %k2, %eax
1193 ; SKX-NEXT: kshiftlw $6, %k1, %k1
1194 ; SKX-NEXT: kshiftrw $15, %k1, %k1
1195 ; SKX-NEXT: kmovd %k1, %ecx
1196 ; SKX-NEXT: vpmovm2q %k0, %zmm0
1197 ; SKX-NEXT: kmovd %ecx, %k0
1198 ; SKX-NEXT: vpmovm2q %k0, %zmm1
1190 ; SKX-NEXT: vpmovm2q %k1, %zmm0
1191 ; SKX-NEXT: vpmovm2q %k2, %zmm1
11991192 ; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,5,8,7]
12001193 ; SKX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
1201 ; SKX-NEXT: vpmovq2m %zmm2, %k0
1202 ; SKX-NEXT: kshiftlb $1, %k0, %k0
1203 ; SKX-NEXT: kshiftrb $1, %k0, %k0
1204 ; SKX-NEXT: kmovd %eax, %k1
1205 ; SKX-NEXT: kshiftlb $7, %k1, %k1
1206 ; SKX-NEXT: korb %k1, %k0, %k0
1194 ; SKX-NEXT: vpmovq2m %zmm2, %k1
1195 ; SKX-NEXT: kshiftlb $1, %k1, %k1
1196 ; SKX-NEXT: kshiftrb $1, %k1, %k1
1197 ; SKX-NEXT: kshiftlb $7, %k0, %k0
1198 ; SKX-NEXT: korb %k0, %k1, %k0
12071199 ; SKX-NEXT: vpmovm2w %k0, %xmm0
12081200 ; SKX-NEXT: vzeroupper
12091201 ; SKX-NEXT: retq
12111203 ; AVX512BW-LABEL: test18:
12121204 ; AVX512BW: ## BB#0:
12131205 ; AVX512BW-NEXT: kmovd %edi, %k1
1214 ; AVX512BW-NEXT: kmovd %esi, %k0
1215 ; AVX512BW-NEXT: kshiftlw $7, %k0, %k2
1206 ; AVX512BW-NEXT: kmovd %esi, %k2
1207 ; AVX512BW-NEXT: kshiftlw $7, %k2, %k0
1208 ; AVX512BW-NEXT: kshiftrw $15, %k0, %k0
1209 ; AVX512BW-NEXT: kshiftlw $6, %k2, %k2
12161210 ; AVX512BW-NEXT: kshiftrw $15, %k2, %k2
1217 ; AVX512BW-NEXT: kmovd %k2, %eax
1218 ; AVX512BW-NEXT: kshiftlw $6, %k0, %k0
1219 ; AVX512BW-NEXT: kshiftrw $15, %k0, %k0
1220 ; AVX512BW-NEXT: kmovd %k0, %ecx
12211211 ; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1222 ; AVX512BW-NEXT: kmovd %ecx, %k1
1223 ; AVX512BW-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
1212 ; AVX512BW-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
12241213 ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,5,8,7]
12251214 ; AVX512BW-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
12261215 ; AVX512BW-NEXT: vpsllq $63, %zmm2, %zmm0
1227 ; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k0
1228 ; AVX512BW-NEXT: kshiftlw $1, %k0, %k0
1229 ; AVX512BW-NEXT: kshiftrw $1, %k0, %k0
1230 ; AVX512BW-NEXT: kmovd %eax, %k1
1231 ; AVX512BW-NEXT: kshiftlw $7, %k1, %k1
1232 ; AVX512BW-NEXT: korw %k1, %k0, %k0
1216 ; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k1
1217 ; AVX512BW-NEXT: kshiftlw $1, %k1, %k1
1218 ; AVX512BW-NEXT: kshiftrw $1, %k1, %k1
1219 ; AVX512BW-NEXT: kshiftlw $7, %k0, %k0
1220 ; AVX512BW-NEXT: korw %k0, %k1, %k0
12331221 ; AVX512BW-NEXT: vpmovm2w %k0, %zmm0
12341222 ; AVX512BW-NEXT: ## kill: %XMM0 %XMM0 %ZMM0
12351223 ; AVX512BW-NEXT: vzeroupper
12371225 ;
12381226 ; AVX512DQ-LABEL: test18:
12391227 ; AVX512DQ: ## BB#0:
1240 ; AVX512DQ-NEXT: kmovw %edi, %k0
1241 ; AVX512DQ-NEXT: kmovw %esi, %k1
1242 ; AVX512DQ-NEXT: kshiftlw $7, %k1, %k2
1228 ; AVX512DQ-NEXT: kmovw %edi, %k1
1229 ; AVX512DQ-NEXT: kmovw %esi, %k2
1230 ; AVX512DQ-NEXT: kshiftlw $7, %k2, %k0
1231 ; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
1232 ; AVX512DQ-NEXT: kshiftlw $6, %k2, %k2
12431233 ; AVX512DQ-NEXT: kshiftrw $15, %k2, %k2
1244 ; AVX512DQ-NEXT: kmovw %k2, %eax
1245 ; AVX512DQ-NEXT: kshiftlw $6, %k1, %k1
1246 ; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
1247 ; AVX512DQ-NEXT: kmovw %k1, %ecx
1248 ; AVX512DQ-NEXT: vpmovm2q %k0, %zmm0
1249 ; AVX512DQ-NEXT: kmovw %ecx, %k0
1250 ; AVX512DQ-NEXT: vpmovm2q %k0, %zmm1
1234 ; AVX512DQ-NEXT: vpmovm2q %k1, %zmm0
1235 ; AVX512DQ-NEXT: vpmovm2q %k2, %zmm1
12511236 ; AVX512DQ-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,5,8,7]
12521237 ; AVX512DQ-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
1253 ; AVX512DQ-NEXT: vpmovq2m %zmm2, %k0
1254 ; AVX512DQ-NEXT: kshiftlb $1, %k0, %k0
1255 ; AVX512DQ-NEXT: kshiftrb $1, %k0, %k0
1256 ; AVX512DQ-NEXT: kmovw %eax, %k1
1257 ; AVX512DQ-NEXT: kshiftlb $7, %k1, %k1
1258 ; AVX512DQ-NEXT: korb %k1, %k0, %k0
1238 ; AVX512DQ-NEXT: vpmovq2m %zmm2, %k1
1239 ; AVX512DQ-NEXT: kshiftlb $1, %k1, %k1
1240 ; AVX512DQ-NEXT: kshiftrb $1, %k1, %k1
1241 ; AVX512DQ-NEXT: kshiftlb $7, %k0, %k0
1242 ; AVX512DQ-NEXT: korb %k0, %k1, %k0
12591243 ; AVX512DQ-NEXT: vpmovm2q %k0, %zmm0
12601244 ; AVX512DQ-NEXT: vpmovqw %zmm0, %xmm0
12611245 ; AVX512DQ-NEXT: vzeroupper
55 ; CHECK-LABEL: addpd512:
66 ; CHECK: # BB#0: # %entry
77 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0 # sched: [4:0.33]
8 ; CHECK-NEXT: retq # sched: [7:1.00]
8 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
99 entry:
1010 %add.i = fadd <8 x double> %x, %y
1111 ret <8 x double> %add.i
1515 ; CHECK-LABEL: addpd512fold:
1616 ; CHECK: # BB#0: # %entry
1717 ; CHECK-NEXT: vaddpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [11:0.50]
18 ; CHECK-NEXT: retq # sched: [7:1.00]
18 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
1919 entry:
2020 %add.i = fadd <8 x double> %y,
2121 ret <8 x double> %add.i
2525 ; CHECK-LABEL: addps512:
2626 ; CHECK: # BB#0: # %entry
2727 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0 # sched: [4:0.33]
28 ; CHECK-NEXT: retq # sched: [7:1.00]
28 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
2929 entry:
3030 %add.i = fadd <16 x float> %x, %y
3131 ret <16 x float> %add.i
3535 ; CHECK-LABEL: addps512fold:
3636 ; CHECK: # BB#0: # %entry
3737 ; CHECK-NEXT: vaddps {{.*}}(%rip), %zmm0, %zmm0 # sched: [11:0.50]
38 ; CHECK-NEXT: retq # sched: [7:1.00]
38 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
3939 entry:
4040 %add.i = fadd <16 x float> %y,
4141 ret <16 x float> %add.i
4545 ; CHECK-LABEL: subpd512:
4646 ; CHECK: # BB#0: # %entry
4747 ; CHECK-NEXT: vsubpd %zmm0, %zmm1, %zmm0 # sched: [4:0.33]
48 ; CHECK-NEXT: retq # sched: [7:1.00]
48 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
4949 entry:
5050 %sub.i = fsub <8 x double> %x, %y
5151 ret <8 x double> %sub.i
5555 ; CHECK-LABEL: subpd512fold:
5656 ; CHECK: # BB#0: # %entry
5757 ; CHECK-NEXT: vsubpd (%rdi), %zmm0, %zmm0 # sched: [11:0.50]
58 ; CHECK-NEXT: retq # sched: [7:1.00]
58 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
5959 entry:
6060 %tmp2 = load <8 x double>, <8 x double>* %x, align 8
6161 %sub.i = fsub <8 x double> %y, %tmp2
6666 ; CHECK-LABEL: subps512:
6767 ; CHECK: # BB#0: # %entry
6868 ; CHECK-NEXT: vsubps %zmm0, %zmm1, %zmm0 # sched: [4:0.33]
69 ; CHECK-NEXT: retq # sched: [7:1.00]
69 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
7070 entry:
7171 %sub.i = fsub <16 x float> %x, %y
7272 ret <16 x float> %sub.i
7676 ; CHECK-LABEL: subps512fold:
7777 ; CHECK: # BB#0: # %entry
7878 ; CHECK-NEXT: vsubps (%rdi), %zmm0, %zmm0 # sched: [11:0.50]
79 ; CHECK-NEXT: retq # sched: [7:1.00]
79 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
8080 entry:
8181 %tmp2 = load <16 x float>, <16 x float>* %x, align 4
8282 %sub.i = fsub <16 x float> %y, %tmp2
8787 ; CHECK-LABEL: imulq512:
8888 ; CHECK: # BB#0:
8989 ; CHECK-NEXT: vpmullq %zmm0, %zmm1, %zmm0 # sched: [12:1.00]
90 ; CHECK-NEXT: retq # sched: [7:1.00]
90 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
91 ; SKX-LABEL: imulq512:
92 ; SKX: # BB#0:
93 ; SKX-NEXT: vpmullq %zmm0, %zmm1, %zmm0
94 ; SKX-NEXT: retq
9195 %z = mul <8 x i64>%x, %y
9296 ret <8 x i64>%z
9397 }
96100 ; CHECK-LABEL: imulq256:
97101 ; CHECK: # BB#0:
98102 ; CHECK-NEXT: vpmullq %ymm0, %ymm1, %ymm0 # sched: [12:1.00]
99 ; CHECK-NEXT: retq # sched: [7:1.00]
103 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
104 ; SKX-LABEL: imulq256:
105 ; SKX: # BB#0:
106 ; SKX-NEXT: vpmullq %ymm0, %ymm1, %ymm0
107 ; SKX-NEXT: retq
100108 %z = mul <4 x i64>%x, %y
101109 ret <4 x i64>%z
102110 }
105113 ; CHECK-LABEL: imulq128:
106114 ; CHECK: # BB#0:
107115 ; CHECK-NEXT: vpmullq %xmm0, %xmm1, %xmm0 # sched: [12:1.00]
108 ; CHECK-NEXT: retq # sched: [7:1.00]
116 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
117 ; SKX-LABEL: imulq128:
118 ; SKX: # BB#0:
119 ; SKX-NEXT: vpmullq %xmm0, %xmm1, %xmm0
120 ; SKX-NEXT: retq
109121 %z = mul <2 x i64>%x, %y
110122 ret <2 x i64>%z
111123 }
114126 ; CHECK-LABEL: mulpd512:
115127 ; CHECK: # BB#0: # %entry
116128 ; CHECK-NEXT: vmulpd %zmm0, %zmm1, %zmm0 # sched: [4:0.33]
117 ; CHECK-NEXT: retq # sched: [7:1.00]
129 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
118130 entry:
119131 %mul.i = fmul <8 x double> %x, %y
120132 ret <8 x double> %mul.i
124136 ; CHECK-LABEL: mulpd512fold:
125137 ; CHECK: # BB#0: # %entry
126138 ; CHECK-NEXT: vmulpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [11:0.50]
127 ; CHECK-NEXT: retq # sched: [7:1.00]
139 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
128140 entry:
129141 %mul.i = fmul <8 x double> %y,
130142 ret <8 x double> %mul.i
134146 ; CHECK-LABEL: mulps512:
135147 ; CHECK: # BB#0: # %entry
136148 ; CHECK-NEXT: vmulps %zmm0, %zmm1, %zmm0 # sched: [4:0.33]
137 ; CHECK-NEXT: retq # sched: [7:1.00]
149 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
138150 entry:
139151 %mul.i = fmul <16 x float> %x, %y
140152 ret <16 x float> %mul.i
144156 ; CHECK-LABEL: mulps512fold:
145157 ; CHECK: # BB#0: # %entry
146158 ; CHECK-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0 # sched: [11:0.50]
147 ; CHECK-NEXT: retq # sched: [7:1.00]
159 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
148160 entry:
149161 %mul.i = fmul <16 x float> %y,
150162 ret <16 x float> %mul.i
154166 ; CHECK-LABEL: divpd512:
155167 ; CHECK: # BB#0: # %entry
156168 ; CHECK-NEXT: vdivpd %zmm0, %zmm1, %zmm0 # sched: [23:2.00]
157 ; CHECK-NEXT: retq # sched: [7:1.00]
169 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
158170 entry:
159171 %div.i = fdiv <8 x double> %x, %y
160172 ret <8 x double> %div.i
164176 ; CHECK-LABEL: divpd512fold:
165177 ; CHECK: # BB#0: # %entry
166178 ; CHECK-NEXT: vdivpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [30:2.00]
167 ; CHECK-NEXT: retq # sched: [7:1.00]
179 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
168180 entry:
169181 %div.i = fdiv <8 x double> %y,
170182 ret <8 x double> %div.i
174186 ; CHECK-LABEL: divps512:
175187 ; CHECK: # BB#0: # %entry
176188 ; CHECK-NEXT: vdivps %zmm0, %zmm1, %zmm0 # sched: [23:2.00]
177 ; CHECK-NEXT: retq # sched: [7:1.00]
189 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
178190 entry:
179191 %div.i = fdiv <16 x float> %x, %y
180192 ret <16 x float> %div.i
184196 ; CHECK-LABEL: divps512fold:
185197 ; CHECK: # BB#0: # %entry
186198 ; CHECK-NEXT: vdivps {{.*}}(%rip), %zmm0, %zmm0 # sched: [24:2.00]
187 ; CHECK-NEXT: retq # sched: [7:1.00]
199 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
188200 entry:
189201 %div.i = fdiv <16 x float> %y,
190202 ret <16 x float> %div.i
194206 ; CHECK-LABEL: vpaddq_test:
195207 ; CHECK: # BB#0:
196208 ; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
197 ; CHECK-NEXT: retq # sched: [7:1.00]
209 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
198210 %x = add <8 x i64> %i, %j
199211 ret <8 x i64> %x
200212 }
203215 ; CHECK-LABEL: vpaddq_fold_test:
204216 ; CHECK: # BB#0:
205217 ; CHECK-NEXT: vpaddq (%rdi), %zmm0, %zmm0 # sched: [8:0.50]
206 ; CHECK-NEXT: retq # sched: [7:1.00]
218 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
207219 %tmp = load <8 x i64>, <8 x i64>* %j, align 4
208220 %x = add <8 x i64> %i, %tmp
209221 ret <8 x i64> %x
213225 ; CHECK-LABEL: vpaddq_broadcast_test:
214226 ; CHECK: # BB#0:
215227 ; CHECK-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
216 ; CHECK-NEXT: retq # sched: [7:1.00]
228 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
217229 %x = add <8 x i64> %i,
218230 ret <8 x i64> %x
219231 }
222234 ; CHECK-LABEL: vpaddq_broadcast2_test:
223235 ; CHECK: # BB#0:
224236 ; CHECK-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
225 ; CHECK-NEXT: retq # sched: [7:1.00]
237 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
226238 %tmp = load i64, i64* %j
227239 %j.0 = insertelement <8 x i64> undef, i64 %tmp, i32 0
228240 %j.1 = insertelement <8 x i64> %j.0, i64 %tmp, i32 1
240252 ; CHECK-LABEL: vpaddd_test:
241253 ; CHECK: # BB#0:
242254 ; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
243 ; CHECK-NEXT: retq # sched: [7:1.00]
255 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
244256 %x = add <16 x i32> %i, %j
245257 ret <16 x i32> %x
246258 }
249261 ; CHECK-LABEL: vpaddd_fold_test:
250262 ; CHECK: # BB#0:
251263 ; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0 # sched: [8:0.50]
252 ; CHECK-NEXT: retq # sched: [7:1.00]
264 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
253265 %tmp = load <16 x i32>, <16 x i32>* %j, align 4
254266 %x = add <16 x i32> %i, %tmp
255267 ret <16 x i32> %x
259271 ; CHECK-LABEL: vpaddd_broadcast_test:
260272 ; CHECK: # BB#0:
261273 ; CHECK-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
262 ; CHECK-NEXT: retq # sched: [7:1.00]
274 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
263275 %x = add <16 x i32> %i,
264276 ret <16 x i32> %x
265277 }
270282 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
271283 ; CHECK-NEXT: vpcmpneqd %zmm3, %zmm2, %k1 # sched: [3:1.00]
272284 ; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} # sched: [1:0.33]
273 ; CHECK-NEXT: retq # sched: [7:1.00]
285 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
274286 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
275287 %x = add <16 x i32> %i, %j
276288 %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
283295 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
284296 ; CHECK-NEXT: vpcmpneqd %zmm3, %zmm2, %k1 # sched: [3:1.00]
285297 ; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33]
286 ; CHECK-NEXT: retq # sched: [7:1.00]
298 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
287299 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
288300 %x = add <16 x i32> %i, %j
289301 %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
296308 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
297309 ; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1 # sched: [3:1.00]
298310 ; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} # sched: [8:0.50]
299 ; CHECK-NEXT: retq # sched: [7:1.00]
311 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
300312 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
301313 %j = load <16 x i32>, <16 x i32>* %j.ptr
302314 %x = add <16 x i32> %i, %j
310322 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
311323 ; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1 # sched: [3:1.00]
312324 ; CHECK-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} # sched: [8:0.50]
313 ; CHECK-NEXT: retq # sched: [7:1.00]
325 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
314326 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
315327 %x = add <16 x i32> %i,
316328 %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
323335 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
324336 ; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1 # sched: [3:1.00]
325337 ; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [8:0.50]
326 ; CHECK-NEXT: retq # sched: [7:1.00]
338 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
327339 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
328340 %j = load <16 x i32>, <16 x i32>* %j.ptr
329341 %x = add <16 x i32> %i, %j
337349 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
338350 ; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1 # sched: [3:1.00]
339351 ; CHECK-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} {z} # sched: [8:0.50]
340 ; CHECK-NEXT: retq # sched: [7:1.00]
352 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
341353 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
342354 %x = add <16 x i32> %i,
343355 %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
348360 ; CHECK-LABEL: vpsubq_test:
349361 ; CHECK: # BB#0:
350362 ; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
351 ; CHECK-NEXT: retq # sched: [7:1.00]
363 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
352364 %x = sub <8 x i64> %i, %j
353365 ret <8 x i64> %x
354366 }
357369 ; CHECK-LABEL: vpsubd_test:
358370 ; CHECK: # BB#0:
359371 ; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
360 ; CHECK-NEXT: retq # sched: [7:1.00]
372 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
361373 %x = sub <16 x i32> %i, %j
362374 ret <16 x i32> %x
363375 }
366378 ; CHECK-LABEL: vpmulld_test:
367379 ; CHECK: # BB#0:
368380 ; CHECK-NEXT: vpmulld %zmm1, %zmm0, %zmm0 # sched: [8:0.67]
369 ; CHECK-NEXT: retq # sched: [7:1.00]
381 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
370382 %x = mul <16 x i32> %i, %j
371383 ret <16 x i32> %x
372384 }
376388 ; CHECK-LABEL: sqrtA:
377389 ; CHECK: # BB#0: # %entry
378390 ; CHECK-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [12:1.00]
379 ; CHECK-NEXT: retq # sched: [7:1.00]
391 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
380392 entry:
381393 %conv1 = tail call float @sqrtf(float %a) nounwind readnone
382394 ret float %conv1
387399 ; CHECK-LABEL: sqrtB:
388400 ; CHECK: # BB#0: # %entry
389401 ; CHECK-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [18:1.00]
390 ; CHECK-NEXT: retq # sched: [7:1.00]
402 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
391403 entry:
392404 %call = tail call double @sqrt(double %a) nounwind readnone
393405 ret double %call
398410 ; CHECK-LABEL: sqrtC:
399411 ; CHECK: # BB#0:
400412 ; CHECK-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [12:1.00]
401 ; CHECK-NEXT: retq # sched: [7:1.00]
413 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
402414 %b = call float @llvm.sqrt.f32(float %a)
403415 ret float %b
404416 }
408420 ; CHECK-LABEL: sqrtD:
409421 ; CHECK: # BB#0:
410422 ; CHECK-NEXT: vsqrtps %zmm0, %zmm0 # sched: [19:2.00]
411 ; CHECK-NEXT: retq # sched: [7:1.00]
423 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
412424 %b = call <16 x float> @llvm.sqrt.v16f32(<16 x float> %a)
413425 ret <16 x float> %b
414426 }
418430 ; CHECK-LABEL: sqrtE:
419431 ; CHECK: # BB#0:
420432 ; CHECK-NEXT: vsqrtpd %zmm0, %zmm0 # sched: [31:2.00]
421 ; CHECK-NEXT: retq # sched: [7:1.00]
433 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
422434 %b = call <8 x double> @llvm.sqrt.v8f64(<8 x double> %a)
423435 ret <8 x double> %b
424436 }
427439 ; CHECK-LABEL: fadd_broadcast:
428440 ; CHECK: # BB#0:
429441 ; CHECK-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [11:0.50]
430 ; CHECK-NEXT: retq # sched: [7:1.00]
442 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
431443 %b = fadd <16 x float> %a,
432444 ret <16 x float> %b
433445 }
436448 ; CHECK-LABEL: addq_broadcast:
437449 ; CHECK: # BB#0:
438450 ; CHECK-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
439 ; CHECK-NEXT: retq # sched: [7:1.00]
451 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
440452 %b = add <8 x i64> %a,
441453 ret <8 x i64> %b
442454 }
445457 ; CHECK-LABEL: orq_broadcast:
446458 ; CHECK: # BB#0:
447459 ; CHECK-NEXT: vorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
448 ; CHECK-NEXT: retq # sched: [7:1.00]
460 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
461 ; SKX-LABEL: orq_broadcast:
462 ; SKX: # BB#0:
463 ; SKX-NEXT: vorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
464 ; SKX-NEXT: retq
449465 %b = or <8 x i64> %a,
450466 ret <8 x i64> %b
451467 }
454470 ; CHECK-LABEL: andd512fold:
455471 ; CHECK: # BB#0: # %entry
456472 ; CHECK-NEXT: vandps (%rdi), %zmm0, %zmm0 # sched: [8:0.50]
457 ; CHECK-NEXT: retq # sched: [7:1.00]
473 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
474 ; SKX-LABEL: andd512fold:
475 ; SKX: # BB#0: # %entry
476 ; SKX-NEXT: vandps (%rdi), %zmm0, %zmm0
477 ; SKX-NEXT: retq
458478 entry:
459479 %a = load <16 x i32>, <16 x i32>* %x, align 4
460480 %b = and <16 x i32> %y, %a
465485 ; CHECK-LABEL: andqbrst:
466486 ; CHECK: # BB#0: # %entry
467487 ; CHECK-NEXT: vandpd (%rdi){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
468 ; CHECK-NEXT: retq # sched: [7:1.00]
488 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
489 ; SKX-LABEL: andqbrst:
490 ; SKX: # BB#0: # %entry
491 ; SKX-NEXT: vandpd (%rdi){1to8}, %zmm0, %zmm0
492 ; SKX-NEXT: retq
469493 entry:
470494 %a = load i64, i64* %ap, align 8
471495 %b = insertelement <8 x i64> undef, i64 %a, i32 0
480504 ; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
481505 ; CHECK-NEXT: vpcmpneqd %zmm4, %zmm3, %k1 # sched: [3:1.00]
482506 ; CHECK-NEXT: vaddps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.33]
483 ; CHECK-NEXT: retq # sched: [7:1.00]
507 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
484508 <16 x float> %j, <16 x i32> %mask1)
485509 nounwind readnone {
486510 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
495519 ; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
496520 ; CHECK-NEXT: vpcmpneqd %zmm4, %zmm3, %k1 # sched: [3:1.00]
497521 ; CHECK-NEXT: vmulps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.33]
498 ; CHECK-NEXT: retq # sched: [7:1.00]
522 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
499523 <16 x float> %j, <16 x i32> %mask1)
500524 nounwind readnone {
501525 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
510534 ; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
511535 ; CHECK-NEXT: vpcmpneqd %zmm4, %zmm3, %k1 # sched: [3:1.00]
512536 ; CHECK-NEXT: vminps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.33]
513 ; CHECK-NEXT: retq # sched: [7:1.00]
537 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
514538 <16 x float> %j, <16 x i32> %mask1)
515539 nounwind readnone {
516540 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
526550 ; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
527551 ; CHECK-NEXT: vpcmpneqd %ymm4, %ymm3, %k1 # sched: [3:1.00]
528552 ; CHECK-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.33]
529 ; CHECK-NEXT: retq # sched: [7:1.00]
553 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
554 ; SKX-LABEL: test_mask_vminpd:
555 ; SKX: # BB#0:
556 ; SKX-NEXT: vpxor %xmm4, %xmm4, %xmm4
557 ; SKX-NEXT: vpcmpneqd %ymm4, %ymm3, %k1
558 ; SKX-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1}
559 ; SKX-NEXT: retq
530560 <8 x double> %j, <8 x i32> %mask1)
531561 nounwind readnone {
532562 %mask = icmp ne <8 x i32> %mask1, zeroinitializer
542572 ; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
543573 ; CHECK-NEXT: vpcmpneqd %zmm4, %zmm3, %k1 # sched: [3:1.00]
544574 ; CHECK-NEXT: vmaxps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.33]
545 ; CHECK-NEXT: retq # sched: [7:1.00]
575 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
546576 <16 x float> %j, <16 x i32> %mask1)
547577 nounwind readnone {
548578 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
558588 ; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
559589 ; CHECK-NEXT: vpcmpneqd %ymm4, %ymm3, %k1 # sched: [3:1.00]
560590 ; CHECK-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.33]
561 ; CHECK-NEXT: retq # sched: [7:1.00]
591 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
592 ; SKX-LABEL: test_mask_vmaxpd:
593 ; SKX: # BB#0:
594 ; SKX-NEXT: vpxor %xmm4, %xmm4, %xmm4
595 ; SKX-NEXT: vpcmpneqd %ymm4, %ymm3, %k1
596 ; SKX-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
597 ; SKX-NEXT: retq
562598 <8 x double> %j, <8 x i32> %mask1)
563599 nounwind readnone {
564600 %mask = icmp ne <8 x i32> %mask1, zeroinitializer
574610 ; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
575611 ; CHECK-NEXT: vpcmpneqd %zmm4, %zmm3, %k1 # sched: [3:1.00]
576612 ; CHECK-NEXT: vsubps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.33]
577 ; CHECK-NEXT: retq # sched: [7:1.00]
613 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
578614 <16 x float> %j, <16 x i32> %mask1)
579615 nounwind readnone {
580616 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
589625 ; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
590626 ; CHECK-NEXT: vpcmpneqd %zmm4, %zmm3, %k1 # sched: [3:1.00]
591627 ; CHECK-NEXT: vdivps %zmm2, %zmm1, %zmm0 {%k1} # sched: [23:2.00]
592 ; CHECK-NEXT: retq # sched: [7:1.00]
628 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
593629 <16 x float> %j, <16 x i32> %mask1)
594630 nounwind readnone {
595631 %mask = icmp ne <16 x i32> %mask1, zeroinitializer
604640 ; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
605641 ; CHECK-NEXT: vpcmpneqq %zmm4, %zmm3, %k1 # sched: [3:1.00]
606642 ; CHECK-NEXT: vaddpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.33]
607 ; CHECK-NEXT: retq # sched: [7:1.00]
643 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
608644 <8 x double> %j, <8 x i64> %mask1)
609645 nounwind readnone {
610646 %mask = icmp ne <8 x i64> %mask1, zeroinitializer
619655 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
620656 ; CHECK-NEXT: vpcmpneqq %zmm3, %zmm2, %k1 # sched: [3:1.00]
621657 ; CHECK-NEXT: vaddpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [4:0.33]
622 ; CHECK-NEXT: retq # sched: [7:1.00]
658 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
623659 <8 x i64> %mask1) nounwind readnone {
624660 %mask = icmp ne <8 x i64> %mask1, zeroinitializer
625661 %x = fadd <8 x double> %i, %j
633669 ; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
634670 ; CHECK-NEXT: vpcmpneqq %zmm3, %zmm2, %k1 # sched: [3:1.00]
635671 ; CHECK-NEXT: vaddpd (%rdi), %zmm1, %zmm0 {%k1} # sched: [11:0.50]
636 ; CHECK-NEXT: retq # sched: [7:1.00]
672 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
637673 <8 x double>* %j, <8 x i64> %mask1)
638674 nounwind {
639675 %mask = icmp ne <8 x i64> %mask1, zeroinitializer
649685 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
650686 ; CHECK-NEXT: vpcmpneqq %zmm2, %zmm1, %k1 # sched: [3:1.00]
651687 ; CHECK-NEXT: vaddpd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [11:0.50]
652 ; CHECK-NEXT: retq # sched: [7:1.00]
688 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
653689 <8 x i64> %mask1) nounwind {
654690 %mask = icmp ne <8 x i64> %mask1, zeroinitializer
655691 %tmp = load <8 x double>, <8 x double>* %j, align 8
662698 ; CHECK-LABEL: test_broadcast_vaddpd:
663699 ; CHECK: # BB#0:
664700 ; CHECK-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0 # sched: [11:0.50]
665 ; CHECK-NEXT: retq # sched: [7:1.00]
701 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
666702 %tmp = load double, double* %j
667703 %b = insertelement <8 x double> undef, double %tmp, i32 0
668704 %c = shufflevector <8 x double> %b, <8 x double> undef,
678714 ; CHECK-NEXT: vpcmpneqq %zmm0, %zmm2, %k1 # sched: [3:1.00]
679715 ; CHECK-NEXT: vaddpd (%rdi){1to8}, %zmm1, %zmm1 {%k1} # sched: [11:0.50]
680716 ; CHECK-NEXT: vmovapd %zmm1, %zmm0
681 ; CHECK-NEXT: retq # sched: [7:1.00]
717 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
682718 double* %j, <8 x i64> %mask1) nounwind {
683719 %mask = icmp ne <8 x i64> %mask1, zeroinitializer
684720 %tmp = load double, double* %j
696732 ; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
697733 ; CHECK-NEXT: vpcmpneqq %zmm2, %zmm1, %k1 # sched: [3:1.00]
698734 ; CHECK-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} # sched: [11:0.50]
699 ; CHECK-NEXT: retq # sched: [7:1.00]
735 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
700736 <8 x i64> %mask1) nounwind {
701737 %mask = icmp ne <8 x i64> %mask1, zeroinitializer
702738 %tmp = load double, double* %j
712748 ; CHECK-LABEL: test_fxor:
713749 ; CHECK: # BB#0:
714750 ; CHECK-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
715 ; CHECK-NEXT: retq # sched: [7:1.00]
751 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
752 ; SKX-LABEL: test_fxor:
753 ; SKX: # BB#0:
754 ; SKX-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0
755 ; SKX-NEXT: retq
716756
717757 %res = fsub <16 x float> , %a
718758 ret <16 x float>%res
722762 ; CHECK-LABEL: test_fxor_8f32:
723763 ; CHECK: # BB#0:
724764 ; CHECK-NEXT: vxorps {{.*}}(%rip){1to8}, %ymm0, %ymm0 # sched: [8:0.50]
725 ; CHECK-NEXT: retq # sched: [7:1.00]
765 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
766 ; SKX-LABEL: test_fxor_8f32:
767 ; SKX: # BB#0:
768 ; SKX-NEXT: vxorps {{.*}}(%rip){1to8}, %ymm0, %ymm0
769 ; SKX-NEXT: retq
726770 %res = fsub <8 x float> , %a
727771 ret <8 x float>%res
728772 }
731775 ; CHECK-LABEL: fabs_v8f64:
732776 ; CHECK: # BB#0:
733777 ; CHECK-NEXT: vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
734 ; CHECK-NEXT: retq # sched: [7:1.00]
778 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
779 ; SKX-LABEL: fabs_v8f64:
780 ; SKX: # BB#0:
781 ; SKX-NEXT: vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
782 ; SKX-NEXT: retq
735783 {
736784 %t = call <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
737785 ret <8 x double> %t
742790 ; CHECK-LABEL: fabs_v16f32:
743791 ; CHECK: # BB#0:
744792 ; CHECK-NEXT: vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
745 ; CHECK-NEXT: retq # sched: [7:1.00]
793 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
794 ; SKX-LABEL: fabs_v16f32:
795 ; SKX: # BB#0:
796 ; SKX-NEXT: vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0
797 ; SKX-NEXT: retq
746798 {
747799 %t = call <16 x float> @llvm.fabs.v16f32(<16 x float> %p)
748800 ret <16 x float> %t
757809 ; CHECK-NEXT: jnp .LBB64_2 # sched: [1:0.50]
758810 ; CHECK-NEXT: .LBB64_1: # %l1
759811 ; CHECK-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
760 ; CHECK-NEXT: retq # sched: [7:1.00]
812 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
761813 ; CHECK-NEXT: .LBB64_2: # %l2
762814 ; CHECK-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
763 ; CHECK-NEXT: retq # sched: [7:1.00]
815 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
764816 %tobool = fcmp une double %a, %b
765817 br i1 %tobool, label %l1, label %l2
766818
779831 ; CHECK-NEXT: jbe .LBB65_2 # sched: [1:0.50]
780832 ; CHECK-NEXT: # BB#1: # %l1
781833 ; CHECK-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
782 ; CHECK-NEXT: retq # sched: [7:1.00]
834 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
783835 ; CHECK-NEXT: .LBB65_2: # %l2
784836 ; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
785 ; CHECK-NEXT: retq # sched: [7:1.00]
837 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
786838 %tobool = fcmp olt float %a, %b
787839 br i1 %tobool, label %l1, label %l2
788840
800852 ; CHECK-NEXT: vcmpeqss %xmm1, %xmm0, %k0
801853 ; CHECK-NEXT: kmovd %k0, %eax # sched: [3:1.00]
802854 ; CHECK-NEXT: movzbl %al, %eax # sched: [1:0.25]
803 ; CHECK-NEXT: retq # sched: [7:1.00]
855 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
856 ; SKX-LABEL: test3:
857 ; SKX: ## BB#0:
858 ; SKX-NEXT: vcmpeqss %xmm1, %xmm0, %k0
859 ; SKX-NEXT: kmovd %k0, %eax
860 ; SKX-NEXT: movzbl %al, %eax
861 ; SKX-NEXT: retq
804862
805863 %cmp10.i = fcmp oeq float %a, %b
806864 %conv11.i = zext i1 %cmp10.i to i32
815873 ; CHECK-NEXT: jne .LBB67_1 # sched: [1:0.50]
816874 ; CHECK-NEXT: jp .LBB67_1 # sched: [1:0.50]
817875 ; CHECK-NEXT: # BB#2: # %return
818 ; CHECK-NEXT: retq # sched: [7:1.00]
876 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
819877 ; CHECK-NEXT: .LBB67_1: # %if.end
820878 ; CHECK-NEXT: seta %al # sched: [2:1.00]
821879 ; CHECK-NEXT: movzbl %al, %eax # sched: [1:0.25]
822880 ; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
823 ; CHECK-NEXT: retq # sched: [7:1.00]
881 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
824882 entry:
825883 %cmp = fcmp oeq float %p, 0.000000e+00
826884 br i1 %cmp, label %return, label %if.end
841899 ; CHECK-NEXT: xorl %eax, %eax # sched: [1:0.25]
842900 ; CHECK-NEXT: cmpl %esi, %edi # sched: [1:0.25]
843901 ; CHECK-NEXT: sete %al # sched: [1:0.50]
844 ; CHECK-NEXT: retq # sched: [7:1.00]
902 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
845903 %cmp = icmp eq i32 %a, %b
846904 %res = zext i1 %cmp to i32
847905 ret i32 %res
853911 ; CHECK-NEXT: xorl %eax, %eax # sched: [1:0.25]
854912 ; CHECK-NEXT: vucomisd %xmm1, %xmm0 # sched: [2:1.00]
855913 ; CHECK-NEXT: setne %al # sched: [1:0.50]
856 ; CHECK-NEXT: retq # sched: [7:1.00]
914 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
857915 entry:
858916 %0 = fcmp one double %x, %y
859917 %or = zext i1 %0 to i32
871929 ; CHECK-NEXT: cmovel %eax, %edx # sched: [1:0.50]
872930 ; CHECK-NEXT: orl %edi, %esi # sched: [1:0.25]
873931 ; CHECK-NEXT: cmovnel %edx, %eax # sched: [1:0.50]
874 ; CHECK-NEXT: retq # sched: [7:1.00]
932 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
875933 %tmp1 = icmp eq i32 %a1, -1
876934 %tmp2 = icmp eq i32 %a2, -2147483648
877935 %tmp3 = and i1 %tmp1, %tmp2
888946 ; CHECK-NEXT: jne .LBB71_2 # sched: [1:0.50]
889947 ; CHECK-NEXT: # BB#1: # %A
890948 ; CHECK-NEXT: movl $6, %eax # sched: [1:0.25]
891 ; CHECK-NEXT: retq # sched: [7:1.00]
949 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
892950 ; CHECK-NEXT: .LBB71_2: # %B
893951 ; CHECK-NEXT: movl $7, %eax # sched: [1:0.25]
894 ; CHECK-NEXT: retq # sched: [7:1.00]
952 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
895953 %b = and i64 %a, 1
896954 %cmp10.i = icmp eq i64 %b, 0
897955 br i1 %cmp10.i, label %A, label %B
914972 ; CHECK-NEXT: je .LBB72_1 # sched: [1:0.50]
915973 ; CHECK-NEXT: # BB#2: # %if.end.i
916974 ; CHECK-NEXT: movl $6, %eax # sched: [1:0.25]
917 ; CHECK-NEXT: retq # sched: [7:1.00]
975 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
918976 ; CHECK-NEXT: .LBB72_1: # %if.then.i
919977 ; CHECK-NEXT: movl $5, %eax # sched: [1:0.25]
920 ; CHECK-NEXT: retq # sched: [7:1.00]
978 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
921979
922980 %cmp8.i = icmp eq i64 %b, %c
923981 %or1 = or i1 %d, %cmp8.i
935993 ; CHECK-LABEL: sitof32:
936994 ; CHECK: # BB#0:
937995 ; CHECK-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.33]
938 ; CHECK-NEXT: retq # sched: [7:1.00]
996 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
939997 %b = sitofp <16 x i32> %a to <16 x float>
940998 ret <16 x float> %b
941999 }
9441002 ; CHECK-LABEL: sltof864:
9451003 ; CHECK: # BB#0:
9461004 ; CHECK-NEXT: vcvtqq2pd %zmm0, %zmm0 # sched: [4:0.33]
947 ; CHECK-NEXT: retq # sched: [7:1.00]
1005 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
9481006 %b = sitofp <8 x i64> %a to <8 x double>
9491007 ret <8 x double> %b
9501008 }
9531011 ; CHECK-LABEL: slto4f64:
9541012 ; CHECK: # BB#0:
9551013 ; CHECK-NEXT: vcvtqq2pd %ymm0, %ymm0 # sched: [4:0.33]
956 ; CHECK-NEXT: retq # sched: [7:1.00]
1014 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
9571015 ; VLDQ-LABEL: slto4f64:
9581016 ; VLDQ: # BB#0:
9591017 ; VLDQ-NEXT: vcvtqq2pd %ymm0, %ymm0
9661024 ; CHECK-LABEL: slto2f64:
9671025 ; CHECK: # BB#0:
9681026 ; CHECK-NEXT: vcvtqq2pd %xmm0, %xmm0 # sched: [4:0.33]
969 ; CHECK-NEXT: retq # sched: [7:1.00]
1027 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
9701028 ; VLDQ-LABEL: slto2f64:
9711029 ; VLDQ: # BB#0:
9721030 ; VLDQ-NEXT: vcvtqq2pd %xmm0, %xmm0
9791037 ; CHECK-LABEL: sltof2f32:
9801038 ; CHECK: # BB#0:
9811039 ; CHECK-NEXT: vcvtqq2ps %xmm0, %xmm0 # sched: [5:1.00]
982 ; CHECK-NEXT: retq # sched: [7:1.00]
1040 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
9831041 ; VLDQ-LABEL: sltof2f32:
9841042 ; VLDQ: # BB#0:
9851043 ; VLDQ-NEXT: vcvtqq2ps %xmm0, %xmm0
9921050 ; CHECK-LABEL: slto4f32_mem:
9931051 ; CHECK: # BB#0:
9941052 ; CHECK-NEXT: vcvtqq2psy (%rdi), %xmm0
995 ; CHECK-NEXT: retq # sched: [7:1.00]
1053 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
9961054 ; VLDQ-LABEL: slto4f32_mem:
9971055 ; VLDQ: # BB#0:
9981056 ; VLDQ-NEXT: vcvtqq2psy (%rdi), %xmm0
10061064 ; CHECK-LABEL: f64to4sl:
10071065 ; CHECK: # BB#0:
10081066 ; CHECK-NEXT: vcvttpd2qq %ymm0, %ymm0 # sched: [4:0.33]
1009 ; CHECK-NEXT: retq # sched: [7:1.00]
1067 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
10101068 ; VLDQ-LABEL: f64to4sl:
10111069 ; VLDQ: # BB#0:
10121070 ; VLDQ-NEXT: vcvttpd2qq %ymm0, %ymm0
10191077 ; CHECK-LABEL: f32to4sl:
10201078 ; CHECK: # BB#0:
10211079 ; CHECK-NEXT: vcvttps2qq %xmm0, %ymm0 # sched: [7:1.00]
1022 ; CHECK-NEXT: retq # sched: [7:1.00]
1080 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
10231081 ; VLDQ-LABEL: f32to4sl:
10241082 ; VLDQ: # BB#0:
10251083 ; VLDQ-NEXT: vcvttps2qq %xmm0, %ymm0
10331091 ; CHECK: # BB#0:
10341092 ; CHECK-NEXT: vcvtqq2ps %ymm0, %xmm0 # sched: [7:1.00]
10351093 ; CHECK-NEXT: vzeroupper # sched: [4:1.00]
1036 ; CHECK-NEXT: retq # sched: [7:1.00]
1094 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
10371095 ; VLDQ-LABEL: slto4f32:
10381096 ; VLDQ: # BB#0:
10391097 ; VLDQ-NEXT: vcvtqq2ps %ymm0, %xmm0
10481106 ; CHECK: # BB#0:
10491107 ; CHECK-NEXT: vcvtuqq2ps %ymm0, %xmm0 # sched: [7:1.00]
10501108 ; CHECK-NEXT: vzeroupper # sched: [4:1.00]
1051 ; CHECK-NEXT: retq # sched: [7:1.00]
1109 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
10521110 ; VLDQ-LABEL: ulto4f32:
10531111 ; VLDQ: # BB#0:
10541112 ; VLDQ-NEXT: vcvtuqq2ps %ymm0, %xmm0
10621120 ; CHECK-LABEL: ulto8f64:
10631121 ; CHECK: # BB#0:
10641122 ; CHECK-NEXT: vcvtuqq2pd %zmm0, %zmm0 # sched: [4:0.33]
1065 ; CHECK-NEXT: retq # sched: [7:1.00]
1123 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
10661124 %b = uitofp <8 x i64> %a to <8 x double>
10671125 ret <8 x double> %b
10681126 }
10721130 ; CHECK: # BB#0:
10731131 ; CHECK-NEXT: vcvtuqq2pd %zmm0, %zmm0 # sched: [4:0.33]
10741132 ; CHECK-NEXT: vcvtuqq2pd %zmm1, %zmm1 # sched: [4:0.33]
1075 ; CHECK-NEXT: retq # sched: [7:1.00]
1133 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
10761134 %b = uitofp <16 x i64> %a to <16 x double>
10771135 ret <16 x double> %b
10781136 }
10811139 ; CHECK-LABEL: f64to16si:
10821140 ; CHECK: # BB#0:
10831141 ; CHECK-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [4:0.33]
1084 ; CHECK-NEXT: retq # sched: [7:1.00]
1142 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
10851143 %b = fptosi <16 x float> %a to <16 x i32>
10861144 ret <16 x i32> %b
10871145 }
10901148 ; CHECK-LABEL: f32to16ui:
10911149 ; CHECK: # BB#0:
10921150 ; CHECK-NEXT: vcvttps2udq %zmm0, %zmm0 # sched: [4:0.33]
1093 ; CHECK-NEXT: retq # sched: [7:1.00]
1151 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
10941152 %b = fptoui <16 x float> %a to <16 x i32>
10951153 ret <16 x i32> %b
10961154 }
11011159 ; CHECK-NEXT: vcvttps2udq %zmm0, %zmm0 # sched: [4:0.33]
11021160 ; CHECK-NEXT: vpmovdb %zmm0, %xmm0 # sched: [4:2.00]
11031161 ; CHECK-NEXT: vzeroupper # sched: [4:1.00]
1104 ; CHECK-NEXT: retq # sched: [7:1.00]
1162 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
11051163 %res = fptoui <16 x float> %f to <16 x i8>
11061164 ret <16 x i8> %res
11071165 }
11111169 ; CHECK: # BB#0:
11121170 ; CHECK-NEXT: vcvttps2udq %zmm0, %zmm0 # sched: [4:0.33]
11131171 ; CHECK-NEXT: vpmovdw %zmm0, %ymm0 # sched: [4:2.00]
1114 ; CHECK-NEXT: retq # sched: [7:1.00]
1172 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
11151173 %res = fptoui <16 x float> %f to <16 x i16>
11161174 ret <16 x i16> %res
11171175 }
11201178 ; CHECK-LABEL: f32to8ui:
11211179 ; CHECK: # BB#0:
11221180 ; CHECK-NEXT: vcvttps2udq %ymm0, %ymm0 # sched: [4:0.33]
1123 ; CHECK-NEXT: retq # sched: [7:1.00]
1181 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
11241182 %b = fptoui <8 x float> %a to <8 x i32>
11251183 ret <8 x i32> %b
11261184 }
11291187 ; CHECK-LABEL: f32to4ui:
11301188 ; CHECK: # BB#0:
11311189 ; CHECK-NEXT: vcvttps2udq %xmm0, %xmm0 # sched: [4:0.33]
1132 ; CHECK-NEXT: retq # sched: [7:1.00]
1190 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
11331191 %b = fptoui <4 x float> %a to <4 x i32>
11341192 ret <4 x i32> %b
11351193 }
11381196 ; CHECK-LABEL: f64to8ui:
11391197 ; CHECK: # BB#0:
11401198 ; CHECK-NEXT: vcvttpd2udq %zmm0, %ymm0 # sched: [7:1.00]
1141 ; CHECK-NEXT: retq # sched: [7:1.00]
1199 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
11421200 %b = fptoui <8 x double> %a to <8 x i32>
11431201 ret <8 x i32> %b
11441202 }
11491207 ; CHECK-NEXT: vcvttpd2dq %zmm0, %ymm0 # sched: [7:1.00]
11501208 ; CHECK-NEXT: vpmovdw %ymm0, %xmm0 # sched: [4:2.00]
11511209 ; CHECK-NEXT: vzeroupper # sched: [4:1.00]
1152 ; CHECK-NEXT: retq # sched: [7:1.00]
1210 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
11531211 %res = fptoui <8 x double> %f to <8 x i16>
11541212 ret <8 x i16> %res
11551213 }
11601218 ; CHECK-NEXT: vcvttpd2dq %zmm0, %ymm0 # sched: [7:1.00]
11611219 ; CHECK-NEXT: vpmovdw %ymm0, %xmm0 # sched: [4:2.00]
11621220 ; CHECK-NEXT: vzeroupper # sched: [4:1.00]
1163 ; CHECK-NEXT: retq # sched: [7:1.00]
1221 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
11641222 %res = fptoui <8 x double> %f to <8 x i8>
11651223 ret <8 x i8> %res
11661224 }
11701228 ; CHECK: # BB#0:
11711229 ; CHECK-NEXT: vcvttpd2udq %ymm0, %xmm0 # sched: [7:1.00]
11721230 ; CHECK-NEXT: vzeroupper # sched: [4:1.00]
1173 ; CHECK-NEXT: retq # sched: [7:1.00]
1231 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
11741232 %b = fptoui <4 x double> %a to <4 x i32>
11751233 ret <4 x i32> %b
11761234 }
11791237 ; CHECK-LABEL: sito8f64:
11801238 ; CHECK: # BB#0:
11811239 ; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00]
1182 ; CHECK-NEXT: retq # sched: [7:1.00]
1240 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
11831241 %b = sitofp <8 x i32> %a to <8 x double>
11841242 ret <8 x double> %b
11851243 }
11881246 ; CHECK: # BB#0:
11891247 ; CHECK-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
11901248 ; CHECK-NEXT: vcvtdq2pd %ymm1, %zmm0 {%k1} # sched: [7:1.00]
1191 ; CHECK-NEXT: retq # sched: [7:1.00]
1249 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
11921250 ; VLBW-LABEL: i32to8f64_mask:
11931251 ; VLBW: # BB#0:
11941252 ; VLBW-NEXT: kmovd %edi, %k1
12091267 ; CHECK: # BB#0:
12101268 ; CHECK-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
12111269 ; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm0 {%k1} {z} # sched: [7:1.00]
1212 ; CHECK-NEXT: retq # sched: [7:1.00]
1270 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
12131271 ; VLBW-LABEL: sito8f64_maskz:
12141272 ; VLBW: # BB#0:
12151273 ; VLBW-NEXT: kmovd %edi, %k1
12301288 ; CHECK-LABEL: f64to8si:
12311289 ; CHECK: # BB#0:
12321290 ; CHECK-NEXT: vcvttpd2dq %zmm0, %ymm0 # sched: [7:1.00]
1233 ; CHECK-NEXT: retq # sched: [7:1.00]
1291 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
12341292 %b = fptosi <8 x double> %a to <8 x i32>
12351293 ret <8 x i32> %b
12361294 }
12401298 ; CHECK: # BB#0:
12411299 ; CHECK-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [7:1.00]
12421300 ; CHECK-NEXT: vzeroupper # sched: [4:1.00]
1243 ; CHECK-NEXT: retq # sched: [7:1.00]
1301 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
12441302 %b = fptosi <4 x double> %a to <4 x i32>
12451303 ret <4 x i32> %b
12461304 }
12511309 ; CHECK-NEXT: vcvtpd2ps %zmm0, %ymm0 # sched: [7:1.00]
12521310 ; CHECK-NEXT: vcvtpd2ps %zmm1, %ymm1 # sched: [7:1.00]
12531311 ; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 # sched: [3:1.00]
1254 ; CHECK-NEXT: retq # sched: [7:1.00]
1312 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
12551313 %a = fptrunc <16 x double> %b to <16 x float>
12561314 ret <16 x float> %a
12571315 }
12611319 ; CHECK: # BB#0:
12621320 ; CHECK-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [7:1.00]
12631321 ; CHECK-NEXT: vzeroupper # sched: [4:1.00]
1264 ; CHECK-NEXT: retq # sched: [7:1.00]
1322 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
12651323 %a = fptrunc <4 x double> %b to <4 x float>
12661324 ret <4 x float> %a
12671325 }
12731331 ; CHECK-NEXT: vptestmd %xmm1, %xmm1, %k1 # sched: [3:1.00]
12741332 ; CHECK-NEXT: vcvtpd2ps %ymm0, %xmm0 {%k1} {z} # sched: [7:1.00]
12751333 ; CHECK-NEXT: vzeroupper # sched: [4:1.00]
1276 ; CHECK-NEXT: retq # sched: [7:1.00]
1334 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
12771335 %a = fptrunc <4 x double> %b to <4 x float>
12781336 %c = select <4 x i1>%mask, <4 x float>%a, <4 x float> zeroinitializer
12791337 ret <4 x float> %c
12831341 ; CHECK-LABEL: f64tof32_inreg:
12841342 ; CHECK: # BB#0:
12851343 ; CHECK-NEXT: vcvtsd2ss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
1286 ; CHECK-NEXT: retq # sched: [7:1.00]
1344 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
12871345 %ext = extractelement <2 x double> %a0, i32 0
12881346 %cvt = fptrunc double %ext to float
12891347 %res = insertelement <4 x float> %a1, float %cvt, i32 0
12941352 ; CHECK-LABEL: f32to8f64:
12951353 ; CHECK: # BB#0:
12961354 ; CHECK-NEXT: vcvtps2pd %ymm0, %zmm0 # sched: [7:1.00]
1297 ; CHECK-NEXT: retq # sched: [7:1.00]
1355 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
12981356 %a = fpext <8 x float> %b to <8 x double>
12991357 ret <8 x double> %a
13001358 }
13041362 ; CHECK: # BB#0:
13051363 ; CHECK-NEXT: vcmpltpd %ymm2, %ymm1, %k1 # sched: [3:1.00]
13061364 ; CHECK-NEXT: vcvtps2pd %xmm0, %ymm0 {%k1} {z} # sched: [7:1.00]
1307 ; CHECK-NEXT: retq # sched: [7:1.00]
1365 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
13081366 %a = fpext <4 x float> %b to <4 x double>
13091367 %mask = fcmp ogt <4 x double> %a1, %b1
13101368 %c = select <4 x i1> %mask, <4 x double> %a, <4 x double> zeroinitializer
13151373 ; CHECK-LABEL: f32tof64_inreg:
13161374 ; CHECK: # BB#0:
13171375 ; CHECK-NEXT: vcvtss2sd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1318 ; CHECK-NEXT: retq # sched: [7:1.00]
1376 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
13191377 %ext = extractelement <4 x float> %a1, i32 0
13201378 %cvt = fpext float %ext to double
13211379 %res = insertelement <2 x double> %a0, double %cvt, i32 0
13261384 ; CHECK-LABEL: sltof64_load:
13271385 ; CHECK: # BB#0: # %entry
13281386 ; CHECK-NEXT: vcvtsi2sdq (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
1329 ; CHECK-NEXT: retq # sched: [7:1.00]
1387 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
13301388 entry:
13311389 %tmp1 = load i64, i64* %e, align 8
13321390 %conv = sitofp i64 %tmp1 to double
13371395 ; CHECK-LABEL: sitof64_load:
13381396 ; CHECK: # BB#0: # %entry
13391397 ; CHECK-NEXT: vcvtsi2sdl (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
1340 ; CHECK-NEXT: retq # sched: [7:1.00]
1398 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
13411399 entry:
13421400 %tmp1 = load i32, i32* %e, align 4
13431401 %conv = sitofp i32 %tmp1 to double
13481406 ; CHECK-LABEL: sitof32_load:
13491407 ; CHECK: # BB#0: # %entry
13501408 ; CHECK-NEXT: vcvtsi2ssl (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
1351 ; CHECK-NEXT: retq # sched: [7:1.00]
1409 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
13521410 entry:
13531411 %tmp1 = load i32, i32* %e, align 4
13541412 %conv = sitofp i32 %tmp1 to float
13591417 ; CHECK-LABEL: sltof32_load:
13601418 ; CHECK: # BB#0: # %entry
13611419 ; CHECK-NEXT: vcvtsi2ssq (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
1362 ; CHECK-NEXT: retq # sched: [7:1.00]
1420 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
13631421 entry:
13641422 %tmp1 = load i64, i64* %e, align 8
13651423 %conv = sitofp i64 %tmp1 to float
13721430 ; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
13731431 ; CHECK-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
13741432 ; CHECK-NEXT: vmovsd %xmm0, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
1375 ; CHECK-NEXT: retq # sched: [7:1.00]
1433 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
13761434 entry:
13771435 %f = alloca float, align 4
13781436 %d = alloca double, align 8
13881446 ; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
13891447 ; CHECK-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
13901448 ; CHECK-NEXT: vmovss %xmm0, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
1391 ; CHECK-NEXT: retq # sched: [7:1.00]
1449 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
13921450 entry:
13931451 %f = alloca float, align 4
13941452 %d = alloca double, align 8
14021460 ; CHECK-LABEL: long_to_double:
14031461 ; CHECK: # BB#0:
14041462 ; CHECK-NEXT: vmovq %rdi, %xmm0 # sched: [1:0.25]
1405 ; CHECK-NEXT: retq # sched: [7:1.00]
1463 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
14061464 %res = bitcast i64 %x to double
14071465 ret double %res
14081466 }
14111469 ; CHECK-LABEL: double_to_long:
14121470 ; CHECK: # BB#0:
14131471 ; CHECK-NEXT: vmovq %xmm0, %rax # sched: [1:0.25]
1414 ; CHECK-NEXT: retq # sched: [7:1.00]
1472 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
14151473 %res = bitcast double %x to i64
14161474 ret i64 %res
14171475 }
14201478 ; CHECK-LABEL: int_to_float:
14211479 ; CHECK: # BB#0:
14221480 ; CHECK-NEXT: vmovd %edi, %xmm0 # sched: [1:0.25]
1423 ; CHECK-NEXT: retq # sched: [7:1.00]
1481 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
14241482 %res = bitcast i32 %x to float
14251483 ret float %res
14261484 }
14291487 ; CHECK-LABEL: float_to_int:
14301488 ; CHECK: # BB#0:
14311489 ; CHECK-NEXT: vmovd %xmm0, %eax # sched: [1:0.25]
1432 ; CHECK-NEXT: retq # sched: [7:1.00]
1490 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
14331491 %res = bitcast float %x to i32
14341492 ret i32 %res
14351493 }
14411499 ; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm0 # sched: [3:1.00]
14421500 ; CHECK-NEXT: vcvtudq2pd %ymm0, %zmm1 # sched: [7:1.00]
14431501 ; CHECK-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33]
1444 ; CHECK-NEXT: retq # sched: [7:1.00]
1502 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
14451503 %b = uitofp <16 x i32> %a to <16 x double>
14461504 ret <16 x double> %b
14471505 }
14501508 ; CHECK-LABEL: slto8f32:
14511509 ; CHECK: # BB#0:
14521510 ; CHECK-NEXT: vcvtqq2ps %zmm0, %ymm0 # sched: [7:1.00]
1453 ; CHECK-NEXT: retq # sched: [7:1.00]
1511 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
14541512 %b = sitofp <8 x i64> %a to <8 x float>
14551513 ret <8 x float> %b
14561514 }
14611519 ; CHECK-NEXT: vcvtqq2ps %zmm0, %ymm0 # sched: [7:1.00]
14621520 ; CHECK-NEXT: vcvtqq2ps %zmm1, %ymm1 # sched: [7:1.00]
14631521 ; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 # sched: [3:1.00]
1464 ; CHECK-NEXT: retq # sched: [7:1.00]
1522 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
14651523 %b = sitofp <16 x i64> %a to <16 x float>
14661524 ret <16 x float> %b
14671525 }
14701528 ; CHECK-LABEL: slto8f64:
14711529 ; CHECK: # BB#0:
14721530 ; CHECK-NEXT: vcvtqq2pd %zmm0, %zmm0 # sched: [4:0.33]
1473 ; CHECK-NEXT: retq # sched: [7:1.00]
1531 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
14741532 %b = sitofp <8 x i64> %a to <8 x double>
14751533 ret <8 x double> %b
14761534 }
14801538 ; CHECK: # BB#0:
14811539 ; CHECK-NEXT: vcvtqq2pd %zmm0, %zmm0 # sched: [4:0.33]
14821540 ; CHECK-NEXT: vcvtqq2pd %zmm1, %zmm1 # sched: [4:0.33]
1483 ; CHECK-NEXT: retq # sched: [7:1.00]
1541 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
14841542 %b = sitofp <16 x i64> %a to <16 x double>
14851543 ret <16 x double> %b
14861544 }
14891547 ; CHECK-LABEL: ulto8f32:
14901548 ; CHECK: # BB#0:
14911549 ; CHECK-NEXT: vcvtuqq2ps %zmm0, %ymm0 # sched: [7:1.00]
1492 ; CHECK-NEXT: retq # sched: [7:1.00]
1550 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
14931551 %b = uitofp <8 x i64> %a to <8 x float>
14941552 ret <8 x float> %b
14951553 }
15001558 ; CHECK-NEXT: vcvtuqq2ps %zmm0, %ymm0 # sched: [7:1.00]
15011559 ; CHECK-NEXT: vcvtuqq2ps %zmm1, %ymm1 # sched: [7:1.00]
15021560 ; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 # sched: [3:1.00]
1503 ; CHECK-NEXT: retq # sched: [7:1.00]
1561 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
15041562 %b = uitofp <16 x i64> %a to <16 x float>
15051563 ret <16 x float> %b
15061564 }
15101568 ; CHECK: # BB#0:
15111569 ; CHECK-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
15121570 ; CHECK-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1} # sched: [7:1.00]
1513 ; CHECK-NEXT: retq # sched: [7:1.00]
1571 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
15141572 ; VLBW-LABEL: uito8f64_mask:
15151573 ; VLBW: # BB#0:
15161574 ; VLBW-NEXT: kmovd %edi, %k1
15311589 ; CHECK: # BB#0:
15321590 ; CHECK-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
15331591 ; CHECK-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z} # sched: [7:1.00]
1534 ; CHECK-NEXT: retq # sched: [7:1.00]
1592 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
15351593 ; VLBW-LABEL: uito8f64_maskz:
15361594 ; VLBW: # BB#0:
15371595 ; VLBW-NEXT: kmovd %edi, %k1
15521610 ; CHECK-LABEL: uito4f64:
15531611 ; CHECK: # BB#0:
15541612 ; CHECK-NEXT: vcvtudq2pd %xmm0, %ymm0 # sched: [7:1.00]
1555 ; CHECK-NEXT: retq # sched: [7:1.00]
1613 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
15561614 %b = uitofp <4 x i32> %a to <4 x double>
15571615 ret <4 x double> %b
15581616 }
15611619 ; CHECK-LABEL: uito16f32:
15621620 ; CHECK: # BB#0:
15631621 ; CHECK-NEXT: vcvtudq2ps %zmm0, %zmm0 # sched: [4:0.33]
1564 ; CHECK-NEXT: retq # sched: [7:1.00]
1622 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
15651623 %b = uitofp <16 x i32> %a to <16 x float>
15661624 ret <16 x float> %b
15671625 }
15701628 ; CHECK-LABEL: uito8f64:
15711629 ; CHECK: # BB#0:
15721630 ; CHECK-NEXT: vcvtudq2pd %ymm0, %zmm0 # sched: [7:1.00]
1573 ; CHECK-NEXT: retq # sched: [7:1.00]
1631 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
15741632 %b = uitofp <8 x i32> %a to <8 x double>
15751633 ret <8 x double> %b
15761634 }
15791637 ; CHECK-LABEL: uito8f32:
15801638 ; CHECK: # BB#0:
15811639 ; CHECK-NEXT: vcvtudq2ps %ymm0, %ymm0 # sched: [4:0.33]
1582 ; CHECK-NEXT: retq # sched: [7:1.00]
1640 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
15831641 %b = uitofp <8 x i32> %a to <8 x float>
15841642 ret <8 x float> %b
15851643 }
15881646 ; CHECK-LABEL: uito4f32:
15891647 ; CHECK: # BB#0:
15901648 ; CHECK-NEXT: vcvtudq2ps %xmm0, %xmm0 # sched: [4:0.33]
1591 ; CHECK-NEXT: retq # sched: [7:1.00]
1649 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
15921650 %b = uitofp <4 x i32> %a to <4 x float>
15931651 ret <4 x float> %b
15941652 }
15971655 ; CHECK-LABEL: fptosi:
15981656 ; CHECK: # BB#0:
15991657 ; CHECK-NEXT: vcvttss2si %xmm0, %eax # sched: [7:1.00]
1600 ; CHECK-NEXT: retq # sched: [7:1.00]
1658 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
16011659 %b = fptosi float %a to i32
16021660 ret i32 %b
16031661 }
16061664 ; CHECK-LABEL: fptoui:
16071665 ; CHECK: # BB#0:
16081666 ; CHECK-NEXT: vcvttss2usi %xmm0, %eax
1609 ; CHECK-NEXT: retq # sched: [7:1.00]
1667 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
16101668 %b = fptoui float %a to i32
16111669 ret i32 %b
16121670 }
16151673 ; CHECK-LABEL: uitof32:
16161674 ; CHECK: # BB#0:
16171675 ; CHECK-NEXT: vcvtusi2ssl %edi, %xmm0, %xmm0 # sched: [5:1.00]
1618 ; CHECK-NEXT: retq # sched: [7:1.00]
1676 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
16191677 %b = uitofp i32 %a to float
16201678 ret float %b
16211679 }
16241682 ; CHECK-LABEL: uitof64:
16251683 ; CHECK: # BB#0:
16261684 ; CHECK-NEXT: vcvtusi2sdl %edi, %xmm0, %xmm0 # sched: [5:1.00]
1627 ; CHECK-NEXT: retq # sched: [7:1.00]
1685 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
16281686 %b = uitofp i32 %a to double
16291687 ret double %b
16301688 }
16361694 ; CHECK-NEXT: vpcmpgtd %zmm0, %zmm1, %k0 # sched: [3:1.00]
16371695 ; CHECK-NEXT: vpmovm2d %k0, %zmm0
16381696 ; CHECK-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.33]
1639 ; CHECK-NEXT: retq # sched: [7:1.00]
1697 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
16401698 %mask = icmp slt <16 x i32> %a, zeroinitializer
16411699 %1 = sitofp <16 x i1> %mask to <16 x float>
16421700 ret <16 x float> %1
16471705 ; CHECK: # BB#0:
16481706 ; CHECK-NEXT: vpmovsxbd %xmm0, %zmm0 # sched: [3:1.00]
16491707 ; CHECK-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.33]
1650 ; CHECK-NEXT: retq # sched: [7:1.00]
1708 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
16511709 %1 = sitofp <16 x i8> %a to <16 x float>
16521710 ret <16 x float> %1
16531711 }
16571715 ; CHECK: # BB#0:
16581716 ; CHECK-NEXT: vpmovsxwd %ymm0, %zmm0 # sched: [3:1.00]
16591717 ; CHECK-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.33]
1660 ; CHECK-NEXT: retq # sched: [7:1.00]
1718 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
16611719 %1 = sitofp <16 x i16> %a to <16 x float>
16621720 ret <16 x float> %1
16631721 }
16671725 ; CHECK: # BB#0:
16681726 ; CHECK-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [3:1.00]
16691727 ; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00]
1670 ; CHECK-NEXT: retq # sched: [7:1.00]
1728 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
16711729 %1 = sitofp <8 x i16> %a to <8 x double>
16721730 ret <8 x double> %1
16731731 }
16791737 ; CHECK-NEXT: vpslld $24, %ymm0, %ymm0 # sched: [1:0.50]
16801738 ; CHECK-NEXT: vpsrad $24, %ymm0, %ymm0 # sched: [1:0.50]
16811739 ; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00]
1682 ; CHECK-NEXT: retq # sched: [7:1.00]
1740 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
16831741 %1 = sitofp <8 x i8> %a to <8 x double>
16841742 ret <8 x double> %1
16851743 }
16911749 ; CHECK-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [7:1.00]
16921750 ; CHECK-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [3:1.00]
16931751 ; CHECK-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [7:1.00]
1694 ; CHECK-NEXT: retq # sched: [7:1.00]
1752 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
16951753 %b = sitofp <16 x i8> %a to <16 x double>
16961754 ret <16 x double> %b
16971755 }
17421800 ; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00]
17431801 ; CHECK-NEXT: vpmovm2d %k0, %ymm1
17441802 ; CHECK-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [7:1.00]
1745 ; CHECK-NEXT: retq # sched: [7:1.00]
1803 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
17461804 %cmpres = fcmp ogt <16 x double> %a, zeroinitializer
17471805 %1 = sitofp <16 x i1> %cmpres to <16 x double>
17481806 ret <16 x double> %1
17811839 ; CHECK-NEXT: vcmpltpd %zmm0, %zmm1, %k0 # sched: [3:1.00]
17821840 ; CHECK-NEXT: vpmovm2d %k0, %ymm0
17831841 ; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00]
1784 ; CHECK-NEXT: retq # sched: [7:1.00]
1842 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
17851843 %cmpres = fcmp ogt <8 x double> %a, zeroinitializer
17861844 %1 = sitofp <8 x i1> %cmpres to <8 x double>
17871845 ret <8 x double> %1
18211879 ; CHECK-NEXT: vcmpltps %ymm0, %ymm1, %k0 # sched: [3:1.00]
18221880 ; CHECK-NEXT: vpmovm2d %k0, %ymm0
18231881 ; CHECK-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [4:0.33]
1824 ; CHECK-NEXT: retq # sched: [7:1.00]
1882 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
18251883 %cmpres = fcmp ogt <8 x float> %a, zeroinitializer
18261884 %1 = sitofp <8 x i1> %cmpres to <8 x float>
18271885 ret <8 x float> %1
18341892 ; CHECK-NEXT: vcmpltps %xmm0, %xmm1, %k0 # sched: [3:1.00]
18351893 ; CHECK-NEXT: vpmovm2d %k0, %xmm0
18361894 ; CHECK-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.33]
1837 ; CHECK-NEXT: retq # sched: [7:1.00]
1895 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
18381896 ; VLDQ-LABEL: sbto4f32:
18391897 ; VLDQ: # BB#0:
18401898 ; VLDQ-NEXT: vxorps %xmm1, %xmm1, %xmm1
18621920 ; CHECK-NEXT: vcmpltpd %ymm0, %ymm1, %k0 # sched: [3:1.00]
18631921 ; CHECK-NEXT: vpmovm2d %k0, %xmm0
18641922 ; CHECK-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [7:1.00]
1865 ; CHECK-NEXT: retq # sched: [7:1.00]
1923 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
18661924 ; VLDQ-LABEL: sbto4f64:
18671925 ; VLDQ: # BB#0:
18681926 ; VLDQ-NEXT: vxorpd %xmm1, %xmm1, %xmm1
18901948 ; CHECK-NEXT: vcmpltps %xmm0, %xmm1, %k0 # sched: [3:1.00]
18911949 ; CHECK-NEXT: vpmovm2d %k0, %xmm0
18921950 ; CHECK-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.33]
1893 ; CHECK-NEXT: retq # sched: [7:1.00]
1951 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
18941952 ; VLDQ-LABEL: sbto2f32:
18951953 ; VLDQ: # BB#0:
18961954 ; VLDQ-NEXT: vxorps %xmm1, %xmm1, %xmm1
19181976 ; CHECK-NEXT: vcmpltpd %xmm0, %xmm1, %k0 # sched: [3:1.00]
19191977 ; CHECK-NEXT: vpmovm2q %k0, %xmm0
19201978 ; CHECK-NEXT: vcvtqq2pd %xmm0, %xmm0 # sched: [4:0.33]
1921 ; CHECK-NEXT: retq # sched: [7:1.00]
1979 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
19221980 ; VLDQ-LABEL: sbto2f64:
19231981 ; VLDQ: # BB#0:
19241982 ; VLDQ-NEXT: vxorpd %xmm1, %xmm1, %xmm1
19482006 ; CHECK: # BB#0:
19492007 ; CHECK-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [3:1.00]
19502008 ; CHECK-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.33]
1951 ; CHECK-NEXT: retq # sched: [7:1.00]
2009 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
19522010 %b = uitofp <16 x i8> %a to <16 x float>
19532011 ret <16 x float>%b
19542012 }
19592017 ; CHECK-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:0.50]
19602018 ; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
19612019 ; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00]
1962 ; CHECK-NEXT: retq # sched: [7:1.00]
2020 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
19632021 %b = uitofp <8 x i8> %a to <8 x double>
19642022 ret <8 x double> %b
19652023 }
19692027 ; CHECK: # BB#0:
19702028 ; CHECK-NEXT: vpmovsxwd %ymm0, %zmm0 # sched: [3:1.00]
19712029 ; CHECK-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.33]
1972 ; CHECK-NEXT: retq # sched: [7:1.00]
2030 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
19732031 %b = sitofp <16 x i16> %a to <16 x float>
19742032 ret <16 x float> %b
19752033 }
19792037 ; CHECK: # BB#0:
19802038 ; CHECK-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [3:1.00]
19812039 ; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00]
1982 ; CHECK-NEXT: retq # sched: [7:1.00]
2040 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
19832041 %b = sitofp <8 x i16> %a to <8 x double>
19842042 ret <8 x double> %b
19852043 }
19912049 ; CHECK-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [7:1.00]
19922050 ; CHECK-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [3:1.00]
19932051 ; CHECK-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [7:1.00]
1994 ; CHECK-NEXT: retq # sched: [7:1.00]
2052 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
19952053 %b = sitofp <16 x i16> %a to <16 x double>
19962054 ret <16 x double> %b
19972055 }
20032061 ; CHECK-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [7:1.00]
20042062 ; CHECK-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [3:1.00]
20052063 ; CHECK-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [7:1.00]
2006 ; CHECK-NEXT: retq # sched: [7:1.00]
2064 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
20072065 %b = uitofp <16 x i8> %a to <16 x double>
20082066 ret <16 x double> %b
20092067 }
20132071 ; CHECK: # BB#0:
20142072 ; CHECK-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [3:1.00]
20152073 ; CHECK-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.33]
2016 ; CHECK-NEXT: retq # sched: [7:1.00]
2074 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
20172075 %b = uitofp <16 x i16> %a to <16 x float>
20182076 ret <16 x float> %b
20192077 }
20232081 ; CHECK: # BB#0:
20242082 ; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
20252083 ; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00]
2026 ; CHECK-NEXT: retq # sched: [7:1.00]
2084 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
20272085 %b = uitofp <8 x i16> %a to <8 x double>
20282086 ret <8 x double> %b
20292087 }
20352093 ; CHECK-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [7:1.00]
20362094 ; CHECK-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [3:1.00]
20372095 ; CHECK-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [7:1.00]
2038 ; CHECK-NEXT: retq # sched: [7:1.00]
2096 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
20392097 %b = uitofp <16 x i16> %a to <16 x double>
20402098 ret <16 x double> %b
20412099 }
20442102 ; CHECK-LABEL: sito16f32:
20452103 ; CHECK: # BB#0:
20462104 ; CHECK-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.33]
2047 ; CHECK-NEXT: retq # sched: [7:1.00]
2105 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
20482106 %b = sitofp <16 x i32> %a to <16 x float>
20492107 ret <16 x float> %b
20502108 }
20562114 ; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm0 # sched: [3:1.00]
20572115 ; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm1 # sched: [7:1.00]
20582116 ; CHECK-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33]
2059 ; CHECK-NEXT: retq # sched: [7:1.00]
2117 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
20602118 %b = sitofp <16 x i32> %a to <16 x double>
20612119 ret <16 x double> %b
20622120 }
20662124 ; CHECK: # BB#0:
20672125 ; CHECK-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [3:1.00]
20682126 ; CHECK-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.33]
2069 ; CHECK-NEXT: retq # sched: [7:1.00]
2127 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
20702128 %b = uitofp <16 x i16> %a to <16 x float>
20712129 ret <16 x float> %b
20722130 }
20782136 ; CHECK-NEXT: vpcmpgtd %zmm0, %zmm1, %k1 # sched: [3:1.00]
20792137 ; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [8:0.50]
20802138 ; CHECK-NEXT: vcvtudq2ps %zmm0, %zmm0 # sched: [4:0.33]
2081 ; CHECK-NEXT: retq # sched: [7:1.00]
2139 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
20822140 %mask = icmp slt <16 x i32> %a, zeroinitializer
20832141 %1 = uitofp <16 x i1> %mask to <16 x float>
20842142 ret <16 x float> %1
20952153 ; CHECK-NEXT: kshiftrw $8, %k1, %k1 # sched: [3:1.00]
20962154 ; CHECK-NEXT: vpbroadcastd %eax, %ymm1 {%k1} {z} # sched: [3:1.00]
20972155 ; CHECK-NEXT: vcvtudq2pd %ymm1, %zmm1 # sched: [7:1.00]
2098 ; CHECK-NEXT: retq # sched: [7:1.00]
2156 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
20992157 %mask = icmp slt <16 x i32> %a, zeroinitializer
21002158 %1 = uitofp <16 x i1> %mask to <16 x double>
21012159 ret <16 x double> %1
21082166 ; CHECK-NEXT: vpcmpgtd %ymm0, %ymm1, %k1 # sched: [3:1.00]
21092167 ; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z} # sched: [8:0.50]
21102168 ; CHECK-NEXT: vcvtudq2ps %ymm0, %ymm0 # sched: [4:0.33]
2111 ; CHECK-NEXT: retq # sched: [7:1.00]
2169 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
21122170 %mask = icmp slt <8 x i32> %a, zeroinitializer
21132171 %1 = uitofp <8 x i1> %mask to <8 x float>
21142172 ret <8 x float> %1
21212179 ; CHECK-NEXT: vpcmpgtd %ymm0, %ymm1, %k1 # sched: [3:1.00]
21222180 ; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z} # sched: [8:0.50]
21232181 ; CHECK-NEXT: vcvtudq2pd %ymm0, %zmm0 # sched: [7:1.00]
2124 ; CHECK-NEXT: retq # sched: [7:1.00]
2182 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
21252183 %mask = icmp slt <8 x i32> %a, zeroinitializer
21262184 %1 = uitofp <8 x i1> %mask to <8 x double>
21272185 ret <8 x double> %1
21342192 ; CHECK-NEXT: vpcmpgtd %xmm0, %xmm1, %k1 # sched: [3:1.00]
21352193 ; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [7:0.50]
21362194 ; CHECK-NEXT: vcvtudq2ps %xmm0, %xmm0 # sched: [4:0.33]
2137 ; CHECK-NEXT: retq # sched: [7:1.00]
2195 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
21382196 %mask = icmp slt <4 x i32> %a, zeroinitializer
21392197 %1 = uitofp <4 x i1> %mask to <4 x float>
21402198 ret <4 x float> %1
21472205 ; CHECK-NEXT: vpcmpgtd %xmm0, %xmm1, %k1 # sched: [3:1.00]
21482206 ; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [7:0.50]
21492207 ; CHECK-NEXT: vcvtudq2pd %xmm0, %ymm0 # sched: [7:1.00]
2150 ; CHECK-NEXT: retq # sched: [7:1.00]
2208 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
21512209 %mask = icmp slt <4 x i32> %a, zeroinitializer
21522210 %1 = uitofp <4 x i1> %mask to <4 x double>
21532211 ret <4 x double> %1
21612219 ; CHECK-NEXT: vpcmpltuq %xmm1, %xmm0, %k1 # sched: [3:1.00]
21622220 ; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [7:0.50]
21632221 ; CHECK-NEXT: vcvtudq2ps %xmm0, %xmm0 # sched: [4:0.33]
2164 ; CHECK-NEXT: retq # sched: [7:1.00]
2222 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
21652223 %mask = icmp ult <2 x i32> %a, zeroinitializer
21662224 %1 = uitofp <2 x i1> %mask to <2 x float>
21672225 ret <2 x float> %1
21752233 ; CHECK-NEXT: vpcmpltuq %xmm1, %xmm0, %k1 # sched: [3:1.00]
21762234 ; CHECK-NEXT: vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [7:0.50]
21772235 ; CHECK-NEXT: vcvtuqq2pd %xmm0, %xmm0 # sched: [4:0.33]
2178 ; CHECK-NEXT: retq # sched: [7:1.00]
2236 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
21792237 ; VLDQ-LABEL: ubto2f64:
21802238 ; VLDQ: # BB#0:
21812239 ; VLDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
22072265 ; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
22082266 ; CHECK-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00]
22092267 ; CHECK-NEXT: vpmovzxbw {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [9:1.00]
2210 ; CHECK-NEXT: retq # sched: [7:1.00]
2268 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
2269 ; SKX-LABEL: zext_8x8mem_to_8x16:
2270 ; SKX: # BB#0:
2271 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
2272 ; SKX-NEXT: vpmovw2m %xmm0, %k1
2273 ; SKX-NEXT: vpmovzxbw {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
2274 ; SKX-NEXT: retq
22112275 %a = load <8 x i8>,<8 x i8> *%i,align 1
22122276 %x = zext <8 x i8> %a to <8 x i16>
22132277 %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer
22202284 ; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
22212285 ; CHECK-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00]
22222286 ; CHECK-NEXT: vpmovsxbw (%rdi), %xmm0 {%k1} {z} # sched: [9:1.00]
2223 ; CHECK-NEXT: retq # sched: [7:1.00]
2287 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
2288 ; SKX-LABEL: sext_8x8mem_to_8x16:
2289 ; SKX: # BB#0:
2290 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
2291 ; SKX-NEXT: vpmovw2m %xmm0, %k1
2292 ; SKX-NEXT: vpmovsxbw (%rdi), %xmm0 {%k1} {z}
2293 ; SKX-NEXT: retq
22242294 %a = load <8 x i8>,<8 x i8> *%i,align 1
22252295 %x = sext <8 x i8> %a to <8 x i16>
22262296 %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer
22342304 ; CHECK-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50]
22352305 ; CHECK-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:1.00]
22362306 ; CHECK-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [10:1.00]
2237 ; CHECK-NEXT: retq # sched: [7:1.00]
2307 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
2308 ; SKX-LABEL: zext_16x8mem_to_16x16:
2309 ; SKX: # BB#0:
2310 ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
2311 ; SKX-NEXT: vpmovb2m %xmm0, %k1
2312 ; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
2313 ; SKX-NEXT: retq
22382314 %a = load <16 x i8>,<16 x i8> *%i,align 1
22392315 %x = zext <16 x i8> %a to <16 x i16>
22402316 %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
22472323 ; CHECK-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50]
22482324 ; CHECK-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:1.00]
22492325 ; CHECK-NEXT: vpmovsxbw (%rdi), %ymm0 {%k1} {z} # sched: [10:1.00]
2250 ; CHECK-NEXT: retq # sched: [7:1.00]
2326 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
2327 ; SKX-LABEL: sext_16x8mem_to_16x16:
2328 ; SKX: # BB#0:
2329 ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
2330 ; SKX-NEXT: vpmovb2m %xmm0, %k1
2331 ; SKX-NEXT: vpmovsxbw (%rdi), %ymm0 {%k1} {z}
2332 ; SKX-NEXT: retq
22512333 %a = load <16 x i8>,<16 x i8> *%i,align 1
22522334 %x = sext <16 x i8> %a to <16 x i16>
22532335 %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
22582340 ; CHECK-LABEL: zext_16x8_to_16x16:
22592341 ; CHECK: # BB#0:
22602342 ; CHECK-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00]
2261 ; CHECK-NEXT: retq # sched: [7:1.00]
2343 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
22622344 %x = zext <16 x i8> %a to <16 x i16>
22632345 ret <16 x i16> %x
22642346 }
22692351 ; CHECK-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:0.50]
22702352 ; CHECK-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:1.00]
22712353 ; CHECK-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00]
2272 ; CHECK-NEXT: retq # sched: [7:1.00]
2354 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
2355 ; SKX-LABEL: zext_16x8_to_16x16_mask:
2356 ; SKX: # BB#0:
2357 ; SKX-NEXT: vpsllw $7, %xmm1, %xmm1
2358 ; SKX-NEXT: vpmovb2m %xmm1, %k1
2359 ; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
2360 ; SKX-NEXT: retq
22732361 %x = zext <16 x i8> %a to <16 x i16>
22742362 %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
22752363 ret <16 x i16> %ret
22792367 ; CHECK-LABEL: sext_16x8_to_16x16:
22802368 ; CHECK: # BB#0:
22812369 ; CHECK-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [3:1.00]
2282 ; CHECK-NEXT: retq # sched: [7:1.00]
2370 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
22832371 %x = sext <16 x i8> %a to <16 x i16>
22842372 ret <16 x i16> %x
22852373 }
22902378 ; CHECK-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:0.50]
22912379 ; CHECK-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:1.00]
22922380 ; CHECK-NEXT: vpmovsxbw %xmm0, %ymm0 {%k1} {z} # sched: [3:1.00]
2293 ; CHECK-NEXT: retq # sched: [7:1.00]
2381 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
2382 ; SKX-LABEL: sext_16x8_to_16x16_mask:
2383 ; SKX: # BB#0:
2384 ; SKX-NEXT: vpsllw $7, %xmm1, %xmm1
2385 ; SKX-NEXT: vpmovb2m %xmm1, %k1
2386 ; SKX-NEXT: vpmovsxbw %xmm0, %ymm0 {%k1} {z}
2387 ; SKX-NEXT: retq
22942388 %x = sext <16 x i8> %a to <16 x i16>
22952389 %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
22962390 ret <16 x i16> %ret
23022396 ; CHECK-NEXT: vpsllw $7, %ymm0, %ymm0 # sched: [1:0.50]
23032397 ; CHECK-NEXT: vpmovb2m %ymm0, %k1 # sched: [1:1.00]
23042398 ; CHECK-NEXT: vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero,mem[16],zero,mem[17],zero,mem[18],zero,mem[19],zero,mem[20],zero,mem[21],zero,mem[22],zero,mem[23],zero,mem[24],zero,mem[25],zero,mem[26],zero,mem[27],zero,mem[28],zero,mem[29],zero,mem[30],zero,mem[31],zero sched: [10:1.00]
2305 ; CHECK-NEXT: retq # sched: [7:1.00]
2399 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
2400 ; SKX-LABEL: zext_32x8mem_to_32x16:
2401 ; SKX: # BB#0:
2402 ; SKX-NEXT: vpsllw $7, %ymm0, %ymm0
2403 ; SKX-NEXT: vpmovb2m %ymm0, %k1
2404 ; SKX-NEXT: vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero,mem[16],zero,mem[17],zero,mem[18],zero,mem[19],zero,mem[20],zero,mem[21],zero,mem[22],zero,mem[23],zero,mem[24],zero,mem[25],zero,mem[26],zero,mem[27],zero,mem[28],zero,mem[29],zero,mem[30],zero,mem[31],zero
2405 ; SKX-NEXT: retq
23062406 %a = load <32 x i8>,<32 x i8> *%i,align 1
23072407 %x = zext <32 x i8> %a to <32 x i16>
23082408 %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
23152415 ; CHECK-NEXT: vpsllw $7, %ymm0, %ymm0 # sched: [1:0.50]
23162416 ; CHECK-NEXT: vpmovb2m %ymm0, %k1 # sched: [1:1.00]
23172417 ; CHECK-NEXT: vpmovsxbw (%rdi), %zmm0 {%k1} {z} # sched: [10:1.00]
2318 ; CHECK-NEXT: retq # sched: [7:1.00]
2418 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
2419 ; SKX-LABEL: sext_32x8mem_to_32x16:
2420 ; SKX: # BB#0:
2421 ; SKX-NEXT: vpsllw $7, %ymm0, %ymm0
2422 ; SKX-NEXT: vpmovb2m %ymm0, %k1
2423 ; SKX-NEXT: vpmovsxbw (%rdi), %zmm0 {%k1} {z}
2424 ; SKX-NEXT: retq
23192425 %a = load <32 x i8>,<32 x i8> *%i,align 1
23202426 %x = sext <32 x i8> %a to <32 x i16>
23212427 %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
23262432 ; CHECK-LABEL: zext_32x8_to_32x16:
23272433 ; CHECK: # BB#0:
23282434 ; CHECK-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero sched: [3:1.00]
2329 ; CHECK-NEXT: retq # sched: [7:1.00]
2435 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
2436 ; SKX-LABEL: zext_32x8_to_32x16:
2437 ; SKX: # BB#0:
2438 ; SKX-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
2439 ; SKX-NEXT: retq
23302440 %x = zext <32 x i8> %a to <32 x i16>
23312441 ret <32 x i16> %x
23322442 }
23372447 ; CHECK-NEXT: vpsllw $7, %ymm1, %ymm1 # sched: [1:0.50]
23382448 ; CHECK-NEXT: vpmovb2m %ymm1, %k1 # sched: [1:1.00]
23392449 ; CHECK-NEXT: vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero sched: [3:1.00]
2340 ; CHECK-NEXT: retq # sched: [7:1.00]
2450 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
2451 ; SKX-LABEL: zext_32x8_to_32x16_mask:
2452 ; SKX: # BB#0:
2453 ; SKX-NEXT: vpsllw $7, %ymm1, %ymm1
2454 ; SKX-NEXT: vpmovb2m %ymm1, %k1
2455 ; SKX-NEXT: vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
2456 ; SKX-NEXT: retq
23412457 %x = zext <32 x i8> %a to <32 x i16>
23422458 %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
23432459 ret <32 x i16> %ret
23472463 ; CHECK-LABEL: sext_32x8_to_32x16:
23482464 ; CHECK: # BB#0:
23492465 ; CHECK-NEXT: vpmovsxbw %ymm0, %zmm0 # sched: [3:1.00]
2350 ; CHECK-NEXT: retq # sched: [7:1.00]
2466 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
2467 ; SKX-LABEL: sext_32x8_to_32x16:
2468 ; SKX: # BB#0:
2469 ; SKX-NEXT: vpmovsxbw %ymm0, %zmm0
2470 ; SKX-NEXT: retq
23512471 %x = sext <32 x i8> %a to <32 x i16>
23522472 ret <32 x i16> %x
23532473 }
23582478 ; CHECK-NEXT: vpsllw $7, %ymm1, %ymm1 # sched: [1:0.50]
23592479 ; CHECK-NEXT: vpmovb2m %ymm1, %k1 # sched: [1:1.00]
23602480 ; CHECK-NEXT: vpmovsxbw %ymm0, %zmm0 {%k1} {z} # sched: [3:1.00]
2361 ; CHECK-NEXT: retq # sched: [7:1.00]
2481 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
2482 ; SKX-LABEL: sext_32x8_to_32x16_mask:
2483 ; SKX: # BB#0:
2484 ; SKX-NEXT: vpsllw $7, %ymm1, %ymm1
2485 ; SKX-NEXT: vpmovb2m %ymm1, %k1
2486 ; SKX-NEXT: vpmovsxbw %ymm0, %zmm0 {%k1} {z}
2487 ; SKX-NEXT: retq
23622488 %x = sext <32 x i8> %a to <32 x i16>
23632489 %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
23642490 ret <32 x i16> %ret
23702496 ; CHECK-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
23712497 ; CHECK-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [3:1.00]
23722498 ; CHECK-NEXT: vpmovzxbd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [9:1.00]
2373 ; CHECK-NEXT: retq # sched: [7:1.00]
2499 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
2500 ; SKX-LABEL: zext_4x8mem_to_4x32:
2501 ; SKX: # BB#0:
2502 ; SKX-NEXT: vpslld $31, %xmm0, %xmm0
2503 ; SKX-NEXT: vptestmd %xmm0, %xmm0, %k1
2504 ; SKX-NEXT: vpmovzxbd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
2505 ; SKX-NEXT: retq
23742506 %a = load <4 x i8>,<4 x i8> *%i,align 1
23752507 %x = zext <4 x i8> %a to <4 x i32>
23762508 %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
23832515 ; CHECK-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
23842516 ; CHECK-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [3:1.00]
23852517 ; CHECK-NEXT: vpmovsxbd (%rdi), %xmm0 {%k1} {z} # sched: [9:1.00]
2386 ; CHECK-NEXT: retq # sched: [7:1.00]
2518 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
2519 ; SKX-LABEL: sext_4x8mem_to_4x32:
2520 ; SKX: # BB#0:
2521 ; SKX-NEXT: vpslld $31, %xmm0, %xmm0
2522 ; SKX-NEXT: vptestmd %xmm0, %xmm0, %k1
2523 ; SKX-NEXT: vpmovsxbd (%rdi), %xmm0 {%k1} {z}
2524 ; SKX-NEXT: retq
23872525 %a = load <4 x i8>,<4 x i8> *%i,align 1
23882526 %x = sext <4 x i8> %a to <4 x i32>
23892527 %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
23962534 ; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
23972535 ; CHECK-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00]
23982536 ; CHECK-NEXT: vpmovzxbd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [10:1.00]
2399 ; CHECK-NEXT: retq # sched: [7:1.00]
2537 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
2538 ; SKX-LABEL: zext_8x8mem_to_8x32:
2539 ; SKX: # BB#0:
2540 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
2541 ; SKX-NEXT: vpmovw2m %xmm0, %k1
2542 ; SKX-NEXT: vpmovzxbd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
2543 ; SKX-NEXT: retq
24002544 %a = load <8 x i8>,<8 x i8> *%i,align 1
24012545 %x = zext <8 x i8> %a to <8 x i32>
24022546 %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
24092553 ; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
24102554 ; CHECK-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00]
24112555 ; CHECK-NEXT: vpmovsxbd (%rdi), %ymm0 {%k1} {z} # sched: [10:1.00]
2412 ; CHECK-NEXT: retq # sched: [7:1.00]
2556 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
2557 ; SKX-LABEL: sext_8x8mem_to_8x32:
2558 ; SKX: # BB#0:
2559 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
2560 ; SKX-NEXT: vpmovw2m %xmm0, %k1
2561 ; SKX-NEXT: vpmovsxbd (%rdi), %ymm0 {%k1} {z}
2562 ; SKX-NEXT: retq
24132563 %a = load <8 x i8>,<8 x i8> *%i,align 1
24142564 %x = sext <8 x i8> %a to <8 x i32>
24152565 %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
24222572 ; CHECK-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50]
24232573 ; CHECK-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:1.00]
24242574 ; CHECK-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero sched: [10:1.00]
2425 ; CHECK-NEXT: retq # sched: [7:1.00]
2575 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
2576 ; SKX-LABEL: zext_16x8mem_to_16x32:
2577 ; SKX: # BB#0:
2578 ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
2579 ; SKX-NEXT: vpmovb2m %xmm0, %k1
2580 ; SKX-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero
2581 ; SKX-NEXT: retq
24262582 %a = load <16 x i8>,<16 x i8> *%i,align 1
24272583 %x = zext <16 x i8> %a to <16 x i32>
24282584 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
24352591 ; CHECK-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50]
24362592 ; CHECK-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:1.00]
24372593 ; CHECK-NEXT: vpmovsxbd (%rdi), %zmm0 {%k1} {z} # sched: [10:1.00]
2438 ; CHECK-NEXT: retq # sched: [7:1.00]
2594 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
2595 ; SKX-LABEL: sext_16x8mem_to_16x32:
2596 ; SKX: # BB#0:
2597 ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
2598 ; SKX-NEXT: vpmovb2m %xmm0, %k1
2599 ; SKX-NEXT: vpmovsxbd (%rdi), %zmm0 {%k1} {z}
2600 ; SKX-NEXT: retq
24392601 %a = load <16 x i8>,<16 x i8> *%i,align 1
24402602 %x = sext <16 x i8> %a to <16 x i32>
24412603 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
24482610 ; CHECK-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:0.50]
24492611 ; CHECK-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:1.00]
24502612 ; CHECK-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [3:1.00]
2451 ; CHECK-NEXT: retq # sched: [7:1.00]
2613 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
2614 ; SKX-LABEL: zext_16x8_to_16x32_mask:
2615 ; SKX: # BB#0:
2616 ; SKX-NEXT: vpsllw $7, %xmm1, %xmm1
2617 ; SKX-NEXT: vpmovb2m %xmm1, %k1
2618 ; SKX-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
2619 ; SKX-NEXT: retq
24522620 %x = zext <16 x i8> %a to <16 x i32>
24532621 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
24542622 ret <16 x i32> %ret
24602628 ; CHECK-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:0.50]
24612629 ; CHECK-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:1.00]
24622630 ; CHECK-NEXT: vpmovsxbd %xmm0, %zmm0 {%k1} {z} # sched: [3:1.00]
2463 ; CHECK-NEXT: retq # sched: [7:1.00]
2631 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
2632 ; SKX-LABEL: sext_16x8_to_16x32_mask:
2633 ; SKX: # BB#0:
2634 ; SKX-NEXT: vpsllw $7, %xmm1, %xmm1
2635 ; SKX-NEXT: vpmovb2m %xmm1, %k1
2636 ; SKX-NEXT: vpmovsxbd %xmm0, %zmm0 {%k1} {z}
2637 ; SKX-NEXT: retq
24642638 %x = sext <16 x i8> %a to <16 x i32>
24652639 %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
24662640 ret <16 x i32> %ret
24702644 ; CHECK-LABEL: zext_16x8_to_16x32:
24712645 ; CHECK: # BB#0:
24722646 ; CHECK-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [3:1.00]
2473 ; CHECK-NEXT: retq # sched: [7:1.00]
2647 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
24742648 %x = zext <16 x i8> %i to <16 x i32>
24752649 ret <16 x i32> %x
24762650 }
24792653 ; CHECK-LABEL: sext_16x8_to_16x32:
24802654 ; CHECK: # BB#0:
24812655 ; CHECK-NEXT: vpmovsxbd %xmm0, %zmm0 # sched: [3:1.00]
2482 ; CHECK-NEXT: retq # sched: [7:1.00]
2656 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
24832657 %x = sext <16 x i8> %i to <16 x i32>
24842658 ret <16 x i32> %x
24852659 }
24902664 ; CHECK-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50]
24912665 ; CHECK-NEXT: vptestmq %xmm0, %xmm0, %k1 # sched: [3:1.00]
24922666 ; CHECK-NEXT: vpmovzxbq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [9:1.00]
2493 ; CHECK-NEXT: retq # sched: [7:1.00]
2667 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
2668 ; SKX-LABEL: zext_2x8mem_to_2x64:
2669 ; SKX: # BB#0:
2670 ; SKX-NEXT: vpsllq $63, %xmm0, %xmm0
2671 ; SKX-NEXT: vptestmq %xmm0, %xmm0, %k1
2672 ; SKX-NEXT: vpmovzxbq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
2673 ; SKX-NEXT: retq
24942674 %a = load <2 x i8>,<2 x i8> *%i,align 1
24952675 %x = zext <2 x i8> %a to <2 x i64>
24962676 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
25022682 ; CHECK-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50]
25032683 ; CHECK-NEXT: vptestmq %xmm0, %xmm0, %k1 # sched: [3:1.00]
25042684 ; CHECK-NEXT: vpmovsxbq (%rdi), %xmm0 {%k1} {z} # sched: [9:1.00]
2505 ; CHECK-NEXT: retq # sched: [7:1.00]
2685 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
2686 ; SKX-LABEL: sext_2x8mem_to_2x64mask:
2687 ; SKX: # BB#0:
2688 ; SKX-NEXT: vpsllq $63, %xmm0, %xmm0
2689 ; SKX-NEXT: vptestmq %xmm0, %xmm0, %k1
2690 ; SKX-NEXT: vpmovsxbq (%rdi), %xmm0 {%k1} {z}
2691 ; SKX-NEXT: retq
25062692 %a = load <2 x i8>,<2 x i8> *%i,align 1
25072693 %x = sext <2 x i8> %a to <2 x i64>
25082694 %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
25122698 ; CHECK-LABEL: sext_2x8mem_to_2x64:
25132699 ; CHECK: # BB#0:
25142700 ; CHECK-NEXT: vpmovsxbq (%rdi), %xmm0 # sched: [6:1.00]
2515 ; CHECK-NEXT: retq # sched: [7:1.00]
2701 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
25162702 %a = load <2 x i8>,<2 x i8> *%i,align 1
25172703 %x = sext <2 x i8> %a to <2 x i64>
25182704 ret <2 x i64> %x
25242710 ; CHECK-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
25252711 ; CHECK-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [3:1.00]
25262712 ; CHECK-NEXT: vpmovzxbq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [10:1.00]
2527 ; CHECK-NEXT: retq # sched: [7:1.00]
2713 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
2714 ; SKX-LABEL: zext_4x8mem_to_4x64:
2715 ; SKX: # BB#0:
2716 ; SKX-NEXT: vpslld $31, %xmm0, %xmm0
2717 ; SKX-NEXT: vptestmd %xmm0, %xmm0, %k1
2718 ; SKX-NEXT: vpmovzxbq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
2719 ; SKX-NEXT: retq
25282720 %a = load <4 x i8>,<4 x i8> *%i,align 1
25292721 %x = zext <4 x i8> %a to <4 x i64>
25302722 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
25372729 ; CHECK-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
25382730 ; CHECK-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [3:1.00]
25392731 ; CHECK-NEXT: vpmovsxbq (%rdi), %ymm0 {%k1} {z} # sched: [10:1.00]
2540 ; CHECK-NEXT: retq # sched: [7:1.00]
2732 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
2733 ; SKX-LABEL: sext_4x8mem_to_4x64mask:
2734 ; SKX: # BB#0:
2735 ; SKX-NEXT: vpslld $31, %xmm0, %xmm0
2736 ; SKX-NEXT: vptestmd %xmm0, %xmm0, %k1
2737 ; SKX-NEXT: vpmovsxbq (%rdi), %ymm0 {%k1} {z}
2738 ; SKX-NEXT: retq
25412739 %a = load <4 x i8>,<4 x i8> *%i,align 1
25422740 %x = sext <4 x i8> %a to <4 x i64>
25432741 %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
25482746 ; CHECK-LABEL: sext_4x8mem_to_4x64:
25492747 ; CHECK: # BB#0:
25502748 ; CHECK-NEXT: vpmovsxbq (%rdi), %ymm0 # sched: [8:1.00]
2551 ; CHECK-NEXT: retq # sched: [7:1.00]
2749 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
25522750 %a = load <4 x i8>,<4 x i8> *%i,align 1
25532751 %x = sext <4 x i8> %a to <4 x i64>
25542752 ret <4 x i64> %x
25602758 ; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
25612759 ; CHECK-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00]
25622760 ; CHECK-NEXT: vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero sched: [10:1.00]
2563 ; CHECK-NEXT: retq # sched: [7:1.00]
2761 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
2762 ; SKX-LABEL: zext_8x8mem_to_8x64:
2763 ; SKX: # BB#0:
2764 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
2765 ; SKX-NEXT: vpmovw2m %xmm0, %k1
2766 ; SKX-NEXT: vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero
2767 ; SKX-NEXT: retq
25642768 %a = load <8 x i8>,<8 x i8> *%i,align 1
25652769 %x = zext <8 x i8> %a to <8 x i64>
25662770 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
25732777 ; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
25742778 ; CHECK-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00]
25752779 ; CHECK-NEXT: vpmovsxbq (%rdi), %zmm0 {%k1} {z} # sched: [10:1.00]
2576 ; CHECK-NEXT: retq # sched: [7:1.00]
2780 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
2781 ; SKX-LABEL: sext_8x8mem_to_8x64mask:
2782 ; SKX: # BB#0:
2783 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
2784 ; SKX-NEXT: vpmovw2m %xmm0, %k1
2785 ; SKX-NEXT: vpmovsxbq (%rdi), %zmm0 {%k1} {z}
2786 ; SKX-NEXT: retq
25772787 %a = load <8 x i8>,<8 x i8> *%i,align 1
25782788 %x = sext <8 x i8> %a to <8 x i64>
25792789 %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
25842794 ; CHECK-LABEL: sext_8x8mem_to_8x64:
25852795 ; CHECK: # BB#0:
25862796 ; CHECK-NEXT: vpmovsxbq (%rdi), %zmm0 # sched: [10:1.00]
2587 ; CHECK-NEXT: retq # sched: [7:1.00]
2797 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
25882798 %a = load <8 x i8>,<8 x i8> *%i,align 1
25892799 %x = sext <8 x i8> %a to <8 x i64>
25902800 ret <8 x i64> %x
25962806 ; CHECK-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
25972807 ; CHECK-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [3:1.00]
25982808 ; CHECK-NEXT: vpmovzxwd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [9:1.00]
2599 ; CHECK-NEXT: retq # sched: [7:1.00]
2809 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
2810 ; SKX-LABEL: zext_4x16mem_to_4x32:
2811 ; SKX: # BB#0:
2812 ; SKX-NEXT: vpslld $31, %xmm0, %xmm0
2813 ; SKX-NEXT: vptestmd %xmm0, %xmm0, %k1
2814 ; SKX-NEXT: vpmovzxwd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
2815 ; SKX-NEXT: retq
26002816 %a = load <4 x i16>,<4 x i16> *%i,align 1
26012817 %x = zext <4 x i16> %a to <4 x i32>
26022818 %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
26092825 ; CHECK-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
26102826 ; CHECK-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [3:1.00]
26112827 ; CHECK-NEXT: vpmovsxwd (%rdi), %xmm0 {%k1} {z} # sched: [9:1.00]
2612 ; CHECK-NEXT: retq # sched: [7:1.00]
2828 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
2829 ; SKX-LABEL: sext_4x16mem_to_4x32mask:
2830 ; SKX: # BB#0:
2831 ; SKX-NEXT: vpslld $31, %xmm0, %xmm0
2832 ; SKX-NEXT: vptestmd %xmm0, %xmm0, %k1
2833 ; SKX-NEXT: vpmovsxwd (%rdi), %xmm0 {%k1} {z}
2834 ; SKX-NEXT: retq
26132835 %a = load <4 x i16>,<4 x i16> *%i,align 1
26142836 %x = sext <4 x i16> %a to <4 x i32>
26152837 %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
26202842 ; CHECK-LABEL: sext_4x16mem_to_4x32:
26212843 ; CHECK: # BB#0:
26222844 ; CHECK-NEXT: vpmovsxwd (%rdi), %xmm0 # sched: [6:1.00]
2623 ; CHECK-NEXT: retq # sched: [7:1.00]
2845 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
26242846 %a = load <4 x i16>,<4 x i16> *%i,align 1
26252847 %x = sext <4 x i16> %a to <4 x i32>
26262848 ret <4 x i32> %x
26332855 ; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
26342856 ; CHECK-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00]
26352857 ; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [10:1.00]
2636 ; CHECK-NEXT: retq # sched: [7:1.00]
2858 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
2859 ; SKX-LABEL: zext_8x16mem_to_8x32:
2860 ; SKX: # BB#0:
2861 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
2862 ; SKX-NEXT: vpmovw2m %xmm0, %k1
2863 ; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
2864 ; SKX-NEXT: retq
26372865 %a = load <8 x i16>,<8 x i16> *%i,align 1
26382866 %x = zext <8 x i16> %a to <8 x i32>
26392867 %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
26462874 ; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
26472875 ; CHECK-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00]
26482876 ; CHECK-NEXT: vpmovsxwd (%rdi), %ymm0 {%k1} {z} # sched: [10:1.00]
2649 ; CHECK-NEXT: retq # sched: [7:1.00]
2877 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
2878 ; SKX-LABEL: sext_8x16mem_to_8x32mask:
2879 ; SKX: # BB#0:
2880 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
2881 ; SKX-NEXT: vpmovw2m %xmm0, %k1
2882 ; SKX-NEXT: vpmovsxwd (%rdi), %ymm0 {%k1} {z}
2883 ; SKX-NEXT: retq
26502884 %a = load <8 x i16>,<8 x i16> *%i,align 1
26512885 %x = sext <8 x i16> %a to <8 x i32>
26522886 %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
26572891 ; CHECK-LABEL: sext_8x16mem_to_8x32:
26582892 ; CHECK: # BB#0:
26592893 ; CHECK-NEXT: vpmovsxwd (%rdi), %ymm0 # sched: [9:1.00]
2660 ; CHECK-NEXT: retq # sched: [7:1.00]
2894 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
26612895 %a = load <8 x i16>,<8 x i16> *%i,align 1
26622896 %x = sext <8 x i16> %a to <8 x i32>
26632897 ret <8 x i32> %x
26692903 ; CHECK-NEXT: vpsllw $15, %xmm1, %xmm1 # sched: [1:0.50]
26702904 ; CHECK-NEXT: vpmovw2m %xmm1, %k1 # sched: [1:1.00]
26712905 ; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
2672 ; CHECK-NEXT: retq # sched: [7:1.00]
2906 ; CHECK-NEXT: ret{{[l|q]}} # sched: [7:1.00]
2907 ; SKX-LABEL: zext_8x16_to_8x32mask:
2908 ; SKX: # BB#0:
2909 ; SKX-NEXT: vpsllw $15, %xmm1, %xmm1
2910 ; SKX-NEXT: vpmovw2m %xmm1, %k1
2911 ; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2912 ; SKX-NEXT: retq
26732913 %x = zext <8 x i16> %a to <8 x i32>
26742914 %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
26752915 ret <8 x i32> %ret
26792919 ; CHECK-LABEL: zext_8x16_to_8x32:
26802920 ; CHECK: # BB#0:
26812921 ; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
2682 ; CHECK-NEXT: retq # sched: [7:1.00]
2922