Tree @release_37 (Download .tar.gz)
- ..
- GC
- 2003-08-03-CallArgLiveRanges.ll
- 2003-08-23-DeadBlockTest.ll
- 2003-11-03-GlobalBool.ll
- 2004-02-13-FrameReturnAddress.ll
- 2004-02-14-InefficientStackPointer.ll
- 2004-02-22-Casts.ll
- 2004-03-30-Select-Max.ll
- 2004-04-13-FPCMOV-Crash.ll
- 2004-06-10-StackifierCrash.ll
- 2004-10-08-SelectSetCCFold.ll
- 2005-01-17-CycleInDAG.ll
- 2005-02-14-IllegalAssembler.ll
- 2005-05-08-FPStackifierPHI.ll
- 2006-01-19-ISelFoldingBug.ll
- 2006-03-01-InstrSchedBug.ll
- 2006-03-02-InstrSchedBug.ll
- 2006-04-04-CrossBlockCrash.ll
- 2006-04-27-ISelFoldingBug.ll
- 2006-05-01-SchedCausingSpills.ll
- 2006-05-02-InstrSched1.ll
- 2006-05-02-InstrSched2.ll
- 2006-05-08-CoalesceSubRegClass.ll
- 2006-05-08-InstrSched.ll
- 2006-05-11-InstrSched.ll
- 2006-05-17-VectorArg.ll
- 2006-05-22-FPSetEQ.ll
- 2006-05-25-CycleInDAG.ll
- 2006-07-10-InlineAsmAConstraint.ll
- 2006-07-12-InlineAsmQConstraint.ll
- 2006-07-20-InlineAsm.ll
- 2006-07-28-AsmPrint-Long-As-Pointer.ll
- 2006-07-31-SingleRegClass.ll
- 2006-08-07-CycleInDAG.ll
- 2006-08-16-CycleInDAG.ll
- 2006-08-21-ExtraMovInst.ll
- 2006-09-01-CycleInDAG.ll
- 2006-10-02-BoolRetCrash.ll
- 2006-10-09-CycleInDAG.ll
- 2006-10-10-FindModifiedNodeSlotBug.ll
- 2006-10-12-CycleInDAG.ll
- 2006-10-13-CycleInDAG.ll
- 2006-10-19-SwitchUnnecessaryBranching.ll
- 2006-11-12-CSRetCC.ll
- 2006-11-17-IllegalMove.ll
- 2006-11-27-SelectLegalize.ll
- 2006-12-16-InlineAsmCrash.ll
- 2006-12-19-IntelSyntax.ll
- 2007-01-08-InstrSched.ll
- 2007-01-08-X86-64-Pointer.ll
- 2007-01-13-StackPtrIndex.ll
- 2007-01-29-InlineAsm-ir.ll
- 2007-02-04-OrAddrMode.ll
- 2007-02-16-BranchFold.ll
- 2007-02-19-LiveIntervalAssert.ll
- 2007-02-23-DAGCombine-Miscompile.ll
- 2007-02-25-FastCCStack.ll
- 2007-03-01-SpillerCrash.ll
- 2007-03-15-GEP-Idx-Sink.ll
- 2007-03-16-InlineAsm.ll
- 2007-03-18-LiveIntervalAssert.ll
- 2007-03-24-InlineAsmMultiRegConstraint.ll
- 2007-03-24-InlineAsmPModifier.ll
- 2007-03-24-InlineAsmVectorOp.ll
- 2007-03-24-InlineAsmXConstraint.ll
- 2007-03-26-CoalescerBug.ll
- 2007-04-08-InlineAsmCrash.ll
- 2007-04-11-InlineAsmVectorResult.ll
- 2007-04-17-LiveIntervalAssert.ll
- 2007-04-24-Huge-Stack.ll
- 2007-04-24-VectorCrash.ll
- 2007-04-27-InlineAsm-IntMemInput.ll
- 2007-05-05-Personality.ll
- 2007-05-05-VecCastExpand.ll
- 2007-05-14-LiveIntervalAssert.ll
- 2007-05-15-maskmovq.ll
- 2007-05-17-ShuffleISelBug.ll
- 2007-06-04-X86-64-CtorAsmBugs.ll
- 2007-06-28-X86-64-isel.ll
- 2007-06-29-DAGCombinerBug.ll
- 2007-06-29-VecFPConstantCSEBug.ll
- 2007-07-03-GR64ToVR64.ll
- 2007-07-10-StackerAssert.ll
- 2007-07-18-Vector-Extract.ll
- 2007-08-01-LiveVariablesBug.ll
- 2007-08-09-IllegalX86-64Asm.ll
- 2007-08-10-SignExtSubreg.ll
- 2007-08-13-AppendingLinkage.ll
- 2007-09-05-InvalidAsm.ll
- 2007-09-06-ExtWeakAliasee.ll
- 2007-09-27-LDIntrinsics.ll
- 2007-10-04-AvoidEFLAGSCopy.ll
- 2007-10-12-CoalesceExtSubReg.ll
- 2007-10-12-SpillerUnfold1.ll
- 2007-10-12-SpillerUnfold2.ll
- 2007-10-14-CoalescerCrash.ll
- 2007-10-15-CoalescerCrash.ll
- 2007-10-16-CoalescerCrash.ll
- 2007-10-19-SpillerUnfold.ll
- 2007-10-28-inlineasm-q-modifier.ll
- 2007-10-29-ExtendSetCC.ll
- 2007-10-30-LSRCrash.ll
- 2007-10-31-extractelement-i64.ll
- 2007-11-01-ISelCrash.ll
- 2007-11-03-x86-64-q-constraint.ll
- 2007-11-04-LiveIntervalCrash.ll
- 2007-11-04-LiveVariablesBug.ll
- 2007-11-04-rip-immediate-constant.ll
- 2007-11-06-InstrSched.ll
- 2007-11-07-MulBy4.ll
- 2007-11-30-LoadFolding-Bug.ll
- 2007-12-16-BURRSchedCrash.ll
- 2007-12-18-LoadCSEBug.ll
- 2008-01-08-IllegalCMP.ll
- 2008-01-08-SchedulerCrash.ll
- 2008-01-09-LongDoubleSin.ll
- 2008-01-16-FPStackifierAssert.ll
- 2008-01-16-InvalidDAGCombineXform.ll
- 2008-02-05-ISelCrash.ll
- 2008-02-06-LoadFoldingBug.ll
- 2008-02-14-BitMiscompile.ll
- 2008-02-18-TailMergingBug.ll
- 2008-02-20-InlineAsmClobber.ll
- 2008-02-22-LocalRegAllocBug.ll
- 2008-02-25-InlineAsmBug.ll
- 2008-02-25-X86-64-CoalescerBug.ll
- 2008-02-26-AsmDirectMemOp.ll
- 2008-02-27-DeadSlotElimBug.ll
- 2008-02-27-PEICrash.ll
- 2008-03-06-frem-fpstack.ll
- 2008-03-07-APIntBug.ll
- 2008-03-10-RegAllocInfLoop.ll
- 2008-03-12-ThreadLocalAlias.ll
- 2008-03-13-TwoAddrPassCrash.ll
- 2008-03-14-SpillerCrash.ll
- 2008-03-19-DAGCombinerBug.ll
- 2008-03-23-DarwinAsmComments.ll
- 2008-03-25-TwoAddrPassBug.ll
- 2008-03-31-SpillerFoldingBug.ll
- 2008-04-02-unnamedEH.ll
- 2008-04-08-CoalescerCrash.ll
- 2008-04-09-BranchFolding.ll
- 2008-04-15-LiveVariableBug.ll
- 2008-04-16-CoalescerBug.ll
- 2008-04-16-ReMatBug.ll
- 2008-04-17-CoalescerBug.ll
- 2008-04-24-MemCpyBug.ll
- 2008-04-24-pblendw-fold-crash.ll
- 2008-04-26-Asm-Optimize-Imm.ll
- 2008-04-28-CoalescerBug.ll
- 2008-04-28-CyclicSchedUnit.ll
- 2008-05-01-InvalidOrdCompare.ll
- 2008-05-09-PHIElimBug.ll
- 2008-05-09-ShuffleLoweringBug.ll
- 2008-05-12-tailmerge-5.ll
- 2008-05-21-CoalescerBug.ll
- 2008-05-22-FoldUnalignedLoad.ll
- 2008-05-28-CoalescerBug.ll
- 2008-05-28-LocalRegAllocBug.ll
- 2008-06-13-NotVolatileLoadStore.ll
- 2008-06-13-VolatileLoadStore.ll
- 2008-06-16-SubregsBug.ll
- 2008-06-25-VecISelBug.ll
- 2008-07-07-DanglingDeadInsts.ll
- 2008-07-09-ELFSectionAttributes.ll
- 2008-07-11-SHLBy1.ll
- 2008-07-16-CoalescerCrash.ll
- 2008-07-19-movups-spills.ll
- 2008-07-22-CombinerCrash.ll
- 2008-07-23-VSetCC.ll
- 2008-08-06-CmpStride.ll
- 2008-08-06-RewriterBug.ll
- 2008-08-17-UComiCodeGenBug.ll
- 2008-08-19-SubAndFetch.ll
- 2008-08-23-64Bit-maskmovq.ll
- 2008-08-31-EH_RETURN32.ll
- 2008-08-31-EH_RETURN64.ll
- 2008-09-05-sinttofp-2xi32.ll
- 2008-09-09-LinearScanBug.ll
- 2008-09-11-CoalescerBug.ll
- 2008-09-11-CoalescerBug2.ll
- 2008-09-17-inline-asm-1.ll
- 2008-09-18-inline-asm-2.ll
- 2008-09-19-RegAllocBug.ll
- 2008-09-25-sseregparm-1.ll
- 2008-09-26-FrameAddrBug.ll
- 2008-09-29-ReMatBug.ll
- 2008-09-29-VolatileBug.ll
- 2008-10-06-x87ld-nan-1.ll
- 2008-10-06-x87ld-nan-2.ll
- 2008-10-07-SSEISelBug.ll
- 2008-10-11-CallCrash.ll
- 2008-10-13-CoalescerBug.ll
- 2008-10-16-VecUnaryOp.ll
- 2008-10-17-Asm64bitRConstraint.ll
- 2008-10-20-AsmDoubleInI32.ll
- 2008-10-24-FlippedCompare.ll
- 2008-10-27-CoalescerBug.ll
- 2008-10-29-ExpandVAARG.ll
- 2008-11-03-F80VAARG.ll
- 2008-11-06-testb.ll
- 2008-11-13-inlineasm-3.ll
- 2008-11-29-ULT-Sign.ll
- 2008-12-01-loop-iv-used-outside-loop.ll
- 2008-12-01-SpillerAssert.ll
- 2008-12-02-dagcombine-1.ll
- 2008-12-02-dagcombine-2.ll
- 2008-12-02-dagcombine-3.ll
- 2008-12-02-IllegalResultType.ll
- 2008-12-16-dagcombine-4.ll
- 2008-12-19-EarlyClobberBug.ll
- 2008-12-22-dagcombine-5.ll
- 2008-12-23-crazy-address.ll
- 2008-12-23-dagcombine-6.ll
- 2009-01-13-DoubleUpdate.ll
- 2009-01-16-SchedulerBug.ll
- 2009-01-16-UIntToFP.ll
- 2009-01-18-ConstantExprCrash.ll
- 2009-01-25-NoSSE.ll
- 2009-01-26-WrongCheck.ll
- 2009-01-27-NullStrings.ll
- 2009-01-31-BigShift.ll
- 2009-01-31-BigShift2.ll
- 2009-01-31-BigShift3.ll
- 2009-02-01-LargeMask.ll
- 2009-02-03-AnalyzedTwice.ll
- 2009-02-04-sext-i64-gep.ll
- 2009-02-08-CoalescerBug.ll
- 2009-02-09-ivs-different-sizes.ll
- 2009-02-11-codegenprepare-reuse.ll
- 2009-02-12-DebugInfoVLA.ll
- 2009-02-12-InlineAsm-nieZ-constraints.ll
- 2009-02-12-SpillerBug.ll
- 2009-02-21-ExtWeakInitializer.ll
- 2009-02-25-CommuteBug.ll
- 2009-02-26-MachineLICMBug.ll
- 2009-03-03-BitcastLongDouble.ll
- 2009-03-03-BTHang.ll
- 2009-03-05-burr-list-crash.ll
- 2009-03-07-FPConstSelect.ll
- 2009-03-09-APIntCrash.ll
- 2009-03-09-SpillerBug.ll
- 2009-03-10-CoalescerBug.ll
- 2009-03-12-CPAlignBug.ll
- 2009-03-13-PHIElimBug.ll
- 2009-03-16-PHIElimInLPad.ll
- 2009-03-23-i80-fp80.ll
- 2009-03-23-LinearScanBug.ll
- 2009-03-23-MultiUseSched.ll
- 2009-03-25-TestBug.ll
- 2009-03-26-NoImplicitFPBug.ll
- 2009-04-12-FastIselOverflowCrash.ll
- 2009-04-12-picrel.ll
- 2009-04-13-2AddrAssert-2.ll
- 2009-04-13-2AddrAssert.ll
- 2009-04-14-IllegalRegs.ll
- 2009-04-16-SpillerUnfold.ll
- 2009-04-24.ll
- 2009-04-25-CoalescerBug.ll
- 2009-04-27-CoalescerAssert.ll
- 2009-04-27-LiveIntervalsAssert.ll
- 2009-04-27-LiveIntervalsAssert2.ll
- 2009-04-29-IndirectDestOperands.ll
- 2009-04-29-LinearScanBug.ll
- 2009-04-29-RegAllocAssert.ll
- 2009-04-scale.ll
- 2009-05-08-InlineAsmIOffset.ll
- 2009-05-11-tailmerge-crash.ll
- 2009-05-19-SingleElementExtractElement.ll
- 2009-05-23-available_externally.ll
- 2009-05-23-dagcombine-shifts.ll
- 2009-05-28-DAGCombineCrash.ll
- 2009-05-30-ISelBug.ll
- 2009-06-02-RewriterBug.ll
- 2009-06-03-Win64DisableRedZone.ll
- 2009-06-03-Win64SpillXMM.ll
- 2009-06-04-VirtualLiveIn.ll
- 2009-06-05-sitofpCrash.ll
- 2009-06-05-VariableIndexInsert.ll
- 2009-06-05-VZextByteShort.ll
- 2009-06-06-ConcatVectors.ll
- 2009-06-12-x86_64-tail-call-conv-out-of-sync-bug.ll
- 2009-06-15-not-a-tail-call.ll
- 2009-06-18-movlp-shuffle-register.ll
- 2009-07-06-TwoAddrAssert.ll
- 2009-07-07-SplitICmp.ll
- 2009-07-09-ExtractBoolFromVector.ll
- 2009-07-15-CoalescerBug.ll
- 2009-07-16-CoalescerBug.ll
- 2009-07-19-AsmExtraOperands.ll
- 2009-07-20-CoalescerBug.ll
- 2009-07-20-DAGCombineBug.ll
- 2009-08-06-branchfolder-crash.ll
- 2009-08-06-inlineasm.ll
- 2009-08-08-CastError.ll
- 2009-08-12-badswitch.ll
- 2009-08-14-Win64MemoryIndirectArg.ll
- 2009-08-19-LoadNarrowingMiscompile.ll
- 2009-08-23-SubRegReuseUndo.ll
- 2009-09-10-LoadFoldingBug.ll
- 2009-09-10-SpillComments.ll
- 2009-09-16-CoalescerBug.ll
- 2009-09-19-earlyclobber.ll
- 2009-09-21-NoSpillLoopCount.ll
- 2009-09-22-CoalescerBug.ll
- 2009-09-23-LiveVariablesBug.ll
- 2009-10-14-LiveVariablesBug.ll
- 2009-10-16-Scope.ll
- 2009-10-19-atomic-cmp-eflags.ll
- 2009-10-19-EmergencySpill.ll
- 2009-10-25-RewriterBug.ll
- 2009-11-04-SubregCoalescingBug.ll
- 2009-11-13-VirtRegRewriterBug.ll
- 2009-11-16-MachineLICM.ll
- 2009-11-16-UnfoldMemOpBug.ll
- 2009-11-17-UpdateTerminator.ll
- 2009-11-18-TwoAddrKill.ll
- 2009-11-25-ImpDefBug.ll
- 2009-12-01-EarlyClobberBug.ll
- 2009-12-11-TLSNoRedZone.ll
- 20090313-signext.ll
- 2010-01-05-ZExt-Shl.ll
- 2010-01-07-ISelBug.ll
- 2010-01-08-Atomic64Bug.ll
- 2010-01-11-ExtraPHIArg.ll
- 2010-01-13-OptExtBug.ll
- 2010-01-15-SelectionDAGCycle.ll
- 2010-01-18-DbgValue.ll
- 2010-01-19-OptExtBug.ll
- 2010-02-01-DbgValueCrash.ll
- 2010-02-01-TaillCallCrash.ll
- 2010-02-03-DualUndef.ll
- 2010-02-04-SchedulerBug.ll
- 2010-02-11-NonTemporal.ll
- 2010-02-12-CoalescerBug-Impdef.ll
- 2010-02-15-ImplicitDefBug.ll
- 2010-02-19-TailCallRetAddrBug.ll
- 2010-02-23-DAGCombineBug.ll
- 2010-02-23-DIV8rDefinesAX.ll
- 2010-02-23-RematImplicitSubreg.ll
- 2010-02-23-SingleDefPhiJoin.ll
- 2010-03-04-Mul8Bug.ll
- 2010-03-05-ConstantFoldCFG.ll
- 2010-03-05-EFLAGS-Redef.ll
- 2010-03-17-ISelBug.ll
- 2010-04-06-SSEDomainFixCrash.ll
- 2010-04-08-CoalescerBug.ll
- 2010-04-13-AnalyzeBranchCrash.ll
- 2010-04-21-CoalescerBug.ll
- 2010-04-29-CoalescerCrash.ll
- 2010-04-30-LocalAlloc-LandingPad.ll
- 2010-05-03-CoalescerSubRegClobber.ll
- 2010-05-05-LocalAllocEarlyClobber.ll
- 2010-05-06-LocalInlineAsmClobber.ll
- 2010-05-07-ldconvert.ll
- 2010-05-10-DAGCombinerBug.ll
- 2010-05-12-FastAllocKills.ll
- 2010-05-16-nosseconversion.ll
- 2010-05-25-DotDebugLoc.ll
- 2010-05-26-DotDebugLoc.ll
- 2010-05-26-FP_TO_INT-crash.ll
- 2010-05-28-Crash.ll
- 2010-06-01-DeadArg-DbgInfo.ll
- 2010-06-09-FastAllocRegisters.ll
- 2010-06-14-fast-isel-fs-load.ll
- 2010-06-15-FastAllocEarlyCLobber.ll
- 2010-06-24-g-constraint-crash.ll
- 2010-06-25-asm-RA-crash.ll
- 2010-06-25-CoalescerSubRegDefDead.ll
- 2010-06-28-FastAllocTiedOperand.ll
- 2010-06-28-matched-g-constraint.ll
- 2010-07-02-asm-alignstack.ll
- 2010-07-02-UnfoldBug.ll
- 2010-07-06-asm-RIP.ll
- 2010-07-06-DbgCrash.ll
- 2010-07-11-FPStackLoneUse.ll
- 2010-07-13-indirectXconstraint.ll
- 2010-07-15-Crash.ll
- 2010-07-29-SetccSimplify.ll
- 2010-08-04-MaskedSignedCompare.ll
- 2010-08-04-MingWCrash.ll
- 2010-08-04-StackVariable.ll
- 2010-09-01-RemoveCopyByCommutingDef.ll
- 2010-09-16-asmcrash.ll
- 2010-09-16-EmptyFilename.ll
- 2010-09-17-SideEffectsInChain.ll
- 2010-09-30-CMOV-JumpTable-PHI.ll
- 2010-10-08-cmpxchg8b.ll
- 2010-11-02-DbgParameter.ll
- 2010-11-09-MOVLPS.ll
- 2010-11-18-SelectOfExtload.ll
- 2011-01-07-LegalizeTypesCrash.ll
- 2011-01-10-DagCombineHang.ll
- 2011-01-24-DbgValue-Before-Use.ll
- 2011-02-04-FastRegallocNoFP.ll
- 2011-02-12-shuffle.ll
- 2011-02-21-VirtRegRewriter-KillSubReg.ll
- 2011-02-23-UnfoldBug.ll
- 2011-02-27-Fpextend.ll
- 2011-03-02-DAGCombiner.ll
- 2011-03-08-Sched-crash.ll
- 2011-03-09-Physreg-Coalescing.ll
- 2011-03-30-CreateFixedObjCrash.ll
- 2011-04-13-SchedCmpJmp.ll
- 2011-04-19-sclr-bb.ll
- 2011-05-09-loaduse.ll
- 2011-05-26-UnreachableBlockElim.ll
- 2011-05-27-CrossClassCoalescing.ll
- 2011-06-01-fildll.ll
- 2011-06-03-x87chain.ll
- 2011-06-06-fgetsign80bit.ll
- 2011-06-12-FastAllocSpill.ll
- 2011-06-14-mmx-inlineasm.ll
- 2011-06-14-PreschedRegalias.ll
- 2011-06-19-QuicksortCoalescerBug.ll
- 2011-07-13-BadFrameIndexDisplacement.ll
- 2011-08-23-PerformSubCombine128.ll
- 2011-08-23-Trampoline.ll
- 2011-08-29-BlockConstant.ll
- 2011-08-29-InitOrder.ll
- 2011-09-14-valcoalesce.ll
- 2011-09-18-sse2cmp.ll
- 2011-09-21-setcc-bug.ll
- 2011-10-11-SpillDead.ll
- 2011-10-11-srl.ll
- 2011-10-12-MachineCSE.ll
- 2011-10-18-FastISel-VectorParams.ll
- 2011-10-19-LegelizeLoad.ll
- 2011-10-19-widen_vselect.ll
- 2011-10-21-widen-cmp.ll
- 2011-10-27-tstore.ll
- 2011-10-30-padd.ll
- 2011-11-07-LegalizeBuildVector.ll
- 2011-11-22-AVX2-Domains.ll
- 2011-11-30-or.ll
- 2011-12-06-AVXVectorExtractCombine.ll
- 2011-12-06-BitcastVectorGlobal.ll
- 2011-12-08-AVXISelBugs.ll
- 2011-12-15-vec_shift.ll
- 2011-12-26-extractelement-duplicate-load.ll
- 2011-12-28-vselecti8.ll
- 2011-12-8-bitcastintprom.ll
- 2011-20-21-zext-ui2fp.ll
- 2012-01-10-UndefExceptionEdge.ll
- 2012-01-11-split-cv.ll
- 2012-01-12-extract-sv.ll
- 2012-01-16-mfence-nosse-flags.ll
- 2012-01-18-vbitcast.ll
- 2012-02-12-dagco.ll
- 2012-02-14-scalar.ll
- 2012-02-23-mmx-inlineasm.ll
- 2012-02-29-CoalescerBug.ll
- 2012-03-15-build_vector_wl.ll
- 2012-03-20-LargeConstantExpr.ll
- 2012-03-26-PostRALICMBug.ll
- 2012-04-09-TwoAddrPassBug.ll
- 2012-04-26-sdglue.ll
- 2012-05-17-TwoAddressBug.ll
- 2012-05-19-CoalescerCrash.ll
- 2012-07-10-extload64.ll
- 2012-07-10-shufnorm.ll
- 2012-07-15-broadcastfold.ll
- 2012-07-15-BuildVectorPromote.ll
- 2012-07-15-tconst_shl.ll
- 2012-07-15-vshl.ll
- 2012-07-16-fp2ui-i1.ll
- 2012-07-16-LeaUndef.ll
- 2012-07-17-vtrunc.ll
- 2012-07-23-select_cc.ll
- 2012-08-07-CmpISelBug.ll
- 2012-08-16-setcc.ll
- 2012-08-17-legalizer-crash.ll
- 2012-08-28-UnsafeMathCrash.ll
- 2012-09-13-dagco-fneg.ll
- 2012-09-28-CGPBug.ll
- 2012-1-10-buildvector.ll
- 2012-10-02-DAGCycle.ll
- 2012-10-03-DAGCycle.ll
- 2012-10-18-crash-dagco.ll
- 2012-11-28-merge-store-alias.ll
- 2012-11-30-handlemove-dbg.ll
- 2012-11-30-misched-dbg.ll
- 2012-11-30-regpres-dbg.ll
- 2012-12-06-python27-miscompile.ll
- 2012-12-1-merge-multiple.ll
- 2012-12-12-DAGCombineCrash.ll
- 2012-12-14-v8fp80-crash.ll
- 2012-12-19-NoImplicitFloat.ll
- 2013-01-09-DAGCombineBug.ll
- 2013-03-13-VEX-DestReg.ll
- 2013-05-06-ConactVectorCrash.ll
- 2013-10-14-FastISel-incorrect-vreg.ll
- 2014-05-29-factorial.ll
- 2014-05-30-CombineAddNSW.ll
- 2014-08-29-CompactUnwind.ll
- 3addr-16bit.ll
- 3addr-or.ll
- 3dnow-intrinsics.ll
- 4char-promote.ll
- 9601.ll
- abi-isel.ll
- add-of-carry.ll
- add.ll
- add32ri8.ll
- add_shl_constant.ll
- adde-carry.ll
- addr-label-difference.ll
- addr-mode-matcher.ll
- address-type-promotion-constantexpr.ll
- adx-intrinsics.ll
- aes_intrinsics.ll
- aliases.ll
- aligned-comm.ll
- aligned-variadic.ll
- alignment-2.ll
- alignment.ll
- all-ones-vector.ll
- alldiv-divdi3.ll
- alloca-align-rounding-32.ll
- alloca-align-rounding.ll
- allrem-moddi3.ll
- and-load-fold.ll
- and-or-fold.ll
- and-su.ll
- andimm8.ll
- anyext.ll
- anyregcc-crash.ll
- anyregcc.ll
- apm.ll
- arg-cast.ll
- asm-block-labels.ll
- asm-global-imm.ll
- asm-indirect-mem.ll
- asm-invalid-register-class-crasher.ll
- asm-label.ll
- asm-label2.ll
- asm-mismatched-types.ll
- asm-modifier-P.ll
- asm-modifier.ll
- asm-reg-type-mismatch.ll
- asm-reject-reg-type-mismatch.ll
- atom-bypass-slow-division-64.ll
- atom-bypass-slow-division.ll
- atom-call-reg-indirect-foldedreload32.ll
- atom-call-reg-indirect-foldedreload64.ll
- atom-call-reg-indirect.ll
- atom-cmpb.ll
- atom-fixup-lea1.ll
- atom-fixup-lea2.ll
- atom-fixup-lea3.ll
- atom-fixup-lea4.ll
- atom-lea-addw-bug.ll
- atom-lea-sp.ll
- atom-pad-short-functions.ll
- atom-sched.ll
- atom-shuf.ll
- atomic-dagsched.ll
- atomic-load-store-wide.ll
- atomic-load-store.ll
- atomic-minmax-i6432.ll
- atomic-ops-ancient-64.ll
- atomic-or.ll
- atomic-pointer.ll
- atomic128.ll
- atomic16.ll
- atomic32.ll
- atomic64.ll
- atomic6432.ll
- atomic8.ll
- atomic_add.ll
- atomic_idempotent.ll
- atomic_mi.ll
- atomic_op.ll
- Atomics-64.ll
- attribute-sections.ll
- avoid-lea-scale2.ll
- avoid-loop-align-2.ll
- avoid-loop-align.ll
- avoid_complex_am.ll
- avx-arith.ll
- avx-basic.ll
- avx-bitcast.ll
- avx-brcond.ll
- avx-cast.ll
- avx-cmp.ll
- avx-cvt-2.ll
- avx-cvt.ll
- avx-fp2int.ll
- avx-insertelt.ll
- avx-intel-ocl.ll
- avx-intrinsics-x86-upgrade.ll
- avx-intrinsics-x86.ll
- avx-intrinsics-x86_64.ll
- avx-load-store.ll
- avx-logic.ll
- avx-minmax.ll
- avx-select.ll
- avx-shift.ll
- avx-shuffle-x86_32.ll
- avx-splat.ll
- avx-trunc.ll
- avx-unpack.ll
- avx-varargs-x86_64.ll
- avx-vbroadcast.ll
- avx-vextractf128.ll
- avx-vinsertf128.ll
- avx-vperm2x128.ll
- avx-vzeroupper.ll
- avx-win64-args.ll
- avx-win64.ll
- avx.ll
- avx1-logical-load-folding.ll
- avx2-arith.ll
- avx2-cmp.ll
- avx2-conversions.ll
- avx2-gather.ll
- avx2-intrinsics-x86-upgrade.ll
- avx2-intrinsics-x86.ll
- avx2-logic.ll
- avx2-nontemporal.ll
- avx2-phaddsub.ll
- avx2-pmovxrm-intrinsics.ll
- avx2-shift.ll
- avx2-vbroadcast.ll
- avx2-vector-shifts.ll
- avx2-vperm.ll
- avx512-arith.ll
- avx512-bugfix-23634.ll
- avx512-build-vector.ll
- avx512-calling-conv.ll
- avx512-cmp.ll
- avx512-cvt.ll
- avx512-fma-intrinsics.ll
- avx512-fma.ll
- avx512-gather-scatter-intrin.ll
- avx512-i1test.ll
- avx512-inc-dec.ll
- avx512-insert-extract.ll
- avx512-intel-ocl.ll
- avx512-intrinsics.ll
- avx512-logic.ll
- avx512-mask-bugfix.ll
- avx512-mask-op.ll
- avx512-mov.ll
- avx512-nontemporal.ll
- avx512-round.ll
- avx512-scalar.ll
- avx512-select.ll
- avx512-shift.ll
- avx512-trunc-ext.ll
- avx512-vbroadcast.ll
- avx512-vec-cmp.ll
- avx512-vselect-crash.ll
- avx512bw-arith.ll
- avx512bw-intrinsics.ll
- avx512bw-mask-op.ll
- avx512bw-mov.ll
- avx512bw-vec-cmp.ll
- avx512bwvl-arith.ll
- avx512bwvl-intrinsics.ll
- avx512bwvl-mov.ll
- avx512bwvl-vec-cmp.ll
- avx512dq-mask-op.ll
- avx512dqvl-intrinsics.ll
- avx512er-intrinsics.ll
- avx512vl-arith.ll
- avx512vl-intrinsics.ll
- avx512vl-logic.ll
- avx512vl-mov.ll
- avx512vl-nontemporal.ll
- avx512vl-vec-cmp.ll
- barrier-sse.ll
- barrier.ll
- basic-promote-integers.ll
- bc-extract.ll
- bigstructret.ll
- bigstructret2.ll
- bit-test-shift.ll
- bitcast-i256.ll
- bitcast-int-to-vector.ll
- bitcast-mmx.ll
- bitcast.ll
- bitcast2.ll
- block-placement.ll
- bmi.ll
- bool-simplify.ll
- bool-zext.ll
- br-fold.ll
- branchfolding-landingpads.ll
- brcond.ll
- break-anti-dependencies.ll
- break-false-dep.ll
- bss_pagealigned.ll
- bswap-inline-asm.ll
- bswap-vector.ll
- bswap.ll
- bt.ll
- btq.ll
- buildvec-insertvec.ll
- byval-align.ll
- byval-callee-cleanup.ll
- byval.ll
- byval2.ll
- byval3.ll
- byval4.ll
- byval5.ll
- byval6.ll
- byval7.ll
- cache-intrinsic.ll
- call-imm.ll
- call-push.ll
- cas.ll
- catch.ll
- cfi.ll
- cfstring.ll
- chain_order.ll
- change-compare-stride-1.ll
- change-compare-stride-trickiness-0.ll
- change-compare-stride-trickiness-1.ll
- change-compare-stride-trickiness-2.ll
- clobber-fi0.ll
- clz.ll
- cmov-double.ll
- cmov-fp.ll
- cmov-into-branch.ll
- cmov.ll
- cmovcmov.ll
- cmp-fast-isel.ll
- cmp.ll
- cmpxchg-clobber-flags.ll
- cmpxchg-i1.ll
- cmpxchg-i128-i1.ll
- cmpxchg16b.ll
- coalesce-esp.ll
- coalesce-implicitdef.ll
- coalesce_commute_subreg.ll
- coalescer-commute1.ll
- coalescer-commute2.ll
- coalescer-commute3.ll
- coalescer-commute4.ll
- coalescer-commute5.ll
- coalescer-cross.ll
- coalescer-dce.ll
- coalescer-dce2.ll
- coalescer-identity.ll
- coalescer-remat.ll
- coalescer-subreg.ll
- code_placement.ll
- code_placement_align_all.ll
- code_placement_eh.ll
- code_placement_outline_optional_branches.ll
- codegen-prepare-addrmode-sext.ll
- codegen-prepare-cast.ll
- codegen-prepare-crash.ll
- codegen-prepare-extload.ll
- codegen-prepare.ll
- codemodel.ll
- coff-comdat.ll
- coff-comdat2.ll
- coff-comdat3.ll
- coff-feat00.ll
- coff-weak.ll
- coldcc64.ll
- combine-64bit-vec-binop.ll
- combine-and.ll
- combine-avx-intrinsics.ll
- combine-avx2-intrinsics.ll
- combine-lds.ll
- combine-or.ll
- combine-sse2-intrinsics.ll
- combine-sse41-intrinsics.ll
- combiner-aa-0.ll
- combiner-aa-1.ll
- commute-blend-avx2.ll
- commute-blend-sse41.ll
- commute-clmul.ll
- commute-fcmp.ll
- commute-intrinsic.ll
- commute-two-addr.ll
- commute-xop.ll
- commuted-blend-mask.ll
- compact-unwind.ll
- compare-add.ll
- compare-inf.ll
- compare_folding.ll
- compiler_used.ll
- complex-asm.ll
- complex-fca.ll
- computeKnownBits_urem.ll
- conditional-indecrement.ll
- const-base-addr.ll
- constant-combines.ll
- constant-hoisting-optnone.ll
- constant-hoisting-shift-immediate.ll
- constant-pool-remat-0.ll
- constant-pool-sharing.ll
- constpool.ll
- constructor.ll
- convert-2-addr-3-addr-inc64.ll
- copy-propagation.ll
- copysign-constant-magnitude.ll
- cppeh-nounwind.ll
- cpus.ll
- crash-nosse.ll
- crash-O0.ll
- crash.ll
- critical-anti-dep-breaker.ll
- critical-edge-split-2.ll
- cse-add-with-overflow.ll
- cstring.ll
- ctpop-combine.ll
- cvt16.ll
- cvtv2f32.ll
- dag-optnone.ll
- dag-rauw-cse.ll
- dagcombine-and-setcc.ll
- dagcombine-buildvector.ll
- dagcombine-cse.ll
- dagcombine-shifts.ll
- dagcombine-unsafe-math.ll
- darwin-bzero.ll
- darwin-no-dead-strip.ll
- darwin-quote.ll
- darwin-stub.ll
- dbg-changes-codegen-branch-folding.ll
- dbg-changes-codegen.ll
- dbg-combine.ll
- DbgValueOtherTargets.test
- disable-tail-calls.ll
- discontiguous-loops.ll
- div8.ll
- divide-by-constant.ll
- divrem.ll
- divrem8_ext.ll
- dllexport-x86_64.ll
- dllexport.ll
- dllimport-x86_64.ll
- dllimport.ll
- dollar-name.ll
- dont-trunc-store-double-to-float.ll
- dwarf-comp-dir.ll
- dwarf-eh-prepare.ll
- dyn-stackalloc.ll
- dyn_alloca_aligned.ll
- dynamic-alloca-in-entry.ll
- dynamic-alloca-lifetime.ll
- dynamic-allocas-VLAs.ll
- early-ifcvt-crash.ll
- early-ifcvt.ll
- eh-label.ll
- eh-nolandingpads.ll
- eh_frame.ll
- elf-comdat.ll
- elf-comdat2.ll
- emit-big-cst.ll
- empty-functions.ll
- empty-struct-return-type.ll
- epilogue.ll
- equiv_with_fndef.ll
- equiv_with_vardef.ll
- exception-label.ll
- exedeps-movq.ll
- exedepsfix-broadcast.ll
- expand-opaque-const.ll
- extend.ll
- extended-fma-contraction.ll
- extern_weak.ll
- extmul128.ll
- extmul64.ll
- extract-combine.ll
- extract-concat.ll
- extract-extract.ll
- extract-store.ll
- extractelement-from-arg.ll
- extractelement-index.ll
- extractelement-legalization-store-ordering.ll
- extractelement-load.ll
- extractelement-shuffle.ll
- extractps.ll
- f16c-intrinsics.ll
- fabs.ll
- fast-cc-callee-pops.ll
- fast-cc-merge-stack-adj.ll
- fast-cc-pass-in-regs.ll
- fast-isel-agg-constant.ll
- fast-isel-args-fail.ll
- fast-isel-args-fail2.ll
- fast-isel-args.ll
- fast-isel-atomic.ll
- fast-isel-avoid-unnecessary-pic-base.ll
- fast-isel-bail.ll
- fast-isel-bc.ll
- fast-isel-branch_weights.ll
- fast-isel-call-bool.ll
- fast-isel-call.ll
- fast-isel-cmp-branch.ll
- fast-isel-cmp-branch2.ll
- fast-isel-cmp-branch3.ll
- fast-isel-cmp.ll
- fast-isel-constant.ll
- fast-isel-constpool.ll
- fast-isel-constrain-store-indexreg.ll
- fast-isel-divrem-x86-64.ll
- fast-isel-divrem.ll
- fast-isel-double-half-convertion.ll
- fast-isel-expect.ll
- fast-isel-extract.ll
- fast-isel-float-half-convertion.ll
- fast-isel-fneg.ll
- fast-isel-fold-mem.ll
- fast-isel-fptrunc-fpext.ll
- fast-isel-gep.ll
- fast-isel-gv.ll
- fast-isel-i1.ll
- fast-isel-int-float-conversion.ll
- fast-isel-mem.ll
- fast-isel-movsbl-indexreg.ll
- fast-isel-ret-ext.ll
- fast-isel-select-cmov.ll
- fast-isel-select-cmov2.ll
- fast-isel-select-cmp.ll
- fast-isel-select-pseudo-cmov.ll
- fast-isel-select-sse.ll
- fast-isel-select.ll
- fast-isel-sext.ll
- fast-isel-sse12-fptoint.ll
- fast-isel-store.ll
- fast-isel-tailcall.ll
- fast-isel-tls.ll
- fast-isel-trunc-kill-subreg.ll
- fast-isel-vecload.ll
- fast-isel-x32.ll
- fast-isel-x86-64.ll
- fast-isel-x86.ll
- fast-isel.ll
- fastcall-correct-mangling.ll
- fastcc-2.ll
- fastcc-byval.ll
- fastcc-sret.ll
- fastcc.ll
- fastcc3struct.ll
- fastisel-gep-promote-before-add.ll
- fastmath-float-half-conversion.ll
- fcmove.ll
- fdiv-combine.ll
- fdiv.ll
- field-extract-use-trunc.ll
- fildll.ll
- float-asmprint.ll
- float-conv-elim.ll
- floor-soft-float.ll
- fltused.ll
- fltused_function_pointer.ll
- fma-do-not-commute.ll
- fma-intrinsics-phi-213-to-231.ll
- fma-intrinsics-x86.ll
- fma-phi-213-to-231.ll
- fma.ll
- fma4-intrinsics-x86_64-folded-load.ll
- fma_patterns.ll
- fma_patterns_wide.ll
- fmaxnum.ll
- fminnum.ll
- fmul-combines.ll
- fmul-zero.ll
- fnabs.ll
- fold-add.ll
- fold-and-shift.ll
- fold-call-2.ll
- fold-call-3.ll
- fold-call-oper.ll
- fold-call.ll
- fold-imm.ll
- fold-load-binops.ll
- fold-load-unops.ll
- fold-load-vec.ll
- fold-load.ll
- fold-mul-lohi.ll
- fold-pcmpeqd-1.ll
- fold-pcmpeqd-2.ll
- fold-sext-trunc.ll
- fold-tied-op.ll
- fold-vector-bv-crash.ll
- fold-vector-sext-crash.ll
- fold-vector-sext-crash2.ll
- fold-vector-sext-zext.ll
- fold-vector-shl-crash.ll
- fold-vector-shuffle-crash.ll
- fold-vector-trunc-sitofp.ll
- fold-vex.ll
- fold-xmm-zero.ll
- fold-zext-trunc.ll
- force-align-stack-alloca.ll
- force-align-stack.ll
- fp-double-rounding.ll
- fp-elim-and-no-fp-elim.ll
- fp-elim.ll
- fp-fast.ll
- fp-immediate-shorten.ll
- fp-in-intregs.ll
- fp-load-trunc.ll
- fp-select-cmp-and.ll
- fp-stack-2results.ll
- fp-stack-compare-cmov.ll
- fp-stack-compare.ll
- fp-stack-direct-ret.ll
- fp-stack-O0-crash.ll
- fp-stack-O0.ll
- fp-stack-ret-conv.ll
- fp-stack-ret-store.ll
- fp-stack-ret.ll
- fp-stack-retcopy.ll
- fp-stack-set-st1.ll
- fp-stack.ll
- fp-trunc.ll
- fp-une-cmp.ll
- fp2sint.ll
- fp_constant_op.ll
- fp_load_cast_fold.ll
- fp_load_fold.ll
- fpstack-debuginstr-kill.ll
- frame-base.ll
- frameaddr.ll
- frameescape.ll
- frameregister.ll
- fsgsbase.ll
- fsxor-alignment.ll
- full-lsr.ll
- function-subtarget-features-2.ll
- function-subtarget-features.ll
- ga-offset.ll
- gather-addresses.ll
- gcc_except_table.ll
- gcc_except_table_functions.ll
- getelementptr.ll
- ghc-cc.ll
- ghc-cc64.ll
- global-fill.ll
- global-sections-comdat.ll
- global-sections-tls.ll
- global-sections.ll
- gs-fold.ll
- h-register-addressing-32.ll
- h-register-addressing-64.ll
- h-register-store.ll
- h-registers-0.ll
- h-registers-1.ll
- h-registers-2.ll
- h-registers-3.ll
- haddsub-2.ll
- haddsub-undef.ll
- haddsub.ll
- half.ll
- handle-move.ll
- hidden-vis-2.ll
- hidden-vis-3.ll
- hidden-vis-4.ll
- hidden-vis-pic.ll
- hidden-vis.ll
- hipe-cc.ll
- hipe-cc64.ll
- hipe-prologue.ll
- hoist-common.ll
- hoist-invariant-load.ll
- huge-stack-offset.ll
- i128-and-beyond.ll
- i128-immediate.ll
- i128-mul.ll
- i128-ret.ll
- i128-sdiv.ll
- i1narrowfail.ll
- i256-add.ll
- i2k.ll
- i486-fence-loop.ll
- i64-mem-copy.ll
- iabs.ll
- ident-metadata.ll
- illegal-insert.ll
- illegal-vector-args-return.ll
- implicit-null-check-negative.ll
- implicit-null-check.ll
- imul-lea-2.ll
- imul-lea.ll
- imul.ll
- inalloca-ctor.ll
- inalloca-invoke.ll
- inalloca-regparm.ll
- inalloca-stdcall.ll
- inalloca.ll
- indirect-hidden.ll
- init-priority.ll
- inline-asm-2addr.ll
- inline-asm-bad-constraint-n.ll
- inline-asm-duplicated-constraint.ll
- inline-asm-error.ll
- inline-asm-flag-clobber.ll
- inline-asm-fpstack.ll
- inline-asm-h.ll
- inline-asm-modifier-n.ll
- inline-asm-modifier-q.ll
- inline-asm-mrv.ll
- inline-asm-out-regs.ll
- inline-asm-pic.ll
- inline-asm-ptr-cast.ll
- inline-asm-q-regs.ll
- inline-asm-R-constraint.ll
- inline-asm-sp-clobber-memcpy.ll
- inline-asm-stack-realign.ll
- inline-asm-stack-realign2.ll
- inline-asm-stack-realign3.ll
- inline-asm-tied.ll
- inline-asm-x-scalar.ll
- inline-asm.ll
- inlineasm-sched-bug.ll
- inreg.ll
- ins_split_regalloc.ll
- ins_subreg_coalesce-1.ll
- ins_subreg_coalesce-2.ll
- ins_subreg_coalesce-3.ll
- insert-positions.ll
- insertelement-copytoregs.ll
- insertelement-legalize.ll
- insertps-O0-bug.ll
- int-intrinsic.ll
- invalid-shift-immediate.ll
- isel-optnone.ll
- isel-sink.ll
- isel-sink2.ll
- isel-sink3.ll
- isint.ll
- isnan.ll
- isnan2.ll
- ispositive.ll
- jump_sign.ll
- label-redefinition.ll
- large-code-model-isel.ll
- large-constants.ll
- large-gep-chain.ll
- large-gep-scale.ll
- large-global.ll
- ldzero.ll
- lea-2.ll
- lea-3.ll
- lea-4.ll
- lea-5.ll
- lea-recursion.ll
- lea.ll
- leaf-fp-elim.ll
- legalize-fmp-oeq-vector-select.ll
- legalize-libcalls.ll
- legalize-shift-64.ll
- legalize-shl-vec.ll
- legalize-sub-zero-2.ll
- legalize-sub-zero.ll
- legalizedag_vec.ll
- lfence.ll
- libcall-sret.ll
- licm-dominance.ll
- licm-nested.ll
- licm-regpressure.ll
- licm-symbol.ll
- limited-prec.ll
- lit.local.cfg
- live-out-reg-info.ll
- liveness-local-regalloc.ll
- llc-override-mcpu-mattr.ll
- load-slice.ll
- lock-inst-encoding.ll
- log2_not_readnone.ll
- logical-load-fold.ll
- long-setcc.ll
- longlong-deadload.ll
- loop-blocks.ll
- loop-hoist.ll
- loop-strength-reduce-2.ll
- loop-strength-reduce-3.ll
- loop-strength-reduce.ll
- loop-strength-reduce2.ll
- loop-strength-reduce4.ll
- loop-strength-reduce5.ll
- loop-strength-reduce6.ll
- loop-strength-reduce7.ll
- loop-strength-reduce8.ll
- lower-bitcast.ll
- lower-vec-shift-2.ll
- lower-vec-shift.ll
- lower-vec-shuffle-bug.ll
- lsr-delayed-fold.ll
- lsr-i386.ll
- lsr-interesting-step.ll
- lsr-loop-exit-cond.ll
- lsr-negative-stride.ll
- lsr-nonaffine.ll
- lsr-normalization.ll
- lsr-overflow.ll
- lsr-quadratic-expand.ll
- lsr-redundant-addressing.ll
- lsr-reuse-trunc.ll
- lsr-reuse.ll
- lsr-sort.ll
- lsr-static-addr.ll
- lsr-wrap.ll
- lzcnt-tzcnt.ll
- lzcnt.ll
- machine-combiner.ll
- machine-cp.ll
- machine-cse.ll
- machine-trace-metrics-crash.ll
- MachineBranchProb.ll
- MachineSink-CritEdge.ll
- MachineSink-DbgValue.ll
- MachineSink-eflags.ll
- MachineSink-PHIUse.ll
- macho-comdat.ll
- masked-iv-safe.ll
- masked-iv-unsafe.ll
- masked_gather_scatter.ll
- masked_memop.ll
- maskmovdqu.ll
- mature-mc-support.ll
- mcinst-avx-lowering.ll
- mcinst-lowering.ll
- mem-intrin-base-reg.ll
- mem-promote-integers.ll
- membarrier.ll
- memcmp.ll
- memcpy-2.ll
- memcpy.ll
- memset-2.ll
- memset-3.ll
- memset-sse-stack-realignment.ll
- memset.ll
- memset64-on-x86-32.ll
- merge-consecutive-stores-i1.ll
- merge_store.ll
- MergeConsecutiveStores.ll
- mfence.ll
- mingw-alloca.ll
- misaligned-memset.ll
- misched-aa-colored.ll
- misched-aa-mmos.ll
- misched-balance.ll
- misched-code-difference-with-debug.ll
- misched-copy.ll
- misched-crash.ll
- misched-fusion.ll
- misched-ilp.ll
- misched-matmul.ll
- misched-matrix.ll
- misched-new.ll
- mmx-arg-passing-x86-64.ll
- mmx-arg-passing.ll
- mmx-arith.ll
- mmx-bitcast.ll
- mmx-copy-gprs.ll
- mmx-fold-load.ll
- mmx-intrinsics.ll
- mod128.ll
- movbe.ll
- movfs.ll
- movgs.ll
- movmsk.ll
- movntdq-no-avx.ll
- movtopush.ll
- ms-inline-asm.ll
- mul-legalize.ll
- mul-remat.ll
- mul-shift-reassoc.ll
- mul128.ll
- mul128_sext_loop.ll
- mul64.ll
- muloti.ll
- mult-alt-generic-i686.ll
- mult-alt-generic-x86_64.ll
- mult-alt-x86.ll
- multiple-loop-post-inc.ll
- multiple-return-values-cross-block.ll
- mulx32.ll
- mulx64.ll
- musttail-fastcall.ll
- musttail-indirect.ll
- musttail-thiscall.ll
- musttail-varargs.ll
- musttail.ll
- named-reg-alloc.ll
- named-reg-notareg.ll
- nancvt.ll
- narrow-shl-cst.ll
- narrow-shl-load.ll
- narrow_op-1.ll
- neg-shl-add.ll
- neg_cmp.ll
- neg_fp.ll
- negate-add-zero.ll
- negative-sin.ll
- negative-stride-fptosi-user.ll
- negative-subscript.ll
- negative_zero.ll
- newline-and-quote.ll
- no-cmov.ll
- nobt.ll
- nocx16.ll
- non-lazy-bind.ll
- non-unique-sections.ll
- nonconst-static-ev.ll
- nonconst-static-iv.ll
- nontemporal-2.ll
- nontemporal.ll
- norex-subreg.ll
- nosse-error1.ll
- nosse-error2.ll
- nosse-varargs.ll
- null-streamer.ll
- objc-gc-module-flags.ll
- object-size.ll
- opaque-constant-asm.ll
- opt-ext-uses.ll
- opt-shuff-tstore.ll
- optimize-max-0.ll
- optimize-max-1.ll
- optimize-max-2.ll
- optimize-max-3.ll
- or-address.ll
- or-branch.ll
- osx-private-labels.ll
- overlap-shift.ll
- packed_struct.ll
- palignr.ll
- pass-three.ll
- patchpoint-invoke.ll
- patchpoint-webkit_jscc.ll
- patchpoint.ll
- peep-setb.ll
- peep-test-0.ll
- peep-test-1.ll
- peep-test-2.ll
- peep-test-3.ll
- peep-test-4.ll
- peephole-fold-movsd.ll
- peephole-multiple-folds.ll
- personality.ll
- personality_size.ll
- phaddsub.ll
- phi-bit-propagation.ll
- phi-immediate-factoring.ll
- phielim-split.ll
- phys-reg-local-regalloc.ll
- phys_subreg_coalesce-2.ll
- phys_subreg_coalesce-3.ll
- phys_subreg_coalesce.ll
- pic-load-remat.ll
- pic.ll
- pic_jumptable.ll
- pmovext.ll
- pmovsx-inreg.ll
- pmul.ll
- pmulld.ll
- pointer-vector.ll
- popcnt.ll
- postalloc-coalescing.ll
- postra-licm.ll
- powi.ll
- pr10068.ll
- pr10475.ll
- pr10499.ll
- pr10523.ll
- pr10524.ll
- pr10525.ll
- pr10526.ll
- pr11202.ll
- pr11334.ll
- pr11415.ll
- pr11468.ll
- pr11985.ll
- pr11998.ll
- pr12312.ll
- pr12360.ll
- pr12889.ll
- pr13209.ll
- pr13220.ll
- pr13458.ll
- pr13577.ll
- pr13859.ll
- pr13899.ll
- pr14088.ll
- pr14098.ll
- pr14161.ll
- pr14204.ll
- pr14314.ll
- pr14333.ll
- pr14562.ll
- pr1462.ll
- pr1489.ll
- pr1505.ll
- pr1505b.ll
- pr15267.ll
- pr15296.ll
- pr15309.ll
- pr16031.ll
- pr16360.ll
- pr16807.ll
- pr17546.ll
- pr17631.ll
- pr17764.ll
- pr18014.ll
- pr18023.ll
- pr18054.ll
- pr18162.ll
- pr18846.ll
- pr19049.ll
- pr20020.ll
- pr20088.ll
- pr21099.ll
- pr21529.ll
- pr2177.ll
- pr21792.ll
- pr2182.ll
- pr22019.ll
- pr22103.ll
- pr22774.ll
- pr23103.ll
- pr23246.ll
- pr2326.ll
- pr23273.ll
- pr23603.ll
- pr23664.ll
- pr23900.ll
- pr24374.ll
- pr2656.ll
- pr2659.ll
- pr2849.ll
- pr2924.ll
- pr2982.ll
- pr3154.ll
- pr3216.ll
- pr3241.ll
- pr3243.ll
- pr3244.ll
- pr3250.ll
- pr3317.ll
- pr3366.ll
- pr3457.ll
- pr3522.ll
- pr5145.ll
- pr7882.ll
- pr9127.ll
- pr9743.ll
- pre-ra-sched.ll
- prefetch.ll
- prefixdata.ll
- preserve_allcc64.ll
- preserve_mostcc64.ll
- private-2.ll
- private.ll
- prologuedata.ll
- promote-assert-zext.ll
- promote-i16.ll
- promote-trunc.ll
- promote.ll
- pshufb-mask-comments.ll
- pshufd-combine-crash.ll
- psubus.ll
- ptr-rotate.ll
- ptrtoint-constexpr.ll
- ragreedy-bug.ll
- ragreedy-hoist-spill.ll
- ragreedy-last-chance-recoloring.ll
- rd-mod-wr-eflags.ll
- rdpmc.ll
- rdrand.ll
- rdseed.ll
- rdtsc.ll
- read-fp-no-frame-pointer.ll
- recip-fastmath.ll
- red-zone.ll
- red-zone2.ll
- regalloc-reconcile-broken-hints.ll
- reghinting.ll
- regpressure.ll
- rem.ll
- remat-constant.ll
- remat-fold-load.ll
- remat-invalid-liveness.ll
- remat-mov-0.ll
- remat-phys-dead.ll
- remat-scalar-zero.ll
- ret-addr.ll
- ret-i64-0.ll
- ret-mmx.ll
- return_zeroext_i2.ll
- returned-trunc-tail-calls.ll
- reverse_branches.ll
- rip-rel-address.ll
- rip-rel-lea.ll
- rodata-relocs.ll
- rot16.ll
- rot32.ll
- rot64.ll
- rotate.ll
- rotate2.ll
- rotate4.ll
- rounding-ops.ll
- rrlist-livereg-corrutpion.ll
- rtm.ll
- saddo-redundant-add.ll
- sandybridge-loads.ll
- scalar-extract.ll
- scalar-min-max-fill-operand.ll
- scalar_sse_minmax.ll
- scalar_widen_div.ll
- scalarize-bitcast.ll
- scev-interchange.ll
- scheduler-backtracking.ll
- sdiv-exact.ll
- segmented-stacks-dynamic.ll
- segmented-stacks.ll
- seh-catch-all-win32.ll
- seh-catch-all.ll
- seh-except-finally.ll
- seh-filter-no-personality.ll
- seh-filter.ll
- seh-finally.ll
- seh-safe-div-win32.ll
- seh-safe-div.ll
- seh-stack-realign-win32.ll
- seh-stack-realign.ll
- select-with-and-or.ll
- select.ll
- select_const.ll
- selectiondag-crash.ll
- selectiondag-cse.ll
- setcc-combine.ll
- setcc-lowering.ll
- setcc-narrowing.ll
- setcc-sentinals.ll
- setcc.ll
- setjmp-spills.ll
- setoeq.ll
- setuge.ll
- sext-i1.ll
- sext-load.ll
- sext-ret-val.ll
- sext-setcc-self.ll
- sext-subreg.ll
- sext-trunc.ll
- sfence.ll
- sha.ll
- shift-and.ll
- shift-avx2-crash.ll
- shift-bmi2.ll
- shift-coalesce.ll
- shift-codegen.ll
- shift-combine-crash.ll
- shift-combine.ll
- shift-double.ll
- shift-folding.ll
- shift-i128.ll
- shift-i256.ll
- shift-one.ll
- shift-pair.ll
- shift-parts.ll
- shift-pcmp.ll
- shl-anyext.ll
- shl-i64.ll
- shl_elim.ll
- shl_undef.ll
- shrink-compare.ll
- shrink-fp-const1.ll
- shrink-fp-const2.ll
- shuffle-combine-crash.ll
- sibcall-2.ll
- sibcall-3.ll
- sibcall-4.ll
- sibcall-5.ll
- sibcall-6.ll
- sibcall-byval.ll
- sibcall-win64.ll
- sibcall.ll
- simple-zext.ll
- sincos-opt.ll
- sincos.ll
- sink-blockfreq.ll
- sink-cheap-instructions.ll
- sink-hoist.ll
- sink-out-of-loop.ll
- sjlj-baseptr.ll
- sjlj.ll
- slow-div.ll
- slow-incdec.ll
- small-byval-memcpy.ll
- smul-with-overflow.ll
- soft-fp.ll
- splat-const.ll
- splat-for-size.ll
- split-eh-lpad-edges.ll
- split-vector-bitcast.ll
- split-vector-rem.ll
- sqrt-fastmath.ll
- sqrt.ll
- sret-implicit.ll
- sse-align-0.ll
- sse-align-1.ll
- sse-align-10.ll
- sse-align-11.ll
- sse-align-12.ll
- sse-align-2.ll
- sse-align-3.ll
- sse-align-4.ll
- sse-align-5.ll
- sse-align-6.ll
- sse-align-7.ll
- sse-align-8.ll
- sse-align-9.ll
- sse-commute.ll
- sse-domains.ll
- sse-fcopysign.ll
- sse-intel-ocl.ll
- sse-intrinsics-x86.ll
- sse-load-ret.ll
- sse-minmax.ll
- sse-scalar-fp-arith-unary.ll
- sse-scalar-fp-arith.ll
- sse-unaligned-mem-feature.ll
- sse-varargs.ll
- sse1.ll
- sse2-intrinsics-x86-upgrade.ll
- sse2-intrinsics-x86.ll
- sse2-vector-shifts.ll
- sse2.ll
- sse3-avx-addsub-2.ll
- sse3-avx-addsub.ll
- sse3-intrinsics-x86.ll
- sse3.ll
- sse41-intrinsics-x86-upgrade.ll
- sse41-intrinsics-x86.ll
- sse41-pmovxrm-intrinsics.ll
- sse41.ll
- sse42-intrinsics-x86.ll
- sse42.ll
- sse42_64.ll
- sse4a.ll
- sse_partial_update.ll
- sse_reload_fold.ll
- ssp-data-layout.ll
- ssse3-intrinsics-x86.ll
- stack-align-memcpy.ll
- stack-align.ll
- stack-align2.ll
- stack-folding-3dnow.ll
- stack-folding-fp-avx1.ll
- stack-folding-fp-sse42.ll
- stack-folding-int-avx1.ll
- stack-folding-int-avx2.ll
- stack-folding-int-sse42.ll
- stack-folding-mmx.ll
- stack-folding-x86_64.ll
- stack-folding-xop.ll
- stack-probe-size.ll
- stack-protector-dbginfo.ll
- stack-protector-vreg-to-vreg-copy.ll
- stack-protector-weight.ll
- stack-protector.ll
- stack-update-frame-opcode.ll
- stack_guard_remat.ll
- StackColoring-dbg.ll
- StackColoring.ll
- stackmap-fast-isel.ll
- stackmap-large-constants.ll
- stackmap-liveness.ll
- stackmap-nops.ll
- stackmap-shadow-optimization.ll
- stackmap.ll
- stackpointer.ll
- statepoint-allocas.ll
- statepoint-call-lowering.ll
- statepoint-far-call.ll
- statepoint-forward.ll
- statepoint-gctransition-call-lowering.ll
- statepoint-invoke.ll
- statepoint-stack-usage.ll
- statepoint-stackmap-format.ll
- stdarg.ll
- stdcall-notailcall.ll
- stdcall.ll
- store-empty-member.ll
- store-fp-constant.ll
- store-global-address.ll
- store-narrow.ll
- store_op_load_fold.ll
- store_op_load_fold2.ll
- stores-merging.ll
- storetrunc-fp.ll
- stride-nine-with-base-reg.ll
- stride-reuse.ll
- sub-with-overflow.ll
- sub.ll
- subreg-to-reg-0.ll
- subreg-to-reg-1.ll
- subreg-to-reg-2.ll
- subreg-to-reg-3.ll
- subreg-to-reg-4.ll
- subreg-to-reg-6.ll
- sunkaddr-ext.ll
- switch-bt.ll
- switch-crit-edge-constant.ll
- switch-default-only.ll
- switch-jump-table.ll
- switch-or.ll
- switch-order-weight.ll
- switch-zextload.ll
- switch.ll
- SwitchLowering.ll
- swizzle-2.ll
- swizzle-avx2.ll
- SwizzleShuff.ll
- system-intrinsics-64.ll
- system-intrinsics.ll
- tail-call-attrs.ll
- tail-call-got.ll
- tail-call-legality.ll
- tail-call-win64.ll
- tail-dup-addr.ll
- tail-opts.ll
- tail-threshold.ll
- tailcall-64.ll
- tailcall-calleesave.ll
- tailcall-cgp-dup.ll
- tailcall-disable.ll
- tailcall-fastisel.ll
- tailcall-largecode.ll
- tailcall-mem-intrinsics.ll
- tailcall-multiret.ll
- tailcall-returndup-void.ll
- tailcall-ri64.ll
- tailcall-stackalign.ll
- tailcall-structret.ll
- tailcall.ll
- tailcallbyval.ll
- tailcallbyval64.ll
- tailcallfp.ll
- tailcallfp2.ll
- tailcallpic1.ll
- tailcallpic2.ll
- tailcallpic3.ll
- tailcallstack64.ll
- targetLoweringGeneric.ll
- tbm-intrinsics-x86_64.ll
- tbm_patterns.ll
- test-nofold.ll
- test-shrink-bug.ll
- test-shrink.ll
- testb-je-fusion.ll
- testl-commute.ll
- this-return-64.ll
- tls-addr-non-leaf-function.ll
- tls-local-dynamic.ll
- tls-models.ll
- tls-pic.ll
- tls-pie.ll
- tls.ll
- tlv-1.ll
- tlv-2.ll
- tlv-3.ll
- trap.ll
- trunc-ext-ld-st.ll
- trunc-to-bool.ll
- TruncAssertZext.ll
- twoaddr-coalesce-2.ll
- twoaddr-coalesce-3.ll
- twoaddr-coalesce.ll
- twoaddr-lea.ll
- twoaddr-pass-sink.ll
- twoaddr-sink-terminator.ll
- uint64-to-float.ll
- uint_to_fp-2.ll
- uint_to_fp.ll
- umul-with-carry.ll
- umul-with-overflow.ll
- unaligned-32-byte-memops.ll
- unaligned-load.ll
- unaligned-spill-folding.ll
- undef-label.ll
- unknown-location.ll
- unreachable-loop-sinking.ll
- unwind-init.ll
- unwindraise.ll
- urem-i8-constant.ll
- use-add-flags.ll
- utf16-cfstrings.ll
- utf8.ll
- v2f32.ll
- v4f32-immediate.ll
- v4i32load-crash.ll
- v8i1-masks.ll
- vaargs.ll
- vararg-callee-cleanup.ll
- vararg_no_start.ll
- vararg_tailcall.ll
- variable-sized-darwin-bzero.ll
- variadic-node-pic.ll
- vastart-defs-eflags.ll
- vbinop-simplify-bug.ll
- vec-loadsingles-alignment.ll
- vec-sign.ll
- vec-trunc-store.ll
- vec_add.ll
- vec_align.ll
- vec_align_i256.ll
- vec_anyext.ll
- vec_call.ll
- vec_cast.ll
- vec_cast2.ll
- vec_compare-sse4.ll
- vec_compare.ll
- vec_ctbits.ll
- vec_ext_inreg.ll
- vec_extract-avx.ll
- vec_extract-mmx.ll
- vec_extract-sse4.ll
- vec_extract.ll
- vec_fabs.ll
- vec_floor.ll
- vec_fneg.ll
- vec_fp_to_int.ll
- vec_fpext.ll
- vec_i64.ll
- vec_ins_extract-1.ll
- vec_ins_extract.ll
- vec_insert-2.ll
- vec_insert-3.ll
- vec_insert-4.ll
- vec_insert-5.ll
- vec_insert-7.ll
- vec_insert-8.ll
- vec_insert-9.ll
- vec_insert-mmx.ll
- vec_int_to_fp.ll
- vec_loadsingles.ll
- vec_logical.ll
- vec_partial.ll
- vec_reassociate.ll
- vec_return.ll
- vec_round.ll
- vec_sdiv_to_shift.ll
- vec_set-2.ll
- vec_set-3.ll
- vec_set-4.ll
- vec_set-6.ll
- vec_set-7.ll
- vec_set-8.ll
- vec_set-A.ll
- vec_set-B.ll
- vec_set-C.ll
- vec_set-D.ll
- vec_set-F.ll
- vec_set-H.ll
- vec_set.ll
- vec_setcc-2.ll
- vec_setcc.ll
- vec_shift.ll
- vec_shift2.ll
- vec_shift3.ll
- vec_shift4.ll
- vec_shift5.ll
- vec_shift6.ll
- vec_shift7.ll
- vec_shuf-insert.ll
- vec_split.ll
- vec_ss_load_fold.ll
- vec_trunc_sext.ll
- vec_udiv_to_shift.ll
- vec_uint_to_fp.ll
- vec_unsafe-fp-math.ll
- vec_zero-2.ll
- vec_zero.ll
- vec_zero_cse.ll
- vector-blend.ll
- vector-gep.ll
- vector-idiv.ll
- vector-intrinsics.ll
- vector-lzcnt-128.ll
- vector-lzcnt-256.ll
- vector-popcnt-128.ll
- vector-popcnt-256.ll
- vector-rem.ll
- vector-sext.ll
- vector-shift-ashr-128.ll
- vector-shift-ashr-256.ll
- vector-shift-lshr-128.ll
- vector-shift-lshr-256.ll
- vector-shift-shl-128.ll
- vector-shift-shl-256.ll
- vector-shuffle-128-v16.ll
- vector-shuffle-128-v2.ll
- vector-shuffle-128-v4.ll
- vector-shuffle-128-v8.ll
- vector-shuffle-256-v16.ll
- vector-shuffle-256-v32.ll
- vector-shuffle-256-v4.ll
- vector-shuffle-256-v8.ll
- vector-shuffle-512-v16.ll
- vector-shuffle-512-v8.ll
- vector-shuffle-combining.ll
- vector-shuffle-mmx.ll
- vector-shuffle-sse1.ll
- vector-shuffle-sse4a.ll
- vector-trunc.ll
- vector-tzcnt-128.ll
- vector-tzcnt-256.ll
- vector-variable-idx.ll
- vector-variable-idx2.ll
- vector-zext.ll
- vector-zmov.ll
- vector.ll
- vectorcall.ll
- vfcmp.ll
- viabs.ll
- visibility.ll
- visibility2.ll
- volatile.ll
- vortex-bug.ll
- vselect-2.ll
- vselect-avx.ll
- vselect-minmax.ll
- vselect.ll
- vshift-1.ll
- vshift-2.ll
- vshift-3.ll
- vshift-4.ll
- vshift-5.ll
- vshift-6.ll
- vshift_scalar.ll
- vshift_split.ll
- vshift_split2.ll
- vsplit-and.ll
- warn-stack.ll
- weak.ll
- weak_def_can_be_hidden.ll
- webkit-jscc.ll
- wide-fma-contraction.ll
- wide-integer-fold.ll
- widen_arith-1.ll
- widen_arith-2.ll
- widen_arith-3.ll
- widen_arith-4.ll
- widen_arith-5.ll
- widen_arith-6.ll
- widen_cast-1.ll
- widen_cast-2.ll
- widen_cast-3.ll
- widen_cast-4.ll
- widen_cast-5.ll
- widen_cast-6.ll
- widen_conv-1.ll
- widen_conv-2.ll
- widen_conv-3.ll
- widen_conv-4.ll
- widen_conversions.ll
- widen_extract-1.ll
- widen_load-0.ll
- widen_load-1.ll
- widen_load-2.ll
- widen_shuffle-1.ll
- WidenArith.ll
- win32-eh-states.ll
- win32-eh.ll
- win32-pic-jumptable.ll
- win32_sret.ll
- win64_alloca_dynalloca.ll
- win64_call_epi.ll
- win64_eh.ll
- win64_frame.ll
- win64_nonvol.ll
- win64_params.ll
- win64_vararg.ll
- win_chkstk.ll
- win_cst_pool.ll
- win_eh_prepare.ll
- win_ftol2.ll
- windows-itanium-alloca.ll
- x32-function_pointer-1.ll
- x32-function_pointer-2.ll
- x32-function_pointer-3.ll
- x32-lea-1.ll
- x86-32-vector-calling-conv.ll
- x86-64-and-mask.ll
- x86-64-arg.ll
- x86-64-asm.ll
- x86-64-baseptr.ll
- x86-64-call.ll
- x86-64-dead-stack-adjust.ll
- x86-64-disp.ll
- x86-64-double-precision-shift-left.ll
- x86-64-double-precision-shift-right.ll
- x86-64-double-shifts-Oz-Os-O2.ll
- x86-64-double-shifts-var.ll
- x86-64-extend-shift.ll
- x86-64-gv-offset.ll
- x86-64-jumps.ll
- x86-64-mem.ll
- x86-64-pic-1.ll
- x86-64-pic-10.ll
- x86-64-pic-11.ll
- x86-64-pic-2.ll
- x86-64-pic-3.ll
- x86-64-pic-4.ll
- x86-64-pic-5.ll
- x86-64-pic-6.ll
- x86-64-pic-7.ll
- x86-64-pic-8.ll
- x86-64-pic-9.ll
- x86-64-psub.ll
- x86-64-ptr-arg-simple.ll
- x86-64-ret0.ll
- x86-64-shortint.ll
- x86-64-sret-return-2.ll
- x86-64-sret-return.ll
- x86-64-stack-and-frame-ptr.ll
- x86-64-static-relo-movl.ll
- x86-64-tls-1.ll
- x86-64-varargs.ll
- x86-fold-pshufb.ll
- x86-framelowering-trap.ll
- x86-inline-asm-validation.ll
- x86-mixed-alignment-dagcombine.ll
- x86-setcc-int-to-fp-combine.ll
- x86-shifts.ll
- x86-shrink-wrapping.ll
- x86-store-gv-addr.ll
- x86-upgrade-avx-vbroadcast.ll
- x86-upgrade-avx2-vbroadcast.ll
- x86_64-mul-by-const.ll
- xaluo.ll
- xmm-r64.ll
- xmulo.ll
- xop-intrinsics-x86_64.ll
- xor-icmp.ll
- xor.ll
- xtest.ll
- zero-remat.ll
- zext-extract_subreg.ll
- zext-fold.ll
- zext-inreg-0.ll
- zext-inreg-1.ll
- zext-sext.ll
- zext-shl.ll
- zext-trunc.ll
- zlib-longest-match.ll
sse41.ll @release_37 — raw · history · blame
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 
1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 | ; RUN: llc < %s -mtriple=i686-apple-darwin9 -mattr=sse4.1 -mcpu=penryn | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mattr=sse4.1 -mcpu=penryn | FileCheck %s --check-prefix=X64
@g16 = external global i16
define <4 x i32> @pinsrd_1(i32 %s, <4 x i32> %tmp) nounwind {
; X32-LABEL: pinsrd_1:
; X32: ## BB#0:
; X32-NEXT: pinsrd $1, {{[0-9]+}}(%esp), %xmm0
; X32-NEXT: retl
;
; X64-LABEL: pinsrd_1:
; X64: ## BB#0:
; X64-NEXT: pinsrd $1, %edi, %xmm0
; X64-NEXT: retq
%tmp1 = insertelement <4 x i32> %tmp, i32 %s, i32 1
ret <4 x i32> %tmp1
}
define <16 x i8> @pinsrb_1(i8 %s, <16 x i8> %tmp) nounwind {
; X32-LABEL: pinsrb_1:
; X32: ## BB#0:
; X32-NEXT: pinsrb $1, {{[0-9]+}}(%esp), %xmm0
; X32-NEXT: retl
;
; X64-LABEL: pinsrb_1:
; X64: ## BB#0:
; X64-NEXT: pinsrb $1, %edi, %xmm0
; X64-NEXT: retq
%tmp1 = insertelement <16 x i8> %tmp, i8 %s, i32 1
ret <16 x i8> %tmp1
}
define <2 x i64> @pmovsxbd_1(i32* %p) nounwind {
; X32-LABEL: pmovsxbd_1:
; X32: ## BB#0: ## %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: pmovsxbd (%eax), %xmm0
; X32-NEXT: retl
;
; X64-LABEL: pmovsxbd_1:
; X64: ## BB#0: ## %entry
; X64-NEXT: pmovsxbd (%rdi), %xmm0
; X64-NEXT: retq
entry:
%0 = load i32, i32* %p, align 4
%1 = insertelement <4 x i32> undef, i32 %0, i32 0
%2 = insertelement <4 x i32> %1, i32 0, i32 1
%3 = insertelement <4 x i32> %2, i32 0, i32 2
%4 = insertelement <4 x i32> %3, i32 0, i32 3
%5 = bitcast <4 x i32> %4 to <16 x i8>
%6 = tail call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %5) nounwind readnone
%7 = bitcast <4 x i32> %6 to <2 x i64>
ret <2 x i64> %7
}
define <2 x i64> @pmovsxwd_1(i64* %p) nounwind readonly {
; X32-LABEL: pmovsxwd_1:
; X32: ## BB#0: ## %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: pmovsxwd (%eax), %xmm0
; X32-NEXT: retl
;
; X64-LABEL: pmovsxwd_1:
; X64: ## BB#0: ## %entry
; X64-NEXT: pmovsxwd (%rdi), %xmm0
; X64-NEXT: retq
entry:
%0 = load i64, i64* %p ; <i64> [#uses=1]
%tmp2 = insertelement <2 x i64> zeroinitializer, i64 %0, i32 0 ; <<2 x i64>> [#uses=1]
%1 = bitcast <2 x i64> %tmp2 to <8 x i16> ; <<8 x i16>> [#uses=1]
%2 = tail call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %1) nounwind readnone ; <<4 x i32>> [#uses=1]
%3 = bitcast <4 x i32> %2 to <2 x i64> ; <<2 x i64>> [#uses=1]
ret <2 x i64> %3
}
define <2 x i64> @pmovzxbq_1() nounwind {
; X32-LABEL: pmovzxbq_1:
; X32: ## BB#0: ## %entry
; X32-NEXT: movl L_g16$non_lazy_ptr, %eax
; X32-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; X32-NEXT: retl
;
; X64-LABEL: pmovzxbq_1:
; X64: ## BB#0: ## %entry
; X64-NEXT: movq _g16@{{.*}}(%rip), %rax
; X64-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; X64-NEXT: retq
entry:
%0 = load i16, i16* @g16, align 2 ; <i16> [#uses=1]
%1 = insertelement <8 x i16> undef, i16 %0, i32 0 ; <<8 x i16>> [#uses=1]
%2 = bitcast <8 x i16> %1 to <16 x i8> ; <<16 x i8>> [#uses=1]
%3 = tail call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %2) nounwind readnone ; <<2 x i64>> [#uses=1]
ret <2 x i64> %3
}
declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>) nounwind readnone
declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone
declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone
define i32 @extractps_1(<4 x float> %v) nounwind {
; X32-LABEL: extractps_1:
; X32: ## BB#0:
; X32-NEXT: extractps $3, %xmm0, %eax
; X32-NEXT: retl
;
; X64-LABEL: extractps_1:
; X64: ## BB#0:
; X64-NEXT: extractps $3, %xmm0, %eax
; X64-NEXT: retq
%s = extractelement <4 x float> %v, i32 3
%i = bitcast float %s to i32
ret i32 %i
}
define i32 @extractps_2(<4 x float> %v) nounwind {
; X32-LABEL: extractps_2:
; X32: ## BB#0:
; X32-NEXT: extractps $3, %xmm0, %eax
; X32-NEXT: retl
;
; X64-LABEL: extractps_2:
; X64: ## BB#0:
; X64-NEXT: extractps $3, %xmm0, %eax
; X64-NEXT: retq
%t = bitcast <4 x float> %v to <4 x i32>
%s = extractelement <4 x i32> %t, i32 3
ret i32 %s
}
; The non-store form of extractps puts its result into a GPR.
; This makes it suitable for an extract from a <4 x float> that
; is bitcasted to i32, but unsuitable for much of anything else.
define float @ext_1(<4 x float> %v) nounwind {
; X32-LABEL: ext_1:
; X32: ## BB#0:
; X32-NEXT: pushl %eax
; X32-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; X32-NEXT: addss LCPI7_0, %xmm0
; X32-NEXT: movss %xmm0, (%esp)
; X32-NEXT: flds (%esp)
; X32-NEXT: popl %eax
; X32-NEXT: retl
;
; X64-LABEL: ext_1:
; X64: ## BB#0:
; X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; X64-NEXT: addss {{.*}}(%rip), %xmm0
; X64-NEXT: retq
%s = extractelement <4 x float> %v, i32 3
%t = fadd float %s, 1.0
ret float %t
}
define float @ext_2(<4 x float> %v) nounwind {
; X32-LABEL: ext_2:
; X32: ## BB#0:
; X32-NEXT: pushl %eax
; X32-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; X32-NEXT: movss %xmm0, (%esp)
; X32-NEXT: flds (%esp)
; X32-NEXT: popl %eax
; X32-NEXT: retl
;
; X64-LABEL: ext_2:
; X64: ## BB#0:
; X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; X64-NEXT: retq
%s = extractelement <4 x float> %v, i32 3
ret float %s
}
define i32 @ext_3(<4 x i32> %v) nounwind {
; X32-LABEL: ext_3:
; X32: ## BB#0:
; X32-NEXT: pextrd $3, %xmm0, %eax
; X32-NEXT: retl
;
; X64-LABEL: ext_3:
; X64: ## BB#0:
; X64-NEXT: pextrd $3, %xmm0, %eax
; X64-NEXT: retq
%i = extractelement <4 x i32> %v, i32 3
ret i32 %i
}
define <4 x float> @insertps_1(<4 x float> %t1, <4 x float> %t2) nounwind {
; X32-LABEL: insertps_1:
; X32: ## BB#0:
; X32-NEXT: insertps {{.*#+}} xmm0 = zero,xmm0[1,2,3]
; X32-NEXT: retl
;
; X64-LABEL: insertps_1:
; X64: ## BB#0:
; X64-NEXT: insertps {{.*#+}} xmm0 = zero,xmm0[1,2,3]
; X64-NEXT: retq
%tmp1 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %t1, <4 x float> %t2, i32 1) nounwind readnone
ret <4 x float> %tmp1
}
declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32) nounwind readnone
; When optimizing for speed, prefer blendps over insertps even if it means we have to
; generate a separate movss to load the scalar operand.
define <4 x float> @blendps_not_insertps_1(<4 x float> %t1, float %t2) nounwind {
; X32-LABEL: blendps_not_insertps_1:
; X32: ## BB#0:
; X32-NEXT: movss {{.*#+}} xmm1
; X32-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; X32-NEXT: retl
;
; X64-LABEL: blendps_not_insertps_1:
; X64: ## BB#0:
; X64-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; X64-NEXT: retq
%tmp1 = insertelement <4 x float> %t1, float %t2, i32 0
ret <4 x float> %tmp1
}
; When optimizing for size, generate an insertps if there's a load fold opportunity.
; The difference between i386 and x86-64 ABIs for the float operand means we should
; generate an insertps for X32 but not for X64!
define <4 x float> @insertps_or_blendps(<4 x float> %t1, float %t2) minsize nounwind {
; X32-LABEL: insertps_or_blendps:
; X32: ## BB#0:
; X32-NEXT: insertps {{.*#+}} xmm0 = mem[0],xmm0[1,2,3]
; X32-NEXT: retl
;
; X64-LABEL: insertps_or_blendps:
; X64: ## BB#0:
; X64-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; X64-NEXT: retq
%tmp1 = insertelement <4 x float> %t1, float %t2, i32 0
ret <4 x float> %tmp1
}
; An insert into the low 32-bits of a vector from the low 32-bits of another vector
; is always just a blendps because blendps is never more expensive than insertps.
define <4 x float> @blendps_not_insertps_2(<4 x float> %t1, <4 x float> %t2) nounwind {
; X32-LABEL: blendps_not_insertps_2:
; X32: ## BB#0:
; X32-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; X32-NEXT: retl
;
; X64-LABEL: blendps_not_insertps_2:
; X64: ## BB#0:
; X64-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; X64-NEXT: retq
%tmp2 = extractelement <4 x float> %t2, i32 0
%tmp1 = insertelement <4 x float> %t1, float %tmp2, i32 0
ret <4 x float> %tmp1
}
define i32 @ptestz_1(<2 x i64> %t1, <2 x i64> %t2) nounwind {
; X32-LABEL: ptestz_1:
; X32: ## BB#0:
; X32-NEXT: ptest %xmm1, %xmm0
; X32-NEXT: sete %al
; X32-NEXT: movzbl %al, %eax
; X32-NEXT: retl
;
; X64-LABEL: ptestz_1:
; X64: ## BB#0:
; X64-NEXT: ptest %xmm1, %xmm0
; X64-NEXT: sete %al
; X64-NEXT: movzbl %al, %eax
; X64-NEXT: retq
%tmp1 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %t1, <2 x i64> %t2) nounwind readnone
ret i32 %tmp1
}
define i32 @ptestz_2(<2 x i64> %t1, <2 x i64> %t2) nounwind {
; X32-LABEL: ptestz_2:
; X32: ## BB#0:
; X32-NEXT: ptest %xmm1, %xmm0
; X32-NEXT: sbbl %eax, %eax
; X32-NEXT: andl $1, %eax
; X32-NEXT: retl
;
; X64-LABEL: ptestz_2:
; X64: ## BB#0:
; X64-NEXT: ptest %xmm1, %xmm0
; X64-NEXT: sbbl %eax, %eax
; X64-NEXT: andl $1, %eax
; X64-NEXT: retq
%tmp1 = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %t1, <2 x i64> %t2) nounwind readnone
ret i32 %tmp1
}
define i32 @ptestz_3(<2 x i64> %t1, <2 x i64> %t2) nounwind {
; X32-LABEL: ptestz_3:
; X32: ## BB#0:
; X32-NEXT: ptest %xmm1, %xmm0
; X32-NEXT: seta %al
; X32-NEXT: movzbl %al, %eax
; X32-NEXT: retl
;
; X64-LABEL: ptestz_3:
; X64: ## BB#0:
; X64-NEXT: ptest %xmm1, %xmm0
; X64-NEXT: seta %al
; X64-NEXT: movzbl %al, %eax
; X64-NEXT: retq
%tmp1 = call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %t1, <2 x i64> %t2) nounwind readnone
ret i32 %tmp1
}
declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone
declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone
declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone
; This used to compile to insertps $0 + insertps $16. insertps $0 is always
; pointless.
define <2 x float> @buildvector(<2 x float> %A, <2 x float> %B) nounwind {
; X32-LABEL: buildvector:
; X32: ## BB#0: ## %entry
; X32-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; X32-NEXT: movshdup {{.*#+}} xmm3 = xmm1[1,1,3,3]
; X32-NEXT: addss %xmm1, %xmm0
; X32-NEXT: addss %xmm2, %xmm3
; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[2,3]
; X32-NEXT: retl
;
; X64-LABEL: buildvector:
; X64: ## BB#0: ## %entry
; X64-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; X64-NEXT: movshdup {{.*#+}} xmm3 = xmm1[1,1,3,3]
; X64-NEXT: addss %xmm1, %xmm0
; X64-NEXT: addss %xmm2, %xmm3
; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[2,3]
; X64-NEXT: retq
entry:
%tmp7 = extractelement <2 x float> %A, i32 0
%tmp5 = extractelement <2 x float> %A, i32 1
%tmp3 = extractelement <2 x float> %B, i32 0
%tmp1 = extractelement <2 x float> %B, i32 1
%add.r = fadd float %tmp7, %tmp3
%add.i = fadd float %tmp5, %tmp1
%tmp11 = insertelement <2 x float> undef, float %add.r, i32 0
%tmp9 = insertelement <2 x float> %tmp11, float %add.i, i32 1
ret <2 x float> %tmp9
}
define <4 x float> @insertps_from_shufflevector_1(<4 x float> %a, <4 x float>* nocapture readonly %pb) {
; X32-LABEL: insertps_from_shufflevector_1:
; X32: ## BB#0: ## %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
; X32-NEXT: retl
;
; X64-LABEL: insertps_from_shufflevector_1:
; X64: ## BB#0: ## %entry
; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
; X64-NEXT: retq
entry:
%0 = load <4 x float>, <4 x float>* %pb, align 16
%vecinit6 = shufflevector <4 x float> %a, <4 x float> %0, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
ret <4 x float> %vecinit6
}
define <4 x float> @insertps_from_shufflevector_2(<4 x float> %a, <4 x float> %b) {
; X32-LABEL: insertps_from_shufflevector_2:
; X32: ## BB#0: ## %entry
; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[1],xmm0[3]
; X32-NEXT: retl
;
; X64-LABEL: insertps_from_shufflevector_2:
; X64: ## BB#0: ## %entry
; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[1],xmm0[3]
; X64-NEXT: retq
entry:
%vecinit6 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 5, i32 3>
ret <4 x float> %vecinit6
}
; For loading an i32 from memory into an xmm register we use pinsrd
; instead of insertps
define <4 x i32> @pinsrd_from_shufflevector_i32(<4 x i32> %a, <4 x i32>* nocapture readonly %pb) {
; X32-LABEL: pinsrd_from_shufflevector_i32:
; X32: ## BB#0: ## %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: pshufd {{.*#+}} xmm1 = mem[0,1,2,0]
; X32-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7]
; X32-NEXT: retl
;
; X64-LABEL: pinsrd_from_shufflevector_i32:
; X64: ## BB#0: ## %entry
; X64-NEXT: pshufd {{.*#+}} xmm1 = mem[0,1,2,0]
; X64-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7]
; X64-NEXT: retq
entry:
%0 = load <4 x i32>, <4 x i32>* %pb, align 16
%vecinit6 = shufflevector <4 x i32> %a, <4 x i32> %0, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
ret <4 x i32> %vecinit6
}
; Integer variant: mask <0,7,2,3> inserts element 3 of %b into lane 1;
; lowered as pshufd (to rotate the source) plus pblendw.
define <4 x i32> @insertps_from_shufflevector_i32_2(<4 x i32> %a, <4 x i32> %b) {
; X32-LABEL: insertps_from_shufflevector_i32_2:
; X32: ## BB#0: ## %entry
; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; X32-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
; X32-NEXT: retl
;
; X64-LABEL: insertps_from_shufflevector_i32_2:
; X64: ## BB#0: ## %entry
; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; X64-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
; X64-NEXT: retq
entry:
 %vecinit6 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 7, i32 2, i32 3>
 ret <4 x i32> %vecinit6
}
; Scalar float load inserted into lane 1 via insertelement+shufflevector;
; the load folds directly into insertps as a memory operand.
define <4 x float> @insertps_from_load_ins_elt_undef(<4 x float> %a, float* %b) {
; X32-LABEL: insertps_from_load_ins_elt_undef:
; X32: ## BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
; X32-NEXT: retl
;
; X64-LABEL: insertps_from_load_ins_elt_undef:
; X64: ## BB#0:
; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
; X64-NEXT: retq
 %1 = load float, float* %b, align 4
 %2 = insertelement <4 x float> undef, float %1, i32 0
 %result = shufflevector <4 x float> %a, <4 x float> %2, <4 x i32> <i32 0, i32 4, i32 2, i32 3>
 ret <4 x float> %result
}
; TODO: Like on pinsrd_from_shufflevector_i32, remove this mov instr
; i32 variant of the test above: the scalar load goes through movd+pshufd
; before being blended into lane 2 (see the TODO comment above).
define <4 x i32> @insertps_from_load_ins_elt_undef_i32(<4 x i32> %a, i32* %b) {
; X32-LABEL: insertps_from_load_ins_elt_undef_i32:
; X32: ## BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
; X32-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
; X32-NEXT: retl
;
; X64-LABEL: insertps_from_load_ins_elt_undef_i32:
; X64: ## BB#0:
; X64-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
; X64-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
; X64-NEXT: retq
 %1 = load i32, i32* %b, align 4
 %2 = insertelement <4 x i32> undef, i32 %1, i32 0
 %result = shufflevector <4 x i32> %a, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 4, i32 3>
 ret <4 x i32> %result
}
;;;;;; Shuffles optimizable with a single insertps or blend instruction
; Build vector <x0,x1,x2,0.0> element-by-element; lowered as a zeroed
; register plus a single blendps.
define <4 x float> @shuf_XYZ0(<4 x float> %x, <4 x float> %a) {
; X32-LABEL: shuf_XYZ0:
; X32: ## BB#0:
; X32-NEXT: xorps %xmm1, %xmm1
; X32-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
; X32-NEXT: retl
;
; X64-LABEL: shuf_XYZ0:
; X64: ## BB#0:
; X64-NEXT: xorps %xmm1, %xmm1
; X64-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
; X64-NEXT: retq
 %vecext = extractelement <4 x float> %x, i32 0
 %vecinit = insertelement <4 x float> undef, float %vecext, i32 0
 %vecext1 = extractelement <4 x float> %x, i32 1
 %vecinit2 = insertelement <4 x float> %vecinit, float %vecext1, i32 1
 %vecext3 = extractelement <4 x float> %x, i32 2
 %vecinit4 = insertelement <4 x float> %vecinit2, float %vecext3, i32 2
 %vecinit5 = insertelement <4 x float> %vecinit4, float 0.0, i32 3
 ret <4 x float> %vecinit5
}
; Build vector <x0,x1,0.0,0.0>; lowered as a single movq that zeroes the
; upper 64 bits.
define <4 x float> @shuf_XY00(<4 x float> %x, <4 x float> %a) {
; X32-LABEL: shuf_XY00:
; X32: ## BB#0:
; X32-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
; X32-NEXT: retl
;
; X64-LABEL: shuf_XY00:
; X64: ## BB#0:
; X64-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
; X64-NEXT: retq
 %vecext = extractelement <4 x float> %x, i32 0
 %vecinit = insertelement <4 x float> undef, float %vecext, i32 0
 %vecext1 = extractelement <4 x float> %x, i32 1
 %vecinit2 = insertelement <4 x float> %vecinit, float %vecext1, i32 1
 %vecinit3 = insertelement <4 x float> %vecinit2, float 0.0, i32 2
 %vecinit4 = insertelement <4 x float> %vecinit3, float 0.0, i32 3
 ret <4 x float> %vecinit4
}
; Build vector <x0,x1,x1,0.0>; a single insertps with a zero-mask bit on
; lane 3 covers both the duplicate and the zero.
define <4 x float> @shuf_XYY0(<4 x float> %x, <4 x float> %a) {
; X32-LABEL: shuf_XYY0:
; X32: ## BB#0:
; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,1],zero
; X32-NEXT: retl
;
; X64-LABEL: shuf_XYY0:
; X64: ## BB#0:
; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,1],zero
; X64-NEXT: retq
 %vecext = extractelement <4 x float> %x, i32 0
 %vecinit = insertelement <4 x float> undef, float %vecext, i32 0
 %vecext1 = extractelement <4 x float> %x, i32 1
 %vecinit2 = insertelement <4 x float> %vecinit, float %vecext1, i32 1
 %vecinit4 = insertelement <4 x float> %vecinit2, float %vecext1, i32 2
 %vecinit5 = insertelement <4 x float> %vecinit4, float 0.0, i32 3
 ret <4 x float> %vecinit5
}
; Build vector <x0,x1,x3,0.0>; single insertps placing lane 3 of %x into
; lane 2 and zeroing lane 3.
define <4 x float> @shuf_XYW0(<4 x float> %x, <4 x float> %a) {
; X32-LABEL: shuf_XYW0:
; X32: ## BB#0:
; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,3],zero
; X32-NEXT: retl
;
; X64-LABEL: shuf_XYW0:
; X64: ## BB#0:
; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,3],zero
; X64-NEXT: retq
 %vecext = extractelement <4 x float> %x, i32 0
 %vecinit = insertelement <4 x float> undef, float %vecext, i32 0
 %vecext1 = extractelement <4 x float> %x, i32 1
 %vecinit2 = insertelement <4 x float> %vecinit, float %vecext1, i32 1
 %vecext2 = extractelement <4 x float> %x, i32 3
 %vecinit3 = insertelement <4 x float> %vecinit2, float %vecext2, i32 2
 %vecinit4 = insertelement <4 x float> %vecinit3, float 0.0, i32 3
 ret <4 x float> %vecinit4
}
; Build vector <x3,0.0,0.0,x3>; single insertps duplicating lane 3 and
; zeroing the middle lanes.
define <4 x float> @shuf_W00W(<4 x float> %x, <4 x float> %a) {
; X32-LABEL: shuf_W00W:
; X32: ## BB#0:
; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[3],zero,zero,xmm0[3]
; X32-NEXT: retl
;
; X64-LABEL: shuf_W00W:
; X64: ## BB#0:
; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[3],zero,zero,xmm0[3]
; X64-NEXT: retq
 %vecext = extractelement <4 x float> %x, i32 3
 %vecinit = insertelement <4 x float> undef, float %vecext, i32 0
 %vecinit2 = insertelement <4 x float> %vecinit, float 0.0, i32 1
 %vecinit3 = insertelement <4 x float> %vecinit2, float 0.0, i32 2
 %vecinit4 = insertelement <4 x float> %vecinit3, float %vecext, i32 3
 ret <4 x float> %vecinit4
}
; Build vector <x0,0.0,0.0,a0>; two source registers are needed, so the
; current lowering is a zeroing blendps followed by insertps.
define <4 x float> @shuf_X00A(<4 x float> %x, <4 x float> %a) {
; X32-LABEL: shuf_X00A:
; X32: ## BB#0:
; X32-NEXT: xorps %xmm2, %xmm2
; X32-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; X32-NEXT: retl
;
; X64-LABEL: shuf_X00A:
; X64: ## BB#0:
; X64-NEXT: xorps %xmm2, %xmm2
; X64-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; X64-NEXT: retq
 %vecext = extractelement <4 x float> %x, i32 0
 %vecinit = insertelement <4 x float> undef, float %vecext, i32 0
 %vecinit1 = insertelement <4 x float> %vecinit, float 0.0, i32 1
 %vecinit2 = insertelement <4 x float> %vecinit1, float 0.0, i32 2
 %vecinit4 = shufflevector <4 x float> %vecinit2, <4 x float> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
 ret <4 x float> %vecinit4
}
; Build vector <x0,0.0,0.0,x0>; a single source register, so one insertps
; with zero-mask bits suffices.
define <4 x float> @shuf_X00X(<4 x float> %x, <4 x float> %a) {
; X32-LABEL: shuf_X00X:
; X32: ## BB#0:
; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,zero,xmm0[0]
; X32-NEXT: retl
;
; X64-LABEL: shuf_X00X:
; X64: ## BB#0:
; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,zero,xmm0[0]
; X64-NEXT: retq
 %vecext = extractelement <4 x float> %x, i32 0
 %vecinit = insertelement <4 x float> undef, float %vecext, i32 0
 %vecinit1 = insertelement <4 x float> %vecinit, float 0.0, i32 1
 %vecinit2 = insertelement <4 x float> %vecinit1, float 0.0, i32 2
 %vecinit4 = shufflevector <4 x float> %vecinit2, <4 x float> %x, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
 ret <4 x float> %vecinit4
}
; Build vector <x0,0.0,x1,a2> from chained shuffles; lowered as two
; insertps instructions (the first also zeroes lanes 1 and 3).
define <4 x float> @shuf_X0YC(<4 x float> %x, <4 x float> %a) {
; X32-LABEL: shuf_X0YC:
; X32: ## BB#0:
; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[2]
; X32-NEXT: retl
;
; X64-LABEL: shuf_X0YC:
; X64: ## BB#0:
; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[2]
; X64-NEXT: retq
 %vecext = extractelement <4 x float> %x, i32 0
 %vecinit = insertelement <4 x float> undef, float %vecext, i32 0
 %vecinit1 = insertelement <4 x float> %vecinit, float 0.0, i32 1
 %vecinit3 = shufflevector <4 x float> %vecinit1, <4 x float> %x, <4 x i32> <i32 0, i32 1, i32 5, i32 undef>
 %vecinit5 = shufflevector <4 x float> %vecinit3, <4 x float> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
 ret <4 x float> %vecinit5
}
; Integer version of shuf_XYZ0: build <x0,x1,x2,0>; lowered as pxor plus a
; single pblendw.
define <4 x i32> @i32_shuf_XYZ0(<4 x i32> %x, <4 x i32> %a) {
; X32-LABEL: i32_shuf_XYZ0:
; X32: ## BB#0:
; X32-NEXT: pxor %xmm1, %xmm1
; X32-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7]
; X32-NEXT: retl
;
; X64-LABEL: i32_shuf_XYZ0:
; X64: ## BB#0:
; X64-NEXT: pxor %xmm1, %xmm1
; X64-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7]
; X64-NEXT: retq
 %vecext = extractelement <4 x i32> %x, i32 0
 %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
 %vecext1 = extractelement <4 x i32> %x, i32 1
 %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
 %vecext3 = extractelement <4 x i32> %x, i32 2
 %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %vecext3, i32 2
 %vecinit5 = insertelement <4 x i32> %vecinit4, i32 0, i32 3
 ret <4 x i32> %vecinit5
}
; Integer version of shuf_XY00: build <x0,x1,0,0>; lowered as a single
; zero-extending movq.
define <4 x i32> @i32_shuf_XY00(<4 x i32> %x, <4 x i32> %a) {
; X32-LABEL: i32_shuf_XY00:
; X32: ## BB#0:
; X32-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
; X32-NEXT: retl
;
; X64-LABEL: i32_shuf_XY00:
; X64: ## BB#0:
; X64-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
; X64-NEXT: retq
 %vecext = extractelement <4 x i32> %x, i32 0
 %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
 %vecext1 = extractelement <4 x i32> %x, i32 1
 %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
 %vecinit3 = insertelement <4 x i32> %vecinit2, i32 0, i32 2
 %vecinit4 = insertelement <4 x i32> %vecinit3, i32 0, i32 3
 ret <4 x i32> %vecinit4
}
; Integer version of shuf_XYY0: build <x0,x1,x1,0>; no integer insertps
; exists, so the lowering is pshufd + pxor + pblendw.
define <4 x i32> @i32_shuf_XYY0(<4 x i32> %x, <4 x i32> %a) {
; X32-LABEL: i32_shuf_XYY0:
; X32: ## BB#0:
; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,1,3]
; X32-NEXT: pxor %xmm0, %xmm0
; X32-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7]
; X32-NEXT: retl
;
; X64-LABEL: i32_shuf_XYY0:
; X64: ## BB#0:
; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,1,3]
; X64-NEXT: pxor %xmm0, %xmm0
; X64-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7]
; X64-NEXT: retq
 %vecext = extractelement <4 x i32> %x, i32 0
 %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
 %vecext1 = extractelement <4 x i32> %x, i32 1
 %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
 %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %vecext1, i32 2
 %vecinit5 = insertelement <4 x i32> %vecinit4, i32 0, i32 3
 ret <4 x i32> %vecinit5
}
; Integer version of shuf_XYW0: build <x0,x1,x3,0>; lowered as
; pshufd + pxor + pblendw.
define <4 x i32> @i32_shuf_XYW0(<4 x i32> %x, <4 x i32> %a) {
; X32-LABEL: i32_shuf_XYW0:
; X32: ## BB#0:
; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,3,3]
; X32-NEXT: pxor %xmm0, %xmm0
; X32-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7]
; X32-NEXT: retl
;
; X64-LABEL: i32_shuf_XYW0:
; X64: ## BB#0:
; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,3,3]
; X64-NEXT: pxor %xmm0, %xmm0
; X64-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7]
; X64-NEXT: retq
 %vecext = extractelement <4 x i32> %x, i32 0
 %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
 %vecext1 = extractelement <4 x i32> %x, i32 1
 %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
 %vecext2 = extractelement <4 x i32> %x, i32 3
 %vecinit3 = insertelement <4 x i32> %vecinit2, i32 %vecext2, i32 2
 %vecinit4 = insertelement <4 x i32> %vecinit3, i32 0, i32 3
 ret <4 x i32> %vecinit4
}
; Integer version of shuf_W00W: build <x3,0,0,x3>; lowered as
; pshufd + pxor + pblendw.
define <4 x i32> @i32_shuf_W00W(<4 x i32> %x, <4 x i32> %a) {
; X32-LABEL: i32_shuf_W00W:
; X32: ## BB#0:
; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
; X32-NEXT: pxor %xmm0, %xmm0
; X32-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5],xmm1[6,7]
; X32-NEXT: retl
;
; X64-LABEL: i32_shuf_W00W:
; X64: ## BB#0:
; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
; X64-NEXT: pxor %xmm0, %xmm0
; X64-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5],xmm1[6,7]
; X64-NEXT: retq
 %vecext = extractelement <4 x i32> %x, i32 3
 %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
 %vecinit2 = insertelement <4 x i32> %vecinit, i32 0, i32 1
 %vecinit3 = insertelement <4 x i32> %vecinit2, i32 0, i32 2
 %vecinit4 = insertelement <4 x i32> %vecinit3, i32 %vecext, i32 3
 ret <4 x i32> %vecinit4
}
; Integer version of shuf_X00A: build <x0,0,0,a0>; two blends are needed
; since the value comes from two source registers.
define <4 x i32> @i32_shuf_X00A(<4 x i32> %x, <4 x i32> %a) {
; X32-LABEL: i32_shuf_X00A:
; X32: ## BB#0:
; X32-NEXT: pxor %xmm2, %xmm2
; X32-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3,4,5,6,7]
; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,2,0]
; X32-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7]
; X32-NEXT: retl
;
; X64-LABEL: i32_shuf_X00A:
; X64: ## BB#0:
; X64-NEXT: pxor %xmm2, %xmm2
; X64-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3,4,5,6,7]
; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,2,0]
; X64-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7]
; X64-NEXT: retq
 %vecext = extractelement <4 x i32> %x, i32 0
 %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
 %vecinit1 = insertelement <4 x i32> %vecinit, i32 0, i32 1
 %vecinit2 = insertelement <4 x i32> %vecinit1, i32 0, i32 2
 %vecinit4 = shufflevector <4 x i32> %vecinit2, <4 x i32> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
 ret <4 x i32> %vecinit4
}
; Integer version of shuf_X00X: build <x0,0,0,x0>; lowered as
; pshufd + pxor + pblendw.
define <4 x i32> @i32_shuf_X00X(<4 x i32> %x, <4 x i32> %a) {
; X32-LABEL: i32_shuf_X00X:
; X32: ## BB#0:
; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,2,0]
; X32-NEXT: pxor %xmm0, %xmm0
; X32-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5],xmm1[6,7]
; X32-NEXT: retl
;
; X64-LABEL: i32_shuf_X00X:
; X64: ## BB#0:
; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,2,0]
; X64-NEXT: pxor %xmm0, %xmm0
; X64-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5],xmm1[6,7]
; X64-NEXT: retq
 %vecext = extractelement <4 x i32> %x, i32 0
 %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
 %vecinit1 = insertelement <4 x i32> %vecinit, i32 0, i32 1
 %vecinit2 = insertelement <4 x i32> %vecinit1, i32 0, i32 2
 %vecinit4 = shufflevector <4 x i32> %vecinit2, <4 x i32> %x, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
 ret <4 x i32> %vecinit4
}
; Integer version of shuf_X0YC: build <x0,0,x1,a2>; the zero-insert half is
; recognized as pmovzxdq, then the last lane is blended in.
define <4 x i32> @i32_shuf_X0YC(<4 x i32> %x, <4 x i32> %a) {
; X32-LABEL: i32_shuf_X0YC:
; X32: ## BB#0:
; X32-NEXT: pmovzxdq {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero
; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,2,2]
; X32-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1,2,3,4,5],xmm0[6,7]
; X32-NEXT: retl
;
; X64-LABEL: i32_shuf_X0YC:
; X64: ## BB#0:
; X64-NEXT: pmovzxdq {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,2,2]
; X64-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1,2,3,4,5],xmm0[6,7]
; X64-NEXT: retq
 %vecext = extractelement <4 x i32> %x, i32 0
 %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
 %vecinit1 = insertelement <4 x i32> %vecinit, i32 0, i32 1
 %vecinit3 = shufflevector <4 x i32> %vecinit1, <4 x i32> %x, <4 x i32> <i32 0, i32 1, i32 5, i32 undef>
 %vecinit5 = shufflevector <4 x i32> %vecinit3, <4 x i32> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
 ret <4 x i32> %vecinit5
}
;; Test for a bug in the first implementation of LowerBuildVectorv4x32
; Computes max(x, <x0,x1,x2,0>) via fcmp olt + select, so the build_vector
; result is fully used and cannot be simplified away via undef lanes.
define < 4 x float> @test_insertps_no_undef(<4 x float> %x) {
; X32-LABEL: test_insertps_no_undef:
; X32: ## BB#0:
; X32-NEXT: xorps %xmm1, %xmm1
; X32-NEXT: blendps {{.*#+}} xmm1 = xmm0[0,1,2],xmm1[3]
; X32-NEXT: maxps %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_insertps_no_undef:
; X64: ## BB#0:
; X64-NEXT: xorps %xmm1, %xmm1
; X64-NEXT: blendps {{.*#+}} xmm1 = xmm0[0,1,2],xmm1[3]
; X64-NEXT: maxps %xmm1, %xmm0
; X64-NEXT: retq
 %vecext = extractelement <4 x float> %x, i32 0
 %vecinit = insertelement <4 x float> undef, float %vecext, i32 0
 %vecext1 = extractelement <4 x float> %x, i32 1
 %vecinit2 = insertelement <4 x float> %vecinit, float %vecext1, i32 1
 %vecext3 = extractelement <4 x float> %x, i32 2
 %vecinit4 = insertelement <4 x float> %vecinit2, float %vecext3, i32 2
 %vecinit5 = insertelement <4 x float> %vecinit4, float 0.0, i32 3
 %mask = fcmp olt <4 x float> %vecinit5, %x
 %res = select <4 x i1> %mask, <4 x float> %x, <4 x float>%vecinit5
 ret <4 x float> %res
}
; Select on an <8 x i1> mask: the mask is sign-extended to lane width with
; psllw/psraw, then pblendvb performs the byte-wise select.
define <8 x i16> @blendvb_fallback(<8 x i1> %mask, <8 x i16> %x, <8 x i16> %y) {
; X32-LABEL: blendvb_fallback:
; X32: ## BB#0:
; X32-NEXT: psllw $15, %xmm0
; X32-NEXT: psraw $15, %xmm0
; X32-NEXT: pblendvb %xmm1, %xmm2
; X32-NEXT: movdqa %xmm2, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: blendvb_fallback:
; X64: ## BB#0:
; X64-NEXT: psllw $15, %xmm0
; X64-NEXT: psraw $15, %xmm0
; X64-NEXT: pblendvb %xmm1, %xmm2
; X64-NEXT: movdqa %xmm2, %xmm0
; X64-NEXT: retq
 %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> %y
 ret <8 x i16> %ret
}
; On X32, account for the argument's move to registers
; insertps intrinsic (imm = 48 = 0x30: src elem 0 -> dst lane 3); the
; vector load should fold into the insertps memory operand.
define <4 x float> @insertps_from_vector_load(<4 x float> %a, <4 x float>* nocapture readonly %pb) {
; X32-LABEL: insertps_from_vector_load:
; X32: ## BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
; X32-NEXT: retl
;
; X64-LABEL: insertps_from_vector_load:
; X64: ## BB#0:
; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
; X64-NEXT: retq
 %1 = load <4 x float>, <4 x float>* %pb, align 16
 %2 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %1, i32 48)
 ret <4 x float> %2
}
;; Use a non-zero CountS for insertps
;; Try to match a bit more of the instr, since we need the load's offset.
; insertps intrinsic with imm = 96 = 0x60 (src elem 1 -> dst lane 2); the
; non-zero CountS means the folded load addresses element 1 of the vector.
define <4 x float> @insertps_from_vector_load_offset(<4 x float> %a, <4 x float>* nocapture readonly %pb) {
; X32-LABEL: insertps_from_vector_load_offset:
; X32: ## BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],mem[1],xmm0[3]
; X32-NEXT: retl
;
; X64-LABEL: insertps_from_vector_load_offset:
; X64: ## BB#0:
; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],mem[1],xmm0[3]
; X64-NEXT: retq
 %1 = load <4 x float>, <4 x float>* %pb, align 16
 %2 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %1, i32 96)
 ret <4 x float> %2
}
;; Try to match a bit more of the instr, since we need the load's offset.
; insertps intrinsic with imm = 192 = 0xC0 (src elem 3 -> dst lane 0) and a
; computed GEP index, so the address arithmetic (shl by 4) stays explicit.
define <4 x float> @insertps_from_vector_load_offset_2(<4 x float> %a, <4 x float>* nocapture readonly %pb, i64 %index) {
; X32-LABEL: insertps_from_vector_load_offset_2:
; X32: ## BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: shll $4, %ecx
; X32-NEXT: insertps {{.*#+}} xmm0 = mem[3],xmm0[1,2,3]
; X32-NEXT: retl
;
; X64-LABEL: insertps_from_vector_load_offset_2:
; X64: ## BB#0:
; X64-NEXT: shlq $4, %rsi
; X64-NEXT: insertps {{.*#+}} xmm0 = mem[3],xmm0[1,2,3]
; X64-NEXT: retq
 %1 = getelementptr inbounds <4 x float>, <4 x float>* %pb, i64 %index
 %2 = load <4 x float>, <4 x float>* %1, align 16
 %3 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %2, i32 192)
 ret <4 x float> %3
}
; A scalar load splatted into all four lanes (movss + shufps) feeding an
; insertps with imm = 48 (src elem 0 -> dst lane 3).
define <4 x float> @insertps_from_broadcast_loadf32(<4 x float> %a, float* nocapture readonly %fb, i64 %index) {
; X32-LABEL: insertps_from_broadcast_loadf32:
; X32: ## BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0,0,0]
; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; X32-NEXT: retl
;
; X64-LABEL: insertps_from_broadcast_loadf32:
; X64: ## BB#0:
; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X64-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0,0,0]
; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; X64-NEXT: retq
 %1 = getelementptr inbounds float, float* %fb, i64 %index
 %2 = load float, float* %1, align 4
 %3 = insertelement <4 x float> undef, float %2, i32 0
 %4 = insertelement <4 x float> %3, float %2, i32 1
 %5 = insertelement <4 x float> %4, float %2, i32 2
 %6 = insertelement <4 x float> %5, float %2, i32 3
 %7 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %6, i32 48)
 ret <4 x float> %7
}
; Same broadcast-then-insertps pattern, but sourcing element 0 of an
; unaligned <4 x float> load (hence movups).
define <4 x float> @insertps_from_broadcast_loadv4f32(<4 x float> %a, <4 x float>* nocapture readonly %b) {
; X32-LABEL: insertps_from_broadcast_loadv4f32:
; X32: ## BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movups (%eax), %xmm1
; X32-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0,0,0]
; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; X32-NEXT: retl
;
; X64-LABEL: insertps_from_broadcast_loadv4f32:
; X64: ## BB#0:
; X64-NEXT: movups (%rdi), %xmm1
; X64-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0,0,0]
; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; X64-NEXT: retq
 %1 = load <4 x float>, <4 x float>* %b, align 4
 %2 = extractelement <4 x float> %1, i32 0
 %3 = insertelement <4 x float> undef, float %2, i32 0
 %4 = insertelement <4 x float> %3, float %2, i32 1
 %5 = insertelement <4 x float> %4, float %2, i32 2
 %6 = insertelement <4 x float> %5, float %2, i32 3
 %7 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %6, i32 48)
 ret <4 x float> %7
}
;; FIXME: We're emitting an extraneous pshufd/vbroadcast.
; The splatted load is shared by four insertps intrinsics (imm = 48 each);
; the splat should be materialized once and reused (see FIXME above).
define <4 x float> @insertps_from_broadcast_multiple_use(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, float* nocapture readonly %fb, i64 %index) {
; X32-LABEL: insertps_from_broadcast_multiple_use:
; X32: ## BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movss {{.*#+}} xmm4 = mem[0],zero,zero,zero
; X32-NEXT: shufps {{.*#+}} xmm4 = xmm4[0,0,0,0]
; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm4[0]
; X32-NEXT: insertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm4[0]
; X32-NEXT: insertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm4[0]
; X32-NEXT: insertps {{.*#+}} xmm3 = xmm3[0,1,2],xmm4[0]
; X32-NEXT: addps %xmm1, %xmm0
; X32-NEXT: addps %xmm2, %xmm3
; X32-NEXT: addps %xmm3, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: insertps_from_broadcast_multiple_use:
; X64: ## BB#0:
; X64-NEXT: movss {{.*#+}} xmm4 = mem[0],zero,zero,zero
; X64-NEXT: shufps {{.*#+}} xmm4 = xmm4[0,0,0,0]
; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm4[0]
; X64-NEXT: insertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm4[0]
; X64-NEXT: insertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm4[0]
; X64-NEXT: insertps {{.*#+}} xmm3 = xmm3[0,1,2],xmm4[0]
; X64-NEXT: addps %xmm1, %xmm0
; X64-NEXT: addps %xmm2, %xmm3
; X64-NEXT: addps %xmm3, %xmm0
; X64-NEXT: retq
 %1 = getelementptr inbounds float, float* %fb, i64 %index
 %2 = load float, float* %1, align 4
 %3 = insertelement <4 x float> undef, float %2, i32 0
 %4 = insertelement <4 x float> %3, float %2, i32 1
 %5 = insertelement <4 x float> %4, float %2, i32 2
 %6 = insertelement <4 x float> %5, float %2, i32 3
 %7 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %6, i32 48)
 %8 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %b, <4 x float> %6, i32 48)
 %9 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %c, <4 x float> %6, i32 48)
 %10 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %d, <4 x float> %6, i32 48)
 %11 = fadd <4 x float> %7, %8
 %12 = fadd <4 x float> %9, %10
 %13 = fadd <4 x float> %11, %12
 ret <4 x float> %13
}
; Shuffle mask <4,undef,0,7> with a loaded scalar; current lowering is
; movss + unpcklpd rather than insertps.
define <4 x float> @insertps_with_undefs(<4 x float> %a, float* %b) {
; X32-LABEL: insertps_with_undefs:
; X32: ## BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; X32-NEXT: movapd %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: insertps_with_undefs:
; X64: ## BB#0:
; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X64-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; X64-NEXT: movapd %xmm1, %xmm0
; X64-NEXT: retq
 %1 = load float, float* %b, align 4
 %2 = insertelement <4 x float> undef, float %1, i32 0
 %result = shufflevector <4 x float> %a, <4 x float> %2, <4 x i32> <i32 4, i32 undef, i32 0, i32 7>
 ret <4 x float> %result
}
; Test for a bug in X86ISelLowering.cpp:getINSERTPS where we were using
; the destination index to change the load, instead of the source index.
; PR20087 regression test (see comment above): the folded load must be
; addressed via the source index (mem[2]), not the destination index.
define <4 x float> @pr20087(<4 x float> %a, <4 x float> *%ptr) {
; X32-LABEL: pr20087:
; X32: ## BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[2],mem[2]
; X32-NEXT: retl
;
; X64-LABEL: pr20087:
; X64: ## BB#0:
; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[2],mem[2]
; X64-NEXT: retq
 %load = load <4 x float> , <4 x float> *%ptr
 %ret = shufflevector <4 x float> %load, <4 x float> %a, <4 x i32> <i32 4, i32 undef, i32 6, i32 2>
 ret <4 x float> %ret
}
; Edge case for insertps where we end up with a shuffle with mask=<0, 7, -1, -1>
; PR20411 edge case: integer shuffle mask <0,7,undef,undef> whose result is
; stored to memory; lowered as pshufd + pblendw + movdqu.
define void @insertps_pr20411(<4 x i32> %shuffle109, <4 x i32> %shuffle116, i32* noalias nocapture %RET) #1 {
; X32-LABEL: insertps_pr20411:
; X32: ## BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; X32-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
; X32-NEXT: movdqu %xmm1, (%eax)
; X32-NEXT: retl
;
; X64-LABEL: insertps_pr20411:
; X64: ## BB#0:
; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; X64-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
; X64-NEXT: movdqu %xmm1, (%rdi)
; X64-NEXT: retq
 %shuffle117 = shufflevector <4 x i32> %shuffle109, <4 x i32> %shuffle116, <4 x i32> <i32 0, i32 7, i32 undef, i32 undef>
 %ptrcast = bitcast i32* %RET to <4 x i32>*
 store <4 x i32> %shuffle117, <4 x i32>* %ptrcast, align 4
 ret void
}
; Build vector <A0,0.0,B2,0.0>; a single insertps with zero-mask bits on
; lanes 1 and 3.
define <4 x float> @insertps_4(<4 x float> %A, <4 x float> %B) {
; X32-LABEL: insertps_4:
; X32: ## BB#0: ## %entry
; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm1[2],zero
; X32-NEXT: retl
;
; X64-LABEL: insertps_4:
; X64: ## BB#0: ## %entry
; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm1[2],zero
; X64-NEXT: retq
entry:
 %vecext = extractelement <4 x float> %A, i32 0
 %vecinit = insertelement <4 x float> undef, float %vecext, i32 0
 %vecinit1 = insertelement <4 x float> %vecinit, float 0.000000e+00, i32 1
 %vecext2 = extractelement <4 x float> %B, i32 2
 %vecinit3 = insertelement <4 x float> %vecinit1, float %vecext2, i32 2
 %vecinit4 = insertelement <4 x float> %vecinit3, float 0.000000e+00, i32 3
 ret <4 x float> %vecinit4
}
; Build vector <A0,B1,0.0,0.0>; a single insertps with zero-mask bits on
; lanes 2 and 3.
define <4 x float> @insertps_5(<4 x float> %A, <4 x float> %B) {
; X32-LABEL: insertps_5:
; X32: ## BB#0: ## %entry
; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[1],zero,zero
; X32-NEXT: retl
;
; X64-LABEL: insertps_5:
; X64: ## BB#0: ## %entry
; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[1],zero,zero
; X64-NEXT: retq
entry:
 %vecext = extractelement <4 x float> %A, i32 0
 %vecinit = insertelement <4 x float> undef, float %vecext, i32 0
 %vecext1 = extractelement <4 x float> %B, i32 1
 %vecinit2 = insertelement <4 x float> %vecinit, float %vecext1, i32 1
 %vecinit3 = insertelement <4 x float> %vecinit2, float 0.000000e+00, i32 2
 %vecinit4 = insertelement <4 x float> %vecinit3, float 0.000000e+00, i32 3
 ret <4 x float> %vecinit4
}
; Build vector <0.0,A1,B2,0.0> starting from a partially-defined constant;
; a single insertps with zero-mask bits on lanes 0 and 3.
define <4 x float> @insertps_6(<4 x float> %A, <4 x float> %B) {
; X32-LABEL: insertps_6:
; X32: ## BB#0: ## %entry
; X32-NEXT: insertps {{.*#+}} xmm0 = zero,xmm0[1],xmm1[2],zero
; X32-NEXT: retl
;
; X64-LABEL: insertps_6:
; X64: ## BB#0: ## %entry
; X64-NEXT: insertps {{.*#+}} xmm0 = zero,xmm0[1],xmm1[2],zero
; X64-NEXT: retq
entry:
 %vecext = extractelement <4 x float> %A, i32 1
 %vecinit = insertelement <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, float %vecext, i32 1
 %vecext1 = extractelement <4 x float> %B, i32 2
 %vecinit2 = insertelement <4 x float> %vecinit, float %vecext1, i32 2
 %vecinit3 = insertelement <4 x float> %vecinit2, float 0.000000e+00, i32 3
 ret <4 x float> %vecinit3
}
; Build vector <A0,0.0,B1,0.0>; a single insertps with zero-mask bits on
; lanes 1 and 3.
define <4 x float> @insertps_7(<4 x float> %A, <4 x float> %B) {
; X32-LABEL: insertps_7:
; X32: ## BB#0: ## %entry
; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm1[1],zero
; X32-NEXT: retl
;
; X64-LABEL: insertps_7:
; X64: ## BB#0: ## %entry
; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm1[1],zero
; X64-NEXT: retq
entry:
 %vecext = extractelement <4 x float> %A, i32 0
 %vecinit = insertelement <4 x float> undef, float %vecext, i32 0
 %vecinit1 = insertelement <4 x float> %vecinit, float 0.000000e+00, i32 1
 %vecext2 = extractelement <4 x float> %B, i32 1
 %vecinit3 = insertelement <4 x float> %vecinit1, float %vecext2, i32 2
 %vecinit4 = insertelement <4 x float> %vecinit3, float 0.000000e+00, i32 3
 ret <4 x float> %vecinit4
}
; Build vector <A0,B0,0.0,0.0>; a single insertps with zero-mask bits on
; lanes 2 and 3.
define <4 x float> @insertps_8(<4 x float> %A, <4 x float> %B) {
; X32-LABEL: insertps_8:
; X32: ## BB#0: ## %entry
; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; X32-NEXT: retl
;
; X64-LABEL: insertps_8:
; X64: ## BB#0: ## %entry
; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; X64-NEXT: retq
entry:
 %vecext = extractelement <4 x float> %A, i32 0
 %vecinit = insertelement <4 x float> undef, float %vecext, i32 0
 %vecext1 = extractelement <4 x float> %B, i32 0
 %vecinit2 = insertelement <4 x float> %vecinit, float %vecext1, i32 1
 %vecinit3 = insertelement <4 x float> %vecinit2, float 0.000000e+00, i32 2
 %vecinit4 = insertelement <4 x float> %vecinit3, float 0.000000e+00, i32 3
 ret <4 x float> %vecinit4
}
; Build vector <0.0,A0,B2,0.0>; insertps writes into %B's register, so an
; extra movaps copies the result back to xmm0.
define <4 x float> @insertps_9(<4 x float> %A, <4 x float> %B) {
; X32-LABEL: insertps_9:
; X32: ## BB#0: ## %entry
; X32-NEXT: insertps {{.*#+}} xmm1 = zero,xmm0[0],xmm1[2],zero
; X32-NEXT: movaps %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: insertps_9:
; X64: ## BB#0: ## %entry
; X64-NEXT: insertps {{.*#+}} xmm1 = zero,xmm0[0],xmm1[2],zero
; X64-NEXT: movaps %xmm1, %xmm0
; X64-NEXT: retq
entry:
 %vecext = extractelement <4 x float> %A, i32 0
 %vecinit = insertelement <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, float %vecext, i32 1
 %vecext1 = extractelement <4 x float> %B, i32 2
 %vecinit2 = insertelement <4 x float> %vecinit, float %vecext1, i32 2
 %vecinit3 = insertelement <4 x float> %vecinit2, float 0.000000e+00, i32 3
 ret <4 x float> %vecinit3
}
; Two inserts of the same scalar into a zero vector (<A0,0.0,A0,0.0>);
; folded into a single insertps with zero-mask bits on lanes 1 and 3.
define <4 x float> @insertps_10(<4 x float> %A)
; X32-LABEL: insertps_10:
; X32: ## BB#0:
; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[0],zero
; X32-NEXT: retl
;
; X64-LABEL: insertps_10:
; X64: ## BB#0:
; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[0],zero
; X64-NEXT: retq
{
 %vecext = extractelement <4 x float> %A, i32 0
 %vecbuild1 = insertelement <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float %vecext, i32 0
 %vecbuild2 = insertelement <4 x float> %vecbuild1, float %vecext, i32 2
 ret <4 x float> %vecbuild2
}
; Insert-into-zero chain plus a final shuffle keeping only A1 and A3;
; collapses to a zero register and one blendps.
define <4 x float> @build_vector_to_shuffle_1(<4 x float> %A) {
; X32-LABEL: build_vector_to_shuffle_1:
; X32: ## BB#0: ## %entry
; X32-NEXT: xorps %xmm1, %xmm1
; X32-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
; X32-NEXT: retl
;
; X64-LABEL: build_vector_to_shuffle_1:
; X64: ## BB#0: ## %entry
; X64-NEXT: xorps %xmm1, %xmm1
; X64-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
; X64-NEXT: retq
entry:
 %vecext = extractelement <4 x float> %A, i32 1
 %vecinit = insertelement <4 x float> zeroinitializer, float %vecext, i32 1
 %vecinit1 = insertelement <4 x float> %vecinit, float 0.0, i32 2
 %vecinit3 = shufflevector <4 x float> %vecinit1, <4 x float> %A, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
 ret <4 x float> %vecinit3
}
; Insert-into-zero chain keeping only A1 (<0.0,A1,0.0,0.0>); collapses to a
; zero register and one blendps.
define <4 x float> @build_vector_to_shuffle_2(<4 x float> %A) {
; X32-LABEL: build_vector_to_shuffle_2:
; X32: ## BB#0: ## %entry
; X32-NEXT: xorps %xmm1, %xmm1
; X32-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
; X32-NEXT: retl
;
; X64-LABEL: build_vector_to_shuffle_2:
; X64: ## BB#0: ## %entry
; X64-NEXT: xorps %xmm1, %xmm1
; X64-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
; X64-NEXT: retq
entry:
 %vecext = extractelement <4 x float> %A, i32 1
 %vecinit = insertelement <4 x float> zeroinitializer, float %vecext, i32 1
 %vecinit1 = insertelement <4 x float> %vecinit, float 0.0, i32 2
 ret <4 x float> %vecinit1
}