//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that LoongArch uses to lower LLVM code into
// a selection DAG.
//
//===----------------------------------------------------------------------===//

#include "LoongArchISelLowering.h"
#include "LoongArch.h"
#include "LoongArchMachineFunctionInfo.h"
#include "LoongArchRegisterInfo.h"
#include "LoongArchSubtarget.h"
#include "LoongArchTargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicsLoongArch.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"

using namespace llvm;

#define DEBUG_TYPE "loongarch-isel-lowering"

STATISTIC(NumTailCalls, "Number of tail calls");

static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
                                  cl::desc("Trap on integer division by zero."),
                                  cl::init(false));
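
// Usage sketch (illustrative, not from the original source): being a hidden
// cl::opt, the flag above is accepted by llc, e.g.
//   llc -mtriple=loongarch64 -loongarch-check-zero-division foo.ll
// which makes integer division emit an explicit divide-by-zero trap check.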

LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
                                                 const LoongArchSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {

  MVT GRLenVT = Subtarget.getGRLenVT();

  // Set up the register classes.

  addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
  if (Subtarget.hasBasicF())
    addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
  if (Subtarget.hasBasicD())
    addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);

  static const MVT::SimpleValueType LSXVTs[] = {
      MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
  static const MVT::SimpleValueType LASXVTs[] = {
      MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};

  if (Subtarget.hasExtLSX())
    for (MVT VT : LSXVTs)
      addRegisterClass(VT, &LoongArch::LSX128RegClass);

  if (Subtarget.hasExtLASX())
    for (MVT VT : LASXVTs)
      addRegisterClass(VT, &LoongArch::LASX256RegClass);

  // Set operations for LA32 and LA64.

  setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, GRLenVT,
                   MVT::i1, Promote);

  // ...

  setOperationAction({ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool,
                      ISD::JumpTable, ISD::GlobalTLSAddress},
                     GRLenVT, Custom);

  // ...

  // Expand bitreverse.i16 with native-width bitrev and shift for now, before
  // we get to know which of sll and revb.2h is faster.
  setOperationAction(ISD::BITREVERSE, MVT::i16, Custom);

  // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
  // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
  // and i32 could still be byte-swapped relatively cheaply.
  setOperationAction(ISD::BSWAP, MVT::i16, Custom);

  // ...

  // Set operations for LA64 only.

  if (Subtarget.is64Bit()) {
    // ...
  }

  // Set operations for LA32 only.

  if (!Subtarget.is64Bit()) {
    // ...

    // Set libcalls.
    setLibcallName(RTLIB::MUL_I128, nullptr);
    // The MULO libcall is not part of libgcc, only compiler-rt.
    setLibcallName(RTLIB::MULO_I64, nullptr);
  }

  // The MULO libcall is not part of libgcc, only compiler-rt.
  setLibcallName(RTLIB::MULO_I128, nullptr);

  // ...

  static const ISD::CondCode FPCCToExpand[] = {
      ISD::SETOGT, ISD::SETOGE, ISD::SETUGT, ISD::SETUGE,
      ISD::SETGE,  ISD::SETNE,  ISD::SETGT};

  // Set operations for 'F' feature.

  if (Subtarget.hasBasicF()) {
    setCondCodeAction(FPCCToExpand, MVT::f32, Expand);

    // ...

    if (Subtarget.is64Bit())
      setOperationAction(ISD::FRINT, MVT::f32, Legal);

    if (!Subtarget.hasBasicD()) {
      // ...
      if (Subtarget.is64Bit()) {
        setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
        setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
      }
    }
  }

  // Set operations for 'D' feature.

  if (Subtarget.hasBasicD()) {
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
    setCondCodeAction(FPCCToExpand, MVT::f64, Expand);

    // ...

    if (Subtarget.is64Bit())
      setOperationAction(ISD::FRINT, MVT::f64, Legal);
  }

  // Set operations for 'LSX' feature.

  if (Subtarget.hasExtLSX()) {
    for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
      // Expand all truncating stores and extending loads.
      for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
        setTruncStoreAction(VT, InnerVT, Expand);
        setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
      }
      // By default everything must be expanded. Then we will selectively turn
      // on ones that can be effectively codegen'd.
      for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
        setOperationAction(Op, VT, Expand);
    }

    for (MVT VT : LSXVTs) {
      // ...
    }
    for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
      // ...
    }
    for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
      // ...
    }
    for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
      // ...
    }
  }

  // Set operations for 'LASX' feature.

  if (Subtarget.hasExtLASX()) {
    for (MVT VT : LASXVTs) {
      // ...
    }
    for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
      // ...
    }
    for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
      // ...
    }
    for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
      // ...
    }
  }

  // Set DAG combine for LA32 and LA64.

  // ...

  // Set DAG combine for 'LSX' feature.

  if (Subtarget.hasExtLSX())
    setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);

  // Compute derived properties from the register classes.
  computeRegisterProperties(Subtarget.getRegisterInfo());

  setStackPointerRegisterToSaveRestore(LoongArch::R3);

  setBooleanContents(ZeroOrOneBooleanContent);
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);

  setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());

  setMinCmpXchgSizeInBits(32);

  // Function alignments.
  setMinFunctionAlignment(Align(4));
  // Set preferred alignments.
  setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
  setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
  setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment());
}

bool LoongArchTargetLowering::isOffsetFoldingLegal(
    const GlobalAddressSDNode *GA) const {
  // In order to maximise the opportunity for common subexpression elimination,
  // keep a separate ADD node for the global address offset instead of folding
  // it in the global address node. Later peephole optimisations may choose to
  // fold it back in when profitable.
  return false;
}

SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
                                                SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  case ISD::ATOMIC_FENCE:
    return lowerATOMIC_FENCE(Op, DAG);
  case ISD::EH_DWARF_CFA:
    return lowerEH_DWARF_CFA(Op, DAG);
  case ISD::GlobalAddress:
    return lowerGlobalAddress(Op, DAG);
  case ISD::GlobalTLSAddress:
    return lowerGlobalTLSAddress(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN:
    return lowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::INTRINSIC_W_CHAIN:
    return lowerINTRINSIC_W_CHAIN(Op, DAG);
  case ISD::INTRINSIC_VOID:
    return lowerINTRINSIC_VOID(Op, DAG);
  case ISD::BlockAddress:
    return lowerBlockAddress(Op, DAG);
  case ISD::JumpTable:
    return lowerJumpTable(Op, DAG);
  case ISD::SHL_PARTS:
    return lowerShiftLeftParts(Op, DAG);
  case ISD::SRA_PARTS:
    return lowerShiftRightParts(Op, DAG, true);
  case ISD::SRL_PARTS:
    return lowerShiftRightParts(Op, DAG, false);
  case ISD::ConstantPool:
    return lowerConstantPool(Op, DAG);
  case ISD::FP_TO_SINT:
    return lowerFP_TO_SINT(Op, DAG);
  case ISD::BITCAST:
    return lowerBITCAST(Op, DAG);
  case ISD::UINT_TO_FP:
    return lowerUINT_TO_FP(Op, DAG);
  case ISD::SINT_TO_FP:
    return lowerSINT_TO_FP(Op, DAG);
  case ISD::VASTART:
    return lowerVASTART(Op, DAG);
  case ISD::FRAMEADDR:
    return lowerFRAMEADDR(Op, DAG);
  case ISD::RETURNADDR:
    return lowerRETURNADDR(Op, DAG);
  case ISD::WRITE_REGISTER:
    return lowerWRITE_REGISTER(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:
    return lowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return lowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::BUILD_VECTOR:
    return lowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:
    return lowerVECTOR_SHUFFLE(Op, DAG);
  }
  return SDValue();
}

SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
                                                     SelectionDAG &DAG) const {
  // TODO: custom shuffle.
  return SDValue();
}

static bool isConstantOrUndef(const SDValue Op) {
  if (Op->isUndef())
    return true;
  if (isa<ConstantSDNode>(Op))
    return true;
  if (isa<ConstantFPSDNode>(Op))
    return true;
  return false;
}

static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) {
  for (unsigned i = 0; i < Op->getNumOperands(); ++i)
    if (isConstantOrUndef(Op->getOperand(i)))
      return true;
  return false;
}

SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
                                                   SelectionDAG &DAG) const {
  BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
  EVT ResTy = Op->getValueType(0);
  SDLoc DL(Op);
  APInt SplatValue, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  bool Is128Vec = ResTy.is128BitVector();
  bool Is256Vec = ResTy.is256BitVector();

  if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
      (!Subtarget.hasExtLASX() || !Is256Vec))
    return SDValue();

  if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
                            /*MinSplatBits=*/8) &&
      SplatBitSize <= 64) {
    // We can only cope with 8, 16, 32, or 64-bit elements.
    if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
        SplatBitSize != 64)
      return SDValue();

    EVT ViaVecTy;

    switch (SplatBitSize) {
    default:
      return SDValue();
    case 8:
      ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
      break;
    case 16:
      ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
      break;
    case 32:
      ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
      break;
    case 64:
      ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
      break;
    }

    // SelectionDAG::getConstant will promote SplatValue appropriately.
    SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);

    // Bitcast to the type we originally wanted.
    if (ViaVecTy != ResTy)
      Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);

    return Result;
  }

  if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
    return Op;

  if (!isConstantOrUndefBUILD_VECTOR(Node)) {
    // Use INSERT_VECTOR_ELT operations rather than expand to stores.
    // The resulting code is the same length as the expansion, but it doesn't
    // use memory operations.
    EVT ResTy = Node->getValueType(0);

    assert(ResTy.isVector());

    unsigned NumElts = ResTy.getVectorNumElements();
    SDValue Vector = DAG.getUNDEF(ResTy);
    for (unsigned i = 0; i < NumElts; ++i) {
      Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector,
                           Node->getOperand(i),
                           DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
    }
    return Vector;
  }

  return SDValue();
}
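
// Illustrative example (a sketch, not from the original source): with LSX
// enabled, a v4i32 BUILD_VECTOR whose four lanes are all the constant 7
// satisfies isConstantSplat with SplatBitSize == 32, so the code above
// rebuilds it as DAG.getConstant(7, DL, MVT::v4i32), which instruction
// selection can then match to a single vector-immediate move such as
// vrepli.w.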

SDValue
LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                 SelectionDAG &DAG) const {
  EVT VecTy = Op->getOperand(0)->getValueType(0);
  SDValue Idx = Op->getOperand(1);
  EVT EltTy = VecTy.getVectorElementType();
  unsigned NumElts = VecTy.getVectorNumElements();

  if (isa<ConstantSDNode>(Idx) &&
      (EltTy == MVT::i32 || EltTy == MVT::i64 || EltTy == MVT::f32 ||
       EltTy == MVT::f64 || Idx->getAsZExtVal() < NumElts / 2))
    return Op;

  return SDValue();
}

SDValue
LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
                                                SelectionDAG &DAG) const {
  if (isa<ConstantSDNode>(Op->getOperand(2)))
    return Op;
  return SDValue();
}

SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SyncScope::ID FenceSSID =
      static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));

  // singlethread fences only synchronize with signal handlers on the same
  // thread and thus only need to preserve instruction order, not actually
  // enforce memory ordering.
  if (FenceSSID == SyncScope::SingleThread)
    // MEMBARRIER is a compiler barrier; it codegens to a no-op.
    return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));

  return Op;
}
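
// Illustrative example: `fence syncscope("singlethread") seq_cst` hits the
// early return above and becomes a MEMBARRIER that emits no instruction,
// while a plain `fence seq_cst` keeps the ATOMIC_FENCE node and is selected
// to a `dbar` barrier instruction.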

SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
                                                     SelectionDAG &DAG) const {

  if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) {
    DAG.getContext()->emitError(
        "On LA64, only 64-bit registers can be written.");
    return Op.getOperand(0);
  }

  if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) {
    DAG.getContext()->emitError(
        "On LA32, only 32-bit registers can be written.");
    return Op.getOperand(0);
  }

  return Op;
}

SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
                                                SelectionDAG &DAG) const {
  if (!isa<ConstantSDNode>(Op.getOperand(0))) {
    DAG.getContext()->emitError("argument to '__builtin_frame_address' must "
                                "be a constant integer");
    return SDValue();
  }

  MachineFunction &MF = DAG.getMachineFunction();
  MF.getFrameInfo().setFrameAddressIsTaken(true);
  Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
  unsigned Depth = Op.getConstantOperandVal(0);
  int GRLenInBytes = Subtarget.getGRLen() / 8;

  while (Depth--) {
    int Offset = -(GRLenInBytes * 2);
    SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
                              DAG.getIntPtrConstant(Offset, DL));
    FrameAddr =
        DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
  }
  return FrameAddr;
}
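
// Worked example (illustrative): on LA64, GRLenInBytes is 8, so each loop
// iteration above loads the caller's frame pointer from the fixed slot at
// $fp - 16; __builtin_frame_address(2) therefore performs two such loads.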

SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
                                                 SelectionDAG &DAG) const {
  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  // Currently only support lowering return address for the current frame.
  if (Op.getConstantOperandVal(0) != 0) {
    DAG.getContext()->emitError(
        "return address can only be determined for the current frame");
    return SDValue();
  }

  MachineFunction &MF = DAG.getMachineFunction();
  MF.getFrameInfo().setReturnAddressIsTaken(true);
  MVT GRLenVT = Subtarget.getGRLenVT();

  // Return the value of the return address register, marking it an implicit
  // live-in.
  Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(),
                              getRegClassFor(GRLenVT));
  return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT);
}

SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
                                                   SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  auto Size = Subtarget.getGRLen() / 8;
  auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false);
  return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
}

SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
                                              SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();

  SDLoc DL(Op);
  SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
                                 getPointerTy(MF.getDataLayout()));

  // vastart just stores the address of the VarArgsFrameIndex slot into the
  // memory location argument.
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
                      MachinePointerInfo(SV));
}

SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
                                                 SelectionDAG &DAG) const {
  assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
         !Subtarget.hasBasicD() && "unexpected target features");

  SDLoc DL(Op);
  SDValue Op0 = Op.getOperand(0);
  if (Op0->getOpcode() == ISD::AND) {
    auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
    if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
      return Op;
  }

  if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
      Op0.getConstantOperandVal(1) < UINT64_C(0X1F) &&
      Op0.getConstantOperandVal(2) == UINT64_C(0))
    return Op;

  if (Op0.getOpcode() == ISD::AssertZext &&
      dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32))
    return Op;

  EVT OpVT = Op0.getValueType();
  EVT RetVT = Op.getValueType();
  RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
  MakeLibCallOptions CallOptions;
  CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
  SDValue Chain = SDValue();
  SDValue Result;
  std::tie(Result, Chain) =
      makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
  return Result;
}
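
// Illustrative example: on LA64 with +f but -d, `uitofp i64 %x to float`
// matches none of the early returns above, so RTLIB::getUINTTOFP(MVT::i64,
// MVT::f32) selects the __floatundisf libcall to perform the conversion in
// software.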

SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
                                                 SelectionDAG &DAG) const {
  assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
         !Subtarget.hasBasicD() && "unexpected target features");

  SDLoc DL(Op);
  SDValue Op0 = Op.getOperand(0);

  if ((Op0.getOpcode() == ISD::AssertSext ||
       Op0.getOpcode() == ISD::AssertZext) &&
      dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32))
    return Op;

  EVT OpVT = Op0.getValueType();
  EVT RetVT = Op.getValueType();
  RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
  MakeLibCallOptions CallOptions;
  CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
  SDValue Chain = SDValue();
  SDValue Result;
  std::tie(Result, Chain) =
      makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
  return Result;
}

SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
                                              SelectionDAG &DAG) const {

  SDLoc DL(Op);
  SDValue Op0 = Op.getOperand(0);

  if (Op.getValueType() == MVT::f32 && Op0.getValueType() == MVT::i32 &&
      Subtarget.is64Bit() && Subtarget.hasBasicF()) {
    SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
    return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
  }
  return Op;
}

SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
                                                 SelectionDAG &DAG) const {

  SDLoc DL(Op);

  if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
      !Subtarget.hasBasicD()) {
    SDValue Dst =
        DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op.getOperand(0));
    return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst);
  }

  EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
  SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op.getOperand(0));
  return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc);
}
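
// Illustrative result (a sketch): on LA64 with +f but -d, `fptosi float %a
// to i64` takes the branch above and is selected to roughly
//   ftint.w.s  $fa0, $fa0
//   movfr2gr.s $a0, $fa0
// with register choices left to the allocator.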
static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
}

static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
                                   Flags);
}

static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
                                   N->getOffset(), Flags);
}

static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
}

template <class NodeTy>
SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
                                         CodeModel::Model M,
                                         bool IsLocal) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);

  switch (M) {
  default:
    report_fatal_error("Unsupported code model");

  case CodeModel::Large: {
    assert(Subtarget.is64Bit() && "Large code model requires LA64");

    // This is not actually used, but is necessary for successfully matching
    // the PseudoLA_*_LARGE nodes.
    SDValue Tmp = DAG.getConstant(0, DL, Ty);
    if (IsLocal)
      // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that
      // eventually becomes the desired 5-insn code sequence.
      return SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty,
                                        Tmp, Addr),
                     0);

    // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that eventually
    // becomes the desired 5-insn code sequence.
    return SDValue(
        DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr),
        0);
  }

  case CodeModel::Small:
  case CodeModel::Medium:
    if (IsLocal)
      // This generates the pattern (PseudoLA_PCREL sym), which expands to
      // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
      return SDValue(
          DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0);

    // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d
    // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
    return SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr),
                   0);
  }
}
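
// Illustrative expansion (a sketch following the psABI; exact operands are
// chosen at MC lowering): PseudoLA_PCREL_LARGE eventually becomes the 5-insn
// sequence
//   pcalau12i $dst, %pc_hi20(sym)
//   addi.d    $tmp, $zero, %pc_lo12(sym)
//   lu32i.d   $tmp, %pc64_lo20(sym)
//   lu52i.d   $tmp, $tmp, %pc64_hi12(sym)
//   add.d     $dst, $dst, $tmp
// and PseudoLA_GOT_LARGE ends the same way with a ldx.d instead of add.d.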

SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
                                                   SelectionDAG &DAG) const {
  return getAddr(cast<BlockAddressSDNode>(Op), DAG,
                 DAG.getTarget().getCodeModel());
}

SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
                                                SelectionDAG &DAG) const {
  return getAddr(cast<JumpTableSDNode>(Op), DAG,
                 DAG.getTarget().getCodeModel());
}

SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
                                                   SelectionDAG &DAG) const {
  return getAddr(cast<ConstantPoolSDNode>(Op), DAG,
                 DAG.getTarget().getCodeModel());
}

SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
                                                    SelectionDAG &DAG) const {
  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
  assert(N->getOffset() == 0 && "unexpected offset in global node");
  auto CM = DAG.getTarget().getCodeModel();
  const GlobalValue *GV = N->getGlobal();

  if (GV->isDSOLocal() && isa<GlobalVariable>(GV)) {
    if (auto GCM = dyn_cast<GlobalVariable>(GV)->getCodeModel())
      CM = *GCM;
  }

  return getAddr(N, DAG, CM, GV->isDSOLocal());
}

SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
                                                  SelectionDAG &DAG,
                                                  unsigned Opc,
                                                  bool Large) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  MVT GRLenVT = Subtarget.getGRLenVT();

  // This is not actually used, but is necessary for successfully matching the
  // PseudoLA_*_LARGE nodes.
  SDValue Tmp = DAG.getConstant(0, DL, Ty);
  SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
  SDValue Offset = Large
                       ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
                       : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);

  // Add the thread pointer.
  return DAG.getNode(ISD::ADD, DL, Ty, Offset,
                     DAG.getRegister(LoongArch::R2, GRLenVT));
}

SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
                                                   SelectionDAG &DAG,
                                                   unsigned Opc,
                                                   bool Large) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());

  // This is not actually used, but is necessary for successfully matching the
  // PseudoLA_*_LARGE nodes.
  SDValue Tmp = DAG.getConstant(0, DL, Ty);

  // Use a PC-relative addressing mode to access the dynamic GOT address.
  SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
  SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
                       : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);

  // Prepare argument list to generate call.
  ArgListTy Args;
  ArgListEntry Entry;
  Entry.Node = Load;
  Entry.Ty = CallTy;
  Args.push_back(Entry);

  // Setup call to __tls_get_addr.
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL)
      .setChain(DAG.getEntryNode())
      .setLibCallee(CallingConv::C, CallTy,
                    DAG.getExternalSymbol("__tls_get_addr", Ty),
                    std::move(Args));

  return LowerCallTo(CLI).first;
}
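
// Illustrative sequence (a sketch): for general-dynamic TLS this typically
// materializes the GOT entry address and then calls the resolver, roughly
//   pcalau12i $a0, %gd_pc_hi20(sym)
//   addi.d    $a0, $a0, %got_pc_lo12(sym)
//   bl        %plt(__tls_get_addr)
// leaving the address of `sym` in $a0.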

SDValue LoongArchTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
                                                SelectionDAG &DAG, unsigned Opc,
                                                bool Large) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  const GlobalValue *GV = N->getGlobal();

  // This is not actually used, but is necessary for successfully matching the
  // PseudoLA_*_LARGE nodes.
  SDValue Tmp = DAG.getConstant(0, DL, Ty);

  // Use a PC-relative addressing mode to access the global dynamic GOT
  // address. This generates the pattern (PseudoLA_TLS_DESC_PC{,LARGE} sym).
  SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
  return Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
               : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
}

SDValue
LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
                                               SelectionDAG &DAG) const {
  if (DAG.getMachineFunction().getFunction().getCallingConv() ==
      CallingConv::GHC)
    report_fatal_error("In GHC calling convention TLS is not supported");

  bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
  assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");

  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
  assert(N->getOffset() == 0 && "unexpected offset in global node");

  bool IsDesc = DAG.getTarget().useTLSDESC();

  switch (getTargetMachine().getTLSModel(N->getGlobal())) {
  case TLSModel::GeneralDynamic:
    // In this model, application code calls the dynamic linker function
    // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
    // runtime.
    if (!IsDesc)
      return getDynamicTLSAddr(N, DAG,
                               Large ? LoongArch::PseudoLA_TLS_GD_LARGE
                                     : LoongArch::PseudoLA_TLS_GD,
                               Large);
    break;
  case TLSModel::LocalDynamic:
    // Same as GeneralDynamic, except for assembly modifiers and relocation
    // records.
    if (!IsDesc)
      return getDynamicTLSAddr(N, DAG,
                               Large ? LoongArch::PseudoLA_TLS_LD_LARGE
                                     : LoongArch::PseudoLA_TLS_LD,
                               Large);
    break;
  case TLSModel::InitialExec:
    // This model uses the GOT to resolve TLS offsets.
    return getStaticTLSAddr(N, DAG,
                            Large ? LoongArch::PseudoLA_TLS_IE_LARGE
                                  : LoongArch::PseudoLA_TLS_IE,
                            Large);
  case TLSModel::LocalExec:
    // This model is used when static linking as the TLS offsets are resolved
    // during program linking.
    //
    // This node doesn't need an extra argument for the large code model.
    return getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE);
  }

  return getTLSDescAddr(N, DAG,
                        Large ? LoongArch::PseudoLA_TLS_DESC_PC_LARGE
                              : LoongArch::PseudoLA_TLS_DESC_PC,
                        Large);
}

template <unsigned N>
static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp,
                                    SelectionDAG &DAG, bool IsSigned = false) {
  auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
  // Check the ImmArg.
  if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
      (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
    DAG.getContext()->emitError(Op->getOperationName(0) +
                                ": argument out of range.");
    return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType());
  }
  return SDValue();
}
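
// Illustrative use: @llvm.loongarch.lsx.vreplvei.d only accepts lane indices
// 0 or 1, so checkIntrinsicImmArg<1>(Op, 2, DAG) lets those through and, for
// an out-of-range index such as 2, emits "argument out of range." and folds
// the intrinsic's result to UNDEF.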
SDValue
LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc DL(Op);
  switch (Op.getConstantOperandVal(0)) {
  default:
    return SDValue(); // Don't custom lower most intrinsics.
  case Intrinsic::thread_pointer: {
    EVT PtrVT = getPointerTy(DAG.getDataLayout());
    return DAG.getRegister(LoongArch::R2, PtrVT);
  }
  case Intrinsic::loongarch_lsx_vpickve2gr_d:
  case Intrinsic::loongarch_lsx_vpickve2gr_du:
  case Intrinsic::loongarch_lsx_vreplvei_d:
  case Intrinsic::loongarch_lasx_xvrepl128vei_d:
    return checkIntrinsicImmArg<1>(Op, 2, DAG);
  case Intrinsic::loongarch_lsx_vreplvei_w:
  case Intrinsic::loongarch_lasx_xvrepl128vei_w:
  case Intrinsic::loongarch_lasx_xvpickve2gr_d:
  case Intrinsic::loongarch_lasx_xvpickve2gr_du:
  case Intrinsic::loongarch_lasx_xvpickve_d:
  case Intrinsic::loongarch_lasx_xvpickve_d_f:
    return checkIntrinsicImmArg<2>(Op, 2, DAG);
  case Intrinsic::loongarch_lasx_xvinsve0_d:
    return checkIntrinsicImmArg<2>(Op, 3, DAG);
  case Intrinsic::loongarch_lsx_vsat_b:
  case Intrinsic::loongarch_lsx_vsat_bu:
  case Intrinsic::loongarch_lsx_vrotri_b:
  case Intrinsic::loongarch_lsx_vsllwil_h_b:
  case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
  case Intrinsic::loongarch_lsx_vsrlri_b:
  case Intrinsic::loongarch_lsx_vsrari_b:
  case Intrinsic::loongarch_lsx_vreplvei_h:
  case Intrinsic::loongarch_lasx_xvsat_b:
  case Intrinsic::loongarch_lasx_xvsat_bu:
  case Intrinsic::loongarch_lasx_xvrotri_b:
  case Intrinsic::loongarch_lasx_xvsllwil_h_b:
  case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
  case Intrinsic::loongarch_lasx_xvsrlri_b:
  case Intrinsic::loongarch_lasx_xvsrari_b:
  case Intrinsic::loongarch_lasx_xvrepl128vei_h:
  case Intrinsic::loongarch_lasx_xvpickve_w:
  case Intrinsic::loongarch_lasx_xvpickve_w_f:
    return checkIntrinsicImmArg<3>(Op, 2, DAG);
  case Intrinsic::loongarch_lasx_xvinsve0_w:
    return checkIntrinsicImmArg<3>(Op, 3, DAG);
  case Intrinsic::loongarch_lsx_vsat_h:
  case Intrinsic::loongarch_lsx_vsat_hu:
  case Intrinsic::loongarch_lsx_vrotri_h:
  case Intrinsic::loongarch_lsx_vsllwil_w_h:
  case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
  case Intrinsic::loongarch_lsx_vsrlri_h:
  case Intrinsic::loongarch_lsx_vsrari_h:
  case Intrinsic::loongarch_lsx_vreplvei_b:
  case Intrinsic::loongarch_lasx_xvsat_h:
  case Intrinsic::loongarch_lasx_xvsat_hu:
  case Intrinsic::loongarch_lasx_xvrotri_h:
  case Intrinsic::loongarch_lasx_xvsllwil_w_h:
  case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
  case Intrinsic::loongarch_lasx_xvsrlri_h:
  case Intrinsic::loongarch_lasx_xvsrari_h:
  case Intrinsic::loongarch_lasx_xvrepl128vei_b:
    return checkIntrinsicImmArg<4>(Op, 2, DAG);
  case Intrinsic::loongarch_lsx_vsrlni_b_h:
  case Intrinsic::loongarch_lsx_vsrani_b_h:
  case Intrinsic::loongarch_lsx_vsrlrni_b_h:
  case Intrinsic::loongarch_lsx_vsrarni_b_h:
  case Intrinsic::loongarch_lsx_vssrlni_b_h:
  case Intrinsic::loongarch_lsx_vssrani_b_h:
  case Intrinsic::loongarch_lsx_vssrlni_bu_h:
  case Intrinsic::loongarch_lsx_vssrani_bu_h:
  case Intrinsic::loongarch_lsx_vssrlrni_b_h:
  case Intrinsic::loongarch_lsx_vssrarni_b_h:
  case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
  case Intrinsic::loongarch_lsx_vssrarni_bu_h:
  case Intrinsic::loongarch_lasx_xvsrlni_b_h:
  case Intrinsic::loongarch_lasx_xvsrani_b_h:
  case Intrinsic::loongarch_lasx_xvsrlrni_b_h:
  case Intrinsic::loongarch_lasx_xvsrarni_b_h:
  case Intrinsic::loongarch_lasx_xvssrlni_b_h:
  case Intrinsic::loongarch_lasx_xvssrani_b_h:
  case Intrinsic::loongarch_lasx_xvssrlni_bu_h:
  case Intrinsic::loongarch_lasx_xvssrani_bu_h:
  case Intrinsic::loongarch_lasx_xvssrlrni_b_h:
  case Intrinsic::loongarch_lasx_xvssrarni_b_h:
  case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:
  case Intrinsic::loongarch_lasx_xvssrarni_bu_h:
    return checkIntrinsicImmArg<4>(Op, 3, DAG);
  case Intrinsic::loongarch_lsx_vsat_w:
  case Intrinsic::loongarch_lsx_vsat_wu:
  case Intrinsic::loongarch_lsx_vrotri_w:
  case Intrinsic::loongarch_lsx_vsllwil_d_w:
  case Intrinsic::loongarch_lsx_vsllwil_du_wu:
  case Intrinsic::loongarch_lsx_vsrlri_w:
  case Intrinsic::loongarch_lsx_vsrari_w:
  case Intrinsic::loongarch_lsx_vslei_bu:
  case Intrinsic::loongarch_lsx_vslei_hu:
  case Intrinsic::loongarch_lsx_vslei_wu:
  case Intrinsic::loongarch_lsx_vslei_du:
  case Intrinsic::loongarch_lsx_vslti_bu:
  case Intrinsic::loongarch_lsx_vslti_hu:
  case Intrinsic::loongarch_lsx_vslti_wu:
  case Intrinsic::loongarch_lsx_vslti_du:
  case Intrinsic::loongarch_lsx_vbsll_v:
  case Intrinsic::loongarch_lsx_vbsrl_v:
  case Intrinsic::loongarch_lasx_xvsat_w:
  case Intrinsic::loongarch_lasx_xvsat_wu:
  case Intrinsic::loongarch_lasx_xvrotri_w:
  case Intrinsic::loongarch_lasx_xvsllwil_d_w:
  case Intrinsic::loongarch_lasx_xvsllwil_du_wu:
  case Intrinsic::loongarch_lasx_xvsrlri_w:
  case Intrinsic::loongarch_lasx_xvsrari_w:
  case Intrinsic::loongarch_lasx_xvslei_bu:
  case Intrinsic::loongarch_lasx_xvslei_hu:
  case Intrinsic::loongarch_lasx_xvslei_wu:
  case Intrinsic::loongarch_lasx_xvslei_du:
  case Intrinsic::loongarch_lasx_xvslti_bu:
  case Intrinsic::loongarch_lasx_xvslti_hu:
  case Intrinsic::loongarch_lasx_xvslti_wu:
  case Intrinsic::loongarch_lasx_xvslti_du:
  case Intrinsic::loongarch_lasx_xvbsll_v:
  case Intrinsic::loongarch_lasx_xvbsrl_v:
    return checkIntrinsicImmArg<5>(Op, 2, DAG);
  case Intrinsic::loongarch_lsx_vseqi_b:
  case Intrinsic::loongarch_lsx_vseqi_h:
  case Intrinsic::loongarch_lsx_vseqi_w:
  case Intrinsic::loongarch_lsx_vseqi_d:
  case Intrinsic::loongarch_lsx_vslei_b:
  case Intrinsic::loongarch_lsx_vslei_h:
  case Intrinsic::loongarch_lsx_vslei_w:
  case Intrinsic::loongarch_lsx_vslei_d:
  case Intrinsic::loongarch_lsx_vslti_b:
  case Intrinsic::loongarch_lsx_vslti_h:
  case Intrinsic::loongarch_lsx_vslti_w:
  case Intrinsic::loongarch_lsx_vslti_d:
  case Intrinsic::loongarch_lasx_xvseqi_b:
  case Intrinsic::loongarch_lasx_xvseqi_h:
  case Intrinsic::loongarch_lasx_xvseqi_w:
  case Intrinsic::loongarch_lasx_xvseqi_d:
  case Intrinsic::loongarch_lasx_xvslei_b:
  case Intrinsic::loongarch_lasx_xvslei_h:
  case Intrinsic::loongarch_lasx_xvslei_w:
  case Intrinsic::loongarch_lasx_xvslei_d:
  case Intrinsic::loongarch_lasx_xvslti_b:
  case Intrinsic::loongarch_lasx_xvslti_h:
  case Intrinsic::loongarch_lasx_xvslti_w:
  case Intrinsic::loongarch_lasx_xvslti_d:
    return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true);
  case Intrinsic::loongarch_lsx_vsrlni_h_w:
  case Intrinsic::loongarch_lsx_vsrani_h_w:
  case Intrinsic::loongarch_lsx_vsrlrni_h_w:
  case Intrinsic::loongarch_lsx_vsrarni_h_w:
  case Intrinsic::loongarch_lsx_vssrlni_h_w:
  case Intrinsic::loongarch_lsx_vssrani_h_w:
  case Intrinsic::loongarch_lsx_vssrlni_hu_w:
  case Intrinsic::loongarch_lsx_vssrani_hu_w:
  case Intrinsic::loongarch_lsx_vssrlrni_h_w:
  case Intrinsic::loongarch_lsx_vssrarni_h_w:
  case Intrinsic::loongarch_lsx_vssrlrni_hu_w:
  case Intrinsic::loongarch_lsx_vssrarni_hu_w:
  case Intrinsic::loongarch_lsx_vfrstpi_b:
  case Intrinsic::loongarch_lsx_vfrstpi_h:
  case Intrinsic::loongarch_lasx_xvsrlni_h_w:
  case Intrinsic::loongarch_lasx_xvsrani_h_w:
  case Intrinsic::loongarch_lasx_xvsrlrni_h_w:
  case Intrinsic::loongarch_lasx_xvsrarni_h_w:
  case Intrinsic::loongarch_lasx_xvssrlni_h_w:
  case Intrinsic::loongarch_lasx_xvssrani_h_w:
  case Intrinsic::loongarch_lasx_xvssrlni_hu_w:
  case Intrinsic::loongarch_lasx_xvssrani_hu_w:
  case Intrinsic::loongarch_lasx_xvssrlrni_h_w:
  case Intrinsic::loongarch_lasx_xvssrarni_h_w:
  case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:
  case Intrinsic::loongarch_lasx_xvssrarni_hu_w:
  case Intrinsic::loongarch_lasx_xvfrstpi_b:
  case Intrinsic::loongarch_lasx_xvfrstpi_h:
    return checkIntrinsicImmArg<5>(Op, 3, DAG);
  case Intrinsic::loongarch_lsx_vsat_d:
  case Intrinsic::loongarch_lsx_vsat_du:
  case Intrinsic::loongarch_lsx_vrotri_d:
  case Intrinsic::loongarch_lsx_vsrlri_d:
  case Intrinsic::loongarch_lsx_vsrari_d:
  case Intrinsic::loongarch_lasx_xvsat_d:
  case Intrinsic::loongarch_lasx_xvsat_du:
  case Intrinsic::loongarch_lasx_xvrotri_d:
  case Intrinsic::loongarch_lasx_xvsrlri_d:
  case Intrinsic::loongarch_lasx_xvsrari_d:
    return checkIntrinsicImmArg<6>(Op, 2, DAG);
  case Intrinsic::loongarch_lsx_vsrlni_w_d:
  case Intrinsic::loongarch_lsx_vsrani_w_d:
  case Intrinsic::loongarch_lsx_vsrlrni_w_d:
  case Intrinsic::loongarch_lsx_vsrarni_w_d:
  case Intrinsic::loongarch_lsx_vssrlni_w_d:
  case Intrinsic::loongarch_lsx_vssrani_w_d:
  case Intrinsic::loongarch_lsx_vssrlni_wu_d:
  case Intrinsic::loongarch_lsx_vssrani_wu_d:
  case Intrinsic::loongarch_lsx_vssrlrni_w_d:
  case Intrinsic::loongarch_lsx_vssrarni_w_d:
  case Intrinsic::loongarch_lsx_vssrlrni_wu_d:
  case Intrinsic::loongarch_lsx_vssrarni_wu_d:
  case Intrinsic::loongarch_lasx_xvsrlni_w_d:
  case Intrinsic::loongarch_lasx_xvsrani_w_d:
  case Intrinsic::loongarch_lasx_xvsrlrni_w_d:
  case Intrinsic::loongarch_lasx_xvsrarni_w_d:
  case Intrinsic::loongarch_lasx_xvssrlni_w_d:
  case Intrinsic::loongarch_lasx_xvssrani_w_d:
  case Intrinsic::loongarch_lasx_xvssrlni_wu_d:
  case Intrinsic::loongarch_lasx_xvssrani_wu_d:
  case Intrinsic::loongarch_lasx_xvssrlrni_w_d:
  case Intrinsic::loongarch_lasx_xvssrarni_w_d:
  case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:
  case Intrinsic::loongarch_lasx_xvssrarni_wu_d:
    return checkIntrinsicImmArg<6>(Op, 3, DAG);
  case Intrinsic::loongarch_lsx_vsrlni_d_q:
  case Intrinsic::loongarch_lsx_vsrani_d_q:
  case Intrinsic::loongarch_lsx_vsrlrni_d_q:
  case Intrinsic::loongarch_lsx_vsrarni_d_q:
  case Intrinsic::loongarch_lsx_vssrlni_d_q:
  case Intrinsic::loongarch_lsx_vssrani_d_q:
  case Intrinsic::loongarch_lsx_vssrlni_du_q:
  case Intrinsic::loongarch_lsx_vssrani_du_q:
  case Intrinsic::loongarch_lsx_vssrlrni_d_q:
  case Intrinsic::loongarch_lsx_vssrarni_d_q:
  case Intrinsic::loongarch_lsx_vssrlrni_du_q:
  case Intrinsic::loongarch_lsx_vssrarni_du_q:
  case Intrinsic::loongarch_lasx_xvsrlni_d_q:
  case Intrinsic::loongarch_lasx_xvsrani_d_q:
  case Intrinsic::loongarch_lasx_xvsrlrni_d_q:
  case Intrinsic::loongarch_lasx_xvsrarni_d_q:
  case Intrinsic::loongarch_lasx_xvssrlni_d_q:
  case Intrinsic::loongarch_lasx_xvssrani_d_q:
  case Intrinsic::loongarch_lasx_xvssrlni_du_q:
  case Intrinsic::loongarch_lasx_xvssrani_du_q:
  case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
  case Intrinsic::loongarch_lasx_xvssrarni_d_q:
  case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
  case Intrinsic::loongarch_lasx_xvssrarni_du_q:
    return checkIntrinsicImmArg<7>(Op, 3, DAG);
  case Intrinsic::loongarch_lsx_vnori_b:
  case Intrinsic::loongarch_lsx_vshuf4i_b:
  case Intrinsic::loongarch_lsx_vshuf4i_h:
  case Intrinsic::loongarch_lsx_vshuf4i_w:
  case Intrinsic::loongarch_lasx_xvnori_b:
  case Intrinsic::loongarch_lasx_xvshuf4i_b:
  case Intrinsic::loongarch_lasx_xvshuf4i_h:
  case Intrinsic::loongarch_lasx_xvshuf4i_w:
  case Intrinsic::loongarch_lasx_xvpermi_d:
    return checkIntrinsicImmArg<8>(Op, 2, DAG);
  case Intrinsic::loongarch_lsx_vshuf4i_d:
  case Intrinsic::loongarch_lsx_vpermi_w:
  case Intrinsic::loongarch_lsx_vbitseli_b:
  case Intrinsic::loongarch_lsx_vextrins_b:
  case Intrinsic::loongarch_lsx_vextrins_h:
  case Intrinsic::loongarch_lsx_vextrins_w:
  case Intrinsic::loongarch_lsx_vextrins_d:
  case Intrinsic::loongarch_lasx_xvshuf4i_d:
  case Intrinsic::loongarch_lasx_xvpermi_w:
  case Intrinsic::loongarch_lasx_xvpermi_q:
  case Intrinsic::loongarch_lasx_xvbitseli_b:
  case Intrinsic::loongarch_lasx_xvextrins_b:
  case Intrinsic::loongarch_lasx_xvextrins_h:
  case Intrinsic::loongarch_lasx_xvextrins_w:
  case Intrinsic::loongarch_lasx_xvextrins_d:
    return checkIntrinsicImmArg<8>(Op, 3, DAG);
  case Intrinsic::loongarch_lsx_vrepli_b:
  case Intrinsic::loongarch_lsx_vrepli_h:
  case Intrinsic::loongarch_lsx_vrepli_w:
  case Intrinsic::loongarch_lsx_vrepli_d:
  case Intrinsic::loongarch_lasx_xvrepli_b:
  case Intrinsic::loongarch_lasx_xvrepli_h:
  case Intrinsic::loongarch_lasx_xvrepli_w:
  case Intrinsic::loongarch_lasx_xvrepli_d:
    return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true);
  case Intrinsic::loongarch_lsx_vldi:
  case Intrinsic::loongarch_lasx_xvldi:
    return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true);
  }
}

// Helper function that emits an error message for intrinsics with a chain and
// returns the merge of an UNDEF value and the chain.
static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op,
                                                  StringRef ErrorMsg,
                                                  SelectionDAG &DAG) {
  DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
  return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
                            SDLoc(Op));
}

SDValue
LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT GRLenVT = Subtarget.getGRLenVT();
  EVT VT = Op.getValueType();
  SDValue Chain = Op.getOperand(0);
  const StringRef ErrorMsgOOR = "argument out of range";
  const StringRef ErrorMsgReqLA64 = "requires loongarch64";
  const StringRef ErrorMsgReqF = "requires basic 'f' target feature";

  switch (Op.getConstantOperandVal(1)) {
  default:
    return Op;
  case Intrinsic::loongarch_crc_w_b_w:
  case Intrinsic::loongarch_crc_w_h_w:
  case Intrinsic::loongarch_crc_w_w_w:
  case Intrinsic::loongarch_crc_w_d_w:
  case Intrinsic::loongarch_crcc_w_b_w:
  case Intrinsic::loongarch_crcc_w_h_w:
  case Intrinsic::loongarch_crcc_w_w_w:
  case Intrinsic::loongarch_crcc_w_d_w:
    return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG);
  case Intrinsic::loongarch_csrrd_w:
  case Intrinsic::loongarch_csrrd_d: {
    unsigned Imm = Op.getConstantOperandVal(2);
    return !isUInt<14>(Imm)
               ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
               : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
                             {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
  }
  case Intrinsic::loongarch_csrwr_w:
  case Intrinsic::loongarch_csrwr_d: {
    unsigned Imm = Op.getConstantOperandVal(3);
    return !isUInt<14>(Imm)
               ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
               : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
                             {Chain, Op.getOperand(2),
                              DAG.getConstant(Imm, DL, GRLenVT)});
  }
  case Intrinsic::loongarch_csrxchg_w:
  case Intrinsic::loongarch_csrxchg_d: {
    unsigned Imm = Op.getConstantOperandVal(4);
    return !isUInt<14>(Imm)
               ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
               : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
                             {Chain, Op.getOperand(2), Op.getOperand(3),
                              DAG.getConstant(Imm, DL, GRLenVT)});
  }
  case Intrinsic::loongarch_iocsrrd_d: {
    return DAG.getNode(
        LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other},
        {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))});
  }
#define IOCSRRD_CASE(NAME, NODE)                                               \
  case Intrinsic::loongarch_##NAME: {                                         \
    return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other},         \
                       {Chain, Op.getOperand(2)});                            \
  }
    IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
    IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
    IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
#undef IOCSRRD_CASE
  case Intrinsic::loongarch_cpucfg: {
    return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
                       {Chain, Op.getOperand(2)});
  }
  case Intrinsic::loongarch_lddir_d: {
    unsigned Imm = Op.getConstantOperandVal(3);
    return !isUInt<8>(Imm)
               ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
               : Op;
  }
  case Intrinsic::loongarch_movfcsr2gr: {
    if (!Subtarget.hasBasicF())
      return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG);
    unsigned Imm = Op.getConstantOperandVal(2);
    return !isUInt<2>(Imm)
               ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
               : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
                             {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
  }
  case Intrinsic::loongarch_lsx_vld:
  case Intrinsic::loongarch_lsx_vldrepl_b:
  case Intrinsic::loongarch_lasx_xvld:
  case Intrinsic::loongarch_lasx_xvldrepl_b:
    return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
               ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
               : SDValue();
  case Intrinsic::loongarch_lsx_vldrepl_h:
  case Intrinsic::loongarch_lasx_xvldrepl_h:
    return !isShiftedInt<11, 1>(
               cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
               ? emitIntrinsicWithChainErrorMessage(
                     Op, "argument out of range or not a multiple of 2", DAG)
               : SDValue();
  case Intrinsic::loongarch_lsx_vldrepl_w:
  case Intrinsic::loongarch_lasx_xvldrepl_w:
    return !isShiftedInt<10, 2>(
               cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
               ? emitIntrinsicWithChainErrorMessage(
                     Op, "argument out of range or not a multiple of 4", DAG)
               : SDValue();
  case Intrinsic::loongarch_lsx_vldrepl_d:
  case Intrinsic::loongarch_lasx_xvldrepl_d:
    return !isShiftedInt<9, 3>(
               cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
               ? emitIntrinsicWithChainErrorMessage(
                     Op, "argument out of range or not a multiple of 8", DAG)
               : SDValue();
  }
}

// Helper function that emits an error message for intrinsics with a void
// return value and returns the chain.
static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg,
                                         SelectionDAG &DAG) {
  DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
  return Op.getOperand(0);
}

SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
                                                     SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT GRLenVT = Subtarget.getGRLenVT();
  SDValue Chain = Op.getOperand(0);
  uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
  SDValue Op2 = Op.getOperand(2);
  const StringRef ErrorMsgOOR = "argument out of range";
  const StringRef ErrorMsgReqLA64 = "requires loongarch64";
  const StringRef ErrorMsgReqLA32 = "requires loongarch32";
  const StringRef ErrorMsgReqF = "requires basic 'f' target feature";

  switch (IntrinsicEnum) {
  default:
    // TODO: Add more Intrinsics.
    return SDValue();
  case Intrinsic::loongarch_cacop_d:
  case Intrinsic::loongarch_cacop_w: {
    if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
      return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG);
    if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
      return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG);
    // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
    unsigned Imm1 = Op2->getAsZExtVal();
    int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue();
    if (!isUInt<5>(Imm1) || !isInt<12>(Imm2))
      return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
    return Op;
  }
  case Intrinsic::loongarch_dbar: {
    unsigned Imm = Op2->getAsZExtVal();
    return !isUInt<15>(Imm)
               ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
               : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain,
                             DAG.getConstant(Imm, DL, GRLenVT));
  }
  case Intrinsic::loongarch_ibar: {
    unsigned Imm = Op2->getAsZExtVal();
    return !isUInt<15>(Imm)
               ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
               : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain,
                             DAG.getConstant(Imm, DL, GRLenVT));
  }
  case Intrinsic::loongarch_break: {
    unsigned Imm = Op2->getAsZExtVal();
    return !isUInt<15>(Imm)
               ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
               : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain,
                             DAG.getConstant(Imm, DL, GRLenVT));
  }
  case Intrinsic::loongarch_movgr2fcsr: {
    if (!Subtarget.hasBasicF())
      return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG);
    unsigned Imm = Op2->getAsZExtVal();
    return !isUInt<2>(Imm)
               ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
               : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain,
                             DAG.getConstant(Imm, DL, GRLenVT),
                             DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT,
                                         Op.getOperand(3)));
  }
  case Intrinsic::loongarch_syscall: {
    unsigned Imm = Op2->getAsZExtVal();
    return !isUInt<15>(Imm)
               ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
               : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain,
                             DAG.getConstant(Imm, DL, GRLenVT));
  }
#define IOCSRWR_CASE(NAME, NODE)                                               \
  case Intrinsic::loongarch_##NAME: {                                         \
    SDValue Op3 = Op.getOperand(3);                                           \
    return Subtarget.is64Bit()                                                \
               ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain,       \
                             DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
                             DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \
               : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2,  \
                             Op3);                                            \
  }
    IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
    IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
    IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
#undef IOCSRWR_CASE
  case Intrinsic::loongarch_iocsrwr_d: {
    return !Subtarget.is64Bit()
               ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
               : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain,
                             Op2,
                             DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
                                         Op.getOperand(3)));
  }
#define ASRT_LE_GT_CASE(NAME)                                                  \
  case Intrinsic::loongarch_##NAME: {                                         \
    return !Subtarget.is64Bit()                                               \
               ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)          \
               : Op;                                                          \
  }
    ASRT_LE_GT_CASE(asrtle_d)
    ASRT_LE_GT_CASE(asrtgt_d)
#undef ASRT_LE_GT_CASE
  case Intrinsic::loongarch_ldpte_d: {
    unsigned Imm = Op.getConstantOperandVal(3);
    return !Subtarget.is64Bit()
               ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
           : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
                             : Op;
  }
  case Intrinsic::loongarch_lsx_vst:
  case Intrinsic::loongarch_lasx_xvst:
    return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue())
               ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
               : SDValue();
  case Intrinsic::loongarch_lasx_xvstelm_b:
    return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
            !isUInt<5>(Op.getConstantOperandVal(5)))
               ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
               : SDValue();
  case Intrinsic::loongarch_lsx_vstelm_b:
    return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
            !isUInt<4>(Op.getConstantOperandVal(5)))
               ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
               : SDValue();
  case Intrinsic::loongarch_lasx_xvstelm_h:
    return (!isShiftedInt<8, 1>(
                cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
            !isUInt<4>(Op.getConstantOperandVal(5)))
               ? emitIntrinsicErrorMessage(
                     Op, "argument out of range or not a multiple of 2", DAG)
               : SDValue();
  case Intrinsic::loongarch_lsx_vstelm_h:
    return (!isShiftedInt<8, 1>(
                cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
            !isUInt<3>(Op.getConstantOperandVal(5)))
               ? emitIntrinsicErrorMessage(
                     Op, "argument out of range or not a multiple of 2", DAG)
               : SDValue();
  case Intrinsic::loongarch_lasx_xvstelm_w:
    return (!isShiftedInt<8, 2>(
                cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
            !isUInt<3>(Op.getConstantOperandVal(5)))
               ? emitIntrinsicErrorMessage(
                     Op, "argument out of range or not a multiple of 4", DAG)
               : SDValue();
  case Intrinsic::loongarch_lsx_vstelm_w:
    return (!isShiftedInt<8, 2>(
                cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
            !isUInt<2>(Op.getConstantOperandVal(5)))
               ? emitIntrinsicErrorMessage(
                     Op, "argument out of range or not a multiple of 4", DAG)
               : SDValue();
  case Intrinsic::loongarch_lasx_xvstelm_d:
    return (!isShiftedInt<8, 3>(
                cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
            !isUInt<2>(Op.getConstantOperandVal(5)))
               ? emitIntrinsicErrorMessage(
                     Op, "argument out of range or not a multiple of 8", DAG)
               : SDValue();
  case Intrinsic::loongarch_lsx_vstelm_d:
    return (!isShiftedInt<8, 3>(
                cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
            !isUInt<1>(Op.getConstantOperandVal(5)))
               ? emitIntrinsicErrorMessage(
                     Op, "argument out of range or not a multiple of 8", DAG)
               : SDValue();
  }
}

SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
                                                     SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shamt = Op.getOperand(2);
  EVT VT = Lo.getValueType();

  // if Shamt-GRLen < 0: // Shamt < GRLen
  //   Lo = Lo << Shamt
  //   Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
  // else:
  //   Lo = 0
  //   Hi = Lo << (Shamt-GRLen)

  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One = DAG.getConstant(1, DL, VT);
  SDValue MinusGRLen = DAG.getConstant(-(int)Subtarget.getGRLen(), DL, VT);
  SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
  SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
  SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);

  SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
  SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
  SDValue ShiftRightLo =
      DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt);
  SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
  SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
  SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen);

  SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);

  Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
  Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);

  SDValue Parts[2] = {Lo, Hi};
  return DAG.getMergeValues(Parts, DL);
}
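
// Worked example (illustrative): for GRLen == 32 and Shamt == 40 the
// else-branch of the pseudocode above applies, giving Lo = 0 and
// Hi = Lo_in << 8. For Shamt == 8, Lo = Lo_in << 8 and
// Hi = (Hi_in << 8) | ((Lo_in >>u 1) >>u (31 ^ 8)), where 31 ^ 8 == 23, so
// the low word contributes its top 8 bits via Lo_in >>u 24.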

SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
                                                      SelectionDAG &DAG,
                                                      bool IsSRA) const {
  SDLoc DL(Op);
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shamt = Op.getOperand(2);
  EVT VT = Lo.getValueType();

  // SRA expansion:
  //   if Shamt-GRLen < 0: // Shamt < GRLen
  //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
  //     Hi = Hi >>s Shamt
  //   else:
  //     Lo = Hi >>s (Shamt-GRLen);
  //     Hi = Hi >>s (GRLen-1)
  //
  // SRL expansion:
  //   if Shamt-GRLen < 0: // Shamt < GRLen
  //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
  //     Hi = Hi >>u Shamt
  //   else:
  //     Lo = Hi >>u (Shamt-GRLen);
  //     Hi = 0;

  unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;

  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One = DAG.getConstant(1, DL, VT);
  SDValue MinusGRLen = DAG.getConstant(-(int)Subtarget.getGRLen(), DL, VT);
  SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
  SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
  SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);

  SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
  SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
  SDValue ShiftLeftHi =
      DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt);
  SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
  SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
  SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen);
  SDValue HiFalse =
      IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;

  SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);

  Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
  Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);

  SDValue Parts[2] = {Lo, Hi};
  return DAG.getMergeValues(Parts, DL);
}
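
// Worked example (illustrative): for GRLen == 32, an SRL with Shamt == 8
// yields Lo = (Lo_in >>u 8) | ((Hi_in << 1) << 23) = (Lo_in >>u 8) |
// (Hi_in << 24) and Hi = Hi_in >>u 8, while Shamt == 40 selects the
// else-branch: Lo = Hi_in >>u 8 and Hi = 0.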

// Returns the opcode of the target-specific SDNode that implements the 32-bit
// form of the given Opcode.
static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
  switch (Opcode) {
  default:
    llvm_unreachable("Unexpected opcode");
  case ISD::SHL:
    return LoongArchISD::SLL_W;
  case ISD::SRA:
    return LoongArchISD::SRA_W;
  case ISD::SRL:
    return LoongArchISD::SRL_W;
  case ISD::ROTL:
  case ISD::ROTR:
    return LoongArchISD::ROTR_W;
  case ISD::CTTZ:
    return LoongArchISD::CTZ_W;
  case ISD::CTLZ:
    return LoongArchISD::CLZ_W;
  }
}

// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
// node. Because i8/i16/i32 aren't legal types for LA64, these operations would
// otherwise be promoted to i64, making it difficult to select the
// SLL_W/.../*W instructions later, because the fact that the operation was
// originally of type i8/i16/i32 is lost.
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
                                   unsigned ExtOpc = ISD::ANY_EXTEND) {
  SDLoc DL(N);
  LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(N->getOpcode());
  SDValue NewOp0, NewRes;

  switch (NumOp) {
  default:
    llvm_unreachable("Unexpected NumOp");
  case 1: {
    NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
    NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
    break;
  }
  case 2: {
    NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
    SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
    if (N->getOpcode() == ISD::ROTL) {
      SDValue TmpOp = DAG.getConstant(32, DL, MVT::i64);
      NewOp1 = DAG.getNode(ISD::SUB, DL, MVT::i64, TmpOp, NewOp1);
    }
    NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
    break;
  }
  // TODO: Handle more NumOp.
  }

  // ReplaceNodeResults requires we maintain the same type for the return
  // value.
  return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
}
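
// Illustrative use (a sketch): when ReplaceNodeResults meets (i32 (shl x, y))
// on LA64, customLegalizeToWOp(N, DAG, 2) any-extends both operands to i64,
// emits (i64 (LoongArchISD::SLL_W x', y')), and truncates the result back to
// i32, so instruction selection can still pick sll.w rather than a 64-bit
// shift.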
1720
1721// Helper function that emits error message for intrinsics with/without chain
1722// and return a UNDEF or and the chain as the results.
1725 StringRef ErrorMsg, bool WithChain = true) {
1726 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
1727 Results.push_back(DAG.getUNDEF(N->getValueType(0)));
1728 if (!WithChain)
1729 return;
1730 Results.push_back(N->getOperand(0));
1731}
1732
1733template <unsigned N>
1734static void
1736 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
1737 unsigned ResOp) {
1738 const StringRef ErrorMsgOOR = "argument out of range";
1739 unsigned Imm = Node->getConstantOperandVal(2);
1740 if (!isUInt<N>(Imm)) {
1742 /*WithChain=*/false);
1743 return;
1744 }
1745 SDLoc DL(Node);
1746 SDValue Vec = Node->getOperand(1);
1747
1748 SDValue PickElt =
1749 DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec,
1750 DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()),
1752 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0),
1753 PickElt.getValue(0)));
1754}
1755
1758 SelectionDAG &DAG,
1759 const LoongArchSubtarget &Subtarget,
1760 unsigned ResOp) {
1761 SDLoc DL(N);
1762 SDValue Vec = N->getOperand(1);
1763
1764 SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec);
1765 Results.push_back(
1766 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0)));
1767}
1768
1769static void
1771 SelectionDAG &DAG,
1772 const LoongArchSubtarget &Subtarget) {
1773 switch (N->getConstantOperandVal(0)) {
1774 default:
1775 llvm_unreachable("Unexpected Intrinsic.");
1776 case Intrinsic::loongarch_lsx_vpickve2gr_b:
1777 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
1778 LoongArchISD::VPICK_SEXT_ELT);
1779 break;
1780 case Intrinsic::loongarch_lsx_vpickve2gr_h:
1781 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
1782 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
1783 LoongArchISD::VPICK_SEXT_ELT);
1784 break;
1785 case Intrinsic::loongarch_lsx_vpickve2gr_w:
1786 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
1787 LoongArchISD::VPICK_SEXT_ELT);
1788 break;
1789 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
1790 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
1791 LoongArchISD::VPICK_ZEXT_ELT);
1792 break;
1793 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
1794 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
1795 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
1796 LoongArchISD::VPICK_ZEXT_ELT);
1797 break;
1798 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
1799 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
1800 LoongArchISD::VPICK_ZEXT_ELT);
1801 break;
1802 case Intrinsic::loongarch_lsx_bz_b:
1803 case Intrinsic::loongarch_lsx_bz_h:
1804 case Intrinsic::loongarch_lsx_bz_w:
1805 case Intrinsic::loongarch_lsx_bz_d:
1806 case Intrinsic::loongarch_lasx_xbz_b:
1807 case Intrinsic::loongarch_lasx_xbz_h:
1808 case Intrinsic::loongarch_lasx_xbz_w:
1809 case Intrinsic::loongarch_lasx_xbz_d:
1810 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
1811 LoongArchISD::VANY_ZERO);
1812 break;
1813 case Intrinsic::loongarch_lsx_bz_v:
1814 case Intrinsic::loongarch_lasx_xbz_v:
1815 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
1816 LoongArchISD::VALL_ZERO);
1817 break;
1818 case Intrinsic::loongarch_lsx_bnz_b:
1819 case Intrinsic::loongarch_lsx_bnz_h:
1820 case Intrinsic::loongarch_lsx_bnz_w:
1821 case Intrinsic::loongarch_lsx_bnz_d:
1822 case Intrinsic::loongarch_lasx_xbnz_b:
1823 case Intrinsic::loongarch_lasx_xbnz_h:
1824 case Intrinsic::loongarch_lasx_xbnz_w:
1825 case Intrinsic::loongarch_lasx_xbnz_d:
1826 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
1827 LoongArchISD::VALL_NONZERO);
1828 break;
1829 case Intrinsic::loongarch_lsx_bnz_v:
1830 case Intrinsic::loongarch_lasx_xbnz_v:
1831 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
1832 LoongArchISD::VANY_NONZERO);
1833 break;
1834 }
1835}
1836
1837void LoongArchTargetLowering::ReplaceNodeResults(
1838 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
1839 SDLoc DL(N);
1840 EVT VT = N->getValueType(0);
1841 switch (N->getOpcode()) {
1842 default:
1843 llvm_unreachable("Don't know how to legalize this operation");
1844 case ISD::SHL:
1845 case ISD::SRA:
1846 case ISD::SRL:
1847 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
1848 "Unexpected custom legalisation");
1849 if (N->getOperand(1).getOpcode() != ISD::Constant) {
1850 Results.push_back(customLegalizeToWOp(N, DAG, 2));
1851 break;
1852 }
1853 break;
1854 case ISD::ROTL:
1855 case ISD::ROTR:
1856 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
1857 "Unexpected custom legalisation");
1858 Results.push_back(customLegalizeToWOp(N, DAG, 2));
1859 break;
1860 case ISD::FP_TO_SINT: {
1861 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
1862 "Unexpected custom legalisation");
1863 SDValue Src = N->getOperand(0);
1864 EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0));
1865 if (getTypeAction(*DAG.getContext(), Src.getValueType()) !=
1866 TargetLowering::TypeSoftenFloat) {
1867 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src);
1868 Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst));
1869 return;
1870 }
1871 // If the FP type needs to be softened, emit a library call using the 'si'
1872 // version. If we left it to default legalization we'd end up with 'di'.
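// E.g. a softened f32 -> i32 fptosi should become a call to __fixsfsi;
// default promotion of the i32 result to i64 would pick __fixsfdi instead.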
1873 RTLIB::Libcall LC;
1874 LC = RTLIB::getFPTOSINT(Src.getValueType(), VT);
1875 MakeLibCallOptions CallOptions;
1876 EVT OpVT = Src.getValueType();
1877 CallOptions.setTypeListBeforeSoften(OpVT, VT, true);
1878 SDValue Chain = SDValue();
1879 SDValue Result;
1880 std::tie(Result, Chain) =
1881 makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain);
1882 Results.push_back(Result);
1883 break;
1884 }
1885 case ISD::BITCAST: {
1886 SDValue Src = N->getOperand(0);
1887 EVT SrcVT = Src.getValueType();
1888 if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
1889 Subtarget.hasBasicF()) {
1890 SDValue Dst =
1891 DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
1892 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
1893 }
1894 break;
1895 }
1896 case ISD::FP_TO_UINT: {
1897 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
1898 "Unexpected custom legalisation");
1899 auto &TLI = DAG.getTargetLoweringInfo();
1900 SDValue Tmp1, Tmp2;
1901 TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG);
1902 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
1903 break;
1904 }
1905 case ISD::BSWAP: {
1906 SDValue Src = N->getOperand(0);
1907 assert((VT == MVT::i16 || VT == MVT::i32) &&
1908 "Unexpected custom legalization");
1909 MVT GRLenVT = Subtarget.getGRLenVT();
1910 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
1911 SDValue Tmp;
1912 switch (VT.getSizeInBits()) {
1913 default:
1914 llvm_unreachable("Unexpected operand width");
1915 case 16:
1916 Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc);
1917 break;
1918 case 32:
1919 // Only LA64 will get to here due to the size mismatch between VT and
1920 // GRLenVT; the LA32 lowering is defined directly in LoongArchInstrInfo.
1921 Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc);
1922 break;
1923 }
1924 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
1925 break;
1926 }
1927 case ISD::BITREVERSE: {
1928 SDValue Src = N->getOperand(0);
1929 assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
1930 "Unexpected custom legalization");
1931 MVT GRLenVT = Subtarget.getGRLenVT();
1932 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
1933 SDValue Tmp;
1934 switch (VT.getSizeInBits()) {
1935 default:
1936 llvm_unreachable("Unexpected operand width");
1937 case 8:
1938 Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc);
1939 break;
1940 case 32:
1941 Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc);
1942 break;
1943 }
1944 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
1945 break;
1946 }
1947 case ISD::CTLZ:
1948 case ISD::CTTZ: {
1949 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
1950 "Unexpected custom legalisation");
1951 Results.push_back(customLegalizeToWOp(N, DAG, 1));
1952 break;
1953 }
1954 case ISD::INTRINSIC_W_CHAIN: {
1955 SDValue Chain = N->getOperand(0);
1956 SDValue Op2 = N->getOperand(2);
1957 MVT GRLenVT = Subtarget.getGRLenVT();
1958 const StringRef ErrorMsgOOR = "argument out of range";
1959 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
1960 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
1961
1962 switch (N->getConstantOperandVal(1)) {
1963 default:
1964 llvm_unreachable("Unexpected Intrinsic.");
1965 case Intrinsic::loongarch_movfcsr2gr: {
1966 if (!Subtarget.hasBasicF()) {
1967 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF);
1968 return;
1969 }
1970 unsigned Imm = Op2->getAsZExtVal();
1971 if (!isUInt<2>(Imm)) {
1972 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
1973 return;
1974 }
1975 SDValue MOVFCSR2GRResults = DAG.getNode(
1976 LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other},
1977 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
1978 Results.push_back(
1979 DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0)));
1980 Results.push_back(MOVFCSR2GRResults.getValue(1));
1981 break;
1982 }
1983#define CRC_CASE_EXT_BINARYOP(NAME, NODE) \
1984 case Intrinsic::loongarch_##NAME: { \
1985 SDValue NODE = DAG.getNode( \
1986 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
1987 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
1988 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
1989 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
1990 Results.push_back(NODE.getValue(1)); \
1991 break; \
1992 }
1993 CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
1994 CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
1995 CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
1996 CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
1997 CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
1998 CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
1999#undef CRC_CASE_EXT_BINARYOP
2000
2001#define CRC_CASE_EXT_UNARYOP(NAME, NODE) \
2002 case Intrinsic::loongarch_##NAME: { \
2003 SDValue NODE = DAG.getNode( \
2004 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
2005 {Chain, Op2, \
2006 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
2007 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
2008 Results.push_back(NODE.getValue(1)); \
2009 break; \
2010 }
2011 CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
2012 CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
2013#undef CRC_CASE_EXT_UNARYOP
2014#define CSR_CASE(ID) \
2015 case Intrinsic::loongarch_##ID: { \
2016 if (!Subtarget.is64Bit()) \
2017 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \
2018 break; \
2019 }
2020 CSR_CASE(csrrd_d);
2021 CSR_CASE(csrwr_d);
2022 CSR_CASE(csrxchg_d);
2023 CSR_CASE(iocsrrd_d);
2024#undef CSR_CASE
2025 case Intrinsic::loongarch_csrrd_w: {
2026 unsigned Imm = Op2->getAsZExtVal();
2027 if (!isUInt<14>(Imm)) {
2028 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
2029 return;
2030 }
2031 SDValue CSRRDResults =
2032 DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
2033 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
2034 Results.push_back(
2035 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0)));
2036 Results.push_back(CSRRDResults.getValue(1));
2037 break;
2038 }
2039 case Intrinsic::loongarch_csrwr_w: {
2040 unsigned Imm = N->getConstantOperandVal(3);
2041 if (!isUInt<14>(Imm)) {
2042 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
2043 return;
2044 }
2045 SDValue CSRWRResults =
2046 DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
2047 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
2048 DAG.getConstant(Imm, DL, GRLenVT)});
2049 Results.push_back(
2050 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0)));
2051 Results.push_back(CSRWRResults.getValue(1));
2052 break;
2053 }
2054 case Intrinsic::loongarch_csrxchg_w: {
2055 unsigned Imm = N->getConstantOperandVal(4);
2056 if (!isUInt<14>(Imm)) {
2057 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
2058 return;
2059 }
2060 SDValue CSRXCHGResults = DAG.getNode(
2061 LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
2062 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
2063 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
2064 DAG.getConstant(Imm, DL, GRLenVT)});
2065 Results.push_back(
2066 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0)));
2067 Results.push_back(CSRXCHGResults.getValue(1));
2068 break;
2069 }
2070#define IOCSRRD_CASE(NAME, NODE) \
2071 case Intrinsic::loongarch_##NAME: { \
2072 SDValue IOCSRRDResults = \
2073 DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
2074 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
2075 Results.push_back( \
2076 DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \
2077 Results.push_back(IOCSRRDResults.getValue(1)); \
2078 break; \
2079 }
2080 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
2081 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
2082 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
2083#undef IOCSRRD_CASE
2084 case Intrinsic::loongarch_cpucfg: {
2085 SDValue CPUCFGResults =
2086 DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
2087 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)});
2088 Results.push_back(
2089 DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0)));
2090 Results.push_back(CPUCFGResults.getValue(1));
2091 break;
2092 }
2093 case Intrinsic::loongarch_lddir_d: {
2094 if (!Subtarget.is64Bit()) {
2095 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);
2096 return;
2097 }
2098 break;
2099 }
2100 }
2101 break;
2102 }
2103 case ISD::READ_REGISTER: {
2104 if (Subtarget.is64Bit())
2105 DAG.getContext()->emitError(
2106 "On LA64, only 64-bit registers can be read.");
2107 else
2108 DAG.getContext()->emitError(
2109 "On LA32, only 32-bit registers can be read.");
2110 Results.push_back(DAG.getUNDEF(VT));
2111 Results.push_back(N->getOperand(0));
2112 break;
2113 }
2114 case ISD::INTRINSIC_WO_CHAIN: {
2115 replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
2116 break;
2117 }
2118 }
2119}
2120
2121static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
2122 TargetLowering::DAGCombinerInfo &DCI,
2123 const LoongArchSubtarget &Subtarget) {
2124 if (DCI.isBeforeLegalizeOps())
2125 return SDValue();
2126
2127 SDValue FirstOperand = N->getOperand(0);
2128 SDValue SecondOperand = N->getOperand(1);
2129 unsigned FirstOperandOpc = FirstOperand.getOpcode();
2130 EVT ValTy = N->getValueType(0);
2131 SDLoc DL(N);
2132 uint64_t lsb, msb;
2133 unsigned SMIdx, SMLen;
2134 ConstantSDNode *CN;
2135 SDValue NewOperand;
2136 MVT GRLenVT = Subtarget.getGRLenVT();
2137
2138 // Op's second operand must be a shifted mask.
2139 if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
2140 !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
2141 return SDValue();
2142
2143 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
2144 // Pattern match BSTRPICK.
2145 // $dst = and ((sra or srl) $src, lsb), (2**len - 1)
2146 // => BSTRPICK $dst, $src, msb, lsb
2147 // where msb = lsb + len - 1
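// E.g. (and (srl $src, 8), 0xff) => BSTRPICK $dst, $src, 15, 8
// (lsb = 8, len = 8, msb = 15).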
2148
2149 // The second operand of the shift must be an immediate.
2150 if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
2151 return SDValue();
2152
2153 lsb = CN->getZExtValue();
2154
2155 // Return if the shifted mask does not start at bit 0 or the sum of its
2156 // length and lsb exceeds the word's size.
2157 if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
2158 return SDValue();
2159
2160 NewOperand = FirstOperand.getOperand(0);
2161 } else {
2162 // Pattern match BSTRPICK.
2163 // $dst = and $src, (2**len - 1), if len > 12
2164 // => BSTRPICK $dst, $src, msb, lsb
2165 // where lsb = 0 and msb = len - 1
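// E.g. (and $src, 0x1ffff) => BSTRPICK $dst, $src, 16, 0
// (len = 17 > 12, so the mask does not fit in ANDI's 12-bit immediate).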
2166
2167 // If the mask is <= 0xfff, andi can be used instead.
2168 if (CN->getZExtValue() <= 0xfff)
2169 return SDValue();
2170
2171 // Return if the mask's msb would exceed the value's bit width.
2172 if (SMIdx + SMLen > ValTy.getSizeInBits())
2173 return SDValue();
2174
2175 if (SMIdx > 0) {
2176 // Omit if the constant has more than 2 uses. This is a conservative
2177 // decision. Whether it is a win depends on the HW microarchitecture.
2178 // However, it should always be better for 1 and 2 uses.
2179 if (CN->use_size() > 2)
2180 return SDValue();
2181 // Return if the constant can be composed by a single LU12I.W.
2182 if ((CN->getZExtValue() & 0xfff) == 0)
2183 return SDValue();
2184 // Return if the constant can be composed by a single ADDI with
2185 // the zero register.
2186 if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
2187 return SDValue();
2188 }
2189
2190 lsb = SMIdx;
2191 NewOperand = FirstOperand;
2192 }
2193
2194 msb = lsb + SMLen - 1;
2195 SDValue NR0 = DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
2196 DAG.getConstant(msb, DL, GRLenVT),
2197 DAG.getConstant(lsb, DL, GRLenVT));
2198 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
2199 return NR0;
2200 // Try to optimize to
2201 // bstrpick $Rd, $Rs, msb, lsb
2202 // slli $Rd, $Rd, lsb
2203 return DAG.getNode(ISD::SHL, DL, ValTy, NR0,
2204 DAG.getConstant(lsb, DL, GRLenVT));
2205}
2206
2207static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
2208 TargetLowering::DAGCombinerInfo &DCI,
2209 const LoongArchSubtarget &Subtarget) {
2210 if (DCI.isBeforeLegalizeOps())
2211 return SDValue();
2212
2213 // $dst = srl (and $src, Mask), Shamt
2214 // =>
2215 // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
2216 // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
2217 //
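// E.g. (srl (and $src, 0xff00), 8) => BSTRPICK $dst, $src, 15, 8
// (MaskIdx = 8, MaskLen = 8, Shamt = 8).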
2218
2219 SDValue FirstOperand = N->getOperand(0);
2220 ConstantSDNode *CN;
2221 EVT ValTy = N->getValueType(0);
2222 SDLoc DL(N);
2223 MVT GRLenVT = Subtarget.getGRLenVT();
2224 unsigned MaskIdx, MaskLen;
2225 uint64_t Shamt;
2226
2227 // The first operand must be an AND and the second operand of the AND must be
2228 // a shifted mask.
2229 if (FirstOperand.getOpcode() != ISD::AND ||
2230 !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
2231 !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen))
2232 return SDValue();
2233
2234 // The second operand (shift amount) must be an immediate.
2235 if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1))))
2236 return SDValue();
2237
2238 Shamt = CN->getZExtValue();
2239 if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
2240 return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy,
2241 FirstOperand->getOperand(0),
2242 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
2243 DAG.getConstant(Shamt, DL, GRLenVT));
2244
2245 return SDValue();
2246}
2247
2248static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
2249 TargetLowering::DAGCombinerInfo &DCI,
2250 const LoongArchSubtarget &Subtarget) {
2251 MVT GRLenVT = Subtarget.getGRLenVT();
2252 EVT ValTy = N->getValueType(0);
2253 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
2254 ConstantSDNode *CN0, *CN1;
2255 SDLoc DL(N);
2256 unsigned ValBits = ValTy.getSizeInBits();
2257 unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
2258 unsigned Shamt;
2259 bool SwapAndRetried = false;
2260
2261 if (DCI.isBeforeLegalizeOps())
2262 return SDValue();
2263
2264 if (ValBits != 32 && ValBits != 64)
2265 return SDValue();
2266
2267Retry:
2268 // 1st pattern to match BSTRINS:
2269 // R = or (and X, mask0), (and (shl Y, lsb), mask1)
2270 // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
2271 // =>
2272 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
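// E.g. with size = 8 and lsb = 8:
// (or (and X, ~0xff00), (and (shl Y, 8), 0xff00)) => BSTRINS X, Y, 15, 8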
2273 if (N0.getOpcode() == ISD::AND &&
2274 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
2275 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
2276 N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&
2277 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
2278 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
2279 MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
2280 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
2281 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
2282 (MaskIdx0 + MaskLen0 <= ValBits)) {
2283 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
2284 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
2285 N1.getOperand(0).getOperand(0),
2286 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
2287 DAG.getConstant(MaskIdx0, DL, GRLenVT));
2288 }
2289
2290 // 2nd pattern to match BSTRINS:
2291 // R = or (and X, mask0), (shl (and Y, mask1), lsb)
2292 // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
2293 // =>
2294 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
2295 if (N0.getOpcode() == ISD::AND &&
2296 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
2297 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
2298 N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
2299 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
2300 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
2301 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
2302 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
2303 MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
2304 (MaskIdx0 + MaskLen0 <= ValBits)) {
2305 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
2306 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
2307 N1.getOperand(0).getOperand(0),
2308 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
2309 DAG.getConstant(MaskIdx0, DL, GRLenVT));
2310 }
2311
2312 // 3rd pattern to match BSTRINS:
2313 // R = or (and X, mask0), (and Y, mask1)
2314 // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
2315 // =>
2316 // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
2317 // where msb = lsb + size - 1
2318 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
2319 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
2320 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
2321 (MaskIdx0 + MaskLen0 <= 64) &&
2322 (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&
2323 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
2324 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
2325 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
2326 DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,
2327 DAG.getConstant(MaskIdx0, DL, GRLenVT)),
2328 DAG.getConstant(ValBits == 32
2329 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
2330 : (MaskIdx0 + MaskLen0 - 1),
2331 DL, GRLenVT),
2332 DAG.getConstant(MaskIdx0, DL, GRLenVT));
2333 }
2334
2335 // 4th pattern to match BSTRINS:
2336 // R = or (and X, mask), (shl Y, shamt)
2337 // where mask = (2**shamt - 1)
2338 // =>
2339 // R = BSTRINS X, Y, ValBits - 1, shamt
2340 // where ValBits = 32 or 64
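// E.g. for i64: (or (and X, 0xff), (shl Y, 8)) => BSTRINS X, Y, 63, 8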
2341 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
2342 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
2343 isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
2344 MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
2345 (Shamt = CN1->getZExtValue()) == MaskLen0 &&
2346 (MaskIdx0 + MaskLen0 <= ValBits)) {
2347 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
2348 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
2349 N1.getOperand(0),
2350 DAG.getConstant((ValBits - 1), DL, GRLenVT),
2351 DAG.getConstant(Shamt, DL, GRLenVT));
2352 }
2353
2354 // 5th pattern to match BSTRINS:
2355 // R = or (and X, mask), const
2356 // where ~mask = (2**size - 1) << lsb, mask & const = 0
2357 // =>
2358 // R = BSTRINS X, (const >> lsb), msb, lsb
2359 // where msb = lsb + size - 1
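// E.g. (or (and X, ~0xff00), 0x2a00) => BSTRINS X, 0x2a, 15, 8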
2360 if (N0.getOpcode() == ISD::AND &&
2361 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
2362 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
2363 (CN1 = dyn_cast<ConstantSDNode>(N1)) &&
2364 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
2365 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
2366 return DAG.getNode(
2367 LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
2368 DAG.getConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
2369 DAG.getConstant(ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
2370 : (MaskIdx0 + MaskLen0 - 1),
2371 DL, GRLenVT),
2372 DAG.getConstant(MaskIdx0, DL, GRLenVT));
2373 }
2374
2375 // 6th pattern.
2376 // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
2377 // by the incoming bits are known to be zero.
2378 // =>
2379 // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
2380 //
2381 // Note that the 1st pattern is a special case of the 6th, i.e. the 6th
2382 // pattern is more common than the 1st. So we put the 1st before the 6th in
2383 // order to match as many nodes as possible.
2384 ConstantSDNode *CNMask, *CNShamt;
2385 unsigned MaskIdx, MaskLen;
2386 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
2387 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
2388 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
2389 MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
2390 CNShamt->getZExtValue() + MaskLen <= ValBits) {
2391 Shamt = CNShamt->getZExtValue();
2392 APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
2393 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
2394 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
2395 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
2396 N1.getOperand(0).getOperand(0),
2397 DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT),
2398 DAG.getConstant(Shamt, DL, GRLenVT));
2399 }
2400 }
2401
2402 // 7th pattern.
2403 // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
2404 // overwritten by the incoming bits are known to be zero.
2405 // =>
2406 // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
2407 //
2408 // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
2409 // before the 7th in order to match as many nodes as possible.
2410 if (N1.getOpcode() == ISD::AND &&
2411 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
2412 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
2413 N1.getOperand(0).getOpcode() == ISD::SHL &&
2414 (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
2415 CNShamt->getZExtValue() == MaskIdx) {
2416 APInt ShMask(ValBits, CNMask->getZExtValue());
2417 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
2418 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
2419 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
2420 N1.getOperand(0).getOperand(0),
2421 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
2422 DAG.getConstant(MaskIdx, DL, GRLenVT));
2423 }
2424 }
2425
2426 // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
2427 if (!SwapAndRetried) {
2428 std::swap(N0, N1);
2429 SwapAndRetried = true;
2430 goto Retry;
2431 }
2432
2433 SwapAndRetried = false;
2434Retry2:
2435 // 8th pattern.
2436 // a = b | (c & shifted_mask), where all positions in b to be overwritten by
2437 // the incoming bits are known to be zero.
2438 // =>
2439 // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
2440 //
2441 // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So
2442 // we put it here in order to match as many nodes as possible or generate
2443 // fewer instructions.
2444 if (N1.getOpcode() == ISD::AND &&
2445 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
2446 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) {
2447 APInt ShMask(ValBits, CNMask->getZExtValue());
2448 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
2449 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
2450 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
2451 DAG.getNode(ISD::SRL, DL, N1->getValueType(0),
2452 N1->getOperand(0),
2453 DAG.getConstant(MaskIdx, DL, GRLenVT)),
2454 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
2455 DAG.getConstant(MaskIdx, DL, GRLenVT));
2456 }
2457 }
2458 // Swap N0/N1 and retry.
2459 if (!SwapAndRetried) {
2460 std::swap(N0, N1);
2461 SwapAndRetried = true;
2462 goto Retry2;
2463 }
2464
2465 return SDValue();
2466}
2467
2468// Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
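// (REVB_2W swaps the four bytes of the word and BITREV_W then reverses all 32
// bits, so each byte ends up back in its place with its bits reversed, which
// is exactly BITREV_4B.)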
2469static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG,
2470 TargetLowering::DAGCombinerInfo &DCI,
2471 const LoongArchSubtarget &Subtarget) {
2472 if (DCI.isBeforeLegalizeOps())
2473 return SDValue();
2474
2475 SDValue Src = N->getOperand(0);
2476 if (Src.getOpcode() != LoongArchISD::REVB_2W)
2477 return SDValue();
2478
2479 return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0),
2480 Src.getOperand(0));
2481}
2482
2483template <unsigned N>
2484static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp,
2485 SelectionDAG &DAG,
2486 const LoongArchSubtarget &Subtarget,
2487 bool IsSigned = false) {
2488 SDLoc DL(Node);
2489 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
2490 // Check the ImmArg.
2491 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
2492 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
2493 DAG.getContext()->emitError(Node->getOperationName(0) +
2494 ": argument out of range.");
2495 return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT());
2496 }
2497 return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT());
2498}
2499
2500template <unsigned N>
2501static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
2502 SelectionDAG &DAG, bool IsSigned = false) {
2503 SDLoc DL(Node);
2504 EVT ResTy = Node->getValueType(0);
2505 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
2506
2507 // Check the ImmArg.
2508 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
2509 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
2510 DAG.getContext()->emitError(Node->getOperationName(0) +
2511 ": argument out of range.");
2512 return DAG.getNode(ISD::UNDEF, DL, ResTy);
2513 }
2514 return DAG.getConstant(
2515 APInt(ResTy.getScalarType().getSizeInBits(),
2516 IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
2517 DL, ResTy);
2518}
2519
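// Reduce the per-element shift amounts in operand 2 modulo the element bit
// width, mirroring the hardware's modulo shift-amount behavior.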
2520static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) {
2521 SDLoc DL(Node);
2522 EVT ResTy = Node->getValueType(0);
2523 SDValue Vec = Node->getOperand(2);
2524 SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy);
2525 return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask);
2526}
2527
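// Lower [x]vbitclr.{b/h/w/d} generically: clear bit (y % EltBits) in each
// element, i.e. (and x, (not (shl 1, (and y, EltBits - 1)))).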
2528static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) {
2529 SDLoc DL(Node);
2530 EVT ResTy = Node->getValueType(0);
2531 SDValue One = DAG.getConstant(1, DL, ResTy);
2532 SDValue Bit =
2533 DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG));
2534
2535 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1),
2536 DAG.getNOT(DL, Bit, ResTy));
2537}
2538
2539template <unsigned N>
2540static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) {
2541 SDLoc DL(Node);
2542 EVT ResTy = Node->getValueType(0);
2543 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
2544 // Check the unsigned ImmArg.
2545 if (!isUInt<N>(CImm->getZExtValue())) {
2546 DAG.getContext()->emitError(Node->getOperationName(0) +
2547 ": argument out of range.");
2548 return DAG.getNode(ISD::UNDEF, DL, ResTy);
2549 }
2550
2551 APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
2552 SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy);
2553
2554 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask);
2555}
2556
2557template <unsigned N>
2558static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) {
2559 SDLoc DL(Node);
2560 EVT ResTy = Node->getValueType(0);
2561 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
2562 // Check the unsigned ImmArg.
2563 if (!isUInt<N>(CImm->getZExtValue())) {
2564 DAG.getContext()->emitError(Node->getOperationName(0) +
2565 ": argument out of range.");
2566 return DAG.getNode(ISD::UNDEF, DL, ResTy);
2567 }
2568
2569 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
2570 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
2571 return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm);
2572}
2573
2574template <unsigned N>
2575static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) {
2576 SDLoc DL(Node);
2577 EVT ResTy = Node->getValueType(0);
2578 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
2579 // Check the unsigned ImmArg.
2580 if (!isUInt<N>(CImm->getZExtValue())) {
2581 DAG.getContext()->emitError(Node->getOperationName(0) +
2582 ": argument out of range.");
2583 return DAG.getNode(ISD::UNDEF, DL, ResTy);
2584 }
2585
2586 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
2587 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
2588 return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm);
2589}
2590
2591static SDValue
2592performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
2593 TargetLowering::DAGCombinerInfo &DCI,
2594 const LoongArchSubtarget &Subtarget) {
2595 SDLoc DL(N);
2596 switch (N->getConstantOperandVal(0)) {
2597 default:
2598 break;
2599 case Intrinsic::loongarch_lsx_vadd_b:
2600 case Intrinsic::loongarch_lsx_vadd_h:
2601 case Intrinsic::loongarch_lsx_vadd_w:
2602 case Intrinsic::loongarch_lsx_vadd_d:
2603 case Intrinsic::loongarch_lasx_xvadd_b:
2604 case Intrinsic::loongarch_lasx_xvadd_h:
2605 case Intrinsic::loongarch_lasx_xvadd_w:
2606 case Intrinsic::loongarch_lasx_xvadd_d:
2607 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
2608 N->getOperand(2));
2609 case Intrinsic::loongarch_lsx_vaddi_bu:
2610 case Intrinsic::loongarch_lsx_vaddi_hu:
2611 case Intrinsic::loongarch_lsx_vaddi_wu:
2612 case Intrinsic::loongarch_lsx_vaddi_du:
2613 case Intrinsic::loongarch_lasx_xvaddi_bu:
2614 case Intrinsic::loongarch_lasx_xvaddi_hu:
2615 case Intrinsic::loongarch_lasx_xvaddi_wu:
2616 case Intrinsic::loongarch_lasx_xvaddi_du:
2617 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
2618 lowerVectorSplatImm<5>(N, 2, DAG));
2619 case Intrinsic::loongarch_lsx_vsub_b:
2620 case Intrinsic::loongarch_lsx_vsub_h:
2621 case Intrinsic::loongarch_lsx_vsub_w:
2622 case Intrinsic::loongarch_lsx_vsub_d:
2623 case Intrinsic::loongarch_lasx_xvsub_b:
2624 case Intrinsic::loongarch_lasx_xvsub_h:
2625 case Intrinsic::loongarch_lasx_xvsub_w:
2626 case Intrinsic::loongarch_lasx_xvsub_d:
2627 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
2628 N->getOperand(2));
2629 case Intrinsic::loongarch_lsx_vsubi_bu:
2630 case Intrinsic::loongarch_lsx_vsubi_hu:
2631 case Intrinsic::loongarch_lsx_vsubi_wu:
2632 case Intrinsic::loongarch_lsx_vsubi_du:
2633 case Intrinsic::loongarch_lasx_xvsubi_bu:
2634 case Intrinsic::loongarch_lasx_xvsubi_hu:
2635 case Intrinsic::loongarch_lasx_xvsubi_wu:
2636 case Intrinsic::loongarch_lasx_xvsubi_du:
2637 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
2638 lowerVectorSplatImm<5>(N, 2, DAG));
2639 case Intrinsic::loongarch_lsx_vneg_b:
2640 case Intrinsic::loongarch_lsx_vneg_h:
2641 case Intrinsic::loongarch_lsx_vneg_w:
2642 case Intrinsic::loongarch_lsx_vneg_d:
2643 case Intrinsic::loongarch_lasx_xvneg_b:
2644 case Intrinsic::loongarch_lasx_xvneg_h:
2645 case Intrinsic::loongarch_lasx_xvneg_w:
2646 case Intrinsic::loongarch_lasx_xvneg_d:
2647 return DAG.getNode(
2648 ISD::SUB, DL, N->getValueType(0),
2649 DAG.getConstant(
2650 APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0,
2651 /*isSigned=*/true),
2652 SDLoc(N), N->getValueType(0)),
2653 N->getOperand(1));
2654 case Intrinsic::loongarch_lsx_vmax_b:
2655 case Intrinsic::loongarch_lsx_vmax_h:
2656 case Intrinsic::loongarch_lsx_vmax_w:
2657 case Intrinsic::loongarch_lsx_vmax_d:
2658 case Intrinsic::loongarch_lasx_xvmax_b:
2659 case Intrinsic::loongarch_lasx_xvmax_h:
2660 case Intrinsic::loongarch_lasx_xvmax_w:
2661 case Intrinsic::loongarch_lasx_xvmax_d:
2662 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
2663 N->getOperand(2));
2664 case Intrinsic::loongarch_lsx_vmax_bu:
2665 case Intrinsic::loongarch_lsx_vmax_hu:
2666 case Intrinsic::loongarch_lsx_vmax_wu:
2667 case Intrinsic::loongarch_lsx_vmax_du:
2668 case Intrinsic::loongarch_lasx_xvmax_bu:
2669 case Intrinsic::loongarch_lasx_xvmax_hu:
2670 case Intrinsic::loongarch_lasx_xvmax_wu:
2671 case Intrinsic::loongarch_lasx_xvmax_du:
2672 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
2673 N->getOperand(2));
2674 case Intrinsic::loongarch_lsx_vmaxi_b:
2675 case Intrinsic::loongarch_lsx_vmaxi_h:
2676 case Intrinsic::loongarch_lsx_vmaxi_w:
2677 case Intrinsic::loongarch_lsx_vmaxi_d:
2678 case Intrinsic::loongarch_lasx_xvmaxi_b:
2679 case Intrinsic::loongarch_lasx_xvmaxi_h:
2680 case Intrinsic::loongarch_lasx_xvmaxi_w:
2681 case Intrinsic::loongarch_lasx_xvmaxi_d:
2682 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
2683 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
2684 case Intrinsic::loongarch_lsx_vmaxi_bu:
2685 case Intrinsic::loongarch_lsx_vmaxi_hu:
2686 case Intrinsic::loongarch_lsx_vmaxi_wu:
2687 case Intrinsic::loongarch_lsx_vmaxi_du:
2688 case Intrinsic::loongarch_lasx_xvmaxi_bu:
2689 case Intrinsic::loongarch_lasx_xvmaxi_hu:
2690 case Intrinsic::loongarch_lasx_xvmaxi_wu:
2691 case Intrinsic::loongarch_lasx_xvmaxi_du:
2692 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
2693 lowerVectorSplatImm<5>(N, 2, DAG));
2694 case Intrinsic::loongarch_lsx_vmin_b:
2695 case Intrinsic::loongarch_lsx_vmin_h:
2696 case Intrinsic::loongarch_lsx_vmin_w:
2697 case Intrinsic::loongarch_lsx_vmin_d:
2698 case Intrinsic::loongarch_lasx_xvmin_b:
2699 case Intrinsic::loongarch_lasx_xvmin_h:
2700 case Intrinsic::loongarch_lasx_xvmin_w:
2701 case Intrinsic::loongarch_lasx_xvmin_d:
2702 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
2703 N->getOperand(2));
2704 case Intrinsic::loongarch_lsx_vmin_bu:
2705 case Intrinsic::loongarch_lsx_vmin_hu:
2706 case Intrinsic::loongarch_lsx_vmin_wu:
2707 case Intrinsic::loongarch_lsx_vmin_du:
2708 case Intrinsic::loongarch_lasx_xvmin_bu:
2709 case Intrinsic::loongarch_lasx_xvmin_hu:
2710 case Intrinsic::loongarch_lasx_xvmin_wu:
2711 case Intrinsic::loongarch_lasx_xvmin_du:
2712 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
2713 N->getOperand(2));
2714 case Intrinsic::loongarch_lsx_vmini_b:
2715 case Intrinsic::loongarch_lsx_vmini_h:
2716 case Intrinsic::loongarch_lsx_vmini_w:
2717 case Intrinsic::loongarch_lsx_vmini_d:
2718 case Intrinsic::loongarch_lasx_xvmini_b:
2719 case Intrinsic::loongarch_lasx_xvmini_h:
2720 case Intrinsic::loongarch_lasx_xvmini_w:
2721 case Intrinsic::loongarch_lasx_xvmini_d:
2722 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
2723 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
2724 case Intrinsic::loongarch_lsx_vmini_bu:
2725 case Intrinsic::loongarch_lsx_vmini_hu:
2726 case Intrinsic::loongarch_lsx_vmini_wu:
2727 case Intrinsic::loongarch_lsx_vmini_du:
2728 case Intrinsic::loongarch_lasx_xvmini_bu:
2729 case Intrinsic::loongarch_lasx_xvmini_hu:
2730 case Intrinsic::loongarch_lasx_xvmini_wu:
2731 case Intrinsic::loongarch_lasx_xvmini_du:
2732 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
2733 lowerVectorSplatImm<5>(N, 2, DAG));
2734 case Intrinsic::loongarch_lsx_vmul_b:
2735 case Intrinsic::loongarch_lsx_vmul_h:
2736 case Intrinsic::loongarch_lsx_vmul_w:
2737 case Intrinsic::loongarch_lsx_vmul_d:
2738 case Intrinsic::loongarch_lasx_xvmul_b:
2739 case Intrinsic::loongarch_lasx_xvmul_h:
2740 case Intrinsic::loongarch_lasx_xvmul_w:
2741 case Intrinsic::loongarch_lasx_xvmul_d:
2742 return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1),
2743 N->getOperand(2));
2744 case Intrinsic::loongarch_lsx_vmadd_b:
2745 case Intrinsic::loongarch_lsx_vmadd_h:
2746 case Intrinsic::loongarch_lsx_vmadd_w:
2747 case Intrinsic::loongarch_lsx_vmadd_d:
2748 case Intrinsic::loongarch_lasx_xvmadd_b:
2749 case Intrinsic::loongarch_lasx_xvmadd_h:
2750 case Intrinsic::loongarch_lasx_xvmadd_w:
2751 case Intrinsic::loongarch_lasx_xvmadd_d: {
2752 EVT ResTy = N->getValueType(0);
2753 return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1),
2754 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
2755 N->getOperand(3)));
2756 }
2757 case Intrinsic::loongarch_lsx_vmsub_b:
2758 case Intrinsic::loongarch_lsx_vmsub_h:
2759 case Intrinsic::loongarch_lsx_vmsub_w:
2760 case Intrinsic::loongarch_lsx_vmsub_d:
2761 case Intrinsic::loongarch_lasx_xvmsub_b:
2762 case Intrinsic::loongarch_lasx_xvmsub_h:
2763 case Intrinsic::loongarch_lasx_xvmsub_w:
2764 case Intrinsic::loongarch_lasx_xvmsub_d: {
2765 EVT ResTy = N->getValueType(0);
2766 return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1),
2767 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
2768 N->getOperand(3)));
2769 }
2770 case Intrinsic::loongarch_lsx_vdiv_b:
2771 case Intrinsic::loongarch_lsx_vdiv_h:
2772 case Intrinsic::loongarch_lsx_vdiv_w:
2773 case Intrinsic::loongarch_lsx_vdiv_d:
2774 case Intrinsic::loongarch_lasx_xvdiv_b:
2775 case Intrinsic::loongarch_lasx_xvdiv_h:
2776 case Intrinsic::loongarch_lasx_xvdiv_w:
2777 case Intrinsic::loongarch_lasx_xvdiv_d:
2778 return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1),
2779 N->getOperand(2));
2780 case Intrinsic::loongarch_lsx_vdiv_bu:
2781 case Intrinsic::loongarch_lsx_vdiv_hu:
2782 case Intrinsic::loongarch_lsx_vdiv_wu:
2783 case Intrinsic::loongarch_lsx_vdiv_du:
2784 case Intrinsic::loongarch_lasx_xvdiv_bu:
2785 case Intrinsic::loongarch_lasx_xvdiv_hu:
2786 case Intrinsic::loongarch_lasx_xvdiv_wu:
2787 case Intrinsic::loongarch_lasx_xvdiv_du:
2788 return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1),
2789 N->getOperand(2));
2790 case Intrinsic::loongarch_lsx_vmod_b:
2791 case Intrinsic::loongarch_lsx_vmod_h:
2792 case Intrinsic::loongarch_lsx_vmod_w:
2793 case Intrinsic::loongarch_lsx_vmod_d:
2794 case Intrinsic::loongarch_lasx_xvmod_b:
2795 case Intrinsic::loongarch_lasx_xvmod_h:
2796 case Intrinsic::loongarch_lasx_xvmod_w:
2797 case Intrinsic::loongarch_lasx_xvmod_d:
2798 return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1),
2799 N->getOperand(2));
2800 case Intrinsic::loongarch_lsx_vmod_bu:
2801 case Intrinsic::loongarch_lsx_vmod_hu:
2802 case Intrinsic::loongarch_lsx_vmod_wu:
2803 case Intrinsic::loongarch_lsx_vmod_du:
2804 case Intrinsic::loongarch_lasx_xvmod_bu:
2805 case Intrinsic::loongarch_lasx_xvmod_hu:
2806 case Intrinsic::loongarch_lasx_xvmod_wu:
2807 case Intrinsic::loongarch_lasx_xvmod_du:
2808 return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1),
2809 N->getOperand(2));
2810 case Intrinsic::loongarch_lsx_vand_v:
2811 case Intrinsic::loongarch_lasx_xvand_v:
2812 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
2813 N->getOperand(2));
2814 case Intrinsic::loongarch_lsx_vor_v:
2815 case Intrinsic::loongarch_lasx_xvor_v:
2816 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
2817 N->getOperand(2));
2818 case Intrinsic::loongarch_lsx_vxor_v:
2819 case Intrinsic::loongarch_lasx_xvxor_v:
2820 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
2821 N->getOperand(2));
2822 case Intrinsic::loongarch_lsx_vnor_v:
2823 case Intrinsic::loongarch_lasx_xvnor_v: {
2824 SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
2825 N->getOperand(2));
2826 return DAG.getNOT(DL, Res, Res->getValueType(0));
2827 }
2828 case Intrinsic::loongarch_lsx_vandi_b:
2829 case Intrinsic::loongarch_lasx_xvandi_b:
2830 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
2831 lowerVectorSplatImm<8>(N, 2, DAG));
2832 case Intrinsic::loongarch_lsx_vori_b:
2833 case Intrinsic::loongarch_lasx_xvori_b:
2834 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
2835 lowerVectorSplatImm<8>(N, 2, DAG));
2836 case Intrinsic::loongarch_lsx_vxori_b:
2837 case Intrinsic::loongarch_lasx_xvxori_b:
2838 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
2839 lowerVectorSplatImm<8>(N, 2, DAG));
2840 case Intrinsic::loongarch_lsx_vsll_b:
2841 case Intrinsic::loongarch_lsx_vsll_h:
2842 case Intrinsic::loongarch_lsx_vsll_w:
2843 case Intrinsic::loongarch_lsx_vsll_d:
2844 case Intrinsic::loongarch_lasx_xvsll_b:
2845 case Intrinsic::loongarch_lasx_xvsll_h:
2846 case Intrinsic::loongarch_lasx_xvsll_w:
2847 case Intrinsic::loongarch_lasx_xvsll_d:
2848 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
2849 truncateVecElts(N, DAG));
2850 case Intrinsic::loongarch_lsx_vslli_b:
2851 case Intrinsic::loongarch_lasx_xvslli_b:
2852 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
2853 lowerVectorSplatImm<3>(N, 2, DAG));
2854 case Intrinsic::loongarch_lsx_vslli_h:
2855 case Intrinsic::loongarch_lasx_xvslli_h:
2856 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
2857 lowerVectorSplatImm<4>(N, 2, DAG));
2858 case Intrinsic::loongarch_lsx_vslli_w:
2859 case Intrinsic::loongarch_lasx_xvslli_w:
2860 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
2861 lowerVectorSplatImm<5>(N, 2, DAG));
2862 case Intrinsic::loongarch_lsx_vslli_d:
2863 case Intrinsic::loongarch_lasx_xvslli_d:
2864 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
2865 lowerVectorSplatImm<6>(N, 2, DAG));
2866 case Intrinsic::loongarch_lsx_vsrl_b:
2867 case Intrinsic::loongarch_lsx_vsrl_h:
2868 case Intrinsic::loongarch_lsx_vsrl_w:
2869 case Intrinsic::loongarch_lsx_vsrl_d:
2870 case Intrinsic::loongarch_lasx_xvsrl_b:
2871 case Intrinsic::loongarch_lasx_xvsrl_h:
2872 case Intrinsic::loongarch_lasx_xvsrl_w:
2873 case Intrinsic::loongarch_lasx_xvsrl_d:
2874 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
2875 truncateVecElts(N, DAG));
2876 case Intrinsic::loongarch_lsx_vsrli_b:
2877 case Intrinsic::loongarch_lasx_xvsrli_b:
2878 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
2879 lowerVectorSplatImm<3>(N, 2, DAG));
2880 case Intrinsic::loongarch_lsx_vsrli_h:
2881 case Intrinsic::loongarch_lasx_xvsrli_h:
2882 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
2883 lowerVectorSplatImm<4>(N, 2, DAG));
2884 case Intrinsic::loongarch_lsx_vsrli_w:
2885 case Intrinsic::loongarch_lasx_xvsrli_w:
2886 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
2887 lowerVectorSplatImm<5>(N, 2, DAG));
2888 case Intrinsic::loongarch_lsx_vsrli_d:
2889 case Intrinsic::loongarch_lasx_xvsrli_d:
2890 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
2891 lowerVectorSplatImm<6>(N, 2, DAG));
2892 case Intrinsic::loongarch_lsx_vsra_b:
2893 case Intrinsic::loongarch_lsx_vsra_h:
2894 case Intrinsic::loongarch_lsx_vsra_w:
2895 case Intrinsic::loongarch_lsx_vsra_d:
2896 case Intrinsic::loongarch_lasx_xvsra_b:
2897 case Intrinsic::loongarch_lasx_xvsra_h:
2898 case Intrinsic::loongarch_lasx_xvsra_w:
2899 case Intrinsic::loongarch_lasx_xvsra_d:
2900 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
2901 truncateVecElts(N, DAG));
2902 case Intrinsic::loongarch_lsx_vsrai_b:
2903 case Intrinsic::loongarch_lasx_xvsrai_b:
2904 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
2905 lowerVectorSplatImm<3>(N, 2, DAG));
2906 case Intrinsic::loongarch_lsx_vsrai_h:
2907 case Intrinsic::loongarch_lasx_xvsrai_h:
2908 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
2909 lowerVectorSplatImm<4>(N, 2, DAG));
2910 case Intrinsic::loongarch_lsx_vsrai_w:
2911 case Intrinsic::loongarch_lasx_xvsrai_w:
2912 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
2913 lowerVectorSplatImm<5>(N, 2, DAG));
2914 case Intrinsic::loongarch_lsx_vsrai_d:
2915 case Intrinsic::loongarch_lasx_xvsrai_d:
2916 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
2917 lowerVectorSplatImm<6>(N, 2, DAG));
2918 case Intrinsic::loongarch_lsx_vclz_b:
2919 case Intrinsic::loongarch_lsx_vclz_h:
2920 case Intrinsic::loongarch_lsx_vclz_w:
2921 case Intrinsic::loongarch_lsx_vclz_d:
2922 case Intrinsic::loongarch_lasx_xvclz_b:
2923 case Intrinsic::loongarch_lasx_xvclz_h:
2924 case Intrinsic::loongarch_lasx_xvclz_w:
2925 case Intrinsic::loongarch_lasx_xvclz_d:
2926 return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1));
2927 case Intrinsic::loongarch_lsx_vpcnt_b:
2928 case Intrinsic::loongarch_lsx_vpcnt_h:
2929 case Intrinsic::loongarch_lsx_vpcnt_w:
2930 case Intrinsic::loongarch_lsx_vpcnt_d:
2931 case Intrinsic::loongarch_lasx_xvpcnt_b:
2932 case Intrinsic::loongarch_lasx_xvpcnt_h:
2933 case Intrinsic::loongarch_lasx_xvpcnt_w:
2934 case Intrinsic::loongarch_lasx_xvpcnt_d:
2935 return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1));
2936 case Intrinsic::loongarch_lsx_vbitclr_b:
2937 case Intrinsic::loongarch_lsx_vbitclr_h:
2938 case Intrinsic::loongarch_lsx_vbitclr_w:
2939 case Intrinsic::loongarch_lsx_vbitclr_d:
2940 case Intrinsic::loongarch_lasx_xvbitclr_b:
2941 case Intrinsic::loongarch_lasx_xvbitclr_h:
2942 case Intrinsic::loongarch_lasx_xvbitclr_w:
2943 case Intrinsic::loongarch_lasx_xvbitclr_d:
2944 return lowerVectorBitClear(N, DAG);
2945 case Intrinsic::loongarch_lsx_vbitclri_b:
2946 case Intrinsic::loongarch_lasx_xvbitclri_b:
2947 return lowerVectorBitClearImm<3>(N, DAG);
2948 case Intrinsic::loongarch_lsx_vbitclri_h:
2949 case Intrinsic::loongarch_lasx_xvbitclri_h:
2950 return lowerVectorBitClearImm<4>(N, DAG);
2951 case Intrinsic::loongarch_lsx_vbitclri_w:
2952 case Intrinsic::loongarch_lasx_xvbitclri_w:
2953 return lowerVectorBitClearImm<5>(N, DAG);
2954 case Intrinsic::loongarch_lsx_vbitclri_d:
2955 case Intrinsic::loongarch_lasx_xvbitclri_d:
2956 return lowerVectorBitClearImm<6>(N, DAG);
2957 case Intrinsic::loongarch_lsx_vbitset_b:
2958 case Intrinsic::loongarch_lsx_vbitset_h:
2959 case Intrinsic::loongarch_lsx_vbitset_w:
2960 case Intrinsic::loongarch_lsx_vbitset_d:
2961 case Intrinsic::loongarch_lasx_xvbitset_b:
2962 case Intrinsic::loongarch_lasx_xvbitset_h:
2963 case Intrinsic::loongarch_lasx_xvbitset_w:
2964 case Intrinsic::loongarch_lasx_xvbitset_d: {
2965 EVT VecTy = N->getValueType(0);
2966 SDValue One = DAG.getConstant(1, DL, VecTy);
2967 return DAG.getNode(
2968 ISD::OR, DL, VecTy, N->getOperand(1),
2969 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
2970 }
2971 case Intrinsic::loongarch_lsx_vbitseti_b:
2972 case Intrinsic::loongarch_lasx_xvbitseti_b:
2973 return lowerVectorBitSetImm<3>(N, DAG);
2974 case Intrinsic::loongarch_lsx_vbitseti_h:
2975 case Intrinsic::loongarch_lasx_xvbitseti_h:
2976 return lowerVectorBitSetImm<4>(N, DAG);
2977 case Intrinsic::loongarch_lsx_vbitseti_w:
2978 case Intrinsic::loongarch_lasx_xvbitseti_w:
2979 return lowerVectorBitSetImm<5>(N, DAG);
2980 case Intrinsic::loongarch_lsx_vbitseti_d:
2981 case Intrinsic::loongarch_lasx_xvbitseti_d:
2982 return lowerVectorBitSetImm<6>(N, DAG);
2983 case Intrinsic::loongarch_lsx_vbitrev_b:
2984 case Intrinsic::loongarch_lsx_vbitrev_h:
2985 case Intrinsic::loongarch_lsx_vbitrev_w:
2986 case Intrinsic::loongarch_lsx_vbitrev_d:
2987 case Intrinsic::loongarch_lasx_xvbitrev_b:
2988 case Intrinsic::loongarch_lasx_xvbitrev_h:
2989 case Intrinsic::loongarch_lasx_xvbitrev_w:
2990 case Intrinsic::loongarch_lasx_xvbitrev_d: {
2991 EVT VecTy = N->getValueType(0);
2992 SDValue One = DAG.getConstant(1, DL, VecTy);
2993 return DAG.getNode(
2994 ISD::XOR, DL, VecTy, N->getOperand(1),
2995 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
2996 }
2997 case Intrinsic::loongarch_lsx_vbitrevi_b:
2998 case Intrinsic::loongarch_lasx_xvbitrevi_b:
2999 return lowerVectorBitRevImm<3>(N, DAG);
3000 case Intrinsic::loongarch_lsx_vbitrevi_h:
3001 case Intrinsic::loongarch_lasx_xvbitrevi_h:
3002 return lowerVectorBitRevImm<4>(N, DAG);
3003 case Intrinsic::loongarch_lsx_vbitrevi_w:
3004 case Intrinsic::loongarch_lasx_xvbitrevi_w:
3005 return lowerVectorBitRevImm<5>(N, DAG);
3006 case Intrinsic::loongarch_lsx_vbitrevi_d:
3007 case Intrinsic::loongarch_lasx_xvbitrevi_d:
3008 return lowerVectorBitRevImm<6>(N, DAG);
3009 case Intrinsic::loongarch_lsx_vfadd_s:
3010 case Intrinsic::loongarch_lsx_vfadd_d:
3011 case Intrinsic::loongarch_lasx_xvfadd_s:
3012 case Intrinsic::loongarch_lasx_xvfadd_d:
3013 return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1),
3014 N->getOperand(2));
3015 case Intrinsic::loongarch_lsx_vfsub_s:
3016 case Intrinsic::loongarch_lsx_vfsub_d:
3017 case Intrinsic::loongarch_lasx_xvfsub_s:
3018 case Intrinsic::loongarch_lasx_xvfsub_d:
3019 return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1),
3020 N->getOperand(2));
3021 case Intrinsic::loongarch_lsx_vfmul_s:
3022 case Intrinsic::loongarch_lsx_vfmul_d:
3023 case Intrinsic::loongarch_lasx_xvfmul_s:
3024 case Intrinsic::loongarch_lasx_xvfmul_d:
3025 return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1),
3026 N->getOperand(2));
3027 case Intrinsic::loongarch_lsx_vfdiv_s:
3028 case Intrinsic::loongarch_lsx_vfdiv_d:
3029 case Intrinsic::loongarch_lasx_xvfdiv_s:
3030 case Intrinsic::loongarch_lasx_xvfdiv_d:
3031 return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1),
3032 N->getOperand(2));
3033 case Intrinsic::loongarch_lsx_vfmadd_s:
3034 case Intrinsic::loongarch_lsx_vfmadd_d:
3035 case Intrinsic::loongarch_lasx_xvfmadd_s:
3036 case Intrinsic::loongarch_lasx_xvfmadd_d:
3037 return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1),
3038 N->getOperand(2), N->getOperand(3));
3039 case Intrinsic::loongarch_lsx_vinsgr2vr_b:
3040 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
3041 N->getOperand(1), N->getOperand(2),
3042 legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget));
3043 case Intrinsic::loongarch_lsx_vinsgr2vr_h:
3044 case Intrinsic::loongarch_lasx_xvinsgr2vr_w:
3045 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
3046 N->getOperand(1), N->getOperand(2),
3047 legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget));
3048 case Intrinsic::loongarch_lsx_vinsgr2vr_w:
3049 case Intrinsic::loongarch_lasx_xvinsgr2vr_d:
3050 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
3051 N->getOperand(1), N->getOperand(2),
3052 legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget));
3053 case Intrinsic::loongarch_lsx_vinsgr2vr_d:
3054 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
3055 N->getOperand(1), N->getOperand(2),
3056 legalizeIntrinsicImmArg<1>(N, 3, DAG, Subtarget));
3057 case Intrinsic::loongarch_lsx_vreplgr2vr_b:
3058 case Intrinsic::loongarch_lsx_vreplgr2vr_h:
3059 case Intrinsic::loongarch_lsx_vreplgr2vr_w:
3060 case Intrinsic::loongarch_lsx_vreplgr2vr_d:
3061 case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
3062 case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
3063 case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
3064 case Intrinsic::loongarch_lasx_xvreplgr2vr_d: {
3065 EVT ResTy = N->getValueType(0);
3066 SmallVector<SDValue> Ops(ResTy.getVectorNumElements(), N->getOperand(1));
3067 return DAG.getBuildVector(ResTy, DL, Ops);
3068 }
3069 case Intrinsic::loongarch_lsx_vreplve_b:
3070 case Intrinsic::loongarch_lsx_vreplve_h:
3071 case Intrinsic::loongarch_lsx_vreplve_w:
3072 case Intrinsic::loongarch_lsx_vreplve_d:
3073 case Intrinsic::loongarch_lasx_xvreplve_b:
3074 case Intrinsic::loongarch_lasx_xvreplve_h:
3075 case Intrinsic::loongarch_lasx_xvreplve_w:
3076 case Intrinsic::loongarch_lasx_xvreplve_d:
3077 return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0),
3078 N->getOperand(1),
3079 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
3080 N->getOperand(2)));
3081 }
3082 return SDValue();
3083}
3084
3085SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
3086 DAGCombinerInfo &DCI) const {
3087 SelectionDAG &DAG = DCI.DAG;
3088 switch (N->getOpcode()) {
3089 default:
3090 break;
3091 case ISD::AND:
3092 return performANDCombine(N, DAG, DCI, Subtarget);
3093 case ISD::OR:
3094 return performORCombine(N, DAG, DCI, Subtarget);
3095 case ISD::SRL:
3096 return performSRLCombine(N, DAG, DCI, Subtarget);
3097 case LoongArchISD::BITREV_W:
3098 return performBITREV_WCombine(N, DAG, DCI, Subtarget);
3099 case ISD::INTRINSIC_WO_CHAIN:
3100 return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget);
3101 }
3102 return SDValue();
3103}
3104
3105static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI,
3106 MachineBasicBlock *MBB) {
3107 if (!ZeroDivCheck)
3108 return MBB;
3109
3110 // Build instructions:
3111 // MBB:
3112 // div (or mod) $dst, $dividend, $divisor
3113 // bnez $divisor, SinkMBB
3114 // BreakMBB:
3115 // break 7 // BRK_DIVZERO
3116 // SinkMBB:
3117 // fallthrough
3118 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
3119 MachineFunction::iterator It = ++MBB->getIterator();
3120 MachineFunction *MF = MBB->getParent();
3121 auto BreakMBB = MF->CreateMachineBasicBlock(LLVM_BB);
3122 auto SinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
3123 MF->insert(It, BreakMBB);
3124 MF->insert(It, SinkMBB);
3125
3126 // Transfer the remainder of MBB and its successor edges to SinkMBB.
3127 SinkMBB->splice(SinkMBB->end(), MBB, std::next(MI.getIterator()), MBB->end());
3128 SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
3129
3130 const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
3131 DebugLoc DL = MI.getDebugLoc();
3132 MachineOperand &Divisor = MI.getOperand(2);
3133 Register DivisorReg = Divisor.getReg();
3134
3135 // MBB:
3136 BuildMI(MBB, DL, TII.get(LoongArch::BNEZ))
3137 .addReg(DivisorReg, getKillRegState(Divisor.isKill()))
3138 .addMBB(SinkMBB);
3139 MBB->addSuccessor(BreakMBB);
3140 MBB->addSuccessor(SinkMBB);
3141
3142 // BreakMBB:
3143 // See linux header file arch/loongarch/include/uapi/asm/break.h for the
3144 // definition of BRK_DIVZERO.
3145 BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/);
3146 BreakMBB->addSuccessor(SinkMBB);
3147
3148 // Clear Divisor's kill flag.
3149 Divisor.setIsKill(false);
3150
3151 return SinkMBB;
3152}
3153
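// Expand a PseudoVBZ*/PseudoVBNZ* (or the LASX equivalent) into a VSET*
// instruction writing a condition flag, a BCNEZ branch on that flag, and a
// PHI that materializes the boolean result as 0 or 1 in a GPR.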
3154static MachineBasicBlock *
3155emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB,
3156 const LoongArchSubtarget &Subtarget) {
3157 unsigned CondOpc;
3158 switch (MI.getOpcode()) {
3159 default:
3160 llvm_unreachable("Unexpected opcode");
3161 case LoongArch::PseudoVBZ:
3162 CondOpc = LoongArch::VSETEQZ_V;
3163 break;
3164 case LoongArch::PseudoVBZ_B:
3165 CondOpc = LoongArch::VSETANYEQZ_B;
3166 break;
3167 case LoongArch::PseudoVBZ_H:
3168 CondOpc = LoongArch::VSETANYEQZ_H;
3169 break;
3170 case LoongArch::PseudoVBZ_W:
3171 CondOpc = LoongArch::VSETANYEQZ_W;
3172 break;
3173 case LoongArch::PseudoVBZ_D:
3174 CondOpc = LoongArch::VSETANYEQZ_D;
3175 break;
3176 case LoongArch::PseudoVBNZ:
3177 CondOpc = LoongArch::VSETNEZ_V;
3178 break;
3179 case LoongArch::PseudoVBNZ_B:
3180 CondOpc = LoongArch::VSETALLNEZ_B;
3181 break;
3182 case LoongArch::PseudoVBNZ_H:
3183 CondOpc = LoongArch::VSETALLNEZ_H;
3184 break;
3185 case LoongArch::PseudoVBNZ_W:
3186 CondOpc = LoongArch::VSETALLNEZ_W;
3187 break;
3188 case LoongArch::PseudoVBNZ_D:
3189 CondOpc = LoongArch::VSETALLNEZ_D;
3190 break;
3191 case LoongArch::PseudoXVBZ:
3192 CondOpc = LoongArch::XVSETEQZ_V;
3193 break;
3194 case LoongArch::PseudoXVBZ_B:
3195 CondOpc = LoongArch::XVSETANYEQZ_B;
3196 break;
3197 case LoongArch::PseudoXVBZ_H:
3198 CondOpc = LoongArch::XVSETANYEQZ_H;
3199 break;
3200 case LoongArch::PseudoXVBZ_W:
3201 CondOpc = LoongArch::XVSETANYEQZ_W;
3202 break;
3203 case LoongArch::PseudoXVBZ_D:
3204 CondOpc = LoongArch::XVSETANYEQZ_D;
3205 break;
3206 case LoongArch::PseudoXVBNZ:
3207 CondOpc = LoongArch::XVSETNEZ_V;
3208 break;
3209 case LoongArch::PseudoXVBNZ_B:
3210 CondOpc = LoongArch::XVSETALLNEZ_B;
3211 break;
3212 case LoongArch::PseudoXVBNZ_H:
3213 CondOpc = LoongArch::XVSETALLNEZ_H;
3214 break;
3215 case LoongArch::PseudoXVBNZ_W:
3216 CondOpc = LoongArch::XVSETALLNEZ_W;
3217 break;
3218 case LoongArch::PseudoXVBNZ_D:
3219 CondOpc = LoongArch::XVSETALLNEZ_D;
3220 break;
3221 }
3222
3223 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3224 const BasicBlock *LLVM_BB = BB->getBasicBlock();
3225 DebugLoc DL = MI.getDebugLoc();
3226 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
3227 MachineFunction::iterator It = ++BB->getIterator();
3228
3229 MachineFunction *F = BB->getParent();
3230 MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(LLVM_BB);
3231 MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(LLVM_BB);
3232 MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(LLVM_BB);
3233
3234 F->insert(It, FalseBB);
3235 F->insert(It, TrueBB);
3236 F->insert(It, SinkBB);
3237
3238 // Transfer the remainder of BB and its successor edges to SinkBB.
3239 SinkBB->splice(SinkBB->end(), BB, std::next(MI.getIterator()), BB->end());
3240 SinkBB->transferSuccessorsAndUpdatePHIs(BB);
3241
3242 // Insert the real instruction into BB.
3243 Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass);
3244 BuildMI(BB, DL, TII->get(CondOpc), FCC).addReg(MI.getOperand(1).getReg());
3245
3246 // Insert branch.
3247 BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB);
3248 BB->addSuccessor(FalseBB);
3249 BB->addSuccessor(TrueBB);
3250
3251 // FalseBB.
3252 Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
3253 BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1)
3254 .addReg(LoongArch::R0)
3255 .addImm(0);
3256 BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB);
3257 FalseBB->addSuccessor(SinkBB);
3258
3259 // TrueBB.
3260 Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
3261 BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2)
3262 .addReg(LoongArch::R0)
3263 .addImm(1);
3264 TrueBB->addSuccessor(SinkBB);
3265
3266 // SinkBB: merge the results.
3267 BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI),
3268 MI.getOperand(0).getReg())
3269 .addReg(RD1)
3270 .addMBB(FalseBB)
3271 .addReg(RD2)
3272 .addMBB(TrueBB);
3273
3274 // The pseudo instruction is gone now.
3275 MI.eraseFromParent();
3276 return SinkBB;
3277}
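
// A rough sketch: the expansion above forms a diamond CFG. For PseudoVBZ,
// the result is approximately (register names illustrative):
//
//   BB:      vseteqz.v $fcc0, $vr0     # CFR set if all elements are zero
//            bcnez     $fcc0, TrueBB
//   FalseBB: addi.w    $rd1, $zero, 0  # materialize 0
//            b         SinkBB
//   TrueBB:  addi.w    $rd2, $zero, 1  # materialize 1
//   SinkBB:  $dst = PHI($rd1 from FalseBB, $rd2 from TrueBB)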
3278
3279 static MachineBasicBlock *
3280 emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB,
3281 const LoongArchSubtarget &Subtarget) {
3282 unsigned InsOp;
3283 unsigned HalfSize;
3284 switch (MI.getOpcode()) {
3285 default:
3286 llvm_unreachable("Unexpected opcode");
3287 case LoongArch::PseudoXVINSGR2VR_B:
3288 HalfSize = 16;
3289 InsOp = LoongArch::VINSGR2VR_B;
3290 break;
3291 case LoongArch::PseudoXVINSGR2VR_H:
3292 HalfSize = 8;
3293 InsOp = LoongArch::VINSGR2VR_H;
3294 break;
3295 }
3296 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3297 const TargetRegisterClass *RC = &LoongArch::LASX256RegClass;
3298 const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass;
3299 DebugLoc DL = MI.getDebugLoc();
3300 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
3301 // XDst = vector_insert XSrc, Elt, Idx
3302 Register XDst = MI.getOperand(0).getReg();
3303 Register XSrc = MI.getOperand(1).getReg();
3304 Register Elt = MI.getOperand(2).getReg();
3305 unsigned Idx = MI.getOperand(3).getImm();
3306
3307 Register ScratchReg1 = XSrc;
3308 if (Idx >= HalfSize) {
3309 ScratchReg1 = MRI.createVirtualRegister(RC);
3310 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg1)
3311 .addReg(XSrc)
3312 .addReg(XSrc)
3313 .addImm(1);
3314 }
3315
3316 Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC);
3317 Register ScratchSubReg2 = MRI.createVirtualRegister(SubRC);
3318 BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1)
3319 .addReg(ScratchReg1, 0, LoongArch::sub_128);
3320 BuildMI(*BB, MI, DL, TII->get(InsOp), ScratchSubReg2)
3321 .addReg(ScratchSubReg1)
3322 .addReg(Elt)
3323 .addImm(Idx >= HalfSize ? Idx - HalfSize : Idx);
3324
3325 Register ScratchReg2 = XDst;
3326 if (Idx >= HalfSize)
3327 ScratchReg2 = MRI.createVirtualRegister(RC);
3328
3329 BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), ScratchReg2)
3330 .addImm(0)
3331 .addReg(ScratchSubReg2)
3332 .addImm(LoongArch::sub_128);
3333
3334 if (Idx >= HalfSize)
3335 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), XDst)
3336 .addReg(XSrc)
3337 .addReg(ScratchReg2)
3338 .addImm(2);
3339
3340 MI.eraseFromParent();
3341 return BB;
3342}
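
// In rough terms, for an insert into the high half (Idx >= HalfSize) the
// sequence built above is: an XVPERMI_Q to bring the high 128 bits into the
// low half, a VINSGR2VR_{B/H} at Idx - HalfSize on the 128-bit subregister,
// SUBREG_TO_REG to widen the result, and a final XVPERMI_Q to merge the
// updated half back with the untouched half of XSrc. A low-half insert
// skips both XVPERMI_Q steps and works directly on the sub_128 view.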
3343
3344MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
3345 MachineInstr &MI, MachineBasicBlock *BB) const {
3346 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3347 DebugLoc DL = MI.getDebugLoc();
3348
3349 switch (MI.getOpcode()) {
3350 default:
3351 llvm_unreachable("Unexpected instr type to insert");
3352 case LoongArch::DIV_W:
3353 case LoongArch::DIV_WU:
3354 case LoongArch::MOD_W:
3355 case LoongArch::MOD_WU:
3356 case LoongArch::DIV_D:
3357 case LoongArch::DIV_DU:
3358 case LoongArch::MOD_D:
3359 case LoongArch::MOD_DU:
3360 return insertDivByZeroTrap(MI, BB);
3361 break;
3362 case LoongArch::WRFCSR: {
3363 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR),
3364 LoongArch::FCSR0 + MI.getOperand(0).getImm())
3365 .addReg(MI.getOperand(1).getReg());
3366 MI.eraseFromParent();
3367 return BB;
3368 }
3369 case LoongArch::RDFCSR: {
3370 MachineInstr *ReadFCSR =
3371 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR),
3372 MI.getOperand(0).getReg())
3373 .addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm());
3374 ReadFCSR->getOperand(1).setIsUndef();
3375 MI.eraseFromParent();
3376 return BB;
3377 }
3378 case LoongArch::PseudoVBZ:
3379 case LoongArch::PseudoVBZ_B:
3380 case LoongArch::PseudoVBZ_H:
3381 case LoongArch::PseudoVBZ_W:
3382 case LoongArch::PseudoVBZ_D:
3383 case LoongArch::PseudoVBNZ:
3384 case LoongArch::PseudoVBNZ_B:
3385 case LoongArch::PseudoVBNZ_H:
3386 case LoongArch::PseudoVBNZ_W:
3387 case LoongArch::PseudoVBNZ_D:
3388 case LoongArch::PseudoXVBZ:
3389 case LoongArch::PseudoXVBZ_B:
3390 case LoongArch::PseudoXVBZ_H:
3391 case LoongArch::PseudoXVBZ_W:
3392 case LoongArch::PseudoXVBZ_D:
3393 case LoongArch::PseudoXVBNZ:
3394 case LoongArch::PseudoXVBNZ_B:
3395 case LoongArch::PseudoXVBNZ_H:
3396 case LoongArch::PseudoXVBNZ_W:
3397 case LoongArch::PseudoXVBNZ_D:
3398 return emitVecCondBranchPseudo(MI, BB, Subtarget);
3399 case LoongArch::PseudoXVINSGR2VR_B:
3400 case LoongArch::PseudoXVINSGR2VR_H:
3401 return emitPseudoXVINSGR2VR(MI, BB, Subtarget);
3402 }
3403}
3404
3405 bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses(
3406 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
3407 unsigned *Fast) const {
3408 if (!Subtarget.hasUAL())
3409 return false;
3410
3411 // TODO: set a reasonable speed number.
3412 if (Fast)
3413 *Fast = 1;
3414 return true;
3415}
3416
3417const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
3418 switch ((LoongArchISD::NodeType)Opcode) {
3419 case LoongArchISD::FIRST_NUMBER:
3420 break;
3421
3422#define NODE_NAME_CASE(node) \
3423 case LoongArchISD::node: \
3424 return "LoongArchISD::" #node;
3425
3426 // TODO: Add more target-dependent nodes later.
3427 NODE_NAME_CASE(CALL)
3428 NODE_NAME_CASE(CALL_MEDIUM)
3429 NODE_NAME_CASE(CALL_LARGE)
3430 NODE_NAME_CASE(RET)
3431 NODE_NAME_CASE(TAIL)
3432 NODE_NAME_CASE(TAIL_MEDIUM)
3433 NODE_NAME_CASE(TAIL_LARGE)
3434 NODE_NAME_CASE(SLL_W)
3435 NODE_NAME_CASE(SRA_W)
3436 NODE_NAME_CASE(SRL_W)
3437 NODE_NAME_CASE(BSTRINS)
3438 NODE_NAME_CASE(BSTRPICK)
3439 NODE_NAME_CASE(MOVGR2FR_W_LA64)
3440 NODE_NAME_CASE(MOVFR2GR_S_LA64)
3441 NODE_NAME_CASE(FTINT)
3442 NODE_NAME_CASE(REVB_2H)
3443 NODE_NAME_CASE(REVB_2W)
3444 NODE_NAME_CASE(BITREV_4B)
3445 NODE_NAME_CASE(BITREV_W)
3446 NODE_NAME_CASE(ROTR_W)
3447 NODE_NAME_CASE(ROTL_W)
3448 NODE_NAME_CASE(CLZ_W)
3449 NODE_NAME_CASE(CTZ_W)
3450 NODE_NAME_CASE(DBAR)
3451 NODE_NAME_CASE(IBAR)
3452 NODE_NAME_CASE(BREAK)
3453 NODE_NAME_CASE(SYSCALL)
3454 NODE_NAME_CASE(CRC_W_B_W)
3455 NODE_NAME_CASE(CRC_W_H_W)
3456 NODE_NAME_CASE(CRC_W_W_W)
3457 NODE_NAME_CASE(CRC_W_D_W)
3458 NODE_NAME_CASE(CRCC_W_B_W)
3459 NODE_NAME_CASE(CRCC_W_H_W)
3460 NODE_NAME_CASE(CRCC_W_W_W)
3461 NODE_NAME_CASE(CRCC_W_D_W)
3462 NODE_NAME_CASE(CSRRD)
3463 NODE_NAME_CASE(CSRWR)
3464 NODE_NAME_CASE(CSRXCHG)
3465 NODE_NAME_CASE(IOCSRRD_B)
3466 NODE_NAME_CASE(IOCSRRD_H)
3467 NODE_NAME_CASE(IOCSRRD_W)
3468 NODE_NAME_CASE(IOCSRRD_D)
3469 NODE_NAME_CASE(IOCSRWR_B)
3470 NODE_NAME_CASE(IOCSRWR_H)
3471 NODE_NAME_CASE(IOCSRWR_W)
3472 NODE_NAME_CASE(IOCSRWR_D)
3473 NODE_NAME_CASE(CPUCFG)
3474 NODE_NAME_CASE(MOVGR2FCSR)
3475 NODE_NAME_CASE(MOVFCSR2GR)
3476 NODE_NAME_CASE(CACOP_D)
3477 NODE_NAME_CASE(CACOP_W)
3478 NODE_NAME_CASE(VPICK_SEXT_ELT)
3479 NODE_NAME_CASE(VPICK_ZEXT_ELT)
3480 NODE_NAME_CASE(VREPLVE)
3481 NODE_NAME_CASE(VALL_ZERO)
3482 NODE_NAME_CASE(VANY_ZERO)
3483 NODE_NAME_CASE(VALL_NONZERO)
3484 NODE_NAME_CASE(VANY_NONZERO)
3485 }
3486#undef NODE_NAME_CASE
3487 return nullptr;
3488}
3489
3490//===----------------------------------------------------------------------===//
3491// Calling Convention Implementation
3492//===----------------------------------------------------------------------===//
3493
3494 // Eight general-purpose registers a0-a7 are used for passing integer
3495 // arguments, with a0-a1 reused to return values. Generally, the GPRs are used
3496 // to pass fixed-point arguments, and floating-point arguments when no FPR is
3497 // available or with the soft-float ABI.
3498const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6,
3499 LoongArch::R7, LoongArch::R8, LoongArch::R9,
3500 LoongArch::R10, LoongArch::R11};
3501 // Eight floating-point registers fa0-fa7 are used for passing
3502 // floating-point arguments, and fa0-fa1 are also used to return values.
3503const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
3504 LoongArch::F3, LoongArch::F4, LoongArch::F5,
3505 LoongArch::F6, LoongArch::F7};
3506 // FPR32 and FPR64 alias each other.
3507 const MCPhysReg ArgFPR64s[] = {
3508 LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
3509 LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};
3510
3511const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2,
3512 LoongArch::VR3, LoongArch::VR4, LoongArch::VR5,
3513 LoongArch::VR6, LoongArch::VR7};
3514
3515const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2,
3516 LoongArch::XR3, LoongArch::XR4, LoongArch::XR5,
3517 LoongArch::XR6, LoongArch::XR7};
3518
3519// Pass a 2*GRLen argument that has been split into two GRLen values through
3520// registers or the stack as necessary.
3521static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
3522 CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
3523 unsigned ValNo2, MVT ValVT2, MVT LocVT2,
3524 ISD::ArgFlagsTy ArgFlags2) {
3525 unsigned GRLenInBytes = GRLen / 8;
3526 if (Register Reg = State.AllocateReg(ArgGPRs)) {
3527 // At least one half can be passed via register.
3528 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
3529 VA1.getLocVT(), CCValAssign::Full));
3530 } else {
3531 // Both halves must be passed on the stack, with proper alignment.
3532 Align StackAlign =
3533 std::max(Align(GRLenInBytes), ArgFlags1.getNonZeroOrigAlign());
3534 State.addLoc(
3535 CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
3536 State.AllocateStack(GRLenInBytes, StackAlign),
3537 VA1.getLocVT(), CCValAssign::Full));
3538 State.addLoc(CCValAssign::getMem(
3539 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
3540 LocVT2, CCValAssign::Full));
3541 return false;
3542 }
3543 if (Register Reg = State.AllocateReg(ArgGPRs)) {
3544 // The second half can also be passed via register.
3545 State.addLoc(
3546 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
3547 } else {
3548 // The second half is passed via the stack, without additional alignment.
3549 State.addLoc(CCValAssign::getMem(
3550 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
3551 LocVT2, CCValAssign::Full));
3552 }
3553 return false;
3554}
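
// A hypothetical illustration (assuming GRLen == 32, so a 64-bit integer is
// split into two 32-bit halves):
//
//   void f(long long x);
//   // both halves fit in regs:  lo -> $a0, hi -> $a1
//   // one register left:        lo -> $a0, hi -> first stack slot
//   // no registers left:        both halves on the stack, the pair aligned
//   //                           to max(GRLenInBytes, original alignment)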
3555
3556 // Implements the LoongArch calling convention. Returns true upon failure.
3557 static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI,
3558 unsigned ValNo, MVT ValVT,
3559 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
3560 CCState &State, bool IsFixed, bool IsRet,
3561 Type *OrigTy) {
3562 unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
3563 assert((GRLen == 32 || GRLen == 64) && "Unsupported GRLen");
3564 MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
3565 MVT LocVT = ValVT;
3566
3567 // Any return value split into more than two values can't be returned
3568 // directly.
3569 if (IsRet && ValNo > 1)
3570 return true;
3571
3572 // If passing a variadic argument, or if no FPR is available.
3573 bool UseGPRForFloat = true;
3574
3575 switch (ABI) {
3576 default:
3577 llvm_unreachable("Unexpected ABI");
3578 case LoongArchABI::ABI_ILP32S:
3579 case LoongArchABI::ABI_ILP32F:
3580 case LoongArchABI::ABI_LP64F:
3581 report_fatal_error("Unimplemented ABI");
3582 break;
3583 case LoongArchABI::ABI_ILP32D:
3584 case LoongArchABI::ABI_LP64D:
3585 UseGPRForFloat = !IsFixed;
3586 break;
3587 case LoongArchABI::ABI_LP64S:
3588 break;
3589 }
3590
3591 // FPR32 and FPR64 alias each other.
3592 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s))
3593 UseGPRForFloat = true;
3594
3595 if (UseGPRForFloat && ValVT == MVT::f32) {
3596 LocVT = GRLenVT;
3597 LocInfo = CCValAssign::BCvt;
3598 } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
3599 LocVT = MVT::i64;
3600 LocInfo = CCValAssign::BCvt;
3601 } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
3602 // TODO: Handle passing f64 on LA32 with D feature.
3603 report_fatal_error("Passing f64 with GPR on LA32 is undefined");
3604 }
3605
3606 // If this is a variadic argument, the LoongArch calling convention requires
3607 // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
3608 // byte alignment. An aligned register should be used regardless of whether
3609 // the original argument was split during legalisation or not. The argument
3610 // will not be passed by registers if the original type is larger than
3611 // 2*GRLen, so the register alignment rule does not apply.
3612 unsigned TwoGRLenInBytes = (2 * GRLen) / 8;
3613 if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&
3614 DL.getTypeAllocSize(OrigTy) == TwoGRLenInBytes) {
3615 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
3616 // Skip 'odd' register if necessary.
3617 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
3618 State.AllocateReg(ArgGPRs);
3619 }
3620
3621 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
3622 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
3623 State.getPendingArgFlags();
3624
3625 assert(PendingLocs.size() == PendingArgFlags.size() &&
3626 "PendingLocs and PendingArgFlags out of sync");
3627
3628 // Split arguments might be passed indirectly, so keep track of the pending
3629 // values.
3630 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
3631 LocVT = GRLenVT;
3632 LocInfo = CCValAssign::Indirect;
3633 PendingLocs.push_back(
3634 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
3635 PendingArgFlags.push_back(ArgFlags);
3636 if (!ArgFlags.isSplitEnd()) {
3637 return false;
3638 }
3639 }
3640
3641 // If the split argument only had two elements, it should be passed directly
3642 // in registers or on the stack.
3643 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
3644 PendingLocs.size() <= 2) {
3645 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
3646 // Apply the normal calling convention rules to the first half of the
3647 // split argument.
3648 CCValAssign VA = PendingLocs[0];
3649 ISD::ArgFlagsTy AF = PendingArgFlags[0];
3650 PendingLocs.clear();
3651 PendingArgFlags.clear();
3652 return CC_LoongArchAssign2GRLen(GRLen, State, VA, AF, ValNo, ValVT, LocVT,
3653 ArgFlags);
3654 }
3655
3656 // Allocate to a register if possible, or else a stack slot.
3657 Register Reg;
3658 unsigned StoreSizeBytes = GRLen / 8;
3659 Align StackAlign = Align(GRLen / 8);
3660
3661 if (ValVT == MVT::f32 && !UseGPRForFloat)
3662 Reg = State.AllocateReg(ArgFPR32s);
3663 else if (ValVT == MVT::f64 && !UseGPRForFloat)
3664 Reg = State.AllocateReg(ArgFPR64s);
3665 else if (ValVT.is128BitVector())
3666 Reg = State.AllocateReg(ArgVRs);
3667 else if (ValVT.is256BitVector())
3668 Reg = State.AllocateReg(ArgXRs);
3669 else
3670 Reg = State.AllocateReg(ArgGPRs);
3671
3672 unsigned StackOffset =
3673 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
3674
3675 // If we reach this point and PendingLocs is non-empty, we must be at the
3676 // end of a split argument that must be passed indirectly.
3677 if (!PendingLocs.empty()) {
3678 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
3679 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
3680 for (auto &It : PendingLocs) {
3681 if (Reg)
3682 It.convertToReg(Reg);
3683 else
3684 It.convertToMem(StackOffset);
3685 State.addLoc(It);
3686 }
3687 PendingLocs.clear();
3688 PendingArgFlags.clear();
3689 return false;
3690 }
3691 assert((!UseGPRForFloat || LocVT == GRLenVT) &&
3692 "Expected a GRLenVT at this stage");
3693
3694 if (Reg) {
3695 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3696 return false;
3697 }
3698
3699 // When a floating-point value is passed on the stack, no bit-cast is needed.
3700 if (ValVT.isFloatingPoint()) {
3701 LocVT = ValVT;
3702 LocInfo = CCValAssign::Full;
3703 }
3704
3705 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
3706 return false;
3707}
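
// A small worked example of the float rules above (assuming the lp64d ABI):
// a fixed `double` argument is given an FPR from ArgFPR64s ($fa0..$fa7); for
// a variadic `double`, IsFixed is false, so UseGPRForFloat forces LocVT to
// i64 with BCvt and the value travels in the next free GPR (or on the stack).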
3708
3709void LoongArchTargetLowering::analyzeInputArgs(
3710 MachineFunction &MF, CCState &CCInfo,
3711 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
3712 LoongArchCCAssignFn Fn) const {
3713 FunctionType *FType = MF.getFunction().getFunctionType();
3714 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
3715 MVT ArgVT = Ins[i].VT;
3716 Type *ArgTy = nullptr;
3717 if (IsRet)
3718 ArgTy = FType->getReturnType();
3719 else if (Ins[i].isOrigArg())
3720 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
3721 LoongArchABI::ABI ABI =
3722 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
3723 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags,
3724 CCInfo, /*IsFixed=*/true, IsRet, ArgTy)) {
3725 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT
3726 << '\n');
3727 llvm_unreachable("");
3728 }
3729 }
3730}
3731
3732void LoongArchTargetLowering::analyzeOutputArgs(
3733 MachineFunction &MF, CCState &CCInfo,
3734 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
3735 CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const {
3736 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
3737 MVT ArgVT = Outs[i].VT;
3738 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
3739 LoongArchABI::ABI ABI =
3740 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
3741 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags,
3742 CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) {
3743 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT
3744 << "\n");
3745 llvm_unreachable("");
3746 }
3747 }
3748}
3749
3750// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
3751 // values.
3752 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
3753 const CCValAssign &VA, const SDLoc &DL) {
3754 switch (VA.getLocInfo()) {
3755 default:
3756 llvm_unreachable("Unexpected CCValAssign::LocInfo");
3757 case CCValAssign::Full:
3759 break;
3760 case CCValAssign::BCvt:
3761 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
3762 Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val);
3763 else
3764 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
3765 break;
3766 }
3767 return Val;
3768}
3769
3770 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
3771 const CCValAssign &VA, const SDLoc &DL,
3772 const ISD::InputArg &In,
3773 const LoongArchTargetLowering &TLI) {
3774 MachineFunction &MF = DAG.getMachineFunction();
3775 MachineRegisterInfo &RegInfo = MF.getRegInfo();
3776 EVT LocVT = VA.getLocVT();
3777 SDValue Val;
3778 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
3779 Register VReg = RegInfo.createVirtualRegister(RC);
3780 RegInfo.addLiveIn(VA.getLocReg(), VReg);
3781 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
3782
3783 // If input is sign extended from 32 bits, note it for the OptW pass.
3784 if (In.isOrigArg()) {
3785 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
3786 if (OrigArg->getType()->isIntegerTy()) {
3787 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
3788 // An input zero extended from i31 can also be considered sign extended.
3789 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
3790 (BitWidth < 32 && In.Flags.isZExt())) {
3791 LoongArchMachineFunctionInfo *LAFI =
3792 MF.getInfo<LoongArchMachineFunctionInfo>();
3793 LAFI->addSExt32Register(VReg);
3794 }
3795 }
3796 }
3797
3798 return convertLocVTToValVT(DAG, Val, VA, DL);
3799}
3800
3801// The caller is responsible for loading the full value if the argument is
3802 // passed with CCValAssign::Indirect.
3803 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
3804 const CCValAssign &VA, const SDLoc &DL) {
3805 MachineFunction &MF = DAG.getMachineFunction();
3806 MachineFrameInfo &MFI = MF.getFrameInfo();
3807 EVT ValVT = VA.getValVT();
3808 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
3809 /*IsImmutable=*/true);
3810 SDValue FIN = DAG.getFrameIndex(
3812
3813 ISD::LoadExtType ExtType;
3814 switch (VA.getLocInfo()) {
3815 default:
3816 llvm_unreachable("Unexpected CCValAssign::LocInfo");
3817 case CCValAssign::Full:
3818 case CCValAssign::Indirect:
3819 case CCValAssign::BCvt:
3820 ExtType = ISD::NON_EXTLOAD;
3821 break;
3822 }
3823 return DAG.getExtLoad(
3824 ExtType, DL, VA.getLocVT(), Chain, FIN,
3826}
3827
3828 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
3829 const CCValAssign &VA, const SDLoc &DL) {
3830 EVT LocVT = VA.getLocVT();
3831
3832 switch (VA.getLocInfo()) {
3833 default:
3834 llvm_unreachable("Unexpected CCValAssign::LocInfo");
3835 case CCValAssign::Full:
3836 break;
3837 case CCValAssign::BCvt:
3838 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
3839 Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val);
3840 else
3841 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
3842 break;
3843 }
3844 return Val;
3845}
3846
3847static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
3848 CCValAssign::LocInfo LocInfo,
3849 ISD::ArgFlagsTy ArgFlags, CCState &State) {
3850 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
3851 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim
3852 // s0 s1 s2 s3 s4 s5 s6 s7 s8
3853 static const MCPhysReg GPRList[] = {
3854 LoongArch::R23, LoongArch::R24, LoongArch::R25,
3855 LoongArch::R26, LoongArch::R27, LoongArch::R28,
3856 LoongArch::R29, LoongArch::R30, LoongArch::R31};
3857 if (unsigned Reg = State.AllocateReg(GPRList)) {
3858 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3859 return false;
3860 }
3861 }
3862
3863 if (LocVT == MVT::f32) {
3864 // Pass in STG registers: F1, F2, F3, F4
3865 // fs0,fs1,fs2,fs3
3866 static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,
3867 LoongArch::F26, LoongArch::F27};
3868 if (unsigned Reg = State.AllocateReg(FPR32List)) {
3869 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3870 return false;
3871 }
3872 }
3873
3874 if (LocVT == MVT::f64) {
3875 // Pass in STG registers: D1, D2, D3, D4
3876 // fs4,fs5,fs6,fs7
3877 static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,
3878 LoongArch::F30_64, LoongArch::F31_64};
3879 if (unsigned Reg = State.AllocateReg(FPR64List)) {
3880 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3881 return false;
3882 }
3883 }
3884
3885 report_fatal_error("No registers left in GHC calling convention");
3886 return true;
3887}
3888
3889 // Transform physical registers into virtual registers.
3890 SDValue LoongArchTargetLowering::LowerFormalArguments(
3891 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
3892 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
3893 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3894
3895 MachineFunction &MF = DAG.getMachineFunction();
3896
3897 switch (CallConv) {
3898 default:
3899 llvm_unreachable("Unsupported calling convention");
3900 case CallingConv::C:
3901 case CallingConv::Fast:
3902 break;
3903 case CallingConv::GHC:
3904 if (!MF.getSubtarget().hasFeature(LoongArch::FeatureBasicF) ||
3905 !MF.getSubtarget().hasFeature(LoongArch::FeatureBasicD))
3906 report_fatal_error(
3907 "GHC calling convention requires the F and D extensions");
3908 }
3909
3910 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3911 MVT GRLenVT = Subtarget.getGRLenVT();
3912 unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
3913 // Used with varargs to accumulate store chains.
3914 std::vector<SDValue> OutChains;
3915
3916 // Assign locations to all of the incoming arguments.
3917 SmallVector<CCValAssign> ArgLocs;
3918 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
3919
3920 if (CallConv == CallingConv::GHC)
3921 CCInfo.AnalyzeFormalArguments(Ins, CC_LoongArch_GHC);
3922 else
3923 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch);
3924
3925 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3926 CCValAssign &VA = ArgLocs[i];
3927 SDValue ArgValue;
3928 if (VA.isRegLoc())
3929 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[i], *this);
3930 else
3931 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
3932 if (VA.getLocInfo() == CCValAssign::Indirect) {
3933 // If the original argument was split and passed by reference, we need to
3934 // load all parts of it here (using the same address).
3935 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
3936 MachinePointerInfo()));
3937 unsigned ArgIndex = Ins[i].OrigArgIndex;
3938 unsigned ArgPartOffset = Ins[i].PartOffset;
3939 assert(ArgPartOffset == 0);
3940 while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
3941 CCValAssign &PartVA = ArgLocs[i + 1];
3942 unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset;
3943 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
3944 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
3945 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
3946 MachinePointerInfo()));
3947 ++i;
3948 }
3949 continue;
3950 }
3951 InVals.push_back(ArgValue);
3952 }
3953
3954 if (IsVarArg) {
3955 ArrayRef<MCPhysReg> ArgRegs = ArrayRef(ArgGPRs);
3956 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
3957 const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
3958 MachineFrameInfo &MFI = MF.getFrameInfo();
3959 MachineRegisterInfo &RegInfo = MF.getRegInfo();
3960 auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
3961
3962 // Offset of the first variable argument from stack pointer, and size of
3963 // the vararg save area. For now, the varargs save area is either zero or
3964 // large enough to hold a0-a7.
3965 int VaArgOffset, VarArgsSaveSize;
3966
3967 // If all registers are allocated, then all varargs must be passed on the
3968 // stack and we don't need to save any argregs.
3969 if (ArgRegs.size() == Idx) {
3970 VaArgOffset = CCInfo.getStackSize();
3971 VarArgsSaveSize = 0;
3972 } else {
3973 VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
3974 VaArgOffset = -VarArgsSaveSize;
3975 }
3976
3977 // Record the frame index of the first variable argument
3978 // which is a value necessary for VASTART.
3979 int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
3980 LoongArchFI->setVarArgsFrameIndex(FI);
3981
3982 // If saving an odd number of registers then create an extra stack slot to
3983 // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
3984 // offsets to even-numbered registers remain 2*GRLen-aligned.
3985 if (Idx % 2) {
3986 MFI.CreateFixedObject(GRLenInBytes, VaArgOffset - (int)GRLenInBytes,
3987 true);
3988 VarArgsSaveSize += GRLenInBytes;
3989 }
3990
3991 // Copy the integer registers that may have been used for passing varargs
3992 // to the vararg save area.
3993 for (unsigned I = Idx; I < ArgRegs.size();
3994 ++I, VaArgOffset += GRLenInBytes) {
3995 const Register Reg = RegInfo.createVirtualRegister(RC);
3996 RegInfo.addLiveIn(ArgRegs[I], Reg);
3997 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, GRLenVT);
3998 FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
3999 SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
4000 SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
4001 MachinePointerInfo::getFixedStack(MF, FI));
4002 cast<StoreSDNode>(Store.getNode())
4003 ->getMemOperand()
4004 ->setValue((Value *)nullptr);
4005 OutChains.push_back(Store);
4006 }
4007 LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
4008 }
4009
4010 // All stores are grouped in one node to allow the matching between
4011 // the size of Ins and InVals. This only happens for vararg functions.
4012 if (!OutChains.empty()) {
4013 OutChains.push_back(Chain);
4014 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
4015 }
4016
4017 return Chain;
4018}
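
// An illustration of the vararg save area above (assuming LA64, so
// GRLenInBytes == 8): for a hypothetical `int sum(int n, ...)`, only $a0 is
// taken by `n` (Idx == 1), so $a1..$a7 are spilled into 7 * 8 = 56 bytes,
// plus one 8-byte pad slot because Idx is odd, keeping the save area
// 2*GRLen-aligned below the incoming stack pointer.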
4019
4020 bool LoongArchTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
4021 return CI->isTailCall();
4022}
4023
4024 // Check if the return value is used only as a return value, as otherwise
4025 // we can't perform a tail-call.
4026 bool LoongArchTargetLowering::isUsedByReturnOnly(SDNode *N,
4027 SDValue &Chain) const {
4028 if (N->getNumValues() != 1)
4029 return false;
4030 if (!N->hasNUsesOfValue(1, 0))
4031 return false;
4032
4033 SDNode *Copy = *N->use_begin();
4034 if (Copy->getOpcode() != ISD::CopyToReg)
4035 return false;
4036
4037 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
4038 // isn't safe to perform a tail call.
4039 if (Copy->getGluedNode())
4040 return false;
4041
4042 // The copy must be used by a LoongArchISD::RET, and nothing else.
4043 bool HasRet = false;
4044 for (SDNode *Node : Copy->uses()) {
4045 if (Node->getOpcode() != LoongArchISD::RET)
4046 return false;
4047 HasRet = true;
4048 }
4049
4050 if (!HasRet)
4051 return false;
4052
4053 Chain = Copy->getOperand(0);
4054 return true;
4055}
4056
4057// Check whether the call is eligible for tail call optimization.
4058bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
4059 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
4060 const SmallVectorImpl<CCValAssign> &ArgLocs) const {
4061
4062 auto CalleeCC = CLI.CallConv;
4063 auto &Outs = CLI.Outs;
4064 auto &Caller = MF.getFunction();
4065 auto CallerCC = Caller.getCallingConv();
4066
4067 // Do not tail call opt if the stack is used to pass parameters.
4068 if (CCInfo.getStackSize() != 0)
4069 return false;
4070
4071 // Do not tail call opt if any parameters need to be passed indirectly.
4072 for (auto &VA : ArgLocs)
4073 if (VA.getLocInfo() == CCValAssign::Indirect)
4074 return false;
4075
4076 // Do not tail call opt if either caller or callee uses struct return
4077 // semantics.
4078 auto IsCallerStructRet = Caller.hasStructRetAttr();
4079 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
4080 if (IsCallerStructRet || IsCalleeStructRet)
4081 return false;
4082
4083 // Do not tail call opt if either the callee or caller has a byval argument.
4084 for (auto &Arg : Outs)
4085 if (Arg.Flags.isByVal())
4086 return false;
4087
4088 // The callee has to preserve all registers the caller needs to preserve.
4089 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
4090 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
4091 if (CalleeCC != CallerCC) {
4092 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
4093 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
4094 return false;
4095 }
4096 return true;
4097}
4098
4099 static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
4100 return DAG.getDataLayout().getPrefTypeAlign(
4101 VT.getTypeForEVT(*DAG.getContext()));
4102}
4103
4104// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
4105// and output parameter nodes.
4106 SDValue
4107 LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI,
4108 SmallVectorImpl<SDValue> &InVals) const {
4109 SelectionDAG &DAG = CLI.DAG;
4110 SDLoc &DL = CLI.DL;
4111 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
4112 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
4113 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
4114 SDValue Chain = CLI.Chain;
4115 SDValue Callee = CLI.Callee;
4116 CallingConv::ID CallConv = CLI.CallConv;
4117 bool IsVarArg = CLI.IsVarArg;
4118 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4119 MVT GRLenVT = Subtarget.getGRLenVT();
4120 bool &IsTailCall = CLI.IsTailCall;
4121
4122 MachineFunction &MF = DAG.getMachineFunction();
4123
4124 // Analyze the operands of the call, assigning locations to each operand.
4125 SmallVector<CCValAssign> ArgLocs;
4126 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
4127
4128 if (CallConv == CallingConv::GHC)
4129 ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_GHC);
4130 else
4131 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch);
4132
4133 // Check if it's really possible to do a tail call.
4134 if (IsTailCall)
4135 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
4136
4137 if (IsTailCall)
4138 ++NumTailCalls;
4139 else if (CLI.CB && CLI.CB->isMustTailCall())
4140 report_fatal_error("failed to perform tail call elimination on a call "
4141 "site marked musttail");
4142
4143 // Get a count of how many bytes are to be pushed on the stack.
4144 unsigned NumBytes = ArgCCInfo.getStackSize();
4145
4146 // Create local copies for byval args.
4147 SmallVector<SDValue> ByValArgs;
4148 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
4149 ISD::ArgFlagsTy Flags = Outs[i].Flags;
4150 if (!Flags.isByVal())
4151 continue;
4152
4153 SDValue Arg = OutVals[i];
4154 unsigned Size = Flags.getByValSize();
4155 Align Alignment = Flags.getNonZeroByValAlign();
4156
4157 int FI =
4158 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
4159 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
4160 SDValue SizeNode = DAG.getConstant(Size, DL, GRLenVT);
4161
4162 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
4163 /*IsVolatile=*/false,
4164 /*AlwaysInline=*/false, /*isTailCall=*/IsTailCall,
4165 MachinePointerInfo(), MachinePointerInfo());
4166 ByValArgs.push_back(FIPtr);
4167 }
4168
4169 if (!IsTailCall)
4170 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
4171
4172 // Copy argument values to their designated locations.
4173 SmallVector<std::pair<Register, SDValue>> RegsToPass;
4174 SmallVector<SDValue> MemOpChains;
4175 SDValue StackPtr;
4176 for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
4177 CCValAssign &VA = ArgLocs[i];
4178 SDValue ArgValue = OutVals[i];
4179 ISD::ArgFlagsTy Flags = Outs[i].Flags;
4180
4181 // Promote the value if needed.
4182 // For now, only handle fully promoted and indirect arguments.
4183 if (VA.getLocInfo() == CCValAssign::Indirect) {
4184 // Store the argument in a stack slot and pass its address.
4185 Align StackAlign =
4186 std::max(getPrefTypeAlign(Outs[i].ArgVT, DAG),
4187 getPrefTypeAlign(ArgValue.getValueType(), DAG));
4188 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
4189 // If the original argument was split and passed by reference, we need to
4190 // store the required parts of it here (and pass just one address).
4191 unsigned ArgIndex = Outs[i].OrigArgIndex;
4192 unsigned ArgPartOffset = Outs[i].PartOffset;
4193 assert(ArgPartOffset == 0);
4194 // Calculate the total size to store. We don't have access to what we're
4195 // actually storing other than performing the loop and collecting the
4196 // info.
4197 SmallVector<std::pair<SDValue, SDValue>> Parts;
4198 while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
4199 SDValue PartValue = OutVals[i + 1];
4200 unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset;
4201 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
4202 EVT PartVT = PartValue.getValueType();
4203
4204 StoredSize += PartVT.getStoreSize();
4205 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
4206 Parts.push_back(std::make_pair(PartValue, Offset));
4207 ++i;
4208 }
4209 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
4210 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
4211 MemOpChains.push_back(
4212 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
4213 MachinePointerInfo::getFixedStack(MF, FI)));
4214 for (const auto &Part : Parts) {
4215 SDValue PartValue = Part.first;
4216 SDValue PartOffset = Part.second;
4217 SDValue Address =
4218 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
4219 MemOpChains.push_back(
4220 DAG.getStore(Chain, DL, PartValue, Address,
4222 }
4223 ArgValue = SpillSlot;
4224 } else {
4225 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
4226 }
4227
4228 // Use local copy if it is a byval arg.
4229 if (Flags.isByVal())
4230 ArgValue = ByValArgs[j++];
4231
4232 if (VA.isRegLoc()) {
4233 // Queue up the argument copies and emit them at the end.
4234 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
4235 } else {
4236 assert(VA.isMemLoc() && "Argument not register or memory");
4237 assert(!IsTailCall && "Tail call not allowed if stack is used "
4238 "for passing parameters");
4239
4240 // Work out the address of the stack slot.
4241 if (!StackPtr.getNode())
4242 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
4243 SDValue Address =
4244 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
4245 DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
4246
4247 // Emit the store.
4248 MemOpChains.push_back(
4249 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
4250 }
4251 }
4252
4253 // Join the stores, which are independent of one another.
4254 if (!MemOpChains.empty())
4255 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
4256
4257 SDValue Glue;
4258
4259 // Build a sequence of copy-to-reg nodes, chained and glued together.
4260 for (auto &Reg : RegsToPass) {
4261 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
4262 Glue = Chain.getValue(1);
4263 }
4264
4265 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
4266 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
4267 // split it and then direct call can be matched by PseudoCALL.
4268 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
4269 const GlobalValue *GV = S->getGlobal();
4270 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV)
4271 ? LoongArchII::MO_CALL
4272 : LoongArchII::MO_CALL_PLT;
4273 Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, OpFlags);
4274 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
4275 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(nullptr)
4276 ? LoongArchII::MO_CALL
4277 : LoongArchII::MO_CALL_PLT;
4278 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
4279 }
4280
4281 // The first call operand is the chain and the second is the target address.
4282 SmallVector<SDValue> Ops;
4283 Ops.push_back(Chain);
4284 Ops.push_back(Callee);
4285
4286 // Add argument registers to the end of the list so that they are
4287 // known live into the call.
4288 for (auto &Reg : RegsToPass)
4289 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
4290
4291 if (!IsTailCall) {
4292 // Add a register mask operand representing the call-preserved registers.
4293 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
4294 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
4295 assert(Mask && "Missing call preserved mask for calling convention");
4296 Ops.push_back(DAG.getRegisterMask(Mask));
4297 }
4298
4299 // Glue the call to the argument copies, if any.
4300 if (Glue.getNode())
4301 Ops.push_back(Glue);
4302
4303 // Emit the call.
4304 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
4305 unsigned Op;
4306 switch (DAG.getTarget().getCodeModel()) {
4307 default:
4308 report_fatal_error("Unsupported code model");
4309 case CodeModel::Small:
4310 Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL;
4311 break;
4312 case CodeModel::Medium:
4313 assert(Subtarget.is64Bit() && "Medium code model requires LA64");
4314 Op = IsTailCall ? LoongArchISD::TAIL_MEDIUM : LoongArchISD::CALL_MEDIUM;
4315 break;
4316 case CodeModel::Large:
4317 assert(Subtarget.is64Bit() && "Large code model requires LA64");
4318 Op = IsTailCall ? LoongArchISD::TAIL_LARGE : LoongArchISD::CALL_LARGE;
4319 break;
4320 }
4321
4322 if (IsTailCall) {
4323 MF.getFrameInfo().setHasTailCall();
4324 SDValue Ret = DAG.getNode(Op, DL, NodeTys, Ops);
4325 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
4326 return Ret;
4327 }
4328
4329 Chain = DAG.getNode(Op, DL, NodeTys, Ops);
4330 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
4331 Glue = Chain.getValue(1);
4332
4333 // Mark the end of the call, which is glued to the call itself.
4334 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
4335 Glue = Chain.getValue(1);
4336
4337 // Assign locations to each value returned by this call.
4338 SmallVector<CCValAssign> RVLocs;
4339 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
4340 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch);
4341
4342 // Copy all of the result registers out of their specified physreg.
4343 for (auto &VA : RVLocs) {
4344 // Copy the value out.
4345 SDValue RetValue =
4346 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
4347 // Glue the RetValue to the end of the call sequence.
4348 Chain = RetValue.getValue(1);
4349 Glue = RetValue.getValue(2);
4350
4351 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
4352
4353 InVals.push_back(RetValue);
4354 }
4355
4356 return Chain;
4357}
4358
4359 bool LoongArchTargetLowering::CanLowerReturn(
4360 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
4361 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
4362 SmallVector<CCValAssign> RVLocs;
4363 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
4364
4365 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
4366 LoongArchABI::ABI ABI =
4367 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
4368 if (CC_LoongArch(MF.getDataLayout(), ABI, i, Outs[i].VT, CCValAssign::Full,
4369 Outs[i].Flags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true,
4370 nullptr))
4371 return false;
4372 }
4373 return true;
4374}
4375
4376 SDValue LoongArchTargetLowering::LowerReturn(
4377 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
4378 const SmallVectorImpl<ISD::OutputArg> &Outs,
4379 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
4380 SelectionDAG &DAG) const {
4381 // Stores the assignment of the return value to a location.
4382 SmallVector<CCValAssign> RVLocs;
4383
4384 // Info about the registers and stack slot.
4385 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
4386 *DAG.getContext());
4387
4388 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
4389 nullptr, CC_LoongArch);
4390 if (CallConv == CallingConv::GHC && !RVLocs.empty())
4391 report_fatal_error("GHC functions return void only");
4392 SDValue Glue;
4393 SmallVector<SDValue, 4> RetOps(1, Chain);
4394
4395 // Copy the result values into the output registers.
4396 for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
4397 CCValAssign &VA = RVLocs[i];
4398 assert(VA.isRegLoc() && "Can only return in registers!");
4399
4400 // Handle a 'normal' return.
4401 SDValue Val = convertValVTToLocVT(DAG, OutVals[i], VA, DL);
4402 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
4403
4404 // Guarantee that all emitted copies are stuck together.
4405 Glue = Chain.getValue(1);
4406 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
4407 }
4408
4409 RetOps[0] = Chain; // Update chain.
4410
4411 // Add the glue node if we have it.
4412 if (Glue.getNode())
4413 RetOps.push_back(Glue);
4414
4415 return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
4416}
4417
4418bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
4419 bool ForCodeSize) const {
4420 // TODO: Maybe need more checks here after vector extension is supported.
4421 if (VT == MVT::f32 && !Subtarget.hasBasicF())
4422 return false;
4423 if (VT == MVT::f64 && !Subtarget.hasBasicD())
4424 return false;
4425 return (Imm.isZero() || Imm.isExactlyValue(+1.0));
4426}
4427
4428 bool LoongArchTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
4429 return true;
4430}
4431
4432 bool LoongArchTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
4433 return true;
4434}
4435
4436bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
4437 const Instruction *I) const {
4438 if (!Subtarget.is64Bit())
4439 return isa<LoadInst>(I) || isa<StoreInst>(I);
4440
4441 if (isa<LoadInst>(I))
4442 return true;
4443
4444 // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not
4445 // require fences because we can use amswap_db.[w/d].
4446 if (isa<StoreInst>(I)) {
4447 unsigned Size = I->getOperand(0)->getType()->getIntegerBitWidth();
4448 return (Size == 8 || Size == 16);
4449 }
4450
4451 return false;
4452}
4453
4454 EVT LoongArchTargetLowering::getSetCCResultType(const DataLayout &DL,
4455 LLVMContext &Context,
4456 EVT VT) const {
4457 if (!VT.isVector())
4458 return getPointerTy(DL);
4459 return VT.changeVectorElementTypeToInteger();
4460}
4461
4462 bool LoongArchTargetLowering::hasAndNotCompare(SDValue Y) const {
4463 // TODO: Support vectors.
4464 return Y.getValueType().isScalarInteger() && !isa<ConstantSDNode>(Y);
4465}
4466
4467 bool LoongArchTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
4468 const CallInst &I,
4469 MachineFunction &MF,
4470 unsigned Intrinsic) const {
4471 switch (Intrinsic) {
4472 default:
4473 return false;
4474 case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
4475 case Intrinsic::loongarch_masked_atomicrmw_add_i32:
4476 case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
4477 case Intrinsic::loongarch_masked_atomicrmw_nand_i32:
4478 Info.opc = ISD::INTRINSIC_W_CHAIN;
4479 Info.memVT = MVT::i32;
4480 Info.ptrVal = I.getArgOperand(0);
4481 Info.offset = 0;
4482 Info.align = Align(4);
4483 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
4484 MachineMemOperand::MOVolatile;
4485 return true;
4486 // TODO: Add more Intrinsics later.
4487 }
4488}
4489
4490 TargetLowering::AtomicExpansionKind
4491 LoongArchTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
4492 // TODO: Add more AtomicRMWInsts that need to be extended.
4493
4494 // Since floating-point operation requires a non-trivial set of data
4495 // operations, use CmpXChg to expand.
4496 if (AI->isFloatingPointOperation() ||
4497 AI->getOperation() == AtomicRMWInst::UIncWrap ||
4498 AI->getOperation() == AtomicRMWInst::UDecWrap)
4499 return AtomicExpansionKind::CmpXChg;
4500
4501 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
4502 if (Size == 8 || Size == 16)
4503 return AtomicExpansionKind::MaskedIntrinsic;
4504 return AtomicExpansionKind::None;
4505}
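
// A sketch of the policy above, in IR terms:
//
//   atomicrmw fadd ptr %p, float 1.0 seq_cst  ; -> CmpXChg expansion
//   atomicrmw add  ptr %p, i8  1    seq_cst   ; -> MaskedIntrinsic (Size 8)
//   atomicrmw add  ptr %p, i32 1    seq_cst   ; -> None, lowered directly
//                                             ;    (e.g. amadd_db.w on LA64)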
4506
4507 static Intrinsic::ID
4508 getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen,
4509 AtomicRMWInst::BinOp BinOp) {
4510 if (GRLen == 64) {
4511 switch (BinOp) {
4512 default:
4513 llvm_unreachable("Unexpected AtomicRMW BinOp");
4514 case AtomicRMWInst::Xchg:
4515 return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
4516 case AtomicRMWInst::Add:
4517 return Intrinsic::loongarch_masked_atomicrmw_add_i64;
4518 case AtomicRMWInst::Sub:
4519 return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
4520 case AtomicRMWInst::Nand:
4521 return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
4522 case AtomicRMWInst::UMax:
4523 return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
4524 case AtomicRMWInst::UMin:
4525 return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
4526 case AtomicRMWInst::Max:
4527 return Intrinsic::loongarch_masked_atomicrmw_max_i64;
4528 case AtomicRMWInst::Min:
4529 return Intrinsic::loongarch_masked_atomicrmw_min_i64;
4530 // TODO: support other AtomicRMWInst.
4531 }
4532 }
4533
4534 if (GRLen == 32) {
4535 switch (BinOp) {
4536 default:
4537 llvm_unreachable("Unexpected AtomicRMW BinOp");
4538 case AtomicRMWInst::Xchg:
4539 return Intrinsic::loongarch_masked_atomicrmw_xchg_i32;
4540 case AtomicRMWInst::Add:
4541 return Intrinsic::loongarch_masked_atomicrmw_add_i32;
4542 case AtomicRMWInst::Sub:
4543 return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
4544 case AtomicRMWInst::Nand:
4545 return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
4546 // TODO: support other AtomicRMWInst.
4547 }
4548 }
4549
4550 llvm_unreachable("Unexpected GRLen\n");
4551}
4552
4553 TargetLowering::AtomicExpansionKind
4554 LoongArchTargetLowering::shouldExpandAtomicCmpXchgInIR(
4555 AtomicCmpXchgInst *CI) const {
4556 unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
4557 if (Size == 8 || Size == 16)
4558 return AtomicExpansionKind::MaskedIntrinsic;
4559 return AtomicExpansionKind::None;
4560}
4561
4562 Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
4563 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
4564 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
4565 AtomicOrdering FailOrd = CI->getFailureOrdering();
4566 Value *FailureOrdering =
4567 Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(FailOrd));
4568
4569 // TODO: Support cmpxchg on LA32.
4570 Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
4571 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
4572 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
4573 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
4574 Type *Tys[] = {AlignedAddr->getType()};
4575 Function *MaskedCmpXchg =
4576 Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
4577 Value *Result = Builder.CreateCall(
4578 MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});
4579 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
4580 return Result;
4581}
4582
4583 Value *LoongArchTargetLowering::emitMaskedAtomicRMWIntrinsic(
4584 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
4585 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
4586 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
4587 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
4588 // mask, as this produces better code than the LL/SC loop emitted by
4589 // int_loongarch_masked_atomicrmw_xchg.
4590 if (AI->getOperation() == AtomicRMWInst::Xchg &&
4591 isa<ConstantInt>(AI->getValOperand())) {
4592 ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
4593 if (CVal->isZero())
4594 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
4595 Builder.CreateNot(Mask, "Inv_Mask"),
4596 AI->getAlign(), Ord);
4597 if (CVal->isMinusOne())
4598 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
4599 AI->getAlign(), Ord);
4600 }
4601
4602 unsigned GRLen = Subtarget.getGRLen();
4603 Value *Ordering =
4604 Builder.getIntN(GRLen, static_cast<uint64_t>(AI->getOrdering()));
4605 Type *Tys[] = {AlignedAddr->getType()};
4606 Function *LlwOpScwLoop = Intrinsic::getDeclaration(
4607 AI->getModule(),
4608 getIntrinsicForMaskedAtomicRMWBinOp(GRLen, AI->getOperation()), Tys);
4609
4610 if (GRLen == 64) {
4611 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
4612 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
4613 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
4614 }
4615
4616 Value *Result;
4617
4618 // Must pass the shift amount needed to sign extend the loaded value prior
4619 // to performing a signed comparison for min/max. ShiftAmt is the number of
4620 // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
4621 // is the number of bits to left+right shift the value in order to
4622 // sign-extend.
4623 if (AI->getOperation() == AtomicRMWInst::Min ||
4624 AI->getOperation() == AtomicRMWInst::Max) {
4625 const DataLayout &DL = AI->getModule()->getDataLayout();
4626 unsigned ValWidth =
4627 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
4628 Value *SextShamt =
4629 Builder.CreateSub(Builder.getIntN(GRLen, GRLen - ValWidth), ShiftAmt);
4630 Result = Builder.CreateCall(LlwOpScwLoop,
4631 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
4632 } else {
4633 Result =
4634 Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
4635 }
4636
4637 if (GRLen == 64)
4638 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
4639 return Result;
4640}
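
// A sketch of the constant-xchg folding at the top of this function
// (hypothetical i8 example, operating on the containing aligned word):
//
//   atomicrmw xchg ptr %p, i8 0   ; becomes: atomicrmw and %aligned, ~%mask
//   atomicrmw xchg ptr %p, i8 -1  ; becomes: atomicrmw or  %aligned,  %mask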
4641
4642 bool LoongArchTargetLowering::isFMAFasterThanFMulAndFAdd(
4643 const MachineFunction &MF, EVT VT) const {
4644 VT = VT.getScalarType();
4645
4646 if (!VT.isSimple())
4647 return false;
4648
4649 switch (VT.getSimpleVT().SimpleTy) {
4650 case MVT::f32:
4651 case MVT::f64:
4652 return true;
4653 default:
4654 break;
4655 }
4656
4657 return false;
4658}
4659
4660 Register LoongArchTargetLowering::getExceptionPointerRegister(
4661 const Constant *PersonalityFn) const {
4662 return LoongArch::R4;
4663}
4664
4665 Register LoongArchTargetLowering::getExceptionSelectorRegister(
4666 const Constant *PersonalityFn) const {
4667 return LoongArch::R5;
4668}
4669
4670//===----------------------------------------------------------------------===//
4671// LoongArch Inline Assembly Support
4672//===----------------------------------------------------------------------===//
4673
4674 TargetLowering::ConstraintType
4675 LoongArchTargetLowering::getConstraintType(StringRef Constraint) const {
4676 // LoongArch specific constraints in GCC: config/loongarch/constraints.md
4677 //
4678 // 'f': A floating-point register (if available).
4679 // 'k': A memory operand whose address is formed by a base register and
4680 // (optionally scaled) index register.
4681 // 'l': A signed 16-bit constant.
4682 // 'm': A memory operand whose address is formed by a base register and
4683 // offset that is suitable for use in instructions with the same
4684 // addressing mode as st.w and ld.w.
4685 // 'I': A signed 12-bit constant (for arithmetic instructions).
4686 // 'J': Integer zero.
4687 // 'K': An unsigned 12-bit constant (for logic instructions).
4688 // "ZB": An address that is held in a general-purpose register. The offset is
4689 // zero.
4690 // "ZC": A memory operand whose address is formed by a base register and
4691 // offset that is suitable for use in instructions with the same
4692 // addressing mode as ll.w and sc.w.
4693 if (Constraint.size() == 1) {
4694 switch (Constraint[0]) {
4695 default:
4696 break;
4697 case 'f':
4698 return C_RegisterClass;
4699 case 'l':
4700 case 'I':
4701 case 'J':
4702 case 'K':
4703 return C_Immediate;
4704 case 'k':
4705 return C_Memory;
4706 }
4707 }
4708
4709 if (Constraint == "ZC" || Constraint == "ZB")
4710 return C_Memory;
4711
4712 // 'm' is handled here.
4713 return TargetLowering::getConstraintType(Constraint);
4714}
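
// User code might exercise these constraints like so (hypothetical snippet;
// any instructions accepting these operand kinds would do):
//
//   int v, *p = /* some pointer */;
//   asm volatile("ld.w %0, %1" : "=r"(v) : "m"(*p));           // 'm' memory
//   asm volatile("addi.w %0, %0, %1" : "+r"(v) : "I"(-2048));  // signed 12-bit
//   asm volatile("ori %0, %0, %1" : "+r"(v) : "K"(4095));      // unsigned 12-bit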
4715
4716InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint(
4717 StringRef ConstraintCode) const {
4718 return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode)
4719 .Case("k", InlineAsm::ConstraintCode::k)
4720 .Case("ZB", InlineAsm::ConstraintCode::ZB)
4721 .Case("ZC", InlineAsm::ConstraintCode::ZC)
4722 .Default(TargetLowering::getInlineAsmMemConstraint(ConstraintCode));
4723}
4724
4725std::pair<unsigned, const TargetRegisterClass *>
4726LoongArchTargetLowering::getRegForInlineAsmConstraint(
4727 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
4728 // First, see if this is a constraint that directly corresponds to a LoongArch
4729 // register class.
4730 if (Constraint.size() == 1) {
4731 switch (Constraint[0]) {
4732 case 'r':
4733 // TODO: Support fixed vectors up to GRLen?
4734 if (VT.isVector())
4735 break;
4736 return std::make_pair(0U, &LoongArch::GPRRegClass);
4737 case 'f':
4738 if (Subtarget.hasBasicF() && VT == MVT::f32)
4739 return std::make_pair(0U, &LoongArch::FPR32RegClass);
4740 if (Subtarget.hasBasicD() && VT == MVT::f64)
4741 return std::make_pair(0U, &LoongArch::FPR64RegClass);
4742 if (Subtarget.hasExtLSX() &&
4743 TRI->isTypeLegalForClass(LoongArch::LSX128RegClass, VT))
4744 return std::make_pair(0U, &LoongArch::LSX128RegClass);
4745 if (Subtarget.hasExtLASX() &&
4746 TRI->isTypeLegalForClass(LoongArch::LASX256RegClass, VT))
4747 return std::make_pair(0U, &LoongArch::LASX256RegClass);
4748 break;
4749 default:
4750 break;
4751 }
4752 }
4753
4754 // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
4755 // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm
4756 // constraints while the official register name is prefixed with a '$'. So we
4757 // clip the '$' from the original constraint string (e.g. {$r0} to {r0})
4758 // before it is parsed. And TargetLowering::getRegForInlineAsmConstraint is
4759 // case insensitive, so no need to convert the constraint to upper case here.
4760 //
4761 // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly
4762 // decode the usage of register name aliases into their official names. And
4763 // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use
4764 // official register names.
4765 if (Constraint.starts_with("{$r") || Constraint.starts_with("{$f") ||
4766 Constraint.starts_with("{$vr") || Constraint.starts_with("{$xr")) {
4767 bool IsFP = Constraint[2] == 'f';
4768 std::pair<StringRef, StringRef> Temp = Constraint.split('$');
4769 std::pair<unsigned, const TargetRegisterClass *> R;
4770 R = TargetLowering::getRegForInlineAsmConstraint(
4771 TRI, join_items("", Temp.first, Temp.second), VT);
4772 // Match those names to the widest floating point register type available.
4773 if (IsFP) {
4774 unsigned RegNo = R.first;
4775 if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) {
4776 if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) {
4777 unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64;
4778 return std::make_pair(DReg, &LoongArch::FPR64RegClass);
4779 }
4780 }
4781 }
4782 return R;
4783 }
4784
4785 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
4786}
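
// For example, an IR-level inline asm such as (hypothetical)
//
//   call i64 asm "move $0, $1", "={$r4},{$r5}"(i64 %v)
//
// has the '$' stripped ({$r4} -> {r4}) before the generic matcher sees it,
// and {$f0} with the D feature resolves to F0_64, the widest FP class.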
4787
4788void LoongArchTargetLowering::LowerAsmOperandForConstraint(
4789 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
4790 SelectionDAG &DAG) const {
4791 // Currently we only support length-1 constraints.
4792 if (Constraint.size() == 1) {
4793 switch (Constraint[0]) {
4794 case 'l':
4795 // Validate & create a 16-bit signed immediate operand.
4796 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
4797 uint64_t CVal = C->getSExtValue();
4798 if (isInt<16>(CVal))
4799 Ops.push_back(
4800 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
4801 }
4802 return;
4803 case 'I':
4804 // Validate & create a 12-bit signed immediate operand.
4805 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
4806 uint64_t CVal = C->getSExtValue();
4807 if (isInt<12>(CVal))
4808 Ops.push_back(
4809 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
4810 }
4811 return;
4812 case 'J':
4813 // Validate & create an integer zero operand.
4814 if (auto *C = dyn_cast<ConstantSDNode>(Op))
4815 if (C->getZExtValue() == 0)
4816 Ops.push_back(
4817 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getGRLenVT()));
4818 return;
4819 case 'K':
4820 // Validate & create a 12-bit unsigned immediate operand.
4821 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
4822 uint64_t CVal = C->getZExtValue();
4823 if (isUInt<12>(CVal))
4824 Ops.push_back(
4825 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
4826 }
4827 return;
4828 default:
4829 break;
4830 }
4831 }
4832 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
4833}
4834
4835#define GET_REGISTER_MATCHER
4836#include "LoongArchGenAsmMatcher.inc"
4837
4838 Register
4839 LoongArchTargetLowering::getRegisterByName(const char *RegName, LLT VT,
4840 const MachineFunction &MF) const {
4841 std::pair<StringRef, StringRef> Name = StringRef(RegName).split('$');
4842 std::string NewRegName = Name.second.str();
4843 Register Reg = MatchRegisterAltName(NewRegName);
4844 if (Reg == LoongArch::NoRegister)
4845 Reg = MatchRegisterName(NewRegName);
4846 if (Reg == LoongArch::NoRegister)
4847 report_fatal_error(
4848 Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
4849 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
4850 if (!ReservedRegs.test(Reg))
4851 report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
4852 StringRef(RegName) + "\"."));
4853 return Reg;
4854}
4855
4856 bool LoongArchTargetLowering::decomposeMulByConstant(LLVMContext &Context,
4857 EVT VT, SDValue C) const {
4858 // TODO: Support vectors.
4859 if (!VT.isScalarInteger())
4860 return false;
4861
4862 // Omit the optimization if the data size exceeds GRLen.
4863 if (VT.getSizeInBits() > Subtarget.getGRLen())
4864 return false;
4865
4866 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
4867 const APInt &Imm = ConstNode->getAPIntValue();
4868 // Break MUL into (SLLI + ADD/SUB) or ALSL.
4869 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
4870 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
4871 return true;
4872 // Break MUL into (ALSL x, (SLLI x, imm0), imm1).
4873 if (ConstNode->hasOneUse() &&
4874 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
4875 (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2()))
4876 return true;
4877 // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)),
4878 // in which the immediate has two set bits. Or Break (MUL x, imm)
4879 // into (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate
4880 // equals to (1 << s0) - (1 << s1).
4881 if (ConstNode->hasOneUse() && !(Imm.sge(-2048) && Imm.sle(4095))) {
4882 unsigned Shifts = Imm.countr_zero();
4883 // Reject immediates which can be composed via a single LUI.
4884 if (Shifts >= 12)
4885 return false;
4886 // Reject multiplications that can be optimized to
4887 // (SLLI (ALSL x, x, 1/2/3/4), s).
4888 APInt ImmPop = Imm.ashr(Shifts);
4889 if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17)
4890 return false;
4891 // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`,
4892 // since it needs one more instruction than the other 3 cases.
4893 APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true);
4894 if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() ||
4895 (ImmSmall - Imm).isPowerOf2())
4896 return true;
4897 }
4898 }
4899
4900 return false;
4901}
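Worked examples of the decompositions accepted above (illustrative arithmetic only; the actual rewrites are produced later during selection):

    x * 9    => (ALSL x, x, 3)                  ; (x << 3) + x, Imm - 1 is a power of two
    x * 7    => (SUB (SLLI x, 3), x)            ; (x << 3) - x, Imm + 1 is a power of two
    x * 10   => (ALSL x, (SLLI x, 3), 1)        ; (x << 1) + (x << 3), Imm - 2 is a power of two
    x * 8160 => (SUB (SLLI x, 13), (SLLI x, 5)) ; Imm + ImmSmall == 1 << 13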
4902
4903bool LoongArchTargetLowering::isLegalAddressingMode(const DataLayout &DL,
4904 const AddrMode &AM,
4905 Type *Ty, unsigned AS,
4906 Instruction *I) const {
4907 // LoongArch has four basic addressing modes:
4908 // 1. reg
4909 // 2. reg + 12-bit signed offset
4910 // 3. reg + 14-bit signed offset left-shifted by 2
4911 // 4. reg1 + reg2
4912 // TODO: Add more checks once the vector extensions are supported.
4913
4914 // No global is ever allowed as a base.
4915 if (AM.BaseGV)
4916 return false;
4917
4918 // Require a 12-bit signed offset, or a 14-bit signed offset left-shifted
4919 // by 2 when the `UAL` feature is present.
4920 if (!isInt<12>(AM.BaseOffs) &&
4921 !(isShiftedInt<14, 2>(AM.BaseOffs) && Subtarget.hasUAL()))
4922 return false;
4923
4924 switch (AM.Scale) {
4925 case 0:
4926 // "r+i" or just "i", depending on HasBaseReg.
4927 break;
4928 case 1:
4929 // "r+r+i" is not allowed.
4930 if (AM.HasBaseReg && AM.BaseOffs)
4931 return false;
4932 // Otherwise we have "r+r" or "r+i".
4933 break;
4934 case 2:
4935 // "2*r+r" or "2*r+i" is not allowed.
4936 if (AM.HasBaseReg || AM.BaseOffs)
4937 return false;
4938 // Allow "2*r" as "r+r".
4939 break;
4940 default:
4941 return false;
4942 }
4943
4944 return true;
4945}
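For orientation, the four modes correspond to loads such as the following (assembly is a sketch; register choices are arbitrary):

    ld.d    $a0, $a1, 0        ; 1. reg
    ld.d    $a0, $a1, 2040     ; 2. reg + 12-bit signed offset
    ldptr.d $a0, $a1, 16380    ; 3. reg + 14-bit signed offset << 2
    ldx.d   $a0, $a1, $a2      ; 4. reg1 + reg2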
4946
4947bool LoongArchTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
4948 return isInt<12>(Imm);
4949}
4950
4951bool LoongArchTargetLowering::isLegalAddImmediate(int64_t Imm) const {
4952 return isInt<12>(Imm);
4953}
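Both hooks mirror the si12 immediate field of instructions such as slti and addi.d. A sketch of the consequence (hypothetical values):

    x < 2047  =>  slti $a0, $a1, 2047      ; immediate folds into one instruction
    x < 2048  =>  ori  $a2, $zero, 2048    ; constant materialized first,
                  slt  $a0, $a1, $a2       ; then a register-register compare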
4954
4955bool LoongArchTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
4956 // Zexts are free if they can be combined with a load.
4957 // Don't advertise i32->i64 zextload as being free for LA64. It interacts
4958 // poorly with type legalization of compares preferring sext.
4959 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
4960 EVT MemVT = LD->getMemoryVT();
4961 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
4962 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
4963 LD->getExtensionType() == ISD::ZEXTLOAD))
4964 return true;
4965 }
4966
4967 return TargetLowering::isZExtFree(Val, VT2);
4968}
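A sketch of the free case (IR not from this file): the narrow load below is selected as ld.bu, which already zero-extends into the full-width register, so the explicit zext costs nothing:

    %b = load i8, ptr %p
    %w = zext i8 %b to i64   ; folded into the ld.bu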
4969
4970bool LoongArchTargetLowering::isSExtCheaperThanZExt(EVT SrcVT,
4971 EVT DstVT) const {
4972 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
4973}
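This holds because LA64's 32-bit ALU instructions (add.w, addi.w, and friends) already produce results sign-extended to 64 bits, so the sext is a no-op, whereas a zext needs explicit masking. A sketch:

    sext i32 -> i64 : (no instruction; W-form results are already sign-extended)
    zext i32 -> i64 : bstrpick.d $a0, $a0, 31, 0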
4974
4975bool LoongArchTargetLowering::hasAndNotCompare(SDValue Y) const {
4976 // TODO: Support vectors.
4977 if (Y.getValueType().isVector())
4978 return false;
4979
4980 return !isa<ConstantSDNode>(Y);
4981}
4982
4983ISD::NodeType LoongArchTargetLowering::getExtendForAtomicCmpSwapArg() const {
4984 // TODO: LAMCAS will use amcas{_DB,}.[bhwd] which does not require extension.
4985 return ISD::SIGN_EXTEND;
4986}