1//===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that RISC-V uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "RISCVISelLowering.h"
16#include "RISCV.h"
18#include "RISCVRegisterInfo.h"
19#include "RISCVSubtarget.h"
20#include "RISCVTargetMachine.h"
21#include "llvm/ADT/SmallSet.h"
22#include "llvm/ADT/Statistic.h"
36#include "llvm/IR/IRBuilder.h"
38#include "llvm/IR/IntrinsicsRISCV.h"
41#include "llvm/Support/Debug.h"
47#include <optional>
48
49using namespace llvm;
50
51#define DEBUG_TYPE "riscv-lower"
52
53STATISTIC(NumTailCalls, "Number of tail calls");
54
56 DEBUG_TYPE "-ext-max-web-size", cl::Hidden,
57 cl::desc("Give the maximum size (in number of nodes) of the web of "
58 "instructions that we will consider for VW expansion"),
59 cl::init(18));
60
61static cl::opt<bool>
62 AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden,
63 cl::desc("Allow the formation of VW_W operations (e.g., "
64 "VWADD_W) with splat constants"),
65 cl::init(false));
66
68 DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden,
69 cl::desc("Set the minimum number of repetitions of a divisor to allow "
70 "transformation to multiplications by the reciprocal"),
71 cl::init(2));
72
73static cl::opt<int>
75 cl::desc("Give the maximum number of instructions that we will "
76 "use for creating a floating-point immediate value"),
77 cl::init(2));
78
79static cl::opt<bool>
80 RV64LegalI32("riscv-experimental-rv64-legal-i32", cl::ReallyHidden,
81 cl::desc("Make i32 a legal type for SelectionDAG on RV64."));
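// Illustrative usage (not part of the upstream file): because the options
// above are named from DEBUG_TYPE ("riscv-lower") plus a suffix, they can be
// set on the llc command line when experimenting with this lowering, e.g.:
//
//   llc -mtriple=riscv64 -mattr=+v,+m \
//       -riscv-lower-fp-repeated-divisors=4 \
//       -riscv-lower-ext-max-web-size=32 input.ll
//
// The values shown are arbitrary examples, not recommended settings.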
82
84 const RISCVSubtarget &STI)
85 : TargetLowering(TM), Subtarget(STI) {
86
87 RISCVABI::ABI ABI = Subtarget.getTargetABI();
88 assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
89
90 if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
91 !Subtarget.hasStdExtF()) {
92 errs() << "Hard-float 'f' ABI can't be used for a target that "
93 "doesn't support the F instruction set extension (ignoring "
94 "target-abi)\n";
96 } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
97 !Subtarget.hasStdExtD()) {
98 errs() << "Hard-float 'd' ABI can't be used for a target that "
99 "doesn't support the D instruction set extension (ignoring "
100 "target-abi)\n";
101 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
102 }
103
104 switch (ABI) {
105 default:
106 report_fatal_error("Don't know how to lower this ABI");
115 break;
116 }
117
118 MVT XLenVT = Subtarget.getXLenVT();
119
120 // Set up the register classes.
121 addRegisterClass(XLenVT, &RISCV::GPRRegClass);
122 if (Subtarget.is64Bit() && RV64LegalI32)
123 addRegisterClass(MVT::i32, &RISCV::GPRRegClass);
124
125 if (Subtarget.hasStdExtZfhmin())
126 addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
127 if (Subtarget.hasStdExtZfbfmin())
128 addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass);
129 if (Subtarget.hasStdExtF())
130 addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
131 if (Subtarget.hasStdExtD())
132 addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
133 if (Subtarget.hasStdExtZhinxmin())
134 addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass);
135 if (Subtarget.hasStdExtZfinx())
136 addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass);
137 if (Subtarget.hasStdExtZdinx()) {
138 if (Subtarget.is64Bit())
139 addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
140 else
141 addRegisterClass(MVT::f64, &RISCV::GPRPairRegClass);
142 }
143
144 static const MVT::SimpleValueType BoolVecVTs[] = {
145 MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1,
146 MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
147 static const MVT::SimpleValueType IntVecVTs[] = {
148 MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8,
149 MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16,
150 MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
151 MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
152 MVT::nxv4i64, MVT::nxv8i64};
153 static const MVT::SimpleValueType F16VecVTs[] = {
154 MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16,
155 MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
156 static const MVT::SimpleValueType BF16VecVTs[] = {
157 MVT::nxv1bf16, MVT::nxv2bf16, MVT::nxv4bf16,
158 MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16};
159 static const MVT::SimpleValueType F32VecVTs[] = {
160 MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
161 static const MVT::SimpleValueType F64VecVTs[] = {
162 MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
163
164 if (Subtarget.hasVInstructions()) {
165 auto addRegClassForRVV = [this](MVT VT) {
166 // Disable the smallest fractional LMUL types if ELEN is less than
167 // RVVBitsPerBlock.
168 unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELen();
169 if (VT.getVectorMinNumElements() < MinElts)
170 return;
171
172 unsigned Size = VT.getSizeInBits().getKnownMinValue();
173 const TargetRegisterClass *RC;
175 RC = &RISCV::VRRegClass;
176 else if (Size == 2 * RISCV::RVVBitsPerBlock)
177 RC = &RISCV::VRM2RegClass;
178 else if (Size == 4 * RISCV::RVVBitsPerBlock)
179 RC = &RISCV::VRM4RegClass;
180 else if (Size == 8 * RISCV::RVVBitsPerBlock)
181 RC = &RISCV::VRM8RegClass;
182 else
183 llvm_unreachable("Unexpected size");
184
185 addRegisterClass(VT, RC);
186 };
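    // Illustrative note (not upstream): assuming the usual RVVBitsPerBlock of
    // 64, a Zve32x target (ELEN=32) gives MinElts = 64/32 = 2, so the smallest
    // fractional-LMUL type nxv1i8 gets no register class at all, while e.g.
    // nxv8i32 has a known-min size of 256 bits = 4 * RVVBitsPerBlock and is
    // assigned VRM4 (an LMUL=4 register group).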
187
188 for (MVT VT : BoolVecVTs)
189 addRegClassForRVV(VT);
190 for (MVT VT : IntVecVTs) {
191 if (VT.getVectorElementType() == MVT::i64 &&
192 !Subtarget.hasVInstructionsI64())
193 continue;
194 addRegClassForRVV(VT);
195 }
196
197 if (Subtarget.hasVInstructionsF16Minimal())
198 for (MVT VT : F16VecVTs)
199 addRegClassForRVV(VT);
200
201 if (Subtarget.hasVInstructionsBF16())
202 for (MVT VT : BF16VecVTs)
203 addRegClassForRVV(VT);
204
205 if (Subtarget.hasVInstructionsF32())
206 for (MVT VT : F32VecVTs)
207 addRegClassForRVV(VT);
208
209 if (Subtarget.hasVInstructionsF64())
210 for (MVT VT : F64VecVTs)
211 addRegClassForRVV(VT);
212
213 if (Subtarget.useRVVForFixedLengthVectors()) {
214 auto addRegClassForFixedVectors = [this](MVT VT) {
215 MVT ContainerVT = getContainerForFixedLengthVector(VT);
216 unsigned RCID = getRegClassIDForVecVT(ContainerVT);
217 const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
218 addRegisterClass(VT, TRI.getRegClass(RCID));
219 };
221 if (useRVVForFixedLengthVectorVT(VT))
222 addRegClassForFixedVectors(VT);
223
225 if (useRVVForFixedLengthVectorVT(VT))
226 addRegClassForFixedVectors(VT);
227 }
228 }
229
230 // Compute derived properties from the register classes.
232
234
236 MVT::i1, Promote);
237 // DAGCombiner can call isLoadExtLegal for types that aren't legal.
239 MVT::i1, Promote);
240
241 // TODO: add all necessary setOperationAction calls.
243
246 if (RV64LegalI32 && Subtarget.is64Bit())
250 if (RV64LegalI32 && Subtarget.is64Bit())
252
259
260 if (RV64LegalI32 && Subtarget.is64Bit())
262
264
267 if (RV64LegalI32 && Subtarget.is64Bit())
269
271
273
274 if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb())
275 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);
276
277 if (Subtarget.is64Bit()) {
279
280 if (!RV64LegalI32) {
283 MVT::i32, Custom);
285 MVT::i32, Custom);
286 if (!Subtarget.hasStdExtZbb())
288 } else {
290 if (Subtarget.hasStdExtZbb()) {
293 }
294 }
296 } else {
298 {RTLIB::SHL_I128, RTLIB::SRL_I128, RTLIB::SRA_I128, RTLIB::MUL_I128},
299 nullptr);
300 setLibcallName(RTLIB::MULO_I64, nullptr);
301 }
302
303 if (!Subtarget.hasStdExtM() && !Subtarget.hasStdExtZmmul()) {
305 if (RV64LegalI32 && Subtarget.is64Bit())
307 } else if (Subtarget.is64Bit()) {
309 if (!RV64LegalI32)
311 else
313 } else {
315 }
316
317 if (!Subtarget.hasStdExtM()) {
319 XLenVT, Expand);
320 if (RV64LegalI32 && Subtarget.is64Bit())
322 Promote);
323 } else if (Subtarget.is64Bit()) {
324 if (!RV64LegalI32)
326 {MVT::i8, MVT::i16, MVT::i32}, Custom);
327 }
328
329 if (RV64LegalI32 && Subtarget.is64Bit()) {
333 Expand);
334 }
335
338 Expand);
339
341 Custom);
342
343 if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
344 if (!RV64LegalI32 && Subtarget.is64Bit())
346 } else if (Subtarget.hasVendorXTHeadBb()) {
347 if (Subtarget.is64Bit())
350 } else if (Subtarget.hasVendorXCVbitmanip()) {
352 } else {
354 if (RV64LegalI32 && Subtarget.is64Bit())
356 }
357
358 // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
359 // pattern match it directly in isel.
361 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
362 Subtarget.hasVendorXTHeadBb())
363 ? Legal
364 : Expand);
365 if (RV64LegalI32 && Subtarget.is64Bit())
367 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
368 Subtarget.hasVendorXTHeadBb())
369 ? Promote
370 : Expand);
371
372
373 if (Subtarget.hasVendorXCVbitmanip()) {
375 } else {
376 // Zbkb can use rev8+brev8 to implement bitreverse.
378 Subtarget.hasStdExtZbkb() ? Custom : Expand);
379 }
380
381 if (Subtarget.hasStdExtZbb()) {
383 Legal);
384 if (RV64LegalI32 && Subtarget.is64Bit())
386 Promote);
387
388 if (Subtarget.is64Bit()) {
389 if (RV64LegalI32)
391 else
393 }
394 } else if (!Subtarget.hasVendorXCVbitmanip()) {
396 if (RV64LegalI32 && Subtarget.is64Bit())
398 }
399
400 if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
401 Subtarget.hasVendorXCVbitmanip()) {
402 // We need the custom lowering to make sure that the resulting sequence
403 // for the 32-bit case is efficient on 64-bit targets.
404 if (Subtarget.is64Bit()) {
405 if (RV64LegalI32) {
407 Subtarget.hasStdExtZbb() ? Legal : Promote);
408 if (!Subtarget.hasStdExtZbb())
410 } else
412 }
413 } else {
415 if (RV64LegalI32 && Subtarget.is64Bit())
417 }
418
419 if (!RV64LegalI32 && Subtarget.is64Bit() &&
420 !Subtarget.hasShortForwardBranchOpt())
422
423 // We can use PseudoCCSUB to implement ABS.
424 if (Subtarget.hasShortForwardBranchOpt())
426
427 if (!Subtarget.hasVendorXTHeadCondMov()) {
429 if (RV64LegalI32 && Subtarget.is64Bit())
431 }
432
433 static const unsigned FPLegalNodeTypes[] = {
440
441 static const ISD::CondCode FPCCToExpand[] = {
445
446 static const unsigned FPOpToExpand[] = {
448 ISD::FREM};
449
450 static const unsigned FPRndMode[] = {
453
454 if (Subtarget.hasStdExtZfhminOrZhinxmin())
456
457 static const unsigned ZfhminZfbfminPromoteOps[] = {
467
468 if (Subtarget.hasStdExtZfbfmin()) {
477 setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote);
479 // FIXME: Need to promote bf16 FCOPYSIGN to f32, but the
480 // DAGCombiner::visitFP_ROUND probably needs improvements first.
482 }
483
484 if (Subtarget.hasStdExtZfhminOrZhinxmin()) {
485 if (Subtarget.hasStdExtZfhOrZhinx()) {
486 setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
487 setOperationAction(FPRndMode, MVT::f16,
488 Subtarget.hasStdExtZfa() ? Legal : Custom);
491 } else {
492 setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote);
495 MVT::f16, Legal);
496 // FIXME: Need to promote f16 FCOPYSIGN to f32, but the
497 // DAGCombiner::visitFP_ROUND probably needs improvements first.
499 }
500
503 setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
506
508 Subtarget.hasStdExtZfa() ? Legal : Promote);
513 MVT::f16, Promote);
514
515 // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
516 // complete support for all operations in LegalizeDAG.
521 MVT::f16, Promote);
522
523 // We need to custom promote this.
524 if (Subtarget.is64Bit())
526
527 if (!Subtarget.hasStdExtZfa())
529 }
530
531 if (Subtarget.hasStdExtFOrZfinx()) {
532 setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);
533 setOperationAction(FPRndMode, MVT::f32,
534 Subtarget.hasStdExtZfa() ? Legal : Custom);
535 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
539 setOperationAction(FPOpToExpand, MVT::f32, Expand);
540 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
541 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
542 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
543 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
547 Subtarget.isSoftFPABI() ? LibCall : Custom);
550
551 if (Subtarget.hasStdExtZfa())
553 else
555 }
556
557 if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())
559
560 if (Subtarget.hasStdExtDOrZdinx()) {
561 setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);
562
563 if (!Subtarget.is64Bit())
565
566 if (Subtarget.hasStdExtZfa()) {
567 setOperationAction(FPRndMode, MVT::f64, Legal);
569 } else {
570 if (Subtarget.is64Bit())
571 setOperationAction(FPRndMode, MVT::f64, Custom);
572
574 }
575
578 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
582 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
583 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
584 setOperationAction(FPOpToExpand, MVT::f64, Expand);
585 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
586 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
587 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
588 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
592 Subtarget.isSoftFPABI() ? LibCall : Custom);
595 }
596
597 if (Subtarget.is64Bit()) {
600 MVT::i32, Custom);
602 }
603
604 if (Subtarget.hasStdExtFOrZfinx()) {
606 Custom);
607
610 XLenVT, Legal);
611
612 if (RV64LegalI32 && Subtarget.is64Bit())
615 MVT::i32, Legal);
616
619 }
620
623 XLenVT, Custom);
624
626
627 if (Subtarget.is64Bit())
629
630 // TODO: On M-mode only targets, the cycle[h]/time[h] CSR may not be present.
631 // Unfortunately this can't be determined just from the ISA naming string.
633 Subtarget.is64Bit() ? Legal : Custom);
635 Subtarget.is64Bit() ? Legal : Custom);
636
639 if (Subtarget.is64Bit())
641
642 if (Subtarget.hasStdExtZicbop()) {
644 }
645
646 if (Subtarget.hasStdExtA()) {
648 if (Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas())
650 else
652 } else if (Subtarget.hasForcedAtomics()) {
654 } else {
656 }
657
659
661
662 if (Subtarget.hasVInstructions()) {
664
666 if (RV64LegalI32 && Subtarget.is64Bit())
668
669 // RVV intrinsics may have illegal operands.
670 // We also need to custom legalize vmv.x.s.
673 {MVT::i8, MVT::i16}, Custom);
674 if (Subtarget.is64Bit())
676 MVT::i32, Custom);
677 else
679 MVT::i64, Custom);
680
682 MVT::Other, Custom);
683
684 static const unsigned IntegerVPOps[] = {
685 ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
686 ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,
687 ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,
688 ISD::VP_XOR, ISD::VP_ASHR, ISD::VP_LSHR,
689 ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
690 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
691 ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
692 ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FP_TO_SINT,
693 ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
694 ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN,
695 ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX,
696 ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE,
697 ISD::VP_SADDSAT, ISD::VP_UADDSAT, ISD::VP_SSUBSAT,
698 ISD::VP_USUBSAT};
699
700 static const unsigned FloatingPointVPOps[] = {
701 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
702 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
703 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
704 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
705 ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP,
706 ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND,
707 ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM,
708 ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
709 ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
710 ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS,
711 ISD::VP_FMINIMUM, ISD::VP_FMAXIMUM, ISD::VP_LRINT,
712 ISD::VP_LLRINT, ISD::EXPERIMENTAL_VP_REVERSE,
713 ISD::EXPERIMENTAL_VP_SPLICE};
714
715 static const unsigned IntegerVecReduceOps[] = {
719
720 static const unsigned FloatingPointVecReduceOps[] = {
723
724 if (!Subtarget.is64Bit()) {
725 // We must custom-lower certain vXi64 operations on RV32 due to the vector
726 // element type being illegal.
728 MVT::i64, Custom);
729
730 setOperationAction(IntegerVecReduceOps, MVT::i64, Custom);
731
732 setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
733 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
734 ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
735 ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
736 MVT::i64, Custom);
737 }
738
739 for (MVT VT : BoolVecVTs) {
740 if (!isTypeLegal(VT))
741 continue;
742
744
745 // Mask VTs are custom-expanded into a series of standard nodes
749 VT, Custom);
750
752 Custom);
753
756 {ISD::SELECT_CC, ISD::VSELECT, ISD::VP_MERGE, ISD::VP_SELECT}, VT,
757 Expand);
758
759 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);
760
763 Custom);
764
766 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
767 Custom);
768
769 // RVV has native int->float & float->int conversions where the
770 // element type sizes are within one power-of-two of each other. Any
771 // wider distances between type sizes have to be lowered as sequences
772 // which progressively narrow the gap in stages.
777 VT, Custom);
779 Custom);
780
781 // Expand all extending loads to types larger than this, and truncating
782 // stores from types larger than this.
784 setTruncStoreAction(VT, OtherVT, Expand);
786 OtherVT, Expand);
787 }
788
789 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
790 ISD::VP_TRUNCATE, ISD::VP_SETCC},
791 VT, Custom);
792
795
797
798 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
799 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
800
803 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));
804 }
805
806 for (MVT VT : IntVecVTs) {
807 if (!isTypeLegal(VT))
808 continue;
809
812
813 // Vectors implement MULHS/MULHU.
815
816 // nxvXi64 MULHS/MULHU require the V extension instead of Zve64*.
817 if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
819
821 Legal);
822
824
825 // Custom-lower extensions and truncations from/to mask types.
827 VT, Custom);
828
829 // RVV has native int->float & float->int conversions where the
830 // element type sizes are within one power-of-two of each other. Any
831 // wider distances between type sizes have to be lowered as sequences
832 // which progressively narrow the gap in stages.
837 VT, Custom);
839 Custom);
842 VT, Legal);
843
844 // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
845 // nodes which truncate by one power of two at a time.
847
848 // Custom-lower insert/extract operations to simplify patterns.
850 Custom);
851
852 // Custom-lower reduction operations to set up the corresponding custom
853 // nodes' operands.
854 setOperationAction(IntegerVecReduceOps, VT, Custom);
855
856 setOperationAction(IntegerVPOps, VT, Custom);
857
859
861 VT, Custom);
862
864 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
865 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
866 VT, Custom);
867
870 VT, Custom);
871
874
876
878 setTruncStoreAction(VT, OtherVT, Expand);
880 OtherVT, Expand);
881 }
882
885
886 // Splice
888
889 if (Subtarget.hasStdExtZvkb()) {
891 setOperationAction(ISD::VP_BSWAP, VT, Custom);
892 } else {
893 setOperationAction({ISD::BSWAP, ISD::VP_BSWAP}, VT, Expand);
895 }
896
897 if (Subtarget.hasStdExtZvbb()) {
899 setOperationAction(ISD::VP_BITREVERSE, VT, Custom);
900 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
901 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
902 VT, Custom);
903 } else {
904 setOperationAction({ISD::BITREVERSE, ISD::VP_BITREVERSE}, VT, Expand);
906 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
907 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
908 VT, Expand);
909
910 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the element of VT is in
911 // the range of f32.
912 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
913 if (isTypeLegal(FloatVT)) {
915 ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,
916 ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
917 VT, Custom);
918 }
919 }
920 }
921
922 // Expand various CCs to best match the RVV ISA, which natively supports UNE
923 // but no other unordered comparisons, and supports all ordered comparisons
924 // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
925 // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
926 // and we pattern-match those back to the "original", swapping operands once
927 // more. This way we catch both operations and both "vf" and "fv" forms with
928 // fewer patterns.
929 static const ISD::CondCode VFPCCToExpand[] = {
933 };
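    // Illustrative note (not upstream): for example, (setcc ogt %a, %b) on an
    // RVV type is expanded here to (setcc olt %b, %a); instruction selection
    // then recognises the swapped form, so the same vmflt/vmfle patterns (plus
    // the vf-only vmfgt/vmfge forms for a scalar operand) cover both operand
    // orders.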
934
935 // TODO: support more ops.
936 static const unsigned ZvfhminPromoteOps[] = {
944
945 // TODO: support more vp ops.
946 static const unsigned ZvfhminPromoteVPOps[] = {
947 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
948 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
949 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
950 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_SQRT,
951 ISD::VP_FMINNUM, ISD::VP_FMAXNUM, ISD::VP_FCEIL,
952 ISD::VP_FFLOOR, ISD::VP_FROUND, ISD::VP_FROUNDEVEN,
953 ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO, ISD::VP_FRINT,
954 ISD::VP_FNEARBYINT, ISD::VP_SETCC, ISD::VP_FMINIMUM,
955 ISD::VP_FMAXIMUM};
956
957 // Sets common operation actions on RVV floating-point vector types.
958 const auto SetCommonVFPActions = [&](MVT VT) {
960 // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
961 // sizes are within one power-of-two of each other. Therefore conversions
962 // between vXf16 and vXf64 must be lowered as sequences which convert via
963 // vXf32.
966 // Custom-lower insert/extract operations to simplify patterns.
968 Custom);
969 // Expand various condition codes (explained above).
970 setCondCodeAction(VFPCCToExpand, VT, Expand);
971
974
978 VT, Custom);
979
980 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
981
982 // Expand FP operations that need libcalls.
994
996
998
1000 VT, Custom);
1001
1003 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1004 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
1005 VT, Custom);
1006
1009
1012 VT, Custom);
1013
1016
1018
1019 setOperationAction(FloatingPointVPOps, VT, Custom);
1020
1022 Custom);
1025 VT, Legal);
1030 VT, Custom);
1031 };
1032
1033 // Sets common extload/truncstore actions on RVV floating-point vector
1034 // types.
1035 const auto SetCommonVFPExtLoadTruncStoreActions =
1036 [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
1037 for (auto SmallVT : SmallerVTs) {
1038 setTruncStoreAction(VT, SmallVT, Expand);
1039 setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
1040 }
1041 };
1042
1043 if (Subtarget.hasVInstructionsF16()) {
1044 for (MVT VT : F16VecVTs) {
1045 if (!isTypeLegal(VT))
1046 continue;
1047 SetCommonVFPActions(VT);
1048 }
1049 } else if (Subtarget.hasVInstructionsF16Minimal()) {
1050 for (MVT VT : F16VecVTs) {
1051 if (!isTypeLegal(VT))
1052 continue;
1055 Custom);
1056 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1057 setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
1058 Custom);
1061 ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
1062 VT, Custom);
1065 VT, Custom);
1066 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1068 // load/store
1070
1071 // Custom split nxv32f16 since nxv32f32 is not legal.
1072 if (VT == MVT::nxv32f16) {
1073 setOperationAction(ZvfhminPromoteOps, VT, Custom);
1074 setOperationAction(ZvfhminPromoteVPOps, VT, Custom);
1075 continue;
1076 }
1077 // Add more promote ops.
1078 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1079 setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
1080 setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);
1081 }
1082 }
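    // Illustrative note (not upstream): with Zvfhmin but without Zvfh, an fadd
    // on nxv4f16 is promoted via the setOperationPromotedToType calls above:
    // the operands are widened to nxv4f32, the add is performed in f32, and
    // the result is narrowed back to f16. nxv32f16 is the exception and is
    // custom split first, because its promoted type nxv32f32 is not legal.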
1083
1084 if (Subtarget.hasVInstructionsF32()) {
1085 for (MVT VT : F32VecVTs) {
1086 if (!isTypeLegal(VT))
1087 continue;
1088 SetCommonVFPActions(VT);
1089 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1090 }
1091 }
1092
1093 if (Subtarget.hasVInstructionsF64()) {
1094 for (MVT VT : F64VecVTs) {
1095 if (!isTypeLegal(VT))
1096 continue;
1097 SetCommonVFPActions(VT);
1098 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1099 SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
1100 }
1101 }
1102
1103 if (Subtarget.useRVVForFixedLengthVectors()) {
1105 if (!useRVVForFixedLengthVectorVT(VT))
1106 continue;
1107
1108 // By default everything must be expanded.
1109 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1112 setTruncStoreAction(VT, OtherVT, Expand);
1114 OtherVT, Expand);
1115 }
1116
1117 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1118 // expansion to a build_vector of 0s.
1120
1121 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1123 Custom);
1124
1126 Custom);
1127
1129 VT, Custom);
1130
1132
1134
1136
1138
1140
1142
1145 Custom);
1146
1148 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
1149 Custom);
1150
1152 {
1161 },
1162 VT, Custom);
1164 Custom);
1165
1167
1168 // Operations below are different between masks and other vectors.
1169 if (VT.getVectorElementType() == MVT::i1) {
1170 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
1171 ISD::OR, ISD::XOR},
1172 VT, Custom);
1173
1174 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
1175 ISD::VP_SETCC, ISD::VP_TRUNCATE},
1176 VT, Custom);
1177
1178 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1179 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1180 continue;
1181 }
1182
1183 // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
1184 // it before type legalization for i64 vectors on RV32. It will then be
1185 // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
1186 // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
1187 // improvements first.
1188 if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
1191 }
1192
1195
1196 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1197 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1198 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1199 ISD::VP_SCATTER},
1200 VT, Custom);
1201
1205 VT, Custom);
1206
1209
1211
1212 // vXi64 MULHS/MULHU require the V extension instead of Zve64*.
1213 if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
1215
1218 VT, Custom);
1219
1222
1225
1226 // Custom-lower reduction operations to set up the corresponding custom
1227 // nodes' operands.
1231 VT, Custom);
1232
1233 setOperationAction(IntegerVPOps, VT, Custom);
1234
1235 if (Subtarget.hasStdExtZvkb())
1237
1238 if (Subtarget.hasStdExtZvbb()) {
1241 VT, Custom);
1242 } else {
1243 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the element of VT is in
1244 // the range of f32.
1245 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1246 if (isTypeLegal(FloatVT))
1249 Custom);
1250 }
1251 }
1252
1254 // There are no extending loads or truncating stores.
1255 for (MVT InnerVT : MVT::fp_fixedlen_vector_valuetypes()) {
1256 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1257 setTruncStoreAction(VT, InnerVT, Expand);
1258 }
1259
1260 if (!useRVVForFixedLengthVectorVT(VT))
1261 continue;
1262
1263 // By default everything must be expanded.
1264 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1266
1267 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1268 // expansion to a build_vector of 0s.
1270
1271 if (VT.getVectorElementType() == MVT::f16 &&
1272 !Subtarget.hasVInstructionsF16()) {
1275 Custom);
1276 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1278 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1279 Custom);
1281 ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
1282 VT, Custom);
1285 VT, Custom);
1288 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1289 // Don't promote f16 vector operations to f32 if f32 vector type is
1290 // not legal.
1291 // TODO: could split the f16 vector into two vectors and do promotion.
1292 if (!isTypeLegal(F32VecVT))
1293 continue;
1294 setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
1295 setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);
1296 continue;
1297 }
1298
1299 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1301 Custom);
1302
1306 VT, Custom);
1307
1310 VT, Custom);
1311
1312 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1313 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1314 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1315 ISD::VP_SCATTER},
1316 VT, Custom);
1317
1322 VT, Custom);
1323
1325
1328 VT, Custom);
1329
1330 setCondCodeAction(VFPCCToExpand, VT, Expand);
1331
1335
1337
1338 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1339
1340 setOperationAction(FloatingPointVPOps, VT, Custom);
1341
1343 Custom);
1350 VT, Custom);
1351 }
1352
1353 // Custom-legalize bitcasts from fixed-length vectors to scalar types.
1354 setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32, MVT::i64},
1355 Custom);
1356 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1358 if (Subtarget.hasStdExtFOrZfinx())
1360 if (Subtarget.hasStdExtDOrZdinx())
1362 }
1363 }
1364
1365 if (Subtarget.hasStdExtA()) {
1367 if (RV64LegalI32 && Subtarget.is64Bit())
1369 }
1370
1371 if (Subtarget.hasForcedAtomics()) {
1372 // Force __sync libcalls to be emitted for atomic rmw/cas operations.
1378 XLenVT, LibCall);
1379 }
1380
1381 if (Subtarget.hasVendorXTHeadMemIdx()) {
1382 for (unsigned im : {ISD::PRE_INC, ISD::POST_INC}) {
1383 setIndexedLoadAction(im, MVT::i8, Legal);
1384 setIndexedStoreAction(im, MVT::i8, Legal);
1385 setIndexedLoadAction(im, MVT::i16, Legal);
1386 setIndexedStoreAction(im, MVT::i16, Legal);
1387 setIndexedLoadAction(im, MVT::i32, Legal);
1388 setIndexedStoreAction(im, MVT::i32, Legal);
1389
1390 if (Subtarget.is64Bit()) {
1391 setIndexedLoadAction(im, MVT::i64, Legal);
1392 setIndexedStoreAction(im, MVT::i64, Legal);
1393 }
1394 }
1395 }
1396
1397 // Function alignments.
1398 const Align FunctionAlignment(Subtarget.hasStdExtCOrZca() ? 2 : 4);
1399 setMinFunctionAlignment(FunctionAlignment);
1400 // Set preferred alignments.
1403
1407 if (Subtarget.is64Bit())
1409
1410 if (Subtarget.hasStdExtFOrZfinx())
1412
1413 if (Subtarget.hasStdExtZbb())
1415
1416 if (Subtarget.hasStdExtZbs() && Subtarget.is64Bit())
1418
1419 if (Subtarget.hasStdExtZbkb())
1421 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1423 if (Subtarget.hasStdExtFOrZfinx())
1426 if (Subtarget.hasVInstructions())
1428 ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL,
1431 ISD::EXPERIMENTAL_VP_REVERSE, ISD::MUL,
1434 if (Subtarget.hasVendorXTHeadMemPair())
1436 if (Subtarget.useRVVForFixedLengthVectors())
1438
1439 setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
1440 setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
1441
1442 // Disable strict node mutation.
1443 IsStrictFPEnabled = true;
1444}
1445
1447 LLVMContext &Context,
1448 EVT VT) const {
1449 if (!VT.isVector())
1450 return getPointerTy(DL);
1451 if (Subtarget.hasVInstructions() &&
1452 (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
1453 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
1455}
1456
1457MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
1458 return Subtarget.getXLenVT();
1459}
1460
1461// Return false if we can lower get_vector_length to a vsetvli intrinsic.
1462bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
1463 unsigned VF,
1464 bool IsScalable) const {
1465 if (!Subtarget.hasVInstructions())
1466 return true;
1467
1468 if (!IsScalable)
1469 return true;
1470
1471 if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())
1472 return true;
1473
1474 // Don't allow VF=1 if those types aren't legal.
1475 if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELen())
1476 return true;
1477
1478 // VLEN=32 support is incomplete.
1479 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
1480 return true;
1481
1482 // The maximum VF is for the smallest element width with LMUL=8.
1483 // VF must be a power of 2.
1484 unsigned MaxVF = (RISCV::RVVBitsPerBlock / 8) * 8;
1485 return VF > MaxVF || !isPowerOf2_32(VF);
1486}
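// Illustrative note (not upstream): with RVVBitsPerBlock = 64 this gives
// MaxVF = (64 / 8) * 8 = 64, so a call such as (sketch)
//   %evl = call i32 @llvm.experimental.get.vector.length.i32(i32 %tc, i32 4, i1 true)
// (power-of-two VF, scalable) can be lowered to a vsetvli, while VF values
// like 3 or 128 are expanded by generic code instead.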
1487
1488bool RISCVTargetLowering::shouldExpandCttzElements(EVT VT) const {
1489 return !Subtarget.hasVInstructions() ||
1490 VT.getVectorElementType() != MVT::i1 || !isTypeLegal(VT);
1491}
1492
1494 const CallInst &I,
1495 MachineFunction &MF,
1496 unsigned Intrinsic) const {
1497 auto &DL = I.getModule()->getDataLayout();
1498
1499 auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore,
1500 bool IsUnitStrided, bool UsePtrVal = false) {
1502 // We can't use ptrVal if the intrinsic can access memory before the
1503 // pointer. This means we can't use it for strided or indexed intrinsics.
1504 if (UsePtrVal)
1505 Info.ptrVal = I.getArgOperand(PtrOp);
1506 else
1507 Info.fallbackAddressSpace =
1508 I.getArgOperand(PtrOp)->getType()->getPointerAddressSpace();
1509 Type *MemTy;
1510 if (IsStore) {
1511 // Store value is the first operand.
1512 MemTy = I.getArgOperand(0)->getType();
1513 } else {
1514 // Use return type. If it's segment load, return type is a struct.
1515 MemTy = I.getType();
1516 if (MemTy->isStructTy())
1517 MemTy = MemTy->getStructElementType(0);
1518 }
1519 if (!IsUnitStrided)
1520 MemTy = MemTy->getScalarType();
1521
1522 Info.memVT = getValueType(DL, MemTy);
1523 Info.align = Align(DL.getTypeSizeInBits(MemTy->getScalarType()) / 8);
1525 Info.flags |=
1527 return true;
1528 };
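  // Illustrative note (not upstream): a unit-strided intrinsic such as
  // riscv_vle records the full vector type as memVT and keeps I's pointer as
  // ptrVal, whereas a strided one such as riscv_vlse only records the scalar
  // element type and falls back to the pointer's address space, since the
  // access may touch memory before the pointer.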
1529
1530 if (I.hasMetadata(LLVMContext::MD_nontemporal))
1532
1534 switch (Intrinsic) {
1535 default:
1536 return false;
1537 case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
1538 case Intrinsic::riscv_masked_atomicrmw_add_i32:
1539 case Intrinsic::riscv_masked_atomicrmw_sub_i32:
1540 case Intrinsic::riscv_masked_atomicrmw_nand_i32:
1541 case Intrinsic::riscv_masked_atomicrmw_max_i32:
1542 case Intrinsic::riscv_masked_atomicrmw_min_i32:
1543 case Intrinsic::riscv_masked_atomicrmw_umax_i32:
1544 case Intrinsic::riscv_masked_atomicrmw_umin_i32:
1545 case Intrinsic::riscv_masked_cmpxchg_i32:
1547 Info.memVT = MVT::i32;
1548 Info.ptrVal = I.getArgOperand(0);
1549 Info.offset = 0;
1550 Info.align = Align(4);
1553 return true;
1554 case Intrinsic::riscv_masked_strided_load:
1555 return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ false,
1556 /*IsUnitStrided*/ false);
1557 case Intrinsic::riscv_masked_strided_store:
1558 return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ true,
1559 /*IsUnitStrided*/ false);
1560 case Intrinsic::riscv_seg2_load:
1561 case Intrinsic::riscv_seg3_load:
1562 case Intrinsic::riscv_seg4_load:
1563 case Intrinsic::riscv_seg5_load:
1564 case Intrinsic::riscv_seg6_load:
1565 case Intrinsic::riscv_seg7_load:
1566 case Intrinsic::riscv_seg8_load:
1567 return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,
1568 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1569 case Intrinsic::riscv_seg2_store:
1570 case Intrinsic::riscv_seg3_store:
1571 case Intrinsic::riscv_seg4_store:
1572 case Intrinsic::riscv_seg5_store:
1573 case Intrinsic::riscv_seg6_store:
1574 case Intrinsic::riscv_seg7_store:
1575 case Intrinsic::riscv_seg8_store:
1576 // Operands are (vec, ..., vec, ptr, vl)
1577 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1578 /*IsStore*/ true,
1579 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1580 case Intrinsic::riscv_vle:
1581 case Intrinsic::riscv_vle_mask:
1582 case Intrinsic::riscv_vleff:
1583 case Intrinsic::riscv_vleff_mask:
1584 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1585 /*IsStore*/ false,
1586 /*IsUnitStrided*/ true,
1587 /*UsePtrVal*/ true);
1588 case Intrinsic::riscv_vse:
1589 case Intrinsic::riscv_vse_mask:
1590 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1591 /*IsStore*/ true,
1592 /*IsUnitStrided*/ true,
1593 /*UsePtrVal*/ true);
1594 case Intrinsic::riscv_vlse:
1595 case Intrinsic::riscv_vlse_mask:
1596 case Intrinsic::riscv_vloxei:
1597 case Intrinsic::riscv_vloxei_mask:
1598 case Intrinsic::riscv_vluxei:
1599 case Intrinsic::riscv_vluxei_mask:
1600 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1601 /*IsStore*/ false,
1602 /*IsUnitStrided*/ false);
1603 case Intrinsic::riscv_vsse:
1604 case Intrinsic::riscv_vsse_mask:
1605 case Intrinsic::riscv_vsoxei:
1606 case Intrinsic::riscv_vsoxei_mask:
1607 case Intrinsic::riscv_vsuxei:
1608 case Intrinsic::riscv_vsuxei_mask:
1609 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1610 /*IsStore*/ true,
1611 /*IsUnitStrided*/ false);
1612 case Intrinsic::riscv_vlseg2:
1613 case Intrinsic::riscv_vlseg3:
1614 case Intrinsic::riscv_vlseg4:
1615 case Intrinsic::riscv_vlseg5:
1616 case Intrinsic::riscv_vlseg6:
1617 case Intrinsic::riscv_vlseg7:
1618 case Intrinsic::riscv_vlseg8:
1619 case Intrinsic::riscv_vlseg2ff:
1620 case Intrinsic::riscv_vlseg3ff:
1621 case Intrinsic::riscv_vlseg4ff:
1622 case Intrinsic::riscv_vlseg5ff:
1623 case Intrinsic::riscv_vlseg6ff:
1624 case Intrinsic::riscv_vlseg7ff:
1625 case Intrinsic::riscv_vlseg8ff:
1626 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1627 /*IsStore*/ false,
1628 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1629 case Intrinsic::riscv_vlseg2_mask:
1630 case Intrinsic::riscv_vlseg3_mask:
1631 case Intrinsic::riscv_vlseg4_mask:
1632 case Intrinsic::riscv_vlseg5_mask:
1633 case Intrinsic::riscv_vlseg6_mask:
1634 case Intrinsic::riscv_vlseg7_mask:
1635 case Intrinsic::riscv_vlseg8_mask:
1636 case Intrinsic::riscv_vlseg2ff_mask:
1637 case Intrinsic::riscv_vlseg3ff_mask:
1638 case Intrinsic::riscv_vlseg4ff_mask:
1639 case Intrinsic::riscv_vlseg5ff_mask:
1640 case Intrinsic::riscv_vlseg6ff_mask:
1641 case Intrinsic::riscv_vlseg7ff_mask:
1642 case Intrinsic::riscv_vlseg8ff_mask:
1643 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1644 /*IsStore*/ false,
1645 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1646 case Intrinsic::riscv_vlsseg2:
1647 case Intrinsic::riscv_vlsseg3:
1648 case Intrinsic::riscv_vlsseg4:
1649 case Intrinsic::riscv_vlsseg5:
1650 case Intrinsic::riscv_vlsseg6:
1651 case Intrinsic::riscv_vlsseg7:
1652 case Intrinsic::riscv_vlsseg8:
1653 case Intrinsic::riscv_vloxseg2:
1654 case Intrinsic::riscv_vloxseg3:
1655 case Intrinsic::riscv_vloxseg4:
1656 case Intrinsic::riscv_vloxseg5:
1657 case Intrinsic::riscv_vloxseg6:
1658 case Intrinsic::riscv_vloxseg7:
1659 case Intrinsic::riscv_vloxseg8:
1660 case Intrinsic::riscv_vluxseg2:
1661 case Intrinsic::riscv_vluxseg3:
1662 case Intrinsic::riscv_vluxseg4:
1663 case Intrinsic::riscv_vluxseg5:
1664 case Intrinsic::riscv_vluxseg6:
1665 case Intrinsic::riscv_vluxseg7:
1666 case Intrinsic::riscv_vluxseg8:
1667 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1668 /*IsStore*/ false,
1669 /*IsUnitStrided*/ false);
1670 case Intrinsic::riscv_vlsseg2_mask:
1671 case Intrinsic::riscv_vlsseg3_mask:
1672 case Intrinsic::riscv_vlsseg4_mask:
1673 case Intrinsic::riscv_vlsseg5_mask:
1674 case Intrinsic::riscv_vlsseg6_mask:
1675 case Intrinsic::riscv_vlsseg7_mask:
1676 case Intrinsic::riscv_vlsseg8_mask:
1677 case Intrinsic::riscv_vloxseg2_mask:
1678 case Intrinsic::riscv_vloxseg3_mask:
1679 case Intrinsic::riscv_vloxseg4_mask:
1680 case Intrinsic::riscv_vloxseg5_mask:
1681 case Intrinsic::riscv_vloxseg6_mask:
1682 case Intrinsic::riscv_vloxseg7_mask:
1683 case Intrinsic::riscv_vloxseg8_mask:
1684 case Intrinsic::riscv_vluxseg2_mask:
1685 case Intrinsic::riscv_vluxseg3_mask:
1686 case Intrinsic::riscv_vluxseg4_mask:
1687 case Intrinsic::riscv_vluxseg5_mask:
1688 case Intrinsic::riscv_vluxseg6_mask:
1689 case Intrinsic::riscv_vluxseg7_mask:
1690 case Intrinsic::riscv_vluxseg8_mask:
1691 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
1692 /*IsStore*/ false,
1693 /*IsUnitStrided*/ false);
1694 case Intrinsic::riscv_vsseg2:
1695 case Intrinsic::riscv_vsseg3:
1696 case Intrinsic::riscv_vsseg4:
1697 case Intrinsic::riscv_vsseg5:
1698 case Intrinsic::riscv_vsseg6:
1699 case Intrinsic::riscv_vsseg7:
1700 case Intrinsic::riscv_vsseg8:
1701 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1702 /*IsStore*/ true,
1703 /*IsUnitStrided*/ false);
1704 case Intrinsic::riscv_vsseg2_mask:
1705 case Intrinsic::riscv_vsseg3_mask:
1706 case Intrinsic::riscv_vsseg4_mask:
1707 case Intrinsic::riscv_vsseg5_mask:
1708 case Intrinsic::riscv_vsseg6_mask:
1709 case Intrinsic::riscv_vsseg7_mask:
1710 case Intrinsic::riscv_vsseg8_mask:
1711 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1712 /*IsStore*/ true,
1713 /*IsUnitStrided*/ false);
1714 case Intrinsic::riscv_vssseg2:
1715 case Intrinsic::riscv_vssseg3:
1716 case Intrinsic::riscv_vssseg4:
1717 case Intrinsic::riscv_vssseg5:
1718 case Intrinsic::riscv_vssseg6:
1719 case Intrinsic::riscv_vssseg7:
1720 case Intrinsic::riscv_vssseg8:
1721 case Intrinsic::riscv_vsoxseg2:
1722 case Intrinsic::riscv_vsoxseg3:
1723 case Intrinsic::riscv_vsoxseg4:
1724 case Intrinsic::riscv_vsoxseg5:
1725 case Intrinsic::riscv_vsoxseg6:
1726 case Intrinsic::riscv_vsoxseg7:
1727 case Intrinsic::riscv_vsoxseg8:
1728 case Intrinsic::riscv_vsuxseg2:
1729 case Intrinsic::riscv_vsuxseg3:
1730 case Intrinsic::riscv_vsuxseg4:
1731 case Intrinsic::riscv_vsuxseg5:
1732 case Intrinsic::riscv_vsuxseg6:
1733 case Intrinsic::riscv_vsuxseg7:
1734 case Intrinsic::riscv_vsuxseg8:
1735 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1736 /*IsStore*/ true,
1737 /*IsUnitStrided*/ false);
1738 case Intrinsic::riscv_vssseg2_mask:
1739 case Intrinsic::riscv_vssseg3_mask:
1740 case Intrinsic::riscv_vssseg4_mask:
1741 case Intrinsic::riscv_vssseg5_mask:
1742 case Intrinsic::riscv_vssseg6_mask:
1743 case Intrinsic::riscv_vssseg7_mask:
1744 case Intrinsic::riscv_vssseg8_mask:
1745 case Intrinsic::riscv_vsoxseg2_mask:
1746 case Intrinsic::riscv_vsoxseg3_mask:
1747 case Intrinsic::riscv_vsoxseg4_mask:
1748 case Intrinsic::riscv_vsoxseg5_mask:
1749 case Intrinsic::riscv_vsoxseg6_mask:
1750 case Intrinsic::riscv_vsoxseg7_mask:
1751 case Intrinsic::riscv_vsoxseg8_mask:
1752 case Intrinsic::riscv_vsuxseg2_mask:
1753 case Intrinsic::riscv_vsuxseg3_mask:
1754 case Intrinsic::riscv_vsuxseg4_mask:
1755 case Intrinsic::riscv_vsuxseg5_mask:
1756 case Intrinsic::riscv_vsuxseg6_mask:
1757 case Intrinsic::riscv_vsuxseg7_mask:
1758 case Intrinsic::riscv_vsuxseg8_mask:
1759 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1760 /*IsStore*/ true,
1761 /*IsUnitStrided*/ false);
1762 }
1763}
1764
1766 const AddrMode &AM, Type *Ty,
1767 unsigned AS,
1768 Instruction *I) const {
1769 // No global is ever allowed as a base.
1770 if (AM.BaseGV)
1771 return false;
1772
1773 // RVV instructions only support register addressing.
1774 if (Subtarget.hasVInstructions() && isa<VectorType>(Ty))
1775 return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;
1776
1777 // Require a 12-bit signed offset.
1778 if (!isInt<12>(AM.BaseOffs))
1779 return false;
1780
1781 switch (AM.Scale) {
1782 case 0: // "r+i" or just "i", depending on HasBaseReg.
1783 break;
1784 case 1:
1785 if (!AM.HasBaseReg) // allow "r+i".
1786 break;
1787 return false; // disallow "r+r" or "r+r+i".
1788 default:
1789 return false;
1790 }
1791
1792 return true;
1793}
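// Illustrative note (not upstream): a scalar access like (%base + 2040) can
// keep its offset, since it fits the signed 12-bit immediate of ld/sd, whereas
// for an RVV vector access even a small offset has to be materialised with a
// separate addi, because vle/vse instructions only accept a bare base register.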
1794
1796 return isInt<12>(Imm);
1797}
1798
1800 return isInt<12>(Imm);
1801}
1802
1803// On RV32, 64-bit integers are split into their high and low parts and held
1804// in two different registers, so the trunc is free since the low register can
1805// just be used.
1806// FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of
1807// isTruncateFree?
1809 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
1810 return false;
1811 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
1812 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
1813 return (SrcBits == 64 && DestBits == 32);
1814}
1815
1817 // We consider i64->i32 free on RV64 since we have good selection of W
1818 // instructions that make promoting operations back to i64 free in many cases.
1819 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
1820 !DstVT.isInteger())
1821 return false;
1822 unsigned SrcBits = SrcVT.getSizeInBits();
1823 unsigned DestBits = DstVT.getSizeInBits();
1824 return (SrcBits == 64 && DestBits == 32);
1825}
1826
1828 // Zexts are free if they can be combined with a load.
1829 // Don't advertise i32->i64 zextload as being free for RV64. It interacts
1830 // poorly with type legalization of compares preferring sext.
1831 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
1832 EVT MemVT = LD->getMemoryVT();
1833 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
1834 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
1835 LD->getExtensionType() == ISD::ZEXTLOAD))
1836 return true;
1837 }
1838
1839 return TargetLowering::isZExtFree(Val, VT2);
1840}
1841
1843 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
1844}
1845
1847 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
1848}
1849
1851 return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXCVbitmanip();
1852}
1853
1855 return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
1856 Subtarget.hasVendorXCVbitmanip();
1857}
1858
1860 const Instruction &AndI) const {
1861 // We expect to be able to match a bit extraction instruction if the Zbs
1862 // extension is supported and the mask is a power of two. However, we
1863 // conservatively return false if the mask would fit in an ANDI instruction,
1864 // on the basis that it's possible the sinking+duplication of the AND in
1865 // CodeGenPrepare triggered by this hook wouldn't decrease the instruction
1866 // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
1867 if (!Subtarget.hasStdExtZbs() && !Subtarget.hasVendorXTHeadBs())
1868 return false;
1869 ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
1870 if (!Mask)
1871 return false;
1872 return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2();
1873}
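// Illustrative note (not upstream): with Zbs, (x & 4096) != 0 benefits, since
// the mask 1<<12 does not fit an ANDI immediate and the test can become
// bexti+bnez; (x & 8) != 0 is rejected because ANDI+BNEZ is already as short.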
1874
1876 EVT VT = Y.getValueType();
1877
1878 // FIXME: Support vectors once we have tests.
1879 if (VT.isVector())
1880 return false;
1881
1882 return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
1883 !isa<ConstantSDNode>(Y);
1884}
1885
1887 // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test.
1888 if (Subtarget.hasStdExtZbs())
1889 return X.getValueType().isScalarInteger();
1890 auto *C = dyn_cast<ConstantSDNode>(Y);
1891 // XTheadBs provides th.tst (similar to bexti), if Y is a constant
1892 if (Subtarget.hasVendorXTHeadBs())
1893 return C != nullptr;
1894 // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
1895 return C && C->getAPIntValue().ule(10);
1896}
1897
1899 EVT VT) const {
1900 // Only enable for rvv.
1901 if (!VT.isVector() || !Subtarget.hasVInstructions())
1902 return false;
1903
1904 if (VT.isFixedLengthVector() && !isTypeLegal(VT))
1905 return false;
1906
1907 return true;
1908}
1909
1911 Type *Ty) const {
1912 assert(Ty->isIntegerTy());
1913
1914 unsigned BitSize = Ty->getIntegerBitWidth();
1915 if (BitSize > Subtarget.getXLen())
1916 return false;
1917
1918 // Fast path, assume 32-bit immediates are cheap.
1919 int64_t Val = Imm.getSExtValue();
1920 if (isInt<32>(Val))
1921 return true;
1922
1923 // A constant pool entry may be more aligned than the load we're trying to
1924 // replace. If we don't support unaligned scalar mem, prefer the constant
1925 // pool.
1926 // TODO: Can the caller pass down the alignment?
1927 if (!Subtarget.enableUnalignedScalarMem())
1928 return true;
1929
1930 // Prefer to keep the load if it would require many instructions.
1931 // This uses the same threshold we use for constant pools but doesn't
1932 // check useConstantPoolForLargeInts.
1933 // TODO: Should we keep the load only when we're definitely going to emit a
1934 // constant pool?
1935
1937 return Seq.size() <= Subtarget.getMaxBuildIntsCost();
1938}
1939
1943 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
1944 SelectionDAG &DAG) const {
1945 // One interesting pattern that we'd want to form is 'bit extract':
1946 // ((1 >> Y) & 1) ==/!= 0
1947 // But we also need to be careful not to try to reverse that fold.
1948
1949 // Is this '((1 >> Y) & 1)'?
1950 if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
1951 return false; // Keep the 'bit extract' pattern.
1952
1953 // Will this be '((1 >> Y) & 1)' after the transform?
1954 if (NewShiftOpcode == ISD::SRL && CC->isOne())
1955 return true; // Do form the 'bit extract' pattern.
1956
1957 // If 'X' is a constant, and we transform, then we will immediately
1958 // try to undo the fold, thus causing endless combine loop.
1959 // So only do the transform if X is not a constant. This matches the default
1960 // implementation of this function.
1961 return !XC;
1962}
1963
1964bool RISCVTargetLowering::canSplatOperand(unsigned Opcode, int Operand) const {
1965 switch (Opcode) {
1966 case Instruction::Add:
1967 case Instruction::Sub:
1968 case Instruction::Mul:
1969 case Instruction::And:
1970 case Instruction::Or:
1971 case Instruction::Xor:
1972 case Instruction::FAdd:
1973 case Instruction::FSub:
1974 case Instruction::FMul:
1975 case Instruction::FDiv:
1976 case Instruction::ICmp:
1977 case Instruction::FCmp:
1978 return true;
1979 case Instruction::Shl:
1980 case Instruction::LShr:
1981 case Instruction::AShr:
1982 case Instruction::UDiv:
1983 case Instruction::SDiv:
1984 case Instruction::URem:
1985 case Instruction::SRem:
1986 return Operand == 1;
1987 default:
1988 return false;
1989 }
1990}
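// Illustrative note (not upstream): for Instruction::Shl only the shift amount
// (operand 1) is reported as splattable, matching .vx/.vi forms such as
// vsll.vx where the scalar is the shift amount; for commutative ops like Add
// either operand may be the scalar, e.g. vadd.vx.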
1991
1992
1994 if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
1995 return false;
1996
1997 if (canSplatOperand(I->getOpcode(), Operand))
1998 return true;
1999
2000 auto *II = dyn_cast<IntrinsicInst>(I);
2001 if (!II)
2002 return false;
2003
2004 switch (II->getIntrinsicID()) {
2005 case Intrinsic::fma:
2006 case Intrinsic::vp_fma:
2007 return Operand == 0 || Operand == 1;
2008 case Intrinsic::vp_shl:
2009 case Intrinsic::vp_lshr:
2010 case Intrinsic::vp_ashr:
2011 case Intrinsic::vp_udiv:
2012 case Intrinsic::vp_sdiv:
2013 case Intrinsic::vp_urem:
2014 case Intrinsic::vp_srem:
2015 case Intrinsic::ssub_sat:
2016 case Intrinsic::vp_ssub_sat:
2017 case Intrinsic::usub_sat:
2018 case Intrinsic::vp_usub_sat:
2019 return Operand == 1;
2020 // These intrinsics are commutative.
2021 case Intrinsic::vp_add:
2022 case Intrinsic::vp_mul:
2023 case Intrinsic::vp_and:
2024 case Intrinsic::vp_or:
2025 case Intrinsic::vp_xor:
2026 case Intrinsic::vp_fadd:
2027 case Intrinsic::vp_fmul:
2028 case Intrinsic::vp_icmp:
2029 case Intrinsic::vp_fcmp:
2030 case Intrinsic::smin:
2031 case Intrinsic::vp_smin:
2032 case Intrinsic::umin:
2033 case Intrinsic::vp_umin:
2034 case Intrinsic::smax:
2035 case Intrinsic::vp_smax:
2036 case Intrinsic::umax:
2037 case Intrinsic::vp_umax:
2038 case Intrinsic::sadd_sat:
2039 case Intrinsic::vp_sadd_sat:
2040 case Intrinsic::uadd_sat:
2041 case Intrinsic::vp_uadd_sat:
2042 // These intrinsics have 'vr' versions.
2043 case Intrinsic::vp_sub:
2044 case Intrinsic::vp_fsub:
2045 case Intrinsic::vp_fdiv:
2046 return Operand == 0 || Operand == 1;
2047 default:
2048 return false;
2049 }
2050}
2051
2052/// Check if sinking \p I's operands to I's basic block is profitable, because
2053/// the operands can be folded into a target instruction, e.g.
2054/// splats of scalars can fold into vector instructions.
2056 Instruction *I, SmallVectorImpl<Use *> &Ops) const {
2057 using namespace llvm::PatternMatch;
2058
2059 if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
2060 return false;
2061
2062 // Don't sink splat operands unless the target prefers to. Some targets
2063 // require S2V transfer buffers and we can run out of them copying the same
2064 // value repeatedly.
2065 // FIXME: It could still be worth doing if it would improve vector register
2066 // pressure and prevent a vector spill.
2067 if (!Subtarget.sinkSplatOperands())
2068 return false;
2069
2070 for (auto OpIdx : enumerate(I->operands())) {
2071 if (!canSplatOperand(I, OpIdx.index()))
2072 continue;
2073
2074 Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
2075 // Make sure we are not already sinking this operand
2076 if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
2077 continue;
2078
2079 // We are looking for a splat that can be sunk.
2081 m_Undef(), m_ZeroMask())))
2082 continue;
2083
2084 // Don't sink i1 splats.
2085 if (cast<VectorType>(Op->getType())->getElementType()->isIntegerTy(1))
2086 continue;
2087
2088 // All uses of the shuffle should be sunk to avoid duplicating it across GPR
2089 // and vector registers.
2090 for (Use &U : Op->uses()) {
2091 Instruction *Insn = cast<Instruction>(U.getUser());
2092 if (!canSplatOperand(Insn, U.getOperandNo()))
2093 return false;
2094 }
2095
2096 Ops.push_back(&Op->getOperandUse(0));
2097 Ops.push_back(&OpIdx.value());
2098 }
2099 return true;
2100}
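// Illustrative IR (an assumption, not taken from a test in this repo): given
//   %head = insertelement <vscale x 4 x i32> poison, i32 %x, i64 0
//   %splat = shufflevector <vscale x 4 x i32> %head,
//                          <vscale x 4 x i32> poison,
//                          <vscale x 4 x i32> zeroinitializer
//   %sum = add <vscale x 4 x i32> %v, %splat
// sinking the insertelement/shufflevector pair next to the add lets
// instruction selection fold the scalar directly into vadd.vx.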
2101
2103 unsigned Opc = VecOp.getOpcode();
2104
2105 // Assume target opcodes can't be scalarized.
2106 // TODO - do we have any exceptions?
2107 if (Opc >= ISD::BUILTIN_OP_END)
2108 return false;
2109
2110 // If the vector op is not supported, try to convert to scalar.
2111 EVT VecVT = VecOp.getValueType();
2112 if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
2113 return true;
2114
2115 // If the vector op is supported, but the scalar op is not, the transform may
2116 // not be worthwhile.
2117 // Permit a vector binary operation to be converted to a scalar binary
2118 // operation which is custom lowered with an illegal type.
2119 EVT ScalarVT = VecVT.getScalarType();
2120 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT) ||
2121 isOperationCustom(Opc, ScalarVT);
2122}
2123
2125 const GlobalAddressSDNode *GA) const {
2126 // In order to maximise the opportunity for common subexpression elimination,
2127 // keep a separate ADD node for the global address offset instead of folding
2128 // it in the global address node. Later peephole optimisations may choose to
2129 // fold it back in when profitable.
2130 return false;
2131}
2132
2133 // Return one of the following:
2134 // (1) `{0-31 value, false}` if FLI is available for Imm's type and FP value.
2135 // (2) `{0-31 value, true}` if Imm is negative and FLI is available for its
2136 // positive counterpart, which will be materialized from the first returned
2137 // element. The second returned element indicates that an FNEG should
2138 // follow.
2139// (3) `{-1, _}` if there is no way FLI can be used to materialize Imm.
2140std::pair<int, bool> RISCVTargetLowering::getLegalZfaFPImm(const APFloat &Imm,
2141 EVT VT) const {
2142 if (!Subtarget.hasStdExtZfa())
2143 return std::make_pair(-1, false);
2144
2145 bool IsSupportedVT = false;
2146 if (VT == MVT::f16) {
2147 IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh();
2148 } else if (VT == MVT::f32) {
2149 IsSupportedVT = true;
2150 } else if (VT == MVT::f64) {
2151 assert(Subtarget.hasStdExtD() && "Expect D extension");
2152 IsSupportedVT = true;
2153 }
2154
2155 if (!IsSupportedVT)
2156 return std::make_pair(-1, false);
2157
2159 if (Index < 0 && Imm.isNegative())
2160 // Try the combination of its positive counterpart + FNEG.
2161 return std::make_pair(RISCVLoadFPImm::getLoadFPImm(-Imm), true);
2162 else
2163 return std::make_pair(Index, false);
2164}
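// Illustrative note (not upstream), assuming the standard Zfa FLI constant
// table: +2.0 in f32 is representable directly, giving {index-of-2.0, false};
// -2.0 is not in the table, so the positive counterpart is used and
// {index-of-2.0, true} requests a trailing FNEG; a value like 0.3 yields
// {-1, _} because FLI cannot materialise it.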
2165
2167 bool ForCodeSize) const {
2168 bool IsLegalVT = false;
2169 if (VT == MVT::f16)
2170 IsLegalVT = Subtarget.hasStdExtZfhminOrZhinxmin();
2171 else if (VT == MVT::f32)
2172 IsLegalVT = Subtarget.hasStdExtFOrZfinx();
2173 else if (VT == MVT::f64)
2174 IsLegalVT = Subtarget.hasStdExtDOrZdinx();
2175 else if (VT == MVT::bf16)
2176 IsLegalVT = Subtarget.hasStdExtZfbfmin();
2177
2178 if (!IsLegalVT)
2179 return false;
2180
2181 if (getLegalZfaFPImm(Imm, VT).first >= 0)
2182 return true;
2183
2184 // Cannot create a 64-bit floating-point immediate value for RV32.
2185 if (Subtarget.getXLen() < VT.getScalarSizeInBits()) {
2186 // td can handle +0.0 or -0.0 already.
2187 // -0.0 can be created by fmv + fneg.
2188 return Imm.isZero();
2189 }
2190
2191 // Special case: fmv + fneg
2192 if (Imm.isNegZero())
2193 return true;
2194
2195 // Building an integer and then converting requires a fmv at the end of
2196 // the integer sequence.
2197 const int Cost =
2198 1 + RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(), Subtarget.getXLen(),
2199 Subtarget);
2200 return Cost <= FPImmCost;
2201}
2202
2203// TODO: This is very conservative.
2205 unsigned Index) const {
2207 return false;
2208
2209 // Only support extracting a fixed from a fixed vector for now.
2210 if (ResVT.isScalableVector() || SrcVT.isScalableVector())
2211 return false;
2212
2213 EVT EltVT = ResVT.getVectorElementType();
2214 assert(EltVT == SrcVT.getVectorElementType() && "Should hold for node");
2215
2216 // The smallest type we can slide is i8.
2217 // TODO: We can extract index 0 from a mask vector without a slide.
2218 if (EltVT == MVT::i1)
2219 return false;
2220
2221 unsigned ResElts = ResVT.getVectorNumElements();
2222 unsigned SrcElts = SrcVT.getVectorNumElements();
2223
2224 unsigned MinVLen = Subtarget.getRealMinVLen();
2225 unsigned MinVLMAX = MinVLen / EltVT.getSizeInBits();
2226
2227 // If we're extracting only data from the first VLEN bits of the source
2228 // then we can always do this with an m1 vslidedown.vx. Restricting the
2229 // Index ensures we can use a vslidedown.vi.
2230 // TODO: We can generalize this when the exact VLEN is known.
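  // For example (illustrative, minimum VLEN of 128): extracting v2i32 from
  // v8i32 at index 2 reads only the first VLEN bits of the source, so it can
  // be done with a single m1 vslidedown.vi and is reported as cheap here.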
2231 if (Index + ResElts <= MinVLMAX && Index < 31)
2232 return true;
2233
2234 // Conservatively only handle extracting half of a vector.
2235 // TODO: For sizes which aren't multiples of VLEN sizes, this may not be
2236 // a cheap extract. However, this case is important in practice for
2237 // shuffled extracts of longer vectors. How should we resolve this?
2238 if ((ResElts * 2) != SrcElts)
2239 return false;
2240
2241 // Slide can support arbitrary index, but we only treat vslidedown.vi as
2242 // cheap.
2243 if (Index >= 32)
2244 return false;
2245
2246 // TODO: We can do arbitrary slidedowns, but for now only support extracting
2247 // the upper half of a vector until we have more test coverage.
2248 return Index == 0 || Index == ResElts;
2249}
2250
2253 EVT VT) const {
2254 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2255 // We might still end up using a GPR but that will be decided based on ABI.
2256 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2257 !Subtarget.hasStdExtZfhminOrZhinxmin())
2258 return MVT::f32;
2259
2261
2262 if (RV64LegalI32 && Subtarget.is64Bit() && PartVT == MVT::i32)
2263 return MVT::i64;
2264
2265 return PartVT;
2266}
2267
2270 EVT VT) const {
2271 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2272 // We might still end up using a GPR but that will be decided based on ABI.
2273 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2274 !Subtarget.hasStdExtZfhminOrZhinxmin())
2275 return 1;
2276
2278}
2279
2281 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
2282 unsigned &NumIntermediates, MVT &RegisterVT) const {
2284 Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
2285
2286 if (RV64LegalI32 && Subtarget.is64Bit() && IntermediateVT == MVT::i32)
2287 IntermediateVT = MVT::i64;
2288
2289 if (RV64LegalI32 && Subtarget.is64Bit() && RegisterVT == MVT::i32)
2290 RegisterVT = MVT::i64;
2291
2292 return NumRegs;
2293}
2294
2295// Changes the condition code and swaps operands if necessary, so the SetCC
2296// operation matches one of the comparisons supported directly by branches
2297// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
2298// with 1/-1.
2299static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
2300 ISD::CondCode &CC, SelectionDAG &DAG) {
2301 // If this is a single bit test that can't be handled by ANDI, shift the
2302 // bit to be tested to the MSB and perform a signed compare with 0.
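  // For example (illustrative, RV64): (X & (1 << 32)) == 0 cannot be matched
  // by ANDI, so it is rewritten as (X << 31) >= 0 (signed), which places the
  // tested bit in the sign position.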
2303 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
2304 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
2305 isa<ConstantSDNode>(LHS.getOperand(1))) {
2306 uint64_t Mask = LHS.getConstantOperandVal(1);
2307 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
2308 unsigned ShAmt = 0;
2309 if (isPowerOf2_64(Mask)) {
2311 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
2312 } else {
2313 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
2314 }
2315
2316 LHS = LHS.getOperand(0);
2317 if (ShAmt != 0)
2318 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
2319 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
2320 return;
2321 }
2322 }
2323
2324 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
2325 int64_t C = RHSC->getSExtValue();
2326 switch (CC) {
2327 default: break;
2328 case ISD::SETGT:
2329 // Convert X > -1 to X >= 0.
2330 if (C == -1) {
2331 RHS = DAG.getConstant(0, DL, RHS.getValueType());
2332 CC = ISD::SETGE;
2333 return;
2334 }
2335 break;
2336 case ISD::SETLT:
2337 // Convert X < 1 to 0 >= X.
2338 if (C == 1) {
2339 RHS = LHS;
2340 LHS = DAG.getConstant(0, DL, RHS.getValueType());
2341 CC = ISD::SETGE;
2342 return;
2343 }
2344 break;
2345 }
2346 }
2347
2348 switch (CC) {
2349 default:
2350 break;
2351 case ISD::SETGT:
2352 case ISD::SETLE:
2353 case ISD::SETUGT:
2354 case ISD::SETULE:
2356 std::swap(LHS, RHS);
2357 break;
2358 }
2359}
2360
2362 assert(VT.isScalableVector() && "Expecting a scalable vector type");
2363 unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
2364 if (VT.getVectorElementType() == MVT::i1)
2365 KnownSize *= 8;
2366
2367 switch (KnownSize) {
2368 default:
2369 llvm_unreachable("Invalid LMUL.");
2370 case 8:
2372 case 16:
2374 case 32:
2376 case 64:
2378 case 128:
2380 case 256:
2382 case 512:
2384 }
2385}
2386
2388 switch (LMul) {
2389 default:
2390 llvm_unreachable("Invalid LMUL.");
2395 return RISCV::VRRegClassID;
2397 return RISCV::VRM2RegClassID;
2399 return RISCV::VRM4RegClassID;
2401 return RISCV::VRM8RegClassID;
2402 }
2403}
2404
2406 RISCVII::VLMUL LMUL = getLMUL(VT);
2407 if (LMUL == RISCVII::VLMUL::LMUL_F8 ||
2408 LMUL == RISCVII::VLMUL::LMUL_F4 ||
2409 LMUL == RISCVII::VLMUL::LMUL_F2 ||
2410 LMUL == RISCVII::VLMUL::LMUL_1) {
2411 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
2412 "Unexpected subreg numbering");
2413 return RISCV::sub_vrm1_0 + Index;
2414 }
2415 if (LMUL == RISCVII::VLMUL::LMUL_2) {
2416 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
2417 "Unexpected subreg numbering");
2418 return RISCV::sub_vrm2_0 + Index;
2419 }
2420 if (LMUL == RISCVII::VLMUL::LMUL_4) {
2421 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
2422 "Unexpected subreg numbering");
2423 return RISCV::sub_vrm4_0 + Index;
2424 }
2425 llvm_unreachable("Invalid vector type.");
2426}
2427
2429 if (VT.getVectorElementType() == MVT::i1)
2430 return RISCV::VRRegClassID;
2431 return getRegClassIDForLMUL(getLMUL(VT));
2432}
2433
2434// Attempt to decompose a subvector insert/extract between VecVT and
2435// SubVecVT via subregister indices. Returns the subregister index that
2436// can perform the subvector insert/extract with the given element index, as
2437// well as the index corresponding to any leftover subvectors that must be
2438// further inserted/extracted within the register class for SubVecVT.
2439std::pair<unsigned, unsigned>
2441 MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
2442 const RISCVRegisterInfo *TRI) {
2443 static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
2444 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
2445 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
2446 "Register classes not ordered");
2447 unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
2448 unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
2449 // Try to compose a subregister index that takes us from the incoming
2450 // LMUL>1 register class down to the outgoing one. At each step we halve
2451 // the LMUL:
2452 // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
2453 // Note that this is not guaranteed to find a subregister index, such as
2454 // when we are extracting from one VR type to another.
2455 unsigned SubRegIdx = RISCV::NoSubRegister;
2456 for (const unsigned RCID :
2457 {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
2458 if (VecRegClassID > RCID && SubRegClassID <= RCID) {
2459 VecVT = VecVT.getHalfNumVectorElementsVT();
2460 bool IsHi =
2461 InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
2462 SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
2463 getSubregIndexByMVT(VecVT, IsHi));
2464 if (IsHi)
2465 InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
2466 }
2467 return {SubRegIdx, InsertExtractIdx};
2468}
2469
2470// Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
2471// stores for those types.
2472bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
2473 return !Subtarget.useRVVForFixedLengthVectors() ||
2474 (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
2475}
2476
2478 if (!ScalarTy.isSimple())
2479 return false;
2480 switch (ScalarTy.getSimpleVT().SimpleTy) {
2481 case MVT::iPTR:
2482 return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true;
2483 case MVT::i8:
2484 case MVT::i16:
2485 case MVT::i32:
2486 return true;
2487 case MVT::i64:
2488 return Subtarget.hasVInstructionsI64();
2489 case MVT::f16:
2490 return Subtarget.hasVInstructionsF16();
2491 case MVT::f32:
2492 return Subtarget.hasVInstructionsF32();
2493 case MVT::f64:
2494 return Subtarget.hasVInstructionsF64();
2495 default:
2496 return false;
2497 }
2498}
2499
2500
2501unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const {
2502 return NumRepeatedDivisors;
2503}
2504
2506 assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2507 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
2508 "Unexpected opcode");
2509 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
2510 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
2512 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
2513 if (!II)
2514 return SDValue();
2515 return Op.getOperand(II->VLOperand + 1 + HasChain);
2516}
2517
2519 const RISCVSubtarget &Subtarget) {
2520 assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
2521 if (!Subtarget.useRVVForFixedLengthVectors())
2522 return false;
2523
2524 // We only support a set of vector types with a consistent maximum fixed size
2525 // across all supported vector element types to avoid legalization issues.
2526 // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
2527 // fixed-length vector type we support is 1024 bytes.
2528 if (VT.getFixedSizeInBits() > 1024 * 8)
2529 return false;
2530
2531 unsigned MinVLen = Subtarget.getRealMinVLen();
2532
2533 MVT EltVT = VT.getVectorElementType();
2534
2535 // Don't use RVV for vectors we cannot scalarize if required.
2536 switch (EltVT.SimpleTy) {
2537 // i1 is supported but has different rules.
2538 default:
2539 return false;
2540 case MVT::i1:
2541 // Masks can only use a single register.
2542 if (VT.getVectorNumElements() > MinVLen)
2543 return false;
2544 MinVLen /= 8;
2545 break;
2546 case MVT::i8:
2547 case MVT::i16:
2548 case MVT::i32:
2549 break;
2550 case MVT::i64:
2551 if (!Subtarget.hasVInstructionsI64())
2552 return false;
2553 break;
2554 case MVT::f16:
2555 if (!Subtarget.hasVInstructionsF16Minimal())
2556 return false;
2557 break;
2558 case MVT::f32:
2559 if (!Subtarget.hasVInstructionsF32())
2560 return false;
2561 break;
2562 case MVT::f64:
2563 if (!Subtarget.hasVInstructionsF64())
2564 return false;
2565 break;
2566 }
2567
2568 // Reject elements larger than ELEN.
2569 if (EltVT.getSizeInBits() > Subtarget.getELen())
2570 return false;
2571
2572 unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
2573 // Don't use RVV for types that don't fit.
2574 if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
2575 return false;
2576
2577 // TODO: Perhaps an artificial restriction, but worth having whilst getting
2578 // the base fixed length RVV support in place.
2579 if (!VT.isPow2VectorType())
2580 return false;
2581
2582 return true;
2583}
2584
2585bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
2586 return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
2587}
2588
2589// Return the largest legal scalable vector type that matches VT's element type.
2591 const RISCVSubtarget &Subtarget) {
2592 // This may be called before legal types are setup.
2593 assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
2594 useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
2595 "Expected legal fixed length vector!");
2596
2597 unsigned MinVLen = Subtarget.getRealMinVLen();
2598 unsigned MaxELen = Subtarget.getELen();
2599
2600 MVT EltVT = VT.getVectorElementType();
2601 switch (EltVT.SimpleTy) {
2602 default:
2603 llvm_unreachable("unexpected element type for RVV container");
2604 case MVT::i1:
2605 case MVT::i8:
2606 case MVT::i16:
2607 case MVT::i32:
2608 case MVT::i64:
2609 case MVT::f16:
2610 case MVT::f32:
2611 case MVT::f64: {
2612 // We prefer to use LMUL=1 for VLEN-sized types. Use fractional LMULs for
2613 // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
2614 // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
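    // For example (illustrative, assuming VLEN=128 and ELEN=64): v8i16 (a
    // VLEN-sized type) maps to the LMUL=1 container nxv4i16, while v2i16 maps
    // to nxv1i16, a fractional-LMUL container.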
2615 unsigned NumElts =
2617 NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
2618 assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
2619 return MVT::getScalableVectorVT(EltVT, NumElts);
2620 }
2621 }
2622}
2623
2625 const RISCVSubtarget &Subtarget) {
2627 Subtarget);
2628}
2629
2631 return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());
2632}
2633
2634// Grow V to consume an entire RVV register.
2636 const RISCVSubtarget &Subtarget) {
2637 assert(VT.isScalableVector() &&
2638 "Expected to convert into a scalable vector!");
2639 assert(V.getValueType().isFixedLengthVector() &&
2640 "Expected a fixed length vector operand!");
2641 SDLoc DL(V);
2642 SDValue Zero = DAG.getVectorIdxConstant(0, DL);
2643 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
2644}
2645
2646// Shrink V so it's just big enough to maintain a VT's worth of data.
2648 const RISCVSubtarget &Subtarget) {
2650 "Expected to convert into a fixed length vector!");
2651 assert(V.getValueType().isScalableVector() &&
2652 "Expected a scalable vector operand!");
2653 SDLoc DL(V);
2654 SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
2655 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
2656}
2657
2658/// Return the mask type suitable for masking the provided
2659/// vector type. This is simply an i1 element type vector of the same
2660/// (possibly scalable) length.
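/// For example, nxv4i32 maps to nxv4i1 and v8f32 maps to v8i1.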
2661static MVT getMaskTypeFor(MVT VecVT) {
2662 assert(VecVT.isVector());
2664 return MVT::getVectorVT(MVT::i1, EC);
2665}
2666
2667/// Creates an all ones mask suitable for masking a vector of type VecTy with
2668/// vector length VL.
2669static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL,
2670 SelectionDAG &DAG) {
2671 MVT MaskVT = getMaskTypeFor(VecVT);
2672 return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
2673}
2674
2675static SDValue getVLOp(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
2676 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2677 // If we know the exact VLEN, and our VL is exactly equal to VLMAX,
2678 // canonicalize the representation. InsertVSETVLI will pick the immediate
2679 // encoding later if profitable.
2680 const auto [MinVLMAX, MaxVLMAX] =
2681 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
2682 if (MinVLMAX == MaxVLMAX && NumElts == MinVLMAX)
2683 return DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
2684
2685 return DAG.getConstant(NumElts, DL, Subtarget.getXLenVT());
2686}
2687
2688static std::pair<SDValue, SDValue>
2690 const RISCVSubtarget &Subtarget) {
2691 assert(VecVT.isScalableVector() && "Expecting a scalable vector");
2692 SDValue VL = DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
2693 SDValue Mask = getAllOnesMask(VecVT, VL, DL, DAG);
2694 return {Mask, VL};
2695}
2696
2697static std::pair<SDValue, SDValue>
2698getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
2699 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2700 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2701 SDValue VL = getVLOp(NumElts, ContainerVT, DL, DAG, Subtarget);
2702 SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
2703 return {Mask, VL};
2704}
2705
2706// Gets the two common "VL" operands: an all-ones mask and the vector length.
2707// VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
2708// the scalable vector type that the fixed-length vector is contained in. If
2709// VecVT is itself scalable, then ContainerVT should be the same as VecVT.
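// For example (illustrative): for a fixed-length v4i32 whose container is
// nxv2i32, VL is the constant 4 (or the VLMAX sentinel X0 when the exact VLEN
// is known and 4 equals VLMAX), and the mask is an all-ones nxv2i1 produced by
// VMSET_VL.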
2710static std::pair<SDValue, SDValue>
2711getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG,
2712 const RISCVSubtarget &Subtarget) {
2713 if (VecVT.isFixedLengthVector())
2714 return getDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT, DL, DAG,
2715 Subtarget);
2716 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2717 return getDefaultScalableVLOps(ContainerVT, DL, DAG, Subtarget);
2718}
2719
2721 SelectionDAG &DAG) const {
2722 assert(VecVT.isScalableVector() && "Expected scalable vector");
2723 return DAG.getElementCount(DL, Subtarget.getXLenVT(),
2724 VecVT.getVectorElementCount());
2725}
2726
2727std::pair<unsigned, unsigned>
2729 const RISCVSubtarget &Subtarget) {
2730 assert(VecVT.isScalableVector() && "Expected scalable vector");
2731
2732 unsigned EltSize = VecVT.getScalarSizeInBits();
2733 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
2734
2735 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
2736 unsigned MaxVLMAX =
2737 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
2738
2739 unsigned VectorBitsMin = Subtarget.getRealMinVLen();
2740 unsigned MinVLMAX =
2741 RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize);
2742
2743 return std::make_pair(MinVLMAX, MaxVLMAX);
2744}
2745
2746// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
2747 // of either are (currently) supported. This can get us into an infinite loop
2748// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
2749// as a ..., etc.
2750// Until either (or both) of these can reliably lower any node, reporting that
2751// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
2752// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
2753// which is not desirable.
2755 EVT VT, unsigned DefinedValues) const {
2756 return false;
2757}
2758
2760 // TODO: Here we assume the reciprocal throughput is 1 for LMUL_1; it is
2761 // implementation-defined.
2762 if (!VT.isVector())
2764 unsigned DLenFactor = Subtarget.getDLenFactor();
2765 unsigned Cost;
2766 if (VT.isScalableVector()) {
2767 unsigned LMul;
2768 bool Fractional;
2769 std::tie(LMul, Fractional) =
2771 if (Fractional)
2772 Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
2773 else
2774 Cost = (LMul * DLenFactor);
2775 } else {
2776 Cost = divideCeil(VT.getSizeInBits(), Subtarget.getRealMinVLen() / DLenFactor);
2777 }
2778 return Cost;
2779}
2780
2781
2782/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
2783/// is generally quadratic in the number of vregs implied by LMUL. Note that
2784/// the operands (index and possibly mask) are handled separately.
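/// For example (illustrative): at LMUL=4 with DLEN equal to VLEN, getLMULCost
/// returns 4, so a vrgather.vv is modeled as costing 4 * 4 = 16.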
2786 return getLMULCost(VT) * getLMULCost(VT);
2787}
2788
2789/// Return the cost of a vrgather.vi (or vx) instruction for the type VT.
2790/// vrgather.vi/vx may be linear in the number of vregs implied by LMUL,
2791/// or may track the vrgather.vv cost. It is implementation-dependent.
2793 return getLMULCost(VT);
2794}
2795
2796/// Return the cost of a vslidedown.vx or vslideup.vx instruction
2797/// for the type VT. (This does not cover the vslide1up or vslide1down
2798/// variants.) Slides may be linear in the number of vregs implied by LMUL,
2799/// or may track the vrgather.vv cost. It is implementation-dependent.
2801 return getLMULCost(VT);
2802}
2803
2804/// Return the cost of a vslidedown.vi or vslideup.vi instruction
2805/// for the type VT. (This does not cover the vslide1up or vslide1down
2806/// variants.) Slides may be linear in the number of vregs implied by LMUL,
2807/// or may track the vrgather.vv cost. It is implementation-dependent.
2809 return getLMULCost(VT);
2810}
2811
2813 const RISCVSubtarget &Subtarget) {
2814 // RISC-V FP-to-int conversions saturate to the destination register size, but
2815 // don't produce 0 for nan. We can use a conversion instruction and fix the
2816 // nan case with a compare and a select.
2817 SDValue Src = Op.getOperand(0);
2818
2819 MVT DstVT = Op.getSimpleValueType();
2820 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2821
2822 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
2823
2824 if (!DstVT.isVector()) {
2825 // For bf16 or for f16 in absence of Zfh, promote to f32, then saturate
2826 // the result.
2827 if ((Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
2828 Src.getValueType() == MVT::bf16) {
2829 Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src);
2830 }
2831
2832 unsigned Opc;
2833 if (SatVT == DstVT)
2834 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
2835 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
2837 else
2838 return SDValue();
2839 // FIXME: Support other SatVTs by clamping before or after the conversion.
2840
2841 SDLoc DL(Op);
2842 SDValue FpToInt = DAG.getNode(
2843 Opc, DL, DstVT, Src,
2845
2846 if (Opc == RISCVISD::FCVT_WU_RV64)
2847 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
2848
2849 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
2850 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt,
2852 }
2853
2854 // Vectors.
2855
2856 MVT DstEltVT = DstVT.getVectorElementType();
2857 MVT SrcVT = Src.getSimpleValueType();
2858 MVT SrcEltVT = SrcVT.getVectorElementType();
2859 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
2860 unsigned DstEltSize = DstEltVT.getSizeInBits();
2861
2862 // Only handle saturating to the destination type.
2863 if (SatVT != DstEltVT)
2864 return SDValue();
2865
2866 // FIXME: Don't support narrowing by more than 1 step for now.
2867 if (SrcEltSize > (2 * DstEltSize))
2868 return SDValue();
2869
2870 MVT DstContainerVT = DstVT;
2871 MVT SrcContainerVT = SrcVT;
2872 if (DstVT.isFixedLengthVector()) {
2873 DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
2874 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
2875 assert(DstContainerVT.getVectorElementCount() ==
2876 SrcContainerVT.getVectorElementCount() &&
2877 "Expected same element count");
2878 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
2879 }
2880
2881 SDLoc DL(Op);
2882
2883 auto [Mask, VL] = getDefaultVLOps(DstVT, DstContainerVT, DL, DAG, Subtarget);
2884
2885 SDValue IsNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
2886 {Src, Src, DAG.getCondCode(ISD::SETNE),
2887 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
2888
2889 // If we need to widen by more than 1 step, promote the FP type, then do a
2890 // widening convert.
2891 if (DstEltSize > (2 * SrcEltSize)) {
2892 assert(SrcContainerVT.getVectorElementType() == MVT::f16 && "Unexpected VT!");
2893 MVT InterVT = SrcContainerVT.changeVectorElementType(MVT::f32);
2894 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterVT, Src, Mask, VL);
2895 }
2896
2897 unsigned RVVOpc =
2899 SDValue Res = DAG.getNode(RVVOpc, DL, DstContainerVT, Src, Mask, VL);
2900
2901 SDValue SplatZero = DAG.getNode(
2902 RISCVISD::VMV_V_X_VL, DL, DstContainerVT, DAG.getUNDEF(DstContainerVT),
2903 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
2904 Res = DAG.getNode(RISCVISD::VMERGE_VL, DL, DstContainerVT, IsNan, SplatZero,
2905 Res, DAG.getUNDEF(DstContainerVT), VL);
2906
2907 if (DstVT.isFixedLengthVector())
2908 Res = convertFromScalableVector(DstVT, Res, DAG, Subtarget);
2909
2910 return Res;
2911}
2912
2914 switch (Opc) {
2915 case ISD::FROUNDEVEN:
2917 case ISD::VP_FROUNDEVEN:
2918 return RISCVFPRndMode::RNE;
2919 case ISD::FTRUNC:
2920 case ISD::STRICT_FTRUNC:
2921 case ISD::VP_FROUNDTOZERO:
2922 return RISCVFPRndMode::RTZ;
2923 case ISD::FFLOOR:
2924 case ISD::STRICT_FFLOOR:
2925 case ISD::VP_FFLOOR:
2926 return RISCVFPRndMode::RDN;
2927 case ISD::FCEIL:
2928 case ISD::STRICT_FCEIL:
2929 case ISD::VP_FCEIL:
2930 return RISCVFPRndMode::RUP;
2931 case ISD::FROUND:
2932 case ISD::STRICT_FROUND:
2933 case ISD::VP_FROUND:
2934 return RISCVFPRndMode::RMM;
2935 case ISD::FRINT:
2936 return RISCVFPRndMode::DYN;
2937 }
2938
2940}
2941
2942// Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND
2943// VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to
2944// the integer domain and back, taking care to avoid converting values that
2945// are NaN or already correct.
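// Rough sketch of the lowering (illustrative):
//   Abs  = |Src|
//   Mask = Mask & (Abs < 2^(precision - 1))   ; ordered compare, so NaNs and
//                                             ; already-integral values drop out
//   Int  = fp-to-int of Src under Mask, using the rounding mode implied by the
//          opcode
//   Res  = copysign(int-to-fp(Int), Src), with inactive lanes keeping Src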
2946static SDValue
2948 const RISCVSubtarget &Subtarget) {
2949 MVT VT = Op.getSimpleValueType();
2950 assert(VT.isVector() && "Unexpected type");
2951
2952 SDLoc DL(Op);
2953
2954 SDValue Src = Op.getOperand(0);
2955
2956 MVT ContainerVT = VT;
2957 if (VT.isFixedLengthVector()) {
2958 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
2959 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
2960 }
2961
2962 SDValue Mask, VL;
2963 if (Op->isVPOpcode()) {
2964 Mask = Op.getOperand(1);
2965 if (VT.isFixedLengthVector())
2966 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
2967 Subtarget);
2968 VL = Op.getOperand(2);
2969 } else {
2970 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2971 }
2972
2973 // Freeze the source since we are increasing the number of uses.
2974 Src = DAG.getFreeze(Src);
2975
2976 // We do the conversion on the absolute value and fix the sign at the end.
2977 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
2978
2979 // Determine the largest integer that can be represented exactly. This and
2980 // values larger than it don't have any fractional bits so don't need to
2981 // be converted.
2982 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT);
2983 unsigned Precision = APFloat::semanticsPrecision(FltSem);
2984 APFloat MaxVal = APFloat(FltSem);
2985 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
2986 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
2987 SDValue MaxValNode =
2988 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
2989 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
2990 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
2991
2992 // If abs(Src) was larger than MaxVal or nan, keep it.
2993 MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
2994 Mask =
2995 DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT,
2996 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT),
2997 Mask, Mask, VL});
2998
2999 // Truncate to integer and convert back to FP.
3000 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3001 MVT XLenVT = Subtarget.getXLenVT();
3002 SDValue Truncated;
3003
3004 switch (Op.getOpcode()) {
3005 default:
3006 llvm_unreachable("Unexpected opcode");
3007 case ISD::FCEIL:
3008 case ISD::VP_FCEIL:
3009 case ISD::FFLOOR:
3010 case ISD::VP_FFLOOR:
3011 case ISD::FROUND:
3012 case ISD::FROUNDEVEN:
3013 case ISD::VP_FROUND:
3014 case ISD::VP_FROUNDEVEN:
3015 case ISD::VP_FROUNDTOZERO: {
3018 Truncated = DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, IntVT, Src, Mask,
3019 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
3020 break;
3021 }
3022 case ISD::FTRUNC:
3023 Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src,
3024 Mask, VL);
3025 break;
3026 case ISD::FRINT:
3027 case ISD::VP_FRINT:
3028 Truncated = DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, IntVT, Src, Mask, VL);
3029 break;
3030 case ISD::FNEARBYINT:
3031 case ISD::VP_FNEARBYINT:
3032 Truncated = DAG.getNode(RISCVISD::VFROUND_NOEXCEPT_VL, DL, ContainerVT, Src,
3033 Mask, VL);
3034 break;
3035 }
3036
3037 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3038 if (Truncated.getOpcode() != RISCVISD::VFROUND_NOEXCEPT_VL)
3039 Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
3040 Mask, VL);
3041
3042 // Restore the original sign so that -0.0 is preserved.
3043 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3044 Src, Src, Mask, VL);
3045
3046 if (!VT.isFixedLengthVector())
3047 return Truncated;
3048
3049 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3050}
3051
3052// Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND
3053// STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting sNaNs in the source
3054// to qNaNs and converting the new source to integer and back to FP.
3055static SDValue
3057 const RISCVSubtarget &Subtarget) {
3058 SDLoc DL(Op);
3059 MVT VT = Op.getSimpleValueType();
3060 SDValue Chain = Op.getOperand(0);
3061 SDValue Src = Op.getOperand(1);
3062
3063 MVT ContainerVT = VT;
3064 if (VT.isFixedLengthVector()) {
3065 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3066 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3067 }
3068
3069 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3070
3071 // Freeze the source since we are increasing the number of uses.
3072 Src = DAG.getFreeze(Src);
3073
3074 // Convert sNaN to qNaN by executing x + x for each unordered element x in Src.
3075 MVT MaskVT = Mask.getSimpleValueType();
3077 DAG.getVTList(MaskVT, MVT::Other),
3078 {Chain, Src, Src, DAG.getCondCode(ISD::SETUNE),
3079 DAG.getUNDEF(MaskVT), Mask, VL});
3080 Chain = Unorder.getValue(1);
3082 DAG.getVTList(ContainerVT, MVT::Other),
3083 {Chain, Src, Src, DAG.getUNDEF(ContainerVT), Unorder, VL});
3084 Chain = Src.getValue(1);
3085
3086 // We do the conversion on the absolute value and fix the sign at the end.
3087 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3088
3089 // Determine the largest integer that can be represented exactly. This and
3090 // values larger than it don't have any fractional bits so don't need to
3091 // be converted.
3092 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT);
3093 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3094 APFloat MaxVal = APFloat(FltSem);
3095 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3096 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3097 SDValue MaxValNode =
3098 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3099 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3100 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3101
3102 // If abs(Src) was larger than MaxVal or nan, keep it.
3103 Mask = DAG.getNode(
3104 RISCVISD::SETCC_VL, DL, MaskVT,
3105 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});
3106
3107 // Truncate to integer and convert back to FP.
3108 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3109 MVT XLenVT = Subtarget.getXLenVT();
3110 SDValue Truncated;
3111
3112 switch (Op.getOpcode()) {
3113 default:
3114 llvm_unreachable("Unexpected opcode");
3115 case ISD::STRICT_FCEIL:
3116 case ISD::STRICT_FFLOOR:
3117 case ISD::STRICT_FROUND:
3121 Truncated = DAG.getNode(
3122 RISCVISD::STRICT_VFCVT_RM_X_F_VL, DL, DAG.getVTList(IntVT, MVT::Other),
3123 {Chain, Src, Mask, DAG.getTargetConstant(FRM, DL, XLenVT), VL});
3124 break;
3125 }
3126 case ISD::STRICT_FTRUNC:
3127 Truncated =
3129 DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL);
3130 break;
3133 DAG.getVTList(ContainerVT, MVT::Other), Chain, Src,
3134 Mask, VL);
3135 break;
3136 }
3137 Chain = Truncated.getValue(1);
3138
3139 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3140 if (Op.getOpcode() != ISD::STRICT_FNEARBYINT) {
3141 Truncated = DAG.getNode(RISCVISD::STRICT_SINT_TO_FP_VL, DL,
3142 DAG.getVTList(ContainerVT, MVT::Other), Chain,
3143 Truncated, Mask, VL);
3144 Chain = Truncated.getValue(1);
3145 }
3146
3147 // Restore the original sign so that -0.0 is preserved.
3148 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3149 Src, Src, Mask, VL);
3150
3151 if (VT.isFixedLengthVector())
3152 Truncated = convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3153 return DAG.getMergeValues({Truncated, Chain}, DL);
3154}
3155
3156static SDValue
3158 const RISCVSubtarget &Subtarget) {
3159 MVT VT = Op.getSimpleValueType();
3160 if (VT.isVector())
3161 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
3162
3163 if (DAG.shouldOptForSize())
3164 return SDValue();
3165
3166 SDLoc DL(Op);
3167 SDValue Src = Op.getOperand(0);
3168
3169 // Create an integer the size of the mantissa with the MSB set. This and all
3170 // values larger than it don't have any fractional bits so don't need to be
3171 // converted.
3172 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
3173 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3174 APFloat MaxVal = APFloat(FltSem);
3175 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3176 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3177 SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
3178
3180 return DAG.getNode(RISCVISD::FROUND, DL, VT, Src, MaxValNode,
3181 DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT()));
3182}
3183
3184// Expand vector LRINT and LLRINT by converting to the integer domain.
3186 const RISCVSubtarget &Subtarget) {
3187 MVT VT = Op.getSimpleValueType();
3188 assert(VT.isVector() && "Unexpected type");
3189
3190 SDLoc DL(Op);
3191 SDValue Src = Op.getOperand(0);
3192 MVT ContainerVT = VT;
3193
3194 if (VT.isFixedLengthVector()) {
3195 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3196 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3197 }
3198
3199 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3200 SDValue Truncated =
3201 DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, ContainerVT, Src, Mask, VL);
3202
3203 if (!VT.isFixedLengthVector())
3204 return Truncated;
3205
3206 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3207}
3208
3209static SDValue
3211 const SDLoc &DL, EVT VT, SDValue Merge, SDValue Op,
3212 SDValue Offset, SDValue Mask, SDValue VL,
3214 if (Merge.isUndef())
3216 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3217 SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp};
3218 return DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops);
3219}
3220
3221static SDValue
3222getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL,
3224 SDValue VL,
3226 if (Merge.isUndef())
3228 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3229 SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp};
3230 return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VT, Ops);
3231}
3232
3233static MVT getLMUL1VT(MVT VT) {
3235 "Unexpected vector MVT");
3239}
3240
3244 int64_t Addend;
3245};
3246
3247static std::optional<uint64_t> getExactInteger(const APFloat &APF,
3249 // We will use a SINT_TO_FP to materialize this constant so we should use a
3250 // signed APSInt here.
3251 APSInt ValInt(BitWidth, /*IsUnsigned*/ false);
3252 // We use an arbitrary rounding mode here. If a floating-point value is an exact
3253 // integer (e.g., 1.0), the rounding mode does not affect the output value. If
3254 // the rounding mode changes the output value, then it is not an exact
3255 // integer.
3257 bool IsExact;
3258 // If it is out of signed integer range, it will return an invalid operation.
3259 // If it is not an exact integer, IsExact is false.
3260 if ((APF.convertToInteger(ValInt, ArbitraryRM, &IsExact) ==
3262 !IsExact)
3263 return std::nullopt;
3264 return ValInt.extractBitsAsZExtValue(BitWidth, 0);
3265}
3266
3267// Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
3268// to the (non-zero) step S and start value X. This can be then lowered as the
3269// RVV sequence (VID * S) + X, for example.
3270// The step S is represented as an integer numerator divided by a positive
3271// denominator. Note that the implementation currently only identifies
3272// sequences in which either the numerator is +/- 1 or the denominator is 1. It
3273// cannot detect 2/3, for example.
3274// Note that this method will also match potentially unappealing index
3275// sequences, like <i32 0, i32 50939494>, however it is left to the caller to
3276// determine whether this is worth generating code for.
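// For example (illustrative): <1, 3, 5, 7> yields {StepNumerator=2,
// StepDenominator=1, Addend=1}, i.e. VID*2 + 1, while <0, 0, 1, 1> yields
// {StepNumerator=1, StepDenominator=2, Addend=0}, i.e. VID/2.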
3277static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op,
3278 unsigned EltSizeInBits) {
3279 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
3280 if (!cast<BuildVectorSDNode>(Op)->isConstant())
3281 return std::nullopt;
3282 bool IsInteger = Op.getValueType().isInteger();
3283
3284 std::optional<unsigned> SeqStepDenom;
3285 std::optional<int64_t> SeqStepNum, SeqAddend;
3286 std::optional<std::pair<uint64_t, unsigned>> PrevElt;
3287 assert(EltSizeInBits >= Op.getValueType().getScalarSizeInBits());
3288
3289 // First extract the ops into a list of constant integer values. This may not
3290 // be possible for floats if they're not all representable as integers.
3292 const unsigned OpSize = Op.getScalarValueSizeInBits();
3293 for (auto [Idx, Elt] : enumerate(Op->op_values())) {
3294 if (Elt.isUndef()) {
3295 Elts[Idx] = std::nullopt;
3296 continue;
3297 }
3298 if (IsInteger) {
3299 Elts[Idx] = Elt->getAsZExtVal() & maskTrailingOnes<uint64_t>(OpSize);
3300 } else {
3301 auto ExactInteger =
3302 getExactInteger(cast<ConstantFPSDNode>(Elt)->getValueAPF(), OpSize);
3303 if (!ExactInteger)
3304 return std::nullopt;
3305 Elts[Idx] = *ExactInteger;
3306 }
3307 }
3308
3309 for (auto [Idx, Elt] : enumerate(Elts)) {
3310 // Assume undef elements match the sequence; we just have to be careful
3311 // when interpolating across them.
3312 if (!Elt)
3313 continue;
3314
3315 if (PrevElt) {
3316 // Calculate the step since the last non-undef element, and ensure
3317 // it's consistent across the entire sequence.
3318 unsigned IdxDiff = Idx - PrevElt->second;
3319 int64_t ValDiff = SignExtend64(*Elt - PrevElt->first, EltSizeInBits);
3320
3321 // A zero value difference means that we're somewhere in the middle
3322 // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
3323 // step change before evaluating the sequence.
3324 if (ValDiff == 0)
3325 continue;
3326
3327 int64_t Remainder = ValDiff % IdxDiff;
3328 // Normalize the step if it's greater than 1.
3329 if (Remainder != ValDiff) {
3330 // The difference must cleanly divide the element span.
3331 if (Remainder != 0)
3332 return std::nullopt;
3333 ValDiff /= IdxDiff;
3334 IdxDiff = 1;
3335 }
3336
3337 if (!SeqStepNum)
3338 SeqStepNum = ValDiff;
3339 else if (ValDiff != SeqStepNum)
3340 return std::nullopt;
3341
3342 if (!SeqStepDenom)
3343 SeqStepDenom = IdxDiff;
3344 else if (IdxDiff != *SeqStepDenom)
3345 return std::nullopt;
3346 }
3347
3348 // Record this non-undef element for later.
3349 if (!PrevElt || PrevElt->first != *Elt)
3350 PrevElt = std::make_pair(*Elt, Idx);
3351 }
3352
3353 // We need to have logged a step for this to count as a legal index sequence.
3354 if (!SeqStepNum || !SeqStepDenom)
3355 return std::nullopt;
3356
3357 // Loop back through the sequence and validate elements we might have skipped
3358 // while waiting for a valid step. While doing this, log any sequence addend.
3359 for (auto [Idx, Elt] : enumerate(Elts)) {
3360 if (!Elt)
3361 continue;
3362 uint64_t ExpectedVal =
3363 (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom;
3364 int64_t Addend = SignExtend64(*Elt - ExpectedVal, EltSizeInBits);
3365 if (!SeqAddend)
3366 SeqAddend = Addend;
3367 else if (Addend != SeqAddend)
3368 return std::nullopt;
3369 }
3370
3371 assert(SeqAddend && "Must have an addend if we have a step");
3372
3373 return VIDSequence{*SeqStepNum, *SeqStepDenom, *SeqAddend};
3374}
3375
3376// Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
3377// and lower it as a VRGATHER_VX_VL from the source vector.
3378static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
3379 SelectionDAG &DAG,
3380 const RISCVSubtarget &Subtarget) {
3381 if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
3382 return SDValue();
3383 SDValue Vec = SplatVal.getOperand(0);
3384 // Only perform this optimization on vectors of the same size for simplicity.
3385 // Don't perform this optimization for i1 vectors.
3386 // FIXME: Support i1 vectors, maybe by promoting to i8?
3387 if (Vec.getValueType() != VT || VT.getVectorElementType() == MVT::i1)
3388 return SDValue();
3389 SDValue Idx = SplatVal.getOperand(1);
3390 // The index must be a legal type.
3391 if (Idx.getValueType() != Subtarget.getXLenVT())
3392 return SDValue();
3393
3394 MVT ContainerVT = VT;
3395 if (VT.isFixedLengthVector()) {
3396 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3397 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3398 }
3399
3400 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3401
3402 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Vec,
3403 Idx, DAG.getUNDEF(ContainerVT), Mask, VL);
3404
3405 if (!VT.isFixedLengthVector())
3406 return Gather;
3407
3408 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
3409}
3410
3411
3412/// Try and optimize BUILD_VECTORs with "dominant values" - these are values
3413/// which constitute a large proportion of the elements. In such cases we can
3414/// splat a vector with the dominant element and make up the shortfall with
3415/// INSERT_VECTOR_ELTs. Returns an empty SDValue if not profitable.
3416/// Note that this includes vectors of 2 elements by association. The
3417/// upper-most element is the "dominant" one, allowing us to use a splat to
3418/// "insert" the upper element, and an insert of the lower element at position
3419/// 0, which improves codegen.
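/// For example (illustrative): <2.0, 2.0, 7.0, 2.0> can be built as a splat of
/// 2.0 followed by a single insert of 7.0 at index 2, instead of four scalar
/// inserts.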
3421 const RISCVSubtarget &Subtarget) {
3422 MVT VT = Op.getSimpleValueType();
3423 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3424
3425 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3426
3427 SDLoc DL(Op);
3428 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3429
3430 MVT XLenVT = Subtarget.getXLenVT();
3431 unsigned NumElts = Op.getNumOperands();
3432
3433 SDValue DominantValue;
3434 unsigned MostCommonCount = 0;
3435 DenseMap<SDValue, unsigned> ValueCounts;
3436 unsigned NumUndefElts =
3437 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
3438
3439 // Track the number of scalar loads we know we'd be inserting, estimated as
3440 // any non-zero floating-point constant. Other kinds of element are either
3441 // already in registers or are materialized on demand. The threshold at which
3442 // a vector load is more desirable than several scalar materialization and
3443 // vector-insertion instructions is not known.
3444 unsigned NumScalarLoads = 0;
3445
3446 for (SDValue V : Op->op_values()) {
3447 if (V.isUndef())
3448 continue;
3449
3450 ValueCounts.insert(std::make_pair(V, 0));
3451 unsigned &Count = ValueCounts[V];
3452 if (0 == Count)
3453 if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
3454 NumScalarLoads += !CFP->isExactlyValue(+0.0);
3455
3456 // Is this value dominant? In case of a tie, prefer the highest element as
3457 // it's cheaper to insert near the beginning of a vector than it is at the
3458 // end.
3459 if (++Count >= MostCommonCount) {
3460 DominantValue = V;
3461 MostCommonCount = Count;
3462 }
3463 }
3464
3465 assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
3466 unsigned NumDefElts = NumElts - NumUndefElts;
3467 unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
3468
3469 // Don't perform this optimization when optimizing for size, since
3470 // materializing elements and inserting them tends to cause code bloat.
3471 if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
3472 (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) &&
3473 ((MostCommonCount > DominantValueCountThreshold) ||
3474 (ValueCounts.size() <= Log2_32(NumDefElts)))) {
3475 // Start by splatting the most common element.
3476 SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
3477
3478 DenseSet<SDValue> Processed{DominantValue};
3479
3480 // We can handle an insert into the last element (of a splat) via
3481 // v(f)slide1down. This is slightly better than the vslideup insert
3482 // lowering as it avoids the need for a vector group temporary. It
3483 // is also better than using vmerge.vx as it avoids the need to
3484 // materialize the mask in a vector register.
3485 if (SDValue LastOp = Op->getOperand(Op->getNumOperands() - 1);
3486 !LastOp.isUndef() && ValueCounts[LastOp] == 1 &&
3487 LastOp != DominantValue) {
3488 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3489 auto OpCode =
3491 if (!VT.isFloatingPoint())
3492 LastOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, LastOp);
3493 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
3494 LastOp, Mask, VL);
3495 Vec = convertFromScalableVector(VT, Vec, DAG, Subtarget);
3496 Processed.insert(LastOp);
3497 }
3498
3499 MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
3500 for (const auto &OpIdx : enumerate(Op->ops())) {
3501 const SDValue &V = OpIdx.value();
3502 if (V.isUndef() || !Processed.insert(V).second)
3503 continue;
3504 if (ValueCounts[V] == 1) {
3505 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
3506 DAG.getVectorIdxConstant(OpIdx.index(), DL));
3507 } else {
3508 // Blend in all instances of this value using a VSELECT, using a
3509 // mask where each bit signals whether that element is the one
3510 // we're after.
3512 transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
3513 return DAG.getConstant(V == V1, DL, XLenVT);
3514 });
3515 Vec = DAG.getNode(ISD::VSELECT, DL, VT,
3516 DAG.getBuildVector(SelMaskTy, DL, Ops),
3517 DAG.getSplatBuildVector(VT, DL, V), Vec);
3518 }
3519 }
3520
3521 return Vec;
3522 }
3523
3524 return SDValue();
3525}
3526
3528 const RISCVSubtarget &Subtarget) {
3529 MVT VT = Op.getSimpleValueType();
3530 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3531
3532 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3533
3534 SDLoc DL(Op);
3535 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3536
3537 MVT XLenVT = Subtarget.getXLenVT();
3538 unsigned NumElts = Op.getNumOperands();
3539
3540 if (VT.getVectorElementType() == MVT::i1) {
3541 if (ISD::isBuildVectorAllZeros(Op.getNode())) {
3542 SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
3543 return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
3544 }
3545
3546 if (ISD::isBuildVectorAllOnes(Op.getNode())) {
3547 SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
3548 return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
3549 }
3550
3551 // Lower constant mask BUILD_VECTORs via an integer vector type, in
3552 // scalar integer chunks whose bit-width depends on the number of mask
3553 // bits and XLEN.
3554 // First, determine the most appropriate scalar integer type to use. This
3555 // is at most XLenVT, but may be shrunk to a smaller vector element type
3556 // according to the size of the final vector - use i8 chunks rather than
3557 // XLenVT if we're producing a v8i1. This results in more consistent
3558 // codegen across RV32 and RV64.
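    // For example (illustrative, assuming ELEN >= 32): a constant v16i1 mask
    // is built from a single i16 chunk, while a constant v64i1 mask on RV32 is
    // built from two i32 chunks.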
3559 unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
3560 NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELen());
3561 // If we have to use more than one INSERT_VECTOR_ELT then this
3562 // optimization is likely to increase code size; avoid performing it in
3563 // such a case. We can use a load from a constant pool in this case.
3564 if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
3565 return SDValue();
3566 // Now we can create our integer vector type. Note that it may be larger
3567 // than the resulting mask type: v4i1 would use v1i8 as its integer type.
3568 unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits);
3569 MVT IntegerViaVecVT =
3570 MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
3571 IntegerViaVecElts);
3572
3573 uint64_t Bits = 0;
3574 unsigned BitPos = 0, IntegerEltIdx = 0;
3575 SmallVector<SDValue, 8> Elts(IntegerViaVecElts);
3576
3577 for (unsigned I = 0; I < NumElts;) {
3578 SDValue V = Op.getOperand(I);
3579 bool BitValue = !V.isUndef() && V->getAsZExtVal();
3580 Bits |= ((uint64_t)BitValue << BitPos);
3581 ++BitPos;
3582 ++I;
3583
3584 // Once we accumulate enough bits to fill our scalar type or process the
3585 // last element, insert into our vector and clear our accumulated data.
3586 if (I % NumViaIntegerBits == 0 || I == NumElts) {
3587 if (NumViaIntegerBits <= 32)
3588 Bits = SignExtend64<32>(Bits);
3589 SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
3590 Elts[IntegerEltIdx] = Elt;
3591 Bits = 0;
3592 BitPos = 0;
3593 IntegerEltIdx++;
3594 }
3595 }
3596
3597 SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts);
3598
3599 if (NumElts < NumViaIntegerBits) {
3600 // If we're producing a smaller vector than our minimum legal integer
3601 // type, bitcast to the equivalent (known-legal) mask type, and extract
3602 // our final mask.
3603 assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
3604 Vec = DAG.getBitcast(MVT::v8i1, Vec);
3605 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
3606 DAG.getConstant(0, DL, XLenVT));
3607 } else {
3608 // Else we must have produced an integer type with the same size as the
3609 // mask type; bitcast for the final result.
3610 assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
3611 Vec = DAG.getBitcast(VT, Vec);
3612 }
3613
3614 return Vec;
3615 }
3616
3617 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3618 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
3620 if (!VT.isFloatingPoint())
3621 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
3622 Splat =
3623 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
3624 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
3625 }
3626
3627 // Try and match index sequences, which we can lower to the vid instruction
3628 // with optional modifications. An all-undef vector is matched by
3629 // getSplatValue, above.
3630 if (auto SimpleVID = isSimpleVIDSequence(Op, Op.getScalarValueSizeInBits())) {
3631 int64_t StepNumerator = SimpleVID->StepNumerator;
3632 unsigned StepDenominator = SimpleVID->StepDenominator;
3633 int64_t Addend = SimpleVID->Addend;
3634
3635 assert(StepNumerator != 0 && "Invalid step");
3636 bool Negate = false;
3637 int64_t SplatStepVal = StepNumerator;
3638 unsigned StepOpcode = ISD::MUL;
3639 // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
3640 // anyway as the shift of 63 won't fit in uimm5.
3641 if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
3642 isPowerOf2_64(std::abs(StepNumerator))) {
3643 Negate = StepNumerator < 0;
3644 StepOpcode = ISD::SHL;
3645 SplatStepVal = Log2_64(std::abs(StepNumerator));
3646 }
3647
3648 // Only emit VIDs with suitably-small steps/addends. We use imm5 as a
3649 // threshold since it's the immediate value many RVV instructions accept.
3650 // There is no vmul.vi instruction, so ensure the multiply constant can fit
3651 // in a single addi instruction.
3652 if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
3653 (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
3654 isPowerOf2_32(StepDenominator) &&
3655 (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
3656 MVT VIDVT =
3658 MVT VIDContainerVT =
3659 getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
3660 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
3661 // Convert right out of the scalable type so we can use standard ISD
3662 // nodes for the rest of the computation. If we used scalable types with
3663 // these, we'd lose the fixed-length vector info and generate worse
3664 // vsetvli code.
3665 VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
3666 if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
3667 (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
3668 SDValue SplatStep = DAG.getConstant(SplatStepVal, DL, VIDVT);
3669 VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
3670 }
3671 if (StepDenominator != 1) {
3672 SDValue SplatStep =
3673 DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT);
3674 VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
3675 }
3676 if (Addend != 0 || Negate) {
3677 SDValue SplatAddend = DAG.getConstant(Addend, DL, VIDVT);
3678 VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
3679 VID);
3680 }
3681 if (VT.isFloatingPoint()) {
3682 // TODO: Use vfwcvt to reduce register pressure.
3683 VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
3684 }
3685 return VID;
3686 }
3687 }
3688
3689 // For very small build_vectors, use a single scalar insert of a constant.
3690 // TODO: Base this on constant rematerialization cost, not size.
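  // For example (illustrative): v4i8 <1, 2, 3, 4> becomes the single i32
  // constant 0x04030201 inserted into element 0 of an i32 vector and bitcast
  // back to v4i8.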
3691 const unsigned EltBitSize = VT.getScalarSizeInBits();
3692 if (VT.getSizeInBits() <= 32 &&
3694 MVT ViaIntVT = MVT::getIntegerVT(VT.getSizeInBits());
3695 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) &&
3696 "Unexpected sequence type");
3697 // If we can use the original VL with the modified element type, this
3698 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3699 // be moved into InsertVSETVLI?
3700 unsigned ViaVecLen =
3701 (Subtarget.getRealMinVLen() >= VT.getSizeInBits() * NumElts) ? NumElts : 1;
3702 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3703
3704 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3705 uint64_t SplatValue = 0;
3706 // Construct the amalgamated value at this larger vector type.
3707 for (const auto &OpIdx : enumerate(Op->op_values())) {
3708 const auto &SeqV = OpIdx.value();
3709 if (!SeqV.isUndef())
3710 SplatValue |=
3711 ((SeqV->getAsZExtVal() & EltMask) << (OpIdx.index() * EltBitSize));
3712 }
3713
3714 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3715 // achieve better constant materialization.
3716 if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
3717 SplatValue = SignExtend64<32>(SplatValue);
3718
3719 SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ViaVecVT,
3720 DAG.getUNDEF(ViaVecVT),
3721 DAG.getConstant(SplatValue, DL, XLenVT),
3722 DAG.getVectorIdxConstant(0, DL));
3723 if (ViaVecLen != 1)
3725 MVT::getVectorVT(ViaIntVT, 1), Vec,
3726 DAG.getConstant(0, DL, XLenVT));
3727 return DAG.getBitcast(VT, Vec);
3728 }
3729
3730
3731 // Attempt to detect "hidden" splats, which only reveal themselves as splats
3732 // when re-interpreted as a vector with a larger element type. For example,
3733 // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
3734 // could be instead splat as
3735 // v2i32 = build_vector i32 0x00010000, i32 0x00010000
3736 // TODO: This optimization could also work on non-constant splats, but it
3737 // would require bit-manipulation instructions to construct the splat value.
3738 SmallVector<SDValue> Sequence;
3739 const auto *BV = cast<BuildVectorSDNode>(Op);
3740 if (VT.isInteger() && EltBitSize < Subtarget.getELen() &&
3742 BV->getRepeatedSequence(Sequence) &&
3743 (Sequence.size() * EltBitSize) <= Subtarget.getELen()) {
3744 unsigned SeqLen = Sequence.size();
3745 MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
3746 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
3747 ViaIntVT == MVT::i64) &&
3748 "Unexpected sequence type");
3749
3750 // If we can use the original VL with the modified element type, this
3751 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3752 // be moved into InsertVSETVLI?
3753 const unsigned RequiredVL = NumElts / SeqLen;
3754 const unsigned ViaVecLen =
3755 (Subtarget.getRealMinVLen() >= ViaIntVT.getSizeInBits() * NumElts) ?
3756 NumElts : RequiredVL;
3757 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3758
3759 unsigned EltIdx = 0;
3760 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3761 uint64_t SplatValue = 0;
3762 // Construct the amalgamated value which can be splatted as this larger
3763 // vector type.
3764 for (const auto &SeqV : Sequence) {
3765 if (!SeqV.isUndef())
3766 SplatValue |=
3767 ((SeqV->getAsZExtVal() & EltMask) << (EltIdx * EltBitSize));
3768 EltIdx++;
3769 }
3770
3771 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3772 // achieve better constant materialization.
3773 if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
3774 SplatValue = SignExtend64<32>(SplatValue);
3775
3776 // Since we can't introduce illegal i64 types at this stage, we can only
3777 // perform an i64 splat on RV32 if it is its own sign-extended value. That
3778 // way we can use RVV instructions to splat.
3779 assert((ViaIntVT.bitsLE(XLenVT) ||
3780 (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
3781 "Unexpected bitcast sequence");
3782 if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
3783 SDValue ViaVL =
3784 DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
3785 MVT ViaContainerVT =
3786 getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
3787 SDValue Splat =
3788 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
3789 DAG.getUNDEF(ViaContainerVT),
3790 DAG.getConstant(SplatValue, DL, XLenVT), ViaVL);
3791 Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
3792 if (ViaVecLen != RequiredVL)
3793 Splat = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
3794 MVT::getVectorVT(ViaIntVT, RequiredVL), Splat,
3795 DAG.getConstant(0, DL, XLenVT));
3796 return DAG.getBitcast(VT, Splat);
3797 }
3798 }
3799
3800 // If the number of signbits allows, see if we can lower as a <N x i8>.
3801 // Our main goal here is to reduce LMUL (and thus work) required to
3802 // build the constant, but we will also narrow if the resulting
3803 // narrow vector is known to materialize cheaply.
3804 // TODO: We really should be costing the smaller vector. There are
3805 // profitable cases this misses.
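// For example, a v8i32 constant whose elements all fit in a signed 8-bit range
// can be built as a v8i8 constant and then widened with a sign extension
// (vsext.vf4).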
3806 if (EltBitSize > 8 && VT.isInteger() &&
3807 (NumElts <= 4 || VT.getSizeInBits() > Subtarget.getRealMinVLen())) {
3808 unsigned SignBits = DAG.ComputeNumSignBits(Op);
3809 if (EltBitSize - SignBits < 8) {
3810 SDValue Source = DAG.getBuildVector(VT.changeVectorElementType(MVT::i8),
3811 DL, Op->ops());
3812 Source = convertToScalableVector(ContainerVT.changeVectorElementType(MVT::i8),
3813 Source, DAG, Subtarget);
3814 SDValue Res = DAG.getNode(RISCVISD::VSEXT_VL, DL, ContainerVT, Source, Mask, VL);
3815 return convertFromScalableVector(VT, Res, DAG, Subtarget);
3816 }
3817 }
3818
3819 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
3820 return Res;
3821
3822 // For constant vectors, use generic constant pool lowering. Otherwise,
3823 // we'd have to materialize constants in GPRs just to move them into the
3824 // vector.
3825 return SDValue();
3826}
3827
3828 static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
3829 const RISCVSubtarget &Subtarget) {
3830 MVT VT = Op.getSimpleValueType();
3831 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3832
3833 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
3834 ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
3835 return lowerBuildVectorOfConstants(Op, DAG, Subtarget);
3836
3837 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3838
3839 SDLoc DL(Op);
3840 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3841
3842 MVT XLenVT = Subtarget.getXLenVT();
3843
3844 if (VT.getVectorElementType() == MVT::i1) {
3845 // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
3846 // vector type, we have a legal equivalently-sized i8 type, so we can use
3847 // that.
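// For example, v4i1 <1,0,1,1> is built as the v4i8 vector <1,0,1,1>, ANDed
// with 1, and then compared not-equal against zero to recover the mask.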
3848 MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
3849 SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
3850
3851 SDValue WideVec;
3852 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3853 // For a splat, perform a scalar truncate before creating the wider
3854 // vector.
3855 Splat = DAG.getNode(ISD::AND, DL, Splat.getValueType(), Splat,
3856 DAG.getConstant(1, DL, Splat.getValueType()));
3857 WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
3858 } else {
3859 SmallVector<SDValue, 8> Ops(Op->op_values());
3860 WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
3861 SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
3862 WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
3863 }
3864
3865 return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
3866 }
3867
3868 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3869 if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
3870 return Gather;
3871 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
3872 : RISCVISD::VMV_V_X_VL;
3873 if (!VT.isFloatingPoint())
3874 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
3875 Splat =
3876 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
3877 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
3878 }
3879
3880 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
3881 return Res;
3882
3883 // If we're compiling for an exact VLEN value, we can split our work per
3884 // register in the register group.
3885 if (const auto VLen = Subtarget.getRealVLen();
3886 VLen && VT.getSizeInBits().getKnownMinValue() > *VLen) {
3887 MVT ElemVT = VT.getVectorElementType();
3888 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
3889 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3890 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
3891 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
3892 assert(M1VT == getLMUL1VT(M1VT));
3893
3894 // The following semantically builds up a fixed length concat_vector
3895 // of the component build_vectors. We eagerly lower to scalable and
3896 // insert_subvector here to avoid DAG combining it back to a large
3897 // build_vector.
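// For example, with VLEN=128 a v8i64 build_vector (an LMUL=4 value) is emitted
// as four v2i64 build_vectors inserted at element offsets 0, 2, 4 and 6.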
3898 SmallVector<SDValue> BuildVectorOps(Op->op_begin(), Op->op_end());
3899 unsigned NumOpElts = M1VT.getVectorMinNumElements();
3900 SDValue Vec = DAG.getUNDEF(ContainerVT);
3901 for (unsigned i = 0; i < VT.getVectorNumElements(); i += ElemsPerVReg) {
3902 auto OneVRegOfOps = ArrayRef(BuildVectorOps).slice(i, ElemsPerVReg);
3903 SDValue SubBV =
3904 DAG.getNode(ISD::BUILD_VECTOR, DL, OneRegVT, OneVRegOfOps);
3905 SubBV = convertToScalableVector(M1VT, SubBV, DAG, Subtarget);
3906 unsigned InsertIdx = (i / ElemsPerVReg) * NumOpElts;
3907 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubBV,
3908 DAG.getVectorIdxConstant(InsertIdx, DL));
3909 }
3910 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
3911 }
3912
3913 // For m1 vectors, if we have non-undef values in both halves of our vector,
3914 // split the vector into low and high halves, build them separately, then
3915 // use a vselect to combine them. For long vectors, this cuts the critical
3916 // path of the vslide1down sequence in half, and gives us an opportunity
3917 // to special case each half independently. Note that we don't change the
3918 // length of the sub-vectors here, so if both fall back to the generic
3919 // vslide1down path, we should be able to fold the vselect into the final
3920 // vslidedown (for the undef tail) for the first half w/ masking.
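// For example, an 8 element build_vector becomes {e0..e3, undef x4} and
// {undef x4, e4..e7}, combined with a mask that takes the first half from the
// former and the second half from the latter.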
3921 unsigned NumElts = VT.getVectorNumElements();
3922 unsigned NumUndefElts =
3923 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
3924 unsigned NumDefElts = NumElts - NumUndefElts;
3925 if (NumDefElts >= 8 && NumDefElts > NumElts / 2 &&
3926 ContainerVT.bitsLE(getLMUL1VT(ContainerVT))) {
3927 SmallVector<SDValue> SubVecAOps, SubVecBOps;
3928 SmallVector<SDValue> MaskVals;
3929 SDValue UndefElem = DAG.getUNDEF(Op->getOperand(0)->getValueType(0));
3930 SubVecAOps.reserve(NumElts);
3931 SubVecBOps.reserve(NumElts);
3932 for (unsigned i = 0; i < NumElts; i++) {
3933 SDValue Elem = Op->getOperand(i);
3934 if (i < NumElts / 2) {
3935 SubVecAOps.push_back(Elem);
3936 SubVecBOps.push_back(UndefElem);
3937 } else {
3938 SubVecAOps.push_back(UndefElem);
3939 SubVecBOps.push_back(Elem);
3940 }
3941 bool SelectMaskVal = (i < NumElts / 2);
3942 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
3943 }
3944 assert(SubVecAOps.size() == NumElts && SubVecBOps.size() == NumElts &&
3945 MaskVals.size() == NumElts);
3946
3947 SDValue SubVecA = DAG.getBuildVector(VT, DL, SubVecAOps);
3948 SDValue SubVecB = DAG.getBuildVector(VT, DL, SubVecBOps);
3949 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
3950 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
3951 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, SubVecA, SubVecB);
3952 }
3953
3954 // Cap the cost at a value linear to the number of elements in the vector.
3955 // The default lowering is to use the stack: the scalar stores plus the final
3956 // vector load are linear in VL. However, at high LMULs vslide1down and
3957 // vslidedown end up being (at least) linear in LMUL. As a result, using the
3958 // vslide1down lowering for every element ends up being VL*LMUL.
3959 // TODO: Should we be directly costing the stack alternative? Doing so might
3960 // give us a more accurate upper bound.
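// Roughly: with a budget of 2*VL and a per-slide cost that doubles with each
// LMUL step above m1, an m4 build_vector only takes this path if at most about
// half of its elements need their own slide.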
3961 InstructionCost LinearBudget = VT.getVectorNumElements() * 2;
3962
3963 // TODO: unify with TTI getSlideCost.
3964 InstructionCost PerSlideCost = 1;
3965 switch (RISCVTargetLowering::getLMUL(ContainerVT)) {
3966 default: break;
3967 case RISCVII::VLMUL::LMUL_2:
3968 PerSlideCost = 2;
3969 break;
3970 case RISCVII::VLMUL::LMUL_4:
3971 PerSlideCost = 4;
3972 break;
3973 case RISCVII::VLMUL::LMUL_8:
3974 PerSlideCost = 8;
3975 break;
3976 }
3977
3978 // TODO: Should we be using the build instseq then cost + evaluate scheme
3979 // we use for integer constants here?
3980 unsigned UndefCount = 0;
3981 for (const SDValue &V : Op->ops()) {
3982 if (V.isUndef()) {
3983 UndefCount++;
3984 continue;
3985 }
3986 if (UndefCount) {
3987 LinearBudget -= PerSlideCost;
3988 UndefCount = 0;
3989 }
3990 LinearBudget -= PerSlideCost;
3991 }
3992 if (UndefCount) {
3993 LinearBudget -= PerSlideCost;
3994 }
3995
3996 if (LinearBudget < 0)
3997 return SDValue();
3998
3999 assert((!VT.isFloatingPoint() ||
4000 VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) &&
4001 "Illegal type which will result in reserved encoding");
4002
4003 const unsigned Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
4004
4005 SDValue Vec;
4006 UndefCount = 0;
4007 for (SDValue V : Op->ops()) {
4008 if (V.isUndef()) {
4009 UndefCount++;
4010 continue;
4011 }
4012
4013 // Start our sequence with a TA splat in the hopes that hardware is able to
4014 // recognize there's no dependency on the prior value of our temporary
4015 // register.
4016 if (!Vec) {
4017 Vec = DAG.getSplatVector(VT, DL, V);
4018 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
4019 UndefCount = 0;
4020 continue;
4021 }
4022
4023 if (UndefCount) {
4024 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4025 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4026 Vec, Offset, Mask, VL, Policy);
4027 UndefCount = 0;
4028 }
4029 auto OpCode =
4030 VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
4031 if (!VT.isFloatingPoint())
4032 V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V);
4033 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
4034 V, Mask, VL);
4035 }
4036 if (UndefCount) {
4037 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4038 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4039 Vec, Offset, Mask, VL, Policy);
4040 }
4041 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4042}
4043
4044 static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4045 SDValue Lo, SDValue Hi, SDValue VL,
4046 SelectionDAG &DAG) {
4047 if (!Passthru)
4048 Passthru = DAG.getUNDEF(VT);
4049 if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
4050 int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
4051 int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
4052 // If Hi constant is all the same sign bit as Lo, lower this as a custom
4053 // node in order to try and match RVV vector/scalar instructions.
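// For example, Lo = -5 (0xFFFFFFFB) and Hi = -1 satisfy (LoC >> 31) == HiC, so
// the splat of the i64 value -5 reduces to a vmv.v.x of the 32-bit value -5
// (the scalar is sign-extended to SEW).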
4054 if ((LoC >> 31) == HiC)
4055 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4056
4057 // If vl is equal to VLMAX or fits in 4 bits and Hi constant is equal to Lo,
4058 // we could use vmv.v.x whose EEW = 32 to lower it. This allows us to use
4059 // vlmax vsetvli or vsetivli to change the VL.
4060 // FIXME: Support larger constants?
4061 // FIXME: Support non-constant VLs by saturating?
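// For example, splatting the i64 value 0x00000001_00000001 with VL=2 can be
// done as a vmv.v.x of 1 into an i32-element vector with VL=4, then bitcast
// back to the i64-element type.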
4062 if (LoC == HiC) {
4063 SDValue NewVL;
4064 if (isAllOnesConstant(VL) ||
4065 (isa<RegisterSDNode>(VL) &&
4066 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0))
4067 NewVL = DAG.getRegister(RISCV::X0, MVT::i32);
4068 else if (isa<ConstantSDNode>(VL) && isUInt<4>(VL->getAsZExtVal()))
4069 NewVL = DAG.getNode(ISD::ADD, DL, VL.getValueType(), VL, VL);
4070
4071 if (NewVL) {
4072 MVT InterVT =
4073 MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
4074 auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT,
4075 DAG.getUNDEF(InterVT), Lo, NewVL);
4076 return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
4077 }
4078 }
4079 }
4080
4081 // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
4082 if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
4083 isa<ConstantSDNode>(Hi.getOperand(1)) &&
4084 Hi.getConstantOperandVal(1) == 31)
4085 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4086
4087 // If the hi bits of the splat are undefined, then it's fine to just splat Lo
4088 // even if it might be sign extended.
4089 if (Hi.isUndef())
4090 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4091
4092 // Fall back to a stack store and stride x0 vector load.
4093 return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo,
4094 Hi, VL);
4095}
4096
4097// Called by type legalization to handle splat of i64 on RV32.
4098// FIXME: We can optimize this when the type has sign or zero bits in one
4099// of the halves.
4100static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4101 SDValue Scalar, SDValue VL,
4102 SelectionDAG &DAG) {
4103 assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
4104 SDValue Lo, Hi;
4105 std::tie(Lo, Hi) = DAG.SplitScalar(Scalar, DL, MVT::i32, MVT::i32);
4106 return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG);
4107}
4108
4109// This function lowers a splat of a scalar operand Splat with the vector
4110// length VL. It ensures the final sequence is type legal, which is useful when
4111// lowering a splat after type legalization.
4112static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
4113 MVT VT, const SDLoc &DL, SelectionDAG &DAG,
4114 const RISCVSubtarget &Subtarget) {
4115 bool HasPassthru = Passthru && !Passthru.isUndef();
4116 if (!HasPassthru && !Passthru)
4117 Passthru = DAG.getUNDEF(VT);
4118 if (VT.isFloatingPoint())
4119 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL);
4120
4121 MVT XLenVT = Subtarget.getXLenVT();
4122
4123 // Simplest case is that the operand needs to be promoted to XLenVT.
4124 if (Scalar.getValueType().bitsLE(XLenVT)) {
4125 // If the operand is a constant, sign extend to increase our chances
4126 // of being able to use a .vi instruction. ANY_EXTEND would become a
4127 // zero extend and the simm5 check in isel would fail.
4128 // FIXME: Should we ignore the upper bits in isel instead?
4129 unsigned ExtOpc =
4130 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4131 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4132 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
4133 }
4134
4135 assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
4136 "Unexpected scalar for splat lowering!");
4137
4138 if (isOneConstant(VL) && isNullConstant(Scalar))
4139 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru,
4140 DAG.getConstant(0, DL, XLenVT), VL);
4141
4142 // Otherwise use the more complicated splatting algorithm.
4143 return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
4144}
4145
4146// This function lowers an insert of a scalar operand Scalar into lane
4147// 0 of the vector regardless of the value of VL. The contents of the
4148// remaining lanes of the result vector are unspecified. VL is assumed
4149 // to be non-zero.
4150 static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
4151 const SDLoc &DL, SelectionDAG &DAG,
4152 const RISCVSubtarget &Subtarget) {
4153 assert(VT.isScalableVector() && "Expect VT is scalable vector type.");
4154
4155 const MVT XLenVT = Subtarget.getXLenVT();
4156 SDValue Passthru = DAG.getUNDEF(VT);
4157
4158 if (Scalar.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4159 isNullConstant(Scalar.getOperand(1))) {
4160 SDValue ExtractedVal = Scalar.getOperand(0);
4161 // The element types must be the same.
4162 if (ExtractedVal.getValueType().getVectorElementType() ==
4163 VT.getVectorElementType()) {
4164 MVT ExtractedVT = ExtractedVal.getSimpleValueType();
4165 MVT ExtractedContainerVT = ExtractedVT;
4166 if (ExtractedContainerVT.isFixedLengthVector()) {
4167 ExtractedContainerVT = getContainerForFixedLengthVector(
4168 DAG, ExtractedContainerVT, Subtarget);
4169 ExtractedVal = convertToScalableVector(ExtractedContainerVT,
4170 ExtractedVal, DAG, Subtarget);
4171 }
4172 if (ExtractedContainerVT.bitsLE(VT))
4173 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru,
4174 ExtractedVal, DAG.getVectorIdxConstant(0, DL));
4175 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtractedVal,
4176 DAG.getVectorIdxConstant(0, DL));
4177 }
4178 }
4179
4180
4181 if (VT.isFloatingPoint())
4182 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT,
4183 DAG.getUNDEF(VT), Scalar, VL);
4184
4185 // Avoid the tricky legalization cases by falling back to using the
4186 // splat code which already handles it gracefully.
4187 if (!Scalar.getValueType().bitsLE(XLenVT))
4188 return lowerScalarSplat(DAG.getUNDEF(VT), Scalar,
4189 DAG.getConstant(1, DL, XLenVT),
4190 VT, DL, DAG, Subtarget);
4191
4192 // If the operand is a constant, sign extend to increase our chances
4193 // of being able to use a .vi instruction. ANY_EXTEND would become a
4194 // zero extend and the simm5 check in isel would fail.
4195 // FIXME: Should we ignore the upper bits in isel instead?
4196 unsigned ExtOpc =
4197 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4198 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4199 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT,
4200 DAG.getUNDEF(VT), Scalar, VL);
4201}
4202
4203 // Is this a shuffle that extracts either the even or odd elements of a vector?
4204// That is, specifically, either (a) or (b) below.
4205// t34: v8i8 = extract_subvector t11, Constant:i64<0>
4206// t33: v8i8 = extract_subvector t11, Constant:i64<8>
4207// a) t35: v8i8 = vector_shuffle<0,2,4,6,8,10,12,14> t34, t33
4208// b) t35: v8i8 = vector_shuffle<1,3,5,7,9,11,13,15> t34, t33
4209 // Returns {Src Vector, Even Elements} on success
4210static bool isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1,
4211 SDValue V2, ArrayRef<int> Mask,
4212 const RISCVSubtarget &Subtarget) {
4213 // Need to be able to widen the vector.
4214 if (VT.getScalarSizeInBits() >= Subtarget.getELen())
4215 return false;
4216
4217 // Both inputs must be extracts.
4218 if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
4219 V2.getOpcode() != ISD::EXTRACT_SUBVECTOR)
4220 return false;
4221
4222 // Extracting from the same source.
4223 SDValue Src = V1.getOperand(0);
4224 if (Src != V2.getOperand(0))
4225 return false;
4226
4227 // Src needs to have twice the number of elements.
4228 if (Src.getValueType().getVectorNumElements() != (Mask.size() * 2))
4229 return false;
4230
4231 // The extracts must extract the two halves of the source.
4232 if (V1.getConstantOperandVal(1) != 0 ||
4233 V2.getConstantOperandVal(1) != Mask.size())
4234 return false;
4235
4236 // First index must be the first even or odd element from V1.
4237 if (Mask[0] != 0 && Mask[0] != 1)
4238 return false;
4239
4240 // The others must increase by 2 each time.
4241 // TODO: Support undef elements?
4242 for (unsigned i = 1; i != Mask.size(); ++i)
4243 if (Mask[i] != Mask[i - 1] + 2)
4244 return false;
4245
4246 return true;
4247}
4248
4249/// Is this shuffle interleaving contiguous elements from one vector into the
4250/// even elements and contiguous elements from another vector into the odd
4251/// elements. \p EvenSrc will contain the element that should be in the first
4252/// even element. \p OddSrc will contain the element that should be in the first
4253/// odd element. These can be the first element in a source or the element half
4254/// way through the source.
4255static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
4256 int &OddSrc, const RISCVSubtarget &Subtarget) {
4257 // We need to be able to widen elements to the next larger integer type.
4258 if (VT.getScalarSizeInBits() >= Subtarget.getELen())
4259 return false;
4260
4261 int Size = Mask.size();
4262 int NumElts = VT.getVectorNumElements();
4263 assert(Size == (int)NumElts && "Unexpected mask size");
4264
4265 SmallVector<unsigned, 2> StartIndexes;
4266 if (!ShuffleVectorInst::isInterleaveMask(Mask, 2, Size * 2, StartIndexes))
4267 return false;
4268
4269 EvenSrc = StartIndexes[0];
4270 OddSrc = StartIndexes[1];
4271
4272 // One source should be low half of first vector.
4273 if (EvenSrc != 0 && OddSrc != 0)
4274 return false;
4275
4276 // Subvectors will be extracted from either the start of the two input
4277 // vectors, or from the start and middle of the first vector if it's a unary
4278 // interleave.
4279 // In both cases, HalfNumElts will be extracted.
4280 // We need to ensure that the extract indices are 0 or HalfNumElts otherwise
4281 // we'll create an illegal extract_subvector.
4282 // FIXME: We could support other values using a slidedown first.
4283 int HalfNumElts = NumElts / 2;
4284 return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0);
4285}
4286
4287/// Match shuffles that concatenate two vectors, rotate the concatenation,
4288/// and then extract the original number of elements from the rotated result.
4289/// This is equivalent to vector.splice or X86's PALIGNR instruction. The
4290/// returned rotation amount is for a rotate right, where elements move from
4291/// higher elements to lower elements. \p LoSrc indicates the first source
4292/// vector of the rotate or -1 for undef. \p HiSrc indicates the second vector
4293/// of the rotate or -1 for undef. At least one of \p LoSrc and \p HiSrc will be
4294/// 0 or 1 if a rotation is found.
4295///
4296/// NOTE: We talk about rotate to the right which matches how bit shift and
4297/// rotate instructions are described where LSBs are on the right, but LLVM IR
4298/// and the table below write vectors with the lowest elements on the left.
4299static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef<int> Mask) {
4300 int Size = Mask.size();
4301
4302 // We need to detect various ways of spelling a rotation:
4303 // [11, 12, 13, 14, 15, 0, 1, 2]
4304 // [-1, 12, 13, 14, -1, -1, 1, -1]
4305 // [-1, -1, -1, -1, -1, -1, 1, 2]
4306 // [ 3, 4, 5, 6, 7, 8, 9, 10]
4307 // [-1, 4, 5, 6, -1, -1, 9, -1]
4308 // [-1, 4, 5, 6, -1, -1, -1, -1]
4309 int Rotation = 0;
4310 LoSrc = -1;
4311 HiSrc = -1;
4312 for (int i = 0; i != Size; ++i) {
4313 int M = Mask[i];
4314 if (M < 0)
4315 continue;
4316
4317 // Determine where a rotate vector would have started.
4318 int StartIdx = i - (M % Size);
4319 // The identity rotation isn't interesting, stop.
4320 if (StartIdx == 0)
4321 return -1;
4322
4323 // If we found the tail of a vector the rotation must be the missing
4324 // front. If we found the head of a vector, it must be how much of the
4325 // head.
4326 int CandidateRotation = StartIdx < 0 ? -StartIdx : Size - StartIdx;
4327
4328 if (Rotation == 0)
4329 Rotation = CandidateRotation;
4330 else if (Rotation != CandidateRotation)
4331 // The rotations don't match, so we can't match this mask.
4332 return -1;
4333
4334 // Compute which value this mask is pointing at.
4335 int MaskSrc = M < Size ? 0 : 1;
4336
4337 // Compute which of the two target values this index should be assigned to.
4338 // This reflects whether the high elements are remaining or the low elements
4339 // are remaining.
4340 int &TargetSrc = StartIdx < 0 ? HiSrc : LoSrc;
4341
4342 // Either set up this value if we've not encountered it before, or check
4343 // that it remains consistent.
4344 if (TargetSrc < 0)
4345 TargetSrc = MaskSrc;
4346 else if (TargetSrc != MaskSrc)
4347 // This may be a rotation, but it pulls from the inputs in some
4348 // unsupported interleaving.
4349 return -1;
4350 }
4351
4352 // Check that we successfully analyzed the mask, and normalize the results.
4353 assert(Rotation != 0 && "Failed to locate a viable rotation!");
4354 assert((LoSrc >= 0 || HiSrc >= 0) &&
4355 "Failed to find a rotated input vector!");
4356
4357 return Rotation;
4358}
4359
4360// Lower a deinterleave shuffle to vnsrl.
4361// [a, p, b, q, c, r, d, s] -> [a, b, c, d] (EvenElts == true)
4362// -> [p, q, r, s] (EvenElts == false)
4363// VT is the type of the vector to return, <[vscale x ]n x ty>
4364 // Src is the vector to deinterleave of type <[vscale x ]n*2 x ty>
4365 static SDValue getDeinterleaveViaVNSRL(const SDLoc &DL, MVT VT, SDValue Src,
4366 bool EvenElts,
4367 const RISCVSubtarget &Subtarget,
4368 SelectionDAG &DAG) {
4369 // The result is a vector of type <m x n x ty>
4370 MVT ContainerVT = VT;
4371 // Convert fixed vectors to scalable if needed
4372 if (ContainerVT.isFixedLengthVector()) {
4373 assert(Src.getSimpleValueType().isFixedLengthVector());
4374 ContainerVT = getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
4375
4376 // The source is a vector of type <m x n*2 x ty>
4377 MVT SrcContainerVT =
4378 MVT::getVectorVT(ContainerVT.getVectorElementType(),
4379 ContainerVT.getVectorElementCount() * 2);
4380 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
4381 }
4382
4383 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4384
4385 // Bitcast the source vector from <m x n*2 x ty> -> <m x n x ty*2>
4386 // This also converts FP to int.
4387 unsigned EltBits = ContainerVT.getScalarSizeInBits();
4388 MVT WideSrcContainerVT = MVT::getVectorVT(
4389 MVT::getIntegerVT(EltBits * 2), ContainerVT.getVectorElementCount());
4390 Src = DAG.getBitcast(WideSrcContainerVT, Src);
4391
4392 // The integer version of the container type.
4393 MVT IntContainerVT = ContainerVT.changeVectorElementTypeToInteger();
4394
4395 // If we want even elements, then the shift amount is 0. Otherwise, shift by
4396 // the original element size.
4397 unsigned Shift = EvenElts ? 0 : EltBits;
4398 SDValue SplatShift = DAG.getNode(
4399 RISCVISD::VMV_V_X_VL, DL, IntContainerVT, DAG.getUNDEF(ContainerVT),
4400 DAG.getConstant(Shift, DL, Subtarget.getXLenVT()), VL);
4401 SDValue Res =
4402 DAG.getNode(RISCVISD::VNSRL_VL, DL, IntContainerVT, Src, SplatShift,
4403 DAG.getUNDEF(IntContainerVT), TrueMask, VL);
4404 // Cast back to FP if needed.
4405 Res = DAG.getBitcast(ContainerVT, Res);
4406
4407 if (VT.isFixedLengthVector())
4408 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
4409 return Res;
4410}
4411
4412// Lower the following shuffle to vslidedown.
4413// a)
4414// t49: v8i8 = extract_subvector t13, Constant:i64<0>
4415// t109: v8i8 = extract_subvector t13, Constant:i64<8>
4416// t108: v8i8 = vector_shuffle<1,2,3,4,5,6,7,8> t49, t106
4417// b)
4418// t69: v16i16 = extract_subvector t68, Constant:i64<0>
4419// t23: v8i16 = extract_subvector t69, Constant:i64<0>
4420// t29: v4i16 = extract_subvector t23, Constant:i64<4>
4421// t26: v8i16 = extract_subvector t69, Constant:i64<8>
4422// t30: v4i16 = extract_subvector t26, Constant:i64<0>
4423 // t54: v4i16 = vector_shuffle<1,2,3,4> t29, t30
4424 static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT,
4425 SDValue V1, SDValue V2,
4426 ArrayRef<int> Mask,
4427 const RISCVSubtarget &Subtarget,
4428 SelectionDAG &DAG) {
4429 auto findNonEXTRACT_SUBVECTORParent =
4430 [](SDValue Parent) -> std::pair<SDValue, uint64_t> {
4431 uint64_t Offset = 0;
4432 while (Parent.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
4433 // EXTRACT_SUBVECTOR can be used to extract a fixed-width vector from
4434 // a scalable vector. But we don't want to match the case.
4435 Parent.getOperand(0).getSimpleValueType().isFixedLengthVector()) {
4436 Offset += Parent.getConstantOperandVal(1);
4437 Parent = Parent.getOperand(0);
4438 }
4439 return std::make_pair(Parent, Offset);
4440 };
4441
4442 auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1);
4443 auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2);
4444
4445 // Extracting from the same source.
4446 SDValue Src = V1Src;
4447 if (Src != V2Src)
4448 return SDValue();
4449
4450 // Rebuild mask because Src may be from multiple EXTRACT_SUBVECTORs.
4451 SmallVector<int, 16> NewMask(Mask);
4452 for (size_t i = 0; i != NewMask.size(); ++i) {
4453 if (NewMask[i] == -1)
4454 continue;
4455
4456 if (static_cast<size_t>(NewMask[i]) < NewMask.size()) {
4457 NewMask[i] = NewMask[i] + V1IndexOffset;
4458 } else {
4459 // Minus NewMask.size() is needed. Otherwise, the b case would be
4460 // <5,6,7,12> instead of <5,6,7,8>.
4461 NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset;
4462 }
4463 }
4464
4465 // First index must be known and non-zero. It will be used as the slidedown
4466 // amount.
4467 if (NewMask[0] <= 0)
4468 return SDValue();
4469
4470 // NewMask is also continuous.
4471 for (unsigned i = 1; i != NewMask.size(); ++i)
4472 if (NewMask[i - 1] + 1 != NewMask[i])
4473 return SDValue();
4474
4475 MVT XLenVT = Subtarget.getXLenVT();
4476 MVT SrcVT = Src.getSimpleValueType();
4477 MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
4478 auto [TrueMask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
4479 SDValue Slidedown =
4480 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4481 convertToScalableVector(ContainerVT, Src, DAG, Subtarget),
4482 DAG.getConstant(NewMask[0], DL, XLenVT), TrueMask, VL);
4483 return DAG.getNode(
4484 ISD::EXTRACT_SUBVECTOR, DL, VT,
4485 convertFromScalableVector(SrcVT, Slidedown, DAG, Subtarget),
4486 DAG.getConstant(0, DL, XLenVT));
4487}
4488
4489// Because vslideup leaves the destination elements at the start intact, we can
4490// use it to perform shuffles that insert subvectors:
4491//
4492// vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 2, 3, 8, 9, 10, 11>
4493// ->
4494// vsetvli zero, 8, e8, mf2, ta, ma
4495// vslideup.vi v8, v9, 4
4496//
4497// vector_shuffle v8:v8i8, v9:v8i8 <0, 1, 8, 9, 10, 5, 6, 7>
4498// ->
4499// vsetvli zero, 5, e8, mf2, tu, ma
4500 // vslideup.vi v8, v9, 2
4501 static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT,
4502 SDValue V1, SDValue V2,
4503 ArrayRef<int> Mask,
4504 const RISCVSubtarget &Subtarget,
4505 SelectionDAG &DAG) {
4506 unsigned NumElts = VT.getVectorNumElements();
4507 int NumSubElts, Index;
4508 if (!ShuffleVectorInst::isInsertSubvectorMask(Mask, NumElts, NumSubElts,
4509 Index))
4510 return SDValue();
4511
4512 bool OpsSwapped = Mask[Index] < (int)NumElts;
4513 SDValue InPlace = OpsSwapped ? V2 : V1;
4514 SDValue ToInsert = OpsSwapped ? V1 : V2;
4515
4516 MVT XLenVT = Subtarget.getXLenVT();
4517 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4518 auto TrueMask = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).first;
4519 // We slide up by the index that the subvector is being inserted at, and set
4520 // VL to the index + the number of elements being inserted.
4521 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
4522 // If we're adding a suffix to the in-place vector, i.e. inserting right
4523 // up to the very end of it, then we don't actually care about the tail.
4524 if (NumSubElts + Index >= (int)NumElts)
4525 Policy |= RISCVII::TAIL_AGNOSTIC;
4526
4527 InPlace = convertToScalableVector(ContainerVT, InPlace, DAG, Subtarget);
4528 ToInsert = convertToScalableVector(ContainerVT, ToInsert, DAG, Subtarget);
4529 SDValue VL = DAG.getConstant(NumSubElts + Index, DL, XLenVT);
4530
4531 SDValue Res;
4532 // If we're inserting into the lowest elements, use a tail undisturbed
4533 // vmv.v.v.
4534 if (Index == 0)
4535 Res = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, InPlace, ToInsert,
4536 VL);
4537 else
4538 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, InPlace, ToInsert,
4539 DAG.getConstant(Index, DL, XLenVT), TrueMask, VL, Policy);
4540 return convertFromScalableVector(VT, Res, DAG, Subtarget);
4541}
4542
4543/// Match v(f)slide1up/down idioms. These operations involve sliding
4544 /// N-1 elements to make room for an inserted scalar at one end.
4545 static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
4546 SDValue V1, SDValue V2,
4547 ArrayRef<int> Mask,
4548 const RISCVSubtarget &Subtarget,
4549 SelectionDAG &DAG) {
4550 bool OpsSwapped = false;
4551 if (!isa<BuildVectorSDNode>(V1)) {
4552 if (!isa<BuildVectorSDNode>(V2))
4553 return SDValue();
4554 std::swap(V1, V2);
4555 OpsSwapped = true;
4556 }
4557 SDValue Splat = cast<BuildVectorSDNode>(V1)->getSplatValue();
4558 if (!Splat)
4559 return SDValue();
4560
4561 // Return true if the mask could describe a slide of Mask.size() - 1
4562 // elements from concat_vector(V1, V2)[Base:] to [Offset:].
4563 auto isSlideMask = [](ArrayRef<int> Mask, unsigned Base, int Offset) {
4564 const unsigned S = (Offset > 0) ? 0 : -Offset;
4565 const unsigned E = Mask.size() - ((Offset > 0) ? Offset : 0);
4566 for (unsigned i = S; i != E; ++i)
4567 if (Mask[i] >= 0 && (unsigned)Mask[i] != Base + i + Offset)
4568 return false;
4569 return true;
4570 };
4571
4572 const unsigned NumElts = VT.getVectorNumElements();
4573 bool IsVSlidedown = isSlideMask(Mask, OpsSwapped ? 0 : NumElts, 1);
4574 if (!IsVSlidedown && !isSlideMask(Mask, OpsSwapped ? 0 : NumElts, -1))
4575 return SDValue();
4576
4577 const int InsertIdx = Mask[IsVSlidedown ? (NumElts - 1) : 0];
4578 // The inserted lane must come from the splat; an undef scalar is legal but not profitable.
4579 if (InsertIdx < 0 || InsertIdx / NumElts != (unsigned)OpsSwapped)
4580 return SDValue();
4581
4582 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4583 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4584 auto OpCode = IsVSlidedown ?
4585 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL) :
4586 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL);
4587 if (!VT.isFloatingPoint())
4588 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Splat);
4589 auto Vec = DAG.getNode(OpCode, DL, ContainerVT,
4590 DAG.getUNDEF(ContainerVT),
4591 convertToScalableVector(ContainerVT, V2, DAG, Subtarget),
4592 Splat, TrueMask, VL);
4593 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4594}
4595
4596// Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx
4597// to create an interleaved vector of <[vscale x] n*2 x ty>.
4598 // This requires that the size of ty is less than the subtarget's maximum ELEN.
4599 static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
4600 const SDLoc &DL, SelectionDAG &DAG,
4601 const RISCVSubtarget &Subtarget) {
4602 MVT VecVT = EvenV.getSimpleValueType();
4603 MVT VecContainerVT = VecVT; // <vscale x n x ty>
4604 // Convert fixed vectors to scalable if needed
4605 if (VecContainerVT.isFixedLengthVector()) {
4606 VecContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
4607 EvenV = convertToScalableVector(VecContainerVT, EvenV, DAG, Subtarget);
4608 OddV = convertToScalableVector(VecContainerVT, OddV, DAG, Subtarget);
4609 }
4610
4611 assert(VecVT.getScalarSizeInBits() < Subtarget.getELen());
4612
4613 // We're working with a vector of the same size as the resulting
4614 // interleaved vector, but with half the number of elements and
4615 // twice the SEW (Hence the restriction on not using the maximum
4616 // ELEN)
4617 MVT WideVT =
4618 MVT::getVectorVT(MVT::getIntegerVT(VecVT.getScalarSizeInBits() * 2),
4619 VecVT.getVectorElementCount());
4620 MVT WideContainerVT = WideVT; // <vscale x n x ty*2>
4621 if (WideContainerVT.isFixedLengthVector())
4622 WideContainerVT = getContainerForFixedLengthVector(DAG, WideVT, Subtarget);
4623
4624 // Bitcast the input vectors to integers in case they are FP
4625 VecContainerVT = VecContainerVT.changeTypeToInteger();
4626 EvenV = DAG.getBitcast(VecContainerVT, EvenV);
4627 OddV = DAG.getBitcast(VecContainerVT, OddV);
4628
4629 auto [Mask, VL] = getDefaultVLOps(VecVT, VecContainerVT, DL, DAG, Subtarget);
4630 SDValue Passthru = DAG.getUNDEF(WideContainerVT);
4631
4632 SDValue Interleaved;
4633 if (OddV.isUndef()) {
4634 // If OddV is undef, this is a zero extend.
4635 // FIXME: Not only does this optimize the code, it fixes some correctness
4636 // issues because MIR does not have freeze.
4637 Interleaved =
4638 DAG.getNode(RISCVISD::VZEXT_VL, DL, WideContainerVT, EvenV, Mask, VL);
4639 } else if (Subtarget.hasStdExtZvbb()) {
4640 // Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV.
4641 SDValue OffsetVec =
4642 DAG.getConstant(VecVT.getScalarSizeInBits(), DL, VecContainerVT);
4643 Interleaved = DAG.getNode(RISCVISD::VWSLL_VL, DL, WideContainerVT, OddV,
4644 OffsetVec, Passthru, Mask, VL);
4645 if (!EvenV.isUndef())
4646 Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
4647 Interleaved, EvenV, Passthru, Mask, VL);
4648 } else if (EvenV.isUndef()) {
4649 Interleaved =
4650 DAG.getNode(RISCVISD::VZEXT_VL, DL, WideContainerVT, OddV, Mask, VL);
4651
4652 SDValue OffsetVec =
4653 DAG.getConstant(VecVT.getScalarSizeInBits(), DL, WideContainerVT);
4654 Interleaved = DAG.getNode(RISCVISD::SHL_VL, DL, WideContainerVT,
4655 Interleaved, OffsetVec, Passthru, Mask, VL);
4656 } else {
4657 // FIXME: We should freeze the odd vector here. We already handled the case
4658 // of provably undef/poison above.
4659
4660 // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with
4661 // vwaddu.vv
4662 Interleaved = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideContainerVT, EvenV,
4663 OddV, Passthru, Mask, VL);
4664
4665 // Then compute OddV * (2^VecVT.getScalarSizeInBits() - 1)
4666 SDValue AllOnesVec = DAG.getSplatVector(
4667 VecContainerVT, DL, DAG.getAllOnesConstant(DL, Subtarget.getXLenVT()));
4668 SDValue OddsMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideContainerVT,
4669 OddV, AllOnesVec, Passthru, Mask, VL);
4670
4671 // Add the two together so we get
4672 // (OddV * 0xff...ff) + (OddV + EvenV)
4673 // = (OddV * 0x100...00) + EvenV
4674 // = (OddV << VecVT.getScalarSizeInBits()) + EvenV
4675 // Note the ADD_VL and VWMULU_VL should get selected as vwmaccu.vx
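// e.g. with 8-bit elements, Odd=0x02 and Even=0x03: vwaddu gives 0x0005, and
// adding 0x02 * 0xFF = 0x01FE yields 0x0203, i.e. Odd in the high half and
// Even in the low half.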
4676 Interleaved = DAG.getNode(RISCVISD::ADD_VL, DL, WideContainerVT,
4677 Interleaved, OddsMul, Passthru, Mask, VL);
4678 }
4679
4680 // Bitcast from <vscale x n * ty*2> to <vscale x 2*n x ty>
4681 MVT ResultContainerVT = MVT::getVectorVT(
4682 VecVT.getVectorElementType(), // Make sure to use original type
4683 VecContainerVT.getVectorElementCount().multiplyCoefficientBy(2));
4684 Interleaved = DAG.getBitcast(ResultContainerVT, Interleaved);
4685
4686 // Convert back to a fixed vector if needed
4688 MVT ResultVT =
4689 MVT::getVectorVT(VecVT.getVectorElementType(), VecVT.getVectorElementCount().multiplyCoefficientBy(2));
4690 if (ResultVT.isFixedLengthVector())
4691 Interleaved =
4692 convertFromScalableVector(ResultVT, Interleaved, DAG, Subtarget);
4693
4694 return Interleaved;
4695}
4696
4697// If we have a vector of bits that we want to reverse, we can use a vbrev on a
4698 // larger element type, e.g. v32i1 can be reversed with a v1i32 bitreverse.
4699 static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN,
4700 SelectionDAG &DAG,
4701 const RISCVSubtarget &Subtarget) {
4702 SDLoc DL(SVN);
4703 MVT VT = SVN->getSimpleValueType(0);
4704 SDValue V = SVN->getOperand(0);
4705 unsigned NumElts = VT.getVectorNumElements();
4706
4707 assert(VT.getVectorElementType() == MVT::i1);
4708
4709 if (!ShuffleVectorInst::isReverseMask(SVN->getMask(),
4710 SVN->getMask().size()) ||
4711 !SVN->getOperand(1).isUndef())
4712 return SDValue();
4713
4714 unsigned ViaEltSize = std::max((uint64_t)8, PowerOf2Ceil(NumElts));
4715 EVT ViaVT = EVT::getVectorVT(
4716 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), ViaEltSize), 1);
4717 EVT ViaBitVT =
4718 EVT::getVectorVT(*DAG.getContext(), MVT::i1, ViaVT.getScalarSizeInBits());
4719
4720 // If we don't have zvbb or the larger element type > ELEN, the operation will
4721 // be illegal.
4722 if (!Subtarget.getTargetLowering()->isOperationLegalOrCustom(ISD::BITREVERSE,
4723 ViaVT) ||
4724 !Subtarget.getTargetLowering()->isTypeLegal(ViaBitVT))
4725 return SDValue();
4726
4727 // If the bit vector doesn't fit exactly into the larger element type, we need
4728 // to insert it into the larger vector and then shift up the reversed bits
4729 // afterwards to get rid of the gap introduced.
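// e.g. a v12i1 reverse done via an i16 bitreverse leaves the 12 reversed bits
// in the high end of the i16, so we shift right by 16 - 12 = 4 to realign them.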
4730 if (ViaEltSize > NumElts)
4731 V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ViaBitVT, DAG.getUNDEF(ViaBitVT),
4732 V, DAG.getVectorIdxConstant(0, DL));
4733
4734 SDValue Res =
4735 DAG.getNode(ISD::BITREVERSE, DL, ViaVT, DAG.getBitcast(ViaVT, V));
4736
4737 // Shift up the reversed bits if the vector didn't exactly fit into the larger
4738 // element type.
4739 if (ViaEltSize > NumElts)
4740 Res = DAG.getNode(ISD::SRL, DL, ViaVT, Res,
4741 DAG.getConstant(ViaEltSize - NumElts, DL, ViaVT));
4742
4743 Res = DAG.getBitcast(ViaBitVT, Res);
4744
4745 if (ViaEltSize > NumElts)
4746 Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
4747 DAG.getVectorIdxConstant(0, DL));
4748 return Res;
4749}
4750
4751 static bool isLegalBitRotate(ShuffleVectorSDNode *SVN,
4752 SelectionDAG &DAG,
4753 const RISCVSubtarget &Subtarget,
4754 MVT &RotateVT, unsigned &RotateAmt) {
4755 SDLoc DL(SVN);
4756
4757 EVT VT = SVN->getValueType(0);
4758 unsigned NumElts = VT.getVectorNumElements();
4759 unsigned EltSizeInBits = VT.getScalarSizeInBits();
4760 unsigned NumSubElts;
4761 if (!ShuffleVectorInst::isBitRotateMask(SVN->getMask(), EltSizeInBits, 2,
4762 NumElts, NumSubElts, RotateAmt))
4763 return false;
4764 RotateVT = MVT::getVectorVT(MVT::getIntegerVT(EltSizeInBits * NumSubElts),
4765 NumElts / NumSubElts);
4766
4767 // We might have a RotateVT that isn't legal, e.g. v4i64 on zve32x.
4768 return Subtarget.getTargetLowering()->isTypeLegal(RotateVT);
4769}
4770
4771// Given a shuffle mask like <3, 0, 1, 2, 7, 4, 5, 6> for v8i8, we can
4772// reinterpret it as a v2i32 and rotate it right by 8 instead. We can lower this
4773 // as a vror.vi if we have Zvkb, or otherwise as a vsll, vsrl and vor.
4774 static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN,
4775 SelectionDAG &DAG,
4776 const RISCVSubtarget &Subtarget) {
4777 SDLoc DL(SVN);
4778
4779 EVT VT = SVN->getValueType(0);
4780 unsigned RotateAmt;
4781 MVT RotateVT;
4782 if (!isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt))
4783 return SDValue();
4784
4785 SDValue Op = DAG.getBitcast(RotateVT, SVN->getOperand(0));
4786
4787 SDValue Rotate;
4788 // A rotate of an i16 by 8 bits in either direction is equivalent to a byteswap,
4789 // so canonicalize to vrev8.
4790 if (RotateVT.getScalarType() == MVT::i16 && RotateAmt == 8)
4791 Rotate = DAG.getNode(ISD::BSWAP, DL, RotateVT, Op);
4792 else
4793 Rotate = DAG.getNode(ISD::ROTL, DL, RotateVT, Op,
4794 DAG.getConstant(RotateAmt, DL, RotateVT));
4795
4796 return DAG.getBitcast(VT, Rotate);
4797}
4798
4799// If compiling with an exactly known VLEN, see if we can split a
4800// shuffle on m2 or larger into a small number of m1 sized shuffles
4801 // which write each destination register exactly once.
4802 static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN,
4803 SelectionDAG &DAG,
4804 const RISCVSubtarget &Subtarget) {
4805 SDLoc DL(SVN);
4806 MVT VT = SVN->getSimpleValueType(0);
4807 SDValue V1 = SVN->getOperand(0);
4808 SDValue V2 = SVN->getOperand(1);
4809 ArrayRef<int> Mask = SVN->getMask();
4810 unsigned NumElts = VT.getVectorNumElements();
4811
4812 // If we don't know the exact data layout, there's not much we can do. If this
4813 // is already m1 or smaller, no point in splitting further.
4814 const auto VLen = Subtarget.getRealVLen();
4815 if (!VLen || VT.getSizeInBits().getFixedValue() <= *VLen)
4816 return SDValue();
4817
4818 // Avoid picking up bitrotate patterns which we have a linear-in-lmul
4819 // expansion for.
4820 unsigned RotateAmt;
4821 MVT RotateVT;
4822 if (isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt))
4823 return SDValue();
4824
4825 MVT ElemVT = VT.getVectorElementType();
4826 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
4827 unsigned VRegsPerSrc = NumElts / ElemsPerVReg;
4828
4829 SmallVector<std::pair<int, SmallVector<int>>>
4830 OutMasks(VRegsPerSrc, {-1, {}});
4831
4832 // Check if our mask can be done as a 1-to-1 mapping from source
4833 // to destination registers in the group without needing to
4834 // write each destination more than once.
4835 for (unsigned DstIdx = 0; DstIdx < Mask.size(); DstIdx++) {
4836 int DstVecIdx = DstIdx / ElemsPerVReg;
4837 int DstSubIdx = DstIdx % ElemsPerVReg;
4838 int SrcIdx = Mask[DstIdx];
4839 if (SrcIdx < 0 || (unsigned)SrcIdx >= 2 * NumElts)
4840 continue;
4841 int SrcVecIdx = SrcIdx / ElemsPerVReg;
4842 int SrcSubIdx = SrcIdx % ElemsPerVReg;
4843 if (OutMasks[DstVecIdx].first == -1)
4844 OutMasks[DstVecIdx].first = SrcVecIdx;
4845 if (OutMasks[DstVecIdx].first != SrcVecIdx)
4846 // Note: This case could easily be handled by keeping track of a chain
4847 // of source values and generating two element shuffles below. This is
4848 // less an implementation question, and more a profitability one.
4849 return SDValue();
4850
4851 OutMasks[DstVecIdx].second.resize(ElemsPerVReg, -1);
4852 OutMasks[DstVecIdx].second[DstSubIdx] = SrcSubIdx;
4853 }
4854
4855 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4856 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
4857 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
4858 assert(M1VT == getLMUL1VT(M1VT));
4859 unsigned NumOpElts = M1VT.getVectorMinNumElements();
4860 SDValue Vec = DAG.getUNDEF(ContainerVT);
4861 // The following semantically builds up a fixed length concat_vector
4862 // of the component shuffle_vectors. We eagerly lower to scalable here
4863 // to avoid DAG combining it back to a large shuffle_vector again.
4864 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
4865 V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
4866 for (unsigned DstVecIdx = 0 ; DstVecIdx < OutMasks.size(); DstVecIdx++) {
4867 auto &[SrcVecIdx, SrcSubMask] = OutMasks[DstVecIdx];
4868 if (SrcVecIdx == -1)
4869 continue;
4870 unsigned ExtractIdx = (SrcVecIdx % VRegsPerSrc) * NumOpElts;
4871 SDValue SrcVec = (unsigned)SrcVecIdx >= VRegsPerSrc ? V2 : V1;
4872 SDValue SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, SrcVec,
4873 DAG.getVectorIdxConstant(ExtractIdx, DL));
4874 SubVec = convertFromScalableVector(OneRegVT, SubVec, DAG, Subtarget);
4875 SubVec = DAG.getVectorShuffle(OneRegVT, DL, SubVec, SubVec, SrcSubMask);
4876 SubVec = convertToScalableVector(M1VT, SubVec, DAG, Subtarget);
4877 unsigned InsertIdx = DstVecIdx * NumOpElts;
4878 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubVec,
4879 DAG.getVectorIdxConstant(InsertIdx, DL));
4880 }
4881 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4882}
4883
4884 static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
4885 const RISCVSubtarget &Subtarget) {
4886 SDValue V1 = Op.getOperand(0);
4887 SDValue V2 = Op.getOperand(1);
4888 SDLoc DL(Op);
4889 MVT XLenVT = Subtarget.getXLenVT();
4890 MVT VT = Op.getSimpleValueType();
4891 unsigned NumElts = VT.getVectorNumElements();
4892 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
4893
4894 if (VT.getVectorElementType() == MVT::i1) {
4895 // Lower to a vror.vi of a larger element type if possible before we promote
4896 // i1s to i8s.
4897 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
4898 return V;
4899 if (SDValue V = lowerBitreverseShuffle(SVN, DAG, Subtarget))
4900 return V;
4901
4902 // Promote i1 shuffle to i8 shuffle.
4903 MVT WidenVT = MVT::getVectorVT(MVT::i8, VT.getVectorElementCount());
4904 V1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V1);
4905 V2 = V2.isUndef() ? DAG.getUNDEF(WidenVT)
4906 : DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V2);
4907 SDValue Shuffled = DAG.getVectorShuffle(WidenVT, DL, V1, V2, SVN->getMask());
4908 return DAG.getSetCC(DL, VT, Shuffled, DAG.getConstant(0, DL, WidenVT),
4909 ISD::SETNE);
4910 }
4911
4912 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4913
4914 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4915
4916 if (SVN->isSplat()) {
4917 const int Lane = SVN->getSplatIndex();
4918 if (Lane >= 0) {
4919 MVT SVT = VT.getVectorElementType();
4920
4921 // Turn splatted vector load into a strided load with an X0 stride.
4922 SDValue V = V1;
4923 // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
4924 // with undef.
4925 // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
4926 int Offset = Lane;
4927 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
4928 int OpElements =
4929 V.getOperand(0).getSimpleValueType().getVectorNumElements();
4930 V = V.getOperand(Offset / OpElements);
4931 Offset %= OpElements;
4932 }
4933
4934 // We need to ensure the load isn't atomic or volatile.
4935 if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
4936 auto *Ld = cast<LoadSDNode>(V);
4937 Offset *= SVT.getStoreSize();
4938 SDValue NewAddr = DAG.getMemBasePlusOffset(
4939 Ld->getBasePtr(), TypeSize::getFixed(Offset), DL);
4940
4941 // If this is SEW=64 on RV32, use a strided load with a stride of x0.
4942 if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
4943 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
4944 SDValue IntID =
4945 DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
4946 SDValue Ops[] = {Ld->getChain(),
4947 IntID,
4948 DAG.getUNDEF(ContainerVT),
4949 NewAddr,
4950 DAG.getRegister(RISCV::X0, XLenVT),
4951 VL};
4952 SDValue NewLoad = DAG.getMemIntrinsicNode(
4953 ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
4954 DAG.getMachineFunction().getMachineMemOperand(
4955 Ld->getMemOperand(), Offset, SVT.getStoreSize()));
4956 DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
4957 return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
4958 }
4959
4960 // Otherwise use a scalar load and splat. This will give the best
4961 // opportunity to fold a splat into the operation. ISel can turn it into
4962 // the x0 strided load if we aren't able to fold away the select.
4963 if (SVT.isFloatingPoint())
4964 V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
4965 Ld->getPointerInfo().getWithOffset(Offset),
4966 Ld->getOriginalAlign(),
4967 Ld->getMemOperand()->getFlags());
4968 else
4969 V = DAG.getExtLoad(ISD::SEXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,
4970 Ld->getPointerInfo().getWithOffset(Offset), SVT,
4971 Ld->getOriginalAlign(),
4972 Ld->getMemOperand()->getFlags());
4973 DAG.makeEquivalentMemoryOrdering(Ld, V);
4974
4975 unsigned Opc =
4976 VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
4977 SDValue Splat =
4978 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), V, VL);
4979 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4980 }
4981
4982 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
4983 assert(Lane < (int)NumElts && "Unexpected lane!");
4984 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT,
4985 V1, DAG.getConstant(Lane, DL, XLenVT),
4986 DAG.getUNDEF(ContainerVT), TrueMask, VL);
4987 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
4988 }
4989 }
4990
4991 // For exact VLEN m2 or greater, try to split to m1 operations if we
4992 // can split cleanly.
4993 if (SDValue V = lowerShuffleViaVRegSplitting(SVN, DAG, Subtarget))
4994 return V;
4995
4996 ArrayRef<int> Mask = SVN->getMask();
4997
4998 if (SDValue V =
4999 lowerVECTOR_SHUFFLEAsVSlide1(DL, VT, V1, V2, Mask, Subtarget, DAG))
5000 return V;
5001
5002 if (SDValue V =
5003 lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG))
5004 return V;
5005
5006 // A bitrotate will be one instruction on Zvkb, so try to lower to it first if
5007 // available.
5008 if (Subtarget.hasStdExtZvkb())
5009 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5010 return V;
5011
5012 // Lower rotations to a SLIDEDOWN and a SLIDEUP. One of the source vectors may
5013 // be undef which can be handled with a single SLIDEDOWN/UP.
5014 int LoSrc, HiSrc;
5015 int Rotation = isElementRotate(LoSrc, HiSrc, Mask);
5016 if (Rotation > 0) {
5017 SDValue LoV, HiV;
5018 if (LoSrc >= 0) {
5019 LoV = LoSrc == 0 ? V1 : V2;
5020 LoV = convertToScalableVector(ContainerVT, LoV, DAG, Subtarget);
5021 }
5022 if (HiSrc >= 0) {
5023 HiV = HiSrc == 0 ? V1 : V2;
5024 HiV = convertToScalableVector(ContainerVT, HiV, DAG, Subtarget);
5025 }
5026
5027 // We found a rotation. We need to slide HiV down by Rotation. Then we need
5028 // to slide LoV up by (NumElts - Rotation).
5029 unsigned InvRotate = NumElts - Rotation;
5030
5031 SDValue Res = DAG.getUNDEF(ContainerVT);
5032 if (HiV) {
5033 // Even though we could use a smaller VL, don't, to avoid a vsetivli
5034 // toggle.
5035 Res = getVSlidedown(DAG, Subtarget, DL, ContainerVT, Res, HiV,
5036 DAG.getConstant(Rotation, DL, XLenVT), TrueMask, VL);
5037 }
5038 if (LoV)
5039 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, Res, LoV,
5040 DAG.getConstant(InvRotate, DL, XLenVT), TrueMask, VL,
5041 RISCVII::TAIL_AGNOSTIC);
5042
5043 return convertFromScalableVector(VT, Res, DAG, Subtarget);
5044 }
5045
5046 // If this is a deinterleave and we can widen the vector, then we can use
5047 // vnsrl to deinterleave.
5048 if (isDeinterleaveShuffle(VT, ContainerVT, V1, V2, Mask, Subtarget)) {
5049 return getDeinterleaveViaVNSRL(DL, VT, V1.getOperand(0), Mask[0] == 0,
5050 Subtarget, DAG);
5051 }
5052
5053 if (SDValue V =
5054 lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG))
5055 return V;
5056
5057 // Detect an interleave shuffle and lower to
5058 // (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
5059 int EvenSrc, OddSrc;
5060 if (isInterleaveShuffle(Mask, VT, EvenSrc, OddSrc, Subtarget)) {
5061 // Extract the halves of the vectors.
5062 MVT HalfVT = VT.getHalfNumVectorElementsVT();
5063
5064 int Size = Mask.size();
5065 SDValue EvenV, OddV;
5066 assert(EvenSrc >= 0 && "Undef source?");
5067 EvenV = (EvenSrc / Size) == 0 ? V1 : V2;
5068 EvenV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, EvenV,
5069 DAG.getVectorIdxConstant(EvenSrc % Size, DL));
5070
5071 assert(OddSrc >= 0 && "Undef source?");
5072 OddV = (OddSrc / Size) == 0 ? V1 : V2;
5073 OddV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, OddV,
5074 DAG.getVectorIdxConstant(OddSrc % Size, DL));
5075
5076 return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget);
5077 }
5078
5079
5080 // Handle any remaining single source shuffles
5081 assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
5082 if (V2.isUndef()) {
5083 // We might be able to express the shuffle as a bitrotate. But even if we
5084 // don't have Zvkb and have to expand, the expanded sequence of approx. 2
5085 // shifts and a vor will have a higher throughput than a vrgather.
5086 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5087 return V;
5088
5089 if (VT.getScalarSizeInBits() == 8 &&
5090 any_of(Mask, [&](const auto &Idx) { return Idx > 255; })) {
5091 // On such a vector we're unable to use i8 as the index type.
5092 // FIXME: We could promote the index to i16 and use vrgatherei16, but that
5093 // may involve vector splitting if we're already at LMUL=8, or our
5094 // user-supplied maximum fixed-length LMUL.
5095 return SDValue();
5096 }
5097
5098 // Base case for the two operand recursion below - handle the worst case
5099 // single source shuffle.
5100 unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
5101 MVT IndexVT = VT.changeTypeToInteger();
5102 // Since we can't introduce illegal index types at this stage, use i16 and
5103 // vrgatherei16 if the corresponding index type for plain vrgather is greater
5104 // than XLenVT.
5105 if (IndexVT.getScalarType().bitsGT(XLenVT)) {
5106 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
5107 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
5108 }
5109
5110 // If the mask allows, we can do all the index computation in 16 bits. This
5111 // requires less work and less register pressure at high LMUL, and creates
5112 // smaller constants which may be cheaper to materialize.
5113 if (IndexVT.getScalarType().bitsGT(MVT::i16) && isUInt<16>(NumElts - 1) &&
5114 (IndexVT.getSizeInBits() / Subtarget.getRealMinVLen()) > 1) {
5115 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
5116 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
5117 }
5118
5119 MVT IndexContainerVT =
5120 ContainerVT.changeVectorElementType(IndexVT.getScalarType());
5121
5122 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5123 SmallVector<SDValue> GatherIndicesLHS;
5124 for (int MaskIndex : Mask) {
5125 bool IsLHSIndex = MaskIndex < (int)NumElts && MaskIndex >= 0;
5126 GatherIndicesLHS.push_back(IsLHSIndex
5127 ? DAG.getConstant(MaskIndex, DL, XLenVT)
5128 : DAG.getUNDEF(XLenVT));
5129 }
5130 SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
5131 LHSIndices = convertToScalableVector(IndexContainerVT, LHSIndices, DAG,
5132 Subtarget);
5133 SDValue Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
5134 DAG.getUNDEF(ContainerVT), TrueMask, VL);
5135 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
5136 }
5137
5138 // By default we preserve the original operand order, and use a mask to
5139 // select LHS as true and RHS as false. However, since RVV vector selects may
5140 // feature splats but only on the LHS, we may choose to invert our mask and
5141 // instead select between RHS and LHS.
5142 bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
5143
5144 // Detect shuffles which can be re-expressed as vector selects; these are
5145 // shuffles in which each element in the destination is taken from an element
5146 // at the corresponding index in either source vectors.
5147 bool IsSelect = all_of(enumerate(Mask), [&](const auto &MaskIdx) {
5148 int MaskIndex = MaskIdx.value();
5149 return MaskIndex < 0 || MaskIdx.index() == (unsigned)MaskIndex % NumElts;
5150 });
5151 if (IsSelect) {
5152 // Now construct the mask that will be used by the vselect operation.
5153 SmallVector<SDValue> MaskVals;
5154 for (int MaskIndex : Mask) {
5155 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ SwapOps;
5156 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
5157 }
5158
5159 if (SwapOps)
5160 std::swap(V1, V2);
5161
5162 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
5163 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
5164 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
5165 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
5166 }
5167
5168 // As a backup, shuffles can be lowered via a vrgather instruction, possibly
5169 // merged with a second vrgather.
5170 SmallVector<int> ShuffleMaskLHS, ShuffleMaskRHS;
5171 SmallVector<SDValue> MaskVals;
5172
5173 // Now construct the mask that will be used by the blended vrgather operation.
5174 // Construct the appropriate indices into each vector.
5175 for (int MaskIndex : Mask) {
5176 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ !SwapOps;
5177 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
5178 bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
5179 ShuffleMaskLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
5180 ? MaskIndex : -1);
5181 ShuffleMaskRHS.push_back(IsLHSOrUndefIndex ? -1 : (MaskIndex - NumElts));
5182 }
5183
5184 if (SwapOps) {
5185 std::swap(V1, V2);
5186 std::swap(ShuffleMaskLHS, ShuffleMaskRHS);
5187 }
5188
5189 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
5190 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
5191 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
5192
5193 // Recursively invoke lowering for each operand if we had two
5194 // independent single source shuffles, and then combine the result via a
5195 // vselect. Note that the vselect will likely be folded back into the
5196 // second permute (vrgather, or other) by the post-isel combine.
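  // For example, with 4 elements and SwapOps == false, the mask <0, 4, 1, 5>
  // becomes ShuffleMaskLHS <0, -1, 1, -1> and ShuffleMaskRHS <-1, 0, -1, 1>;
  // each side is lowered as its own single-source shuffle and the two results
  // are then blended with the vselect built from MaskVals above.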
5197 V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), ShuffleMaskLHS);
5198 V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), ShuffleMaskRHS);
5199 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V2, V1);
5200}
5201
5202 bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
5203   // Support splats for any type. These should type legalize well.
5204 if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
5205 return true;
5206
5207 // Only support legal VTs for other shuffles for now.
5208 if (!isTypeLegal(VT))
5209 return false;
5210
5211 MVT SVT = VT.getSimpleVT();
5212
5213 // Not for i1 vectors.
5214 if (SVT.getScalarType() == MVT::i1)
5215 return false;
5216
5217 int Dummy1, Dummy2;
5218 return (isElementRotate(Dummy1, Dummy2, M) > 0) ||
5219 isInterleaveShuffle(M, SVT, Dummy1, Dummy2, Subtarget);
5220}
5221
5222// Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
5223// the exponent.
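// As a rough worked example for i32 elements: converting x = 8 to f32 gives
// 2^3, whose biased exponent field is 3 + 127 = 130; CTLZ is then
// (127 + 31) - 130 = 28, matching the 28 leading zeros of 0x00000008. For
// CTTZ, x & -x isolates the lowest set bit first (e.g. 12 & -12 = 4 = 2^2),
// and subtracting the bias 127 from the exponent 129 yields 2 trailing zeros.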
5224SDValue
5225RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
5226 SelectionDAG &DAG) const {
5227 MVT VT = Op.getSimpleValueType();
5228 unsigned EltSize = VT.getScalarSizeInBits();
5229 SDValue Src = Op.getOperand(0);
5230 SDLoc DL(Op);
5231 MVT ContainerVT = VT;
5232
5233 SDValue Mask, VL;
5234 if (Op->isVPOpcode()) {
5235 Mask = Op.getOperand(1);
5236 if (VT.isFixedLengthVector())
5237 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
5238 Subtarget);
5239 VL = Op.getOperand(2);
5240 }
5241
5242   // We choose an FP type that can represent the value exactly if possible. Otherwise,
5243   // we use a round-towards-zero conversion so the exponent of the result is still correct.
5244 // TODO: Use f16 for i8 when possible?
5245 MVT FloatEltVT = (EltSize >= 32) ? MVT::f64 : MVT::f32;
5246 if (!isTypeLegal(MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount())))
5247 FloatEltVT = MVT::f32;
5248 MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
5249
5250 // Legal types should have been checked in the RISCVTargetLowering
5251 // constructor.
5252 // TODO: Splitting may make sense in some cases.
5253 assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) &&
5254 "Expected legal float type!");
5255
5256 // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X.
5257 // The trailing zero count is equal to log2 of this single bit value.
5258 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
5259 SDValue Neg = DAG.getNegative(Src, DL, VT);
5260 Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg);
5261 } else if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) {
5262 SDValue Neg = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(0, DL, VT),
5263 Src, Mask, VL);
5264 Src = DAG.getNode(ISD::VP_AND, DL, VT, Src, Neg, Mask, VL);
5265 }
5266
5267 // We have a legal FP type, convert to it.
5268 SDValue FloatVal;
5269 if (FloatVT.bitsGT(VT)) {
5270 if (Op->isVPOpcode())
5271 FloatVal = DAG.getNode(ISD::VP_UINT_TO_FP, DL, FloatVT, Src, Mask, VL);
5272 else
5273 FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
5274 } else {
5275 // Use RTZ to avoid rounding influencing exponent of FloatVal.
5276 if (VT.isFixedLengthVector()) {
5277 ContainerVT = getContainerForFixedLengthVector(VT);
5278 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
5279 }
5280 if (!Op->isVPOpcode())
5281 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5282     SDValue RTZRM =
5283         DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT());
5284     MVT ContainerFloatVT =
5285 MVT::getVectorVT(FloatEltVT, ContainerVT.getVectorElementCount());
5286 FloatVal = DAG.getNode(RISCVISD::VFCVT_RM_F_XU_VL, DL, ContainerFloatVT,
5287 Src, Mask, RTZRM, VL);
5288 if (VT.isFixedLengthVector())
5289 FloatVal = convertFromScalableVector(FloatVT, FloatVal, DAG, Subtarget);
5290 }
5291 // Bitcast to integer and shift the exponent to the LSB.
5292 EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
5293 SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);
5294 unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
5295
5296 SDValue Exp;
5297 // Restore back to original type. Truncation after SRL is to generate vnsrl.
5298 if (Op->isVPOpcode()) {
5299 Exp = DAG.getNode(ISD::VP_LSHR, DL, IntVT, Bitcast,
5300 DAG.getConstant(ShiftAmt, DL, IntVT), Mask, VL);
5301 Exp = DAG.getVPZExtOrTrunc(DL, VT, Exp, Mask, VL);
5302 } else {
5303 Exp = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
5304 DAG.getConstant(ShiftAmt, DL, IntVT));
5305 if (IntVT.bitsLT(VT))
5306 Exp = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Exp);
5307 else if (IntVT.bitsGT(VT))
5308 Exp = DAG.getNode(ISD::TRUNCATE, DL, VT, Exp);
5309 }
5310
5311 // The exponent contains log2 of the value in biased form.
5312 unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
5313 // For trailing zeros, we just need to subtract the bias.
5314 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
5315 return DAG.getNode(ISD::SUB, DL, VT, Exp,
5316 DAG.getConstant(ExponentBias, DL, VT));
5317 if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF)
5318 return DAG.getNode(ISD::VP_SUB, DL, VT, Exp,
5319 DAG.getConstant(ExponentBias, DL, VT), Mask, VL);
5320
5321 // For leading zeros, we need to remove the bias and convert from log2 to
5322 // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
5323 unsigned Adjust = ExponentBias + (EltSize - 1);
5324 SDValue Res;
5325 if (Op->isVPOpcode())
5326 Res = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp,
5327 Mask, VL);
5328 else
5329 Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp);
5330
5331   // With a zero input, the result above equals Adjust, which is greater than
5332   // EltSize. Hence, we can use min(Res, EltSize) for CTLZ.
5333 if (Op.getOpcode() == ISD::CTLZ)
5334 Res = DAG.getNode(ISD::UMIN, DL, VT, Res, DAG.getConstant(EltSize, DL, VT));
5335 else if (Op.getOpcode() == ISD::VP_CTLZ)
5336 Res = DAG.getNode(ISD::VP_UMIN, DL, VT, Res,
5337 DAG.getConstant(EltSize, DL, VT), Mask, VL);
5338 return Res;
5339}
5340
5341// While RVV has alignment restrictions, we should always be able to load as a
5342// legal equivalently-sized byte-typed vector instead. This method is
5343// responsible for re-expressing a ISD::LOAD via a correctly-aligned type. If
5344// the load is already correctly-aligned, it returns SDValue().
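// For example, a v4i32 load with alignment 1 that the target cannot legally
// perform directly is re-expressed here as a v16i8 load of the same width
// followed by a bitcast back to v4i32, since byte vectors only require
// alignment 1.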
5345SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
5346 SelectionDAG &DAG) const {
5347 auto *Load = cast<LoadSDNode>(Op);
5348 assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
5349
5351 Load->getMemoryVT(),
5352 *Load->getMemOperand()))
5353 return SDValue();
5354
5355 SDLoc DL(Op);
5356 MVT VT = Op.getSimpleValueType();
5357 unsigned EltSizeBits = VT.getScalarSizeInBits();
5358 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
5359 "Unexpected unaligned RVV load type");
5360 MVT NewVT =
5361 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
5362 assert(NewVT.isValid() &&
5363 "Expecting equally-sized RVV vector types to be legal");
5364 SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(),
5365 Load->getPointerInfo(), Load->getOriginalAlign(),
5366 Load->getMemOperand()->getFlags());
5367 return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
5368}
5369
5370// While RVV has alignment restrictions, we should always be able to store as a
5371// legal equivalently-sized byte-typed vector instead. This method is
5372// responsible for re-expressing a ISD::STORE via a correctly-aligned type. It
5373// returns SDValue() if the store is already correctly aligned.
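// For example, an unaligned v2i64 store is rewritten as a bitcast to v16i8
// followed by a byte-vector store carrying the original alignment and memory
// operand flags.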
5374SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
5375 SelectionDAG &DAG) const {
5376 auto *Store = cast<StoreSDNode>(Op);
5377 assert(Store && Store->getValue().getValueType().isVector() &&
5378 "Expected vector store");
5379
5380   if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
5381                                      Store->getMemoryVT(),
5382 *Store->getMemOperand()))
5383 return SDValue();
5384
5385 SDLoc DL(Op);
5386 SDValue StoredVal = Store->getValue();
5387 MVT VT = StoredVal.getSimpleValueType();
5388 unsigned EltSizeBits = VT.getScalarSizeInBits();
5389 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
5390 "Unexpected unaligned RVV store type");
5391 MVT NewVT =
5392 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
5393 assert(NewVT.isValid() &&
5394 "Expecting equally-sized RVV vector types to be legal");
5395 StoredVal = DAG.getBitcast(NewVT, StoredVal);
5396 return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
5397 Store->getPointerInfo(), Store->getOriginalAlign(),
5398 Store->getMemOperand()->getFlags());
5399}
5400
5401 static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,
5402                              const RISCVSubtarget &Subtarget) {
5403 assert(Op.getValueType() == MVT::i64 && "Unexpected VT");
5404
5405 int64_t Imm = cast<ConstantSDNode>(Op)->getSExtValue();
5406
5407 // All simm32 constants should be handled by isel.
5408 // NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making
5409 // this check redundant, but small immediates are common so this check
5410 // should have better compile time.
5411 if (isInt<32>(Imm))
5412 return Op;
5413
5414 // We only need to cost the immediate, if constant pool lowering is enabled.
5415 if (!Subtarget.useConstantPoolForLargeInts())
5416 return Op;
5417
5418   RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);
5419   if (Seq.size() <= Subtarget.getMaxBuildIntsCost())
5420 return Op;
5421
5422 // Optimizations below are disabled for opt size. If we're optimizing for
5423 // size, use a constant pool.
5424 if (DAG.shouldOptForSize())
5425 return SDValue();
5426
5427   // Special case. See if we can build the constant as (ADD (SLLI X, C), X); do
5428 // that if it will avoid a constant pool.
5429 // It will require an extra temporary register though.
5430 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
5431 // low and high 32 bits are the same and bit 31 and 63 are set.
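  // For example (illustrative values): 0x0101010101010101 can be built as
  // X + (X << 32) with X = 0x01010101, and with Zba 0x8000000080000000 can be
  // built as ADD_UW(X, SLLI(X, 32)) with X = 0x80000000, since ADD_UW
  // zero-extends the low 32 bits of its first operand.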
5432 unsigned ShiftAmt, AddOpc;
5433 RISCVMatInt::InstSeq SeqLo =
5434 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
5435 if (!SeqLo.empty() && (SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost())
5436 return Op;
5437
5438 return SDValue();
5439}
5440
5441 static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
5442                                  const RISCVSubtarget &Subtarget) {
5443 SDLoc dl(Op);
5444 AtomicOrdering FenceOrdering =
5445 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
5446 SyncScope::ID FenceSSID =
5447 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
5448
5449 if (Subtarget.hasStdExtZtso()) {
5450 // The only fence that needs an instruction is a sequentially-consistent
5451 // cross-thread fence.
5452 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
5453 FenceSSID == SyncScope::System)
5454 return Op;
5455
5456 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
5457 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
5458 }
5459
5460 // singlethread fences only synchronize with signal handlers on the same
5461 // thread and thus only need to preserve instruction order, not actually
5462 // enforce memory ordering.
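  // For example, the fence typically emitted for
  // atomic_signal_fence(memory_order_seq_cst) carries SyncScope::SingleThread
  // and therefore lowers to MEMBARRIER rather than a machine fence.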
5463 if (FenceSSID == SyncScope::SingleThread)
5464 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
5465 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
5466
5467 return Op;
5468}
5469
5470 static SDValue lowerSADDSAT_SSUBSAT(SDValue Op, SelectionDAG &DAG) {
5471   assert(Op.getValueType() == MVT::i32 && RV64LegalI32 &&
5472 "Unexpected custom legalisation");
5473
5474 // With Zbb, we can widen to i64 and smin/smax with INT32_MAX/MIN.
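  // For example, saddsat(0x7fffffff, 1) widens to the i64 sum 0x80000000,
  // which the smin below clamps back down to INT32_MAX before truncation.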
5475 bool IsAdd = Op.getOpcode() == ISD::SADDSAT;
5476 SDLoc DL(Op);
5477 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(0));
5478 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(1));
5479 SDValue Result =
5480 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
5481
5482 APInt MinVal = APInt::getSignedMinValue(32).sext(64);
5483 APInt MaxVal = APInt::getSignedMaxValue(32).sext(64);
5484 SDValue SatMin = DAG.getConstant(MinVal, DL, MVT::i64);
5485 SDValue SatMax = DAG.getConstant(MaxVal, DL, MVT::i64);
5486 Result = DAG.getNode(ISD::SMIN, DL, MVT::i64, Result, SatMax);
5487 Result = DAG.getNode(ISD::SMAX, DL, MVT::i64, Result, SatMin);
5488 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
5489}
5490
5491 static SDValue lowerUADDSAT_USUBSAT(SDValue Op, SelectionDAG &DAG) {
5492   assert(Op.getValueType() == MVT::i32 && RV64LegalI32 &&
5493 "Unexpected custom legalisation");
5494
5495 // With Zbb we can sign extend and let LegalizeDAG use minu/maxu. Using
5496 // sign extend allows overflow of the lower 32 bits to be detected on
5497 // the promoted size.
5498 SDLoc DL(Op);
5499 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(0));
5500 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(1));
5501 SDValue WideOp = DAG.getNode(Op.getOpcode(), DL, MVT::i64, LHS, RHS);
5502 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, WideOp);
5503}
5504
5505// Custom lower i32 SADDO/SSUBO with RV64LegalI32 so we take advantage of addw.
5506 static SDValue lowerSADDO_SSUBO(SDValue Op, SelectionDAG &DAG) {
5507   assert(Op.getValueType() == MVT::i32 && RV64LegalI32 &&
5508 "Unexpected custom legalisation");
5509 if (isa<ConstantSDNode>(Op.getOperand(1)))
5510 return SDValue();
5511
5512 bool IsAdd = Op.getOpcode() == ISD::SADDO;
5513 SDLoc DL(Op);
5514 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(0));
5515 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(1));
5516 SDValue WideOp =
5517 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
5518 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, WideOp);
5519 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, WideOp,
5520 DAG.getValueType(MVT::i32));
5521 SDValue Ovf = DAG.getSetCC(DL, Op.getValue(1).getValueType(), WideOp, SExt,
5522 ISD::SETNE);
5523 return DAG.getMergeValues({Res, Ovf}, DL);
5524}
5525
5526// Custom lower i32 SMULO with RV64LegalI32 so we take advantage of mulw.
5527 static SDValue lowerSMULO(SDValue Op, SelectionDAG &DAG) {
5528   assert(Op.getValueType() == MVT::i32 && RV64LegalI32 &&
5529 "Unexpected custom legalisation");
5530 SDLoc DL(Op);
5531 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(0));
5532 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(1));
5533 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
5534 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
5535 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Mul,
5536 DAG.getValueType(MVT::i32));
5537 SDValue Ovf = DAG.getSetCC(DL, Op.getValue(1).getValueType(), Mul, SExt,
5538 ISD::SETNE);
5539 return DAG.getMergeValues({Res, Ovf}, DL);
5540}
5541
5542SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
5543 SelectionDAG &DAG) const {
5544 SDLoc DL(Op);
5545 MVT VT = Op.getSimpleValueType();
5546 MVT XLenVT = Subtarget.getXLenVT();
5547 unsigned Check = Op.getConstantOperandVal(1);
5548 unsigned TDCMask = 0;
5549 if (Check & fcSNan)
5550 TDCMask |= RISCV::FPMASK_Signaling_NaN;
5551 if (Check & fcQNan)
5552 TDCMask |= RISCV::FPMASK_Quiet_NaN;
5553   if (Check & fcPosInf)
5554     TDCMask |= RISCV::FPMASK_Positive_Infinity;
5555   if (Check & fcNegInf)
5556     TDCMask |= RISCV::FPMASK_Negative_Infinity;
5557   if (Check & fcPosNormal)
5558     TDCMask |= RISCV::FPMASK_Positive_Normal;
5559   if (Check & fcNegNormal)
5560     TDCMask |= RISCV::FPMASK_Negative_Normal;
5561   if (Check & fcPosSubnormal)
5562     TDCMask |= RISCV::FPMASK_Positive_Subnormal;
5563   if (Check & fcNegSubnormal)
5564     TDCMask |= RISCV::FPMASK_Negative_Subnormal;
5565   if (Check & fcPosZero)
5566 TDCMask |= RISCV::FPMASK_Positive_Zero;
5567 if (Check & fcNegZero)
5568 TDCMask |= RISCV::FPMASK_Negative_Zero;
5569
5570 bool IsOneBitMask = isPowerOf2_32(TDCMask);
5571
5572 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, XLenVT);
5573
5574 if (VT.isVector()) {
5575 SDValue Op0 = Op.getOperand(0);
5576 MVT VT0 = Op.getOperand(0).getSimpleValueType();
5577
5578 if (VT.isScalableVector()) {
5579       MVT DstVT = VT0.changeVectorElementTypeToInteger();
5580       auto [Mask, VL] = getDefaultScalableVLOps(VT0, DL, DAG, Subtarget);
5581 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
5582 Mask = Op.getOperand(2);
5583 VL = Op.getOperand(3);
5584 }
5585 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, DstVT, Op0, Mask,
5586 VL, Op->getFlags());
5587 if (IsOneBitMask)
5588 return DAG.getSetCC(DL, VT, FPCLASS,
5589                             DAG.getConstant(TDCMask, DL, DstVT),
5590                             ISD::SETEQ);
5591 SDValue AND = DAG.getNode(ISD::AND, DL, DstVT, FPCLASS,
5592 DAG.getConstant(TDCMask, DL, DstVT));
5593 return DAG.getSetCC(DL, VT, AND, DAG.getConstant(0, DL, DstVT),
5594 ISD::SETNE);
5595 }
5596
5597 MVT ContainerVT0 = getContainerForFixedLengthVector(VT0);
5598 MVT ContainerVT = getContainerForFixedLengthVector(VT);
5599 MVT ContainerDstVT = ContainerVT0.changeVectorElementTypeToInteger();
5600 auto [Mask, VL] = getDefaultVLOps(VT0, ContainerVT0, DL, DAG, Subtarget);
5601 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
5602 Mask = Op.getOperand(2);
5603 MVT MaskContainerVT =
5604 getContainerForFixedLengthVector(Mask.getSimpleValueType());
5605 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
5606 VL = Op.getOperand(3);
5607 }
5608 Op0 = convertToScalableVector(ContainerVT0, Op0, DAG, Subtarget);
5609
5610 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, ContainerDstVT, Op0,
5611 Mask, VL, Op->getFlags());
5612
5613 TDCMaskV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
5614 DAG.getUNDEF(ContainerDstVT), TDCMaskV, VL);
5615 if (IsOneBitMask) {
5616 SDValue VMSEQ =
5617 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
5618 {FPCLASS, TDCMaskV, DAG.getCondCode(ISD::SETEQ),
5619 DAG.getUNDEF(ContainerVT), Mask, VL});
5620 return convertFromScalableVector(VT, VMSEQ, DAG, Subtarget);
5621 }
5622 SDValue AND = DAG.getNode(RISCVISD::AND_VL, DL, ContainerDstVT, FPCLASS,
5623 TDCMaskV, DAG.getUNDEF(ContainerDstVT), Mask, VL);
5624
5625 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
5626 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
5627 DAG.getUNDEF(ContainerDstVT), SplatZero, VL);
5628
5629 SDValue VMSNE = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
5630 {AND, SplatZero, DAG.getCondCode(ISD::SETNE),
5631 DAG.getUNDEF(ContainerVT), Mask, VL});
5632 return convertFromScalableVector(VT, VMSNE, DAG, Subtarget);
5633 }
5634
5635 SDValue FCLASS = DAG.getNode(RISCVISD::FCLASS, DL, XLenVT, Op.getOperand(0));
5636 SDValue AND = DAG.getNode(ISD::AND, DL, XLenVT, FCLASS, TDCMaskV);
5637   SDValue Res = DAG.getSetCC(DL, XLenVT, AND, DAG.getConstant(0, DL, XLenVT),
5638                              ISD::SETNE);
5639 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
5640}
5641
5642// Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these
5643// operations propagate nans.
5644 static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG,
5645                                       const RISCVSubtarget &Subtarget) {
5646 SDLoc DL(Op);
5647 MVT VT = Op.getSimpleValueType();
5648
5649 SDValue X = Op.getOperand(0);
5650 SDValue Y = Op.getOperand(1);
5651
5652 if (!VT.isVector()) {
5653 MVT XLenVT = Subtarget.getXLenVT();
5654
5655 // If X is a nan, replace Y with X. If Y is a nan, replace X with Y. This
5656 // ensures that when one input is a nan, the other will also be a nan
5657 // allowing the nan to propagate. If both inputs are nan, this will swap the
5658 // inputs which is harmless.
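  // For example, for fmaximum(NaN, 3.0): X is NaN, so NewY becomes X (NaN) and
  // the following fmax sees NaN on both sides, producing the NaN result that
  // fmaximum requires (fmax alone would have returned 3.0).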
5659
5660 SDValue NewY = Y;
5661 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(X)) {
5662 SDValue XIsNonNan = DAG.getSetCC(DL, XLenVT, X, X, ISD::SETOEQ);
5663 NewY = DAG.getSelect(DL, VT, XIsNonNan, Y, X);
5664 }
5665
5666 SDValue NewX = X;
5667 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(Y)) {
5668 SDValue YIsNonNan = DAG.getSetCC(DL, XLenVT, Y, Y, ISD::SETOEQ);
5669 NewX = DAG.getSelect(DL, VT, YIsNonNan, X, Y);
5670 }
5671
5672 unsigned Opc =
5673 Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN;
5674 return DAG.getNode(Opc, DL, VT, NewX, NewY);
5675 }
5676
5677   // Record whether the inputs are known never to be NaN before any
5678   // fixed-length vectors are converted to scalable ones.
5678 bool XIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(X);
5679 bool YIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(Y);
5680
5681 MVT ContainerVT = VT;
5682 if (VT.isFixedLengthVector()) {
5683 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5684 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
5685 Y = convertToScalableVector(ContainerVT, Y, DAG, Subtarget);
5686 }
5687
5688 SDValue Mask, VL;
5689 if (Op->isVPOpcode()) {
5690 Mask = Op.getOperand(2);
5691 if (VT.isFixedLengthVector())
5692 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
5693 Subtarget);
5694 VL = Op.getOperand(3);
5695 } else {
5696 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5697 }
5698
5699 SDValue NewY = Y;
5700 if (!XIsNeverNan) {
5701 SDValue XIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
5702 {X, X, DAG.getCondCode(ISD::SETOEQ),
5703 DAG.getUNDEF(ContainerVT), Mask, VL});
5704 NewY = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, XIsNonNan, Y, X,
5705 DAG.getUNDEF(ContainerVT), VL);
5706 }
5707
5708 SDValue NewX = X;
5709 if (!YIsNeverNan) {
5710 SDValue YIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
5711 {Y, Y, DAG.getCondCode(ISD::SETOEQ),
5712 DAG.getUNDEF(ContainerVT), Mask, VL});
5713 NewX = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, YIsNonNan, X, Y,
5714 DAG.getUNDEF(ContainerVT), VL);
5715 }
5716
5717 unsigned Opc =
5718       Op.getOpcode() == ISD::FMAXIMUM || Op->getOpcode() == ISD::VP_FMAXIMUM
5719           ? RISCVISD::VFMAX_VL
5720           : RISCVISD::VFMIN_VL;
5721   SDValue Res = DAG.getNode(Opc, DL, ContainerVT, NewX, NewY,
5722 DAG.getUNDEF(ContainerVT), Mask, VL);
5723 if (VT.isFixedLengthVector())
5724 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
5725 return Res;
5726}
5727
5728/// Get a RISC-V target specified VL op for a given SDNode.
5729static unsigned getRISCVVLOp(SDValue Op) {
5730#define OP_CASE(NODE) \
5731 case ISD::NODE: \
5732 return RISCVISD::NODE##_VL;
5733#define VP_CASE(NODE) \
5734 case ISD::VP_##NODE: \
5735 return RISCVISD::NODE##_VL;
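// For example, OP_CASE(ADD) expands to "case ISD::ADD: return
// RISCVISD::ADD_VL;" and VP_CASE(ADD) expands to "case ISD::VP_ADD: return
// RISCVISD::ADD_VL;".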
5736 // clang-format off
5737 switch (Op.getOpcode()) {
5738 default:
5739 llvm_unreachable("don't have RISC-V specified VL op for this SDNode");
5740 OP_CASE(ADD)
5741 OP_CASE(SUB)
5742 OP_CASE(MUL)
5743 OP_CASE(MULHS)
5744 OP_CASE(MULHU)
5745 OP_CASE(SDIV)
5746 OP_CASE(SREM)
5747 OP_CASE(UDIV)
5748 OP_CASE(UREM)
5749 OP_CASE(SHL)
5750 OP_CASE(SRA)
5751 OP_CASE(SRL)
5752 OP_CASE(ROTL)
5753 OP_CASE(ROTR)
5754 OP_CASE(BSWAP)
5755 OP_CASE(CTTZ)
5756 OP_CASE(CTLZ)
5757 OP_CASE(CTPOP)
5758 OP_CASE(BITREVERSE)
5759 OP_CASE(SADDSAT)
5760 OP_CASE(UADDSAT)
5761 OP_CASE(SSUBSAT)
5762 OP_CASE(USUBSAT)
5763 OP_CASE(AVGFLOORU)
5764 OP_CASE(AVGCEILU)
5765 OP_CASE(FADD)
5766 OP_CASE(FSUB)
5767 OP_CASE(FMUL)
5768 OP_CASE(FDIV)
5769 OP_CASE(FNEG)
5770 OP_CASE(FABS)
5771 OP_CASE(FSQRT)
5772 OP_CASE(SMIN)
5773 OP_CASE(SMAX)
5774 OP_CASE(UMIN)
5775 OP_CASE(UMAX)
5776 OP_CASE(STRICT_FADD)
5777 OP_CASE(STRICT_FSUB)
5778 OP_CASE(STRICT_FMUL)
5779 OP_CASE(STRICT_FDIV)
5780 OP_CASE(STRICT_FSQRT)
5781 VP_CASE(ADD) // VP_ADD
5782 VP_CASE(SUB) // VP_SUB
5783 VP_CASE(MUL) // VP_MUL
5784 VP_CASE(SDIV) // VP_SDIV
5785 VP_CASE(SREM) // VP_SREM
5786 VP_CASE(UDIV) // VP_UDIV
5787 VP_CASE(UREM) // VP_UREM
5788 VP_CASE(SHL) // VP_SHL
5789 VP_CASE(FADD) // VP_FADD
5790 VP_CASE(FSUB) // VP_FSUB
5791 VP_CASE(FMUL) // VP_FMUL
5792 VP_CASE(FDIV) // VP_FDIV
5793 VP_CASE(FNEG) // VP_FNEG
5794 VP_CASE(FABS) // VP_FABS
5795 VP_CASE(SMIN) // VP_SMIN
5796 VP_CASE(SMAX) // VP_SMAX
5797 VP_CASE(UMIN) // VP_UMIN
5798 VP_CASE(UMAX) // VP_UMAX
5799 VP_CASE(FCOPYSIGN) // VP_FCOPYSIGN
5800 VP_CASE(SETCC) // VP_SETCC
5801 VP_CASE(SINT_TO_FP) // VP_SINT_TO_FP
5802 VP_CASE(UINT_TO_FP) // VP_UINT_TO_FP
5803 VP_CASE(BITREVERSE) // VP_BITREVERSE
5804 VP_CASE(SADDSAT) // VP_SADDSAT
5805 VP_CASE(UADDSAT) // VP_UADDSAT
5806 VP_CASE(SSUBSAT) // VP_SSUBSAT
5807 VP_CASE(USUBSAT) // VP_USUBSAT
5808 VP_CASE(BSWAP) // VP_BSWAP
5809 VP_CASE(CTLZ) // VP_CTLZ
5810 VP_CASE(CTTZ) // VP_CTTZ
5811 VP_CASE(CTPOP) // VP_CTPOP
5812   case ISD::CTLZ_ZERO_UNDEF:
5813   case ISD::VP_CTLZ_ZERO_UNDEF:
5814     return RISCVISD::CTLZ_VL;
5815   case ISD::CTTZ_ZERO_UNDEF:
5816   case ISD::VP_CTTZ_ZERO_UNDEF:
5817 return RISCVISD::CTTZ_VL;
5818 case ISD::FMA:
5819 case ISD::VP_FMA:
5820 return RISCVISD::VFMADD_VL;
5821   case ISD::STRICT_FMA:
5822     return RISCVISD::STRICT_VFMADD_VL;
5823 case ISD::AND:
5824 case ISD::VP_AND:
5825 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
5826 return RISCVISD::VMAND_VL;
5827 return RISCVISD::AND_VL;
5828 case ISD::OR:
5829 case ISD::VP_OR:
5830 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
5831 return RISCVISD::VMOR_VL;
5832 return RISCVISD::OR_VL;
5833 case ISD::XOR:
5834 case ISD::VP_XOR:
5835 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
5836 return RISCVISD::VMXOR_VL;
5837 return RISCVISD::XOR_VL;
5838 case ISD::VP_SELECT:
5839 case ISD::VP_MERGE:
5840 return RISCVISD::VMERGE_VL;
5841 case ISD::VP_ASHR:
5842 return RISCVISD::SRA_VL;
5843 case ISD::VP_LSHR:
5844 return RISCVISD::SRL_VL;
5845 case ISD::VP_SQRT:
5846 return RISCVISD::FSQRT_VL;
5847 case ISD::VP_SIGN_EXTEND:
5848 return RISCVISD::VSEXT_VL;
5849 case ISD::VP_ZERO_EXTEND:
5850 return RISCVISD::VZEXT_VL;
5851   case ISD::VP_FP_TO_SINT:
5852     return RISCVISD::VFCVT_RTZ_X_F_VL;
5853   case ISD::VP_FP_TO_UINT:
5854     return RISCVISD::VFCVT_RTZ_XU_F_VL;
5855 case ISD::FMINNUM:
5856 case ISD::VP_FMINNUM:
5857 return RISCVISD::VFMIN_VL;
5858 case ISD::FMAXNUM:
5859 case ISD::VP_FMAXNUM:
5860 return RISCVISD::VFMAX_VL;
5861 case ISD::LRINT:
5862 case ISD::VP_LRINT:
5863 case ISD::LLRINT:
5864   case ISD::VP_LLRINT:
5865     return RISCVISD::VFCVT_X_F_VL;
5866 }
5867 // clang-format on
5868#undef OP_CASE
5869#undef VP_CASE
5870}
5871
5872/// Return true if a RISC-V target specified op has a merge operand.
5873static bool hasMergeOp(unsigned Opcode) {
5874   assert(Opcode > RISCVISD::FIRST_NUMBER &&
5875          Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE &&
5876          "not a RISC-V target specific op");
5877   static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP ==
5878                     126 &&
5879                 RISCVISD::LAST_RISCV_STRICTFP_OPCODE -
5880                         ISD::FIRST_TARGET_STRICTFP_OPCODE ==
5881                     21 &&
5882                 "adding target specific op should update this function");
5883 if (Opcode >= RISCVISD::ADD_VL && Opcode <= RISCVISD::VFMAX_VL)
5884 return true;
5885 if (Opcode == RISCVISD::FCOPYSIGN_VL)
5886 return true;
5887 if (Opcode >= RISCVISD::VWMUL_VL && Opcode <= RISCVISD::VFWSUB_W_VL)
5888 return true;
5889 if (Opcode == RISCVISD::SETCC_VL)
5890 return true;
5891 if (Opcode >= RISCVISD::STRICT_FADD_VL && Opcode <= RISCVISD::STRICT_FDIV_VL)
5892 return true;
5893 if (Opcode == RISCVISD::VMERGE_VL)
5894 return true;
5895 return false;
5896}
5897
5898/// Return true if a RISC-V target specified op has a mask operand.
5899static bool hasMaskOp(unsigned Opcode) {
5900   assert(Opcode > RISCVISD::FIRST_NUMBER &&
5901          Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE &&
5902          "not a RISC-V target specific op");
5903   static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP ==
5904                     126 &&
5905                 RISCVISD::LAST_RISCV_STRICTFP_OPCODE -
5906                         ISD::FIRST_TARGET_STRICTFP_OPCODE ==
5907                     21 &&
5908                 "adding target specific op should update this function");
5909 if (Opcode >= RISCVISD::TRUNCATE_VECTOR_VL && Opcode <= RISCVISD::SETCC_VL)
5910 return true;
5911 if (Opcode >= RISCVISD::VRGATHER_VX_VL && Opcode <= RISCVISD::VFIRST_VL)
5912 return true;
5913 if (Opcode >= RISCVISD::STRICT_FADD_VL &&
5914       Opcode <= RISCVISD::STRICT_VFROUND_NOEXCEPT_VL)
5915     return true;
5916 return false;
5917}
5918
5919 static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG) {
5920   auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
5921 SDLoc DL(Op);
5922
5923   SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
5924   SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
5925
5926 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
5927 if (!Op.getOperand(j).getValueType().isVector()) {
5928 LoOperands[j] = Op.getOperand(j);
5929 HiOperands[j] = Op.getOperand(j);
5930 continue;
5931 }
5932 std::tie(LoOperands[j], HiOperands[j]) =
5933 DAG.SplitVector(Op.getOperand(j), DL);
5934 }
5935
5936 SDValue LoRes =
5937 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
5938 SDValue HiRes =
5939 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
5940
5941 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
5942}
5943
5944 static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG) {
5945   assert(ISD::isVPOpcode(Op.getOpcode()) && "Not a VP op");
5946 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
5947 SDLoc DL(Op);
5948
5949   SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
5950   SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
5951
5952 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
5953 if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == j) {
5954 std::tie(LoOperands[j], HiOperands[j]) =
5955 DAG.SplitEVL(Op.getOperand(j), Op.getValueType(), DL);
5956 continue;
5957 }
5958 if (!Op.getOperand(j).getValueType().isVector()) {
5959 LoOperands[j] = Op.getOperand(j);
5960 HiOperands[j] = Op.getOperand(j);
5961 continue;
5962 }
5963 std::tie(LoOperands[j], HiOperands[j]) =
5964 DAG.SplitVector(Op.getOperand(j), DL);
5965 }
5966
5967 SDValue LoRes =
5968 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
5969 SDValue HiRes =
5970 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
5971
5972 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
5973}
5974
5975 static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG) {
5976   SDLoc DL(Op);
5977
5978 auto [Lo, Hi] = DAG.SplitVector(Op.getOperand(1), DL);
5979 auto [MaskLo, MaskHi] = DAG.SplitVector(Op.getOperand(2), DL);
5980 auto [EVLLo, EVLHi] =
5981 DAG.SplitEVL(Op.getOperand(3), Op.getOperand(1).getValueType(), DL);
5982
5983 SDValue ResLo =
5984 DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
5985 {Op.getOperand(0), Lo, MaskLo, EVLLo}, Op->getFlags());
5986 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
5987 {ResLo, Hi, MaskHi, EVLHi}, Op->getFlags());
5988}
5989
5990 static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG) {
5991
5992 assert(Op->isStrictFPOpcode());
5993
5994 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op->getValueType(0));
5995
5996 SDVTList LoVTs = DAG.getVTList(LoVT, Op->getValueType(1));
5997 SDVTList HiVTs = DAG.getVTList(HiVT, Op->getValueType(1));
5998
5999 SDLoc DL(Op);
6000
6001   SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
6002   SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
6003
6004 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
6005 if (!Op.getOperand(j).getValueType().isVector()) {
6006 LoOperands[j] = Op.getOperand(j);
6007 HiOperands[j] = Op.getOperand(j);
6008 continue;
6009 }
6010 std::tie(LoOperands[j], HiOperands[j]) =
6011 DAG.SplitVector(Op.getOperand(j), DL);
6012 }
6013
6014 SDValue LoRes =
6015 DAG.getNode(Op.getOpcode(), DL, LoVTs, LoOperands, Op->getFlags());
6016 HiOperands[0] = LoRes.getValue(1);
6017 SDValue HiRes =
6018 DAG.getNode(Op.getOpcode(), DL, HiVTs, HiOperands, Op->getFlags());
6019
6020 SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, DL, Op->getValueType(0),
6021 LoRes.getValue(0), HiRes.getValue(0));
6022 return DAG.getMergeValues({V, HiRes.getValue(1)}, DL);
6023}
6024
6025 SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
6026                                             SelectionDAG &DAG) const {
6027 switch (Op.getOpcode()) {
6028 default:
6029 report_fatal_error("unimplemented operand");
6030 case ISD::ATOMIC_FENCE:
6031 return LowerATOMIC_FENCE(Op, DAG, Subtarget);
6032 case ISD::GlobalAddress:
6033 return lowerGlobalAddress(Op, DAG);
6034 case ISD::BlockAddress:
6035 return lowerBlockAddress(Op, DAG);
6036 case ISD::ConstantPool:
6037 return lowerConstantPool(Op, DAG);
6038 case ISD::JumpTable:
6039 return lowerJumpTable(Op, DAG);
6040   case ISD::GlobalTLSAddress:
6041     return lowerGlobalTLSAddress(Op, DAG);
6042 case ISD::Constant:
6043 return lowerConstant(Op, DAG, Subtarget);
6044 case ISD::SELECT:
6045 return lowerSELECT(Op, DAG);
6046 case ISD::BRCOND:
6047 return lowerBRCOND(Op, DAG);
6048 case ISD::VASTART:
6049 return lowerVASTART(Op, DAG);
6050 case ISD::FRAMEADDR:
6051 return lowerFRAMEADDR(Op, DAG);
6052 case ISD::RETURNADDR:
6053 return lowerRETURNADDR(Op, DAG);
6054 case ISD::SADDO:
6055 case ISD::SSUBO:
6056 return lowerSADDO_SSUBO(Op, DAG);
6057 case ISD::SMULO:
6058 return lowerSMULO(Op, DAG);
6059 case ISD::SHL_PARTS:
6060 return lowerShiftLeftParts(Op, DAG);
6061 case ISD::SRA_PARTS:
6062 return lowerShiftRightParts(Op, DAG, true);
6063 case ISD::SRL_PARTS:
6064 return lowerShiftRightParts(Op, DAG, false);
6065 case ISD::ROTL:
6066 case ISD::ROTR:
6067 if (Op.getValueType().isFixedLengthVector()) {
6068 assert(Subtarget.hasStdExtZvkb());
6069 return lowerToScalableOp(Op, DAG);
6070 }
6071 assert(Subtarget.hasVendorXTHeadBb() &&
6072 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
6073 "Unexpected custom legalization");
6074 // XTHeadBb only supports rotate by constant.
6075 if (!isa<ConstantSDNode>(Op.getOperand(1)))
6076 return SDValue();
6077 return Op;
6078 case ISD::BITCAST: {
6079 SDLoc DL(Op);
6080 EVT VT = Op.getValueType();
6081 SDValue Op0 = Op.getOperand(0);
6082 EVT Op0VT = Op0.getValueType();
6083 MVT XLenVT = Subtarget.getXLenVT();
6084 if (VT == MVT::f16 && Op0VT == MVT::i16 &&
6085 Subtarget.hasStdExtZfhminOrZhinxmin()) {
6086 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
6087 SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
6088 return FPConv;
6089 }
6090 if (VT == MVT::bf16 && Op0VT == MVT::i16 &&
6091 Subtarget.hasStdExtZfbfmin()) {
6092 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
6093 SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::bf16, NewOp0);
6094 return FPConv;
6095 }
6096 if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
6097 Subtarget.hasStdExtFOrZfinx()) {
6098 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
6099 SDValue FPConv =
6100 DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
6101 return FPConv;
6102 }
6103 if (VT == MVT::f64 && Op0VT == MVT::i64 && XLenVT == MVT::i32) {
6104 SDValue Lo, Hi;
6105 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
6106 SDValue RetReg =
6107 DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
6108 return RetReg;
6109 }
6110
6111 // Consider other scalar<->scalar casts as legal if the types are legal.
6112 // Otherwise expand them.
6113 if (!VT.isVector() && !Op0VT.isVector()) {
6114 if (isTypeLegal(VT) && isTypeLegal(Op0VT))
6115 return Op;
6116 return SDValue();
6117 }
6118
6119 assert(!VT.isScalableVector() && !Op0VT.isScalableVector() &&
6120 "Unexpected types");
6121
6122 if (VT.isFixedLengthVector()) {
6123 // We can handle fixed length vector bitcasts with a simple replacement
6124 // in isel.
6125 if (Op0VT.isFixedLengthVector())
6126 return Op;
6127 // When bitcasting from scalar to fixed-length vector, insert the scalar
6128 // into a one-element vector of the result type, and perform a vector
6129 // bitcast.
6130 if (!Op0VT.isVector()) {
6131 EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
6132 if (!isTypeLegal(BVT))
6133 return SDValue();
6134 return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT,
6135 DAG.getUNDEF(BVT), Op0,
6136 DAG.getVectorIdxConstant(0, DL)));
6137 }
6138 return SDValue();
6139 }
6140 // Custom-legalize bitcasts from fixed-length vector types to scalar types
6141 // thus: bitcast the vector to a one-element vector type whose element type
6142 // is the same as the result type, and extract the first element.
6143 if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
6144 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
6145 if (!isTypeLegal(BVT))
6146 return SDValue();
6147 SDValue BVec = DAG.getBitcast(BVT, Op0);
6148 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
6149 DAG.getVectorIdxConstant(0, DL));
6150 }
6151 return SDValue();
6152 }
6153   case ISD::INTRINSIC_WO_CHAIN:
6154     return LowerINTRINSIC_WO_CHAIN(Op, DAG);
6155   case ISD::INTRINSIC_W_CHAIN:
6156     return LowerINTRINSIC_W_CHAIN(Op, DAG);
6157   case ISD::INTRINSIC_VOID:
6158     return LowerINTRINSIC_VOID(Op, DAG);
6159 case ISD::IS_FPCLASS:
6160 return LowerIS_FPCLASS(Op, DAG);
6161 case ISD::BITREVERSE: {
6162 MVT VT = Op.getSimpleValueType();
6163 if (VT.isFixedLengthVector()) {
6164 assert(Subtarget.hasStdExtZvbb());
6165 return lowerToScalableOp(Op, DAG);
6166 }
6167 SDLoc DL(Op);
6168 assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
6169 assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");
6170 // Expand bitreverse to a bswap(rev8) followed by brev8.
6171 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0));
6172 return DAG.getNode(RISCVISD::BREV8, DL, VT, BSwap);
6173 }
6174 case ISD::TRUNCATE:
6175 // Only custom-lower vector truncates
6176 if (!Op.getSimpleValueType().isVector())
6177 return Op;
6178 return lowerVectorTruncLike(Op, DAG);
6179 case ISD::ANY_EXTEND:
6180 case ISD::ZERO_EXTEND:
6181 if (Op.getOperand(0).getValueType().isVector() &&
6182 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6183 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
6184 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL);
6185 case ISD::SIGN_EXTEND:
6186 if (Op.getOperand(0).getValueType().isVector() &&
6187 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6188 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
6189 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL);
6190   case ISD::SPLAT_VECTOR_PARTS:
6191     return lowerSPLAT_VECTOR_PARTS(Op, DAG);
6192   case ISD::INSERT_VECTOR_ELT:
6193     return lowerINSERT_VECTOR_ELT(Op, DAG);
6194   case ISD::EXTRACT_VECTOR_ELT:
6195     return lowerEXTRACT_VECTOR_ELT(Op, DAG);
6196 case ISD::SCALAR_TO_VECTOR: {
6197 MVT VT = Op.getSimpleValueType();
6198 SDLoc DL(Op);
6199 SDValue Scalar = Op.getOperand(0);
6200 if (VT.getVectorElementType() == MVT::i1) {
6201 MVT WideVT = VT.changeVectorElementType(MVT::i8);
6202 SDValue V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, WideVT, Scalar);
6203 return DAG.getNode(ISD::TRUNCATE, DL, VT, V);
6204 }
6205 MVT ContainerVT = VT;
6206 if (VT.isFixedLengthVector())
6207 ContainerVT = getContainerForFixedLengthVector(VT);
6208 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
6209 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Scalar);
6210 SDValue V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT,
6211 DAG.getUNDEF(ContainerVT), Scalar, VL);
6212 if (VT.isFixedLengthVector())
6213 V = convertFromScalableVector(VT, V, DAG, Subtarget);
6214 return V;
6215 }
6216 case ISD::VSCALE: {
6217 MVT XLenVT = Subtarget.getXLenVT();
6218 MVT VT = Op.getSimpleValueType();
6219 SDLoc DL(Op);
6220 SDValue Res = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
6221 // We define our scalable vector types for lmul=1 to use a 64 bit known
6222 // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
6223 // vscale as VLENB / 8.
6224 static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
6225 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
6226 report_fatal_error("Support for VLEN==32 is incomplete.");
6227 // We assume VLENB is a multiple of 8. We manually choose the best shift
6228 // here because SimplifyDemandedBits isn't always able to simplify it.
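    // For example, with constant operand Val: Val == 2 gives srl vlenb, 2;
    // Val == 16 gives sll vlenb, 1; Val == 24 (a multiple of 8) gives
    // mul vlenb, 3; anything else, e.g. Val == 6, falls back to
    // (vlenb >> 3) * Val.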
6229 uint64_t Val = Op.getConstantOperandVal(0);
6230 if (isPowerOf2_64(Val)) {
6231 uint64_t Log2 = Log2_64(Val);
6232 if (Log2 < 3)
6233 Res = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
6234 DAG.getConstant(3 - Log2, DL, VT));
6235 else if (Log2 > 3)
6236 Res = DAG.getNode(ISD::SHL, DL, XLenVT, Res,
6237 DAG.getConstant(Log2 - 3, DL, XLenVT));
6238 } else if ((Val % 8) == 0) {
6239 // If the multiplier is a multiple of 8, scale it down to avoid needing
6240 // to shift the VLENB value.
6241 Res = DAG.getNode(ISD::MUL, DL, XLenVT, Res,
6242 DAG.getConstant(Val / 8, DL, XLenVT));
6243 } else {
6244 SDValue VScale = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
6245 DAG.getConstant(3, DL, XLenVT));
6246 Res = DAG.getNode(ISD::MUL, DL, XLenVT, VScale,
6247 DAG.getConstant(Val, DL, XLenVT));
6248 }
6249 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
6250 }
6251 case ISD::FPOWI: {
6252 // Custom promote f16 powi with illegal i32 integer type on RV64. Once
6253 // promoted this will be legalized into a libcall by LegalizeIntegerTypes.
6254 if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() &&
6255 Op.getOperand(1).getValueType() == MVT::i32) {
6256 SDLoc DL(Op);
6257 SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
6258 SDValue Powi =
6259 DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1));
6260 return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi,
6261 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6262 }
6263 return SDValue();
6264 }
6265 case ISD::FMAXIMUM:
6266 case ISD::FMINIMUM:
6267 if (Op.getValueType() == MVT::nxv32f16 &&
6268 (Subtarget.hasVInstructionsF16Minimal() &&
6269 !Subtarget.hasVInstructionsF16()))
6270 return SplitVectorOp(Op, DAG);
6271 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
6272 case ISD::FP_EXTEND: {
6273 SDLoc DL(Op);
6274 EVT VT = Op.getValueType();
6275 SDValue Op0 = Op.getOperand(0);
6276 EVT Op0VT = Op0.getValueType();
6277 if (VT == MVT::f32 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin())
6278 return DAG.getNode(RISCVISD::FP_EXTEND_BF16, DL, MVT::f32, Op0);
6279 if (VT == MVT::f64 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) {
6280 SDValue FloatVal =
6281 DAG.getNode(RISCVISD::FP_EXTEND_BF16, DL, MVT::f32, Op0);
6282 return DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, FloatVal);
6283 }
6284
6285 if (!Op.getValueType().isVector())
6286 return Op;
6287 return lowerVectorFPExtendOrRoundLike(Op, DAG);
6288 }
6289 case ISD::FP_ROUND: {
6290 SDLoc DL(Op);
6291 EVT VT = Op.getValueType();
6292 SDValue Op0 = Op.getOperand(0);
6293 EVT Op0VT = Op0.getValueType();
6294 if (VT == MVT::bf16 && Op0VT == MVT::f32 && Subtarget.hasStdExtZfbfmin())
6295 return DAG.getNode(RISCVISD::FP_ROUND_BF16, DL, MVT::bf16, Op0);
6296 if (VT == MVT::bf16 && Op0VT == MVT::f64 && Subtarget.hasStdExtZfbfmin() &&
6297 Subtarget.hasStdExtDOrZdinx()) {
6298 SDValue FloatVal =
6299 DAG.getNode(ISD::FP_ROUND, DL, MVT::f32, Op0,
6300 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6301 return DAG.getNode(RISCVISD::FP_ROUND_BF16, DL, MVT::bf16, FloatVal);
6302 }
6303
6304 if (!Op.getValueType().isVector())
6305 return Op;
6306 return lowerVectorFPExtendOrRoundLike(Op, DAG);
6307 }
6308   case ISD::STRICT_FP_ROUND:
6309   case ISD::STRICT_FP_EXTEND:
6310     return lowerStrictFPExtendOrRoundLike(Op, DAG);
6311 case ISD::SINT_TO_FP:
6312 case ISD::UINT_TO_FP:
6313 if (Op.getValueType().isVector() &&
6314 Op.getValueType().getScalarType() == MVT::f16 &&
6315 (Subtarget.hasVInstructionsF16Minimal() &&
6316 !Subtarget.hasVInstructionsF16())) {
6317 if (Op.getValueType() == MVT::nxv32f16)
6318 return SplitVectorOp(Op, DAG);
6319 // int -> f32
6320 SDLoc DL(Op);
6321 MVT NVT =
6322 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
6323 SDValue NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
6324 // f32 -> f16
6325 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
6326 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6327 }
6328 [[fallthrough]];
6329 case ISD::FP_TO_SINT:
6330 case ISD::FP_TO_UINT:
6331 if (SDValue Op1 = Op.getOperand(0);
6332 Op1.getValueType().isVector() &&
6333 Op1.getValueType().getScalarType() == MVT::f16 &&
6334 (Subtarget.hasVInstructionsF16Minimal() &&
6335 !Subtarget.hasVInstructionsF16())) {
6336 if (Op1.getValueType() == MVT::nxv32f16)
6337 return SplitVectorOp(Op, DAG);
6338 // f16 -> f32
6339 SDLoc DL(Op);
6340 MVT NVT = MVT::getVectorVT(MVT::f32,
6341 Op1.getValueType().getVectorElementCount());
6342 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
6343 // f32 -> int
6344 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), WidenVec);
6345 }
6346 [[fallthrough]];
6347   case ISD::STRICT_FP_TO_SINT:
6348   case ISD::STRICT_FP_TO_UINT:
6349   case ISD::STRICT_SINT_TO_FP:
6350   case ISD::STRICT_UINT_TO_FP: {
6351     // RVV can only do fp<->int conversions to types half/double the size as
6352 // the source. We custom-lower any conversions that do two hops into
6353 // sequences.
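    // For example, a v4i8 -> v4f32 conversion is emitted as a sign/zero
    // extension to v4i16 followed by an i16 -> f32 widening convert, and a
    // v4f64 -> v4i8 conversion is emitted as an f64 -> i32 narrowing convert
    // followed by a truncate to i8.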
6354 MVT VT = Op.getSimpleValueType();
6355 if (!VT.isVector())
6356 return Op;
6357 SDLoc DL(Op);
6358 bool IsStrict = Op->isStrictFPOpcode();
6359 SDValue Src = Op.getOperand(0 + IsStrict);
6360 MVT EltVT = VT.getVectorElementType();
6361 MVT SrcVT = Src.getSimpleValueType();
6362 MVT SrcEltVT = SrcVT.getVectorElementType();
6363 unsigned EltSize = EltVT.getSizeInBits();
6364 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
6365 assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
6366 "Unexpected vector element types");
6367
6368 bool IsInt2FP = SrcEltVT.isInteger();
6369 // Widening conversions
6370 if (EltSize > (2 * SrcEltSize)) {
6371 if (IsInt2FP) {
6372 // Do a regular integer sign/zero extension then convert to float.
6373 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize / 2),
6374                                       VT.getVectorElementCount());
6375         unsigned ExtOpcode = (Op.getOpcode() == ISD::UINT_TO_FP ||
6376 Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
6377                                   ? ISD::ZERO_EXTEND
6378                                   : ISD::SIGN_EXTEND;
6379         SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
6380 if (IsStrict)
6381 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(),
6382 Op.getOperand(0), Ext);
6383 return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
6384 }
6385 // FP2Int
6386 assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
6387 // Do one doubling fp_extend then complete the operation by converting
6388 // to int.
6389 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
6390 if (IsStrict) {
6391 auto [FExt, Chain] =
6392 DAG.getStrictFPExtendOrRound(Src, Op.getOperand(0), DL, InterimFVT);
6393 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(), Chain, FExt);
6394 }
6395 SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
6396 return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
6397 }
6398
6399 // Narrowing conversions
6400 if (SrcEltSize > (2 * EltSize)) {
6401 if (IsInt2FP) {
6402 // One narrowing int_to_fp, then an fp_round.
6403 assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
6404 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
6405 if (IsStrict) {
6406 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL,
6407 DAG.getVTList(InterimFVT, MVT::Other),
6408 Op.getOperand(0), Src);
6409 SDValue Chain = Int2FP.getValue(1);
6410 return DAG.getStrictFPExtendOrRound(Int2FP, Chain, DL, VT).first;
6411 }
6412 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
6413 return DAG.getFPExtendOrRound(Int2FP, DL, VT);
6414 }
6415 // FP2Int
6416 // One narrowing fp_to_int, then truncate the integer. If the float isn't
6417 // representable by the integer, the result is poison.
6418 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
6419                                     VT.getVectorElementCount());
6420       if (IsStrict) {
6421 SDValue FP2Int =
6422 DAG.getNode(Op.getOpcode(), DL, DAG.getVTList(IVecVT, MVT::Other),
6423 Op.getOperand(0), Src);
6424 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
6425 return DAG.getMergeValues({Res, FP2Int.getValue(1)}, DL);
6426 }
6427 SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
6428 return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
6429 }
6430
6431 // Scalable vectors can exit here. Patterns will handle equally-sized
6432 // conversions halving/doubling ones.
6433 if (!VT.isFixedLengthVector())
6434 return Op;
6435
6436 // For fixed-length vectors we lower to a custom "VL" node.
6437 unsigned RVVOpc = 0;
6438 switch (Op.getOpcode()) {
6439 default:
6440 llvm_unreachable("Impossible opcode");
6441 case ISD::FP_TO_SINT:
6442       RVVOpc = RISCVISD::VFCVT_RTZ_X_F_VL;
6443       break;
6444     case ISD::FP_TO_UINT:
6445       RVVOpc = RISCVISD::VFCVT_RTZ_XU_F_VL;
6446       break;
6447 case ISD::SINT_TO_FP:
6448 RVVOpc = RISCVISD::SINT_TO_FP_VL;
6449 break;
6450 case ISD::UINT_TO_FP:
6451 RVVOpc = RISCVISD::UINT_TO_FP_VL;
6452 break;
6453     case ISD::STRICT_FP_TO_SINT:
6454       RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_X_F_VL;
6455       break;
6456     case ISD::STRICT_FP_TO_UINT:
6457       RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_XU_F_VL;
6458       break;
6459     case ISD::STRICT_SINT_TO_FP:
6460       RVVOpc = RISCVISD::STRICT_SINT_TO_FP_VL;
6461       break;
6462     case ISD::STRICT_UINT_TO_FP:
6463       RVVOpc = RISCVISD::STRICT_UINT_TO_FP_VL;
6464       break;
6465 }
6466
6467 MVT ContainerVT = getContainerForFixedLengthVector(VT);
6468 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
6469 assert(ContainerVT.getVectorElementCount() == SrcContainerVT.getVectorElementCount() &&
6470 "Expected same element count");
6471
6472 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
6473
6474 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
6475 if (IsStrict) {
6476 Src = DAG.getNode(RVVOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
6477 Op.getOperand(0), Src, Mask, VL);
6478 SDValue SubVec = convertFromScalableVector(VT, Src, DAG, Subtarget);
6479 return DAG.getMergeValues({SubVec, Src.getValue(1)}, DL);
6480 }
6481 Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
6482 return convertFromScalableVector(VT, Src, DAG, Subtarget);
6483 }
6484   case ISD::FP_TO_SINT_SAT:
6485   case ISD::FP_TO_UINT_SAT:
6486     return lowerFP_TO_INT_SAT(Op, DAG, Subtarget);
6487 case ISD::FP_TO_BF16: {
6488 // Custom lower to ensure the libcall return is passed in an FPR on hard
6489 // float ABIs.
6490 assert(!Subtarget.isSoftFPABI() && "Unexpected custom legalization");
6491 SDLoc DL(Op);
6492 MakeLibCallOptions CallOptions;
6493 RTLIB::Libcall LC =
6494 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
6495 SDValue Res =
6496 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
6497 if (Subtarget.is64Bit() && !RV64LegalI32)
6498 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
6499 return DAG.getBitcast(MVT::i32, Res);
6500 }
6501 case ISD::BF16_TO_FP: {
6502 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalization");
6503 MVT VT = Op.getSimpleValueType();
6504 SDLoc DL(Op);
6505 Op = DAG.getNode(
6506 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
6507 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
6508 SDValue Res = Subtarget.is64Bit()
6509 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op)
6510 : DAG.getBitcast(MVT::f32, Op);
6511 // fp_extend if the target VT is bigger than f32.
6512 if (VT != MVT::f32)
6513 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
6514 return Res;
6515 }
6516 case ISD::FP_TO_FP16: {
6517 // Custom lower to ensure the libcall return is passed in an FPR on hard
6518 // float ABIs.
6519 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
6520 SDLoc DL(Op);
6521 MakeLibCallOptions CallOptions;
6522 RTLIB::Libcall LC =
6523 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::f16);
6524 SDValue Res =
6525 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
6526 if (Subtarget.is64Bit() && !RV64LegalI32)
6527 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
6528 return DAG.getBitcast(MVT::i32, Res);
6529 }
6530 case ISD::FP16_TO_FP: {
6531 // Custom lower to ensure the libcall argument is passed in an FPR on hard
6532 // float ABIs.
6533 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
6534 SDLoc DL(Op);
6535 MakeLibCallOptions CallOptions;
6536 SDValue Arg = Subtarget.is64Bit()
6537 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32,
6538 Op.getOperand(0))
6539 : DAG.getBitcast(MVT::f32, Op.getOperand(0));
6540 SDValue Res =
6541 makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg, CallOptions, DL)
6542 .first;
6543 return Res;
6544 }
6545 case ISD::FTRUNC:
6546 case ISD::FCEIL:
6547 case ISD::FFLOOR:
6548 case ISD::FNEARBYINT:
6549 case ISD::FRINT:
6550 case ISD::FROUND:
6551 case ISD::FROUNDEVEN:
6552 return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
6553 case ISD::LRINT:
6554 case ISD::LLRINT:
6555 return lowerVectorXRINT(Op, DAG, Subtarget);
6556 case ISD::VECREDUCE_ADD:
6557   case ISD::VECREDUCE_UMAX:
6558   case ISD::VECREDUCE_SMAX:
6559   case ISD::VECREDUCE_UMIN:
6560   case ISD::VECREDUCE_SMIN:
6561     return lowerVECREDUCE(Op, DAG);
6562 case ISD::VECREDUCE_AND:
6563 case ISD::VECREDUCE_OR:
6564 case ISD::VECREDUCE_XOR:
6565 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6566 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false);
6567 return lowerVECREDUCE(Op, DAG);
6568   case ISD::VECREDUCE_FADD:
6569   case ISD::VECREDUCE_SEQ_FADD:
6570   case ISD::VECREDUCE_FMIN:
6571   case ISD::VECREDUCE_FMAX:
6572   case ISD::VECREDUCE_FMAXIMUM:
6573   case ISD::VECREDUCE_FMINIMUM:
6574     return lowerFPVECREDUCE(Op, DAG);
6575 case ISD::VP_REDUCE_ADD:
6576 case ISD::VP_REDUCE_UMAX:
6577 case ISD::VP_REDUCE_SMAX:
6578 case ISD::VP_REDUCE_UMIN:
6579 case ISD::VP_REDUCE_SMIN:
6580 case ISD::VP_REDUCE_FADD:
6581 case ISD::VP_REDUCE_SEQ_FADD:
6582 case ISD::VP_REDUCE_FMIN:
6583 case ISD::VP_REDUCE_FMAX:
6584 if (Op.getOperand(1).getValueType() == MVT::nxv32f16 &&
6585 (Subtarget.hasVInstructionsF16Minimal() &&
6586 !Subtarget.hasVInstructionsF16()))
6587 return SplitVectorReductionOp(Op, DAG);
6588 return lowerVPREDUCE(Op, DAG);
6589 case ISD::VP_REDUCE_AND:
6590 case ISD::VP_REDUCE_OR:
6591 case ISD::VP_REDUCE_XOR:
6592 if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
6593 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
6594 return lowerVPREDUCE(Op, DAG);
6595 case ISD::UNDEF: {
6596 MVT ContainerVT = getContainerForFixedLengthVector(Op.getSimpleValueType());
6597 return convertFromScalableVector(Op.getSimpleValueType(),
6598 DAG.getUNDEF(ContainerVT), DAG, Subtarget);
6599 }
6600   case ISD::INSERT_SUBVECTOR:
6601     return lowerINSERT_SUBVECTOR(Op, DAG);
6602   case ISD::EXTRACT_SUBVECTOR:
6603     return lowerEXTRACT_SUBVECTOR(Op, DAG);
6604   case ISD::VECTOR_DEINTERLEAVE:
6605     return lowerVECTOR_DEINTERLEAVE(Op, DAG);
6606   case ISD::VECTOR_INTERLEAVE:
6607     return lowerVECTOR_INTERLEAVE(Op, DAG);
6608 case ISD::STEP_VECTOR:
6609 return lowerSTEP_VECTOR(Op, DAG);
6610   case ISD::VECTOR_REVERSE:
6611     return lowerVECTOR_REVERSE(Op, DAG);
6612 case ISD::VECTOR_SPLICE:
6613 return lowerVECTOR_SPLICE(Op, DAG);
6614 case ISD::BUILD_VECTOR:
6615 return lowerBUILD_VECTOR(Op, DAG, Subtarget);
6616 case ISD::SPLAT_VECTOR:
6617 if (Op.getValueType().getScalarType() == MVT::f16 &&
6618 (Subtarget.hasVInstructionsF16Minimal() &&
6619 !Subtarget.hasVInstructionsF16())) {
6620 if (Op.getValueType() == MVT::nxv32f16)
6621 return SplitVectorOp(Op, DAG);
6622 SDLoc DL(Op);
6623 SDValue NewScalar =
6624 DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
6625 SDValue NewSplat = DAG.getNode(
6626           ISD::SPLAT_VECTOR, DL,
6627           MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount()),
6628 NewScalar);
6629 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NewSplat,
6630 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6631 }
6632 if (Op.getValueType().getVectorElementType() == MVT::i1)
6633 return lowerVectorMaskSplat(Op, DAG);
6634 return SDValue();
6635   case ISD::VECTOR_SHUFFLE:
6636     return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
6637 case ISD::CONCAT_VECTORS: {
6638 // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
6639 // better than going through the stack, as the default expansion does.
6640 SDLoc DL(Op);
6641 MVT VT = Op.getSimpleValueType();
6642 MVT ContainerVT = VT;
6643 if (VT.isFixedLengthVector())
6644 ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
6645
6646 // Recursively split concat_vectors with more than 2 operands:
6647 //
6648 // concat_vector op1, op2, op3, op4
6649 // ->
6650 // concat_vector (concat_vector op1, op2), (concat_vector op3, op4)
6651 //
6652 // This reduces the length of the chain of vslideups and allows us to
6653 // perform the vslideups at a smaller LMUL, limited to MF2.
6654 if (Op.getNumOperands() > 2 &&
6655 ContainerVT.bitsGE(getLMUL1VT(ContainerVT))) {
6656 MVT HalfVT = VT.getHalfNumVectorElementsVT();
6657       assert(isPowerOf2_32(Op.getNumOperands()));
6658       size_t HalfNumOps = Op.getNumOperands() / 2;
6659 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
6660 Op->ops().take_front(HalfNumOps));
6661 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
6662 Op->ops().drop_front(HalfNumOps));
6663 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
6664 }
6665
6666 unsigned NumOpElts =
6667 Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
6668 SDValue Vec = DAG.getUNDEF(VT);
6669 for (const auto &OpIdx : enumerate(Op->ops())) {
6670 SDValue SubVec = OpIdx.value();
6671 // Don't insert undef subvectors.
6672 if (SubVec.isUndef())
6673 continue;
6674 Vec =
6675 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, SubVec,
6676 DAG.getVectorIdxConstant(OpIdx.index() * NumOpElts, DL));
6677 }
6678 return Vec;
6679 }
6680 case ISD::LOAD:
6681 if (auto V = expandUnalignedRVVLoad(Op, DAG))
6682 return V;
6683 if (Op.getValueType().isFixedLengthVector())
6684 return lowerFixedLengthVectorLoadToRVV(Op, DAG);
6685 return Op;
6686 case ISD::STORE:
6687 if (auto V = expandUnalignedRVVStore(Op, DAG))
6688 return V;
6689 if (Op.getOperand(1).getValueType().isFixedLengthVector())
6690 return lowerFixedLengthVectorStoreToRVV(Op, DAG);
6691 return Op;
6692 case ISD::MLOAD:
6693 case ISD::VP_LOAD:
6694 return lowerMaskedLoad(Op, DAG);
6695 case ISD::MSTORE:
6696 case ISD::VP_STORE:
6697 return lowerMaskedStore(Op, DAG);
6698 case ISD::SELECT_CC: {
6699 // This occurs because we custom legalize SETGT and SETUGT for setcc. That
6700 // causes LegalizeDAG to think we need to custom legalize select_cc. Expand
6701 // into separate SETCC+SELECT just like LegalizeDAG.
6702 SDValue Tmp1 = Op.getOperand(0);
6703 SDValue Tmp2 = Op.getOperand(1);
6704 SDValue True = Op.getOperand(2);
6705 SDValue False = Op.getOperand(3);
6706 EVT VT = Op.getValueType();
6707 SDValue CC = Op.getOperand(4);
6708 EVT CmpVT = Tmp1.getValueType();
6709 EVT CCVT =
6710 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT);
6711 SDLoc DL(Op);
6712 SDValue Cond =
6713 DAG.getNode(ISD::SETCC, DL, CCVT, Tmp1, Tmp2, CC, Op->getFlags());
6714 return DAG.getSelect(DL, VT, Cond, True, False);
6715 }
6716 case ISD::SETCC: {
6717 MVT OpVT = Op.getOperand(0).getSimpleValueType();
6718 if (OpVT.isScalarInteger()) {
6719 MVT VT = Op.getSimpleValueType();
6720 SDValue LHS = Op.getOperand(0);
6721 SDValue RHS = Op.getOperand(1);
6722 ISD::CondCode CCVal = cast<CondCodeSDNode>(Op.getOperand(2))->get();
6723 assert((CCVal == ISD::SETGT || CCVal == ISD::SETUGT) &&
6724 "Unexpected CondCode");
6725
6726 SDLoc DL(Op);
6727
6728 // If the RHS is a constant in the range [-2049, 0) or (0, 2046], we can
6729 // convert this to the equivalent of (set(u)ge X, C+1) by using
6730 // (xori (slti(u) X, C+1), 1). This avoids materializing a small constant
6731 // in a register.
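// For example, (setgt X, 5) becomes (xori (slti X, 6), 1), since X > 5 is
// exactly the logical NOT of X < 6.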
6732 if (isa<ConstantSDNode>(RHS)) {
6733 int64_t Imm = cast<ConstantSDNode>(RHS)->getSExtValue();
6734 if (Imm != 0 && isInt<12>((uint64_t)Imm + 1)) {
6735 // If this is an unsigned compare and the constant is -1, incrementing
6736 // the constant would change behavior. The result should be false.
6737 if (CCVal == ISD::SETUGT && Imm == -1)
6738 return DAG.getConstant(0, DL, VT);
6739 // Using getSetCCSwappedOperands will convert SET(U)GT->SET(U)LT.
6740 CCVal = ISD::getSetCCSwappedOperands(CCVal);
6741 SDValue SetCC = DAG.getSetCC(
6742 DL, VT, LHS, DAG.getConstant(Imm + 1, DL, OpVT), CCVal);
6743 return DAG.getLogicalNOT(DL, SetCC, VT);
6744 }
6745 }
6746
6747 // Not a constant we can handle; swap the operands and condition code to
6748 // SETLT/SETULT.

6749 CCVal = ISD::getSetCCSwappedOperands(CCVal);
6750 return DAG.getSetCC(DL, VT, RHS, LHS, CCVal);
6751 }
6752
6753 if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 &&
6754 (Subtarget.hasVInstructionsF16Minimal() &&
6755 !Subtarget.hasVInstructionsF16()))
6756 return SplitVectorOp(Op, DAG);
6757
6758 return lowerFixedLengthVectorSetccToRVV(Op, DAG);
6759 }
6760 case ISD::ADD:
6761 case ISD::SUB:
6762 case ISD::MUL:
6763 case ISD::MULHS:
6764 case ISD::MULHU:
6765 case ISD::AND:
6766 case ISD::OR:
6767 case ISD::XOR:
6768 case ISD::SDIV:
6769 case ISD::SREM:
6770 case ISD::UDIV:
6771 case ISD::UREM:
6772 case ISD::BSWAP:
6773 case ISD::CTPOP:
6774 return lowerToScalableOp(Op, DAG);
6775 case ISD::SHL:
6776 case ISD::SRA:
6777 case ISD::SRL:
6778 if (Op.getSimpleValueType().isFixedLengthVector())
6779 return lowerToScalableOp(Op, DAG);
6780 // This can be called for an i32 shift amount that needs to be promoted.
6781 assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
6782 "Unexpected custom legalisation");
6783 return SDValue();
6784 case ISD::FADD:
6785 case ISD::FSUB:
6786 case ISD::FMUL:
6787 case ISD::FDIV:
6788 case ISD::FNEG:
6789 case ISD::FABS:
6790 case ISD::FSQRT:
6791 case ISD::FMA:
6792 case ISD::FMINNUM:
6793 case ISD::FMAXNUM:
6794 if (Op.getValueType() == MVT::nxv32f16 &&
6795 (Subtarget.hasVInstructionsF16Minimal() &&
6796 !Subtarget.hasVInstructionsF16()))
6797 return SplitVectorOp(Op, DAG);
6798 [[fallthrough]];
6799 case ISD::AVGFLOORU:
6800 case ISD::AVGCEILU:
6801 case ISD::SMIN:
6802 case ISD::SMAX:
6803 case ISD::UMIN:
6804 case ISD::UMAX:
6805 return lowerToScalableOp(Op, DAG);
6806 case ISD::UADDSAT:
6807 case ISD::USUBSAT:
6808 if (!Op.getValueType().isVector())
6809 return lowerUADDSAT_USUBSAT(Op, DAG);
6810 return lowerToScalableOp(Op, DAG);
6811 case ISD::SADDSAT:
6812 case ISD::SSUBSAT:
6813 if (!Op.getValueType().isVector())
6814 return lowerSADDSAT_SSUBSAT(Op, DAG);
6815 return lowerToScalableOp(Op, DAG);
6816 case ISD::ABDS:
6817 case ISD::ABDU: {
6818 SDLoc dl(Op);
6819 EVT VT = Op->getValueType(0);
6820 SDValue LHS = DAG.getFreeze(Op->getOperand(0));
6821 SDValue RHS = DAG.getFreeze(Op->getOperand(1));
6822 bool IsSigned = Op->getOpcode() == ISD::ABDS;
6823
6824 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
6825 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
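// For example, abds(2, 7) = smax(2, 7) - smin(2, 7) = 7 - 2 = 5 = |2 - 7|.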
6826 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
6827 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
6828 SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
6829 SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
6830 return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
6831 }
6832 case ISD::ABS:
6833 case ISD::VP_ABS:
6834 return lowerABS(Op, DAG);
6835 case ISD::CTLZ:
6836 case ISD::CTLZ_ZERO_UNDEF:
6837 case ISD::CTTZ:
6838 case ISD::CTTZ_ZERO_UNDEF:
6839 if (Subtarget.hasStdExtZvbb())
6840 return lowerToScalableOp(Op, DAG);
6841 assert(Op.getOpcode() != ISD::CTTZ);
6842 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
6843 case ISD::VSELECT:
6844 return lowerFixedLengthVectorSelectToRVV(Op, DAG);
6845 case ISD::FCOPYSIGN:
6846 if (Op.getValueType() == MVT::nxv32f16 &&
6847 (Subtarget.hasVInstructionsF16Minimal() &&
6848 !Subtarget.hasVInstructionsF16()))
6849 return SplitVectorOp(Op, DAG);
6850 return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
6851 case ISD::STRICT_FADD:
6852 case ISD::STRICT_FSUB:
6853 case ISD::STRICT_FMUL:
6854 case ISD::STRICT_FDIV:
6855 case ISD::STRICT_FSQRT:
6856 case ISD::STRICT_FMA:
6857 if (Op.getValueType() == MVT::nxv32f16 &&
6858 (Subtarget.hasVInstructionsF16Minimal() &&
6859 !Subtarget.hasVInstructionsF16()))
6860 return SplitStrictFPVectorOp(Op, DAG);
6861 return lowerToScalableOp(Op, DAG);
6862 case ISD::STRICT_FSETCC:
6863 case ISD::STRICT_FSETCCS:
6864 return lowerVectorStrictFSetcc(Op, DAG);
6865 case ISD::STRICT_FCEIL:
6866 case ISD::STRICT_FRINT:
6867 case ISD::STRICT_FFLOOR:
6868 case ISD::STRICT_FTRUNC:
6869 case ISD::STRICT_FNEARBYINT:
6870 case ISD::STRICT_FROUND:
6871 case ISD::STRICT_FROUNDEVEN:
6872 return lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
6873 case ISD::MGATHER:
6874 case ISD::VP_GATHER:
6875 return lowerMaskedGather(Op, DAG);
6876 case ISD::MSCATTER:
6877 case ISD::VP_SCATTER:
6878 return lowerMaskedScatter(Op, DAG);
6879 case ISD::GET_ROUNDING:
6880 return lowerGET_ROUNDING(Op, DAG);
6881 case ISD::SET_ROUNDING:
6882 return lowerSET_ROUNDING(Op, DAG);
6883 case ISD::EH_DWARF_CFA:
6884 return lowerEH_DWARF_CFA(Op, DAG);
6885 case ISD::VP_SELECT:
6886 case ISD::VP_MERGE:
6887 case ISD::VP_ADD:
6888 case ISD::VP_SUB:
6889 case ISD::VP_MUL:
6890 case ISD::VP_SDIV:
6891 case ISD::VP_UDIV:
6892 case ISD::VP_SREM:
6893 case ISD::VP_UREM:
6894 case ISD::VP_UADDSAT:
6895 case ISD::VP_USUBSAT:
6896 case ISD::VP_SADDSAT:
6897 case ISD::VP_SSUBSAT:
6898 case ISD::VP_LRINT:
6899 case ISD::VP_LLRINT:
6900 return lowerVPOp(Op, DAG);
6901 case ISD::VP_AND:
6902 case ISD::VP_OR:
6903 case ISD::VP_XOR:
6904 return lowerLogicVPOp(Op, DAG);
6905 case ISD::VP_FADD:
6906 case ISD::VP_FSUB:
6907 case ISD::VP_FMUL:
6908 case ISD::VP_FDIV:
6909 case ISD::VP_FNEG:
6910 case ISD::VP_FABS:
6911 case ISD::VP_SQRT:
6912 case ISD::VP_FMA:
6913 case ISD::VP_FMINNUM:
6914 case ISD::VP_FMAXNUM:
6915 case ISD::VP_FCOPYSIGN:
6916 if (Op.getValueType() == MVT::nxv32f16 &&
6917 (Subtarget.hasVInstructionsF16Minimal() &&
6918 !Subtarget.hasVInstructionsF16()))
6919 return SplitVPOp(Op, DAG);
6920 [[fallthrough]];
6921 case ISD::VP_ASHR:
6922 case ISD::VP_LSHR:
6923 case ISD::VP_SHL:
6924 return lowerVPOp(Op, DAG);
6925 case ISD::VP_IS_FPCLASS:
6926 return LowerIS_FPCLASS(Op, DAG);
6927 case ISD::VP_SIGN_EXTEND:
6928 case ISD::VP_ZERO_EXTEND:
6929 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
6930 return lowerVPExtMaskOp(Op, DAG);
6931 return lowerVPOp(Op, DAG);
6932 case ISD::VP_TRUNCATE:
6933 return lowerVectorTruncLike(Op, DAG);
6934 case ISD::VP_FP_EXTEND:
6935 case ISD::VP_FP_ROUND:
6936 return lowerVectorFPExtendOrRoundLike(Op, DAG);
6937 case ISD::VP_SINT_TO_FP:
6938 case ISD::VP_UINT_TO_FP:
6939 if (Op.getValueType().isVector() &&
6940 Op.getValueType().getScalarType() == MVT::f16 &&
6941 (Subtarget.hasVInstructionsF16Minimal() &&
6942 !Subtarget.hasVInstructionsF16())) {
6943 if (Op.getValueType() == MVT::nxv32f16)
6944 return SplitVPOp(Op, DAG);
6945 // int -> f32
6946 SDLoc DL(Op);
6947 MVT NVT =
6948 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
6949 auto NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
6950 // f32 -> f16
6951 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
6952 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6953 }
6954 [[fallthrough]];
6955 case ISD::VP_FP_TO_SINT:
6956 case ISD::VP_FP_TO_UINT:
6957 if (SDValue Op1 = Op.getOperand(0);
6958 Op1.getValueType().isVector() &&
6959 Op1.getValueType().getScalarType() == MVT::f16 &&
6960 (Subtarget.hasVInstructionsF16Minimal() &&
6961 !Subtarget.hasVInstructionsF16())) {
6962 if (Op1.getValueType() == MVT::nxv32f16)
6963 return SplitVPOp(Op, DAG);
6964 // f16 -> f32
6965 SDLoc DL(Op);
6966 MVT NVT = MVT::getVectorVT(MVT::f32,
6967 Op1.getValueType().getVectorElementCount());
6968 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
6969 // f32 -> int
6970 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
6971 {WidenVec, Op.getOperand(1), Op.getOperand(2)});
6972 }
6973 return lowerVPFPIntConvOp(Op, DAG);
6974 case ISD::VP_SETCC:
6975 if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 &&
6976 (Subtarget.hasVInstructionsF16Minimal() &&
6977 !Subtarget.hasVInstructionsF16()))
6978 return SplitVPOp(Op, DAG);
6979 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
6980 return lowerVPSetCCMaskOp(Op, DAG);
6981 [[fallthrough]];
6982 case ISD::VP_SMIN:
6983 case ISD::VP_SMAX:
6984 case ISD::VP_UMIN:
6985 case ISD::VP_UMAX:
6986 case ISD::VP_BITREVERSE:
6987 case ISD::VP_BSWAP:
6988 return lowerVPOp(Op, DAG);
6989 case ISD::VP_CTLZ:
6990 case ISD::VP_CTLZ_ZERO_UNDEF:
6991 if (Subtarget.hasStdExtZvbb())
6992 return lowerVPOp(Op, DAG);
6993 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
6994 case ISD::VP_CTTZ:
6995 case ISD::VP_CTTZ_ZERO_UNDEF:
6996 if (Subtarget.hasStdExtZvbb())
6997 return lowerVPOp(Op, DAG);
6998 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
6999 case ISD::VP_CTPOP:
7000 return lowerVPOp(Op, DAG);
7001 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
7002 return lowerVPStridedLoad(Op, DAG);
7003 case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
7004 return lowerVPStridedStore(Op, DAG);
7005 case ISD::VP_FCEIL:
7006 case ISD::VP_FFLOOR:
7007 case ISD::VP_FRINT:
7008 case ISD::VP_FNEARBYINT:
7009 case ISD::VP_FROUND:
7010 case ISD::VP_FROUNDEVEN:
7011 case ISD::VP_FROUNDTOZERO:
7012 if (Op.getValueType() == MVT::nxv32f16 &&
7013 (Subtarget.hasVInstructionsF16Minimal() &&
7014 !Subtarget.hasVInstructionsF16()))
7015 return SplitVPOp(Op, DAG);
7016 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
7017 case ISD::VP_FMAXIMUM:
7018 case ISD::VP_FMINIMUM:
7019 if (Op.getValueType() == MVT::nxv32f16 &&
7020 (Subtarget.hasVInstructionsF16Minimal() &&
7021 !Subtarget.hasVInstructionsF16()))
7022 return SplitVPOp(Op, DAG);
7023 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
7024 case ISD::EXPERIMENTAL_VP_SPLICE:
7025 return lowerVPSpliceExperimental(Op, DAG);
7026 case ISD::EXPERIMENTAL_VP_REVERSE:
7027 return lowerVPReverseExperimental(Op, DAG);
7028 }
7029}
7030
7031 static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty,
7032 SelectionDAG &DAG, unsigned Flags) {
7033 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
7034}
7035
7036 static SDValue getTargetNode(BlockAddressSDNode *N, const SDLoc &DL, EVT Ty,
7037 SelectionDAG &DAG, unsigned Flags) {
7038 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
7039 Flags);
7040}
7041
7042 static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty,
7043 SelectionDAG &DAG, unsigned Flags) {
7044 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
7045 N->getOffset(), Flags);
7046}
7047
7048 static SDValue getTargetNode(JumpTableSDNode *N, const SDLoc &DL, EVT Ty,
7049 SelectionDAG &DAG, unsigned Flags) {
7050 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
7051}
7052
7053template <class NodeTy>
7054SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
7055 bool IsLocal, bool IsExternWeak) const {
7056 SDLoc DL(N);
7057 EVT Ty = getPointerTy(DAG.getDataLayout());
7058
7059 // When HWASAN is used and tagging of global variables is enabled
7060 // they should be accessed via the GOT, since the tagged address of a global
7061 // is incompatible with existing code models. This also applies to non-pic
7062 // mode.
7063 if (isPositionIndependent() || Subtarget.allowTaggedGlobals()) {
7064 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
7065 if (IsLocal && !Subtarget.allowTaggedGlobals())
7066 // Use PC-relative addressing to access the symbol. This generates the
7067 // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
7068 // %pcrel_lo(auipc)).
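// In assembly this is roughly (with a0 standing in for the allocated
// register):
//   auipc a0, %pcrel_hi(sym)
//   addi  a0, a0, %pcrel_lo(auipc)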
7069 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
7070
7071 // Use PC-relative addressing to access the GOT for this symbol, then load
7072 // the address from the GOT. This generates the pattern (PseudoLGA sym),
7073 // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
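// In assembly this is roughly (ld becomes lw on RV32):
//   auipc a0, %got_pcrel_hi(sym)
//   ld    a0, %pcrel_lo(auipc)(a0)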
7074 SDValue Load =
7075 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
7076 MachineFunction &MF = DAG.getMachineFunction();
7077 MachineMemOperand *MemOp = MF.getMachineMemOperand(
7078 MachinePointerInfo::getGOT(MF),
7079 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
7080 MachineMemOperand::MOInvariant,
7081 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
7082 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
7083 return Load;
7084 }
7085
7086 switch (getTargetMachine().getCodeModel()) {
7087 default:
7088 report_fatal_error("Unsupported code model for lowering");
7089 case CodeModel::Small: {
7090 // Generate a sequence for accessing addresses within the first 2 GiB of
7091 // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
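// In assembly this is roughly:
//   lui  a0, %hi(sym)
//   addi a0, a0, %lo(sym)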
7092 SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
7093 SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
7094 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
7095 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNHi, AddrLo);
7096 }
7097 case CodeModel::Medium: {
7098 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
7099 if (IsExternWeak) {
7100 // An extern weak symbol may be undefined, i.e. have value 0, which may
7101 // not be within 2GiB of PC, so use GOT-indirect addressing to access the
7102 // symbol. This generates the pattern (PseudoLGA sym), which expands to
7103 // (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
7104 SDValue Load =
7105 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
7106 MachineFunction &MF = DAG.getMachineFunction();
7107 MachineMemOperand *MemOp = MF.getMachineMemOperand(
7108 MachinePointerInfo::getGOT(MF),
7109 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
7110 MachineMemOperand::MOInvariant,
7111 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
7112 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
7113 return Load;
7114 }
7115
7116 // Generate a sequence for accessing addresses within any 2GiB range within
7117 // the address space. This generates the pattern (PseudoLLA sym), which
7118 // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
7119 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
7120 }
7121 }
7122}
7123
7124SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
7125 SelectionDAG &DAG) const {
7126 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
7127 assert(N->getOffset() == 0 && "unexpected offset in global node");
7128 const GlobalValue *GV = N->getGlobal();
7129 return getAddr(N, DAG, GV->isDSOLocal(), GV->hasExternalWeakLinkage());
7130}
7131
7132SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
7133 SelectionDAG &DAG) const {
7134 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
7135
7136 return getAddr(N, DAG);
7137}
7138
7139SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
7140 SelectionDAG &DAG) const {
7141 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
7142
7143 return getAddr(N, DAG);
7144}
7145
7146SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
7147 SelectionDAG &DAG) const {
7148 JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
7149
7150 return getAddr(N, DAG);
7151}
7152
7153SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
7154 SelectionDAG &DAG,
7155 bool UseGOT) const {
7156 SDLoc DL(N);
7157 EVT Ty = getPointerTy(DAG.getDataLayout());
7158 const GlobalValue *GV = N->getGlobal();
7159 MVT XLenVT = Subtarget.getXLenVT();
7160
7161 if (UseGOT) {
7162 // Use PC-relative addressing to access the GOT for this TLS symbol, then
7163 // load the address from the GOT and add the thread pointer. This generates
7164 // the pattern (PseudoLA_TLS_IE sym), which expands to
7165 // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
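// In assembly this is roughly (ld becomes lw on RV32):
//   auipc a0, %tls_ie_pcrel_hi(sym)
//   ld    a0, %pcrel_lo(auipc)(a0)
//   add   a0, a0, tp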
7166 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
7167 SDValue Load =
7168 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
7169 MachineFunction &MF = DAG.getMachineFunction();
7170 MachineMemOperand *MemOp = MF.getMachineMemOperand(
7171 MachinePointerInfo::getGOT(MF),
7172 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
7173 MachineMemOperand::MOInvariant,
7174 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
7175 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
7176
7177 // Add the thread pointer.
7178 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
7179 return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
7180 }
7181
7182 // Generate a sequence for accessing the address relative to the thread
7183 // pointer, with the appropriate adjustment for the thread pointer offset.
7184 // This generates the pattern
7185 // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
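// In assembly this is roughly:
//   lui  a0, %tprel_hi(sym)
//   add  a0, a0, tp, %tprel_add(sym)
//   addi a0, a0, %tprel_lo(sym)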
7186 SDValue AddrHi =
7187 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
7188 SDValue AddrAdd =
7189 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
7190 SDValue AddrLo =
7191 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);
7192
7193 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
7194 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
7195 SDValue MNAdd =
7196 DAG.getNode(RISCVISD::ADD_TPREL, DL, Ty, MNHi, TPReg, AddrAdd);
7197 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNAdd, AddrLo);
7198}
7199
7200SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
7201 SelectionDAG &DAG) const {
7202 SDLoc DL(N);
7203 EVT Ty = getPointerTy(DAG.getDataLayout());
7204 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
7205 const GlobalValue *GV = N->getGlobal();
7206
7207 // Use a PC-relative addressing mode to access the global dynamic GOT address.
7208 // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
7209 // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
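// In assembly this is roughly the following, followed by the call to
// __tls_get_addr that is set up below:
//   auipc a0, %tls_gd_pcrel_hi(sym)
//   addi  a0, a0, %pcrel_lo(auipc)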
7210 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
7211 SDValue Load =
7212 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
7213
7214 // Prepare argument list to generate call.
7215 ArgListTy Args;
7216 ArgListEntry Entry;
7217 Entry.Node = Load;
7218 Entry.Ty = CallTy;
7219 Args.push_back(Entry);
7220
7221 // Setup call to __tls_get_addr.
7222 TargetLowering::CallLoweringInfo CLI(DAG);
7223 CLI.setDebugLoc(DL)
7224 .setChain(DAG.getEntryNode())
7225 .setLibCallee(CallingConv::C, CallTy,
7226 DAG.getExternalSymbol("__tls_get_addr", Ty),
7227 std::move(Args));
7228
7229 return LowerCallTo(CLI).first;
7230}
7231
7232SDValue RISCVTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
7233 SelectionDAG &DAG) const {
7234 SDLoc DL(N);
7235 EVT Ty = getPointerTy(DAG.getDataLayout());
7236 const GlobalValue *GV = N->getGlobal();
7237
7238 // Use a PC-relative addressing mode to access the global dynamic GOT address.
7239 // This generates the pattern (PseudoLA_TLSDESC sym), which expands to
7240 //
7241 // auipc tX, %tlsdesc_hi(symbol) // R_RISCV_TLSDESC_HI20(symbol)
7242 // lw tY, tX, %tlsdesc_load_lo(label) // R_RISCV_TLSDESC_LOAD_LO12(label)
7243 // addi a0, tX, %tlsdesc_add_lo(label) // R_RISCV_TLSDESC_ADD_LO12(label)
7244 // jalr t0, tY // R_RISCV_TLSDESC_CALL(label)
7245 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
7246 return SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLSDESC, DL, Ty, Addr), 0);
7247}
7248
7249SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
7250 SelectionDAG &DAG) const {
7251 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
7252 assert(N->getOffset() == 0 && "unexpected offset in global node");
7253
7254 if (DAG.getTarget().useEmulatedTLS())
7255 return LowerToTLSEmulatedModel(N, DAG);
7256
7257 TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
7258
7259 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
7260 CallingConv::GHC)
7261 report_fatal_error("In GHC calling convention TLS is not supported");
7262
7263 SDValue Addr;
7264 switch (Model) {
7265 case TLSModel::LocalExec:
7266 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
7267 break;
7268 case TLSModel::InitialExec:
7269 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
7270 break;
7271 case TLSModel::LocalDynamic:
7272 case TLSModel::GeneralDynamic:
7273 Addr = DAG.getTarget().useTLSDESC() ? getTLSDescAddr(N, DAG)
7274 : getDynamicTLSAddr(N, DAG);
7275 break;
7276 }
7277
7278 return Addr;
7279}
7280
7281// Return true if Val is equal to (setcc LHS, RHS, CC).
7282// Return false if Val is the inverse of (setcc LHS, RHS, CC).
7283// Otherwise, return std::nullopt.
7284static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
7285 ISD::CondCode CC, SDValue Val) {
7286 assert(Val->getOpcode() == ISD::SETCC);
7287 SDValue LHS2 = Val.getOperand(0);
7288 SDValue RHS2 = Val.getOperand(1);
7289 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
7290
7291 if (LHS == LHS2 && RHS == RHS2) {
7292 if (CC == CC2)
7293 return true;
7294 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
7295 return false;
7296 } else if (LHS == RHS2 && RHS == LHS2) {
7297 CC2 = ISD::getSetCCSwappedOperands(CC2);
7298 if (CC == CC2)
7299 return true;
7300 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
7301 return false;
7302 }
7303
7304 return std::nullopt;
7305}
7306
7307 static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
7308 const RISCVSubtarget &Subtarget) {
7309 SDValue CondV = N->getOperand(0);
7310 SDValue TrueV = N->getOperand(1);
7311 SDValue FalseV = N->getOperand(2);
7312 MVT VT = N->getSimpleValueType(0);
7313 SDLoc DL(N);
7314
7315 if (!Subtarget.hasConditionalMoveFusion()) {
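// In the folds below CondV is a boolean value (0 or 1), so -CondV is either
// 0 or all-ones and (CondV - 1) is either all-ones or 0, which turns the
// select into pure bitwise arithmetic. E.g. for (select c, -1, y):
// c == 1 gives -1 | y == -1 and c == 0 gives 0 | y == y.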
7316 // (select c, -1, y) -> -c | y
7317 if (isAllOnesConstant(TrueV)) {
7318 SDValue Neg = DAG.getNegative(CondV, DL, VT);
7319 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
7320 }
7321 // (select c, y, -1) -> (c-1) | y
7322 if (isAllOnesConstant(FalseV)) {
7323 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
7324 DAG.getAllOnesConstant(DL, VT));
7325 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
7326 }
7327
7328 // (select c, 0, y) -> (c-1) & y
7329 if (isNullConstant(TrueV)) {
7330 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
7331 DAG.getAllOnesConstant(DL, VT));
7332 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
7333 }
7334 // (select c, y, 0) -> -c & y
7335 if (isNullConstant(FalseV)) {
7336 SDValue Neg = DAG.getNegative(CondV, DL, VT);
7337 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
7338 }
7339 }
7340
7341 // select c, ~x, x --> xor -c, x
7342 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
7343 const APInt &TrueVal = TrueV->getAsAPIntVal();
7344 const APInt &FalseVal = FalseV->getAsAPIntVal();
7345 if (~TrueVal == FalseVal) {
7346 SDValue Neg = DAG.getNegative(CondV, DL, VT);
7347 return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
7348 }
7349 }
7350
7351 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
7352 // when both truev and falsev are also setcc.
7353 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
7354 FalseV.getOpcode() == ISD::SETCC) {
7355 SDValue LHS = CondV.getOperand(0);
7356 SDValue RHS = CondV.getOperand(1);
7357 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
7358
7359 // (select x, x, y) -> x | y
7360 // (select !x, x, y) -> x & y
7361 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
7362 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
7363 DAG.getFreeze(FalseV));
7364 }
7365 // (select x, y, x) -> x & y
7366 // (select !x, y, x) -> x | y
7367 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
7368 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
7369 DAG.getFreeze(TrueV), FalseV);
7370 }
7371 }
7372
7373 return SDValue();
7374}
7375
7376// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
7377// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
7378// For now we only consider transformation profitable if `binOp(c0, c1)` ends up
7379// being `0` or `-1`. In such cases we can replace `select` with `and`.
7380// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
7381// than `c0`?
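// For example, (and (select cond, x, 0), 3) becomes
// (select cond, (and x, 3), 0), whose zero false arm lets the remaining
// select be lowered as an AND with the condition mask (see
// combineSelectToBinOp above).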
7382static SDValue
7383 foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG,
7384 const RISCVSubtarget &Subtarget) {
7385 if (Subtarget.hasShortForwardBranchOpt())
7386 return SDValue();
7387
7388 unsigned SelOpNo = 0;
7389 SDValue Sel = BO->getOperand(0);
7390 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
7391 SelOpNo = 1;
7392 Sel = BO->getOperand(1);
7393 }
7394
7395 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
7396 return SDValue();
7397
7398 unsigned ConstSelOpNo = 1;
7399 unsigned OtherSelOpNo = 2;
7400 if (!dyn_cast<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
7401 ConstSelOpNo = 2;
7402 OtherSelOpNo = 1;
7403 }
7404 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
7405 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
7406 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
7407 return SDValue();
7408
7409 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
7410 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
7411 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
7412 return SDValue();
7413
7414 SDLoc DL(Sel);
7415 EVT VT = BO->getValueType(0);
7416
7417 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
7418 if (SelOpNo == 1)
7419 std::swap(NewConstOps[0], NewConstOps[1]);
7420
7421 SDValue NewConstOp =
7422 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
7423 if (!NewConstOp)
7424 return SDValue();
7425
7426 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
7427 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
7428 return SDValue();
7429
7430 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
7431 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
7432 if (SelOpNo == 1)
7433 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
7434 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
7435
7436 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
7437 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
7438 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
7439}
7440
7441SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
7442 SDValue CondV = Op.getOperand(0);
7443 SDValue TrueV = Op.getOperand(1);
7444 SDValue FalseV = Op.getOperand(2);
7445 SDLoc DL(Op);
7446 MVT VT = Op.getSimpleValueType();
7447 MVT XLenVT = Subtarget.getXLenVT();
7448
7449 // Lower vector SELECTs to VSELECTs by splatting the condition.
7450 if (VT.isVector()) {
7451 MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
7452 SDValue CondSplat = DAG.getSplat(SplatCondVT, DL, CondV);
7453 return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
7454 }
7455
7456 // When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ
7457 // nodes to implement the SELECT. Performing the lowering here allows for
7458 // greater control over when CZERO_{EQZ/NEZ} are used vs another branchless
7459 // sequence or RISCVISD::SELECT_CC node (branch-based select).
7460 if ((Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) &&
7461 VT.isScalarInteger()) {
7462 // (select c, t, 0) -> (czero_eqz t, c)
7463 if (isNullConstant(FalseV))
7464 return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV);
7465 // (select c, 0, f) -> (czero_nez f, c)
7466 if (isNullConstant(TrueV))
7467 return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV);
7468
7469 // (select c, (and f, x), f) -> (or (and f, x), (czero_nez f, c))
7470 if (TrueV.getOpcode() == ISD::AND &&
7471 (TrueV.getOperand(0) == FalseV || TrueV.getOperand(1) == FalseV))
7472 return DAG.getNode(
7473 ISD::OR, DL, VT, TrueV,
7474 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
7475 // (select c, t, (and t, x)) -> (or (czero_eqz t, c), (and t, x))
7476 if (FalseV.getOpcode() == ISD::AND &&
7477 (FalseV.getOperand(0) == TrueV || FalseV.getOperand(1) == TrueV))
7478 return DAG.getNode(
7479 ISD::OR, DL, VT, FalseV,
7480 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV));
7481
7482 // Try some other optimizations before falling back to generic lowering.
7483 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
7484 return V;
7485
7486 // (select c, c1, c2) -> (add (czero_nez c2 - c1, c), c1)
7487 // (select c, c1, c2) -> (add (czero_eqz c1 - c2, c), c2)
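// For example, for (select c, 5, 3), assuming 5 is no more expensive to
// materialize than 3, this emits (add (czero_nez -2, c), 5):
// c != 0 gives 0 + 5 = 5 and c == 0 gives -2 + 5 = 3.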
7488 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
7489 const APInt &TrueVal = TrueV->getAsAPIntVal();
7490 const APInt &FalseVal = FalseV->getAsAPIntVal();
7491 const int TrueValCost = RISCVMatInt::getIntMatCost(
7492 TrueVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
7493 const int FalseValCost = RISCVMatInt::getIntMatCost(
7494 FalseVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
7495 bool IsCZERO_NEZ = TrueValCost <= FalseValCost;
7496 SDValue LHSVal = DAG.getConstant(
7497 IsCZERO_NEZ ? FalseVal - TrueVal : TrueVal - FalseVal, DL, VT);
7498 SDValue RHSVal =
7499 DAG.getConstant(IsCZERO_NEZ ? TrueVal : FalseVal, DL, VT);
7500 SDValue CMOV =
7501 DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
7502 DL, VT, LHSVal, CondV);
7503 return DAG.getNode(ISD::ADD, DL, VT, CMOV, RHSVal);
7504 }
7505
7506 // (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c))
7507 // Unless we have the short forward branch optimization.
7508 if (!Subtarget.hasConditionalMoveFusion())
7509 return DAG.getNode(
7510 ISD::OR, DL, VT,
7511 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV),
7512 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
7513 }
7514
7515 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
7516 return V;
7517
7518 if (Op.hasOneUse()) {
7519 unsigned UseOpc = Op->use_begin()->getOpcode();
7520 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
7521 SDNode *BinOp = *Op->use_begin();
7522 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->use_begin(),
7523 DAG, Subtarget)) {
7524 DAG.ReplaceAllUsesWith(BinOp, &NewSel);
7525 return lowerSELECT(NewSel, DAG);
7526 }
7527 }
7528 }
7529
7530 // (select cc, 1.0, 0.0) -> (sint_to_fp (zext cc))
7531 // (select cc, 0.0, 1.0) -> (sint_to_fp (zext (xor cc, 1)))
7532 const ConstantFPSDNode *FPTV = dyn_cast<ConstantFPSDNode>(TrueV);
7533 const ConstantFPSDNode *FPFV = dyn_cast<ConstantFPSDNode>(FalseV);
7534 if (FPTV && FPFV) {
7535 if (FPTV->isExactlyValue(1.0) && FPFV->isExactlyValue(0.0))
7536 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, CondV);
7537 if (FPTV->isExactlyValue(0.0) && FPFV->isExactlyValue(1.0)) {
7538 SDValue XOR = DAG.getNode(ISD::XOR, DL, XLenVT, CondV,
7539 DAG.getConstant(1, DL, XLenVT));
7540 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, XOR);
7541 }
7542 }
7543
7544 // If the condition is not an integer SETCC which operates on XLenVT, we need
7545 // to emit a RISCVISD::SELECT_CC comparing the condition to zero. i.e.:
7546 // (select condv, truev, falsev)
7547 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
7548 if (CondV.getOpcode() != ISD::SETCC ||
7549 CondV.getOperand(0).getSimpleValueType() != XLenVT) {
7550 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
7551 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
7552
7553 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
7554
7555 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
7556 }
7557
7558 // If the CondV is the output of a SETCC node which operates on XLenVT inputs,
7559 // then merge the SETCC node into the lowered RISCVISD::SELECT_CC to take
7560 // advantage of the integer compare+branch instructions. i.e.:
7561 // (select (setcc lhs, rhs, cc), truev, falsev)
7562 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
7563 SDValue LHS = CondV.getOperand(0);
7564 SDValue RHS = CondV.getOperand(1);
7565 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
7566
7567 // Special case for a select of 2 constants that have a difference of 1.
7568 // Normally this is done by DAGCombine, but if the select is introduced by
7569 // type legalization or op legalization, we miss it. Restricting to SETLT
7570 // case for now because that is what signed saturating add/sub need.
7571 // FIXME: We don't need the condition to be SETLT or even a SETCC,
7572 // but we would probably want to swap the true/false values if the condition
7573 // is SETGE/SETLE to avoid an XORI.
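// For example, (select (setlt a, b), 6, 5) becomes (add (setlt a, b), 5),
// since the setcc result is 0 or 1.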
7574 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
7575 CCVal == ISD::SETLT) {
7576 const APInt &TrueVal = TrueV->getAsAPIntVal();
7577 const APInt &FalseVal = FalseV->getAsAPIntVal();
7578 if (TrueVal - 1 == FalseVal)
7579 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
7580 if (TrueVal + 1 == FalseVal)
7581 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
7582 }
7583
7584 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
7585 // 1 < x ? x : 1 -> 0 < x ? x : 1
7586 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
7587 RHS == TrueV && LHS == FalseV) {
7588 LHS = DAG.getConstant(0, DL, VT);
7589 // 0 <u x is the same as x != 0.
7590 if (CCVal == ISD::SETULT) {
7591 std::swap(LHS, RHS);
7592 CCVal = ISD::SETNE;
7593 }
7594 }
7595
7596 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
7597 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
7598 RHS == FalseV) {
7599 RHS = DAG.getConstant(0, DL, VT);
7600 }
7601
7602 SDValue TargetCC = DAG.getCondCode(CCVal);
7603
7604 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
7605 // (select (setcc lhs, rhs, CC), constant, falsev)
7606 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
7607 std::swap(TrueV, FalseV);
7608 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
7609 }
7610
7611 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
7612 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
7613}
7614
7615SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
7616 SDValue CondV = Op.getOperand(1);
7617 SDLoc DL(Op);
7618 MVT XLenVT = Subtarget.getXLenVT();
7619
7620 if (CondV.getOpcode() == ISD::SETCC &&
7621 CondV.getOperand(0).getValueType() == XLenVT) {
7622 SDValue LHS = CondV.getOperand(0);
7623 SDValue RHS = CondV.getOperand(1);
7624 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
7625
7626 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
7627
7628 SDValue TargetCC = DAG.getCondCode(CCVal);
7629 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
7630 LHS, RHS, TargetCC, Op.getOperand(2));
7631 }
7632
7633 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
7634 CondV, DAG.getConstant(0, DL, XLenVT),
7635 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
7636}
7637
7638SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
7639 MachineFunction &MF = DAG.getMachineFunction();
7640 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
7641
7642 SDLoc DL(Op);
7643 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
7644 getPointerTy(MF.getDataLayout()));
7645
7646 // vastart just stores the address of the VarArgsFrameIndex slot into the
7647 // memory location argument.
7648 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
7649 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
7650 MachinePointerInfo(SV));
7651}
7652
7653SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
7654 SelectionDAG &DAG) const {
7655 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
7656 MachineFunction &MF = DAG.getMachineFunction();
7657 MachineFrameInfo &MFI = MF.getFrameInfo();
7658 MFI.setFrameAddressIsTaken(true);
7659 Register FrameReg = RI.getFrameRegister(MF);
7660 int XLenInBytes = Subtarget.getXLen() / 8;
7661
7662 EVT VT = Op.getValueType();
7663 SDLoc DL(Op);
7664 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
7665 unsigned Depth = Op.getConstantOperandVal(0);
7666 while (Depth--) {
7667 int Offset = -(XLenInBytes * 2);
7668 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
7669 DAG.getIntPtrConstant(Offset, DL));
7670 FrameAddr =
7671 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
7672 }
7673 return FrameAddr;
7674}
7675
7676SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
7677 SelectionDAG &DAG) const {
7678 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
7679 MachineFunction &MF = DAG.getMachineFunction();
7680 MachineFrameInfo &MFI = MF.getFrameInfo();
7681 MFI.setReturnAddressIsTaken(true);
7682 MVT XLenVT = Subtarget.getXLenVT();
7683 int XLenInBytes = Subtarget.getXLen() / 8;
7684
7685 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
7686 return SDValue();
7687
7688 EVT VT = Op.getValueType();
7689 SDLoc DL(Op);
7690 unsigned Depth = Op.getConstantOperandVal(0);
7691 if (Depth) {
7692 int Off = -XLenInBytes;
7693 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
7694 SDValue Offset = DAG.getConstant(Off, DL, VT);
7695 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
7696 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
7697 MachinePointerInfo());
7698 }
7699
7700 // Return the value of the return address register, marking it an implicit
7701 // live-in.
7702 Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
7703 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
7704}
7705
7706SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
7707 SelectionDAG &DAG) const {
7708 SDLoc DL(Op);
7709 SDValue Lo = Op.getOperand(0);
7710 SDValue Hi = Op.getOperand(1);
7711 SDValue Shamt = Op.getOperand(2);
7712 EVT VT = Lo.getValueType();
7713
7714 // if Shamt-XLEN < 0: // Shamt < XLEN
7715 // Lo = Lo << Shamt
7716 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
7717 // else:
7718 // Lo = 0
7719 // Hi = Lo << (Shamt-XLEN)
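// For example, with XLEN = 32: Shamt = 4 gives Lo = Lo << 4 and
// Hi = (Hi << 4) | (Lo >>u 28), while Shamt = 40 gives Lo = 0 and
// Hi = Lo << 8.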
7720
7721 SDValue Zero = DAG.getConstant(0, DL, VT);
7722 SDValue One = DAG.getConstant(1, DL, VT);
7723 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
7724 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
7725 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
7726 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
7727
7728 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
7729 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
7730 SDValue ShiftRightLo =
7731 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
7732 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
7733 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
7734 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
7735
7736 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
7737
7738 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
7739 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
7740
7741 SDValue Parts[2] = {Lo, Hi};
7742 return DAG.getMergeValues(Parts, DL);
7743}
7744
7745SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
7746 bool IsSRA) const {
7747 SDLoc DL(Op);
7748 SDValue Lo = Op.getOperand(0);
7749 SDValue Hi = Op.getOperand(1);
7750 SDValue Shamt = Op.getOperand(2);
7751 EVT VT = Lo.getValueType();
7752
7753 // SRA expansion:
7754 // if Shamt-XLEN < 0: // Shamt < XLEN
7755 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
7756 // Hi = Hi >>s Shamt
7757 // else:
7758 // Lo = Hi >>s (Shamt-XLEN);
7759 // Hi = Hi >>s (XLEN-1)
7760 //
7761 // SRL expansion:
7762 // if Shamt-XLEN < 0: // Shamt < XLEN
7763 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
7764 // Hi = Hi >>u Shamt
7765 // else:
7766 // Lo = Hi >>u (Shamt-XLEN);
7767 // Hi = 0;
7768
7769 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
7770
7771 SDValue Zero = DAG.getConstant(0, DL, VT);
7772 SDValue One = DAG.getConstant(1, DL, VT);
7773 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
7774 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
7775 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
7776 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
7777
7778 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
7779 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
7780 SDValue ShiftLeftHi =
7781 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
7782 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
7783 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
7784 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
7785 SDValue HiFalse =
7786 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
7787
7788 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
7789
7790 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
7791 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
7792
7793 SDValue Parts[2] = {Lo, Hi};
7794 return DAG.getMergeValues(Parts, DL);
7795}
7796
7797// Lower splats of i1 types to SETCC. For each mask vector type, we have a
7798// legal equivalently-sized i8 type, so we can use that as a go-between.
7799SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
7800 SelectionDAG &DAG) const {
7801 SDLoc DL(Op);
7802 MVT VT = Op.getSimpleValueType();
7803 SDValue SplatVal = Op.getOperand(0);
7804 // All-zeros or all-ones splats are handled specially.
7805 if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) {
7806 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
7807 return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL);
7808 }
7809 if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) {
7810 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
7811 return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
7812 }
7813 MVT InterVT = VT.changeVectorElementType(MVT::i8);
7814 SplatVal = DAG.getNode(ISD::AND, DL, SplatVal.getValueType(), SplatVal,
7815 DAG.getConstant(1, DL, SplatVal.getValueType()));
7816 SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
7817 SDValue Zero = DAG.getConstant(0, DL, InterVT);
7818 return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
7819}
7820
7821// Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
7822// illegal (currently only vXi64 RV32).
7823// FIXME: We could also catch non-constant sign-extended i32 values and lower
7824// them to VMV_V_X_VL.
7825SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
7826 SelectionDAG &DAG) const {
7827 SDLoc DL(Op);
7828 MVT VecVT = Op.getSimpleValueType();
7829 assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
7830 "Unexpected SPLAT_VECTOR_PARTS lowering");
7831
7832 assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
7833 SDValue Lo = Op.getOperand(0);
7834 SDValue Hi = Op.getOperand(1);
7835
7836 MVT ContainerVT = VecVT;
7837 if (VecVT.isFixedLengthVector())
7838 ContainerVT = getContainerForFixedLengthVector(VecVT);
7839
7840 auto VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
7841
7842 SDValue Res =
7843 splatPartsI64WithVL(DL, ContainerVT, SDValue(), Lo, Hi, VL, DAG);
7844
7845 if (VecVT.isFixedLengthVector())
7846 Res = convertFromScalableVector(VecVT, Res, DAG, Subtarget);
7847
7848 return Res;
7849}
7850
7851// Custom-lower extensions from mask vectors by using a vselect either with 1
7852// for zero/any-extension or -1 for sign-extension:
7853// (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
7854// Note that any-extension is lowered identically to zero-extension.
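// For example, (sext vXi1 %vmask to vXi32) becomes
// (vselect %vmask, (splat -1), (splat 0)), and zext uses a splat of 1.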
7855SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
7856 int64_t ExtTrueVal) const {
7857 SDLoc DL(Op);
7858 MVT VecVT = Op.getSimpleValueType();
7859 SDValue Src = Op.getOperand(0);
7860 // Only custom-lower extensions from mask types
7861 assert(Src.getValueType().isVector() &&
7862 Src.getValueType().getVectorElementType() == MVT::i1);
7863
7864 if (VecVT.isScalableVector()) {
7865 SDValue SplatZero = DAG.getConstant(0, DL, VecVT);
7866 SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, VecVT);
7867 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
7868 }
7869
7870 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
7871 MVT I1ContainerVT =
7872 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
7873
7874 SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);
7875
7876 SDValue VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
7877
7878 MVT XLenVT = Subtarget.getXLenVT();
7879 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
7880 SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, XLenVT);
7881
7882 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
7883 DAG.getUNDEF(ContainerVT), SplatZero, VL);
7884 SplatTrueVal = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
7885 DAG.getUNDEF(ContainerVT), SplatTrueVal, VL);
7886 SDValue Select =
7887 DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, SplatTrueVal,
7888 SplatZero, DAG.getUNDEF(ContainerVT), VL);
7889
7890 return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
7891}
7892
7893SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
7894 SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const {
7895 MVT ExtVT = Op.getSimpleValueType();
7896 // Only custom-lower extensions from fixed-length vector types.
7897 if (!ExtVT.isFixedLengthVector())
7898 return Op;
7899 MVT VT = Op.getOperand(0).getSimpleValueType();
7900 // Grab the canonical container type for the extended type. Infer the smaller
7901 // type from that to ensure the same number of vector elements, as we know
7902 // the LMUL will be sufficient to hold the smaller type.
7903 MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT);
7904 // Get the extended container type manually to ensure the same number of
7905 // vector elements between source and dest.
7906 MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
7907 ContainerExtVT.getVectorElementCount());
7908
7909 SDValue Op1 =
7910 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
7911
7912 SDLoc DL(Op);
7913 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
7914
7915 SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL);
7916
7917 return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget);
7918}
7919
7920// Custom-lower truncations from vectors to mask vectors by using a mask and a
7921// setcc operation:
7922// (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
7923SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op,
7924 SelectionDAG &DAG) const {
7925 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
7926 SDLoc DL(Op);
7927 EVT MaskVT = Op.getValueType();
7928 // Only expect to custom-lower truncations to mask types
7929 assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
7930 "Unexpected type for vector mask lowering");
7931 SDValue Src = Op.getOperand(0);
7932 MVT VecVT = Src.getSimpleValueType();
7933 SDValue Mask, VL;
7934 if (IsVPTrunc) {
7935 Mask = Op.getOperand(1);
7936 VL = Op.getOperand(2);
7937 }
7938 // If this is a fixed vector, we need to convert it to a scalable vector.
7939 MVT ContainerVT = VecVT;
7940
7941 if (VecVT.isFixedLengthVector()) {
7942 ContainerVT = getContainerForFixedLengthVector(VecVT);
7943 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
7944 if (IsVPTrunc) {
7945 MVT MaskContainerVT =
7946 getContainerForFixedLengthVector(Mask.getSimpleValueType());
7947 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
7948 }
7949 }
7950
7951 if (!IsVPTrunc) {
7952 std::tie(Mask, VL) =
7953 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
7954 }
7955
7956 SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
7957 SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
7958
7959 SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
7960 DAG.getUNDEF(ContainerVT), SplatOne, VL);
7961 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
7962 DAG.getUNDEF(ContainerVT), SplatZero, VL);
7963
7964 MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
7965 SDValue Trunc = DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne,
7966 DAG.getUNDEF(ContainerVT), Mask, VL);
7967 Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT,
7968 {Trunc, SplatZero, DAG.getCondCode(ISD::SETNE),
7969 DAG.getUNDEF(MaskContainerVT), Mask, VL});
7970 if (MaskVT.isFixedLengthVector())
7971 Trunc = convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
7972 return Trunc;
7973}
7974
7975SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
7976 SelectionDAG &DAG) const {
7977 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
7978 SDLoc DL(Op);
7979
7980 MVT VT = Op.getSimpleValueType();
7981 // Only custom-lower vector truncates
7982 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
7983
7984 // Truncates to mask types are handled differently
7985 if (VT.getVectorElementType() == MVT::i1)
7986 return lowerVectorMaskTruncLike(Op, DAG);
7987
7988 // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
7989 // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
7990 // truncate by one power of two at a time.
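// For example, truncating i64 elements to i8 elements is done as
// i64 -> i32 -> i16 -> i8, one TRUNCATE_VECTOR_VL node per step.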
7991 MVT DstEltVT = VT.getVectorElementType();
7992
7993 SDValue Src = Op.getOperand(0);
7994 MVT SrcVT = Src.getSimpleValueType();
7995 MVT SrcEltVT = SrcVT.getVectorElementType();
7996
7997 assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) &&
7998 isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
7999 "Unexpected vector truncate lowering");
8000
8001 MVT ContainerVT = SrcVT;
8002 SDValue Mask, VL;
8003 if (IsVPTrunc) {
8004 Mask = Op.getOperand(1);
8005 VL = Op.getOperand(2);
8006 }
8007 if (SrcVT.isFixedLengthVector()) {
8008 ContainerVT = getContainerForFixedLengthVector(SrcVT);
8009 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
8010 if (IsVPTrunc) {
8011 MVT MaskVT = getMaskTypeFor(ContainerVT);
8012 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
8013 }
8014 }
8015
8016 SDValue Result = Src;
8017 if (!IsVPTrunc) {
8018 std::tie(Mask, VL) =
8019 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
8020 }
8021
8022 LLVMContext &Context = *DAG.getContext();
8023 const ElementCount Count = ContainerVT.getVectorElementCount();
8024 do {
8025 SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
8026 EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count);
8027 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result,
8028 Mask, VL);
8029 } while (SrcEltVT != DstEltVT);
8030
8031 if (SrcVT.isFixedLengthVector())
8032 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
8033
8034 return Result;
8035}
8036
8037SDValue
8038RISCVTargetLowering::lowerStrictFPExtendOrRoundLike(SDValue Op,
8039 SelectionDAG &DAG) const {
8040 SDLoc DL(Op);
8041 SDValue Chain = Op.getOperand(0);
8042 SDValue Src = Op.getOperand(1);
8043 MVT VT = Op.getSimpleValueType();
8044 MVT SrcVT = Src.getSimpleValueType();
8045 MVT ContainerVT = VT;
8046 if (VT.isFixedLengthVector()) {
8047 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
8048 ContainerVT =
8049 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
8050 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
8051 }
8052
8053 auto [Mask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
8054
8055 // RVV can only widen/truncate fp to types double/half the size of the source.
8056 if ((VT.getVectorElementType() == MVT::f64 &&
8057 SrcVT.getVectorElementType() == MVT::f16) ||
8058 (VT.getVectorElementType() == MVT::f16 &&
8059 SrcVT.getVectorElementType() == MVT::f64)) {
8060 // For double rounding, the intermediate rounding should be round-to-odd.
8061 unsigned InterConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
8062 ? RISCVISD::STRICT_FP_EXTEND_VL
8063 : RISCVISD::STRICT_VFNCVT_ROD_VL;
8064 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
8065 Src = DAG.getNode(InterConvOpc, DL, DAG.getVTList(InterVT, MVT::Other),
8066 Chain, Src, Mask, VL);
8067 Chain = Src.getValue(1);
8068 }
8069
8070 unsigned ConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
8071 ? RISCVISD::STRICT_FP_EXTEND_VL
8072 : RISCVISD::STRICT_FP_ROUND_VL;
8073 SDValue Res = DAG.getNode(ConvOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
8074 Chain, Src, Mask, VL);
8075 if (VT.isFixedLengthVector()) {
8076 // StrictFP operations have two result values. Their lowered result should
8077 // have the same result count.
8078 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
8079 Res = DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
8080 }
8081 return Res;
8082}
8083
8084SDValue
8085RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
8086 SelectionDAG &DAG) const {
8087 bool IsVP =
8088 Op.getOpcode() == ISD::VP_FP_ROUND || Op.getOpcode() == ISD::VP_FP_EXTEND;
8089 bool IsExtend =
8090 Op.getOpcode() == ISD::VP_FP_EXTEND || Op.getOpcode() == ISD::FP_EXTEND;
8091 // RVV can only truncate fp to types half the size of the source. We
8092 // custom-lower f64->f16 rounds via RVV's round-to-odd float
8093 // conversion instruction.
8094 SDLoc DL(Op);
8095 MVT VT = Op.getSimpleValueType();
8096
8097 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
8098
8099 SDValue Src = Op.getOperand(0);
8100 MVT SrcVT = Src.getSimpleValueType();
8101
8102 bool IsDirectExtend = IsExtend && (VT.getVectorElementType() != MVT::f64 ||
8103 SrcVT.getVectorElementType() != MVT::f16);
8104 bool IsDirectTrunc = !IsExtend && (VT.getVectorElementType() != MVT::f16 ||
8105 SrcVT.getVectorElementType() != MVT::f64);
8106
8107 bool IsDirectConv = IsDirectExtend || IsDirectTrunc;
8108
8109 // Prepare any fixed-length vector operands.
8110 MVT ContainerVT = VT;
8111 SDValue Mask, VL;
8112 if (IsVP) {
8113 Mask = Op.getOperand(1);
8114 VL = Op.getOperand(2);
8115 }
8116 if (VT.isFixedLengthVector()) {
8117 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
8118 ContainerVT =
8119 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
8120 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
8121 if (IsVP) {
8122 MVT MaskVT = getMaskTypeFor(ContainerVT);
8123 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
8124 }
8125 }
8126
8127 if (!IsVP)
8128 std::tie(Mask, VL) =
8129 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
8130
8131 unsigned ConvOpc = IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::FP_ROUND_VL;
8132
8133 if (IsDirectConv) {
8134 Src = DAG.getNode(ConvOpc, DL, ContainerVT, Src, Mask, VL);
8135 if (VT.isFixedLengthVector())
8136 Src = convertFromScalableVector(VT, Src, DAG, Subtarget);
8137 return Src;
8138 }
8139
8140 unsigned InterConvOpc =
8141 IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::VFNCVT_ROD_VL;
8142
8143 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
8144 SDValue IntermediateConv =
8145 DAG.getNode(InterConvOpc, DL, InterVT, Src, Mask, VL);
8146 SDValue Result =
8147 DAG.getNode(ConvOpc, DL, ContainerVT, IntermediateConv, Mask, VL);
8148 if (VT.isFixedLengthVector())
8149 return convertFromScalableVector(VT, Result, DAG, Subtarget);
8150 return Result;
8151}
8152
8153// Given a scalable vector type and an index into it, returns the type for the
8154// smallest subvector that the index fits in. This can be used to reduce LMUL
8155// for operations like vslidedown.
8156//
8157// E.g. With Zvl128b, index 3 in a nxv4i32 fits within the first nxv2i32.
8158static std::optional<MVT>
8159getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG,
8160 const RISCVSubtarget &Subtarget) {
8161 assert(VecVT.isScalableVector());
8162 const unsigned EltSize = VecVT.getScalarSizeInBits();
8163 const unsigned VectorBitsMin = Subtarget.getRealMinVLen();
8164 const unsigned MinVLMAX = VectorBitsMin / EltSize;
8165 MVT SmallerVT;
8166 if (MaxIdx < MinVLMAX)
8167 SmallerVT = getLMUL1VT(VecVT);
8168 else if (MaxIdx < MinVLMAX * 2)
8169 SmallerVT = getLMUL1VT(VecVT).getDoubleNumVectorElementsVT();
8170 else if (MaxIdx < MinVLMAX * 4)
8171 SmallerVT = getLMUL1VT(VecVT)
8172 .getDoubleNumVectorElementsVT()
8173 .getDoubleNumVectorElementsVT();
8174 if (!SmallerVT.isValid() || !VecVT.bitsGT(SmallerVT))
8175 return std::nullopt;
8176 return SmallerVT;
8177}
8178
8179// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
8180// first position of a vector, and that vector is slid up to the insert index.
8181// By limiting the active vector length to index+1 and merging with the
8182// original vector (with an undisturbed tail policy for elements >= VL), we
8183// achieve the desired result of leaving all elements untouched except the one
8184// at VL-1, which is replaced with the desired value.
8185SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
8186 SelectionDAG &DAG) const {
8187 SDLoc DL(Op);
8188 MVT VecVT = Op.getSimpleValueType();
8189 SDValue Vec = Op.getOperand(0);
8190 SDValue Val = Op.getOperand(1);
8191 SDValue Idx = Op.getOperand(2);
8192
8193 if (VecVT.getVectorElementType() == MVT::i1) {
8194 // FIXME: For now we just promote to an i8 vector and insert into that,
8195 // but this is probably not optimal.
8196 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
8197 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
8198 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx);
8199 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec);
8200 }
8201
8202 MVT ContainerVT = VecVT;
8203 // If the operand is a fixed-length vector, convert to a scalable one.
8204 if (VecVT.isFixedLengthVector()) {
8205 ContainerVT = getContainerForFixedLengthVector(VecVT);
8206 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
8207 }
8208
8209 // If we know the index we're going to insert at, we can shrink Vec so that
8210 // we're performing the scalar inserts and slideup on a smaller LMUL.
8211 MVT OrigContainerVT = ContainerVT;
8212 SDValue OrigVec = Vec;
8213 SDValue AlignedIdx;
8214 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx)) {
8215 const unsigned OrigIdx = IdxC->getZExtValue();
8216 // Do we know an upper bound on LMUL?
8217 if (auto ShrunkVT = getSmallestVTForIndex(ContainerVT, OrigIdx,
8218 DL, DAG, Subtarget)) {
8219 ContainerVT = *ShrunkVT;
8220 AlignedIdx = DAG.getVectorIdxConstant(0, DL);
8221 }
8222
8223 // If we're compiling for an exact VLEN value, we can always perform
8224 // the insert in m1 as we can determine the register corresponding to
8225 // the index in the register group.
8226 const MVT M1VT = getLMUL1VT(ContainerVT);
8227 if (auto VLEN = Subtarget.getRealVLen();
8228 VLEN && ContainerVT.bitsGT(M1VT)) {
8229 EVT ElemVT = VecVT.getVectorElementType();
8230 unsigned ElemsPerVReg = *VLEN / ElemVT.getFixedSizeInBits();
8231 unsigned RemIdx = OrigIdx % ElemsPerVReg;
8232 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
8233 unsigned ExtractIdx =
8234 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
8235 AlignedIdx = DAG.getVectorIdxConstant(ExtractIdx, DL);
8236 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
8237 ContainerVT = M1VT;
8238 }
8239
8240 if (AlignedIdx)
8241 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
8242 AlignedIdx);
8243 }
8244
8245 MVT XLenVT = Subtarget.getXLenVT();
8246
8247 bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
8248 // Even i64-element vectors on RV32 can be lowered without scalar
8249 // legalization if the most-significant 32 bits of the value are not affected
8250 // by the sign-extension of the lower 32 bits.
8251 // TODO: We could also catch sign extensions of a 32-bit value.
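// (Hedged example: an i64 constant such as -5 passes the isInt<32> check below,
// since its upper 32 bits are just the sign-extension of its lower 32 bits, so
// it can be inserted using only the 32-bit low half.)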
8252 if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
8253 const auto *CVal = cast<ConstantSDNode>(Val);
8254 if (isInt<32>(CVal->getSExtValue())) {
8255 IsLegalInsert = true;
8256 Val = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32);
8257 }
8258 }
8259
8260 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
8261
8262 SDValue ValInVec;
8263
8264 if (IsLegalInsert) {
8265 unsigned Opc =
8266 VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
8267 if (isNullConstant(Idx)) {
8268 if (!VecVT.isFloatingPoint())
8269 Val = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Val);
8270 Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
8271
8272 if (AlignedIdx)
8273 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
8274 Vec, AlignedIdx);
8275 if (!VecVT.isFixedLengthVector())
8276 return Vec;
8277 return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
8278 }
8279 ValInVec = lowerScalarInsert(Val, VL, ContainerVT, DL, DAG, Subtarget);
8280 } else {
8281 // On RV32, i64-element vectors must be specially handled to place the
8282 // value at element 0, by using two vslide1down instructions in sequence on
8283 // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
8284 // this.
8285 SDValue ValLo, ValHi;
8286 std::tie(ValLo, ValHi) = DAG.SplitScalar(Val, DL, MVT::i32, MVT::i32);
8287 MVT I32ContainerVT =
8288 MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
8289 SDValue I32Mask =
8290 getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
8291 // Limit the active VL to two.
8292 SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
8293 // If the Idx is 0 we can insert directly into the vector.
8294 if (isNullConstant(Idx)) {
8295 // First slide in the lo value, then the hi in above it. We use slide1down
8296 // to avoid the register group overlap constraint of vslide1up.
8297 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
8298 Vec, Vec, ValLo, I32Mask, InsertI64VL);
8299 // If the source vector is undef don't pass along the tail elements from
8300 // the previous slide1down.
8301 SDValue Tail = Vec.isUndef() ? Vec : ValInVec;
8302 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
8303 Tail, ValInVec, ValHi, I32Mask, InsertI64VL);
8304 // Bitcast back to the right container type.
8305 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
8306
8307 if (AlignedIdx)
8308 ValInVec =
8309 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
8310 ValInVec, AlignedIdx);
8311 if (!VecVT.isFixedLengthVector())
8312 return ValInVec;
8313 return convertFromScalableVector(VecVT, ValInVec, DAG, Subtarget);
8314 }
8315
8316 // First slide in the lo value, then the hi in above it. We use slide1down
8317 // to avoid the register group overlap constraint of vslide1up.
8318 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
8319 DAG.getUNDEF(I32ContainerVT),
8320 DAG.getUNDEF(I32ContainerVT), ValLo,
8321 I32Mask, InsertI64VL);
8322 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
8323 DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi,
8324 I32Mask, InsertI64VL);
8325 // Bitcast back to the right container type.
8326 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
8327 }
8328
8329 // Now that the value is in a vector, slide it into position.
8330 SDValue InsertVL =
8331 DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));
8332
8333 // Use tail agnostic policy if Idx is the last index of Vec.
8334 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
8335 if (VecVT.isFixedLengthVector() && isa<ConstantSDNode>(Idx) &&
8336 Idx->getAsZExtVal() + 1 == VecVT.getVectorNumElements())
8337 Policy = RISCVII::TAIL_AGNOSTIC;
8338 SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, ValInVec,
8339 Idx, Mask, InsertVL, Policy);
8340
8341 if (AlignedIdx)
8342 Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
8343 Slideup, AlignedIdx);
8344 if (!VecVT.isFixedLengthVector())
8345 return Slideup;
8346 return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
8347}
8348
8349// Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
8350// extract the first element: (extractelt (slidedown vec, idx), 0). For integer
8351// types this is done using VMV_X_S to allow us to glean information about the
8352// sign bits of the result.
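//
// Illustrative sketch only (hypothetical registers and vtype): extracting
// element 2 of a v4i32 is expected to look roughly like
//   vsetivli zero, 1, e32, m1, ta, ma
//   vslidedown.vi v8, v8, 2
//   vmv.x.s a0, v8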
8353SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
8354 SelectionDAG &DAG) const {
8355 SDLoc DL(Op);
8356 SDValue Idx = Op.getOperand(1);
8357 SDValue Vec = Op.getOperand(0);
8358 EVT EltVT = Op.getValueType();
8359 MVT VecVT = Vec.getSimpleValueType();
8360 MVT XLenVT = Subtarget.getXLenVT();
8361
8362 if (VecVT.getVectorElementType() == MVT::i1) {
8363 // Use vfirst.m to extract the first bit.
8364 if (isNullConstant(Idx)) {
8365 MVT ContainerVT = VecVT;
8366 if (VecVT.isFixedLengthVector()) {
8367 ContainerVT = getContainerForFixedLengthVector(VecVT);
8368 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
8369 }
8370 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
8371 SDValue Vfirst =
8372 DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Vec, Mask, VL);
8373 SDValue Res = DAG.getSetCC(DL, XLenVT, Vfirst,
8374 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
8375 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
8376 }
8377 if (VecVT.isFixedLengthVector()) {
8378 unsigned NumElts = VecVT.getVectorNumElements();
8379 if (NumElts >= 8) {
8380 MVT WideEltVT;
8381 unsigned WidenVecLen;
8382 SDValue ExtractElementIdx;
8383 SDValue ExtractBitIdx;
8384 unsigned MaxEEW = Subtarget.getELen();
8385 MVT LargestEltVT = MVT::getIntegerVT(
8386 std::min(MaxEEW, unsigned(XLenVT.getSizeInBits())));
8387 if (NumElts <= LargestEltVT.getSizeInBits()) {
8388 assert(isPowerOf2_32(NumElts) &&
8389 "the number of elements should be power of 2");
8390 WideEltVT = MVT::getIntegerVT(NumElts);
8391 WidenVecLen = 1;
8392 ExtractElementIdx = DAG.getConstant(0, DL, XLenVT);
8393 ExtractBitIdx = Idx;
8394 } else {
8395 WideEltVT = LargestEltVT;
8396 WidenVecLen = NumElts / WideEltVT.getSizeInBits();
8397 // extract element index = index / element width
8398 ExtractElementIdx = DAG.getNode(
8399 ISD::SRL, DL, XLenVT, Idx,
8400 DAG.getConstant(Log2_64(WideEltVT.getSizeInBits()), DL, XLenVT));
8401 // mask bit index = index % element width
8402 ExtractBitIdx = DAG.getNode(
8403 ISD::AND, DL, XLenVT, Idx,
8404 DAG.getConstant(WideEltVT.getSizeInBits() - 1, DL, XLenVT));
8405 }
8406 MVT WideVT = MVT::getVectorVT(WideEltVT, WidenVecLen);
8407 Vec = DAG.getNode(ISD::BITCAST, DL, WideVT, Vec);
8408 SDValue ExtractElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT,
8409 Vec, ExtractElementIdx);
8410 // Extract the bit from GPR.
8411 SDValue ShiftRight =
8412 DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx);
8413 SDValue Res = DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight,
8414 DAG.getConstant(1, DL, XLenVT));
8415 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
8416 }
8417 }
8418 // Otherwise, promote to an i8 vector and extract from that.
8419 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
8420 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
8421 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
8422 }
8423
8424 // If this is a fixed vector, we need to convert it to a scalable vector.
8425 MVT ContainerVT = VecVT;
8426 if (VecVT.isFixedLengthVector()) {
8427 ContainerVT = getContainerForFixedLengthVector(VecVT);
8428 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
8429 }
8430
8431 // If we're compiling for an exact VLEN value and we have a known
8432 // constant index, we can always perform the extract in m1 (or
8433 // smaller) as we can determine the register corresponding to
8434 // the index in the register group.
8435 const auto VLen = Subtarget.getRealVLen();
8436 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
8437 IdxC && VLen && VecVT.getSizeInBits().getKnownMinValue() > *VLen) {
8438 MVT M1VT = getLMUL1VT(ContainerVT);
8439 unsigned OrigIdx = IdxC->getZExtValue();
8440 EVT ElemVT = VecVT.getVectorElementType();
8441 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
8442 unsigned RemIdx = OrigIdx % ElemsPerVReg;
8443 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
8444 unsigned ExtractIdx =
8445 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
8446 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec,
8447 DAG.getVectorIdxConstant(ExtractIdx, DL));
8448 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
8449 ContainerVT = M1VT;
8450 }
8451
8452 // Reduce the LMUL of our slidedown and vmv.x.s to the smallest LMUL which
8453 // contains our index.
8454 std::optional<uint64_t> MaxIdx;
8455 if (VecVT.isFixedLengthVector())
8456 MaxIdx = VecVT.getVectorNumElements() - 1;
8457 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx))
8458 MaxIdx = IdxC->getZExtValue();
8459 if (MaxIdx) {
8460 if (auto SmallerVT =
8461 getSmallestVTForIndex(ContainerVT, *MaxIdx, DL, DAG, Subtarget)) {
8462 ContainerVT = *SmallerVT;
8463 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
8464 DAG.getConstant(0, DL, XLenVT));
8465 }
8466 }
8467
8468 // If after narrowing, the required slide is still greater than LMUL2,
8469 // fallback to generic expansion and go through the stack. This is done
8470 // for a subtle reason: extracting *all* elements out of a vector is
8471 // widely expected to be linear in vector size, but because vslidedown
8472 // is linear in LMUL, performing N extracts using vslidedown becomes
8473 // O(n^2) / (VLEN/ETYPE) work. On the surface, going through the stack
8474 // seems to have the same problem (the store is linear in LMUL), but the
8475 // generic expansion *memoizes* the store, and thus for many extracts of
8476 // the same vector we end up with one store and a bunch of loads.
8477 // TODO: We don't have the same code for insert_vector_elt because we
8478 // have BUILD_VECTOR and handle the degenerate case there. Should we
8479 // consider adding an inverse BUILD_VECTOR node?
8480 MVT LMUL2VT = getLMUL1VT(ContainerVT).getDoubleNumVectorElementsVT();
8481 if (ContainerVT.bitsGT(LMUL2VT) && VecVT.isFixedLengthVector())
8482 return SDValue();
8483
8484 // If the index is 0, the vector is already in the right position.
8485 if (!isNullConstant(Idx)) {
8486 // Use a VL of 1 to avoid processing more elements than we need.
8487 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
8488 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
8489 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
8490 }
8491
8492 if (!EltVT.isInteger()) {
8493 // Floating-point extracts are handled in TableGen.
8494 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
8495 DAG.getVectorIdxConstant(0, DL));
8496 }
8497
8498 SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
8499 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
8500}
8501
8502// Some RVV intrinsics may claim that they want an integer operand to be
8503// promoted or expanded.
8504 static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG,
8505 const RISCVSubtarget &Subtarget) {
8506 assert((Op.getOpcode() == ISD::INTRINSIC_VOID ||
8507 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
8508 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
8509 "Unexpected opcode");
8510
8511 if (!Subtarget.hasVInstructions())
8512 return SDValue();
8513
8514 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
8515 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
8516 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
8517
8518 SDLoc DL(Op);
8519
8520 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
8521 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
8522 if (!II || !II->hasScalarOperand())
8523 return SDValue();
8524
8525 unsigned SplatOp = II->ScalarOperand + 1 + HasChain;
8526 assert(SplatOp < Op.getNumOperands());
8527
8528 SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
8529 SDValue &ScalarOp = Operands[SplatOp];
8530 MVT OpVT = ScalarOp.getSimpleValueType();
8531 MVT XLenVT = Subtarget.getXLenVT();
8532
8533 // If this isn't a scalar, or its type is XLenVT we're done.
8534 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
8535 return SDValue();
8536
8537 // Simplest case is that the operand needs to be promoted to XLenVT.
8538 if (OpVT.bitsLT(XLenVT)) {
8539 // If the operand is a constant, sign extend to increase our chances
8540 // of being able to use a .vi instruction. ANY_EXTEND would become a
8541 // zero extend and the simm5 check in isel would fail.
8542 // FIXME: Should we ignore the upper bits in isel instead?
8543 unsigned ExtOpc =
8544 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
8545 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
8546 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
8547 }
8548
8549 // Use the previous operand to get the vXi64 VT. The result might be a mask
8550 // VT for compares. Using the previous operand assumes that the previous
8551 // operand will never have a smaller element size than a scalar operand and
8552 // that a widening operation never uses SEW=64.
8553 // NOTE: If this fails the below assert, we can probably just find the
8554 // element count from any operand or result and use it to construct the VT.
8555 assert(II->ScalarOperand > 0 && "Unexpected splat operand!");
8556 MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();
8557
8558 // The more complex case is when the scalar is larger than XLenVT.
8559 assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
8560 VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");
8561
8562 // If this is a sign-extended 32-bit value, we can truncate it and rely on the
8563 // instruction to sign-extend since SEW>XLEN.
8564 if (DAG.ComputeNumSignBits(ScalarOp) > 32) {
8565 ScalarOp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ScalarOp);
8566 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
8567 }
8568
8569 switch (IntNo) {
8570 case Intrinsic::riscv_vslide1up:
8571 case Intrinsic::riscv_vslide1down:
8572 case Intrinsic::riscv_vslide1up_mask:
8573 case Intrinsic::riscv_vslide1down_mask: {
8574 // We need to special case these when the scalar is larger than XLen.
8575 unsigned NumOps = Op.getNumOperands();
8576 bool IsMasked = NumOps == 7;
8577
8578 // Convert the vector source to the equivalent nxvXi32 vector.
8579 MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
8580 SDValue Vec = DAG.getBitcast(I32VT, Operands[2]);
8581 SDValue ScalarLo, ScalarHi;
8582 std::tie(ScalarLo, ScalarHi) =
8583 DAG.SplitScalar(ScalarOp, DL, MVT::i32, MVT::i32);
8584
8585 // Double the VL since we halved SEW.
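// (Hedged example: a vslide1up on nxv2i64 with a small constant AVL of 4 is
// re-expressed on nxv4i32 with an i32 VL of 8, sliding in the two 32-bit
// halves of the scalar one at a time.)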
8586 SDValue AVL = getVLOperand(Op);
8587 SDValue I32VL;
8588
8589 // Optimize for constant AVL
8590 if (isa<ConstantSDNode>(AVL)) {
8591 const auto [MinVLMAX, MaxVLMAX] =
8592 RISCVTargetLowering::computeVLMAXBounds(VT, Subtarget);
8593
8594 uint64_t AVLInt = AVL->getAsZExtVal();
8595 if (AVLInt <= MinVLMAX) {
8596 I32VL = DAG.getConstant(2 * AVLInt, DL, XLenVT);
8597 } else if (AVLInt >= 2 * MaxVLMAX) {
8598 // Just set vl to VLMAX in this situation
8599 RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(I32VT);
8600 SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
8601 unsigned Sew = RISCVVType::encodeSEW(I32VT.getScalarSizeInBits());
8602 SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
8603 SDValue SETVLMAX = DAG.getTargetConstant(
8604 Intrinsic::riscv_vsetvlimax, DL, MVT::i32);
8605 I32VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVLMAX, SEW,
8606 LMUL);
8607 } else {
8608 // For AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working vl
8609 // is related to the hardware implementation.
8610 // So let the following code handle it.
8611 }
8612 }
8613 if (!I32VL) {
8614 RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(VT);
8615 SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
8616 unsigned Sew = RISCVVType::encodeSEW(VT.getScalarSizeInBits());
8617 SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
8618 SDValue SETVL =
8619 DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, MVT::i32);
8620 // Use the vsetvli instruction to get the actual vector length, which
8621 // depends on the hardware implementation.
8622 SDValue VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVL, AVL,
8623 SEW, LMUL);
8624 I32VL =
8625 DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
8626 }
8627
8628 SDValue I32Mask = getAllOnesMask(I32VT, I32VL, DL, DAG);
8629
8630 // Shift the two scalar parts in using SEW=32 slide1up/slide1down
8631 // instructions.
8632 SDValue Passthru;
8633 if (IsMasked)
8634 Passthru = DAG.getUNDEF(I32VT);
8635 else
8636 Passthru = DAG.getBitcast(I32VT, Operands[1]);
8637
8638 if (IntNo == Intrinsic::riscv_vslide1up ||
8639 IntNo == Intrinsic::riscv_vslide1up_mask) {
8640 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
8641 ScalarHi, I32Mask, I32VL);
8642 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
8643 ScalarLo, I32Mask, I32VL);
8644 } else {
8645 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
8646 ScalarLo, I32Mask, I32VL);
8647 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
8648 ScalarHi, I32Mask, I32VL);
8649 }
8650
8651 // Convert back to nxvXi64.
8652 Vec = DAG.getBitcast(VT, Vec);
8653
8654 if (!IsMasked)
8655 return Vec;
8656 // Apply mask after the operation.
8657 SDValue Mask = Operands[NumOps - 3];
8658 SDValue MaskedOff = Operands[1];
8659 // Assume Policy operand is the last operand.
8660 uint64_t Policy = Operands[NumOps - 1]->getAsZExtVal();
8661 // We don't need to select maskedoff if it's undef.
8662 if (MaskedOff.isUndef())
8663 return Vec;
8664 // TAMU
8665 if (Policy == RISCVII::TAIL_AGNOSTIC)
8666 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
8667 DAG.getUNDEF(VT), AVL);
8668 // TUMA or TUMU: Currently we always emit tumu policy regardless of tuma.
8669 // It's fine because vmerge does not care about the mask policy.
8670 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
8671 MaskedOff, AVL);
8672 }
8673 }
8674
8675 // We need to convert the scalar to a splat vector.
8676 SDValue VL = getVLOperand(Op);
8677 assert(VL.getValueType() == XLenVT);
8678 ScalarOp = splatSplitI64WithVL(DL, VT, SDValue(), ScalarOp, VL, DAG);
8679 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
8680}
8681
8682// Lower the llvm.get.vector.length intrinsic to vsetvli. We only support
8683// scalable vector llvm.get.vector.length for now.
8684//
8685// We need to convert from a scalable VF to a vsetvli with VLMax equal to
8686// (vscale * VF). The vscale and VF are independent of element width. We use
8687// SEW=8 for the vsetvli because it is the only element width that supports all
8688 // fractional LMULs. The LMUL is chosen so that with SEW=8 the VLMax is
8689// (vscale * VF). Where vscale is defined as VLEN/RVVBitsPerBlock. The
8690// InsertVSETVLI pass can fix up the vtype of the vsetvli if a different
8691// SEW and LMUL are better for the surrounding vector instructions.
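//
// Hedged worked example (values assumed from RVVBitsPerBlock == 64, not from a
// test): LMul1VF = 64 / 8 = 8, so a request for VF == 2 is fractional with
// LMulVal = 8 / 2 = 4 and the vsetvli is emitted with e8, mf4. At that vtype
// VLMax = (VLEN / 8) / 4 = vscale * 2, which is exactly vscale * VF.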
8692 static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,
8693 const RISCVSubtarget &Subtarget) {
8694 MVT XLenVT = Subtarget.getXLenVT();
8695
8696 // The smallest LMUL is only valid for the smallest element width.
8697 const unsigned ElementWidth = 8;
8698
8699 // Determine the VF that corresponds to LMUL 1 for ElementWidth.
8700 unsigned LMul1VF = RISCV::RVVBitsPerBlock / ElementWidth;
8701 // We don't support VF==1 with ELEN==32.
8702 [[maybe_unused]] unsigned MinVF =
8703 RISCV::RVVBitsPerBlock / Subtarget.getELen();
8704
8705 [[maybe_unused]] unsigned VF = N->getConstantOperandVal(2);
8706 assert(VF >= MinVF && VF <= (LMul1VF * 8) && isPowerOf2_32(VF) &&
8707 "Unexpected VF");
8708
8709 bool Fractional = VF < LMul1VF;
8710 unsigned LMulVal = Fractional ? LMul1VF / VF : VF / LMul1VF;
8711 unsigned VLMUL = (unsigned)RISCVVType::encodeLMUL(LMulVal, Fractional);
8712 unsigned VSEW = RISCVVType::encodeSEW(ElementWidth);
8713
8714 SDLoc DL(N);
8715
8716 SDValue LMul = DAG.getTargetConstant(VLMUL, DL, XLenVT);
8717 SDValue Sew = DAG.getTargetConstant(VSEW, DL, XLenVT);
8718
8719 SDValue AVL = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1));
8720
8721 SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT);
8722 SDValue Res =
8723 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul);
8724 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res);
8725}
8726
8727 static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG,
8728 const RISCVSubtarget &Subtarget) {
8729 SDValue Op0 = N->getOperand(1);
8730 MVT OpVT = Op0.getSimpleValueType();
8731 MVT ContainerVT = OpVT;
8732 if (OpVT.isFixedLengthVector()) {
8733 ContainerVT = getContainerForFixedLengthVector(DAG, OpVT, Subtarget);
8734 Op0 = convertToScalableVector(ContainerVT, Op0, DAG, Subtarget);
8735 }
8736 MVT XLenVT = Subtarget.getXLenVT();
8737 SDLoc DL(N);
8738 auto [Mask, VL] = getDefaultVLOps(OpVT, ContainerVT, DL, DAG, Subtarget);
8739 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Op0, Mask, VL);
8740 if (isOneConstant(N->getOperand(2)))
8741 return Res;
8742
8743 // Convert -1 to VL.
8744 SDValue Setcc =
8745 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
8746 VL = DAG.getElementCount(DL, XLenVT, OpVT.getVectorElementCount());
8747 return DAG.getSelect(DL, XLenVT, Setcc, VL, Res);
8748}
8749
8750static inline void promoteVCIXScalar(const SDValue &Op,
8751 SmallVectorImpl<SDValue> &Operands,
8752 SelectionDAG &DAG) {
8753 const RISCVSubtarget &Subtarget =
8754 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
8755
8756 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
8757 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
8758 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
8759 SDLoc DL(Op);
8760
8761 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
8762 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
8763 if (!II || !II->hasScalarOperand())
8764 return;
8765
8766 unsigned SplatOp = II->ScalarOperand + 1;
8767 assert(SplatOp < Op.getNumOperands());
8768
8769 SDValue &ScalarOp = Operands[SplatOp];
8770 MVT OpVT = ScalarOp.getSimpleValueType();
8771 MVT XLenVT = Subtarget.getXLenVT();
8772
8773 // The code below is partially copied from lowerVectorIntrinsicScalars.
8774 // If this isn't a scalar, or its type is XLenVT we're done.
8775 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
8776 return;
8777
8778 // Manually emit promote operation for scalar operation.
8779 if (OpVT.bitsLT(XLenVT)) {
8780 unsigned ExtOpc =
8781 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
8782 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
8783 }
8784
8785 return;
8786}
8787
8788static void processVCIXOperands(SDValue &OrigOp,
8789 SmallVectorImpl<SDValue> &Operands,
8790 SelectionDAG &DAG) {
8791 promoteVCIXScalar(OrigOp, Operands, DAG);
8792 const RISCVSubtarget &Subtarget =
8793 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
8794 for (SDValue &V : Operands) {
8795 EVT ValType = V.getValueType();
8796 if (ValType.isVector() && ValType.isFloatingPoint()) {
8797 MVT InterimIVT =
8798 MVT::getVectorVT(MVT::getIntegerVT(ValType.getScalarSizeInBits()),
8799 ValType.getVectorElementCount());
8800 V = DAG.getBitcast(InterimIVT, V);
8801 }
8802 if (ValType.isFixedLengthVector()) {
8803 MVT OpContainerVT = getContainerForFixedLengthVector(
8804 DAG, V.getSimpleValueType(), Subtarget);
8805 V = convertToScalableVector(OpContainerVT, V, DAG, Subtarget);
8806 }
8807 }
8808}
8809
8810// LMUL * VLEN should be greater than or equal to EGS * SEW
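// Hedged numeric example (assuming a minimum VLEN of 128): a nxv4i32 operand
// occupies LMUL = 2 registers, so LMUL * VLEN = 256 >= EGS * SEW = 4 * 32 and
// an EGS=4 check passes; an EGS=8 check on the same type (8 * 32 = 256) would
// only just pass.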
8811static inline bool isValidEGW(int EGS, EVT VT,
8812 const RISCVSubtarget &Subtarget) {
8813 return (Subtarget.getRealMinVLen() *
8814 VT.getSizeInBits().getKnownMinValue()) / RISCV::RVVBitsPerBlock >=
8815 EGS * VT.getScalarSizeInBits();
8816}
8817
8818SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
8819 SelectionDAG &DAG) const {
8820 unsigned IntNo = Op.getConstantOperandVal(0);
8821 SDLoc DL(Op);
8822 MVT XLenVT = Subtarget.getXLenVT();
8823
8824 switch (IntNo) {
8825 default:
8826 break; // Don't custom lower most intrinsics.
8827 case Intrinsic::thread_pointer: {
8828 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8829 return DAG.getRegister(RISCV::X4, PtrVT);
8830 }
8831 case Intrinsic::riscv_orc_b:
8832 case Intrinsic::riscv_brev8:
8833 case Intrinsic::riscv_sha256sig0:
8834 case Intrinsic::riscv_sha256sig1:
8835 case Intrinsic::riscv_sha256sum0:
8836 case Intrinsic::riscv_sha256sum1:
8837 case Intrinsic::riscv_sm3p0:
8838 case Intrinsic::riscv_sm3p1: {
8839 unsigned Opc;
8840 switch (IntNo) {
8841 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
8842 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
8843 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
8844 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
8845 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
8846 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
8847 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
8848 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
8849 }
8850
8851 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8852 SDValue NewOp =
8853 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8854 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
8855 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8856 }
8857
8858 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
8859 }
8860 case Intrinsic::riscv_sm4ks:
8861 case Intrinsic::riscv_sm4ed: {
8862 unsigned Opc =
8863 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
8864
8865 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8866 SDValue NewOp0 =
8867 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8868 SDValue NewOp1 =
8869 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
8870 SDValue Res =
8871 DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, Op.getOperand(3));
8872 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8873 }
8874
8875 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2),
8876 Op.getOperand(3));
8877 }
8878 case Intrinsic::riscv_zip:
8879 case Intrinsic::riscv_unzip: {
8880 unsigned Opc =
8881 IntNo == Intrinsic::riscv_zip ? RISCVISD::ZIP : RISCVISD::UNZIP;
8882 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
8883 }
8884 case Intrinsic::riscv_mopr: {
8885 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8886 SDValue NewOp =
8887 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8888 SDValue Res = DAG.getNode(
8889 RISCVISD::MOPR, DL, MVT::i64, NewOp,
8890 DAG.getTargetConstant(Op.getConstantOperandVal(2), DL, MVT::i64));
8891 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8892 }
8893 return DAG.getNode(RISCVISD::MOPR, DL, XLenVT, Op.getOperand(1),
8894 Op.getOperand(2));
8895 }
8896
8897 case Intrinsic::riscv_moprr: {
8898 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8899 SDValue NewOp0 =
8900 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8901 SDValue NewOp1 =
8902 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
8903 SDValue Res = DAG.getNode(
8904 RISCVISD::MOPRR, DL, MVT::i64, NewOp0, NewOp1,
8905 DAG.getTargetConstant(Op.getConstantOperandVal(3), DL, MVT::i64));
8906 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8907 }
8908 return DAG.getNode(RISCVISD::MOPRR, DL, XLenVT, Op.getOperand(1),
8909 Op.getOperand(2), Op.getOperand(3));
8910 }
8911 case Intrinsic::riscv_clmul:
8912 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8913 SDValue NewOp0 =
8914 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8915 SDValue NewOp1 =
8916 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
8917 SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
8918 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8919 }
8920 return DAG.getNode(RISCVISD::CLMUL, DL, XLenVT, Op.getOperand(1),
8921 Op.getOperand(2));
8922 case Intrinsic::riscv_clmulh:
8923 case Intrinsic::riscv_clmulr: {
8924 unsigned Opc =
8925 IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH : RISCVISD::CLMULR;
8926 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8927 SDValue NewOp0 =
8928 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8929 SDValue NewOp1 =
8930 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
8931 NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
8932 DAG.getConstant(32, DL, MVT::i64));
8933 NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
8934 DAG.getConstant(32, DL, MVT::i64));
8935 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
8936 Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
8937 DAG.getConstant(32, DL, MVT::i64));
8938 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8939 }
8940
8941 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
8942 }
8943 case Intrinsic::experimental_get_vector_length:
8944 return lowerGetVectorLength(Op.getNode(), DAG, Subtarget);
8945 case Intrinsic::experimental_cttz_elts:
8946 return lowerCttzElts(Op.getNode(), DAG, Subtarget);
8947 case Intrinsic::riscv_vmv_x_s: {
8948 SDValue Res = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Op.getOperand(1));
8949 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
8950 }
8951 case Intrinsic::riscv_vfmv_f_s:
8952 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
8953 Op.getOperand(1), DAG.getVectorIdxConstant(0, DL));
8954 case Intrinsic::riscv_vmv_v_x:
8955 return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2),
8956 Op.getOperand(3), Op.getSimpleValueType(), DL, DAG,
8957 Subtarget);
8958 case Intrinsic::riscv_vfmv_v_f:
8959 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
8960 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
8961 case Intrinsic::riscv_vmv_s_x: {
8962 SDValue Scalar = Op.getOperand(2);
8963
8964 if (Scalar.getValueType().bitsLE(XLenVT)) {
8965 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
8966 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(),
8967 Op.getOperand(1), Scalar, Op.getOperand(3));
8968 }
8969
8970 assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");
8971
8972 // This is an i64 value that lives in two scalar registers. We have to
8973 // insert this in a convoluted way. First we build vXi64 splat containing
8974 // the two values that we assemble using some bit math. Next we'll use
8975 // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
8976 // to merge element 0 from our splat into the source vector.
8977 // FIXME: This is probably not the best way to do this, but it is
8978 // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
8979 // point.
8980 // sw lo, (a0)
8981 // sw hi, 4(a0)
8982 // vlse vX, (a0)
8983 //
8984 // vid.v vVid
8985 // vmseq.vx mMask, vVid, 0
8986 // vmerge.vvm vDest, vSrc, vVal, mMask
8987 MVT VT = Op.getSimpleValueType();
8988 SDValue Vec = Op.getOperand(1);
8989 SDValue VL = getVLOperand(Op);
8990
8991 SDValue SplattedVal = splatSplitI64WithVL(DL, VT, SDValue(), Scalar, VL, DAG);
8992 if (Op.getOperand(1).isUndef())
8993 return SplattedVal;
8994 SDValue SplattedIdx =
8995 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
8996 DAG.getConstant(0, DL, MVT::i32), VL);
8997
8998 MVT MaskVT = getMaskTypeFor(VT);
8999 SDValue Mask = getAllOnesMask(VT, VL, DL, DAG);
9000 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
9001 SDValue SelectCond =
9002 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
9003 {VID, SplattedIdx, DAG.getCondCode(ISD::SETEQ),
9004 DAG.getUNDEF(MaskVT), Mask, VL});
9005 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, SelectCond, SplattedVal,
9006 Vec, DAG.getUNDEF(VT), VL);
9007 }
9008 case Intrinsic::riscv_vfmv_s_f:
9009 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, Op.getSimpleValueType(),
9010 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
9011 // EGS * EEW >= 128 bits
9012 case Intrinsic::riscv_vaesdf_vv:
9013 case Intrinsic::riscv_vaesdf_vs:
9014 case Intrinsic::riscv_vaesdm_vv:
9015 case Intrinsic::riscv_vaesdm_vs:
9016 case Intrinsic::riscv_vaesef_vv:
9017 case Intrinsic::riscv_vaesef_vs:
9018 case Intrinsic::riscv_vaesem_vv:
9019 case Intrinsic::riscv_vaesem_vs:
9020 case Intrinsic::riscv_vaeskf1:
9021 case Intrinsic::riscv_vaeskf2:
9022 case Intrinsic::riscv_vaesz_vs:
9023 case Intrinsic::riscv_vsm4k:
9024 case Intrinsic::riscv_vsm4r_vv:
9025 case Intrinsic::riscv_vsm4r_vs: {
9026 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
9027 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
9028 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
9029 report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
9030 return Op;
9031 }
9032 // EGS * EEW >= 256 bits
9033 case Intrinsic::riscv_vsm3c:
9034 case Intrinsic::riscv_vsm3me: {
9035 if (!isValidEGW(8, Op.getSimpleValueType(), Subtarget) ||
9036 !isValidEGW(8, Op->getOperand(1).getSimpleValueType(), Subtarget))
9037 report_fatal_error("EGW should be greater than or equal to 8 * SEW.");
9038 return Op;
9039 }
9040 // zvknha(SEW=32)/zvknhb(SEW=[32|64])
9041 case Intrinsic::riscv_vsha2ch:
9042 case Intrinsic::riscv_vsha2cl:
9043 case Intrinsic::riscv_vsha2ms: {
9044 if (Op->getSimpleValueType(0).getScalarSizeInBits() == 64 &&
9045 !Subtarget.hasStdExtZvknhb())
9046 report_fatal_error("SEW=64 needs Zvknhb to be enabled.");
9047 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
9048 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
9049 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
9050 report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
9051 return Op;
9052 }
9053 case Intrinsic::riscv_sf_vc_v_x:
9054 case Intrinsic::riscv_sf_vc_v_i:
9055 case Intrinsic::riscv_sf_vc_v_xv:
9056 case Intrinsic::riscv_sf_vc_v_iv:
9057 case Intrinsic::riscv_sf_vc_v_vv:
9058 case Intrinsic::riscv_sf_vc_v_fv:
9059 case Intrinsic::riscv_sf_vc_v_xvv:
9060 case Intrinsic::riscv_sf_vc_v_ivv:
9061 case Intrinsic::riscv_sf_vc_v_vvv:
9062 case Intrinsic::riscv_sf_vc_v_fvv:
9063 case Intrinsic::riscv_sf_vc_v_xvw:
9064 case Intrinsic::riscv_sf_vc_v_ivw:
9065 case Intrinsic::riscv_sf_vc_v_vvw:
9066 case Intrinsic::riscv_sf_vc_v_fvw: {
9067 MVT VT = Op.getSimpleValueType();
9068
9069 SmallVector<SDValue> Operands{Op->op_values()};
9070 processVCIXOperands(Op, Operands, DAG);
9071
9072 MVT RetVT = VT;
9073 if (VT.isFixedLengthVector())
9074 RetVT = getContainerForFixedLengthVector(RetVT);
9075 else if (VT.isFloatingPoint())
9076 RetVT = MVT::getVectorVT(MVT::getIntegerVT(RetVT.getScalarSizeInBits()),
9077 RetVT.getVectorElementCount());
9078
9079 SDValue NewNode = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, RetVT, Operands);
9080
9081 if (VT.isFixedLengthVector())
9082 NewNode = convertFromScalableVector(VT, NewNode, DAG, Subtarget);
9083 else if (VT.isFloatingPoint())
9084 NewNode = DAG.getBitcast(VT, NewNode);
9085
9086 if (Op == NewNode)
9087 break;
9088
9089 return NewNode;
9090 }
9091 }
9092
9093 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
9094}
9095
9096 static SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG,
9097 unsigned Type) {
9098 SDLoc DL(Op);
9099 SmallVector<SDValue> Operands{Op->op_values()};
9100 Operands.erase(Operands.begin() + 1);
9101
9102 const RISCVSubtarget &Subtarget =
9103 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
9104 MVT VT = Op.getSimpleValueType();
9105 MVT RetVT = VT;
9106 MVT FloatVT = VT;
9107
9108 if (VT.isFloatingPoint()) {
9109 RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
9110 VT.getVectorElementCount());
9111 FloatVT = RetVT;
9112 }
9113 if (VT.isFixedLengthVector())
9114 RetVT = getContainerForFixedLengthVector(DAG, RetVT,
9115 Subtarget);
9116
9117 processVCIXOperands(Op, Operands, DAG);
9118
9119 SDVTList VTs = DAG.getVTList({RetVT, MVT::Other});
9120 SDValue NewNode = DAG.getNode(Type, DL, VTs, Operands);
9121 SDValue Chain = NewNode.getValue(1);
9122
9123 if (VT.isFixedLengthVector())
9124 NewNode = convertFromScalableVector(FloatVT, NewNode, DAG, Subtarget);
9125 if (VT.isFloatingPoint())
9126 NewNode = DAG.getBitcast(VT, NewNode);
9127
9128 NewNode = DAG.getMergeValues({NewNode, Chain}, DL);
9129
9130 return NewNode;
9131}
9132
9133 static SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG,
9134 unsigned Type) {
9135 SmallVector<SDValue> Operands{Op->op_values()};
9136 Operands.erase(Operands.begin() + 1);
9137 processVCIXOperands(Op, Operands, DAG);
9138
9139 return DAG.getNode(Type, SDLoc(Op), Op.getValueType(), Operands);
9140}
9141
9142SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
9143 SelectionDAG &DAG) const {
9144 unsigned IntNo = Op.getConstantOperandVal(1);
9145 switch (IntNo) {
9146 default:
9147 break;
9148 case Intrinsic::riscv_masked_strided_load: {
9149 SDLoc DL(Op);
9150 MVT XLenVT = Subtarget.getXLenVT();
9151
9152 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
9153 // the selection of the masked intrinsics doesn't do this for us.
9154 SDValue Mask = Op.getOperand(5);
9155 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
9156
9157 MVT VT = Op->getSimpleValueType(0);
9158 MVT ContainerVT = VT;
9159 if (VT.isFixedLengthVector())
9160 ContainerVT = getContainerForFixedLengthVector(VT);
9161
9162 SDValue PassThru = Op.getOperand(2);
9163 if (!IsUnmasked) {
9164 MVT MaskVT = getMaskTypeFor(ContainerVT);
9165 if (VT.isFixedLengthVector()) {
9166 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
9167 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
9168 }
9169 }
9170
9171 auto *Load = cast<MemIntrinsicSDNode>(Op);
9172 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
9173 SDValue Ptr = Op.getOperand(3);
9174 SDValue Stride = Op.getOperand(4);
9175 SDValue Result, Chain;
9176
9177 // TODO: We restrict this to unmasked loads currently in consideration of
9178 // the complexity of handling all-false masks.
9179 MVT ScalarVT = ContainerVT.getVectorElementType();
9180 if (IsUnmasked && isNullConstant(Stride) && ContainerVT.isInteger()) {
9181 SDValue ScalarLoad =
9182 DAG.getExtLoad(ISD::ZEXTLOAD, DL, XLenVT, Load->getChain(), Ptr,
9183 ScalarVT, Load->getMemOperand());
9184 Chain = ScalarLoad.getValue(1);
9185 Result = lowerScalarSplat(SDValue(), ScalarLoad, VL, ContainerVT, DL, DAG,
9186 Subtarget);
9187 } else if (IsUnmasked && isNullConstant(Stride) && isTypeLegal(ScalarVT)) {
9188 SDValue ScalarLoad = DAG.getLoad(ScalarVT, DL, Load->getChain(), Ptr,
9189 Load->getMemOperand());
9190 Chain = ScalarLoad.getValue(1);
9191 Result = DAG.getSplat(ContainerVT, DL, ScalarLoad);
9192 } else {
9193 SDValue IntID = DAG.getTargetConstant(
9194 IsUnmasked ? Intrinsic::riscv_vlse : Intrinsic::riscv_vlse_mask, DL,
9195 XLenVT);
9196
9197 SmallVector<SDValue, 8> Ops{Load->getChain(), IntID};
9198 if (IsUnmasked)
9199 Ops.push_back(DAG.getUNDEF(ContainerVT));
9200 else
9201 Ops.push_back(PassThru);
9202 Ops.push_back(Ptr);
9203 Ops.push_back(Stride);
9204 if (!IsUnmasked)
9205 Ops.push_back(Mask);
9206 Ops.push_back(VL);
9207 if (!IsUnmasked) {
9208 SDValue Policy =
9209 DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
9210 Ops.push_back(Policy);
9211 }
9212
9213 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
9214 Result =
9215 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
9216 Load->getMemoryVT(), Load->getMemOperand());
9217 Chain = Result.getValue(1);
9218 }
9219 if (VT.isFixedLengthVector())
9220 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
9221 return DAG.getMergeValues({Result, Chain}, DL);
9222 }
9223 case Intrinsic::riscv_seg2_load:
9224 case Intrinsic::riscv_seg3_load:
9225 case Intrinsic::riscv_seg4_load:
9226 case Intrinsic::riscv_seg5_load:
9227 case Intrinsic::riscv_seg6_load:
9228 case Intrinsic::riscv_seg7_load:
9229 case Intrinsic::riscv_seg8_load: {
9230 SDLoc DL(Op);
9231 static const Intrinsic::ID VlsegInts[7] = {
9232 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
9233 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
9234 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
9235 Intrinsic::riscv_vlseg8};
9236 unsigned NF = Op->getNumValues() - 1;
9237 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
9238 MVT XLenVT = Subtarget.getXLenVT();
9239 MVT VT = Op->getSimpleValueType(0);
9240 MVT ContainerVT = getContainerForFixedLengthVector(VT);
9241
9242 SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG,
9243 Subtarget);
9244 SDValue IntID = DAG.getTargetConstant(VlsegInts[NF - 2], DL, XLenVT);
9245 auto *Load = cast<MemIntrinsicSDNode>(Op);
9246 SmallVector<EVT, 9> ContainerVTs(NF, ContainerVT);
9247 ContainerVTs.push_back(MVT::Other);
9248 SDVTList VTs = DAG.getVTList(ContainerVTs);
9249 SmallVector<SDValue, 12> Ops = {Load->getChain(), IntID};
9250 Ops.insert(Ops.end(), NF, DAG.getUNDEF(ContainerVT));
9251 Ops.push_back(Op.getOperand(2));
9252 Ops.push_back(VL);
9253 SDValue Result =
9254 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
9255 Load->getMemoryVT(), Load->getMemOperand());
9256 SmallVector<SDValue, 9> Results;
9257 for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++)
9258 Results.push_back(convertFromScalableVector(VT, Result.getValue(RetIdx),
9259 DAG, Subtarget));
9260 Results.push_back(Result.getValue(NF));
9261 return DAG.getMergeValues(Results, DL);
9262 }
9263 case Intrinsic::riscv_sf_vc_v_x_se:
9265 case Intrinsic::riscv_sf_vc_v_i_se:
9267 case Intrinsic::riscv_sf_vc_v_xv_se:
9269 case Intrinsic::riscv_sf_vc_v_iv_se:
9271 case Intrinsic::riscv_sf_vc_v_vv_se:
9273 case Intrinsic::riscv_sf_vc_v_fv_se:
9275 case Intrinsic::riscv_sf_vc_v_xvv_se:
9277 case Intrinsic::riscv_sf_vc_v_ivv_se:
9279 case Intrinsic::riscv_sf_vc_v_vvv_se:
9281 case Intrinsic::riscv_sf_vc_v_fvv_se:
9283 case Intrinsic::riscv_sf_vc_v_xvw_se:
9285 case Intrinsic::riscv_sf_vc_v_ivw_se:
9287 case Intrinsic::riscv_sf_vc_v_vvw_se:
9289 case Intrinsic::riscv_sf_vc_v_fvw_se:
9291 }
9292
9293 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
9294}
9295
9296SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
9297 SelectionDAG &DAG) const {
9298 unsigned IntNo = Op.getConstantOperandVal(1);
9299 switch (IntNo) {
9300 default:
9301 break;
9302 case Intrinsic::riscv_masked_strided_store: {
9303 SDLoc DL(Op);
9304 MVT XLenVT = Subtarget.getXLenVT();
9305
9306 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
9307 // the selection of the masked intrinsics doesn't do this for us.
9308 SDValue Mask = Op.getOperand(5);
9309 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
9310
9311 SDValue Val = Op.getOperand(2);
9312 MVT VT = Val.getSimpleValueType();
9313 MVT ContainerVT = VT;
9314 if (VT.isFixedLengthVector()) {
9315 ContainerVT = getContainerForFixedLengthVector(VT);
9316 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
9317 }
9318 if (!IsUnmasked) {
9319 MVT MaskVT = getMaskTypeFor(ContainerVT);
9320 if (VT.isFixedLengthVector())
9321 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
9322 }
9323
9324 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
9325
9326 SDValue IntID = DAG.getTargetConstant(
9327 IsUnmasked ? Intrinsic::riscv_vsse : Intrinsic::riscv_vsse_mask, DL,
9328 XLenVT);
9329
9330 auto *Store = cast<MemIntrinsicSDNode>(Op);
9331 SmallVector<SDValue, 8> Ops{Store->getChain(), IntID};
9332 Ops.push_back(Val);
9333 Ops.push_back(Op.getOperand(3)); // Ptr
9334 Ops.push_back(Op.getOperand(4)); // Stride
9335 if (!IsUnmasked)
9336 Ops.push_back(Mask);
9337 Ops.push_back(VL);
9338
9339 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, Store->getVTList(),
9340 Ops, Store->getMemoryVT(),
9341 Store->getMemOperand());
9342 }
9343 case Intrinsic::riscv_seg2_store:
9344 case Intrinsic::riscv_seg3_store:
9345 case Intrinsic::riscv_seg4_store:
9346 case Intrinsic::riscv_seg5_store:
9347 case Intrinsic::riscv_seg6_store:
9348 case Intrinsic::riscv_seg7_store:
9349 case Intrinsic::riscv_seg8_store: {
9350 SDLoc DL(Op);
9351 static const Intrinsic::ID VssegInts[] = {
9352 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
9353 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
9354 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
9355 Intrinsic::riscv_vsseg8};
9356 // Operands are (chain, int_id, vec*, ptr, vl)
9357 unsigned NF = Op->getNumOperands() - 4;
9358 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
9359 MVT XLenVT = Subtarget.getXLenVT();
9360 MVT VT = Op->getOperand(2).getSimpleValueType();
9361 MVT ContainerVT = getContainerForFixedLengthVector(VT);
9362
9363 SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG,
9364 Subtarget);
9365 SDValue IntID = DAG.getTargetConstant(VssegInts[NF - 2], DL, XLenVT);
9366 SDValue Ptr = Op->getOperand(NF + 2);
9367
9368 auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op);
9369 SmallVector<SDValue, 12> Ops = {FixedIntrinsic->getChain(), IntID};
9370 for (unsigned i = 0; i < NF; i++)
9371 Ops.push_back(convertToScalableVector(
9372 ContainerVT, FixedIntrinsic->getOperand(2 + i), DAG, Subtarget));
9373 Ops.append({Ptr, VL});
9374
9375 return DAG.getMemIntrinsicNode(
9376 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
9377 FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
9378 }
9379 case Intrinsic::riscv_sf_vc_xv_se:
9381 case Intrinsic::riscv_sf_vc_iv_se:
9383 case Intrinsic::riscv_sf_vc_vv_se:
9385 case Intrinsic::riscv_sf_vc_fv_se:
9387 case Intrinsic::riscv_sf_vc_xvv_se:
9389 case Intrinsic::riscv_sf_vc_ivv_se:
9391 case Intrinsic::riscv_sf_vc_vvv_se:
9393 case Intrinsic::riscv_sf_vc_fvv_se:
9395 case Intrinsic::riscv_sf_vc_xvw_se:
9397 case Intrinsic::riscv_sf_vc_ivw_se:
9399 case Intrinsic::riscv_sf_vc_vvw_se:
9401 case Intrinsic::riscv_sf_vc_fvw_se:
9403 }
9404
9405 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
9406}
9407
9408static unsigned getRVVReductionOp(unsigned ISDOpcode) {
9409 switch (ISDOpcode) {
9410 default:
9411 llvm_unreachable("Unhandled reduction");
9412 case ISD::VP_REDUCE_ADD:
9413 case ISD::VECREDUCE_ADD:
9414 return RISCVISD::VECREDUCE_ADD_VL;
9415 case ISD::VP_REDUCE_UMAX:
9416 case ISD::VECREDUCE_UMAX:
9417 return RISCVISD::VECREDUCE_UMAX_VL;
9418 case ISD::VP_REDUCE_SMAX:
9419 case ISD::VECREDUCE_SMAX:
9420 return RISCVISD::VECREDUCE_SMAX_VL;
9421 case ISD::VP_REDUCE_UMIN:
9422 case ISD::VECREDUCE_UMIN:
9423 return RISCVISD::VECREDUCE_UMIN_VL;
9424 case ISD::VP_REDUCE_SMIN:
9425 case ISD::VECREDUCE_SMIN:
9426 return RISCVISD::VECREDUCE_SMIN_VL;
9427 case ISD::VP_REDUCE_AND:
9428 case ISD::VECREDUCE_AND:
9429 return RISCVISD::VECREDUCE_AND_VL;
9430 case ISD::VP_REDUCE_OR:
9431 case ISD::VECREDUCE_OR:
9432 return RISCVISD::VECREDUCE_OR_VL;
9433 case ISD::VP_REDUCE_XOR:
9434 case ISD::VECREDUCE_XOR:
9435 return RISCVISD::VECREDUCE_XOR_VL;
9436 case ISD::VP_REDUCE_FADD:
9437 return RISCVISD::VECREDUCE_FADD_VL;
9438 case ISD::VP_REDUCE_SEQ_FADD:
9439 return RISCVISD::VECREDUCE_SEQ_FADD_VL;
9440 case ISD::VP_REDUCE_FMAX:
9441 return RISCVISD::VECREDUCE_FMAX_VL;
9442 case ISD::VP_REDUCE_FMIN:
9443 return RISCVISD::VECREDUCE_FMIN_VL;
9444 }
9445
9446}
9447
9448SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
9449 SelectionDAG &DAG,
9450 bool IsVP) const {
9451 SDLoc DL(Op);
9452 SDValue Vec = Op.getOperand(IsVP ? 1 : 0);
9453 MVT VecVT = Vec.getSimpleValueType();
9454 assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
9455 Op.getOpcode() == ISD::VECREDUCE_OR ||
9456 Op.getOpcode() == ISD::VECREDUCE_XOR ||
9457 Op.getOpcode() == ISD::VP_REDUCE_AND ||
9458 Op.getOpcode() == ISD::VP_REDUCE_OR ||
9459 Op.getOpcode() == ISD::VP_REDUCE_XOR) &&
9460 "Unexpected reduction lowering");
9461
9462 MVT XLenVT = Subtarget.getXLenVT();
9463
9464 MVT ContainerVT = VecVT;
9465 if (VecVT.isFixedLengthVector()) {
9466 ContainerVT = getContainerForFixedLengthVector(VecVT);
9467 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9468 }
9469
9470 SDValue Mask, VL;
9471 if (IsVP) {
9472 Mask = Op.getOperand(2);
9473 VL = Op.getOperand(3);
9474 } else {
9475 std::tie(Mask, VL) =
9476 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9477 }
9478
9479 unsigned BaseOpc;
9480 ISD::CondCode CC;
9481 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
9482
9483 switch (Op.getOpcode()) {
9484 default:
9485 llvm_unreachable("Unhandled reduction");
9486 case ISD::VECREDUCE_AND:
9487 case ISD::VP_REDUCE_AND: {
9488 // vcpop ~x == 0
9489 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
9490 Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL);
9491 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
9492 CC = ISD::SETEQ;
9493 BaseOpc = ISD::AND;
9494 break;
9495 }
9496 case ISD::VECREDUCE_OR:
9497 case ISD::VP_REDUCE_OR:
9498 // vcpop x != 0
9499 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
9500 CC = ISD::SETNE;
9501 BaseOpc = ISD::OR;
9502 break;
9503 case ISD::VECREDUCE_XOR:
9504 case ISD::VP_REDUCE_XOR: {
9505 // ((vcpop x) & 1) != 0
9506 SDValue One = DAG.getConstant(1, DL, XLenVT);
9507 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
9508 Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One);
9509 CC = ISD::SETNE;
9510 BaseOpc = ISD::XOR;
9511 break;
9512 }
9513 }
9514
9515 SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC);
9516 SetCC = DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), SetCC);
9517
9518 if (!IsVP)
9519 return SetCC;
9520
9521 // Now include the start value in the operation.
9522 // Note that we must return the start value when no elements are operated
9523 // upon. The vcpop instructions we've emitted in each case above will return
9524 // 0 for an inactive vector, and so we've already received the neutral value:
9525 // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we
9526 // can simply include the start value.
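// (Hedged illustration: for a VP and-reduction with EVL == 0, vcpop.m yields 0,
// the setcc (0 == 0) produces 1, and 1 AND start == start, so the start value
// is returned as required.)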
9527 return DAG.getNode(BaseOpc, DL, Op.getValueType(), SetCC, Op.getOperand(0));
9528}
9529
9530static bool isNonZeroAVL(SDValue AVL) {
9531 auto *RegisterAVL = dyn_cast<RegisterSDNode>(AVL);
9532 auto *ImmAVL = dyn_cast<ConstantSDNode>(AVL);
9533 return (RegisterAVL && RegisterAVL->getReg() == RISCV::X0) ||
9534 (ImmAVL && ImmAVL->getZExtValue() >= 1);
9535}
9536
9537/// Helper to lower a reduction sequence of the form:
9538/// scalar = reduce_op vec, scalar_start
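///
/// Illustrative sketch only (hypothetical registers, an integer add reduction
/// assumed): after an appropriate vsetvli the emitted sequence looks roughly
/// like
///   vmv.s.x v9, a0          ; scalar_start into element 0 of an LMUL1 temp
///   vredsum.vs v9, v8, v9   ; reduce the source vector into that element
///   vmv.x.s a0, v9          ; read the accumulated scalar back out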
9539static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT,
9540 SDValue StartValue, SDValue Vec, SDValue Mask,
9541 SDValue VL, const SDLoc &DL, SelectionDAG &DAG,
9542 const RISCVSubtarget &Subtarget) {
9543 const MVT VecVT = Vec.getSimpleValueType();
9544 const MVT M1VT = getLMUL1VT(VecVT);
9545 const MVT XLenVT = Subtarget.getXLenVT();
9546 const bool NonZeroAVL = isNonZeroAVL(VL);
9547
9548 // The reduction needs an LMUL1 input; do the splat at either LMUL1
9549 // or the original VT if fractional.
9550 auto InnerVT = VecVT.bitsLE(M1VT) ? VecVT : M1VT;
9551 // We reuse the VL of the reduction to reduce vsetvli toggles if we can
9552 // prove it is non-zero. For the AVL=0 case, we need the scalar to
9553 // be the result of the reduction operation.
9554 auto InnerVL = NonZeroAVL ? VL : DAG.getConstant(1, DL, XLenVT);
9555 SDValue InitialValue = lowerScalarInsert(StartValue, InnerVL, InnerVT, DL,
9556 DAG, Subtarget);
9557 if (M1VT != InnerVT)
9558 InitialValue =
9559 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, M1VT, DAG.getUNDEF(M1VT),
9560 InitialValue, DAG.getVectorIdxConstant(0, DL));
9561 SDValue PassThru = NonZeroAVL ? DAG.getUNDEF(M1VT) : InitialValue;
9562 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
9563 SDValue Ops[] = {PassThru, Vec, InitialValue, Mask, VL, Policy};
9564 SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, Ops);
9565 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Reduction,
9566 DAG.getVectorIdxConstant(0, DL));
9567}
9568
9569SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
9570 SelectionDAG &DAG) const {
9571 SDLoc DL(Op);
9572 SDValue Vec = Op.getOperand(0);
9573 EVT VecEVT = Vec.getValueType();
9574
9575 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
9576
9577 // Due to ordering in legalize types we may have a vector type that needs to
9578 // be split. Do that manually so we can get down to a legal type.
9579 while (getTypeAction(*DAG.getContext(), VecEVT) ==
9580 TargetLowering::TypeSplitVector) {
9581 auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
9582 VecEVT = Lo.getValueType();
9583 Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi);
9584 }
9585
9586 // TODO: The type may need to be widened rather than split. Or widened before
9587 // it can be split.
9588 if (!isTypeLegal(VecEVT))
9589 return SDValue();
9590
9591 MVT VecVT = VecEVT.getSimpleVT();
9592 MVT VecEltVT = VecVT.getVectorElementType();
9593 unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
9594
9595 MVT ContainerVT = VecVT;
9596 if (VecVT.isFixedLengthVector()) {
9597 ContainerVT = getContainerForFixedLengthVector(VecVT);
9598 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9599 }
9600
9601 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9602
9603 SDValue StartV = DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
9604 switch (BaseOpc) {
9605 case ISD::AND:
9606 case ISD::OR:
9607 case ISD::UMAX:
9608 case ISD::UMIN:
9609 case ISD::SMAX:
9610 case ISD::SMIN:
9611 StartV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Vec,
9612 DAG.getVectorIdxConstant(0, DL));
9613 }
9614 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), StartV, Vec,
9615 Mask, VL, DL, DAG, Subtarget);
9616}
9617
9618// Given a reduction op, this function returns the matching reduction opcode,
9619// the vector SDValue and the scalar SDValue required to lower this to a
9620// RISCVISD node.
9621static std::tuple<unsigned, SDValue, SDValue>
9622 getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT,
9623 const RISCVSubtarget &Subtarget) {
9624 SDLoc DL(Op);
9625 auto Flags = Op->getFlags();
9626 unsigned Opcode = Op.getOpcode();
9627 switch (Opcode) {
9628 default:
9629 llvm_unreachable("Unhandled reduction");
9630 case ISD::VECREDUCE_FADD: {
9631 // Use positive zero if we can. It is cheaper to materialize.
9632 SDValue Zero =
9633 DAG.getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, EltVT);
9634 return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), Zero);
9635 }
9636 case ISD::VECREDUCE_SEQ_FADD:
9637 return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
9638 Op.getOperand(0));
9639 case ISD::VECREDUCE_FMINIMUM:
9640 case ISD::VECREDUCE_FMAXIMUM:
9641 case ISD::VECREDUCE_FMIN:
9642 case ISD::VECREDUCE_FMAX: {
9643 SDValue Front =
9644 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Op.getOperand(0),
9645 DAG.getVectorIdxConstant(0, DL));
9646 unsigned RVVOpc =
9647 (Opcode == ISD::VECREDUCE_FMIN || Opcode == ISD::VECREDUCE_FMINIMUM)
9648 ? RISCVISD::VECREDUCE_FMIN_VL
9649 : RISCVISD::VECREDUCE_FMAX_VL;
9650 return std::make_tuple(RVVOpc, Op.getOperand(0), Front);
9651 }
9652 }
9653}
9654
9655SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
9656 SelectionDAG &DAG) const {
9657 SDLoc DL(Op);
9658 MVT VecEltVT = Op.getSimpleValueType();
9659
9660 unsigned RVVOpcode;
9661 SDValue VectorVal, ScalarVal;
9662 std::tie(RVVOpcode, VectorVal, ScalarVal) =
9663 getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT, Subtarget);
9664 MVT VecVT = VectorVal.getSimpleValueType();
9665
9666 MVT ContainerVT = VecVT;
9667 if (VecVT.isFixedLengthVector()) {
9668 ContainerVT = getContainerForFixedLengthVector(VecVT);
9669 VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);
9670 }
9671
9672 MVT ResVT = Op.getSimpleValueType();
9673 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9674 SDValue Res = lowerReductionSeq(RVVOpcode, ResVT, ScalarVal, VectorVal, Mask,
9675 VL, DL, DAG, Subtarget);
9676 if (Op.getOpcode() != ISD::VECREDUCE_FMINIMUM &&
9677 Op.getOpcode() != ISD::VECREDUCE_FMAXIMUM)
9678 return Res;
9679
9680 if (Op->getFlags().hasNoNaNs())
9681 return Res;
9682
9683 // Force output to NaN if any element is Nan.
9684 SDValue IsNan =
9685 DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
9686 {VectorVal, VectorVal, DAG.getCondCode(ISD::SETNE),
9687 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
9688 MVT XLenVT = Subtarget.getXLenVT();
9689 SDValue CPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNan, Mask, VL);
9690 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, CPop,
9691 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
9692 return DAG.getSelect(
9693 DL, ResVT, NoNaNs, Res,
9694 DAG.getConstantFP(APFloat::getNaN(DAG.EVTToAPFloatSemantics(ResVT)), DL,
9695 ResVT));
9696}
9697
9698SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
9699 SelectionDAG &DAG) const {
9700 SDLoc DL(Op);
9701 SDValue Vec = Op.getOperand(1);
9702 EVT VecEVT = Vec.getValueType();
9703
9704 // TODO: The type may need to be widened rather than split. Or widened before
9705 // it can be split.
9706 if (!isTypeLegal(VecEVT))
9707 return SDValue();
9708
9709 MVT VecVT = VecEVT.getSimpleVT();
9710 unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
9711
9712 if (VecVT.isFixedLengthVector()) {
9713 auto ContainerVT = getContainerForFixedLengthVector(VecVT);
9714 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9715 }
9716
9717 SDValue VL = Op.getOperand(3);
9718 SDValue Mask = Op.getOperand(2);
9719 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), Op.getOperand(0),
9720 Vec, Mask, VL, DL, DAG, Subtarget);
9721}
9722
9723SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
9724 SelectionDAG &DAG) const {
9725 SDValue Vec = Op.getOperand(0);
9726 SDValue SubVec = Op.getOperand(1);
9727 MVT VecVT = Vec.getSimpleValueType();
9728 MVT SubVecVT = SubVec.getSimpleValueType();
9729
9730 SDLoc DL(Op);
9731 MVT XLenVT = Subtarget.getXLenVT();
9732 unsigned OrigIdx = Op.getConstantOperandVal(2);
9733 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
9734
9735 // We don't have the ability to slide mask vectors up indexed by their i1
9736 // elements; the smallest we can do is i8. Often we are able to bitcast to
9737 // equivalent i8 vectors. Note that when inserting a fixed-length vector
9738 // into a scalable one, we might not necessarily have enough scalable
9739 // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
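  // Illustrative example (a sketch, not tied to the code below): inserting a
  // v16i1 subvector at index 8 into an nxv16i1 vector can be treated as
  // inserting a v2i8 subvector at index 1 into an nxv2i8 vector, since the
  // index and both element counts are divisible by 8.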
9740 if (SubVecVT.getVectorElementType() == MVT::i1 &&
9741 (OrigIdx != 0 || !Vec.isUndef())) {
9742 if (VecVT.getVectorMinNumElements() >= 8 &&
9743 SubVecVT.getVectorMinNumElements() >= 8) {
9744 assert(OrigIdx % 8 == 0 && "Invalid index");
9745 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
9746 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
9747 "Unexpected mask vector lowering");
9748 OrigIdx /= 8;
9749 SubVecVT =
9750 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
9751 SubVecVT.isScalableVector());
9752 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
9753 VecVT.isScalableVector());
9754 Vec = DAG.getBitcast(VecVT, Vec);
9755 SubVec = DAG.getBitcast(SubVecVT, SubVec);
9756 } else {
9757 // We can't slide this mask vector up indexed by its i1 elements.
9758 // This poses a problem when we wish to insert a scalable vector which
9759 // can't be re-expressed as a larger type. Just choose the slow path and
9760 // extend to a larger type, then truncate back down.
9761 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
9762 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
9763 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
9764 SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
9765 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
9766 Op.getOperand(2));
9767 SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
9768 return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
9769 }
9770 }
9771
9772 // If the subvector is a fixed-length type, we cannot use subregister
9773 // manipulation to simplify the codegen; we don't know which register of an
9774 // LMUL group contains the specific subvector, as we only know the minimum
9775 // register size. Therefore we must slide the vector group up the full
9776 // amount.
9777 if (SubVecVT.isFixedLengthVector()) {
9778 if (OrigIdx == 0 && Vec.isUndef() && !VecVT.isFixedLengthVector())
9779 return Op;
9780 MVT ContainerVT = VecVT;
9781 if (VecVT.isFixedLengthVector()) {
9782 ContainerVT = getContainerForFixedLengthVector(VecVT);
9783 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9784 }
9785
9786 if (OrigIdx == 0 && Vec.isUndef() && VecVT.isFixedLengthVector()) {
9787 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
9788 DAG.getUNDEF(ContainerVT), SubVec,
9789 DAG.getVectorIdxConstant(0, DL));
9790 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
9791 return DAG.getBitcast(Op.getValueType(), SubVec);
9792 }
9793
9794 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
9795 DAG.getUNDEF(ContainerVT), SubVec,
9796 DAG.getVectorIdxConstant(0, DL));
9797 SDValue Mask =
9798 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
9799 // Set the vector length to only the number of elements we care about. Note
9800 // that for slideup this includes the offset.
9801 unsigned EndIndex = OrigIdx + SubVecVT.getVectorNumElements();
9802 SDValue VL = getVLOp(EndIndex, ContainerVT, DL, DAG, Subtarget);
9803
9804 // Use tail agnostic policy if we're inserting over Vec's tail.
9805 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
9806 if (VecVT.isFixedLengthVector() && EndIndex == VecVT.getVectorNumElements())
9807 Policy = RISCVII::TAIL_AGNOSTIC;
9808
9809 // If we're inserting into the lowest elements, use a tail undisturbed
9810 // vmv.v.v.
9811 if (OrigIdx == 0) {
9812 SubVec =
9813 DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, Vec, SubVec, VL);
9814 } else {
9815 SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
9816 SubVec = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, SubVec,
9817 SlideupAmt, Mask, VL, Policy);
9818 }
9819
9820 if (VecVT.isFixedLengthVector())
9821 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
9822 return DAG.getBitcast(Op.getValueType(), SubVec);
9823 }
9824
9825 unsigned SubRegIdx, RemIdx;
9826 std::tie(SubRegIdx, RemIdx) =
9827 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
9828 VecVT, SubVecVT, OrigIdx, TRI);
9829
9830 RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecVT);
9831 bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
9832 SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
9833 SubVecLMUL == RISCVII::VLMUL::LMUL_F8;
9834
9835 // 1. If the Idx has been completely eliminated and this subvector's size is
9836 // a vector register or a multiple thereof, or the surrounding elements are
9837 // undef, then this is a subvector insert which naturally aligns to a vector
9838 // register. These can easily be handled using subregister manipulation.
9839 // 2. If the subvector is smaller than a vector register, then the insertion
9840 // must preserve the undisturbed elements of the register. We do this by
9841 // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
9842 // (which resolves to a subregister copy), performing a VSLIDEUP to place the
9843 // subvector within the vector register, and an INSERT_SUBVECTOR of that
9844 // LMUL=1 type back into the larger vector (resolving to another subregister
9845 // operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type
9846 // to avoid allocating a large register group to hold our subvector.
9847 if (RemIdx == 0 && (!IsSubVecPartReg || Vec.isUndef()))
9848 return Op;
9849
9850 // VSLIDEUP works by leaving elements 0<=i<OFFSET undisturbed, elements
9851 // OFFSET<=i<VL set to the "subvector" and VL<=i<VLMAX set to the tail policy
9852 // (in our case undisturbed). This means we can set up a subvector insertion
9853 // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
9854 // size of the subvector.
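  // For example (roughly): to insert a 2-element subvector at offset 2, the
  // slideup uses OFFSET=2 and VL=4, leaving elements 0..1 and everything at
  // index 4 and above undisturbed while elements 2..3 receive the subvector.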
9855 MVT InterSubVT = VecVT;
9856 SDValue AlignedExtract = Vec;
9857 unsigned AlignedIdx = OrigIdx - RemIdx;
9858 if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
9859 InterSubVT = getLMUL1VT(VecVT);
9860 // Extract a subvector equal to the nearest full vector register type. This
9861 // should resolve to a EXTRACT_SUBREG instruction.
9862 AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
9863 DAG.getVectorIdxConstant(AlignedIdx, DL));
9864 }
9865
9866 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InterSubVT,
9867 DAG.getUNDEF(InterSubVT), SubVec,
9868 DAG.getVectorIdxConstant(0, DL));
9869
9870 auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
9871
9872 ElementCount EndIndex =
9873 ElementCount::getScalable(RemIdx) + SubVecVT.getVectorElementCount();
9874 VL = computeVLMax(SubVecVT, DL, DAG);
9875
9876 // Use tail agnostic policy if we're inserting over InterSubVT's tail.
9877 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
9878 if (EndIndex == InterSubVT.getVectorElementCount())
9879 Policy = RISCVII::TAIL_AGNOSTIC;
9880
9881 // If we're inserting into the lowest elements, use a tail undisturbed
9882 // vmv.v.v.
9883 if (RemIdx == 0) {
9884 SubVec = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, InterSubVT, AlignedExtract,
9885 SubVec, VL);
9886 } else {
9887 SDValue SlideupAmt =
9888 DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), RemIdx));
9889
9890 // Construct the vector length corresponding to RemIdx + length(SubVecVT).
9891 VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);
9892
9893 SubVec = getVSlideup(DAG, Subtarget, DL, InterSubVT, AlignedExtract, SubVec,
9894 SlideupAmt, Mask, VL, Policy);
9895 }
9896
9897 // If required, insert this subvector back into the correct vector register.
9898 // This should resolve to an INSERT_SUBREG instruction.
9899 if (VecVT.bitsGT(InterSubVT))
9900 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, Vec, SubVec,
9901 DAG.getVectorIdxConstant(AlignedIdx, DL));
9902
9903 // We might have bitcast from a mask type: cast back to the original type if
9904 // required.
9905 return DAG.getBitcast(Op.getSimpleValueType(), SubVec);
9906}
9907
9908SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
9909 SelectionDAG &DAG) const {
9910 SDValue Vec = Op.getOperand(0);
9911 MVT SubVecVT = Op.getSimpleValueType();
9912 MVT VecVT = Vec.getSimpleValueType();
9913
9914 SDLoc DL(Op);
9915 MVT XLenVT = Subtarget.getXLenVT();
9916 unsigned OrigIdx = Op.getConstantOperandVal(1);
9917 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
9918
9919 // We don't have the ability to slide mask vectors down indexed by their i1
9920 // elements; the smallest we can do is i8. Often we are able to bitcast to
9921 // equivalent i8 vectors. Note that when extracting a fixed-length vector
9922 // from a scalable one, we might not necessarily have enough scalable
9923 // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
9924 if (SubVecVT.getVectorElementType() == MVT::i1 && OrigIdx != 0) {
9925 if (VecVT.getVectorMinNumElements() >= 8 &&
9926 SubVecVT.getVectorMinNumElements() >= 8) {
9927 assert(OrigIdx % 8 == 0 && "Invalid index");
9928 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
9929 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
9930 "Unexpected mask vector lowering");
9931 OrigIdx /= 8;
9932 SubVecVT =
9933 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
9934 SubVecVT.isScalableVector());
9935 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
9936 VecVT.isScalableVector());
9937 Vec = DAG.getBitcast(VecVT, Vec);
9938 } else {
9939 // We can't slide this mask vector down, indexed by its i1 elements.
9940 // This poses a problem when we wish to extract a scalable vector which
9941 // can't be re-expressed as a larger type. Just choose the slow path and
9942 // extend to a larger type, then truncate back down.
9943 // TODO: We could probably improve this when extracting certain fixed-length
9944 // vectors from fixed-length vectors, where we can extract as i8 and shift
9945 // the correct element right to reach the desired subvector.
9946 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
9947 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
9948 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
9949 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
9950 Op.getOperand(1));
9951 SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
9952 return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
9953 }
9954 }
9955
9956 // With an index of 0 this is a cast-like subvector, which can be performed
9957 // with subregister operations.
9958 if (OrigIdx == 0)
9959 return Op;
9960
9961 const auto VLen = Subtarget.getRealVLen();
9962
9963 // If the subvector is a fixed-length type and we don't know VLEN exactly,
9964 // we cannot use subregister manipulation to simplify the codegen; we don't
9965 // know which register of an LMUL group contains the specific subvector, as
9966 // we only know the minimum register size. Therefore we must slide the
9967 // vector group down the full amount.
9968 if (SubVecVT.isFixedLengthVector() && !VLen) {
9969 MVT ContainerVT = VecVT;
9970 if (VecVT.isFixedLengthVector()) {
9971 ContainerVT = getContainerForFixedLengthVector(VecVT);
9972 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9973 }
9974
9975 // Shrink down Vec so we're performing the slidedown on a smaller LMUL.
9976 unsigned LastIdx = OrigIdx + SubVecVT.getVectorNumElements() - 1;
9977 if (auto ShrunkVT =
9978 getSmallestVTForIndex(ContainerVT, LastIdx, DL, DAG, Subtarget)) {
9979 ContainerVT = *ShrunkVT;
9980 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
9981 DAG.getVectorIdxConstant(0, DL));
9982 }
9983
9984 SDValue Mask =
9985 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
9986 // Set the vector length to only the number of elements we care about. This
9987 // avoids sliding down elements we're going to discard straight away.
9988 SDValue VL = getVLOp(SubVecVT.getVectorNumElements(), ContainerVT, DL, DAG,
9989 Subtarget);
9990 SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
9991 SDValue Slidedown =
9992 getVSlidedown(DAG, Subtarget, DL, ContainerVT,
9993 DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
9994 // Now we can use a cast-like subvector extract to get the result.
9995 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
9996 DAG.getVectorIdxConstant(0, DL));
9997 return DAG.getBitcast(Op.getValueType(), Slidedown);
9998 }
9999
10000 if (VecVT.isFixedLengthVector()) {
10001 VecVT = getContainerForFixedLengthVector(VecVT);
10002 Vec = convertToScalableVector(VecVT, Vec, DAG, Subtarget);
10003 }
10004
10005 MVT ContainerSubVecVT = SubVecVT;
10006 if (SubVecVT.isFixedLengthVector())
10007 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
10008
10009 unsigned SubRegIdx;
10010 ElementCount RemIdx;
10011 // extract_subvector scales the index by vscale if the subvector is scalable,
10012 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
10013 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
10014 if (SubVecVT.isFixedLengthVector()) {
10015 assert(VLen);
10016 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
10017 auto Decompose =
10018 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10019 VecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
10020 SubRegIdx = Decompose.first;
10021 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
10022 (OrigIdx % Vscale));
10023 } else {
10024 auto Decompose =
10025 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10026 VecVT, ContainerSubVecVT, OrigIdx, TRI);
10027 SubRegIdx = Decompose.first;
10028 RemIdx = ElementCount::getScalable(Decompose.second);
10029 }
10030
10031 // If the Idx has been completely eliminated then this is a subvector extract
10032 // which naturally aligns to a vector register. These can easily be handled
10033 // using subregister manipulation.
10034 if (RemIdx.isZero()) {
10035 if (SubVecVT.isFixedLengthVector()) {
10036 Vec = DAG.getTargetExtractSubreg(SubRegIdx, DL, ContainerSubVecVT, Vec);
10037 return convertFromScalableVector(SubVecVT, Vec, DAG, Subtarget);
10038 }
10039 return Op;
10040 }
10041
10042 // Else SubVecVT is M1 or smaller and may need to be slid down: if SubVecVT
10043 // was > M1 then the index would need to be a multiple of VLMAX, and so would
10044 // divide exactly.
10045 assert(RISCVVType::decodeVLMUL(getLMUL(ContainerSubVecVT)).second ||
10046 getLMUL(ContainerSubVecVT) == RISCVII::VLMUL::LMUL_1);
10047
10048 // If the vector type is an LMUL-group type, extract a subvector equal to the
10049 // nearest full vector register type.
10050 MVT InterSubVT = VecVT;
10051 if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
10052 // If VecVT has an LMUL > 1, then SubVecVT should have a smaller LMUL, and
10053 // we should have successfully decomposed the extract into a subregister.
10054 assert(SubRegIdx != RISCV::NoSubRegister);
10055 InterSubVT = getLMUL1VT(VecVT);
10056 Vec = DAG.getTargetExtractSubreg(SubRegIdx, DL, InterSubVT, Vec);
10057 }
10058
10059 // Slide this vector register down by the desired number of elements in order
10060 // to place the desired subvector starting at element 0.
10061 SDValue SlidedownAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
10062 auto [Mask, VL] = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
10063 if (SubVecVT.isFixedLengthVector())
10064 VL = getVLOp(SubVecVT.getVectorNumElements(), InterSubVT, DL, DAG,
10065 Subtarget);
10066 SDValue Slidedown =
10067 getVSlidedown(DAG, Subtarget, DL, InterSubVT, DAG.getUNDEF(InterSubVT),
10068 Vec, SlidedownAmt, Mask, VL);
10069
10070 // Now the vector is in the right position, extract our final subvector. This
10071 // should resolve to a COPY.
10072 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
10073 DAG.getVectorIdxConstant(0, DL));
10074
10075 // We might have bitcast from a mask type: cast back to the original type if
10076 // required.
10077 return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
10078}
10079
10080// Widen a vector's operands to i8, then truncate its results back to the
10081// original type, typically i1. All operand and result types must be the same.
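// Illustrative sketch: an i1 vector_deinterleave is widened so that it runs
// on i8 operands produced by zero-extension; each i8 result is then turned
// back into an i1 vector by comparing it against zero with setne.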
10082static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL,
10083 SelectionDAG &DAG) {
10084 MVT VT = N.getSimpleValueType();
10085 MVT WideVT = VT.changeVectorElementType(MVT::i8);
10086 SmallVector<SDValue, 4> WideOps;
10087 for (SDValue Op : N->ops()) {
10088 assert(Op.getSimpleValueType() == VT &&
10089 "Operands and result must be same type");
10090 WideOps.push_back(DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Op));
10091 }
10092
10093 unsigned NumVals = N->getNumValues();
10094
10095 SDVTList VTs = DAG.getVTList(SmallVector<EVT, 4>(
10096 NumVals, N.getValueType().changeVectorElementType(MVT::i8)));
10097 SDValue WideN = DAG.getNode(N.getOpcode(), DL, VTs, WideOps);
10098 SmallVector<SDValue, 4> TruncVals;
10099 for (unsigned I = 0; I < NumVals; I++) {
10100 TruncVals.push_back(
10101 DAG.getSetCC(DL, N->getSimpleValueType(I), WideN.getValue(I),
10102 DAG.getConstant(0, DL, WideVT), ISD::SETNE));
10103 }
10104
10105 if (TruncVals.size() > 1)
10106 return DAG.getMergeValues(TruncVals, DL);
10107 return TruncVals.front();
10108}
10109
10110SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
10111 SelectionDAG &DAG) const {
10112 SDLoc DL(Op);
10113 MVT VecVT = Op.getSimpleValueType();
10114
10115 assert(VecVT.isScalableVector() &&
10116 "vector_interleave on non-scalable vector!");
10117
10118 // 1 bit element vectors need to be widened to e8
10119 if (VecVT.getVectorElementType() == MVT::i1)
10120 return widenVectorOpsToi8(Op, DL, DAG);
10121
10122 // If the VT is LMUL=8, we need to split and reassemble.
10123 if (VecVT.getSizeInBits().getKnownMinValue() ==
10124 (8 * RISCV::RVVBitsPerBlock)) {
10125 auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
10126 auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
10127 EVT SplitVT = Op0Lo.getValueType();
10128
10129 SDValue ResLo = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
10130 DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op0Hi);
10131 SDValue ResHi = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
10132 DAG.getVTList(SplitVT, SplitVT), Op1Lo, Op1Hi);
10133
10134 SDValue Even = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
10135 ResLo.getValue(0), ResHi.getValue(0));
10136 SDValue Odd = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, ResLo.getValue(1),
10137 ResHi.getValue(1));
10138 return DAG.getMergeValues({Even, Odd}, DL);
10139 }
10140
10141 // Concatenate the two vectors as one vector to deinterleave
10142 MVT ConcatVT =
10143 MVT::getVectorVT(VecVT.getVectorElementType(),
10144 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
10145 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
10146 Op.getOperand(0), Op.getOperand(1));
10147
10148 // We want to operate on all lanes, so get the mask and VL for it
10149 auto [Mask, VL] = getDefaultScalableVLOps(ConcatVT, DL, DAG, Subtarget);
10150 SDValue Passthru = DAG.getUNDEF(ConcatVT);
10151
10152 // We can deinterleave through vnsrl.wi if the element type is smaller than
10153 // ELEN
10154 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
10155 SDValue Even =
10156 getDeinterleaveViaVNSRL(DL, VecVT, Concat, true, Subtarget, DAG);
10157 SDValue Odd =
10158 getDeinterleaveViaVNSRL(DL, VecVT, Concat, false, Subtarget, DAG);
10159 return DAG.getMergeValues({Even, Odd}, DL);
10160 }
10161
10162 // For the indices, use the same SEW to avoid an extra vsetvli
10163 MVT IdxVT = ConcatVT.changeVectorElementTypeToInteger();
10164 // Create a vector of even indices {0, 2, 4, ...}
10165 SDValue EvenIdx =
10166 DAG.getStepVector(DL, IdxVT, APInt(IdxVT.getScalarSizeInBits(), 2));
10167 // Create a vector of odd indices {1, 3, 5, ... }
10168 SDValue OddIdx =
10169 DAG.getNode(ISD::ADD, DL, IdxVT, EvenIdx, DAG.getConstant(1, DL, IdxVT));
10170
10171 // Gather the even and odd elements into two separate vectors
10172 SDValue EvenWide = DAG.getNode(RISCVISD::VRGATHER_VV_VL, DL, ConcatVT,
10173 Concat, EvenIdx, Passthru, Mask, VL);
10174 SDValue OddWide = DAG.getNode(RISCVISD::VRGATHER_VV_VL, DL, ConcatVT,
10175 Concat, OddIdx, Passthru, Mask, VL);
10176
10177 // Extract the result half of the gather for even and odd
10178 SDValue Even = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, EvenWide,
10179 DAG.getVectorIdxConstant(0, DL));
10180 SDValue Odd = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, OddWide,
10181 DAG.getVectorIdxConstant(0, DL));
10182
10183 return DAG.getMergeValues({Even, Odd}, DL);
10184}
10185
10186SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op,
10187 SelectionDAG &DAG) const {
10188 SDLoc DL(Op);
10189 MVT VecVT = Op.getSimpleValueType();
10190
10191 assert(VecVT.isScalableVector() &&
10192 "vector_interleave on non-scalable vector!");
10193
10194 // i1 vectors need to be widened to i8
10195 if (VecVT.getVectorElementType() == MVT::i1)
10196 return widenVectorOpsToi8(Op, DL, DAG);
10197
10198 MVT XLenVT = Subtarget.getXLenVT();
10199 SDValue VL = DAG.getRegister(RISCV::X0, XLenVT);
10200
10201 // If the VT is LMUL=8, we need to split and reassemble.
10202 if (VecVT.getSizeInBits().getKnownMinValue() == (8 * RISCV::RVVBitsPerBlock)) {
10203 auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
10204 auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
10205 EVT SplitVT = Op0Lo.getValueType();
10206
10207 SDValue ResLo = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
10208 DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op1Lo);
10209 SDValue ResHi = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
10210 DAG.getVTList(SplitVT, SplitVT), Op0Hi, Op1Hi);
10211
10212 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
10213 ResLo.getValue(0), ResLo.getValue(1));
10214 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
10215 ResHi.getValue(0), ResHi.getValue(1));
10216 return DAG.getMergeValues({Lo, Hi}, DL);
10217 }
10218
10219 SDValue Interleaved;
10220
10221 // If the element type is smaller than ELEN, then we can interleave with
10222 // vwaddu.vv and vwmaccu.vx
10223 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
10224 Interleaved = getWideningInterleave(Op.getOperand(0), Op.getOperand(1), DL,
10225 DAG, Subtarget);
10226 } else {
10227 // Otherwise, fall back to using vrgatherei16.vv
10228 MVT ConcatVT =
10229 MVT::getVectorVT(VecVT.getVectorElementType(),
10230 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
10231 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
10232 Op.getOperand(0), Op.getOperand(1));
10233
10234 MVT IdxVT = ConcatVT.changeVectorElementType(MVT::i16);
10235
10236 // 0 1 2 3 4 5 6 7 ...
10237 SDValue StepVec = DAG.getStepVector(DL, IdxVT);
10238
10239 // 1 1 1 1 1 1 1 1 ...
10240 SDValue Ones = DAG.getSplatVector(IdxVT, DL, DAG.getConstant(1, DL, XLenVT));
10241
10242 // 1 0 1 0 1 0 1 0 ...
10243 SDValue OddMask = DAG.getNode(ISD::AND, DL, IdxVT, StepVec, Ones);
10244 OddMask = DAG.getSetCC(
10245 DL, IdxVT.changeVectorElementType(MVT::i1), OddMask,
10246 DAG.getSplatVector(IdxVT, DL, DAG.getConstant(0, DL, XLenVT)),
10247 ISD::CondCode::SETNE);
10248
10249 SDValue VLMax = DAG.getSplatVector(IdxVT, DL, computeVLMax(VecVT, DL, DAG));
10250
10251 // Build up the index vector for interleaving the concatenated vector
10252 // 0 0 1 1 2 2 3 3 ...
10253 SDValue Idx = DAG.getNode(ISD::SRL, DL, IdxVT, StepVec, Ones);
10254 // 0 n 1 n+1 2 n+2 3 n+3 ...
10255 Idx =
10256 DAG.getNode(RISCVISD::ADD_VL, DL, IdxVT, Idx, VLMax, Idx, OddMask, VL);
10257
10258 // Then perform the interleave
10259 // v[0] v[n] v[1] v[n+1] v[2] v[n+2] v[3] v[n+3] ...
10260 SDValue TrueMask = getAllOnesMask(IdxVT, VL, DL, DAG);
10261 Interleaved = DAG.getNode(RISCVISD::VRGATHEREI16_VV_VL, DL, ConcatVT,
10262 Concat, Idx, DAG.getUNDEF(ConcatVT), TrueMask, VL);
10263 }
10264
10265 // Extract the two halves from the interleaved result
10266 SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
10267 DAG.getVectorIdxConstant(0, DL));
10268 SDValue Hi = DAG.getNode(
10269 ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
10270 DAG.getVectorIdxConstant(VecVT.getVectorMinNumElements(), DL));
10271
10272 return DAG.getMergeValues({Lo, Hi}, DL);
10273}
10274
10275// Lower step_vector to the vid instruction. Any non-identity step value must
10276// be accounted for by manual expansion.
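// For example (a rough sketch): a constant step of 4 lowers to vid.v followed
// by a shift left by log2(4) = 2, while a non-power-of-two step such as 3
// instead splats 3 and multiplies the vid result by it.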
10277SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
10278 SelectionDAG &DAG) const {
10279 SDLoc DL(Op);
10280 MVT VT = Op.getSimpleValueType();
10281 assert(VT.isScalableVector() && "Expected scalable vector");
10282 MVT XLenVT = Subtarget.getXLenVT();
10283 auto [Mask, VL] = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
10284 SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
10285 uint64_t StepValImm = Op.getConstantOperandVal(0);
10286 if (StepValImm != 1) {
10287 if (isPowerOf2_64(StepValImm)) {
10288 SDValue StepVal =
10289 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
10290 DAG.getConstant(Log2_64(StepValImm), DL, XLenVT), VL);
10291 StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal);
10292 } else {
10293 SDValue StepVal = lowerScalarSplat(
10294 SDValue(), DAG.getConstant(StepValImm, DL, VT.getVectorElementType()),
10295 VL, VT, DL, DAG, Subtarget);
10296 StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal);
10297 }
10298 }
10299 return StepVec;
10300}
10301
10302// Implement vector_reverse using vrgather.vv with indices determined by
10303// subtracting the id of each element from (VLMAX-1). This will convert
10304// the indices like so:
10305// (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
10306// TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
10307SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
10308 SelectionDAG &DAG) const {
10309 SDLoc DL(Op);
10310 MVT VecVT = Op.getSimpleValueType();
10311 if (VecVT.getVectorElementType() == MVT::i1) {
10312 MVT WidenVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
10313 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, Op.getOperand(0));
10314 SDValue Op2 = DAG.getNode(ISD::VECTOR_REVERSE, DL, WidenVT, Op1);
10315 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Op2);
10316 }
10317 unsigned EltSize = VecVT.getScalarSizeInBits();
10318 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
10319 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
10320 unsigned MaxVLMAX =
10321 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
10322
10323 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
10324 MVT IntVT = VecVT.changeVectorElementTypeToInteger();
10325
10326 // If this is SEW=8 and VLMAX is potentially more than 256, we need
10327 // to use vrgatherei16.vv.
10328 // TODO: It's also possible to use vrgatherei16.vv for other types to
10329 // decrease register width for the index calculation.
10330 if (MaxVLMAX > 256 && EltSize == 8) {
10331 // If this is LMUL=8, we have to split before we can use vrgatherei16.vv.
10332 // Reverse each half, then reassemble them in reverse order.
10333 // NOTE: It's also possible that, after splitting, VLMAX no longer
10334 // requires vrgatherei16.vv.
10335 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
10336 auto [Lo, Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
10337 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VecVT);
10338 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
10339 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
10340 // Reassemble the low and high pieces reversed.
10341 // FIXME: This is a CONCAT_VECTORS.
10342 SDValue Res =
10343 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, DAG.getUNDEF(VecVT), Hi,
10344 DAG.getVectorIdxConstant(0, DL));
10345 return DAG.getNode(
10346 ISD::INSERT_SUBVECTOR, DL, VecVT, Res, Lo,
10347 DAG.getVectorIdxConstant(LoVT.getVectorMinNumElements(), DL));
10348 }
10349
10350 // Just promote the int type to i16 which will double the LMUL.
10351 IntVT = MVT::getVectorVT(MVT::i16, VecVT.getVectorElementCount());
10352 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
10353 }
10354
10355 MVT XLenVT = Subtarget.getXLenVT();
10356 auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
10357
10358 // Calculate VLMAX-1 for the desired SEW.
10359 SDValue VLMinus1 = DAG.getNode(ISD::SUB, DL, XLenVT,
10360 computeVLMax(VecVT, DL, DAG),
10361 DAG.getConstant(1, DL, XLenVT));
10362
10363 // Splat VLMAX-1 taking care to handle SEW==64 on RV32.
10364 bool IsRV32E64 =
10365 !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;
10366 SDValue SplatVL;
10367 if (!IsRV32E64)
10368 SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1);
10369 else
10370 SplatVL = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, DAG.getUNDEF(IntVT),
10371 VLMinus1, DAG.getRegister(RISCV::X0, XLenVT));
10372
10373 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL);
10374 SDValue Indices = DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID,
10375 DAG.getUNDEF(IntVT), Mask, VL);
10376
10377 return DAG.getNode(GatherOpc, DL, VecVT, Op.getOperand(0), Indices,
10378 DAG.getUNDEF(VecVT), Mask, VL);
10379}
10380
10381SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op,
10382 SelectionDAG &DAG) const {
10383 SDLoc DL(Op);
10384 SDValue V1 = Op.getOperand(0);
10385 SDValue V2 = Op.getOperand(1);
10386 MVT XLenVT = Subtarget.getXLenVT();
10387 MVT VecVT = Op.getSimpleValueType();
10388
10389 SDValue VLMax = computeVLMax(VecVT, DL, DAG);
10390
10391 int64_t ImmValue = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
10392 SDValue DownOffset, UpOffset;
10393 if (ImmValue >= 0) {
10394 // The operand is a TargetConstant, we need to rebuild it as a regular
10395 // constant.
10396 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
10397 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DownOffset);
10398 } else {
10399 // The operand is a TargetConstant, we need to rebuild it as a regular
10400 // constant rather than negating the original operand.
10401 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
10402 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, UpOffset);
10403 }
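  // Illustrative example (not exact output): for vector_splice(v1, v2, 2)
  // this yields DownOffset = 2 and UpOffset = VLMAX - 2, i.e. v1 is slid down
  // by 2 elements and v2 is then slid up into the freed tail positions.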
10404
10405 SDValue TrueMask = getAllOnesMask(VecVT, VLMax, DL, DAG);
10406
10407 SDValue SlideDown =
10408 getVSlidedown(DAG, Subtarget, DL, VecVT, DAG.getUNDEF(VecVT), V1,
10409 DownOffset, TrueMask, UpOffset);
10410 return getVSlideup(DAG, Subtarget, DL, VecVT, SlideDown, V2, UpOffset,
10411 TrueMask, DAG.getRegister(RISCV::X0, XLenVT),
10412 RISCVII::TAIL_AGNOSTIC);
10413}
10414
10415SDValue
10416RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
10417 SelectionDAG &DAG) const {
10418 SDLoc DL(Op);
10419 auto *Load = cast<LoadSDNode>(Op);
10420
10421 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
10422 Load->getMemoryVT(),
10423 *Load->getMemOperand()) &&
10424 "Expecting a correctly-aligned load");
10425
10426 MVT VT = Op.getSimpleValueType();
10427 MVT XLenVT = Subtarget.getXLenVT();
10428 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10429
10430 // If we know the exact VLEN and our fixed length vector completely fills
10431 // the container, use a whole register load instead.
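  // Hypothetical example (assuming VLEN is known to be exactly 128): a
  // 128-bit fixed-length vector such as v4i32 fills its LMUL=1 container
  // exactly, so an ordinary whole-register load is emitted instead of a vle
  // with an explicit VL.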
10432 const auto [MinVLMAX, MaxVLMAX] =
10433 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
10434 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
10435 getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
10436 MachineMemOperand *MMO = Load->getMemOperand();
10437 SDValue NewLoad =
10438 DAG.getLoad(ContainerVT, DL, Load->getChain(), Load->getBasePtr(),
10439 MMO->getPointerInfo(), MMO->getBaseAlign(), MMO->getFlags(),
10440 MMO->getAAInfo(), MMO->getRanges());
10441 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
10442 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
10443 }
10444
10445 SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG, Subtarget);
10446
10447 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
10448 SDValue IntID = DAG.getTargetConstant(
10449 IsMaskOp ? Intrinsic::riscv_vlm : Intrinsic::riscv_vle, DL, XLenVT);
10450 SmallVector<SDValue, 4> Ops{Load->getChain(), IntID};
10451 if (!IsMaskOp)
10452 Ops.push_back(DAG.getUNDEF(ContainerVT));
10453 Ops.push_back(Load->getBasePtr());
10454 Ops.push_back(VL);
10455 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
10456 SDValue NewLoad =
10457 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
10458 Load->getMemoryVT(), Load->getMemOperand());
10459
10460 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
10461 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
10462}
10463
10464SDValue
10465RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
10466 SelectionDAG &DAG) const {
10467 SDLoc DL(Op);
10468 auto *Store = cast<StoreSDNode>(Op);
10469
10470 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
10471 Store->getMemoryVT(),
10472 *Store->getMemOperand()) &&
10473 "Expecting a correctly-aligned store");
10474
10475 SDValue StoreVal = Store->getValue();
10476 MVT VT = StoreVal.getSimpleValueType();
10477 MVT XLenVT = Subtarget.getXLenVT();
10478
10479 // If the size is less than a byte, we need to pad with zeros to make a byte.
10480 if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) {
10481 VT = MVT::v8i1;
10482 StoreVal =
10483 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getConstant(0, DL, VT),
10484 StoreVal, DAG.getVectorIdxConstant(0, DL));
10485 }
10486
10487 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10488
10489 SDValue NewValue =
10490 convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
10491
10492
10493 // If we know the exact VLEN and our fixed length vector completely fills
10494 // the container, use a whole register store instead.
10495 const auto [MinVLMAX, MaxVLMAX] =
10496 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
10497 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
10498 getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
10499 MachineMemOperand *MMO = Store->getMemOperand();
10500 return DAG.getStore(Store->getChain(), DL, NewValue, Store->getBasePtr(),
10501 MMO->getPointerInfo(), MMO->getBaseAlign(),
10502 MMO->getFlags(), MMO->getAAInfo());
10503 }
10504
10505 SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG,
10506 Subtarget);
10507
10508 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
10509 SDValue IntID = DAG.getTargetConstant(
10510 IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse, DL, XLenVT);
10511 return DAG.getMemIntrinsicNode(
10512 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other),
10513 {Store->getChain(), IntID, NewValue, Store->getBasePtr(), VL},
10514 Store->getMemoryVT(), Store->getMemOperand());
10515}
10516
10517SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
10518 SelectionDAG &DAG) const {
10519 SDLoc DL(Op);
10520 MVT VT = Op.getSimpleValueType();
10521
10522 const auto *MemSD = cast<MemSDNode>(Op);
10523 EVT MemVT = MemSD->getMemoryVT();
10524 MachineMemOperand *MMO = MemSD->getMemOperand();
10525 SDValue Chain = MemSD->getChain();
10526 SDValue BasePtr = MemSD->getBasePtr();
10527
10528 SDValue Mask, PassThru, VL;
10529 if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Op)) {
10530 Mask = VPLoad->getMask();
10531 PassThru = DAG.getUNDEF(VT);
10532 VL = VPLoad->getVectorLength();
10533 } else {
10534 const auto *MLoad = cast<MaskedLoadSDNode>(Op);
10535 Mask = MLoad->getMask();
10536 PassThru = MLoad->getPassThru();
10537 }
10538
10539 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
10540
10541 MVT XLenVT = Subtarget.getXLenVT();
10542
10543 MVT ContainerVT = VT;
10544 if (VT.isFixedLengthVector()) {
10545 ContainerVT = getContainerForFixedLengthVector(VT);
10546 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
10547 if (!IsUnmasked) {
10548 MVT MaskVT = getMaskTypeFor(ContainerVT);
10549 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
10550 }
10551 }
10552
10553 if (!VL)
10554 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
10555
10556 unsigned IntID =
10557 IsUnmasked ? Intrinsic::riscv_vle : Intrinsic::riscv_vle_mask;
10558 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
10559 if (IsUnmasked)
10560 Ops.push_back(DAG.getUNDEF(ContainerVT));
10561 else
10562 Ops.push_back(PassThru);
10563 Ops.push_back(BasePtr);
10564 if (!IsUnmasked)
10565 Ops.push_back(Mask);
10566 Ops.push_back(VL);
10567 if (!IsUnmasked)
10568 Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
10569
10570 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
10571
10572 SDValue Result =
10573 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
10574 Chain = Result.getValue(1);
10575
10576 if (VT.isFixedLengthVector())
10577 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
10578
10579 return DAG.getMergeValues({Result, Chain}, DL);
10580}
10581
10582SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
10583 SelectionDAG &DAG) const {
10584 SDLoc DL(Op);
10585
10586 const auto *MemSD = cast<MemSDNode>(Op);
10587 EVT MemVT = MemSD->getMemoryVT();
10588 MachineMemOperand *MMO = MemSD->getMemOperand();
10589 SDValue Chain = MemSD->getChain();
10590 SDValue BasePtr = MemSD->getBasePtr();
10591 SDValue Val, Mask, VL;
10592
10593 bool IsCompressingStore = false;
10594 if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) {
10595 Val = VPStore->getValue();
10596 Mask = VPStore->getMask();
10597 VL = VPStore->getVectorLength();
10598 } else {
10599 const auto *MStore = cast<MaskedStoreSDNode>(Op);
10600 Val = MStore->getValue();
10601 Mask = MStore->getMask();
10602 IsCompressingStore = MStore->isCompressingStore();
10603 }
10604
10605 bool IsUnmasked =
10606 ISD::isConstantSplatVectorAllOnes(Mask.getNode()) || IsCompressingStore;
10607
10608 MVT VT = Val.getSimpleValueType();
10609 MVT XLenVT = Subtarget.getXLenVT();
10610
10611 MVT ContainerVT = VT;
10612 if (VT.isFixedLengthVector()) {
10613 ContainerVT = getContainerForFixedLengthVector(VT);
10614
10615 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
10616 if (!IsUnmasked || IsCompressingStore) {
10617 MVT MaskVT = getMaskTypeFor(ContainerVT);
10618 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
10619 }
10620 }
10621
10622 if (!VL)
10623 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
10624
10625 if (IsCompressingStore) {
10626 Val = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
10627 DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
10628 DAG.getUNDEF(ContainerVT), Val, Mask, VL);
10629 VL =
10630 DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
10631 getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
10632 }
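  // Roughly, the compressing-store path above becomes: vcompress.vm to pack
  // the active elements to the front of the register group, vcpop.m to count
  // them, and then an unmasked vse with that count as the effective VL.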
10633
10634 unsigned IntID =
10635 IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
10636 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
10637 Ops.push_back(Val);
10638 Ops.push_back(BasePtr);
10639 if (!IsUnmasked)
10640 Ops.push_back(Mask);
10641 Ops.push_back(VL);
10642
10643 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
10644 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
10645}
10646
10647SDValue
10648RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
10649 SelectionDAG &DAG) const {
10650 MVT InVT = Op.getOperand(0).getSimpleValueType();
10651 MVT ContainerVT = getContainerForFixedLengthVector(InVT);
10652
10653 MVT VT = Op.getSimpleValueType();
10654
10655 SDValue Op1 =
10656 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
10657 SDValue Op2 =
10658 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
10659
10660 SDLoc DL(Op);
10661 auto [Mask, VL] = getDefaultVLOps(VT.getVectorNumElements(), ContainerVT, DL,
10662 DAG, Subtarget);
10663 MVT MaskVT = getMaskTypeFor(ContainerVT);
10664
10665 SDValue Cmp =
10666 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
10667 {Op1, Op2, Op.getOperand(2), DAG.getUNDEF(MaskVT), Mask, VL});
10668
10669 return convertFromScalableVector(VT, Cmp, DAG, Subtarget);
10670}
10671
10672SDValue RISCVTargetLowering::lowerVectorStrictFSetcc(SDValue Op,
10673 SelectionDAG &DAG) const {
10674 unsigned Opc = Op.getOpcode();
10675 SDLoc DL(Op);
10676 SDValue Chain = Op.getOperand(0);
10677 SDValue Op1 = Op.getOperand(1);
10678 SDValue Op2 = Op.getOperand(2);
10679 SDValue CC = Op.getOperand(3);
10680 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
10681 MVT VT = Op.getSimpleValueType();
10682 MVT InVT = Op1.getSimpleValueType();
10683
10684 // RVV VMFEQ/VMFNE ignores qNan, so we expand strict_fsetccs with OEQ/UNE
10685 // condition code.
10686 if (Opc == ISD::STRICT_FSETCCS) {
10687 // Expand strict_fsetccs(x, oeq) to
10688 // (and strict_fsetccs(x, y, oge), strict_fsetccs(x, y, ole))
10689 SDVTList VTList = Op->getVTList();
10690 if (CCVal == ISD::SETEQ || CCVal == ISD::SETOEQ) {
10691 SDValue OLECCVal = DAG.getCondCode(ISD::SETOLE);
10692 SDValue Tmp1 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
10693 Op2, OLECCVal);
10694 SDValue Tmp2 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op2,
10695 Op1, OLECCVal);
10696 SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
10697 Tmp1.getValue(1), Tmp2.getValue(1));
10698 // Tmp1 and Tmp2 might be the same node.
10699 if (Tmp1 != Tmp2)
10700 Tmp1 = DAG.getNode(ISD::AND, DL, VT, Tmp1, Tmp2);
10701 return DAG.getMergeValues({Tmp1, OutChain}, DL);
10702 }
10703
10704 // Expand (strict_fsetccs x, y, une) to (not (strict_fsetccs x, y, oeq))
10705 if (CCVal == ISD::SETNE || CCVal == ISD::SETUNE) {
10706 SDValue OEQCCVal = DAG.getCondCode(ISD::SETOEQ);
10707 SDValue OEQ = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
10708 Op2, OEQCCVal);
10709 SDValue Res = DAG.getNOT(DL, OEQ, VT);
10710 return DAG.getMergeValues({Res, OEQ.getValue(1)}, DL);
10711 }
10712 }
10713
10714 MVT ContainerInVT = InVT;
10715 if (InVT.isFixedLengthVector()) {
10716 ContainerInVT = getContainerForFixedLengthVector(InVT);
10717 Op1 = convertToScalableVector(ContainerInVT, Op1, DAG, Subtarget);
10718 Op2 = convertToScalableVector(ContainerInVT, Op2, DAG, Subtarget);
10719 }
10720 MVT MaskVT = getMaskTypeFor(ContainerInVT);
10721
10722 auto [Mask, VL] = getDefaultVLOps(InVT, ContainerInVT, DL, DAG, Subtarget);
10723
10724 SDValue Res;
10725 if (Opc == ISD::STRICT_FSETCC &&
10726 (CCVal == ISD::SETLT || CCVal == ISD::SETOLT || CCVal == ISD::SETLE ||
10727 CCVal == ISD::SETOLE)) {
10728 // VMFLT/VMFLE/VMFGT/VMFGE raise an exception for qNaN. Generate a mask that
10729 // is only active when both input elements are ordered.
10730 SDValue True = getAllOnesMask(ContainerInVT, VL, DL, DAG);
10731 SDValue OrderMask1 = DAG.getNode(
10732 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
10733 {Chain, Op1, Op1, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
10734 True, VL});
10735 SDValue OrderMask2 = DAG.getNode(
10736 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
10737 {Chain, Op2, Op2, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
10738 True, VL});
10739 Mask =
10740 DAG.getNode(RISCVISD::VMAND_VL, DL, MaskVT, OrderMask1, OrderMask2, VL);
10741 // Use Mask as the merge operand to let the result be 0 if either of the
10742 // inputs is unordered.
10743 Res = DAG.getNode(RISCVISD::STRICT_FSETCCS_VL, DL,
10744 DAG.getVTList(MaskVT, MVT::Other),
10745 {Chain, Op1, Op2, CC, Mask, Mask, VL});
10746 } else {
10747 unsigned RVVOpc = Opc == ISD::STRICT_FSETCC ? RISCVISD::STRICT_FSETCC_VL
10748 : RISCVISD::STRICT_FSETCCS_VL;
10749 Res = DAG.getNode(RVVOpc, DL, DAG.getVTList(MaskVT, MVT::Other),
10750 {Chain, Op1, Op2, CC, DAG.getUNDEF(MaskVT), Mask, VL});
10751 }
10752
10753 if (VT.isFixedLengthVector()) {
10754 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
10755 return DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
10756 }
10757 return Res;
10758}
10759
10760// Lower vector ABS to smax(X, sub(0, X)).
10761SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
10762 SDLoc DL(Op);
10763 MVT VT = Op.getSimpleValueType();
10764 SDValue X = Op.getOperand(0);
10765
10766 assert((Op.getOpcode() == ISD::VP_ABS || VT.isFixedLengthVector()) &&
10767 "Unexpected type for ISD::ABS");
10768
10769 MVT ContainerVT = VT;
10770 if (VT.isFixedLengthVector()) {
10771 ContainerVT = getContainerForFixedLengthVector(VT);
10772 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
10773 }
10774
10775 SDValue Mask, VL;
10776 if (Op->getOpcode() == ISD::VP_ABS) {
10777 Mask = Op->getOperand(1);
10778 if (VT.isFixedLengthVector())
10779 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
10780 Subtarget);
10781 VL = Op->getOperand(2);
10782 } else
10783 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
10784
10785 SDValue SplatZero = DAG.getNode(
10786 RISCVISD::VMV_V_X_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
10787 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
10788 SDValue NegX = DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X,
10789 DAG.getUNDEF(ContainerVT), Mask, VL);
10790 SDValue Max = DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX,
10791 DAG.getUNDEF(ContainerVT), Mask, VL);
10792
10793 if (VT.isFixedLengthVector())
10794 Max = convertFromScalableVector(VT, Max, DAG, Subtarget);
10795 return Max;
10796}
10797
10798SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV(
10799 SDValue Op, SelectionDAG &DAG) const {
10800 SDLoc DL(Op);
10801 MVT VT = Op.getSimpleValueType();
10802 SDValue Mag = Op.getOperand(0);
10803 SDValue Sign = Op.getOperand(1);
10804 assert(Mag.getValueType() == Sign.getValueType() &&
10805 "Can only handle COPYSIGN with matching types.");
10806
10807 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10808 Mag = convertToScalableVector(ContainerVT, Mag, DAG, Subtarget);
10809 Sign = convertToScalableVector(ContainerVT, Sign, DAG, Subtarget);
10810
10811 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
10812
10813 SDValue CopySign = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Mag,
10814 Sign, DAG.getUNDEF(ContainerVT), Mask, VL);
10815
10816 return convertFromScalableVector(VT, CopySign, DAG, Subtarget);
10817}
10818
10819SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV(
10820 SDValue Op, SelectionDAG &DAG) const {
10821 MVT VT = Op.getSimpleValueType();
10822 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10823
10824 MVT I1ContainerVT =
10825 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
10826
10827 SDValue CC =
10828 convertToScalableVector(I1ContainerVT, Op.getOperand(0), DAG, Subtarget);
10829 SDValue Op1 =
10830 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
10831 SDValue Op2 =
10832 convertToScalableVector(ContainerVT, Op.getOperand(2), DAG, Subtarget);
10833
10834 SDLoc DL(Op);
10835 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
10836
10837 SDValue Select = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, Op1,
10838 Op2, DAG.getUNDEF(ContainerVT), VL);
10839
10840 return convertFromScalableVector(VT, Select, DAG, Subtarget);
10841}
10842
10843SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op,
10844 SelectionDAG &DAG) const {
10845 unsigned NewOpc = getRISCVVLOp(Op);
10846 bool HasMergeOp = hasMergeOp(NewOpc);
10847 bool HasMask = hasMaskOp(NewOpc);
10848
10849 MVT VT = Op.getSimpleValueType();
10850 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10851
10852 // Create list of operands by converting existing ones to scalable types.
10853 SmallVector<SDValue, 6> Ops;
10854 for (const SDValue &V : Op->op_values()) {
10855 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
10856
10857 // Pass through non-vector operands.
10858 if (!V.getValueType().isVector()) {
10859 Ops.push_back(V);
10860 continue;
10861 }
10862
10863 // "cast" fixed length vector to a scalable vector.
10864 assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
10865 "Only fixed length vectors are supported!");
10866 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
10867 }
10868
10869 SDLoc DL(Op);
10870 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
10871 if (HasMergeOp)
10872 Ops.push_back(DAG.getUNDEF(ContainerVT));
10873 if (HasMask)
10874 Ops.push_back(Mask);
10875 Ops.push_back(VL);
10876
10877 // StrictFP operations have two result values. Their lowered result should
10878 // have the same result count.
10879 if (Op->isStrictFPOpcode()) {
10880 SDValue ScalableRes =
10881 DAG.getNode(NewOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), Ops,
10882 Op->getFlags());
10883 SDValue SubVec = convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
10884 return DAG.getMergeValues({SubVec, ScalableRes.getValue(1)}, DL);
10885 }
10886
10887 SDValue ScalableRes =
10888 DAG.getNode(NewOpc, DL, ContainerVT, Ops, Op->getFlags());
10889 return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
10890}
10891
10892// Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node:
10893// * Operands of each node are assumed to be in the same order.
10894// * The EVL operand is promoted from i32 to i64 on RV64.
10895// * Fixed-length vectors are converted to their scalable-vector container
10896// types.
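// Illustrative sketch (not taken from a specific test): a vp.add on a
// fixed-length v8i32 becomes RISCVISD::ADD_VL on its scalable container type,
// with an undef merge operand inserted in front of the mask and the operands
// "cast" to the container type.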
10897SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG) const {
10898 unsigned RISCVISDOpc = getRISCVVLOp(Op);
10899 bool HasMergeOp = hasMergeOp(RISCVISDOpc);
10900
10901 SDLoc DL(Op);
10902 MVT VT = Op.getSimpleValueType();
10903 SmallVector<SDValue, 4> Ops;
10904
10905 MVT ContainerVT = VT;
10906 if (VT.isFixedLengthVector())
10907 ContainerVT = getContainerForFixedLengthVector(VT);
10908
10909 for (const auto &OpIdx : enumerate(Op->ops())) {
10910 SDValue V = OpIdx.value();
10911 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
10912 // Add a dummy merge value before the mask, or, if there isn't a mask,
10913 // before the EVL.
10914 if (HasMergeOp) {
10915 auto MaskIdx = ISD::getVPMaskIdx(Op.getOpcode());
10916 if (MaskIdx) {
10917 if (*MaskIdx == OpIdx.index())
10918 Ops.push_back(DAG.getUNDEF(ContainerVT));
10919 } else if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) ==
10920 OpIdx.index()) {
10921 if (Op.getOpcode() == ISD::VP_MERGE) {
10922 // For VP_MERGE, copy the false operand instead of an undef value.
10923 Ops.push_back(Ops.back());
10924 } else {
10925 assert(Op.getOpcode() == ISD::VP_SELECT);
10926 // For VP_SELECT, add an undef value.
10927 Ops.push_back(DAG.getUNDEF(ContainerVT));
10928 }
10929 }
10930 }
10931 // Pass through operands which aren't fixed-length vectors.
10932 if (!V.getValueType().isFixedLengthVector()) {
10933 Ops.push_back(V);
10934 continue;
10935 }
10936 // "cast" fixed length vector to a scalable vector.
10937 MVT OpVT = V.getSimpleValueType();
10938 MVT ContainerVT = getContainerForFixedLengthVector(OpVT);
10939 assert(useRVVForFixedLengthVectorVT(OpVT) &&
10940 "Only fixed length vectors are supported!");
10941 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
10942 }
10943
10944 if (!VT.isFixedLengthVector())
10945 return DAG.getNode(RISCVISDOpc, DL, VT, Ops, Op->getFlags());
10946
10947 SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops, Op->getFlags());
10948
10949 return convertFromScalableVector(VT, VPOp, DAG, Subtarget);
10950}
10951
10952SDValue RISCVTargetLowering::lowerVPExtMaskOp(SDValue Op,
10953 SelectionDAG &DAG) const {
10954 SDLoc DL(Op);
10955 MVT VT = Op.getSimpleValueType();
10956
10957 SDValue Src = Op.getOperand(0);
10958 // NOTE: Mask is dropped.
10959 SDValue VL = Op.getOperand(2);
10960
10961 MVT ContainerVT = VT;
10962 if (VT.isFixedLengthVector()) {
10963 ContainerVT = getContainerForFixedLengthVector(VT);
10964 MVT SrcVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
10965 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
10966 }
10967
10968 MVT XLenVT = Subtarget.getXLenVT();
10969 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
10970 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
10971 DAG.getUNDEF(ContainerVT), Zero, VL);
10972
10973 SDValue SplatValue = DAG.getConstant(
10974 Op.getOpcode() == ISD::VP_ZERO_EXTEND ? 1 : -1, DL, XLenVT);
10975 SDValue Splat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
10976 DAG.getUNDEF(ContainerVT), SplatValue, VL);
10977
10978 SDValue Result = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Src, Splat,
10979 ZeroSplat, DAG.getUNDEF(ContainerVT), VL);
10980 if (!VT.isFixedLengthVector())
10981 return Result;
10982 return convertFromScalableVector(VT, Result, DAG, Subtarget);
10983}
10984
10985SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op,
10986 SelectionDAG &DAG) const {
10987 SDLoc DL(Op);
10988 MVT VT = Op.getSimpleValueType();
10989
10990 SDValue Op1 = Op.getOperand(0);
10991 SDValue Op2 = Op.getOperand(1);
10992 ISD::CondCode Condition = cast<CondCodeSDNode>(Op.getOperand(2))->get();
10993 // NOTE: Mask is dropped.
10994 SDValue VL = Op.getOperand(4);
10995
10996 MVT ContainerVT = VT;
10997 if (VT.isFixedLengthVector()) {
10998 ContainerVT = getContainerForFixedLengthVector(VT);
10999 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
11000 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
11001 }
11002
11003 SDValue Result;
11004 SDValue AllOneMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
11005
11006 switch (Condition) {
11007 default:
11008 break;
11009 // X != Y --> (X^Y)
11010 case ISD::SETNE:
11011 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
11012 break;
11013 // X == Y --> ~(X^Y)
11014 case ISD::SETEQ: {
11015 SDValue Temp =
11016 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
11017 Result =
11018 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, AllOneMask, VL);
11019 break;
11020 }
11021 // X >s Y --> X == 0 & Y == 1 --> ~X & Y
11022 // X <u Y --> X == 0 & Y == 1 --> ~X & Y
11023 case ISD::SETGT:
11024 case ISD::SETULT: {
11025 SDValue Temp =
11026 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
11027 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Temp, Op2, VL);
11028 break;
11029 }
11030 // X <s Y --> X == 1 & Y == 0 --> ~Y & X
11031 // X >u Y --> X == 1 & Y == 0 --> ~Y & X
11032 case ISD::SETLT:
11033 case ISD::SETUGT: {
11034 SDValue Temp =
11035 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
11036 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Op1, Temp, VL);
11037 break;
11038 }
11039 // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
11040 // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
11041 case ISD::SETGE:
11042 case ISD::SETULE: {
11043 SDValue Temp =
11044 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
11045 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op2, VL);
11046 break;
11047 }
11048 // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
11049 // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
11050 case ISD::SETLE:
11051 case ISD::SETUGE: {
11052 SDValue Temp =
11053 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
11054 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op1, VL);
11055 break;
11056 }
11057 }
11058
11059 if (!VT.isFixedLengthVector())
11060 return Result;
11061 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11062}
11063
11064// Lower Floating-Point/Integer Type-Convert VP SDNodes
11065SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op,
11066 SelectionDAG &DAG) const {
11067 SDLoc DL(Op);
11068
11069 SDValue Src = Op.getOperand(0);
11070 SDValue Mask = Op.getOperand(1);
11071 SDValue VL = Op.getOperand(2);
11072 unsigned RISCVISDOpc = getRISCVVLOp(Op);
11073
11074 MVT DstVT = Op.getSimpleValueType();
11075 MVT SrcVT = Src.getSimpleValueType();
11076 if (DstVT.isFixedLengthVector()) {
11077 DstVT = getContainerForFixedLengthVector(DstVT);
11078 SrcVT = getContainerForFixedLengthVector(SrcVT);
11079 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
11080 MVT MaskVT = getMaskTypeFor(DstVT);
11081 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11082 }
11083
11084 unsigned DstEltSize = DstVT.getScalarSizeInBits();
11085 unsigned SrcEltSize = SrcVT.getScalarSizeInBits();
11086
11087 SDValue Result;
11088 if (DstEltSize >= SrcEltSize) { // Single-width and widening conversion.
11089 if (SrcVT.isInteger()) {
11090 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
11091
11092 unsigned RISCVISDExtOpc = RISCVISDOpc == RISCVISD::SINT_TO_FP_VL
11093 ? RISCVISD::VSEXT_VL
11094 : RISCVISD::VZEXT_VL;
11095
11096 // Do we need to do any pre-widening before converting?
11097 if (SrcEltSize == 1) {
11098 MVT IntVT = DstVT.changeVectorElementTypeToInteger();
11099 MVT XLenVT = Subtarget.getXLenVT();
11100 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
11101 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
11102 DAG.getUNDEF(IntVT), Zero, VL);
11103 SDValue One = DAG.getConstant(
11104 RISCVISDExtOpc == RISCVISD::VZEXT_VL ? 1 : -1, DL, XLenVT);
11105 SDValue OneSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
11106 DAG.getUNDEF(IntVT), One, VL);
11107 Src = DAG.getNode(RISCVISD::VMERGE_VL, DL, IntVT, Src, OneSplat,
11108 ZeroSplat, DAG.getUNDEF(IntVT), VL);
11109 } else if (DstEltSize > (2 * SrcEltSize)) {
11110 // Widen before converting.
11111 MVT IntVT = MVT::getVectorVT(MVT::getIntegerVT(DstEltSize / 2),
11112 DstVT.getVectorElementCount());
11113 Src = DAG.getNode(RISCVISDExtOpc, DL, IntVT, Src, Mask, VL);
11114 }
11115
11116 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
11117 } else {
11118 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
11119 "Wrong input/output vector types");
11120
11121 // Convert f16 to f32 then convert f32 to i64.
11122 if (DstEltSize > (2 * SrcEltSize)) {
11123 assert(SrcVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
11124 MVT InterimFVT =
11125 MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
11126 Src =
11127 DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterimFVT, Src, Mask, VL);
11128 }
11129
11130 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
11131 }
11132 } else { // Narrowing + Conversion
11133 if (SrcVT.isInteger()) {
11134 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
11135 // First do a narrowing convert to an FP type half the size, then round
11136 // the FP type to a small FP type if needed.
11137
11138 MVT InterimFVT = DstVT;
11139 if (SrcEltSize > (2 * DstEltSize)) {
11140 assert(SrcEltSize == (4 * DstEltSize) && "Unexpected types!");
11141 assert(DstVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
11142 InterimFVT = MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
11143 }
11144
11145 Result = DAG.getNode(RISCVISDOpc, DL, InterimFVT, Src, Mask, VL);
11146
11147 if (InterimFVT != DstVT) {
11148 Src = Result;
11149 Result = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, DstVT, Src, Mask, VL);
11150 }
11151 } else {
11152 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
11153 "Wrong input/output vector types");
11154 // First do a narrowing conversion to an integer half the size, then
11155 // truncate if needed.
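// For example, f64 -> i8 converts to an i32 vector first and then truncates
// i32 -> i16 -> i8, while f32 -> i1 converts to an i32 vector and compares
// the result against zero.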
11156
11157 if (DstEltSize == 1) {
11158 // First convert to the same size integer, then convert to mask using
11159 // setcc.
11160 assert(SrcEltSize >= 16 && "Unexpected FP type!");
11161 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize),
11162 DstVT.getVectorElementCount());
11163 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
11164
11165 // Compare the integer result to 0. The integer should be 0 or 1/-1,
11166 // otherwise the conversion was undefined.
11167 MVT XLenVT = Subtarget.getXLenVT();
11168 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
11169 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterimIVT,
11170 DAG.getUNDEF(InterimIVT), SplatZero, VL);
11171 Result = DAG.getNode(RISCVISD::SETCC_VL, DL, DstVT,
11172 {Result, SplatZero, DAG.getCondCode(ISD::SETNE),
11173 DAG.getUNDEF(DstVT), Mask, VL});
11174 } else {
11175 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
11176 DstVT.getVectorElementCount());
11177
11178 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
11179
11180 while (InterimIVT != DstVT) {
11181 SrcEltSize /= 2;
11182 Src = Result;
11183 InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
11184 DstVT.getVectorElementCount());
11185 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, InterimIVT,
11186 Src, Mask, VL);
11187 }
11188 }
11189 }
11190 }
11191
11192 MVT VT = Op.getSimpleValueType();
11193 if (!VT.isFixedLengthVector())
11194 return Result;
11195 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11196}
11197
11198SDValue
11199RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op,
11200 SelectionDAG &DAG) const {
11201 SDLoc DL(Op);
11202
11203 SDValue Op1 = Op.getOperand(0);
11204 SDValue Op2 = Op.getOperand(1);
11205 SDValue Offset = Op.getOperand(2);
11206 SDValue Mask = Op.getOperand(3);
11207 SDValue EVL1 = Op.getOperand(4);
11208 SDValue EVL2 = Op.getOperand(5);
11209
11210 const MVT XLenVT = Subtarget.getXLenVT();
11211 MVT VT = Op.getSimpleValueType();
11212 MVT ContainerVT = VT;
11213 if (VT.isFixedLengthVector()) {
11214 ContainerVT = getContainerForFixedLengthVector(VT);
11215 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
11216 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
11217 MVT MaskVT = getMaskTypeFor(ContainerVT);
11218 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11219 }
11220
11221 // EVL1 may need to be extended to XLenVT with RV64LegalI32.
11222 EVL1 = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, EVL1);
11223
11224 bool IsMaskVector = VT.getVectorElementType() == MVT::i1;
11225 if (IsMaskVector) {
11226 ContainerVT = ContainerVT.changeVectorElementType(MVT::i8);
11227
11228 // Expand input operands
11229 SDValue SplatOneOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11230 DAG.getUNDEF(ContainerVT),
11231 DAG.getConstant(1, DL, XLenVT), EVL1);
11232 SDValue SplatZeroOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11233 DAG.getUNDEF(ContainerVT),
11234 DAG.getConstant(0, DL, XLenVT), EVL1);
11235 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op1, SplatOneOp1,
11236 SplatZeroOp1, DAG.getUNDEF(ContainerVT), EVL1);
11237
11238 SDValue SplatOneOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11239 DAG.getUNDEF(ContainerVT),
11240 DAG.getConstant(1, DL, XLenVT), EVL2);
11241 SDValue SplatZeroOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11242 DAG.getUNDEF(ContainerVT),
11243 DAG.getConstant(0, DL, XLenVT), EVL2);
11244 Op2 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op2, SplatOneOp2,
11245 SplatZeroOp2, DAG.getUNDEF(ContainerVT), EVL2);
11246 }
11247
11248 int64_t ImmValue = cast<ConstantSDNode>(Offset)->getSExtValue();
11249 SDValue DownOffset, UpOffset;
11250 if (ImmValue >= 0) {
11251 // The operand is a TargetConstant, we need to rebuild it as a regular
11252 // constant.
11253 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
11254 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, DownOffset);
11255 } else {
11256 // The operand is a TargetConstant, we need to rebuild it as a regular
11257 // constant rather than negating the original operand.
11258 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
11259 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, UpOffset);
11260 }
11261
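// For example, an immediate offset of 2 with EVL1 == 8 slides Op1 down by 2
// (discarding its first two elements) and slides Op2 up by 6, so the result
// holds elements 2..7 of Op1 followed by the leading elements of Op2.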
11262 SDValue SlideDown =
11263 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
11264 Op1, DownOffset, Mask, UpOffset);
11265 SDValue Result = getVSlideup(DAG, Subtarget, DL, ContainerVT, SlideDown, Op2,
11266 UpOffset, Mask, EVL2, RISCVII::TAIL_AGNOSTIC);
11267
11268 if (IsMaskVector) {
11269 // Truncate Result back to a mask vector (Result has same EVL as Op2)
11270 Result = DAG.getNode(
11271 RISCVISD::SETCC_VL, DL, ContainerVT.changeVectorElementType(MVT::i1),
11272 {Result, DAG.getConstant(0, DL, ContainerVT),
11273 DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(getMaskTypeFor(ContainerVT)),
11274 Mask, EVL2});
11275 }
11276
11277 if (!VT.isFixedLengthVector())
11278 return Result;
11279 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11280}
11281
11282SDValue
11283RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op,
11284 SelectionDAG &DAG) const {
11285 SDLoc DL(Op);
11286 MVT VT = Op.getSimpleValueType();
11287 MVT XLenVT = Subtarget.getXLenVT();
11288
11289 SDValue Op1 = Op.getOperand(0);
11290 SDValue Mask = Op.getOperand(1);
11291 SDValue EVL = Op.getOperand(2);
11292
11293 MVT ContainerVT = VT;
11294 if (VT.isFixedLengthVector()) {
11295 ContainerVT = getContainerForFixedLengthVector(VT);
11296 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
11297 MVT MaskVT = getMaskTypeFor(ContainerVT);
11298 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11299 }
11300
11301 MVT GatherVT = ContainerVT;
11302 MVT IndicesVT = ContainerVT.changeVectorElementTypeToInteger();
11303 // Check if we are working with mask vectors
11304 bool IsMaskVector = ContainerVT.getVectorElementType() == MVT::i1;
11305 if (IsMaskVector) {
11306 GatherVT = IndicesVT = ContainerVT.changeVectorElementType(MVT::i8);
11307
11308 // Expand input operand
11309 SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
11310 DAG.getUNDEF(IndicesVT),
11311 DAG.getConstant(1, DL, XLenVT), EVL);
11312 SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
11313 DAG.getUNDEF(IndicesVT),
11314 DAG.getConstant(0, DL, XLenVT), EVL);
11315 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, IndicesVT, Op1, SplatOne,
11316 SplatZero, DAG.getUNDEF(IndicesVT), EVL);
11317 }
11318
11319 unsigned EltSize = GatherVT.getScalarSizeInBits();
11320 unsigned MinSize = GatherVT.getSizeInBits().getKnownMinValue();
11321 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
11322 unsigned MaxVLMAX =
11323 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
11324
11325 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
11326 // If this is SEW=8 and VLMAX is unknown or more than 256, we need
11327 // to use vrgatherei16.vv.
11328 // TODO: It's also possible to use vrgatherei16.vv for other types to
11329 // decrease register width for the index calculation.
11330 // NOTE: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
11331 if (MaxVLMAX > 256 && EltSize == 8) {
11332 // If this is LMUL=8, we have to split before using vrgatherei16.vv.
11333 // Split the vector in half and reverse each half using a full register
11334 // reverse.
11335 // Swap the halves and concatenate them.
11336 // Slide the concatenated result by (VLMax - VL).
11337 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
11338 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(GatherVT);
11339 auto [Lo, Hi] = DAG.SplitVector(Op1, DL);
11340
11341 SDValue LoRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
11342 SDValue HiRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
11343
11344 // Reassemble the low and high pieces reversed.
11345 // NOTE: this Result is unmasked (because we do not need masks for
11346 // shuffles). If in the future this has to change, we can use a SELECT_VL
11347 // between Result and UNDEF using the mask originally passed to VP_REVERSE
11348 SDValue Result =
11349 DAG.getNode(ISD::CONCAT_VECTORS, DL, GatherVT, HiRev, LoRev);
11350
11351 // Slide off any elements from past EVL that were reversed into the low
11352 // elements.
11353 unsigned MinElts = GatherVT.getVectorMinNumElements();
11354 SDValue VLMax =
11355 DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), MinElts));
11356 SDValue Diff = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, EVL);
11357
11358 Result = getVSlidedown(DAG, Subtarget, DL, GatherVT,
11359 DAG.getUNDEF(GatherVT), Result, Diff, Mask, EVL);
11360
11361 if (IsMaskVector) {
11362 // Truncate Result back to a mask vector
11363 Result =
11364 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
11365 {Result, DAG.getConstant(0, DL, GatherVT),
11366 DAG.getCondCode(ISD::SETNE),
11367 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
11368 }
11369
11370 if (!VT.isFixedLengthVector())
11371 return Result;
11372 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11373 }
11374
11375 // Just promote the int type to i16 which will double the LMUL.
11376 IndicesVT = MVT::getVectorVT(MVT::i16, IndicesVT.getVectorElementCount());
11377 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
11378 }
11379
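// The gather indices below are (EVL - 1) - vid, e.g. with EVL == 4 the index
// vector is {3, 2, 1, 0}, so the vrgather reads the source in reverse order.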
11380 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IndicesVT, Mask, EVL);
11381 SDValue VecLen =
11382 DAG.getNode(ISD::SUB, DL, XLenVT, EVL, DAG.getConstant(1, DL, XLenVT));
11383 SDValue VecLenSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
11384 DAG.getUNDEF(IndicesVT), VecLen, EVL);
11385 SDValue VRSUB = DAG.getNode(RISCVISD::SUB_VL, DL, IndicesVT, VecLenSplat, VID,
11386 DAG.getUNDEF(IndicesVT), Mask, EVL);
11387 SDValue Result = DAG.getNode(GatherOpc, DL, GatherVT, Op1, VRSUB,
11388 DAG.getUNDEF(GatherVT), Mask, EVL);
11389
11390 if (IsMaskVector) {
11391 // Truncate Result back to a mask vector
11392 Result = DAG.getNode(
11393 RISCVISD::SETCC_VL, DL, ContainerVT,
11394 {Result, DAG.getConstant(0, DL, GatherVT), DAG.getCondCode(ISD::SETNE),
11395 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
11396 }
11397
11398 if (!VT.isFixedLengthVector())
11399 return Result;
11400 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11401}
11402
11403SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op,
11404 SelectionDAG &DAG) const {
11405 MVT VT = Op.getSimpleValueType();
11406 if (VT.getVectorElementType() != MVT::i1)
11407 return lowerVPOp(Op, DAG);
11408
11409 // It is safe to drop mask parameter as masked-off elements are undef.
11410 SDValue Op1 = Op->getOperand(0);
11411 SDValue Op2 = Op->getOperand(1);
11412 SDValue VL = Op->getOperand(3);
11413
11414 MVT ContainerVT = VT;
11415 const bool IsFixed = VT.isFixedLengthVector();
11416 if (IsFixed) {
11417 ContainerVT = getContainerForFixedLengthVector(VT);
11418 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
11419 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
11420 }
11421
11422 SDLoc DL(Op);
11423 SDValue Val = DAG.getNode(getRISCVVLOp(Op), DL, ContainerVT, Op1, Op2, VL);
11424 if (!IsFixed)
11425 return Val;
11426 return convertFromScalableVector(VT, Val, DAG, Subtarget);
11427}
11428
11429SDValue RISCVTargetLowering::lowerVPStridedLoad(SDValue Op,
11430 SelectionDAG &DAG) const {
11431 SDLoc DL(Op);
11432 MVT XLenVT = Subtarget.getXLenVT();
11433 MVT VT = Op.getSimpleValueType();
11434 MVT ContainerVT = VT;
11435 if (VT.isFixedLengthVector())
11436 ContainerVT = getContainerForFixedLengthVector(VT);
11437
11438 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
11439
11440 auto *VPNode = cast<VPStridedLoadSDNode>(Op);
11441 // Check if the mask is known to be all ones
11442 SDValue Mask = VPNode->getMask();
11443 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11444
11445 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vlse
11446 : Intrinsic::riscv_vlse_mask,
11447 DL, XLenVT);
11448 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID,
11449 DAG.getUNDEF(ContainerVT), VPNode->getBasePtr(),
11450 VPNode->getStride()};
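// Operand order built up for riscv_vlse / riscv_vlse_mask: chain, intrinsic
// id, passthru, base pointer, stride, then the mask (masked form only), the
// VL, and finally a tail policy (masked form only).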
11451 if (!IsUnmasked) {
11452 if (VT.isFixedLengthVector()) {
11453 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
11454 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11455 }
11456 Ops.push_back(Mask);
11457 }
11458 Ops.push_back(VPNode->getVectorLength());
11459 if (!IsUnmasked) {
11460 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
11461 Ops.push_back(Policy);
11462 }
11463
11464 SDValue Result =
11465 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
11466 VPNode->getMemoryVT(), VPNode->getMemOperand());
11467 SDValue Chain = Result.getValue(1);
11468
11469 if (VT.isFixedLengthVector())
11470 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
11471
11472 return DAG.getMergeValues({Result, Chain}, DL);
11473}
11474
11475SDValue RISCVTargetLowering::lowerVPStridedStore(SDValue Op,
11476 SelectionDAG &DAG) const {
11477 SDLoc DL(Op);
11478 MVT XLenVT = Subtarget.getXLenVT();
11479
11480 auto *VPNode = cast<VPStridedStoreSDNode>(Op);
11481 SDValue StoreVal = VPNode->getValue();
11482 MVT VT = StoreVal.getSimpleValueType();
11483 MVT ContainerVT = VT;
11484 if (VT.isFixedLengthVector()) {
11485 ContainerVT = getContainerForFixedLengthVector(VT);
11486 StoreVal = convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
11487 }
11488
11489 // Check if the mask is known to be all ones
11490 SDValue Mask = VPNode->getMask();
11491 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11492
11493 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vsse
11494 : Intrinsic::riscv_vsse_mask,
11495 DL, XLenVT);
11496 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, StoreVal,
11497 VPNode->getBasePtr(), VPNode->getStride()};
11498 if (!IsUnmasked) {
11499 if (VT.isFixedLengthVector()) {
11500 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
11501 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11502 }
11503 Ops.push_back(Mask);
11504 }
11505 Ops.push_back(VPNode->getVectorLength());
11506
11507 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VPNode->getVTList(),
11508 Ops, VPNode->getMemoryVT(),
11509 VPNode->getMemOperand());
11510}
11511
11512// Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be
11513// matched to a RVV indexed load. The RVV indexed load instructions only
11514// support the "unsigned unscaled" addressing mode; indices are implicitly
11515// zero-extended or truncated to XLEN and are treated as byte offsets. Any
11516// signed or scaled indexing is extended to the XLEN value type and scaled
11517// accordingly.
11518SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,
11519 SelectionDAG &DAG) const {
11520 SDLoc DL(Op);
11521 MVT VT = Op.getSimpleValueType();
11522
11523 const auto *MemSD = cast<MemSDNode>(Op.getNode());
11524 EVT MemVT = MemSD->getMemoryVT();
11525 MachineMemOperand *MMO = MemSD->getMemOperand();
11526 SDValue Chain = MemSD->getChain();
11527 SDValue BasePtr = MemSD->getBasePtr();
11528
11529 [[maybe_unused]] ISD::LoadExtType LoadExtType;
11530 SDValue Index, Mask, PassThru, VL;
11531
11532 if (auto *VPGN = dyn_cast<VPGatherSDNode>(Op.getNode())) {
11533 Index = VPGN->getIndex();
11534 Mask = VPGN->getMask();
11535 PassThru = DAG.getUNDEF(VT);
11536 VL = VPGN->getVectorLength();
11537 // VP doesn't support extending loads.
11538 LoadExtType = ISD::NON_EXTLOAD;
11539 } else {
11540 // Else it must be a MGATHER.
11541 auto *MGN = cast<MaskedGatherSDNode>(Op.getNode());
11542 Index = MGN->getIndex();
11543 Mask = MGN->getMask();
11544 PassThru = MGN->getPassThru();
11545 LoadExtType = MGN->getExtensionType();
11546 }
11547
11548 MVT IndexVT = Index.getSimpleValueType();
11549 MVT XLenVT = Subtarget.getXLenVT();
11550
11552 "Unexpected VTs!");
11553 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
11554 // Targets have to explicitly opt-in for extending vector loads.
11555 assert(LoadExtType == ISD::NON_EXTLOAD &&
11556 "Unexpected extending MGATHER/VP_GATHER");
11557
11558 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
11559 // the selection of the masked intrinsics doesn't do this for us.
11560 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11561
11562 MVT ContainerVT = VT;
11563 if (VT.isFixedLengthVector()) {
11564 ContainerVT = getContainerForFixedLengthVector(VT);
11565 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
11566 ContainerVT.getVectorElementCount());
11567
11568 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
11569
11570 if (!IsUnmasked) {
11571 MVT MaskVT = getMaskTypeFor(ContainerVT);
11572 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11573 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
11574 }
11575 }
11576
11577 if (!VL)
11578 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11579
11580 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
11581 IndexVT = IndexVT.changeVectorElementType(XLenVT);
11582 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
11583 }
11584
11585 unsigned IntID =
11586 IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask;
11587 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
11588 if (IsUnmasked)
11589 Ops.push_back(DAG.getUNDEF(ContainerVT));
11590 else
11591 Ops.push_back(PassThru);
11592 Ops.push_back(BasePtr);
11593 Ops.push_back(Index);
11594 if (!IsUnmasked)
11595 Ops.push_back(Mask);
11596 Ops.push_back(VL);
11597 if (!IsUnmasked)
11598 Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
11599
11600 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
11601 SDValue Result =
11602 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
11603 Chain = Result.getValue(1);
11604
11605 if (VT.isFixedLengthVector())
11606 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
11607
11608 return DAG.getMergeValues({Result, Chain}, DL);
11609}
11610
11611// Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be
11612// matched to a RVV indexed store. The RVV indexed store instructions only
11613// support the "unsigned unscaled" addressing mode; indices are implicitly
11614// zero-extended or truncated to XLEN and are treated as byte offsets. Any
11615// signed or scaled indexing is extended to the XLEN value type and scaled
11616// accordingly.
11617SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op,
11618 SelectionDAG &DAG) const {
11619 SDLoc DL(Op);
11620 const auto *MemSD = cast<MemSDNode>(Op.getNode());
11621 EVT MemVT = MemSD->getMemoryVT();
11622 MachineMemOperand *MMO = MemSD->getMemOperand();
11623 SDValue Chain = MemSD->getChain();
11624 SDValue BasePtr = MemSD->getBasePtr();
11625
11626 [[maybe_unused]] bool IsTruncatingStore = false;
11627 SDValue Index, Mask, Val, VL;
11628
11629 if (auto *VPSN = dyn_cast<VPScatterSDNode>(Op.getNode())) {
11630 Index = VPSN->getIndex();
11631 Mask = VPSN->getMask();
11632 Val = VPSN->getValue();
11633 VL = VPSN->getVectorLength();
11634 // VP doesn't support truncating stores.
11635 IsTruncatingStore = false;
11636 } else {
11637 // Else it must be a MSCATTER.
11638 auto *MSN = cast<MaskedScatterSDNode>(Op.getNode());
11639 Index = MSN->getIndex();
11640 Mask = MSN->getMask();
11641 Val = MSN->getValue();
11642 IsTruncatingStore = MSN->isTruncatingStore();
11643 }
11644
11645 MVT VT = Val.getSimpleValueType();
11646 MVT IndexVT = Index.getSimpleValueType();
11647 MVT XLenVT = Subtarget.getXLenVT();
11648
11650 "Unexpected VTs!");
11651 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
11652 // Targets have to explicitly opt-in for extending vector loads and
11653 // truncating vector stores.
11654 assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER");
11655
11656 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
11657 // the selection of the masked intrinsics doesn't do this for us.
11658 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11659
11660 MVT ContainerVT = VT;
11661 if (VT.isFixedLengthVector()) {
11662 ContainerVT = getContainerForFixedLengthVector(VT);
11663 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
11664 ContainerVT.getVectorElementCount());
11665
11666 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
11667 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
11668
11669 if (!IsUnmasked) {
11670 MVT MaskVT = getMaskTypeFor(ContainerVT);
11671 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11672 }
11673 }
11674
11675 if (!VL)
11676 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11677
11678 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
11679 IndexVT = IndexVT.changeVectorElementType(XLenVT);
11680 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
11681 }
11682
11683 unsigned IntID =
11684 IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
11685 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
11686 Ops.push_back(Val);
11687 Ops.push_back(BasePtr);
11688 Ops.push_back(Index);
11689 if (!IsUnmasked)
11690 Ops.push_back(Mask);
11691 Ops.push_back(VL);
11692
11693 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
11694 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
11695}
11696
11697SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op,
11698 SelectionDAG &DAG) const {
11699 const MVT XLenVT = Subtarget.getXLenVT();
11700 SDLoc DL(Op);
11701 SDValue Chain = Op->getOperand(0);
11702 SDValue SysRegNo = DAG.getTargetConstant(
11703 RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
11704 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
11705 SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
11706
11707 // Encoding used for rounding mode in RISC-V differs from that used in
11708 // FLT_ROUNDS. To convert it, the RISC-V rounding mode is used as an index in a
11709 // table, which consists of a sequence of 4-bit fields, each representing
11710 // corresponding FLT_ROUNDS mode.
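// For example, an FRM value of RTZ (1) selects the second 4-bit field, which
// holds int(RoundingMode::TowardZero), while RNE (0) selects the first field,
// holding int(RoundingMode::NearestTiesToEven).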
11711 static const int Table =
11712 (int(RoundingMode::NearestTiesToEven) << 4 * RISCVFPRndMode::RNE) |
11713 (int(RoundingMode::TowardZero) << 4 * RISCVFPRndMode::RTZ) |
11714 (int(RoundingMode::TowardNegative) << 4 * RISCVFPRndMode::RDN) |
11715 (int(RoundingMode::TowardPositive) << 4 * RISCVFPRndMode::RUP) |
11716 (int(RoundingMode::NearestTiesToAway) << 4 * RISCVFPRndMode::RMM);
11717
11718 SDValue Shift =
11719 DAG.getNode(ISD::SHL, DL, XLenVT, RM, DAG.getConstant(2, DL, XLenVT));
11720 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
11721 DAG.getConstant(Table, DL, XLenVT), Shift);
11722 SDValue Masked = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
11723 DAG.getConstant(7, DL, XLenVT));
11724
11725 return DAG.getMergeValues({Masked, Chain}, DL);
11726}
11727
11728SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
11729 SelectionDAG &DAG) const {
11730 const MVT XLenVT = Subtarget.getXLenVT();
11731 SDLoc DL(Op);
11732 SDValue Chain = Op->getOperand(0);
11733 SDValue RMValue = Op->getOperand(1);
11734 SDValue SysRegNo = DAG.getTargetConstant(
11735 RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
11736
11737 // Encoding used for rounding mode in RISC-V differs from that used in
11738 // FLT_ROUNDS. To convert it, the C rounding mode is used as an index in
11739 // a table, which consists of a sequence of 4-bit fields, each representing
11740 // corresponding RISC-V mode.
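// For example, a FLT_ROUNDS value of RoundingMode::TowardZero (0) selects the
// first 4-bit field, which holds RISCVFPRndMode::RTZ.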
11741 static const unsigned Table =
11742 (RISCVFPRndMode::RNE << 4 * int(RoundingMode::NearestTiesToEven)) |
11743 (RISCVFPRndMode::RTZ << 4 * int(RoundingMode::TowardZero)) |
11744 (RISCVFPRndMode::RDN << 4 * int(RoundingMode::TowardNegative)) |
11745 (RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) |
11746 (RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway));
11747
11748 RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, RMValue);
11749
11750 SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue,
11751 DAG.getConstant(2, DL, XLenVT));
11752 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
11753 DAG.getConstant(Table, DL, XLenVT), Shift);
11754 RMValue = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
11755 DAG.getConstant(0x7, DL, XLenVT));
11756 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
11757 RMValue);
11758}
11759
11760SDValue RISCVTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
11761 SelectionDAG &DAG) const {
11762 MachineFunction &MF = DAG.getMachineFunction();
11763
11764 bool isRISCV64 = Subtarget.is64Bit();
11765 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11766
11767 int FI = MF.getFrameInfo().CreateFixedObject(isRISCV64 ? 8 : 4, 0, false);
11768 return DAG.getFrameIndex(FI, PtrVT);
11769}
11770
11771// Returns the opcode of the target-specific SDNode that implements the 32-bit
11772// form of the given Opcode.
11773static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
11774 switch (Opcode) {
11775 default:
11776 llvm_unreachable("Unexpected opcode");
11777 case ISD::SHL:
11778 return RISCVISD::SLLW;
11779 case ISD::SRA:
11780 return RISCVISD::SRAW;
11781 case ISD::SRL:
11782 return RISCVISD::SRLW;
11783 case ISD::SDIV:
11784 return RISCVISD::DIVW;
11785 case ISD::UDIV:
11786 return RISCVISD::DIVUW;
11787 case ISD::UREM:
11788 return RISCVISD::REMUW;
11789 case ISD::ROTL:
11790 return RISCVISD::ROLW;
11791 case ISD::ROTR:
11792 return RISCVISD::RORW;
11793 }
11794}
11795
11796// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
11797// node. Because i8/i16/i32 isn't a legal type for RV64, these operations would
11798// otherwise be promoted to i64, making it difficult to select the
11799 // SLLW/DIVUW/.../*W later on because the fact that the operation was originally of
11800// type i8/i16/i32 is lost.
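// For example, (i32 (srl X, Y)) on RV64 becomes
// (i32 (trunc (SRLW (any_extend X), (any_extend Y)))).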
11801 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
11802 unsigned ExtOpc = ISD::ANY_EXTEND) {
11803 SDLoc DL(N);
11804 RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
11805 SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
11806 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
11807 SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
11808 // ReplaceNodeResults requires we maintain the same type for the return value.
11809 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
11810}
11811
11812// Converts the given 32-bit operation to a i64 operation with signed extension
11813// semantic to reduce the signed extension instructions.
11814 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
11815 SDLoc DL(N);
11816 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
11817 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
11818 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
11819 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
11820 DAG.getValueType(MVT::i32));
11821 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
11822}
11823
11824 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
11825 SmallVectorImpl<SDValue> &Results,
11826 SelectionDAG &DAG) const {
11827 SDLoc DL(N);
11828 switch (N->getOpcode()) {
11829 default:
11830 llvm_unreachable("Don't know how to custom type legalize this operation!");
11831 case ISD::STRICT_FP_TO_SINT:
11832 case ISD::STRICT_FP_TO_UINT:
11833 case ISD::FP_TO_SINT:
11834 case ISD::FP_TO_UINT: {
11835 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
11836 "Unexpected custom legalisation");
11837 bool IsStrict = N->isStrictFPOpcode();
11838 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
11839 N->getOpcode() == ISD::STRICT_FP_TO_SINT;
11840 SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
11841 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
11842 TargetLowering::TypeSoftenFloat) {
11843 if (!isTypeLegal(Op0.getValueType()))
11844 return;
11845 if (IsStrict) {
11846 SDValue Chain = N->getOperand(0);
11847 // In absence of Zfh, promote f16 to f32, then convert.
11848 if (Op0.getValueType() == MVT::f16 &&
11849 !Subtarget.hasStdExtZfhOrZhinx()) {
11850 Op0 = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
11851 {Chain, Op0});
11852 Chain = Op0.getValue(1);
11853 }
11854 unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RV64
11855 : RISCVISD::STRICT_FCVT_WU_RV64;
11856 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
11857 SDValue Res = DAG.getNode(
11858 Opc, DL, VTs, Chain, Op0,
11859 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
11860 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11861 Results.push_back(Res.getValue(1));
11862 return;
11863 }
11864 // For bf16, or f16 in absence of Zfh, promote [b]f16 to f32 and then
11865 // convert.
11866 if ((Op0.getValueType() == MVT::f16 &&
11867 !Subtarget.hasStdExtZfhOrZhinx()) ||
11868 Op0.getValueType() == MVT::bf16)
11869 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
11870
11871 unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
11872 SDValue Res =
11873 DAG.getNode(Opc, DL, MVT::i64, Op0,
11874 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
11875 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11876 return;
11877 }
11878 // If the FP type needs to be softened, emit a library call using the 'si'
11879 // version. If we left it to default legalization we'd end up with 'di'. If
11880 // the FP type doesn't need to be softened just let generic type
11881 // legalization promote the result type.
11882 RTLIB::Libcall LC;
11883 if (IsSigned)
11884 LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
11885 else
11886 LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
11887 MakeLibCallOptions CallOptions;
11888 EVT OpVT = Op0.getValueType();
11889 CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
11890 SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
11891 SDValue Result;
11892 std::tie(Result, Chain) =
11893 makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
11894 Results.push_back(Result);
11895 if (IsStrict)
11896 Results.push_back(Chain);
11897 break;
11898 }
11899 case ISD::LROUND: {
11900 SDValue Op0 = N->getOperand(0);
11901 EVT Op0VT = Op0.getValueType();
11902 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
11903 TargetLowering::TypeSoftenFloat) {
11904 if (!isTypeLegal(Op0VT))
11905 return;
11906
11907 // In absence of Zfh, promote f16 to f32, then convert.
11908 if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx())
11909 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
11910
11911 SDValue Res =
11912 DAG.getNode(RISCVISD::FCVT_W_RV64, DL, MVT::i64, Op0,
11913 DAG.getTargetConstant(RISCVFPRndMode::RMM, DL, MVT::i64));
11914 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11915 return;
11916 }
11917 // If the FP type needs to be softened, emit a library call to lround. We'll
11918 // need to truncate the result. We assume any value that doesn't fit in i32
11919 // is allowed to return an unspecified value.
11920 RTLIB::Libcall LC =
11921 Op0.getValueType() == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
11922 MakeLibCallOptions CallOptions;
11923 EVT OpVT = Op0.getValueType();
11924 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64, true);
11925 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
11926 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
11927 Results.push_back(Result);
11928 break;
11929 }
11930 case ISD::READCYCLECOUNTER:
11931 case ISD::READSTEADYCOUNTER: {
11932 assert(!Subtarget.is64Bit() && "READCYCLECOUNTER/READSTEADYCOUNTER only "
11933 "has custom type legalization on riscv32");
11934
11935 SDValue LoCounter, HiCounter;
11936 MVT XLenVT = Subtarget.getXLenVT();
11937 if (N->getOpcode() == ISD::READCYCLECOUNTER) {
11938 LoCounter = DAG.getTargetConstant(
11939 RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding, DL, XLenVT);
11940 HiCounter = DAG.getTargetConstant(
11941 RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding, DL, XLenVT);
11942 } else {
11943 LoCounter = DAG.getTargetConstant(
11944 RISCVSysReg::lookupSysRegByName("TIME")->Encoding, DL, XLenVT);
11945 HiCounter = DAG.getTargetConstant(
11946 RISCVSysReg::lookupSysRegByName("TIMEH")->Encoding, DL, XLenVT);
11947 }
11948 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
11949 SDValue RCW = DAG.getNode(RISCVISD::READ_COUNTER_WIDE, DL, VTs,
11950 N->getOperand(0), LoCounter, HiCounter);
11951
11952 Results.push_back(
11953 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
11954 Results.push_back(RCW.getValue(2));
11955 break;
11956 }
11957 case ISD::LOAD: {
11958 if (!ISD::isNON_EXTLoad(N))
11959 return;
11960
11961 // Use a SEXTLOAD instead of the default EXTLOAD. Similar to the
11962 // sext_inreg we emit for ADD/SUB/MUL/SLLI.
11963 LoadSDNode *Ld = cast<LoadSDNode>(N);
11964
11965 SDLoc dl(N);
11966 SDValue Res = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Ld->getChain(),
11967 Ld->getBasePtr(), Ld->getMemoryVT(),
11968 Ld->getMemOperand());
11969 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Res));
11970 Results.push_back(Res.getValue(1));
11971 return;
11972 }
11973 case ISD::MUL: {
11974 unsigned Size = N->getSimpleValueType(0).getSizeInBits();
11975 unsigned XLen = Subtarget.getXLen();
11976 // This multiply needs to be expanded, try to use MULHSU+MUL if possible.
11977 if (Size > XLen) {
11978 assert(Size == (XLen * 2) && "Unexpected custom legalisation");
11979 SDValue LHS = N->getOperand(0);
11980 SDValue RHS = N->getOperand(1);
11981 APInt HighMask = APInt::getHighBitsSet(Size, XLen);
11982
11983 bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask);
11984 bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask);
11985 // We need exactly one side to be unsigned.
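// MULHSU returns the high XLEN bits of a signed*unsigned product, so pairing
// it with MUL below reproduces the full 2*XLEN-bit result only when one
// operand is zero-extended and the other is sign-extended from XLEN.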
11986 if (LHSIsU == RHSIsU)
11987 return;
11988
11989 auto MakeMULPair = [&](SDValue S, SDValue U) {
11990 MVT XLenVT = Subtarget.getXLenVT();
11991 S = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, S);
11992 U = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, U);
11993 SDValue Lo = DAG.getNode(ISD::MUL, DL, XLenVT, S, U);
11994 SDValue Hi = DAG.getNode(RISCVISD::MULHSU, DL, XLenVT, S, U);
11995 return DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0), Lo, Hi);
11996 };
11997
11998 bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen;
11999 bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen;
12000
12001 // The other operand should be signed, but still prefer MULH when
12002 // possible.
12003 if (RHSIsU && LHSIsS && !RHSIsS)
12004 Results.push_back(MakeMULPair(LHS, RHS));
12005 else if (LHSIsU && RHSIsS && !LHSIsS)
12006 Results.push_back(MakeMULPair(RHS, LHS));
12007
12008 return;
12009 }
12010 [[fallthrough]];
12011 }
12012 case ISD::ADD:
12013 case ISD::SUB:
12014 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12015 "Unexpected custom legalisation");
12016 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
12017 break;
12018 case ISD::SHL:
12019 case ISD::SRA:
12020 case ISD::SRL:
12021 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12022 "Unexpected custom legalisation");
12023 if (N->getOperand(1).getOpcode() != ISD::Constant) {
12024 // If we can use a BSET instruction, allow default promotion to apply.
12025 if (N->getOpcode() == ISD::SHL && Subtarget.hasStdExtZbs() &&
12026 isOneConstant(N->getOperand(0)))
12027 break;
12028 Results.push_back(customLegalizeToWOp(N, DAG));
12029 break;
12030 }
12031
12032 // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is
12033 // similar to customLegalizeToWOpWithSExt, but we must zero_extend the
12034 // shift amount.
12035 if (N->getOpcode() == ISD::SHL) {
12036 SDLoc DL(N);
12037 SDValue NewOp0 =
12038 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12039 SDValue NewOp1 =
12040 DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1));
12041 SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1);
12042 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
12043 DAG.getValueType(MVT::i32));
12044 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
12045 }
12046
12047 break;
12048 case ISD::ROTL:
12049 case ISD::ROTR:
12050 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12051 "Unexpected custom legalisation");
12052 assert((Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
12053 Subtarget.hasVendorXTHeadBb()) &&
12054 "Unexpected custom legalization");
12055 if (!isa<ConstantSDNode>(N->getOperand(1)) &&
12056 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()))
12057 return;
12058 Results.push_back(customLegalizeToWOp(N, DAG));
12059 break;
12060 case ISD::CTTZ:
12061 case ISD::CTTZ_ZERO_UNDEF:
12062 case ISD::CTLZ:
12063 case ISD::CTLZ_ZERO_UNDEF: {
12064 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12065 "Unexpected custom legalisation");
12066
12067 SDValue NewOp0 =
12068 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12069 bool IsCTZ =
12070 N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF;
12071 unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW;
12072 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0);
12073 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12074 return;
12075 }
12076 case ISD::SDIV:
12077 case ISD::UDIV:
12078 case ISD::UREM: {
12079 MVT VT = N->getSimpleValueType(0);
12080 assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
12081 Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
12082 "Unexpected custom legalisation");
12083 // Don't promote division/remainder by constant since we should expand those
12084 // to multiply by magic constant.
12085 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
12086 if (N->getOperand(1).getOpcode() == ISD::Constant &&
12087 !isIntDivCheap(N->getValueType(0), Attr))
12088 return;
12089
12090 // If the input is i32, use ANY_EXTEND since the W instructions don't read
12091 // the upper 32 bits. For other types we need to sign or zero extend
12092 // based on the opcode.
12093 unsigned ExtOpc = ISD::ANY_EXTEND;
12094 if (VT != MVT::i32)
12095 ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
12096 : ISD::ZERO_EXTEND;
12097
12098 Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
12099 break;
12100 }
12101 case ISD::SADDO: {
12102 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12103 "Unexpected custom legalisation");
12104
12105 // If the RHS is a constant, we can simplify ConditionRHS below. Otherwise
12106 // use the default legalization.
12107 if (!isa<ConstantSDNode>(N->getOperand(1)))
12108 return;
12109
12110 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
12111 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
12112 SDValue Res = DAG.getNode(ISD::ADD, DL, MVT::i64, LHS, RHS);
12113 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
12114 DAG.getValueType(MVT::i32));
12115
12116 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
12117
12118 // For an addition, the result should be less than one of the operands (LHS)
12119 // if and only if the other operand (RHS) is negative, otherwise there will
12120 // be overflow.
12121 // For a subtraction, the result should be less than one of the operands
12122 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
12123 // otherwise there will be overflow.
12124 EVT OType = N->getValueType(1);
12125 SDValue ResultLowerThanLHS = DAG.getSetCC(DL, OType, Res, LHS, ISD::SETLT);
12126 SDValue ConditionRHS = DAG.getSetCC(DL, OType, RHS, Zero, ISD::SETLT);
12127
12128 SDValue Overflow =
12129 DAG.getNode(ISD::XOR, DL, OType, ConditionRHS, ResultLowerThanLHS);
12130 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12131 Results.push_back(Overflow);
12132 return;
12133 }
12134 case ISD::UADDO:
12135 case ISD::USUBO: {
12136 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12137 "Unexpected custom legalisation");
12138 bool IsAdd = N->getOpcode() == ISD::UADDO;
12139 // Create an ADDW or SUBW.
12140 SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12141 SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12142 SDValue Res =
12143 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
12144 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
12145 DAG.getValueType(MVT::i32));
12146
12147 SDValue Overflow;
12148 if (IsAdd && isOneConstant(RHS)) {
12149 // Special case uaddo X, 1 overflowed if the addition result is 0.
12150 // The general case (X + C) < C is not necessarily beneficial. Although we
12151 // reduce the live range of X, we may introduce the materialization of
12152 // constant C, especially when the setcc result is used by a branch, since
12153 // RISC-V has no compare-with-constant branch instructions.
12154 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res,
12155 DAG.getConstant(0, DL, MVT::i64), ISD::SETEQ);
12156 } else if (IsAdd && isAllOnesConstant(RHS)) {
12157 // Special case uaddo X, -1 overflowed if X != 0.
12158 Overflow = DAG.getSetCC(DL, N->getValueType(1), N->getOperand(0),
12159 DAG.getConstant(0, DL, MVT::i32), ISD::SETNE);
12160 } else {
12161 // Sign extend the LHS and perform an unsigned compare with the ADDW
12162 // result. Since the inputs are sign extended from i32, this is equivalent
12163 // to comparing the lower 32 bits.
12164 LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
12165 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,
12166 IsAdd ? ISD::SETULT : ISD::SETUGT);
12167 }
12168
12169 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12170 Results.push_back(Overflow);
12171 return;
12172 }
12173 case ISD::UADDSAT:
12174 case ISD::USUBSAT: {
12175 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12176 "Unexpected custom legalisation");
12177 if (Subtarget.hasStdExtZbb()) {
12178 // With Zbb we can sign extend and let LegalizeDAG use minu/maxu. Using
12179 // sign extend allows overflow of the lower 32 bits to be detected on
12180 // the promoted size.
12181 SDValue LHS =
12182 DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
12183 SDValue RHS =
12184 DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
12185 SDValue Res = DAG.getNode(N->getOpcode(), DL, MVT::i64, LHS, RHS);
12186 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12187 return;
12188 }
12189
12190 // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom
12191 // promotion for UADDO/USUBO.
12192 Results.push_back(expandAddSubSat(N, DAG));
12193 return;
12194 }
12195 case ISD::SADDSAT:
12196 case ISD::SSUBSAT: {
12197 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12198 "Unexpected custom legalisation");
12199 Results.push_back(expandAddSubSat(N, DAG));
12200 return;
12201 }
12202 case ISD::ABS: {
12203 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12204 "Unexpected custom legalisation");
12205
12206 if (Subtarget.hasStdExtZbb()) {
12207 // Emit a special ABSW node that will be expanded to NEGW+MAX at isel.
12208 // This allows us to remember that the result is sign extended. Expanding
12209 // to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits.
12210 SDValue Src = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64,
12211 N->getOperand(0));
12212 SDValue Abs = DAG.getNode(RISCVISD::ABSW, DL, MVT::i64, Src);
12213 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs));
12214 return;
12215 }
12216
12217 // Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y)
12218 SDValue Src = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12219
12220 // Freeze the source so we can increase its use count.
12221 Src = DAG.getFreeze(Src);
12222
12223 // Copy sign bit to all bits using the sraiw pattern.
12224 SDValue SignFill = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Src,
12225 DAG.getValueType(MVT::i32));
12226 SignFill = DAG.getNode(ISD::SRA, DL, MVT::i64, SignFill,
12227 DAG.getConstant(31, DL, MVT::i64));
12228
12229 SDValue NewRes = DAG.getNode(ISD::XOR, DL, MVT::i64, Src, SignFill);
12230 NewRes = DAG.getNode(ISD::SUB, DL, MVT::i64, NewRes, SignFill);
12231
12232 // NOTE: The result is only required to be anyextended, but sext is
12233 // consistent with type legalization of sub.
12234 NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewRes,
12235 DAG.getValueType(MVT::i32));
12236 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
12237 return;
12238 }
12239 case ISD::BITCAST: {
12240 EVT VT = N->getValueType(0);
12241 assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!");
12242 SDValue Op0 = N->getOperand(0);
12243 EVT Op0VT = Op0.getValueType();
12244 MVT XLenVT = Subtarget.getXLenVT();
12245 if (VT == MVT::i16 && Op0VT == MVT::f16 &&
12246 Subtarget.hasStdExtZfhminOrZhinxmin()) {
12247 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
12248 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
12249 } else if (VT == MVT::i16 && Op0VT == MVT::bf16 &&
12250 Subtarget.hasStdExtZfbfmin()) {
12251 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
12252 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
12253 } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() &&
12254 Subtarget.hasStdExtFOrZfinx()) {
12255 SDValue FPConv =
12256 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
12257 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
12258 } else if (VT == MVT::i64 && Op0VT == MVT::f64 && XLenVT == MVT::i32) {
12259 SDValue NewReg = DAG.getNode(RISCVISD::SplitF64, DL,
12260 DAG.getVTList(MVT::i32, MVT::i32), Op0);
12261 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
12262 NewReg.getValue(0), NewReg.getValue(1));
12263 Results.push_back(RetReg);
12264 } else if (!VT.isVector() && Op0VT.isFixedLengthVector() &&
12265 isTypeLegal(Op0VT)) {
12266 // Custom-legalize bitcasts from fixed-length vector types to illegal
12267 // scalar types in order to improve codegen. Bitcast the vector to a
12268 // one-element vector type whose element type is the same as the result
12269 // type, and extract the first element.
12270 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
12271 if (isTypeLegal(BVT)) {
12272 SDValue BVec = DAG.getBitcast(BVT, Op0);
12273 Results.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
12274 DAG.getVectorIdxConstant(0, DL)));
12275 }
12276 }
12277 break;
12278 }
12279 case RISCVISD::BREV8: {
12280 MVT VT = N->getSimpleValueType(0);
12281 MVT XLenVT = Subtarget.getXLenVT();
12282 assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
12283 "Unexpected custom legalisation");
12284 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
12285 SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
12286 SDValue NewRes = DAG.getNode(N->getOpcode(), DL, XLenVT, NewOp);
12287 // ReplaceNodeResults requires we maintain the same type for the return
12288 // value.
12289 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NewRes));
12290 break;
12291 }
12292 case ISD::EXTRACT_VECTOR_ELT: {
12293 // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
12294 // type is illegal (currently only vXi64 RV32).
12295 // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
12296 // transferred to the destination register. We issue two of these from the
12297 // upper- and lower- halves of the SEW-bit vector element, slid down to the
12298 // first element.
12299 SDValue Vec = N->getOperand(0);
12300 SDValue Idx = N->getOperand(1);
12301
12302 // The vector type hasn't been legalized yet so we can't issue target
12303 // specific nodes if it needs legalization.
12304 // FIXME: We would manually legalize if it's important.
12305 if (!isTypeLegal(Vec.getValueType()))
12306 return;
12307
12308 MVT VecVT = Vec.getSimpleValueType();
12309
12310 assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
12311 VecVT.getVectorElementType() == MVT::i64 &&
12312 "Unexpected EXTRACT_VECTOR_ELT legalization");
12313
12314 // If this is a fixed vector, we need to convert it to a scalable vector.
12315 MVT ContainerVT = VecVT;
12316 if (VecVT.isFixedLengthVector()) {
12317 ContainerVT = getContainerForFixedLengthVector(VecVT);
12318 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
12319 }
12320
12321 MVT XLenVT = Subtarget.getXLenVT();
12322
12323 // Use a VL of 1 to avoid processing more elements than we need.
12324 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
12325
12326 // Unless the index is known to be 0, we must slide the vector down to get
12327 // the desired element into index 0.
12328 if (!isNullConstant(Idx)) {
12329 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
12330 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
12331 }
12332
12333 // Extract the lower XLEN bits of the correct vector element.
12334 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
12335
12336 // To extract the upper XLEN bits of the vector element, shift the first
12337 // element right by 32 bits and re-extract the lower XLEN bits.
12338 SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
12339 DAG.getUNDEF(ContainerVT),
12340 DAG.getConstant(32, DL, XLenVT), VL);
12341 SDValue LShr32 =
12342 DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec, ThirtyTwoV,
12343 DAG.getUNDEF(ContainerVT), Mask, VL);
12344
12345 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
12346
12347 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
12348 break;
12349 }
12350 case ISD::INTRINSIC_WO_CHAIN: {
12351 unsigned IntNo = N->getConstantOperandVal(0);
12352 switch (IntNo) {
12353 default:
12355 "Don't know how to custom type legalize this intrinsic!");
12356 case Intrinsic::experimental_get_vector_length: {
12357 SDValue Res = lowerGetVectorLength(N, DAG, Subtarget);
12358 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12359 return;
12360 }
12361 case Intrinsic::experimental_cttz_elts: {
12362 SDValue Res = lowerCttzElts(N, DAG, Subtarget);
12363 Results.push_back(
12364 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res));
12365 return;
12366 }
12367 case Intrinsic::riscv_orc_b:
12368 case Intrinsic::riscv_brev8:
12369 case Intrinsic::riscv_sha256sig0:
12370 case Intrinsic::riscv_sha256sig1:
12371 case Intrinsic::riscv_sha256sum0:
12372 case Intrinsic::riscv_sha256sum1:
12373 case Intrinsic::riscv_sm3p0:
12374 case Intrinsic::riscv_sm3p1: {
12375 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12376 return;
12377 unsigned Opc;
12378 switch (IntNo) {
12379 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
12380 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
12381 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
12382 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
12383 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
12384 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
12385 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
12386 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
12387 }
12388
12389 SDValue NewOp =
12390 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12391 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
12392 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12393 return;
12394 }
12395 case Intrinsic::riscv_sm4ks:
12396 case Intrinsic::riscv_sm4ed: {
12397 unsigned Opc =
12398 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
12399 SDValue NewOp0 =
12400 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12401 SDValue NewOp1 =
12402 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
12403 SDValue Res =
12404 DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, N->getOperand(3));
12405 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12406 return;
12407 }
12408 case Intrinsic::riscv_mopr: {
12409 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12410 return;
12411 SDValue NewOp =
12412 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12413 SDValue Res = DAG.getNode(
12414 RISCVISD::MOPR, DL, MVT::i64, NewOp,
12415 DAG.getTargetConstant(N->getConstantOperandVal(2), DL, MVT::i64));
12416 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12417 return;
12418 }
12419 case Intrinsic::riscv_moprr: {
12420 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12421 return;
12422 SDValue NewOp0 =
12423 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12424 SDValue NewOp1 =
12425 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
12426 SDValue Res = DAG.getNode(
12427 RISCVISD::MOPRR, DL, MVT::i64, NewOp0, NewOp1,
12428 DAG.getTargetConstant(N->getConstantOperandVal(3), DL, MVT::i64));
12429 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12430 return;
12431 }
12432 case Intrinsic::riscv_clmul: {
12433 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12434 return;
12435
12436 SDValue NewOp0 =
12437 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12438 SDValue NewOp1 =
12439 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
12440 SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
12441 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12442 return;
12443 }
12444 case Intrinsic::riscv_clmulh:
12445 case Intrinsic::riscv_clmulr: {
12446 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12447 return;
12448
12449 // Extend inputs to XLen, and shift by 32. This will add 64 trailing zeros
12450 // to the full 128-bit clmul result of multiplying two xlen values.
12451 // Perform clmulr or clmulh on the shifted values. Finally, extract the
12452 // upper 32 bits.
12453 //
12454 // The alternative is to mask the inputs to 32 bits and use clmul, but
12455 // that requires two shifts to mask each input without zext.w.
12456 // FIXME: If the inputs are known zero extended or could be freely
12457 // zero extended, the mask form would be better.
12458 SDValue NewOp0 =
12459 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12460 SDValue NewOp1 =
12461 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
12462 NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
12463 DAG.getConstant(32, DL, MVT::i64));
12464 NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
12465 DAG.getConstant(32, DL, MVT::i64));
12466 unsigned Opc = IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH
12467 : RISCVISD::CLMULR;
12468 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
12469 Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
12470 DAG.getConstant(32, DL, MVT::i64));
12471 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12472 return;
12473 }
12474 case Intrinsic::riscv_vmv_x_s: {
12475 EVT VT = N->getValueType(0);
12476 MVT XLenVT = Subtarget.getXLenVT();
12477 if (VT.bitsLT(XLenVT)) {
12478 // Simple case just extract using vmv.x.s and truncate.
12479 SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
12480 Subtarget.getXLenVT(), N->getOperand(1));
12481 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
12482 return;
12483 }
12484
12485 assert(VT == MVT::i64 && !Subtarget.is64Bit() &&
12486 "Unexpected custom legalization");
12487
12488 // We need to do the move in two steps.
12489 SDValue Vec = N->getOperand(1);
12490 MVT VecVT = Vec.getSimpleValueType();
12491
12492 // First extract the lower XLEN bits of the element.
12493 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
12494
12495 // To extract the upper XLEN bits of the vector element, shift the first
12496 // element right by 32 bits and re-extract the lower XLEN bits.
12497 auto [Mask, VL] = getDefaultVLOps(1, VecVT, DL, DAG, Subtarget);
12498
12499 SDValue ThirtyTwoV =
12500 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT),
12501 DAG.getConstant(32, DL, XLenVT), VL);
12502 SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV,
12503 DAG.getUNDEF(VecVT), Mask, VL);
12504 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
12505
12506 Results.push_back(
12507 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
12508 break;
12509 }
12510 }
12511 break;
12512 }
12513 case ISD::VECREDUCE_ADD:
12514 case ISD::VECREDUCE_AND:
12515 case ISD::VECREDUCE_OR:
12516 case ISD::VECREDUCE_XOR:
12517  case ISD::VECREDUCE_SMAX:
12518  case ISD::VECREDUCE_UMAX:
12519  case ISD::VECREDUCE_SMIN:
12520  case ISD::VECREDUCE_UMIN:
12521    if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))
12522 Results.push_back(V);
12523 break;
12524 case ISD::VP_REDUCE_ADD:
12525 case ISD::VP_REDUCE_AND:
12526 case ISD::VP_REDUCE_OR:
12527 case ISD::VP_REDUCE_XOR:
12528 case ISD::VP_REDUCE_SMAX:
12529 case ISD::VP_REDUCE_UMAX:
12530 case ISD::VP_REDUCE_SMIN:
12531 case ISD::VP_REDUCE_UMIN:
12532 if (SDValue V = lowerVPREDUCE(SDValue(N, 0), DAG))
12533 Results.push_back(V);
12534 break;
12535 case ISD::GET_ROUNDING: {
12536 SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other);
12537 SDValue Res = DAG.getNode(ISD::GET_ROUNDING, DL, VTs, N->getOperand(0));
12538 Results.push_back(Res.getValue(0));
12539 Results.push_back(Res.getValue(1));
12540 break;
12541 }
12542 }
12543}
12544
12545/// Given a binary operator, return the *associative* generic ISD::VECREDUCE_OP
12546/// which corresponds to it.
12547static unsigned getVecReduceOpcode(unsigned Opc) {
12548 switch (Opc) {
12549 default:
12550     llvm_unreachable("Unhandled binary to transform reduction");
12551 case ISD::ADD:
12552 return ISD::VECREDUCE_ADD;
12553 case ISD::UMAX:
12554 return ISD::VECREDUCE_UMAX;
12555 case ISD::SMAX:
12556 return ISD::VECREDUCE_SMAX;
12557 case ISD::UMIN:
12558 return ISD::VECREDUCE_UMIN;
12559 case ISD::SMIN:
12560 return ISD::VECREDUCE_SMIN;
12561 case ISD::AND:
12562 return ISD::VECREDUCE_AND;
12563 case ISD::OR:
12564 return ISD::VECREDUCE_OR;
12565 case ISD::XOR:
12566 return ISD::VECREDUCE_XOR;
12567 case ISD::FADD:
12568 // Note: This is the associative form of the generic reduction opcode.
12569 return ISD::VECREDUCE_FADD;
12570 }
12571}
12572
12573/// Perform two related transforms whose purpose is to incrementally recognize
12574/// an explode_vector followed by scalar reduction as a vector reduction node.
12575/// This exists to recover from a deficiency in SLP which can't handle
12576/// forests with multiple roots sharing common nodes. In some cases, one
12577/// of the trees will be vectorized, and the other will remain (unprofitably)
12578/// scalarized.
12579 static SDValue
12580 combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG,
12581                                   const RISCVSubtarget &Subtarget) {
12582
12583   // This transform needs to run before all integer types have been legalized
12584   // to i64 (so that the vector element type matches the add type), and while
12585   // it's safe to introduce odd sized vector types.
12586   if (DAG.NewNodesMustHaveLegalTypes)
12587     return SDValue();
12588
12589 // Without V, this transform isn't useful. We could form the (illegal)
12590 // operations and let them be scalarized again, but there's really no point.
12591 if (!Subtarget.hasVInstructions())
12592 return SDValue();
12593
12594 const SDLoc DL(N);
12595 const EVT VT = N->getValueType(0);
12596 const unsigned Opc = N->getOpcode();
12597
12598 // For FADD, we only handle the case with reassociation allowed. We
12599 // could handle strict reduction order, but at the moment, there's no
12600 // known reason to, and the complexity isn't worth it.
12601 // TODO: Handle fminnum and fmaxnum here
12602 if (!VT.isInteger() &&
12603 (Opc != ISD::FADD || !N->getFlags().hasAllowReassociation()))
12604 return SDValue();
12605
12606 const unsigned ReduceOpc = getVecReduceOpcode(Opc);
12607 assert(Opc == ISD::getVecReduceBaseOpcode(ReduceOpc) &&
12608 "Inconsistent mappings");
12609 SDValue LHS = N->getOperand(0);
12610 SDValue RHS = N->getOperand(1);
12611
12612 if (!LHS.hasOneUse() || !RHS.hasOneUse())
12613 return SDValue();
12614
12615 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
12616 std::swap(LHS, RHS);
12617
12618 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
12619 !isa<ConstantSDNode>(RHS.getOperand(1)))
12620 return SDValue();
12621
12622 uint64_t RHSIdx = cast<ConstantSDNode>(RHS.getOperand(1))->getLimitedValue();
12623 SDValue SrcVec = RHS.getOperand(0);
12624 EVT SrcVecVT = SrcVec.getValueType();
12625 assert(SrcVecVT.getVectorElementType() == VT);
12626 if (SrcVecVT.isScalableVector())
12627 return SDValue();
12628
12629 if (SrcVecVT.getScalarSizeInBits() > Subtarget.getELen())
12630 return SDValue();
12631
12632 // match binop (extract_vector_elt V, 0), (extract_vector_elt V, 1) to
12633 // reduce_op (extract_subvector [2 x VT] from V). This will form the
12634 // root of our reduction tree. TODO: We could extend this to any two
12635 // adjacent aligned constant indices if desired.
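  // For example, (add (extract_vector_elt v4i32 V, 0),
  //                   (extract_vector_elt v4i32 V, 1))
  // becomes (vecreduce_add (extract_subvector v2i32 V, 0)), and the second
  // pattern below can then grow that reduce by one element at a time.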
12636 if (LHS.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
12637 LHS.getOperand(0) == SrcVec && isa<ConstantSDNode>(LHS.getOperand(1))) {
12638 uint64_t LHSIdx =
12639 cast<ConstantSDNode>(LHS.getOperand(1))->getLimitedValue();
12640 if (0 == std::min(LHSIdx, RHSIdx) && 1 == std::max(LHSIdx, RHSIdx)) {
12641 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, 2);
12642 SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,
12643 DAG.getVectorIdxConstant(0, DL));
12644 return DAG.getNode(ReduceOpc, DL, VT, Vec, N->getFlags());
12645 }
12646 }
12647
12648 // Match (binop (reduce (extract_subvector V, 0),
12649 // (extract_vector_elt V, sizeof(SubVec))))
12650 // into a reduction of one more element from the original vector V.
12651 if (LHS.getOpcode() != ReduceOpc)
12652 return SDValue();
12653
12654 SDValue ReduceVec = LHS.getOperand(0);
12655 if (ReduceVec.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
12656 ReduceVec.hasOneUse() && ReduceVec.getOperand(0) == RHS.getOperand(0) &&
12657 isNullConstant(ReduceVec.getOperand(1)) &&
12658 ReduceVec.getValueType().getVectorNumElements() == RHSIdx) {
12659 // For illegal types (e.g. 3xi32), most will be combined again into a
12660 // wider (hopefully legal) type. If this is a terminal state, we are
12661 // relying on type legalization here to produce something reasonable
12662 // and this lowering quality could probably be improved. (TODO)
12663 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, RHSIdx + 1);
12664 SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,
12665 DAG.getVectorIdxConstant(0, DL));
12666 auto Flags = ReduceVec->getFlags();
12667 Flags.intersectWith(N->getFlags());
12668 return DAG.getNode(ReduceOpc, DL, VT, Vec, Flags);
12669 }
12670
12671 return SDValue();
12672}
12673
12674
12675// Try to fold (<bop> x, (reduction.<bop> vec, start))
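// If the reduction's start value is the neutral element of <bop> and its AVL
// is known to be non-zero, x can be folded in as the new start value, e.g.
// (add x, (reduction.add vec, 0)) -> (reduction.add vec, x).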
12676 static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG,
12677                                     const RISCVSubtarget &Subtarget) {
12678 auto BinOpToRVVReduce = [](unsigned Opc) {
12679 switch (Opc) {
12680 default:
12681       llvm_unreachable("Unhandled binary to transform reduction");
12682     case ISD::ADD:
12683       return RISCVISD::VECREDUCE_ADD_VL;
12684     case ISD::UMAX:
12685       return RISCVISD::VECREDUCE_UMAX_VL;
12686     case ISD::SMAX:
12687       return RISCVISD::VECREDUCE_SMAX_VL;
12688     case ISD::UMIN:
12689       return RISCVISD::VECREDUCE_UMIN_VL;
12690     case ISD::SMIN:
12691       return RISCVISD::VECREDUCE_SMIN_VL;
12692     case ISD::AND:
12693       return RISCVISD::VECREDUCE_AND_VL;
12694     case ISD::OR:
12695       return RISCVISD::VECREDUCE_OR_VL;
12696     case ISD::XOR:
12697       return RISCVISD::VECREDUCE_XOR_VL;
12698     case ISD::FADD:
12699       return RISCVISD::VECREDUCE_FADD_VL;
12700     case ISD::FMAXNUM:
12701       return RISCVISD::VECREDUCE_FMAX_VL;
12702     case ISD::FMINNUM:
12703       return RISCVISD::VECREDUCE_FMIN_VL;
12704     }
12705 };
12706
12707 auto IsReduction = [&BinOpToRVVReduce](SDValue V, unsigned Opc) {
12708 return V.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
12709 isNullConstant(V.getOperand(1)) &&
12710 V.getOperand(0).getOpcode() == BinOpToRVVReduce(Opc);
12711 };
12712
12713 unsigned Opc = N->getOpcode();
12714 unsigned ReduceIdx;
12715 if (IsReduction(N->getOperand(0), Opc))
12716 ReduceIdx = 0;
12717 else if (IsReduction(N->getOperand(1), Opc))
12718 ReduceIdx = 1;
12719 else
12720 return SDValue();
12721
12722   // Skip if FADD disallows reassociation but the combine requires it.
12723 if (Opc == ISD::FADD && !N->getFlags().hasAllowReassociation())
12724 return SDValue();
12725
12726 SDValue Extract = N->getOperand(ReduceIdx);
12727 SDValue Reduce = Extract.getOperand(0);
12728 if (!Extract.hasOneUse() || !Reduce.hasOneUse())
12729 return SDValue();
12730
12731 SDValue ScalarV = Reduce.getOperand(2);
12732 EVT ScalarVT = ScalarV.getValueType();
12733 if (ScalarV.getOpcode() == ISD::INSERT_SUBVECTOR &&
12734 ScalarV.getOperand(0)->isUndef() &&
12735 isNullConstant(ScalarV.getOperand(2)))
12736 ScalarV = ScalarV.getOperand(1);
12737
12738 // Make sure that ScalarV is a splat with VL=1.
12739 if (ScalarV.getOpcode() != RISCVISD::VFMV_S_F_VL &&
12740 ScalarV.getOpcode() != RISCVISD::VMV_S_X_VL &&
12741 ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL)
12742 return SDValue();
12743
12744 if (!isNonZeroAVL(ScalarV.getOperand(2)))
12745 return SDValue();
12746
12747   // Check that the scalar of ScalarV is the neutral element.
12748   // TODO: Deal with values other than the neutral element.
12749 if (!isNeutralConstant(N->getOpcode(), N->getFlags(), ScalarV.getOperand(1),
12750 0))
12751 return SDValue();
12752
12753 // If the AVL is zero, operand 0 will be returned. So it's not safe to fold.
12754 // FIXME: We might be able to improve this if operand 0 is undef.
12755 if (!isNonZeroAVL(Reduce.getOperand(5)))
12756 return SDValue();
12757
12758 SDValue NewStart = N->getOperand(1 - ReduceIdx);
12759
12760 SDLoc DL(N);
12761 SDValue NewScalarV =
12762 lowerScalarInsert(NewStart, ScalarV.getOperand(2),
12763 ScalarV.getSimpleValueType(), DL, DAG, Subtarget);
12764
12765 // If we looked through an INSERT_SUBVECTOR we need to restore it.
12766 if (ScalarVT != ScalarV.getValueType())
12767 NewScalarV =
12768 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ScalarVT, DAG.getUNDEF(ScalarVT),
12769 NewScalarV, DAG.getVectorIdxConstant(0, DL));
12770
12771 SDValue Ops[] = {Reduce.getOperand(0), Reduce.getOperand(1),
12772 NewScalarV, Reduce.getOperand(3),
12773 Reduce.getOperand(4), Reduce.getOperand(5)};
12774 SDValue NewReduce =
12775 DAG.getNode(Reduce.getOpcode(), DL, Reduce.getValueType(), Ops);
12776 return DAG.getNode(Extract.getOpcode(), DL, Extract.getValueType(), NewReduce,
12777 Extract.getOperand(1));
12778}
12779
12780// Optimize (add (shl x, c0), (shl y, c1)) ->
12781 //          (SLLI (SH*ADD x, y), c0), if c1-c0 is 1, 2, or 3.
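// For example, with c0 = 2 and c1 = 5:
//   (add (shl x, 2), (shl y, 5)) -> (shl (add (shl y, 3), x), 2),
// which selects to SH3ADD y, x followed by SLLI by 2.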
12782 static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,
12783                                   const RISCVSubtarget &Subtarget) {
12784   // Perform this optimization only when the Zba extension is available.
12785 if (!Subtarget.hasStdExtZba())
12786 return SDValue();
12787
12788 // Skip for vector types and larger types.
12789 EVT VT = N->getValueType(0);
12790 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
12791 return SDValue();
12792
12793 // The two operand nodes must be SHL and have no other use.
12794 SDValue N0 = N->getOperand(0);
12795 SDValue N1 = N->getOperand(1);
12796 if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL ||
12797 !N0->hasOneUse() || !N1->hasOneUse())
12798 return SDValue();
12799
12800 // Check c0 and c1.
12801 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
12802 auto *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(1));
12803 if (!N0C || !N1C)
12804 return SDValue();
12805 int64_t C0 = N0C->getSExtValue();
12806 int64_t C1 = N1C->getSExtValue();
12807 if (C0 <= 0 || C1 <= 0)
12808 return SDValue();
12809
12810 // Skip if SH1ADD/SH2ADD/SH3ADD are not applicable.
12811 int64_t Bits = std::min(C0, C1);
12812 int64_t Diff = std::abs(C0 - C1);
12813 if (Diff != 1 && Diff != 2 && Diff != 3)
12814 return SDValue();
12815
12816 // Build nodes.
12817 SDLoc DL(N);
12818 SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0);
12819 SDValue NL = (C0 > C1) ? N0->getOperand(0) : N1->getOperand(0);
12820 SDValue NA0 =
12821 DAG.getNode(ISD::SHL, DL, VT, NL, DAG.getConstant(Diff, DL, VT));
12822 SDValue NA1 = DAG.getNode(ISD::ADD, DL, VT, NA0, NS);
12823 return DAG.getNode(ISD::SHL, DL, VT, NA1, DAG.getConstant(Bits, DL, VT));
12824}
12825
12826// Combine a constant select operand into its use:
12827//
12828// (and (select cond, -1, c), x)
12829// -> (select cond, x, (and x, c)) [AllOnes=1]
12830// (or (select cond, 0, c), x)
12831// -> (select cond, x, (or x, c)) [AllOnes=0]
12832// (xor (select cond, 0, c), x)
12833// -> (select cond, x, (xor x, c)) [AllOnes=0]
12834// (add (select cond, 0, c), x)
12835// -> (select cond, x, (add x, c)) [AllOnes=0]
12836// (sub x, (select cond, 0, c))
12837// -> (select cond, x, (sub x, c)) [AllOnes=0]
12838 static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
12839                                    SelectionDAG &DAG, bool AllOnes,
12840 const RISCVSubtarget &Subtarget) {
12841 EVT VT = N->getValueType(0);
12842
12843 // Skip vectors.
12844 if (VT.isVector())
12845 return SDValue();
12846
12847 if (!Subtarget.hasConditionalMoveFusion()) {
12848 // (select cond, x, (and x, c)) has custom lowering with Zicond.
12849 if ((!Subtarget.hasStdExtZicond() &&
12850 !Subtarget.hasVendorXVentanaCondOps()) ||
12851 N->getOpcode() != ISD::AND)
12852 return SDValue();
12853
12854     // Maybe harmful when the condition code has multiple uses.
12855 if (Slct.getOpcode() == ISD::SELECT && !Slct.getOperand(0).hasOneUse())
12856 return SDValue();
12857
12858 // Maybe harmful when VT is wider than XLen.
12859 if (VT.getSizeInBits() > Subtarget.getXLen())
12860 return SDValue();
12861 }
12862
12863 if ((Slct.getOpcode() != ISD::SELECT &&
12864 Slct.getOpcode() != RISCVISD::SELECT_CC) ||
12865 !Slct.hasOneUse())
12866 return SDValue();
12867
12868   auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) {
12869     return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
12870   };
12871
12872 bool SwapSelectOps;
12873 unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0;
12874 SDValue TrueVal = Slct.getOperand(1 + OpOffset);
12875 SDValue FalseVal = Slct.getOperand(2 + OpOffset);
12876 SDValue NonConstantVal;
12877 if (isZeroOrAllOnes(TrueVal, AllOnes)) {
12878 SwapSelectOps = false;
12879 NonConstantVal = FalseVal;
12880 } else if (isZeroOrAllOnes(FalseVal, AllOnes)) {
12881 SwapSelectOps = true;
12882 NonConstantVal = TrueVal;
12883 } else
12884 return SDValue();
12885
12886   // Slct is now known to be the desired identity constant when CC is true.
12887 TrueVal = OtherOp;
12888 FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal);
12889 // Unless SwapSelectOps says the condition should be false.
12890 if (SwapSelectOps)
12891 std::swap(TrueVal, FalseVal);
12892
12893 if (Slct.getOpcode() == RISCVISD::SELECT_CC)
12894 return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT,
12895 {Slct.getOperand(0), Slct.getOperand(1),
12896 Slct.getOperand(2), TrueVal, FalseVal});
12897
12898 return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
12899 {Slct.getOperand(0), TrueVal, FalseVal});
12900}
12901
12902// Attempt combineSelectAndUse on each operand of a commutative operator N.
12903 static SDValue combineSelectAndUseCommutative(SDNode *N, SelectionDAG &DAG,
12904                                               bool AllOnes,
12905 const RISCVSubtarget &Subtarget) {
12906 SDValue N0 = N->getOperand(0);
12907 SDValue N1 = N->getOperand(1);
12908 if (SDValue Result = combineSelectAndUse(N, N0, N1, DAG, AllOnes, Subtarget))
12909 return Result;
12910 if (SDValue Result = combineSelectAndUse(N, N1, N0, DAG, AllOnes, Subtarget))
12911 return Result;
12912 return SDValue();
12913}
12914
12915// Transform (add (mul x, c0), c1) ->
12916// (add (mul (add x, c1/c0), c0), c1%c0).
12917// if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case
12918// that should be excluded is when c0*(c1/c0) is simm12, which will lead
12919// to an infinite loop in DAGCombine if transformed.
12920// Or transform (add (mul x, c0), c1) ->
12921// (add (mul (add x, c1/c0+1), c0), c1%c0-c0),
12922// if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner
12923// case that should be excluded is when c0*(c1/c0+1) is simm12, which will
12924// lead to an infinite loop in DAGCombine if transformed.
12925// Or transform (add (mul x, c0), c1) ->
12926// (add (mul (add x, c1/c0-1), c0), c1%c0+c0),
12927// if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner
12928// case that should be excluded is when c0*(c1/c0-1) is simm12, which will
12929// lead to an infinite loop in DAGCombine if transformed.
12930// Or transform (add (mul x, c0), c1) ->
12931// (mul (add x, c1/c0), c0).
12932// if c1%c0 is zero, and c1/c0 is simm12 while c1 is not.
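// For example, (add (mul x, 5), 10007) -> (add (mul (add x, 2001), 5), 2):
// 10007 is not simm12 and needs an extra instruction pair to materialize,
// while 2001 and 2 both fit directly in ADDI immediates.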
12933 static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG,
12934                                      const RISCVSubtarget &Subtarget) {
12935 // Skip for vector types and larger types.
12936 EVT VT = N->getValueType(0);
12937 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
12938 return SDValue();
12939 // The first operand node must be a MUL and has no other use.
12940 SDValue N0 = N->getOperand(0);
12941 if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL)
12942 return SDValue();
12943 // Check if c0 and c1 match above conditions.
12944 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
12945 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
12946 if (!N0C || !N1C)
12947 return SDValue();
12948 // If N0C has multiple uses it's possible one of the cases in
12949 // DAGCombiner::isMulAddWithConstProfitable will be true, which would result
12950 // in an infinite loop.
12951 if (!N0C->hasOneUse())
12952 return SDValue();
12953 int64_t C0 = N0C->getSExtValue();
12954 int64_t C1 = N1C->getSExtValue();
12955 int64_t CA, CB;
12956 if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(C1))
12957 return SDValue();
12958 // Search for proper CA (non-zero) and CB that both are simm12.
12959 if ((C1 / C0) != 0 && isInt<12>(C1 / C0) && isInt<12>(C1 % C0) &&
12960 !isInt<12>(C0 * (C1 / C0))) {
12961 CA = C1 / C0;
12962 CB = C1 % C0;
12963 } else if ((C1 / C0 + 1) != 0 && isInt<12>(C1 / C0 + 1) &&
12964 isInt<12>(C1 % C0 - C0) && !isInt<12>(C0 * (C1 / C0 + 1))) {
12965 CA = C1 / C0 + 1;
12966 CB = C1 % C0 - C0;
12967 } else if ((C1 / C0 - 1) != 0 && isInt<12>(C1 / C0 - 1) &&
12968 isInt<12>(C1 % C0 + C0) && !isInt<12>(C0 * (C1 / C0 - 1))) {
12969 CA = C1 / C0 - 1;
12970 CB = C1 % C0 + C0;
12971 } else
12972 return SDValue();
12973 // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0).
12974 SDLoc DL(N);
12975 SDValue New0 = DAG.getNode(ISD::ADD, DL, VT, N0->getOperand(0),
12976 DAG.getConstant(CA, DL, VT));
12977 SDValue New1 =
12978 DAG.getNode(ISD::MUL, DL, VT, New0, DAG.getConstant(C0, DL, VT));
12979 return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getConstant(CB, DL, VT));
12980}
12981
12982// add (zext, zext) -> zext (add (zext, zext))
12983// sub (zext, zext) -> sext (sub (zext, zext))
12984// mul (zext, zext) -> zext (mul (zext, zext))
12985// sdiv (zext, zext) -> zext (sdiv (zext, zext))
12986// udiv (zext, zext) -> zext (udiv (zext, zext))
12987// srem (zext, zext) -> zext (srem (zext, zext))
12988// urem (zext, zext) -> zext (urem (zext, zext))
12989//
12990 // where the sum of the extend widths matches, and the range of the bin op
12991// fits inside the width of the narrower bin op. (For profitability on rvv, we
12992// use a power of two for both inner and outer extend.)
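// For example, assuming all the types involved are legal:
//   add (zext v4i8 a to v4i32), (zext v4i8 b to v4i32)
//     -> zext (add (zext a to v4i16), (zext b to v4i16)) to v4i32
// since the 8-bit inputs can never overflow a 16-bit add.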
12993 static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG) {
12994
12995 EVT VT = N->getValueType(0);
12996 if (!VT.isVector() || !DAG.getTargetLoweringInfo().isTypeLegal(VT))
12997 return SDValue();
12998
12999 SDValue N0 = N->getOperand(0);
13000 SDValue N1 = N->getOperand(1);
13001   if (N0.getOpcode() != ISD::ZERO_EXTEND || N1.getOpcode() != ISD::ZERO_EXTEND)
13002     return SDValue();
13003 if (!N0.hasOneUse() || !N1.hasOneUse())
13004 return SDValue();
13005
13006 SDValue Src0 = N0.getOperand(0);
13007 SDValue Src1 = N1.getOperand(0);
13008 EVT SrcVT = Src0.getValueType();
13009 if (!DAG.getTargetLoweringInfo().isTypeLegal(SrcVT) ||
13010 SrcVT != Src1.getValueType() || SrcVT.getScalarSizeInBits() < 8 ||
13011 SrcVT.getScalarSizeInBits() >= VT.getScalarSizeInBits() / 2)
13012 return SDValue();
13013
13014 LLVMContext &C = *DAG.getContext();
13015   EVT ElemVT = VT.getVectorElementType().getHalfSizedIntegerVT(C);
13016   EVT NarrowVT = EVT::getVectorVT(C, ElemVT, VT.getVectorElementCount());
13017
13018 Src0 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src0), NarrowVT, Src0);
13019 Src1 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src1), NarrowVT, Src1);
13020
13021 // Src0 and Src1 are zero extended, so they're always positive if signed.
13022 //
13023 // sub can produce a negative from two positive operands, so it needs sign
13024 // extended. Other nodes produce a positive from two positive operands, so
13025 // zero extend instead.
13026 unsigned OuterExtend =
13027 N->getOpcode() == ISD::SUB ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
13028
13029 return DAG.getNode(
13030 OuterExtend, SDLoc(N), VT,
13031 DAG.getNode(N->getOpcode(), SDLoc(N), NarrowVT, Src0, Src1));
13032}
13033
13034// Try to turn (add (xor bool, 1), -1) into (neg bool).
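// When bool is known to be 0 or 1, (xor bool, 1) is 1 - bool, so adding -1
// yields -bool, i.e. (sub 0, bool).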
13035 static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG) {
13036   SDValue N0 = N->getOperand(0);
13037 SDValue N1 = N->getOperand(1);
13038 EVT VT = N->getValueType(0);
13039 SDLoc DL(N);
13040
13041 // RHS should be -1.
13042 if (!isAllOnesConstant(N1))
13043 return SDValue();
13044
13045 // Look for (xor X, 1).
13046 if (N0.getOpcode() != ISD::XOR || !isOneConstant(N0.getOperand(1)))
13047 return SDValue();
13048
13049 // First xor input should be 0 or 1.
13050   APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
13051   if (!DAG.MaskedValueIsZero(N0.getOperand(0), Mask))
13052 return SDValue();
13053
13054 // Emit a negate of the setcc.
13055 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
13056 N0.getOperand(0));
13057}
13058
13059 static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG,
13060                                  const RISCVSubtarget &Subtarget) {
13061 if (SDValue V = combineAddOfBooleanXor(N, DAG))
13062 return V;
13063 if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget))
13064 return V;
13065 if (SDValue V = transformAddShlImm(N, DAG, Subtarget))
13066 return V;
13067 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
13068 return V;
13069 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
13070 return V;
13071 if (SDValue V = combineBinOpOfZExt(N, DAG))
13072 return V;
13073
13074 // fold (add (select lhs, rhs, cc, 0, y), x) ->
13075 // (select lhs, rhs, cc, x, (add x, y))
13076 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
13077}
13078
13079 // Try to turn a sub with a boolean RHS and a constant LHS into an addi.
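// For example, (sub 5, (seteq x, y)) -> (add (setne x, y), 4).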
13080 static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG) {
13081   SDValue N0 = N->getOperand(0);
13082 SDValue N1 = N->getOperand(1);
13083 EVT VT = N->getValueType(0);
13084 SDLoc DL(N);
13085
13086 // Require a constant LHS.
13087 auto *N0C = dyn_cast<ConstantSDNode>(N0);
13088 if (!N0C)
13089 return SDValue();
13090
13091 // All our optimizations involve subtracting 1 from the immediate and forming
13092 // an ADDI. Make sure the new immediate is valid for an ADDI.
13093 APInt ImmValMinus1 = N0C->getAPIntValue() - 1;
13094 if (!ImmValMinus1.isSignedIntN(12))
13095 return SDValue();
13096
13097 SDValue NewLHS;
13098 if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse()) {
13099 // (sub constant, (setcc x, y, eq/neq)) ->
13100 // (add (setcc x, y, neq/eq), constant - 1)
13101 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
13102 EVT SetCCOpVT = N1.getOperand(0).getValueType();
13103 if (!isIntEqualitySetCC(CCVal) || !SetCCOpVT.isInteger())
13104 return SDValue();
13105 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
13106 NewLHS =
13107 DAG.getSetCC(SDLoc(N1), VT, N1.getOperand(0), N1.getOperand(1), CCVal);
13108 } else if (N1.getOpcode() == ISD::XOR && isOneConstant(N1.getOperand(1)) &&
13109 N1.getOperand(0).getOpcode() == ISD::SETCC) {
13110 // (sub C, (xor (setcc), 1)) -> (add (setcc), C-1).
13111 // Since setcc returns a bool the xor is equivalent to 1-setcc.
13112 NewLHS = N1.getOperand(0);
13113 } else
13114 return SDValue();
13115
13116 SDValue NewRHS = DAG.getConstant(ImmValMinus1, DL, VT);
13117 return DAG.getNode(ISD::ADD, DL, VT, NewLHS, NewRHS);
13118}
13119
13120 static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG,
13121                                  const RISCVSubtarget &Subtarget) {
13122 if (SDValue V = combineSubOfBoolean(N, DAG))
13123 return V;
13124
13125 EVT VT = N->getValueType(0);
13126 SDValue N0 = N->getOperand(0);
13127 SDValue N1 = N->getOperand(1);
13128 // fold (sub 0, (setcc x, 0, setlt)) -> (sra x, xlen - 1)
13129 if (isNullConstant(N0) && N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
13130 isNullConstant(N1.getOperand(1))) {
13131 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
13132 if (CCVal == ISD::SETLT) {
13133 SDLoc DL(N);
13134 unsigned ShAmt = N0.getValueSizeInBits() - 1;
13135 return DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0),
13136 DAG.getConstant(ShAmt, DL, VT));
13137 }
13138 }
13139
13140 if (SDValue V = combineBinOpOfZExt(N, DAG))
13141 return V;
13142
13143 // fold (sub x, (select lhs, rhs, cc, 0, y)) ->
13144 // (select lhs, rhs, cc, x, (sub x, y))
13145 return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false, Subtarget);
13146}
13147
13148// Apply DeMorgan's law to (and/or (xor X, 1), (xor Y, 1)) if X and Y are 0/1.
13149// Legalizing setcc can introduce xors like this. Doing this transform reduces
13150// the number of xors and may allow the xor to fold into a branch condition.
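// For example, with X and Y known to be 0 or 1:
//   (and (xor X, 1), (xor Y, 1)) -> (xor (or X, Y), 1)
//   (or  (xor X, 1), (xor Y, 1)) -> (xor (and X, Y), 1)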
13151 static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG) {
13152   SDValue N0 = N->getOperand(0);
13153 SDValue N1 = N->getOperand(1);
13154 bool IsAnd = N->getOpcode() == ISD::AND;
13155
13156 if (N0.getOpcode() != ISD::XOR || N1.getOpcode() != ISD::XOR)
13157 return SDValue();
13158
13159 if (!N0.hasOneUse() || !N1.hasOneUse())
13160 return SDValue();
13161
13162 SDValue N01 = N0.getOperand(1);
13163 SDValue N11 = N1.getOperand(1);
13164
13165 // For AND, SimplifyDemandedBits may have turned one of the (xor X, 1) into
13166 // (xor X, -1) based on the upper bits of the other operand being 0. If the
13167 // operation is And, allow one of the Xors to use -1.
13168 if (isOneConstant(N01)) {
13169 if (!isOneConstant(N11) && !(IsAnd && isAllOnesConstant(N11)))
13170 return SDValue();
13171 } else if (isOneConstant(N11)) {
13172 // N01 and N11 being 1 was already handled. Handle N11==1 and N01==-1.
13173 if (!(IsAnd && isAllOnesConstant(N01)))
13174 return SDValue();
13175 } else
13176 return SDValue();
13177
13178 EVT VT = N->getValueType(0);
13179
13180 SDValue N00 = N0.getOperand(0);
13181 SDValue N10 = N1.getOperand(0);
13182
13183 // The LHS of the xors needs to be 0/1.
13184   APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
13185   if (!DAG.MaskedValueIsZero(N00, Mask) || !DAG.MaskedValueIsZero(N10, Mask))
13186 return SDValue();
13187
13188 // Invert the opcode and insert a new xor.
13189 SDLoc DL(N);
13190 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
13191 SDValue Logic = DAG.getNode(Opc, DL, VT, N00, N10);
13192 return DAG.getNode(ISD::XOR, DL, VT, Logic, DAG.getConstant(1, DL, VT));
13193}
13194
13195 static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG,
13196                                       const RISCVSubtarget &Subtarget) {
13197 SDValue N0 = N->getOperand(0);
13198 EVT VT = N->getValueType(0);
13199
13200 // Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero
13201 // extending X. This is safe since we only need the LSB after the shift and
13202 // shift amounts larger than 31 would produce poison. If we wait until
13203 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
13204 // to use a BEXT instruction.
13205 if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && VT == MVT::i1 &&
13206 N0.getValueType() == MVT::i32 && N0.getOpcode() == ISD::SRL &&
13207 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
13208 SDLoc DL(N0);
13209 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
13210 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
13211 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
13212 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Srl);
13213 }
13214
13215 return SDValue();
13216}
13217
13218 // Combines two comparison operations and a logic operation into one selection
13219 // operation (min, max) and a logic operation. Returns the newly constructed
13220 // node if the conditions for the optimization are satisfied.
13221 static SDValue performANDCombine(SDNode *N,
13222                                  TargetLowering::DAGCombinerInfo &DCI,
13223                                  const RISCVSubtarget &Subtarget) {
13224 SelectionDAG &DAG = DCI.DAG;
13225
13226 SDValue N0 = N->getOperand(0);
13227 // Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero
13228 // extending X. This is safe since we only need the LSB after the shift and
13229 // shift amounts larger than 31 would produce poison. If we wait until
13230 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
13231 // to use a BEXT instruction.
13232 if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
13233 N->getValueType(0) == MVT::i32 && isOneConstant(N->getOperand(1)) &&
13234 N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) &&
13235 N0.hasOneUse()) {
13236 SDLoc DL(N);
13237 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
13238 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
13239 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
13240 SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Srl,
13241 DAG.getConstant(1, DL, MVT::i64));
13242 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
13243 }
13244
13245 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
13246 return V;
13247 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
13248 return V;
13249
13250 if (DCI.isAfterLegalizeDAG())
13251 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
13252 return V;
13253
13254 // fold (and (select lhs, rhs, cc, -1, y), x) ->
13255 // (select lhs, rhs, cc, x, (and x, y))
13256 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true, Subtarget);
13257}
13258
13259// Try to pull an xor with 1 through a select idiom that uses czero_eqz/nez.
13260 // FIXME: Generalize to other binary operators with the same operand.
13261 static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1,
13262                                 SelectionDAG &DAG) {
13263 assert(N->getOpcode() == ISD::OR && "Unexpected opcode");
13264
13265 if (N0.getOpcode() != RISCVISD::CZERO_EQZ ||
13266       N1.getOpcode() != RISCVISD::CZERO_NEZ ||
13267       !N0.hasOneUse() || !N1.hasOneUse())
13268 return SDValue();
13269
13270 // Should have the same condition.
13271 SDValue Cond = N0.getOperand(1);
13272 if (Cond != N1.getOperand(1))
13273 return SDValue();
13274
13275 SDValue TrueV = N0.getOperand(0);
13276 SDValue FalseV = N1.getOperand(0);
13277
13278 if (TrueV.getOpcode() != ISD::XOR || FalseV.getOpcode() != ISD::XOR ||
13279 TrueV.getOperand(1) != FalseV.getOperand(1) ||
13280 !isOneConstant(TrueV.getOperand(1)) ||
13281 !TrueV.hasOneUse() || !FalseV.hasOneUse())
13282 return SDValue();
13283
13284 EVT VT = N->getValueType(0);
13285 SDLoc DL(N);
13286
13287 SDValue NewN0 = DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV.getOperand(0),
13288 Cond);
13289 SDValue NewN1 = DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV.getOperand(0),
13290 Cond);
13291 SDValue NewOr = DAG.getNode(ISD::OR, DL, VT, NewN0, NewN1);
13292 return DAG.getNode(ISD::XOR, DL, VT, NewOr, TrueV.getOperand(1));
13293}
13294
13295 static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
13296                                 const RISCVSubtarget &Subtarget) {
13297 SelectionDAG &DAG = DCI.DAG;
13298
13299 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
13300 return V;
13301 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
13302 return V;
13303
13304 if (DCI.isAfterLegalizeDAG())
13305 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
13306 return V;
13307
13308   // Look for Or of CZERO_EQZ/NEZ with the same condition (the select idiom).
13309 // We may be able to pull a common operation out of the true and false value.
13310 SDValue N0 = N->getOperand(0);
13311 SDValue N1 = N->getOperand(1);
13312 if (SDValue V = combineOrOfCZERO(N, N0, N1, DAG))
13313 return V;
13314 if (SDValue V = combineOrOfCZERO(N, N1, N0, DAG))
13315 return V;
13316
13317 // fold (or (select cond, 0, y), x) ->
13318 // (select cond, x, (or x, y))
13319 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
13320}
13321
13322 static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
13323                                  const RISCVSubtarget &Subtarget) {
13324 SDValue N0 = N->getOperand(0);
13325 SDValue N1 = N->getOperand(1);
13326
13327 // Pre-promote (i32 (xor (shl -1, X), ~0)) on RV64 with Zbs so we can use
13328   // (ADDI (BSET X0, X), -1). If we wait until type legalization, we'll create
13329   // RISCVISD::SLLW and we can't recover it to use a BSET instruction.
13330 if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
13331 N->getValueType(0) == MVT::i32 && isAllOnesConstant(N1) &&
13332 N0.getOpcode() == ISD::SHL && isAllOnesConstant(N0.getOperand(0)) &&
13333 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
13334 SDLoc DL(N);
13335 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
13336 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
13337 SDValue Shl = DAG.getNode(ISD::SHL, DL, MVT::i64, Op0, Op1);
13338 SDValue And = DAG.getNOT(DL, Shl, MVT::i64);
13339 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
13340 }
13341
13342 // fold (xor (sllw 1, x), -1) -> (rolw ~1, x)
13343 // NOTE: Assumes ROL being legal means ROLW is legal.
13344 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13345 if (N0.getOpcode() == RISCVISD::SLLW &&
13346       isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0)) &&
13347       TLI.isOperationLegal(ISD::ROTL, MVT::i64)) {
13348 SDLoc DL(N);
13349 return DAG.getNode(RISCVISD::ROLW, DL, MVT::i64,
13350 DAG.getConstant(~1, DL, MVT::i64), N0.getOperand(1));
13351 }
13352
13353 // Fold (xor (setcc constant, y, setlt), 1) -> (setcc y, constant + 1, setlt)
13354 if (N0.getOpcode() == ISD::SETCC && isOneConstant(N1) && N0.hasOneUse()) {
13355 auto *ConstN00 = dyn_cast<ConstantSDNode>(N0.getOperand(0));
13356 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
13357 if (ConstN00 && CC == ISD::SETLT) {
13358 EVT VT = N0.getValueType();
13359 SDLoc DL(N0);
13360 const APInt &Imm = ConstN00->getAPIntValue();
13361 if ((Imm + 1).isSignedIntN(12))
13362 return DAG.getSetCC(DL, VT, N0.getOperand(1),
13363 DAG.getConstant(Imm + 1, DL, VT), CC);
13364 }
13365 }
13366
13367 // Combine (xor (trunc (X cc Y)) 1) -> (trunc (X !cc Y)). This is needed with
13368 // RV64LegalI32 when the setcc is created after type legalization. An i1 xor
13369 // would have been promoted to i32, but the setcc would have i64 result.
13370 if (N->getValueType(0) == MVT::i32 && N0.getOpcode() == ISD::TRUNCATE &&
13371 isOneConstant(N1) && N0.getOperand(0).getOpcode() == ISD::SETCC) {
13372 SDValue N00 = N0.getOperand(0);
13373 SDLoc DL(N);
13374 SDValue LHS = N00.getOperand(0);
13375 SDValue RHS = N00.getOperand(1);
13376 SDValue CC = N00.getOperand(2);
13377 ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
13378 LHS.getValueType());
13379 SDValue Setcc = DAG.getSetCC(SDLoc(N00), N0.getOperand(0).getValueType(),
13380 LHS, RHS, NotCC);
13381 return DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N->getValueType(0), Setcc);
13382 }
13383
13384 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
13385 return V;
13386 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
13387 return V;
13388
13389 // fold (xor (select cond, 0, y), x) ->
13390 // (select cond, x, (xor x, y))
13391 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
13392}
13393
13394// Try to expand a scalar multiply to a faster sequence.
13395 static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
13396                          TargetLowering::DAGCombinerInfo &DCI,
13397                          const RISCVSubtarget &Subtarget) {
13398
13399 EVT VT = N->getValueType(0);
13400
13401 // LI + MUL is usually smaller than the alternative sequence.
13402   if (DAG.getMachineFunction().getFunction().hasMinSize())
13403     return SDValue();
13404
13405 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
13406 return SDValue();
13407
13408 if (VT != Subtarget.getXLenVT())
13409 return SDValue();
13410
13411 if (!Subtarget.hasStdExtZba() && !Subtarget.hasVendorXTHeadBa())
13412 return SDValue();
13413
13414 ConstantSDNode *CNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
13415 if (!CNode)
13416 return SDValue();
13417 uint64_t MulAmt = CNode->getZExtValue();
13418
13419   // WARNING: The code below is knowingly incorrect with regard to undef semantics.
13420 // We're adding additional uses of X here, and in principle, we should be freezing
13421 // X before doing so. However, adding freeze here causes real regressions, and no
13422 // other target properly freezes X in these cases either.
13423 SDValue X = N->getOperand(0);
13424
13425 for (uint64_t Divisor : {3, 5, 9}) {
13426 if (MulAmt % Divisor != 0)
13427 continue;
13428 uint64_t MulAmt2 = MulAmt / Divisor;
13429 // 3/5/9 * 2^N -> shXadd (sll X, C), (sll X, C)
13430 // Matched in tablegen, avoid perturbing patterns.
13431 if (isPowerOf2_64(MulAmt2))
13432 return SDValue();
13433
13434 // 3/5/9 * 3/5/9 -> shXadd (shYadd X, X), (shYadd X, X)
13435 if (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9) {
13436 SDLoc DL(N);
13437 SDValue Mul359 =
13438 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13439 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
13440 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
13441 DAG.getConstant(Log2_64(MulAmt2 - 1), DL, VT),
13442 Mul359);
13443 }
13444 }
13445
13446   // If this is a power of 2 + 2/4/8, we can use a shift followed by a single
13447   // shXadd. First check if this is a sum of two powers of 2 because that's
13448   // easy. Then count the trailing zeros to find the low set bit.
13449 if (isPowerOf2_64(MulAmt & (MulAmt - 1))) {
13450 unsigned ScaleShift = llvm::countr_zero(MulAmt);
13451 if (ScaleShift >= 1 && ScaleShift < 4) {
13452 unsigned ShiftAmt = Log2_64((MulAmt & (MulAmt - 1)));
13453 SDLoc DL(N);
13454 SDValue Shift1 =
13455 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
13456 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13457 DAG.getConstant(ScaleShift, DL, VT), Shift1);
13458 }
13459 }
13460
13461 // 2^(1,2,3) * 3,5,9 + 1 -> (shXadd (shYadd x, x), x)
13462   // This is the two instruction form; there are also three instruction
13463 // variants we could implement. e.g.
13464 // (2^(1,2,3) * 3,5,9 + 1) << C2
13465 // 2^(C1>3) * 3,5,9 +/- 1
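  // For example, MulAmt == 11 (5 * 2 + 1) gives C = 10, TZ = 1 and
  // Divisor = 5, so we emit (sh1add (sh2add x, x), x), i.e. ((x * 5) << 1) + x.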
13466 for (uint64_t Divisor : {3, 5, 9}) {
13467 uint64_t C = MulAmt - 1;
13468 if (C <= Divisor)
13469 continue;
13470 unsigned TZ = llvm::countr_zero(C);
13471 if ((C >> TZ) == Divisor && (TZ == 1 || TZ == 2 || TZ == 3)) {
13472 SDLoc DL(N);
13473 SDValue Mul359 =
13474 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13475 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
13476 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
13477 DAG.getConstant(TZ, DL, VT), X);
13478 }
13479 }
13480
13481 // 2^n + 2/4/8 + 1 -> (add (shl X, C1), (shXadd X, X))
13482 if (MulAmt > 2 && isPowerOf2_64((MulAmt - 1) & (MulAmt - 2))) {
13483 unsigned ScaleShift = llvm::countr_zero(MulAmt - 1);
13484 if (ScaleShift >= 1 && ScaleShift < 4) {
13485 unsigned ShiftAmt = Log2_64(((MulAmt - 1) & (MulAmt - 2)));
13486 SDLoc DL(N);
13487 SDValue Shift1 =
13488 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
13489 return DAG.getNode(ISD::ADD, DL, VT, Shift1,
13490 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13491 DAG.getConstant(ScaleShift, DL, VT), X));
13492 }
13493 }
13494
13495 // 2^N - 3/5/9 --> (sub (shl X, C1), (shXadd X, x))
13496 for (uint64_t Offset : {3, 5, 9}) {
13497 if (isPowerOf2_64(MulAmt + Offset)) {
13498 SDLoc DL(N);
13499 SDValue Shift1 =
13500 DAG.getNode(ISD::SHL, DL, VT, X,
13501 DAG.getConstant(Log2_64(MulAmt + Offset), DL, VT));
13502 SDValue Mul359 = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13503 DAG.getConstant(Log2_64(Offset - 1), DL, VT),
13504 X);
13505 return DAG.getNode(ISD::SUB, DL, VT, Shift1, Mul359);
13506 }
13507 }
13508
13509 return SDValue();
13510}
13511
13512
13513 static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
13514                                  TargetLowering::DAGCombinerInfo &DCI,
13515                                  const RISCVSubtarget &Subtarget) {
13516 EVT VT = N->getValueType(0);
13517 if (!VT.isVector())
13518 return expandMul(N, DAG, DCI, Subtarget);
13519
13520 SDLoc DL(N);
13521 SDValue N0 = N->getOperand(0);
13522 SDValue N1 = N->getOperand(1);
13523 SDValue MulOper;
13524 unsigned AddSubOpc;
13525
13526 // vmadd: (mul (add x, 1), y) -> (add (mul x, y), y)
13527 // (mul x, add (y, 1)) -> (add x, (mul x, y))
13528 // vnmsub: (mul (sub 1, x), y) -> (sub y, (mul x, y))
13529 // (mul x, (sub 1, y)) -> (sub x, (mul x, y))
13530 auto IsAddSubWith1 = [&](SDValue V) -> bool {
13531 AddSubOpc = V->getOpcode();
13532 if ((AddSubOpc == ISD::ADD || AddSubOpc == ISD::SUB) && V->hasOneUse()) {
13533 SDValue Opnd = V->getOperand(1);
13534 MulOper = V->getOperand(0);
13535 if (AddSubOpc == ISD::SUB)
13536 std::swap(Opnd, MulOper);
13537 if (isOneOrOneSplat(Opnd))
13538 return true;
13539 }
13540 return false;
13541 };
13542
13543 if (IsAddSubWith1(N0)) {
13544 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N1, MulOper);
13545 return DAG.getNode(AddSubOpc, DL, VT, N1, MulVal);
13546 }
13547
13548 if (IsAddSubWith1(N1)) {
13549 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N0, MulOper);
13550 return DAG.getNode(AddSubOpc, DL, VT, N0, MulVal);
13551 }
13552
13553 if (SDValue V = combineBinOpOfZExt(N, DAG))
13554 return V;
13555
13556 return SDValue();
13557}
13558
13559/// According to the property that indexed load/store instructions zero-extend
13560/// their indices, try to narrow the type of the index operand.
13561static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG) {
13562 if (isIndexTypeSigned(IndexType))
13563 return false;
13564
13565 if (!N->hasOneUse())
13566 return false;
13567
13568 EVT VT = N.getValueType();
13569 SDLoc DL(N);
13570
13571 // In general, what we're doing here is seeing if we can sink a truncate to
13572 // a smaller element type into the expression tree building our index.
13573 // TODO: We can generalize this and handle a bunch more cases if useful.
13574
13575 // Narrow a buildvector to the narrowest element type. This requires less
13576 // work and less register pressure at high LMUL, and creates smaller constants
13577 // which may be cheaper to materialize.
13578 if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) {
13579 KnownBits Known = DAG.computeKnownBits(N);
13580 unsigned ActiveBits = std::max(8u, Known.countMaxActiveBits());
13581 LLVMContext &C = *DAG.getContext();
13582 EVT ResultVT = EVT::getIntegerVT(C, ActiveBits).getRoundIntegerType(C);
13583 if (ResultVT.bitsLT(VT.getVectorElementType())) {
13584 N = DAG.getNode(ISD::TRUNCATE, DL,
13585 VT.changeVectorElementType(ResultVT), N);
13586 return true;
13587 }
13588 }
13589
13590 // Handle the pattern (shl (zext x to ty), C) and bits(x) + C < bits(ty).
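  // For example, (shl (zext nxv1i8 x to nxv1i64), splat 2) only needs
  // 8 + 2 = 10 bits, so the index can be narrowed to
  // (shl (zext x to nxv1i16), splat 2).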
13591 if (N.getOpcode() != ISD::SHL)
13592 return false;
13593
13594 SDValue N0 = N.getOperand(0);
13595 if (N0.getOpcode() != ISD::ZERO_EXTEND &&
13596       N0.getOpcode() != RISCVISD::VZEXT_VL)
13597     return false;
13598 if (!N0->hasOneUse())
13599 return false;
13600
13601 APInt ShAmt;
13602 SDValue N1 = N.getOperand(1);
13603 if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt))
13604 return false;
13605
13606 SDValue Src = N0.getOperand(0);
13607 EVT SrcVT = Src.getValueType();
13608 unsigned SrcElen = SrcVT.getScalarSizeInBits();
13609 unsigned ShAmtV = ShAmt.getZExtValue();
13610 unsigned NewElen = PowerOf2Ceil(SrcElen + ShAmtV);
13611 NewElen = std::max(NewElen, 8U);
13612
13613 // Skip if NewElen is not narrower than the original extended type.
13614 if (NewElen >= N0.getValueType().getScalarSizeInBits())
13615 return false;
13616
13617 EVT NewEltVT = EVT::getIntegerVT(*DAG.getContext(), NewElen);
13618 EVT NewVT = SrcVT.changeVectorElementType(NewEltVT);
13619
13620 SDValue NewExt = DAG.getNode(N0->getOpcode(), DL, NewVT, N0->ops());
13621 SDValue NewShAmtVec = DAG.getConstant(ShAmtV, DL, NewVT);
13622 N = DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec);
13623 return true;
13624}
13625
13626// Replace (seteq (i64 (and X, 0xffffffff)), C1) with
13627// (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
13628// bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg
13629// can become a sext.w instead of a shift pair.
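// For example, (seteq (and X, 0xffffffff), 0x80000000) becomes
// (seteq (sext_inreg X, i32), 0xffffffff80000000); the new constant is a
// single LUI and the sext_inreg selects to a single sext.w.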
13630 static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
13631                                    const RISCVSubtarget &Subtarget) {
13632 SDValue N0 = N->getOperand(0);
13633 SDValue N1 = N->getOperand(1);
13634 EVT VT = N->getValueType(0);
13635 EVT OpVT = N0.getValueType();
13636
13637 if (OpVT != MVT::i64 || !Subtarget.is64Bit())
13638 return SDValue();
13639
13640 // RHS needs to be a constant.
13641 auto *N1C = dyn_cast<ConstantSDNode>(N1);
13642 if (!N1C)
13643 return SDValue();
13644
13645 // LHS needs to be (and X, 0xffffffff).
13646 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
13647 !isa<ConstantSDNode>(N0.getOperand(1)) ||
13648 N0.getConstantOperandVal(1) != UINT64_C(0xffffffff))
13649 return SDValue();
13650
13651 // Looking for an equality compare.
13652 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
13653 if (!isIntEqualitySetCC(Cond))
13654 return SDValue();
13655
13656   // Don't do this if the sign bit is provably zero; it will be turned back into
13657 // an AND.
13658 APInt SignMask = APInt::getOneBitSet(64, 31);
13659 if (DAG.MaskedValueIsZero(N0.getOperand(0), SignMask))
13660 return SDValue();
13661
13662 const APInt &C1 = N1C->getAPIntValue();
13663
13664 SDLoc dl(N);
13665 // If the constant is larger than 2^32 - 1 it is impossible for both sides
13666 // to be equal.
13667 if (C1.getActiveBits() > 32)
13668 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
13669
13670 SDValue SExtOp = DAG.getNode(ISD::SIGN_EXTEND_INREG, N, OpVT,
13671 N0.getOperand(0), DAG.getValueType(MVT::i32));
13672 return DAG.getSetCC(dl, VT, SExtOp, DAG.getConstant(C1.trunc(32).sext(64),
13673 dl, OpVT), Cond);
13674}
13675
13676static SDValue
13677 performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG,
13678                                 const RISCVSubtarget &Subtarget) {
13679 SDValue Src = N->getOperand(0);
13680 EVT VT = N->getValueType(0);
13681
13682 // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X)
13683 if (Src.getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
13684 cast<VTSDNode>(N->getOperand(1))->getVT().bitsGE(MVT::i16))
13685 return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, SDLoc(N), VT,
13686 Src.getOperand(0));
13687
13688 return SDValue();
13689}
13690
13691namespace {
13692// Forward declaration of the structure holding the necessary information to
13693// apply a combine.
13694struct CombineResult;
13695
13696enum ExtKind : uint8_t { ZExt = 1 << 0, SExt = 1 << 1, FPExt = 1 << 2 };
13697/// Helper class for folding sign/zero extensions.
13698/// In particular, this class is used for the following combines:
13699/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
13700/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
13701/// mul | mul_vl -> vwmul(u) | vwmul_su
13702/// shl | shl_vl -> vwsll
13703/// fadd -> vfwadd | vfwadd_w
13704/// fsub -> vfwsub | vfwsub_w
13705/// fmul -> vfwmul
13706/// An object of this class represents an operand of the operation we want to
13707/// combine.
13708/// E.g., when trying to combine `mul_vl a, b`, we will have one instance of
13709/// NodeExtensionHelper for `a` and one for `b`.
13710///
13711/// This class abstracts away how the extension is materialized and
13712/// how its number of users affect the combines.
13713///
13714/// In particular:
13715/// - VWADD_W is conceptually == add(op0, sext(op1))
13716/// - VWADDU_W == add(op0, zext(op1))
13717/// - VWSUB_W == sub(op0, sext(op1))
13718/// - VWSUBU_W == sub(op0, zext(op1))
13719/// - VFWADD_W == fadd(op0, fpext(op1))
13720/// - VFWSUB_W == fsub(op0, fpext(op1))
13721/// And VMV_V_X_VL, depending on the value, is conceptually equivalent to
13722/// zext|sext(smaller_value).
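///
/// For example, (mul_vl (vsext_vl a), (vsext_vl b)) can be combined into
/// (vwmul_vl a, b), and (add_vl x, (vzext_vl y)) into (vwaddu_w_vl x, y).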
13723struct NodeExtensionHelper {
13724 /// Records if this operand is like being zero extended.
13725 bool SupportsZExt;
13726 /// Records if this operand is like being sign extended.
13727 /// Note: SupportsZExt and SupportsSExt are not mutually exclusive. For
13728 /// instance, a splat constant (e.g., 3), would support being both sign and
13729 /// zero extended.
13730 bool SupportsSExt;
13731   /// Records if this operand is like being floating-point extended.
13732 bool SupportsFPExt;
13733 /// This boolean captures whether we care if this operand would still be
13734 /// around after the folding happens.
13735 bool EnforceOneUse;
13736 /// Original value that this NodeExtensionHelper represents.
13737 SDValue OrigOperand;
13738
13739 /// Get the value feeding the extension or the value itself.
13740 /// E.g., for zext(a), this would return a.
13741 SDValue getSource() const {
13742 switch (OrigOperand.getOpcode()) {
13743 case ISD::ZERO_EXTEND:
13744 case ISD::SIGN_EXTEND:
13745 case RISCVISD::VSEXT_VL:
13746 case RISCVISD::VZEXT_VL:
13747     case RISCVISD::FP_EXTEND_VL:
13748       return OrigOperand.getOperand(0);
13749 default:
13750 return OrigOperand;
13751 }
13752 }
13753
13754 /// Check if this instance represents a splat.
13755 bool isSplat() const {
13756 return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL ||
13757 OrigOperand.getOpcode() == ISD::SPLAT_VECTOR;
13758 }
13759
13760 /// Get the extended opcode.
13761 unsigned getExtOpc(ExtKind SupportsExt) const {
13762 switch (SupportsExt) {
13763 case ExtKind::SExt:
13764 return RISCVISD::VSEXT_VL;
13765 case ExtKind::ZExt:
13766 return RISCVISD::VZEXT_VL;
13767 case ExtKind::FPExt:
13768       return RISCVISD::FP_EXTEND_VL;
13769     }
13770 llvm_unreachable("Unknown ExtKind enum");
13771 }
13772
13773 /// Get or create a value that can feed \p Root with the given extension \p
13774   /// SupportsExt. If \p SupportsExt is std::nullopt, this returns the source of this
13775 /// operand. \see ::getSource().
13776 SDValue getOrCreateExtendedOp(SDNode *Root, SelectionDAG &DAG,
13777 const RISCVSubtarget &Subtarget,
13778 std::optional<ExtKind> SupportsExt) const {
13779 if (!SupportsExt.has_value())
13780 return OrigOperand;
13781
13782 MVT NarrowVT = getNarrowType(Root, *SupportsExt);
13783
13784 SDValue Source = getSource();
13785 assert(Subtarget.getTargetLowering()->isTypeLegal(Source.getValueType()));
13786 if (Source.getValueType() == NarrowVT)
13787 return Source;
13788
13789 unsigned ExtOpc = getExtOpc(*SupportsExt);
13790
13791 // If we need an extension, we should be changing the type.
13792 SDLoc DL(OrigOperand);
13793 auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget);
13794 switch (OrigOperand.getOpcode()) {
13795 case ISD::ZERO_EXTEND:
13796 case ISD::SIGN_EXTEND:
13797 case RISCVISD::VSEXT_VL:
13798 case RISCVISD::VZEXT_VL:
13799     case RISCVISD::FP_EXTEND_VL:
13800       return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL);
13801 case ISD::SPLAT_VECTOR:
13802 return DAG.getSplat(NarrowVT, DL, Source.getOperand(0));
13803     case RISCVISD::VMV_V_X_VL:
13804       return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT,
13805 DAG.getUNDEF(NarrowVT), Source.getOperand(1), VL);
13806 default:
13807 // Other opcodes can only come from the original LHS of VW(ADD|SUB)_W_VL
13808 // and that operand should already have the right NarrowVT so no
13809 // extension should be required at this point.
13810 llvm_unreachable("Unsupported opcode");
13811 }
13812 }
13813
13814 /// Helper function to get the narrow type for \p Root.
13815 /// The narrow type is the type of \p Root where we divided the size of each
13816 /// element by 2. E.g., if Root's type <2xi16> -> narrow type <2xi8>.
13817 /// \pre Both the narrow type and the original type should be legal.
13818 static MVT getNarrowType(const SDNode *Root, ExtKind SupportsExt) {
13819 MVT VT = Root->getSimpleValueType(0);
13820
13821 // Determine the narrow size.
13822 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
13823
13824 MVT EltVT = SupportsExt == ExtKind::FPExt
13825 ? MVT::getFloatingPointVT(NarrowSize)
13826 : MVT::getIntegerVT(NarrowSize);
13827
13828 assert((int)NarrowSize >= (SupportsExt == ExtKind::FPExt ? 16 : 8) &&
13829 "Trying to extend something we can't represent");
13830 MVT NarrowVT = MVT::getVectorVT(EltVT, VT.getVectorElementCount());
13831 return NarrowVT;
13832 }
13833
13834 /// Get the opcode to materialize:
13835 /// Opcode(sext(a), sext(b)) -> newOpcode(a, b)
13836 static unsigned getSExtOpcode(unsigned Opcode) {
13837 switch (Opcode) {
13838 case ISD::ADD:
13839 case RISCVISD::ADD_VL:
13840     case RISCVISD::VWADD_W_VL:
13841     case RISCVISD::VWADDU_W_VL:
13842     case ISD::OR:
13843 return RISCVISD::VWADD_VL;
13844 case ISD::SUB:
13845 case RISCVISD::SUB_VL:
13846     case RISCVISD::VWSUB_W_VL:
13847     case RISCVISD::VWSUBU_W_VL:
13848       return RISCVISD::VWSUB_VL;
13849 case ISD::MUL:
13850 case RISCVISD::MUL_VL:
13851 return RISCVISD::VWMUL_VL;
13852 default:
13853 llvm_unreachable("Unexpected opcode");
13854 }
13855 }
13856
13857 /// Get the opcode to materialize:
13858 /// Opcode(zext(a), zext(b)) -> newOpcode(a, b)
13859 static unsigned getZExtOpcode(unsigned Opcode) {
13860 switch (Opcode) {
13861 case ISD::ADD:
13862 case RISCVISD::ADD_VL:
13863     case RISCVISD::VWADD_W_VL:
13864     case RISCVISD::VWADDU_W_VL:
13865     case ISD::OR:
13866 return RISCVISD::VWADDU_VL;
13867 case ISD::SUB:
13868 case RISCVISD::SUB_VL:
13869     case RISCVISD::VWSUB_W_VL:
13870     case RISCVISD::VWSUBU_W_VL:
13871       return RISCVISD::VWSUBU_VL;
13872 case ISD::MUL:
13873 case RISCVISD::MUL_VL:
13874 return RISCVISD::VWMULU_VL;
13875 case ISD::SHL:
13876 case RISCVISD::SHL_VL:
13877 return RISCVISD::VWSLL_VL;
13878 default:
13879 llvm_unreachable("Unexpected opcode");
13880 }
13881 }
13882
13883 /// Get the opcode to materialize:
13884 /// Opcode(fpext(a), fpext(b)) -> newOpcode(a, b)
13885 static unsigned getFPExtOpcode(unsigned Opcode) {
13886 switch (Opcode) {
13887 case RISCVISD::FADD_VL:
13888     case RISCVISD::VFWADD_W_VL:
13889       return RISCVISD::VFWADD_VL;
13890 case RISCVISD::FSUB_VL:
13891     case RISCVISD::VFWSUB_W_VL:
13892       return RISCVISD::VFWSUB_VL;
13893 case RISCVISD::FMUL_VL:
13894 return RISCVISD::VFWMUL_VL;
13895 default:
13896 llvm_unreachable("Unexpected opcode");
13897 }
13898 }
13899
13900 /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) ->
13901 /// newOpcode(a, b).
13902 static unsigned getSUOpcode(unsigned Opcode) {
13903 assert((Opcode == RISCVISD::MUL_VL || Opcode == ISD::MUL) &&
13904 "SU is only supported for MUL");
13905 return RISCVISD::VWMULSU_VL;
13906 }
13907
13908 /// Get the opcode to materialize
13909 /// \p Opcode(a, s|z|fpext(b)) -> newOpcode(a, b).
13910 static unsigned getWOpcode(unsigned Opcode, ExtKind SupportsExt) {
13911 switch (Opcode) {
13912 case ISD::ADD:
13913 case RISCVISD::ADD_VL:
13914 case ISD::OR:
13915 return SupportsExt == ExtKind::SExt ? RISCVISD::VWADD_W_VL
13916                                           : RISCVISD::VWADDU_W_VL;
13917     case ISD::SUB:
13918 case RISCVISD::SUB_VL:
13919 return SupportsExt == ExtKind::SExt ? RISCVISD::VWSUB_W_VL
13920                                           : RISCVISD::VWSUBU_W_VL;
13921     case RISCVISD::FADD_VL:
13922 return RISCVISD::VFWADD_W_VL;
13923 case RISCVISD::FSUB_VL:
13924 return RISCVISD::VFWSUB_W_VL;
13925 default:
13926 llvm_unreachable("Unexpected opcode");
13927 }
13928 }
13929
13930 using CombineToTry = std::function<std::optional<CombineResult>(
13931 SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/,
13932 const NodeExtensionHelper & /*RHS*/, SelectionDAG &,
13933 const RISCVSubtarget &)>;
13934
13935 /// Check if this node needs to be fully folded or extended for all users.
13936 bool needToPromoteOtherUsers() const { return EnforceOneUse; }
13937
13938 void fillUpExtensionSupportForSplat(SDNode *Root, SelectionDAG &DAG,
13939 const RISCVSubtarget &Subtarget) {
13940 unsigned Opc = OrigOperand.getOpcode();
13941 MVT VT = OrigOperand.getSimpleValueType();
13942
13943 assert((Opc == ISD::SPLAT_VECTOR || Opc == RISCVISD::VMV_V_X_VL) &&
13944 "Unexpected Opcode");
13945
13946     // The passthru must be undef for tail agnostic.
13947 if (Opc == RISCVISD::VMV_V_X_VL && !OrigOperand.getOperand(0).isUndef())
13948 return;
13949
13950 // Get the scalar value.
13951 SDValue Op = Opc == ISD::SPLAT_VECTOR ? OrigOperand.getOperand(0)
13952 : OrigOperand.getOperand(1);
13953
13954 // See if we have enough sign bits or zero bits in the scalar to use a
13955 // widening opcode by splatting to smaller element size.
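    // For example, a splat of the constant 3 into an i32 element vector has
    // only 2 significant bits, so it can instead be splatted into an i16
    // element vector and then sign or zero extended.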
13956 unsigned EltBits = VT.getScalarSizeInBits();
13957 unsigned ScalarBits = Op.getValueSizeInBits();
13958 // Make sure we're getting all element bits from the scalar register.
13959 // FIXME: Support implicit sign extension of vmv.v.x?
13960 if (ScalarBits < EltBits)
13961 return;
13962
13963 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
13964 // If the narrow type cannot be expressed with a legal VMV,
13965 // this is not a valid candidate.
13966 if (NarrowSize < 8)
13967 return;
13968
13969 if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize)
13970 SupportsSExt = true;
13971
13972 if (DAG.MaskedValueIsZero(Op,
13973 APInt::getBitsSetFrom(ScalarBits, NarrowSize)))
13974 SupportsZExt = true;
13975
13976 EnforceOneUse = false;
13977 }
13978
13979 /// Helper method to set the various fields of this struct based on the
13980 /// type of \p Root.
13981 void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG,
13982 const RISCVSubtarget &Subtarget) {
13983 SupportsZExt = false;
13984 SupportsSExt = false;
13985 SupportsFPExt = false;
13986 EnforceOneUse = true;
13987 unsigned Opc = OrigOperand.getOpcode();
13988 // For the nodes we handle below, we end up using their inputs directly: see
13989 // getSource(). However since they either don't have a passthru or we check
13990 // that their passthru is undef, we can safely ignore their mask and VL.
13991 switch (Opc) {
13992 case ISD::ZERO_EXTEND:
13993 case ISD::SIGN_EXTEND: {
13994 MVT VT = OrigOperand.getSimpleValueType();
13995 if (!VT.isVector())
13996 break;
13997
13998 SDValue NarrowElt = OrigOperand.getOperand(0);
13999 MVT NarrowVT = NarrowElt.getSimpleValueType();
14000 // i1 types are legal but we can't select V{S,Z}EXT_VLs with them.
14001 if (NarrowVT.getVectorElementType() == MVT::i1)
14002 break;
14003
14004 SupportsZExt = Opc == ISD::ZERO_EXTEND;
14005 SupportsSExt = Opc == ISD::SIGN_EXTEND;
14006 break;
14007 }
14008 case RISCVISD::VZEXT_VL:
14009 SupportsZExt = true;
14010 break;
14011 case RISCVISD::VSEXT_VL:
14012 SupportsSExt = true;
14013 break;
14014     case RISCVISD::FP_EXTEND_VL:
14015       SupportsFPExt = true;
14016 break;
14017 case ISD::SPLAT_VECTOR:
14018     case RISCVISD::VMV_V_X_VL:
14019       fillUpExtensionSupportForSplat(Root, DAG, Subtarget);
14020 break;
14021 default:
14022 break;
14023 }
14024 }
14025
14026 /// Check if \p Root supports any extension folding combines.
14027 static bool isSupportedRoot(const SDNode *Root,
14028 const RISCVSubtarget &Subtarget) {
14029 switch (Root->getOpcode()) {
14030 case ISD::ADD:
14031 case ISD::SUB:
14032 case ISD::MUL: {
14033 return Root->getValueType(0).isScalableVector();
14034 }
14035 case ISD::OR: {
14036 return Root->getValueType(0).isScalableVector() &&
14037 Root->getFlags().hasDisjoint();
14038 }
14039 // Vector Widening Integer Add/Sub/Mul Instructions
14040 case RISCVISD::ADD_VL:
14041 case RISCVISD::MUL_VL:
14042     case RISCVISD::VWADD_W_VL:
14043     case RISCVISD::VWADDU_W_VL:
14044     case RISCVISD::SUB_VL:
14045     case RISCVISD::VWSUB_W_VL:
14046     case RISCVISD::VWSUBU_W_VL:
14047     // Vector Widening Floating-Point Add/Sub/Mul Instructions
14048     case RISCVISD::FADD_VL:
14049     case RISCVISD::FSUB_VL:
14050     case RISCVISD::FMUL_VL:
14051     case RISCVISD::VFWADD_W_VL:
14052     case RISCVISD::VFWSUB_W_VL:
14053       return true;
14054 case ISD::SHL:
14055 return Root->getValueType(0).isScalableVector() &&
14056 Subtarget.hasStdExtZvbb();
14057 case RISCVISD::SHL_VL:
14058 return Subtarget.hasStdExtZvbb();
14059 default:
14060 return false;
14061 }
14062 }
14063
14064 /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx).
14065 NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG,
14066 const RISCVSubtarget &Subtarget) {
14067 assert(isSupportedRoot(Root, Subtarget) &&
14068 "Trying to build an helper with an "
14069 "unsupported root");
14070 assert(OperandIdx < 2 && "Requesting something other than LHS or RHS");
14072 OrigOperand = Root->getOperand(OperandIdx);
14073
14074 unsigned Opc = Root->getOpcode();
14075 switch (Opc) {
14076 // We consider
14077 // VW<ADD|SUB>_W(LHS, RHS) -> <ADD|SUB>(LHS, SEXT(RHS))
14078 // VW<ADD|SUB>U_W(LHS, RHS) -> <ADD|SUB>(LHS, ZEXT(RHS))
14079 // VFW<ADD|SUB>_W(LHS, RHS) -> F<ADD|SUB>(LHS, FPEXT(RHS))
14080 case RISCVISD::VWADD_W_VL:
14081 case RISCVISD::VWADDU_W_VL:
14082 case RISCVISD::VWSUB_W_VL:
14083 case RISCVISD::VWSUBU_W_VL:
14084 case RISCVISD::VFWADD_W_VL:
14085 case RISCVISD::VFWSUB_W_VL:
14086 if (OperandIdx == 1) {
14087 SupportsZExt =
14088 Opc == RISCVISD::VWADDU_W_VL || Opc == RISCVISD::VWSUBU_W_VL;
14089 SupportsSExt =
14090 Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWSUB_W_VL;
14091 SupportsFPExt =
14092 Opc == RISCVISD::VFWADD_W_VL || Opc == RISCVISD::VFWSUB_W_VL;
14093 // There's no existing extension here, so we don't have to worry about
14094 // making sure it gets removed.
14095 EnforceOneUse = false;
14096 break;
14097 }
14098 [[fallthrough]];
14099 default:
14100 fillUpExtensionSupport(Root, DAG, Subtarget);
14101 break;
14102 }
14103 }
14104
14105 /// Helper function to get the Mask and VL from \p Root.
14106 static std::pair<SDValue, SDValue>
14107 getMaskAndVL(const SDNode *Root, SelectionDAG &DAG,
14108 const RISCVSubtarget &Subtarget) {
14109 assert(isSupportedRoot(Root, Subtarget) && "Unexpected root");
14110 switch (Root->getOpcode()) {
14111 case ISD::ADD:
14112 case ISD::SUB:
14113 case ISD::MUL:
14114 case ISD::OR:
14115 case ISD::SHL: {
14116 SDLoc DL(Root);
14117 MVT VT = Root->getSimpleValueType(0);
14118 return getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
14119 }
14120 default:
14121 return std::make_pair(Root->getOperand(3), Root->getOperand(4));
14122 }
14123 }
14124
14125 /// Helper function to check if \p N is commutative with respect to the
14126 /// foldings that are supported by this class.
14127 static bool isCommutative(const SDNode *N) {
14128 switch (N->getOpcode()) {
14129 case ISD::ADD:
14130 case ISD::MUL:
14131 case ISD::OR:
14132 case RISCVISD::ADD_VL:
14133 case RISCVISD::MUL_VL:
14134 case RISCVISD::VWADD_W_VL:
14135 case RISCVISD::VWADDU_W_VL:
14136 case RISCVISD::FADD_VL:
14137 case RISCVISD::FMUL_VL:
14138 case RISCVISD::VFWADD_W_VL:
14139 return true;
14140 case ISD::SUB:
14141 case RISCVISD::SUB_VL:
14142 case RISCVISD::VWSUB_W_VL:
14143 case RISCVISD::VWSUBU_W_VL:
14144 case RISCVISD::FSUB_VL:
14145 case RISCVISD::VFWSUB_W_VL:
14146 case ISD::SHL:
14147 case RISCVISD::SHL_VL:
14148 return false;
14149 default:
14150 llvm_unreachable("Unexpected opcode");
14151 }
14152 }
14153
14154 /// Get a list of combines to try for folding extensions in \p Root.
14155 /// Note that each returned CombineToTry function doesn't actually modify
14156 /// anything. Instead they produce an optional CombineResult that, if not
14157 /// std::nullopt, needs to be materialized for the combine to be applied.
14158 /// \see CombineResult::materialize.
14159 /// If the related CombineToTry function returns std::nullopt, that means the
14160 /// combine didn't match.
14161 static SmallVector<CombineToTry> getSupportedFoldings(const SDNode *Root);
14162};
14163
14164/// Helper structure that holds all the necessary information to materialize a
14165/// combine that does some extension folding.
14166struct CombineResult {
14167 /// Opcode to be generated when materializing the combine.
14168 unsigned TargetOpcode;
14169 // No value means no extension is needed.
14170 std::optional<ExtKind> LHSExt;
14171 std::optional<ExtKind> RHSExt;
14172 /// Root of the combine.
14173 SDNode *Root;
14174 /// LHS of the TargetOpcode.
14175 NodeExtensionHelper LHS;
14176 /// RHS of the TargetOpcode.
14177 NodeExtensionHelper RHS;
14178
14179 CombineResult(unsigned TargetOpcode, SDNode *Root,
14180 const NodeExtensionHelper &LHS, std::optional<ExtKind> LHSExt,
14181 const NodeExtensionHelper &RHS, std::optional<ExtKind> RHSExt)
14182 : TargetOpcode(TargetOpcode), LHSExt(LHSExt), RHSExt(RHSExt), Root(Root),
14183 LHS(LHS), RHS(RHS) {}
14184
14185 /// Return a value that uses TargetOpcode and that can be used to replace
14186 /// Root.
14187 /// The actual replacement is *not* done in that method.
14188 SDValue materialize(SelectionDAG &DAG,
14189 const RISCVSubtarget &Subtarget) const {
14190 SDValue Mask, VL, Merge;
14191 std::tie(Mask, VL) =
14192 NodeExtensionHelper::getMaskAndVL(Root, DAG, Subtarget);
14193 switch (Root->getOpcode()) {
14194 default:
14195 Merge = Root->getOperand(2);
14196 break;
14197 case ISD::ADD:
14198 case ISD::SUB:
14199 case ISD::MUL:
14200 case ISD::OR:
14201 case ISD::SHL:
14202 Merge = DAG.getUNDEF(Root->getValueType(0));
14203 break;
14204 }
14205 return DAG.getNode(TargetOpcode, SDLoc(Root), Root->getValueType(0),
14206 LHS.getOrCreateExtendedOp(Root, DAG, Subtarget, LHSExt),
14207 RHS.getOrCreateExtendedOp(Root, DAG, Subtarget, RHSExt),
14208 Merge, Mask, VL);
14209 }
14210};
14211
14212/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
14213/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
14214/// are zext) and LHS and RHS can be folded into Root.
14215 /// AllowExtMask defines which forms `ext` can take in this pattern.
14216///
14217/// \note If the pattern can match with both zext and sext, the returned
14218/// CombineResult will feature the zext result.
14219///
14220/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14221/// can be used to apply the pattern.
14222static std::optional<CombineResult>
14223canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS,
14224 const NodeExtensionHelper &RHS,
14225 uint8_t AllowExtMask, SelectionDAG &DAG,
14226 const RISCVSubtarget &Subtarget) {
14227 if ((AllowExtMask & ExtKind::ZExt) && LHS.SupportsZExt && RHS.SupportsZExt)
14228 return CombineResult(NodeExtensionHelper::getZExtOpcode(Root->getOpcode()),
14229 Root, LHS, /*LHSExt=*/{ExtKind::ZExt}, RHS,
14230 /*RHSExt=*/{ExtKind::ZExt});
14231 if ((AllowExtMask & ExtKind::SExt) && LHS.SupportsSExt && RHS.SupportsSExt)
14232 return CombineResult(NodeExtensionHelper::getSExtOpcode(Root->getOpcode()),
14233 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
14234 /*RHSExt=*/{ExtKind::SExt});
14235 if ((AllowExtMask & ExtKind::FPExt) && LHS.SupportsFPExt && RHS.SupportsFPExt)
14236 return CombineResult(NodeExtensionHelper::getFPExtOpcode(Root->getOpcode()),
14237 Root, LHS, /*LHSExt=*/{ExtKind::FPExt}, RHS,
14238 /*RHSExt=*/{ExtKind::FPExt});
14239 return std::nullopt;
14240}
14241
14242/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
14243/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
14244/// are zext) and LHS and RHS can be folded into Root.
14245///
14246/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14247/// can be used to apply the pattern.
14248static std::optional<CombineResult>
14249canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS,
14250 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14251 const RISCVSubtarget &Subtarget) {
14252 return canFoldToVWWithSameExtensionImpl(
14253 Root, LHS, RHS, ExtKind::ZExt | ExtKind::SExt | ExtKind::FPExt, DAG,
14254 Subtarget);
14255}
14256
14257/// Check if \p Root follows a pattern Root(LHS, ext(RHS))
14258///
14259/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14260/// can be used to apply the pattern.
14261static std::optional<CombineResult>
14262canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS,
14263 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14264 const RISCVSubtarget &Subtarget) {
14265 if (RHS.SupportsFPExt)
14266 return CombineResult(
14267 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::FPExt),
14268 Root, LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::FPExt});
14269
14270 // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar
14271 // sext/zext?
14272 // Control this behavior behind an option (AllowSplatInVW_W) for testing
14273 // purposes.
14274 if (RHS.SupportsZExt && (!RHS.isSplat() || AllowSplatInVW_W))
14275 return CombineResult(
14276 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::ZExt), Root,
14277 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::ZExt});
14278 if (RHS.SupportsSExt && (!RHS.isSplat() || AllowSplatInVW_W))
14279 return CombineResult(
14280 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::SExt), Root,
14281 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::SExt});
14282 return std::nullopt;
14283}
14284
14285/// Check if \p Root follows a pattern Root(sext(LHS), sext(RHS))
14286///
14287/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14288/// can be used to apply the pattern.
14289static std::optional<CombineResult>
14290canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS,
14291 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14292 const RISCVSubtarget &Subtarget) {
14293 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::SExt, DAG,
14294 Subtarget);
14295}
14296
14297/// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS))
14298///
14299/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14300/// can be used to apply the pattern.
14301static std::optional<CombineResult>
14302canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS,
14303 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14304 const RISCVSubtarget &Subtarget) {
14305 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::ZExt, DAG,
14306 Subtarget);
14307}
14308
14309/// Check if \p Root follows a pattern Root(fpext(LHS), fpext(RHS))
14310///
14311/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14312/// can be used to apply the pattern.
14313static std::optional<CombineResult>
14314canFoldToVWWithFPEXT(SDNode *Root, const NodeExtensionHelper &LHS,
14315 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14316 const RISCVSubtarget &Subtarget) {
14317 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::FPExt, DAG,
14318 Subtarget);
14319}
14320
14321/// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS))
14322///
14323/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14324/// can be used to apply the pattern.
14325static std::optional<CombineResult>
14326canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS,
14327 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14328 const RISCVSubtarget &Subtarget) {
14329
14330 if (!LHS.SupportsSExt || !RHS.SupportsZExt)
14331 return std::nullopt;
14332 return CombineResult(NodeExtensionHelper::getSUOpcode(Root->getOpcode()),
14333 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
14334 /*RHSExt=*/{ExtKind::ZExt});
14335}
14336
14337 SmallVector<NodeExtensionHelper::CombineToTry>
14338 NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) {
14339 SmallVector<CombineToTry> Strategies;
14340 switch (Root->getOpcode()) {
14341 case ISD::ADD:
14342 case ISD::SUB:
14343 case ISD::OR:
14344 case RISCVISD::ADD_VL:
14345 case RISCVISD::SUB_VL:
14346 case RISCVISD::FADD_VL:
14347 case RISCVISD::FSUB_VL:
14348 // add|sub|fadd|fsub-> vwadd(u)|vwsub(u)|vfwadd|vfwsub
14349 Strategies.push_back(canFoldToVWWithSameExtension);
14350 // add|sub|fadd|fsub -> vwadd(u)_w|vwsub(u)_w|vfwadd_w|vfwsub_w
14351 Strategies.push_back(canFoldToVW_W);
14352 break;
14353 case RISCVISD::FMUL_VL:
14354 Strategies.push_back(canFoldToVWWithSameExtension);
14355 break;
14356 case ISD::MUL:
14357 case RISCVISD::MUL_VL:
14358 // mul -> vwmul(u)
14359 Strategies.push_back(canFoldToVWWithSameExtension);
14360 // mul -> vwmulsu
14361 Strategies.push_back(canFoldToVW_SU);
14362 break;
14363 case ISD::SHL:
14364 case RISCVISD::SHL_VL:
14365 // shl -> vwsll
14366 Strategies.push_back(canFoldToVWWithZEXT);
14367 break;
14368 case RISCVISD::VWADD_W_VL:
14369 case RISCVISD::VWSUB_W_VL:
14370 // vwadd_w|vwsub_w -> vwadd|vwsub
14371 Strategies.push_back(canFoldToVWWithSEXT);
14372 break;
14373 case RISCVISD::VWADDU_W_VL:
14374 case RISCVISD::VWSUBU_W_VL:
14375 // vwaddu_w|vwsubu_w -> vwaddu|vwsubu
14376 Strategies.push_back(canFoldToVWWithZEXT);
14377 break;
14378 case RISCVISD::VFWADD_W_VL:
14379 case RISCVISD::VFWSUB_W_VL:
14380 // vfwadd_w|vfwsub_w -> vfwadd|vfwsub
14381 Strategies.push_back(canFoldToVWWithFPEXT);
14382 break;
14383 default:
14384 llvm_unreachable("Unexpected opcode");
14385 }
14386 return Strategies;
14387}
14388} // End anonymous namespace.
14389
14390/// Combine a binary operation to its equivalent VW or VW_W form.
14391/// The supported combines are:
14392/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
14393/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
14394/// mul | mul_vl -> vwmul(u) | vwmul_su
14395/// shl | shl_vl -> vwsll
14396/// fadd_vl -> vfwadd | vfwadd_w
14397/// fsub_vl -> vfwsub | vfwsub_w
14398/// fmul_vl -> vfwmul
14399/// vwadd_w(u) -> vwadd(u)
14400/// vwsub_w(u) -> vwsub(u)
14401/// vfwadd_w -> vfwadd
14402/// vfwsub_w -> vfwsub
14403 static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N,
14404 TargetLowering::DAGCombinerInfo &DCI,
14405 const RISCVSubtarget &Subtarget) {
14406 SelectionDAG &DAG = DCI.DAG;
14407 if (DCI.isBeforeLegalize())
14408 return SDValue();
14409
14410 if (!NodeExtensionHelper::isSupportedRoot(N, Subtarget))
14411 return SDValue();
14412
14413 SmallVector<SDNode *> Worklist;
14414 SmallSet<SDNode *, 8> Inserted;
14415 Worklist.push_back(N);
14416 Inserted.insert(N);
14417 SmallVector<CombineResult> CombinesToApply;
14418
14419 while (!Worklist.empty()) {
14420 SDNode *Root = Worklist.pop_back_val();
14421 if (!NodeExtensionHelper::isSupportedRoot(Root, Subtarget))
14422 return SDValue();
14423
14424 NodeExtensionHelper LHS(N, 0, DAG, Subtarget);
14425 NodeExtensionHelper RHS(N, 1, DAG, Subtarget);
14426 auto AppendUsersIfNeeded = [&Worklist,
14427 &Inserted](const NodeExtensionHelper &Op) {
14428 if (Op.needToPromoteOtherUsers()) {
14429 for (SDNode *TheUse : Op.OrigOperand->uses()) {
14430 if (Inserted.insert(TheUse).second)
14431 Worklist.push_back(TheUse);
14432 }
14433 }
14434 };
14435
14436 // Control the compile time by limiting the number of nodes we look at
14437 // in total.
14438 if (Inserted.size() > ExtensionMaxWebSize)
14439 return SDValue();
14440
14441 SmallVector<NodeExtensionHelper::CombineToTry> FoldingStrategies =
14442 NodeExtensionHelper::getSupportedFoldings(N);
14443
14444 assert(!FoldingStrategies.empty() && "Nothing to be folded");
14445 bool Matched = false;
14446 for (int Attempt = 0;
14447 (Attempt != 1 + NodeExtensionHelper::isCommutative(N)) && !Matched;
14448 ++Attempt) {
14449
14450 for (NodeExtensionHelper::CombineToTry FoldingStrategy :
14451 FoldingStrategies) {
14452 std::optional<CombineResult> Res =
14453 FoldingStrategy(N, LHS, RHS, DAG, Subtarget);
14454 if (Res) {
14455 Matched = true;
14456 CombinesToApply.push_back(*Res);
14457 // All the inputs that are extended need to be folded, otherwise
14458 // we would be left with both the old input (since it may still be
14459 // used) and the new one.
14460 if (Res->LHSExt.has_value())
14461 AppendUsersIfNeeded(LHS);
14462 if (Res->RHSExt.has_value())
14463 AppendUsersIfNeeded(RHS);
14464 break;
14465 }
14466 }
14467 std::swap(LHS, RHS);
14468 }
14469 // Right now we do an all or nothing approach.
14470 if (!Matched)
14471 return SDValue();
14472 }
14473 // Store the value for the replacement of the input node separately.
14474 SDValue InputRootReplacement;
14475 // We do the RAUW after we materialize all the combines, because some replaced
14476 // nodes may be feeding some of the yet-to-be-replaced nodes. Put differently,
14477 // some of these nodes may appear in the NodeExtensionHelpers of some of the
14478 // yet-to-be-visited CombinesToApply roots.
14479 SmallVector<std::pair<SDValue, SDValue>> ValuesToReplace;
14480 ValuesToReplace.reserve(CombinesToApply.size());
14481 for (CombineResult Res : CombinesToApply) {
14482 SDValue NewValue = Res.materialize(DAG, Subtarget);
14483 if (!InputRootReplacement) {
14484 assert(Res.Root == N &&
14485 "First element is expected to be the current node");
14486 InputRootReplacement = NewValue;
14487 } else {
14488 ValuesToReplace.emplace_back(SDValue(Res.Root, 0), NewValue);
14489 }
14490 }
14491 for (std::pair<SDValue, SDValue> OldNewValues : ValuesToReplace) {
14492 DAG.ReplaceAllUsesOfValueWith(OldNewValues.first, OldNewValues.second);
14493 DCI.AddToWorklist(OldNewValues.second.getNode());
14494 }
14495 return InputRootReplacement;
14496}
14497
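// A rough worked example of the combines above (schematic only; exact nodes
// depend on the types and subtarget): starting from
//   ADD_VL (VSEXT_VL X), (VSEXT_VL Y), passthru, mask, vl
// both operands report sign-extension support, and CombineResult::materialize
// rebuilds the root as
//   VWADD_VL X, Y, passthru, mask, vl
// leaving the original extends to be removed once they become dead.
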
14498// Fold (vwadd(u).wv y, (vmerge cond, x, 0)) -> vwadd(u).wv y, x, y, cond
14499// (vwsub(u).wv y, (vmerge cond, x, 0)) -> vwsub(u).wv y, x, y, cond
14500// y will be the Passthru and cond will be the Mask.
14501 static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG) {
14502 unsigned Opc = N->getOpcode();
14503 assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
14504 Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
14505 
14506 SDValue Y = N->getOperand(0);
14507 SDValue MergeOp = N->getOperand(1);
14508 unsigned MergeOpc = MergeOp.getOpcode();
14509
14510 if (MergeOpc != RISCVISD::VMERGE_VL && MergeOpc != ISD::VSELECT)
14511 return SDValue();
14512
14513 SDValue X = MergeOp->getOperand(1);
14514
14515 if (!MergeOp.hasOneUse())
14516 return SDValue();
14517
14518 // Passthru should be undef
14519 SDValue Passthru = N->getOperand(2);
14520 if (!Passthru.isUndef())
14521 return SDValue();
14522
14523 // Mask should be all ones
14524 SDValue Mask = N->getOperand(3);
14525 if (Mask.getOpcode() != RISCVISD::VMSET_VL)
14526 return SDValue();
14527
14528 // False value of MergeOp should be all zeros
14529 SDValue Z = MergeOp->getOperand(2);
14530
14531 if (Z.getOpcode() == ISD::INSERT_SUBVECTOR &&
14532 (isNullOrNullSplat(Z.getOperand(0)) || Z.getOperand(0).isUndef()))
14533 Z = Z.getOperand(1);
14534
14535 if (!ISD::isConstantSplatVectorAllZeros(Z.getNode()))
14536 return SDValue();
14537
14538 return DAG.getNode(Opc, SDLoc(N), N->getValueType(0),
14539 {Y, X, Y, MergeOp->getOperand(0), N->getOperand(4)},
14540 N->getFlags());
14541}
14542
14543 static SDValue performVWADDSUBW_VLCombine(SDNode *N,
14544 TargetLowering::DAGCombinerInfo &DCI,
14545 const RISCVSubtarget &Subtarget) {
14546 [[maybe_unused]] unsigned Opc = N->getOpcode();
14547 assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
14548 Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
14549 
14550 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
14551 return V;
14552
14553 return combineVWADDSUBWSelect(N, DCI.DAG);
14554}
14555
14556// Helper function for performMemPairCombine.
14557// Try to combine the memory loads/stores LSNode1 and LSNode2
14558// into a single memory pair operation.
14559 static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1,
14560 LSBaseSDNode *LSNode2, SDValue BasePtr,
14561 uint64_t Imm) {
14562 SmallPtrSet<const SDNode *, 32> Visited;
14563 SmallVector<const SDNode *, 8> Worklist = {LSNode1, LSNode2};
14564
14565 if (SDNode::hasPredecessorHelper(LSNode1, Visited, Worklist) ||
14566 SDNode::hasPredecessorHelper(LSNode2, Visited, Worklist))
14567 return SDValue();
14568
14569 MachineFunction &MF = DAG.getMachineFunction();
14570 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
14571
14572 // The new operation has twice the width.
14573 MVT XLenVT = Subtarget.getXLenVT();
14574 EVT MemVT = LSNode1->getMemoryVT();
14575 EVT NewMemVT = (MemVT == MVT::i32) ? MVT::i64 : MVT::i128;
14576 MachineMemOperand *MMO = LSNode1->getMemOperand();
14577 MachineMemOperand *NewMMO = MF.getMachineMemOperand(
14578 MMO, MMO->getPointerInfo(), MemVT == MVT::i32 ? 8 : 16);
14579
14580 if (LSNode1->getOpcode() == ISD::LOAD) {
14581 auto Ext = cast<LoadSDNode>(LSNode1)->getExtensionType();
14582 unsigned Opcode;
14583 if (MemVT == MVT::i32)
14584 Opcode = (Ext == ISD::ZEXTLOAD) ? RISCVISD::TH_LWUD : RISCVISD::TH_LWD;
14585 else
14586 Opcode = RISCVISD::TH_LDD;
14587
14588 SDValue Res = DAG.getMemIntrinsicNode(
14589 Opcode, SDLoc(LSNode1), DAG.getVTList({XLenVT, XLenVT, MVT::Other}),
14590 {LSNode1->getChain(), BasePtr,
14591 DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
14592 NewMemVT, NewMMO);
14593
14594 SDValue Node1 =
14595 DAG.getMergeValues({Res.getValue(0), Res.getValue(2)}, SDLoc(LSNode1));
14596 SDValue Node2 =
14597 DAG.getMergeValues({Res.getValue(1), Res.getValue(2)}, SDLoc(LSNode2));
14598
14599 DAG.ReplaceAllUsesWith(LSNode2, Node2.getNode());
14600 return Node1;
14601 } else {
14602 unsigned Opcode = (MemVT == MVT::i32) ? RISCVISD::TH_SWD : RISCVISD::TH_SDD;
14603
14604 SDValue Res = DAG.getMemIntrinsicNode(
14605 Opcode, SDLoc(LSNode1), DAG.getVTList(MVT::Other),
14606 {LSNode1->getChain(), LSNode1->getOperand(1), LSNode2->getOperand(1),
14607 BasePtr, DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
14608 NewMemVT, NewMMO);
14609
14610 DAG.ReplaceAllUsesWith(LSNode2, Res.getNode());
14611 return Res;
14612 }
14613}
14614
14615// Try to combine two adjacent loads/stores to a single pair instruction from
14616// the XTHeadMemPair vendor extension.
14617 static SDValue performMemPairCombine(SDNode *N,
14618 TargetLowering::DAGCombinerInfo &DCI) {
14619 SelectionDAG &DAG = DCI.DAG;
14620 MachineFunction &MF = DAG.getMachineFunction();
14621 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
14622
14623 // Target does not support load/store pair.
14624 if (!Subtarget.hasVendorXTHeadMemPair())
14625 return SDValue();
14626
14627 LSBaseSDNode *LSNode1 = cast<LSBaseSDNode>(N);
14628 EVT MemVT = LSNode1->getMemoryVT();
14629 unsigned OpNum = LSNode1->getOpcode() == ISD::LOAD ? 1 : 2;
14630
14631 // No volatile, indexed or atomic loads/stores.
14632 if (!LSNode1->isSimple() || LSNode1->isIndexed())
14633 return SDValue();
14634
14635 // Function to get a base + constant representation from a memory value.
14636 auto ExtractBaseAndOffset = [](SDValue Ptr) -> std::pair<SDValue, uint64_t> {
14637 if (Ptr->getOpcode() == ISD::ADD)
14638 if (auto *C1 = dyn_cast<ConstantSDNode>(Ptr->getOperand(1)))
14639 return {Ptr->getOperand(0), C1->getZExtValue()};
14640 return {Ptr, 0};
14641 };
14642
14643 auto [Base1, Offset1] = ExtractBaseAndOffset(LSNode1->getOperand(OpNum));
14644
14645 SDValue Chain = N->getOperand(0);
14646 for (SDNode::use_iterator UI = Chain->use_begin(), UE = Chain->use_end();
14647 UI != UE; ++UI) {
14648 SDUse &Use = UI.getUse();
14649 if (Use.getUser() != N && Use.getResNo() == 0 &&
14650 Use.getUser()->getOpcode() == N->getOpcode()) {
14651 LSBaseSDNode *LSNode2 = cast<LSBaseSDNode>(Use.getUser());
14652
14653 // No volatile, indexed or atomic loads/stores.
14654 if (!LSNode2->isSimple() || LSNode2->isIndexed())
14655 continue;
14656
14657 // Check if LSNode1 and LSNode2 have the same type and extension.
14658 if (LSNode1->getOpcode() == ISD::LOAD)
14659 if (cast<LoadSDNode>(LSNode2)->getExtensionType() !=
14660 cast<LoadSDNode>(LSNode1)->getExtensionType())
14661 continue;
14662
14663 if (LSNode1->getMemoryVT() != LSNode2->getMemoryVT())
14664 continue;
14665
14666 auto [Base2, Offset2] = ExtractBaseAndOffset(LSNode2->getOperand(OpNum));
14667
14668 // Check if the base pointer is the same for both instructions.
14669 if (Base1 != Base2)
14670 continue;
14671
14672 // Check if the offsets match the XTHeadMemPair encoding constraints.
14673 bool Valid = false;
14674 if (MemVT == MVT::i32) {
14675 // Check for adjacent i32 values and a 2-bit index.
14676 if ((Offset1 + 4 == Offset2) && isShiftedUInt<2, 3>(Offset1))
14677 Valid = true;
14678 } else if (MemVT == MVT::i64) {
14679 // Check for adjacent i64 values and a 2-bit index.
14680 if ((Offset1 + 8 == Offset2) && isShiftedUInt<2, 4>(Offset1))
14681 Valid = true;
14682 }
14683
14684 if (!Valid)
14685 continue;
14686
14687 // Try to combine.
14688 if (SDValue Res =
14689 tryMemPairCombine(DAG, LSNode1, LSNode2, Base1, Offset1))
14690 return Res;
14691 }
14692 }
14693
14694 return SDValue();
14695}
14696
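// As a schematic example of the pairing above: two simple, non-indexed i64
// loads on the same chain reading from (add base, 0) and (add base, 8)
// satisfy the offset constraints (adjacent values, 0 is a valid shifted
// 2-bit index), so they are rewritten into a single TH_LDD memory-pair node
// whose two results replace the original loads.
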
14697// Fold
14698// (fp_to_int (froundeven X)) -> fcvt X, rne
14699// (fp_to_int (ftrunc X)) -> fcvt X, rtz
14700// (fp_to_int (ffloor X)) -> fcvt X, rdn
14701// (fp_to_int (fceil X)) -> fcvt X, rup
14702// (fp_to_int (fround X)) -> fcvt X, rmm
14703// (fp_to_int (frint X)) -> fcvt X
14704 static SDValue performFP_TO_INTCombine(SDNode *N,
14705 TargetLowering::DAGCombinerInfo &DCI,
14706 const RISCVSubtarget &Subtarget) {
14707 SelectionDAG &DAG = DCI.DAG;
14708 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14709 MVT XLenVT = Subtarget.getXLenVT();
14710
14711 SDValue Src = N->getOperand(0);
14712
14713 // Don't do this for strict-fp Src.
14714 if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode())
14715 return SDValue();
14716
14717 // Ensure the FP type is legal.
14718 if (!TLI.isTypeLegal(Src.getValueType()))
14719 return SDValue();
14720
14721 // Don't do this for f16 with Zfhmin and not Zfh.
14722 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
14723 return SDValue();
14724
14725 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
14726 // If the result is invalid, we didn't find a foldable instruction.
14727 if (FRM == RISCVFPRndMode::Invalid)
14728 return SDValue();
14729
14730 SDLoc DL(N);
14731 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
14732 EVT VT = N->getValueType(0);
14733
14734 if (VT.isVector() && TLI.isTypeLegal(VT)) {
14735 MVT SrcVT = Src.getSimpleValueType();
14736 MVT SrcContainerVT = SrcVT;
14737 MVT ContainerVT = VT.getSimpleVT();
14738 SDValue XVal = Src.getOperand(0);
14739
14740 // For widening and narrowing conversions we just combine it into a
14741 // VFCVT_..._VL node, as there are no specific VFWCVT/VFNCVT VL nodes. They
14742 // end up getting lowered to their appropriate pseudo instructions based on
14743 // their operand types
14744 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits() * 2 ||
14745 VT.getScalarSizeInBits() * 2 < SrcVT.getScalarSizeInBits())
14746 return SDValue();
14747
14748 // Make fixed-length vectors scalable first
14749 if (SrcVT.isFixedLengthVector()) {
14750 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
14751 XVal = convertToScalableVector(SrcContainerVT, XVal, DAG, Subtarget);
14752 ContainerVT =
14753 getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
14754 }
14755
14756 auto [Mask, VL] =
14757 getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);
14758
14759 SDValue FpToInt;
14760 if (FRM == RISCVFPRndMode::RTZ) {
14761 // Use the dedicated trunc static rounding mode if we're truncating so we
14762 // don't need to generate calls to fsrmi/fsrm
14763 unsigned Opc =
14764 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
14765 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
14766 } else if (FRM == RISCVFPRndMode::DYN) {
14767 unsigned Opc =
14768 IsSigned ? RISCVISD::VFCVT_X_F_VL : RISCVISD::VFCVT_XU_F_VL;
14769 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
14770 } else {
14771 unsigned Opc =
14772 IsSigned ? RISCVISD::VFCVT_RM_X_F_VL : RISCVISD::VFCVT_RM_XU_F_VL;
14773 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask,
14774 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
14775 }
14776
14777 // If converted from fixed-length to scalable, convert back
14778 if (VT.isFixedLengthVector())
14779 FpToInt = convertFromScalableVector(VT, FpToInt, DAG, Subtarget);
14780
14781 return FpToInt;
14782 }
14783
14784 // Only handle XLen or i32 types. Other types narrower than XLen will
14785 // eventually be legalized to XLenVT.
14786 if (VT != MVT::i32 && VT != XLenVT)
14787 return SDValue();
14788
14789 unsigned Opc;
14790 if (VT == XLenVT)
14791 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
14792 else
14793 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
14794 
14795 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src.getOperand(0),
14796 DAG.getTargetConstant(FRM, DL, XLenVT));
14797 return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt);
14798}
14799
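// For instance (scalar case, schematic): on RV64,
//   (fp_to_sint i64 (ftrunc f64 X))
// matches RISCVFPRndMode::RTZ and is emitted as FCVT_X with the static rtz
// rounding-mode operand, i.e. a single fcvt.l.d with no fsrmi/fsrm around it.
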
14800// Fold
14801// (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne))
14802// (fp_to_int_sat (ftrunc X)) -> (select X == nan, 0, (fcvt X, rtz))
14803// (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn))
14804// (fp_to_int_sat (fceil X)) -> (select X == nan, 0, (fcvt X, rup))
14805// (fp_to_int_sat (fround X)) -> (select X == nan, 0, (fcvt X, rmm))
14806// (fp_to_int_sat (frint X)) -> (select X == nan, 0, (fcvt X, dyn))
14807 static SDValue performFP_TO_INT_SATCombine(SDNode *N,
14808 TargetLowering::DAGCombinerInfo &DCI,
14809 const RISCVSubtarget &Subtarget) {
14810 SelectionDAG &DAG = DCI.DAG;
14811 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14812 MVT XLenVT = Subtarget.getXLenVT();
14813
14814 // Only handle XLen types. Other types narrower than XLen will eventually be
14815 // legalized to XLenVT.
14816 EVT DstVT = N->getValueType(0);
14817 if (DstVT != XLenVT)
14818 return SDValue();
14819
14820 SDValue Src = N->getOperand(0);
14821
14822 // Don't do this for strict-fp Src.
14823 if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode())
14824 return SDValue();
14825
14826 // Ensure the FP type is also legal.
14827 if (!TLI.isTypeLegal(Src.getValueType()))
14828 return SDValue();
14829
14830 // Don't do this for f16 with Zfhmin and not Zfh.
14831 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
14832 return SDValue();
14833
14834 EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
14835
14836 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
14837 if (FRM == RISCVFPRndMode::Invalid)
14838 return SDValue();
14839
14840 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;
14841
14842 unsigned Opc;
14843 if (SatVT == DstVT)
14844 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
14845 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
14846 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
14847 else
14848 return SDValue();
14849 // FIXME: Support other SatVTs by clamping before or after the conversion.
14850
14851 Src = Src.getOperand(0);
14852
14853 SDLoc DL(N);
14854 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src,
14855 DAG.getTargetConstant(FRM, DL, XLenVT));
14856
14857 // fcvt.wu.* sign extends bit 31 on RV64. FP_TO_UINT_SAT expects to zero
14858 // extend.
14859 if (Opc == RISCVISD::FCVT_WU_RV64)
14860 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
14861
14862 // RISC-V FP-to-int conversions saturate to the destination register size, but
14863 // don't produce 0 for nan.
14864 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
14865 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
14866}
14867
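// Roughly, for (fp_to_sint_sat i64 (ffloor f64 X)) on RV64 this produces an
// FCVT_X with the rdn rounding mode, followed by a select keyed on
// (setcc X, X, SETUO) that returns 0 when X is NaN, matching the saturating
// semantics described above.
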
14868// Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is
14869// smaller than XLenVT.
14870 static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG,
14871 const RISCVSubtarget &Subtarget) {
14872 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
14873
14874 SDValue Src = N->getOperand(0);
14875 if (Src.getOpcode() != ISD::BSWAP)
14876 return SDValue();
14877
14878 EVT VT = N->getValueType(0);
14879 if (!VT.isScalarInteger() || VT.getSizeInBits() >= Subtarget.getXLen() ||
14880 !llvm::has_single_bit<uint32_t>(VT.getSizeInBits()))
14881 return SDValue();
14882
14883 SDLoc DL(N);
14884 return DAG.getNode(RISCVISD::BREV8, DL, VT, Src.getOperand(0));
14885}
14886
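// Schematically: for an i16 value (narrower than XLEN),
//   (bitreverse (bswap X))
// reverses the bits within each byte while leaving the byte order intact,
// which is exactly what brev8 does, so the pair is replaced with a single
// RISCVISD::BREV8 node.
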
14887// Convert from one FMA opcode to another based on whether we are negating the
14888// multiply result and/or the accumulator.
14889// NOTE: Only supports RVV operations with VL.
14890static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) {
14891 // Negating the multiply result changes ADD<->SUB and toggles 'N'.
14892 if (NegMul) {
14893 // clang-format off
14894 switch (Opcode) {
14895 default: llvm_unreachable("Unexpected opcode");
14896 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
14897 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
14898 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
14899 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
14900 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
14901 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
14902 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
14903 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
14904 }
14905 // clang-format on
14906 }
14907
14908 // Negating the accumulator changes ADD<->SUB.
14909 if (NegAcc) {
14910 // clang-format off
14911 switch (Opcode) {
14912 default: llvm_unreachable("Unexpected opcode");
14913 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
14914 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
14915 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
14916 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
14917 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
14918 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
14919 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
14920 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
14921 }
14922 // clang-format on
14923 }
14924
14925 return Opcode;
14926}
14927
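// Example use of the mapping above: negating only the product of a VFMADD_VL,
//   negateFMAOpcode(RISCVISD::VFMADD_VL, /*NegMul=*/true, /*NegAcc=*/false)
// yields RISCVISD::VFNMSUB_VL, while additionally negating the accumulator
// (NegAcc == true) toggles it again to RISCVISD::VFNMADD_VL.
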
14928 static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG) {
14929 // Fold FNEG_VL into FMA opcodes.
14930 // The first operand of strict-fp is chain.
14931 unsigned Offset = N->isTargetStrictFPOpcode();
14932 SDValue A = N->getOperand(0 + Offset);
14933 SDValue B = N->getOperand(1 + Offset);
14934 SDValue C = N->getOperand(2 + Offset);
14935 SDValue Mask = N->getOperand(3 + Offset);
14936 SDValue VL = N->getOperand(4 + Offset);
14937
14938 auto invertIfNegative = [&Mask, &VL](SDValue &V) {
14939 if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(1) == Mask &&
14940 V.getOperand(2) == VL) {
14941 // Return the negated input.
14942 V = V.getOperand(0);
14943 return true;
14944 }
14945
14946 return false;
14947 };
14948
14949 bool NegA = invertIfNegative(A);
14950 bool NegB = invertIfNegative(B);
14951 bool NegC = invertIfNegative(C);
14952
14953 // If no operands are negated, we're done.
14954 if (!NegA && !NegB && !NegC)
14955 return SDValue();
14956
14957 unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC);
14958 if (N->isTargetStrictFPOpcode())
14959 return DAG.getNode(NewOpcode, SDLoc(N), N->getVTList(),
14960 {N->getOperand(0), A, B, C, Mask, VL});
14961 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask,
14962 VL);
14963}
14964
14965 static SDValue performVFMADD_VLCombine(SDNode *N, SelectionDAG &DAG,
14966 const RISCVSubtarget &Subtarget) {
14967 if (SDValue V = combineVFMADD_VLWithVFNEG_VL(N, DAG))
14968 return V;
14969
14970 if (N->getValueType(0).isScalableVector() &&
14971 N->getValueType(0).getVectorElementType() == MVT::f32 &&
14972 (Subtarget.hasVInstructionsF16Minimal() &&
14973 !Subtarget.hasVInstructionsF16())) {
14974 return SDValue();
14975 }
14976
14977 // FIXME: Ignore strict opcodes for now.
14978 if (N->isTargetStrictFPOpcode())
14979 return SDValue();
14980
14981 // Try to form widening FMA.
14982 SDValue Op0 = N->getOperand(0);
14983 SDValue Op1 = N->getOperand(1);
14984 SDValue Mask = N->getOperand(3);
14985 SDValue VL = N->getOperand(4);
14986
14987 if (Op0.getOpcode() != RISCVISD::FP_EXTEND_VL ||
14988 Op1.getOpcode() != RISCVISD::FP_EXTEND_VL)
14989 return SDValue();
14990
14991 // TODO: Refactor to handle more complex cases similar to
14992 // combineBinOp_VLToVWBinOp_VL.
14993 if ((!Op0.hasOneUse() || !Op1.hasOneUse()) &&
14994 (Op0 != Op1 || !Op0->hasNUsesOfValue(2, 0)))
14995 return SDValue();
14996
14997 // Check the mask and VL are the same.
14998 if (Op0.getOperand(1) != Mask || Op0.getOperand(2) != VL ||
14999 Op1.getOperand(1) != Mask || Op1.getOperand(2) != VL)
15000 return SDValue();
15001
15002 unsigned NewOpc;
15003 switch (N->getOpcode()) {
15004 default:
15005 llvm_unreachable("Unexpected opcode");
15006 case RISCVISD::VFMADD_VL:
15007 NewOpc = RISCVISD::VFWMADD_VL;
15008 break;
15009 case RISCVISD::VFNMSUB_VL:
15010 NewOpc = RISCVISD::VFWNMSUB_VL;
15011 break;
15012 case RISCVISD::VFNMADD_VL:
15013 NewOpc = RISCVISD::VFWNMADD_VL;
15014 break;
15015 case RISCVISD::VFMSUB_VL:
15016 NewOpc = RISCVISD::VFWMSUB_VL;
15017 break;
15018 }
15019
15020 Op0 = Op0.getOperand(0);
15021 Op1 = Op1.getOperand(0);
15022
15023 return DAG.getNode(NewOpc, SDLoc(N), N->getValueType(0), Op0, Op1,
15024 N->getOperand(2), Mask, VL);
15025}
15026
15027 static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
15028 const RISCVSubtarget &Subtarget) {
15029 assert(N->getOpcode() == ISD::SRA && "Unexpected opcode");
15030
15031 if (N->getValueType(0) != MVT::i64 || !Subtarget.is64Bit())
15032 return SDValue();
15033
15034 if (!isa<ConstantSDNode>(N->getOperand(1)))
15035 return SDValue();
15036 uint64_t ShAmt = N->getConstantOperandVal(1);
15037 if (ShAmt > 32)
15038 return SDValue();
15039
15040 SDValue N0 = N->getOperand(0);
15041
15042 // Combine (sra (sext_inreg (shl X, C1), i32), C2) ->
15043 // (sra (shl X, C1+32), C2+32) so it gets selected as SLLI+SRAI instead of
15044 // SLLIW+SRAIW. SLLI+SRAI have compressed forms.
15045 if (ShAmt < 32 &&
15046 N0.getOpcode() == ISD::SIGN_EXTEND_INREG && N0.hasOneUse() &&
15047 cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32 &&
15048 N0.getOperand(0).getOpcode() == ISD::SHL && N0.getOperand(0).hasOneUse() &&
15049 isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
15050 uint64_t LShAmt = N0.getOperand(0).getConstantOperandVal(1);
15051 if (LShAmt < 32) {
15052 SDLoc ShlDL(N0.getOperand(0));
15053 SDValue Shl = DAG.getNode(ISD::SHL, ShlDL, MVT::i64,
15054 N0.getOperand(0).getOperand(0),
15055 DAG.getConstant(LShAmt + 32, ShlDL, MVT::i64));
15056 SDLoc DL(N);
15057 return DAG.getNode(ISD::SRA, DL, MVT::i64, Shl,
15058 DAG.getConstant(ShAmt + 32, DL, MVT::i64));
15059 }
15060 }
15061
15062 // Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C)
15063 // FIXME: Should this be a generic combine? There's a similar combine on X86.
15064 //
15065 // Also try these folds where an add or sub is in the middle.
15066 // (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1), C)
15067 // (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X), C)
15068 SDValue Shl;
15069 ConstantSDNode *AddC = nullptr;
15070
15071 // We might have an ADD or SUB between the SRA and SHL.
15072 bool IsAdd = N0.getOpcode() == ISD::ADD;
15073 if ((IsAdd || N0.getOpcode() == ISD::SUB)) {
15074 // Other operand needs to be a constant we can modify.
15075 AddC = dyn_cast<ConstantSDNode>(N0.getOperand(IsAdd ? 1 : 0));
15076 if (!AddC)
15077 return SDValue();
15078
15079 // AddC needs to have at least 32 trailing zeros.
15080 if (AddC->getAPIntValue().countr_zero() < 32)
15081 return SDValue();
15082
15083 // All users should be a shift by constant less than or equal to 32. This
15084 // ensures we'll do this optimization for each of them to produce an
15085 // add/sub+sext_inreg they can all share.
15086 for (SDNode *U : N0->uses()) {
15087 if (U->getOpcode() != ISD::SRA ||
15088 !isa<ConstantSDNode>(U->getOperand(1)) ||
15089 U->getConstantOperandVal(1) > 32)
15090 return SDValue();
15091 }
15092
15093 Shl = N0.getOperand(IsAdd ? 0 : 1);
15094 } else {
15095 // Not an ADD or SUB.
15096 Shl = N0;
15097 }
15098
15099 // Look for a shift left by 32.
15100 if (Shl.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(Shl.getOperand(1)) ||
15101 Shl.getConstantOperandVal(1) != 32)
15102 return SDValue();
15103
15104 // If we didn't look through an add/sub, then the shl should have one use.
15105 // If we did look through an add/sub, the sext_inreg we create is free so
15106 // we're only creating 2 new instructions. It's enough to only remove the
15107 // original sra+add/sub.
15108 if (!AddC && !Shl.hasOneUse())
15109 return SDValue();
15110
15111 SDLoc DL(N);
15112 SDValue In = Shl.getOperand(0);
15113
15114 // If we looked through an ADD or SUB, we need to rebuild it with the shifted
15115 // constant.
15116 if (AddC) {
15117 SDValue ShiftedAddC =
15118 DAG.getConstant(AddC->getAPIntValue().lshr(32), DL, MVT::i64);
15119 if (IsAdd)
15120 In = DAG.getNode(ISD::ADD, DL, MVT::i64, In, ShiftedAddC);
15121 else
15122 In = DAG.getNode(ISD::SUB, DL, MVT::i64, ShiftedAddC, In);
15123 }
15124
15125 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, In,
15126 DAG.getValueType(MVT::i32));
15127 if (ShAmt == 32)
15128 return SExt;
15129
15130 return DAG.getNode(
15131 ISD::SHL, DL, MVT::i64, SExt,
15132 DAG.getConstant(32 - ShAmt, DL, MVT::i64));
15133}
15134
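// Worked instance of the second fold above: with ShAmt == 24 (so C == 8),
//   (sra (shl X, 32), 24)
// is rebuilt as
//   (shl (sext_inreg X, i32), 8)
// and when ShAmt == 32 the sext_inreg alone is returned.
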
15135 // Invert (and/or (set cc X, Y), (xor Z, 1)) to (or/and (set !cc X, Y), Z) if
15136 // the result is used as the condition of a br_cc or select_cc we can invert,
15137// inverting the setcc is free, and Z is 0/1. Caller will invert the
15138// br_cc/select_cc.
15139 static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG) {
15140 bool IsAnd = Cond.getOpcode() == ISD::AND;
15141 if (!IsAnd && Cond.getOpcode() != ISD::OR)
15142 return SDValue();
15143
15144 if (!Cond.hasOneUse())
15145 return SDValue();
15146
15147 SDValue Setcc = Cond.getOperand(0);
15148 SDValue Xor = Cond.getOperand(1);
15149 // Canonicalize setcc to LHS.
15150 if (Setcc.getOpcode() != ISD::SETCC)
15151 std::swap(Setcc, Xor);
15152 // LHS should be a setcc and RHS should be an xor.
15153 if (Setcc.getOpcode() != ISD::SETCC || !Setcc.hasOneUse() ||
15154 Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
15155 return SDValue();
15156
15157 // If the condition is an And, SimplifyDemandedBits may have changed
15158 // (xor Z, 1) to (not Z).
15159 SDValue Xor1 = Xor.getOperand(1);
15160 if (!isOneConstant(Xor1) && !(IsAnd && isAllOnesConstant(Xor1)))
15161 return SDValue();
15162
15163 EVT VT = Cond.getValueType();
15164 SDValue Xor0 = Xor.getOperand(0);
15165
15166 // The LHS of the xor needs to be 0/1.
15167 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
15168 if (!DAG.MaskedValueIsZero(Xor0, Mask))
15169 return SDValue();
15170
15171 // We can only invert integer setccs.
15172 EVT SetCCOpVT = Setcc.getOperand(0).getValueType();
15173 if (!SetCCOpVT.isScalarInteger())
15174 return SDValue();
15175
15176 ISD::CondCode CCVal = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
15177 if (ISD::isIntEqualitySetCC(CCVal)) {
15178 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
15179 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(0),
15180 Setcc.getOperand(1), CCVal);
15181 } else if (CCVal == ISD::SETLT && isNullConstant(Setcc.getOperand(0))) {
15182 // Invert (setlt 0, X) by converting to (setlt X, 1).
15183 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(1),
15184 DAG.getConstant(1, SDLoc(Setcc), VT), CCVal);
15185 } else if (CCVal == ISD::SETLT && isOneConstant(Setcc.getOperand(1))) {
15186 // Invert (setlt X, 1) by converting to (setlt 0, X).
15187 Setcc = DAG.getSetCC(SDLoc(Setcc), VT,
15188 DAG.getConstant(0, SDLoc(Setcc), VT),
15189 Setcc.getOperand(0), CCVal);
15190 } else
15191 return SDValue();
15192
15193 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
15194 return DAG.getNode(Opc, SDLoc(Cond), VT, Setcc, Xor.getOperand(0));
15195}
15196
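// Schematic example of the inversion above, with Z known to be 0/1:
//   (and (setcc X, Y, eq), (xor Z, 1))
// becomes
//   (or (setcc X, Y, ne), Z)
// and the caller then inverts the br_cc/select_cc that consumes the result.
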
15197 // Perform common combines for BR_CC and SELECT_CC conditions.
15198static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
15199 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
15200 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
15201
15202 // Since an arithmetic right shift always preserves the sign,
15203 // the shift can be omitted.
15204 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
15205 // setge (sra X, N), 0 -> setge X, 0
15206 if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
15207 LHS.getOpcode() == ISD::SRA) {
15208 LHS = LHS.getOperand(0);
15209 return true;
15210 }
15211
15212 if (!ISD::isIntEqualitySetCC(CCVal))
15213 return false;
15214
15215 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
15216 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
15217 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
15218 LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
15219 // If we're looking for eq 0 instead of ne 0, we need to invert the
15220 // condition.
15221 bool Invert = CCVal == ISD::SETEQ;
15222 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
15223 if (Invert)
15224 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
15225
15226 RHS = LHS.getOperand(1);
15227 LHS = LHS.getOperand(0);
15228 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
15229
15230 CC = DAG.getCondCode(CCVal);
15231 return true;
15232 }
15233
15234 // Fold ((xor X, Y), 0, eq/ne) -> (X, Y, eq/ne)
15235 if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS)) {
15236 RHS = LHS.getOperand(1);
15237 LHS = LHS.getOperand(0);
15238 return true;
15239 }
15240
15241 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, XLen-1-C), 0, ge/lt)
15242 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
15243 LHS.getOperand(1).getOpcode() == ISD::Constant) {
15244 SDValue LHS0 = LHS.getOperand(0);
15245 if (LHS0.getOpcode() == ISD::AND &&
15246 LHS0.getOperand(1).getOpcode() == ISD::Constant) {
15247 uint64_t Mask = LHS0.getConstantOperandVal(1);
15248 uint64_t ShAmt = LHS.getConstantOperandVal(1);
15249 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
15250 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
15251 CC = DAG.getCondCode(CCVal);
15252
15253 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
15254 LHS = LHS0.getOperand(0);
15255 if (ShAmt != 0)
15256 LHS =
15257 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
15258 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
15259 return true;
15260 }
15261 }
15262 }
15263
15264 // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
15265 // This can occur when legalizing some floating point comparisons.
15266 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
15267 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
15268 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
15269 CC = DAG.getCondCode(CCVal);
15270 RHS = DAG.getConstant(0, DL, LHS.getValueType());
15271 return true;
15272 }
15273
15274 if (isNullConstant(RHS)) {
15275 if (SDValue NewCond = tryDemorganOfBooleanCondition(LHS, DAG)) {
15276 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
15277 CC = DAG.getCondCode(CCVal);
15278 LHS = NewCond;
15279 return true;
15280 }
15281 }
15282
15283 return false;
15284}
15285
15286// Fold
15287// (select C, (add Y, X), Y) -> (add Y, (select C, X, 0)).
15288// (select C, (sub Y, X), Y) -> (sub Y, (select C, X, 0)).
15289// (select C, (or Y, X), Y) -> (or Y, (select C, X, 0)).
15290// (select C, (xor Y, X), Y) -> (xor Y, (select C, X, 0)).
15291 static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG,
15292 SDValue TrueVal, SDValue FalseVal,
15293 bool Swapped) {
15294 bool Commutative = true;
15295 unsigned Opc = TrueVal.getOpcode();
15296 switch (Opc) {
15297 default:
15298 return SDValue();
15299 case ISD::SHL:
15300 case ISD::SRA:
15301 case ISD::SRL:
15302 case ISD::SUB:
15303 Commutative = false;
15304 break;
15305 case ISD::ADD:
15306 case ISD::OR:
15307 case ISD::XOR:
15308 break;
15309 }
15310
15311 if (!TrueVal.hasOneUse() || isa<ConstantSDNode>(FalseVal))
15312 return SDValue();
15313
15314 unsigned OpToFold;
15315 if (FalseVal == TrueVal.getOperand(0))
15316 OpToFold = 0;
15317 else if (Commutative && FalseVal == TrueVal.getOperand(1))
15318 OpToFold = 1;
15319 else
15320 return SDValue();
15321
15322 EVT VT = N->getValueType(0);
15323 SDLoc DL(N);
15324 SDValue OtherOp = TrueVal.getOperand(1 - OpToFold);
15325 EVT OtherOpVT = OtherOp->getValueType(0);
15326 SDValue IdentityOperand =
15327 DAG.getNeutralElement(Opc, DL, OtherOpVT, N->getFlags());
15328 if (!Commutative)
15329 IdentityOperand = DAG.getConstant(0, DL, OtherOpVT);
15330 assert(IdentityOperand && "No identity operand!");
15331
15332 if (Swapped)
15333 std::swap(OtherOp, IdentityOperand);
15334 SDValue NewSel =
15335 DAG.getSelect(DL, OtherOpVT, N->getOperand(0), OtherOp, IdentityOperand);
15336 return DAG.getNode(TrueVal.getOpcode(), DL, VT, FalseVal, NewSel);
15337}
15338
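// Worked instance of the folds above: for
//   (select C, (add Y, X), Y)
// FalseVal (Y) matches operand 0 of the add, so the node is rebuilt as
//   (add Y, (select C, X, 0))
// with 0 acting as the identity value when the condition is false.
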
15339// This tries to get rid of `select` and `icmp` that are being used to handle
15340// `Targets` that do not support `cttz(0)`/`ctlz(0)`.
15341 static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG) {
15342 SDValue Cond = N->getOperand(0);
15343
15344 // This represents either CTTZ or CTLZ instruction.
15345 SDValue CountZeroes;
15346
15347 SDValue ValOnZero;
15348
15349 if (Cond.getOpcode() != ISD::SETCC)
15350 return SDValue();
15351
15352 if (!isNullConstant(Cond->getOperand(1)))
15353 return SDValue();
15354
15355 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
15356 if (CCVal == ISD::CondCode::SETEQ) {
15357 CountZeroes = N->getOperand(2);
15358 ValOnZero = N->getOperand(1);
15359 } else if (CCVal == ISD::CondCode::SETNE) {
15360 CountZeroes = N->getOperand(1);
15361 ValOnZero = N->getOperand(2);
15362 } else {
15363 return SDValue();
15364 }
15365
15366 if (CountZeroes.getOpcode() == ISD::TRUNCATE ||
15367 CountZeroes.getOpcode() == ISD::ZERO_EXTEND)
15368 CountZeroes = CountZeroes.getOperand(0);
15369
15370 if (CountZeroes.getOpcode() != ISD::CTTZ &&
15371 CountZeroes.getOpcode() != ISD::CTTZ_ZERO_UNDEF &&
15372 CountZeroes.getOpcode() != ISD::CTLZ &&
15373 CountZeroes.getOpcode() != ISD::CTLZ_ZERO_UNDEF)
15374 return SDValue();
15375
15376 if (!isNullConstant(ValOnZero))
15377 return SDValue();
15378
15379 SDValue CountZeroesArgument = CountZeroes->getOperand(0);
15380 if (Cond->getOperand(0) != CountZeroesArgument)
15381 return SDValue();
15382
15383 if (CountZeroes.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
15384 CountZeroes = DAG.getNode(ISD::CTTZ, SDLoc(CountZeroes),
15385 CountZeroes.getValueType(), CountZeroesArgument);
15386 } else if (CountZeroes.getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
15387 CountZeroes = DAG.getNode(ISD::CTLZ, SDLoc(CountZeroes),
15388 CountZeroes.getValueType(), CountZeroesArgument);
15389 }
15390
15391 unsigned BitWidth = CountZeroes.getValueSizeInBits();
15392 SDValue BitWidthMinusOne =
15393 DAG.getConstant(BitWidth - 1, SDLoc(N), CountZeroes.getValueType());
15394
15395 auto AndNode = DAG.getNode(ISD::AND, SDLoc(N), CountZeroes.getValueType(),
15396 CountZeroes, BitWidthMinusOne);
15397 return DAG.getZExtOrTrunc(AndNode, SDLoc(N), N->getValueType(0));
15398}
15399
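// Schematic example of the fold above, assuming a 32-bit value X:
//   (select (setcc X, 0, eq), 0, (cttz_zero_undef X))
// becomes
//   (and (cttz X), 31)
// which relies on cttz producing the bit width (32) for a zero input, so
// masking with BitWidth-1 yields 0 in exactly that case.
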
15400 static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG,
15401 const RISCVSubtarget &Subtarget) {
15402 SDValue Cond = N->getOperand(0);
15403 SDValue True = N->getOperand(1);
15404 SDValue False = N->getOperand(2);
15405 SDLoc DL(N);
15406 EVT VT = N->getValueType(0);
15407 EVT CondVT = Cond.getValueType();
15408
15409 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
15410 return SDValue();
15411
15412 // Replace (setcc eq (and x, C)) with (setcc ne (and x, C))) to generate
15413 // BEXTI, where C is power of 2.
15414 if (Subtarget.hasStdExtZbs() && VT.isScalarInteger() &&
15415 (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())) {
15416 SDValue LHS = Cond.getOperand(0);
15417 SDValue RHS = Cond.getOperand(1);
15418 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
15419 if (CC == ISD::SETEQ && LHS.getOpcode() == ISD::AND &&
15420 isa<ConstantSDNode>(LHS.getOperand(1)) && isNullConstant(RHS)) {
15421 const APInt &MaskVal = LHS.getConstantOperandAPInt(1);
15422 if (MaskVal.isPowerOf2() && !MaskVal.isSignedIntN(12))
15423 return DAG.getSelect(DL, VT,
15424 DAG.getSetCC(DL, CondVT, LHS, RHS, ISD::SETNE),
15425 False, True);
15426 }
15427 }
15428 return SDValue();
15429}
15430
15431 static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
15432 const RISCVSubtarget &Subtarget) {
15433 if (SDValue Folded = foldSelectOfCTTZOrCTLZ(N, DAG))
15434 return Folded;
15435
15436 if (SDValue V = useInversedSetcc(N, DAG, Subtarget))
15437 return V;
15438
15439 if (Subtarget.hasConditionalMoveFusion())
15440 return SDValue();
15441
15442 SDValue TrueVal = N->getOperand(1);
15443 SDValue FalseVal = N->getOperand(2);
15444 if (SDValue V = tryFoldSelectIntoOp(N, DAG, TrueVal, FalseVal, /*Swapped*/false))
15445 return V;
15446 return tryFoldSelectIntoOp(N, DAG, FalseVal, TrueVal, /*Swapped*/true);
15447}
15448
15449/// If we have a build_vector where each lane is binop X, C, where C
15450/// is a constant (but not necessarily the same constant on all lanes),
15451/// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ..).
15452/// We assume that materializing a constant build vector will be no more
15453 /// expensive than performing O(n) binops.
15454 static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG,
15455 const RISCVSubtarget &Subtarget,
15456 const RISCVTargetLowering &TLI) {
15457 SDLoc DL(N);
15458 EVT VT = N->getValueType(0);
15459
15460 assert(!VT.isScalableVector() && "unexpected build vector");
15461
15462 if (VT.getVectorNumElements() == 1)
15463 return SDValue();
15464
15465 const unsigned Opcode = N->op_begin()->getNode()->getOpcode();
15466 if (!TLI.isBinOp(Opcode))
15467 return SDValue();
15468
15469 if (!TLI.isOperationLegalOrCustom(Opcode, VT) || !TLI.isTypeLegal(VT))
15470 return SDValue();
15471
15472 // This BUILD_VECTOR involves an implicit truncation, and sinking
15473 // truncates through binops is non-trivial.
15474 if (N->op_begin()->getValueType() != VT.getVectorElementType())
15475 return SDValue();
15476
15477 SmallVector<SDValue> LHSOps;
15478 SmallVector<SDValue> RHSOps;
15479 for (SDValue Op : N->ops()) {
15480 if (Op.isUndef()) {
15481 // We can't form a divide or remainder from undef.
15482 if (!DAG.isSafeToSpeculativelyExecute(Opcode))
15483 return SDValue();
15484
15485 LHSOps.push_back(Op);
15486 RHSOps.push_back(Op);
15487 continue;
15488 }
15489
15490 // TODO: We can handle operations which have a neutral rhs value
15491 // (e.g. x + 0, a * 1 or a << 0), but we then have to keep track
15492 // of profit in a more explicit manner.
15493 if (Op.getOpcode() != Opcode || !Op.hasOneUse())
15494 return SDValue();
15495
15496 LHSOps.push_back(Op.getOperand(0));
15497 if (!isa<ConstantSDNode>(Op.getOperand(1)) &&
15498 !isa<ConstantFPSDNode>(Op.getOperand(1)))
15499 return SDValue();
15500 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
15501 // have different LHS and RHS types.
15502 if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType())
15503 return SDValue();
15504
15505 RHSOps.push_back(Op.getOperand(1));
15506 }
15507
15508 return DAG.getNode(Opcode, DL, VT, DAG.getBuildVector(VT, DL, LHSOps),
15509 DAG.getBuildVector(VT, DL, RHSOps));
15510}
15511
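// Schematic example of the build_vector combine above:
//   build_vector (add x0, 1), (add x1, 2), (add x2, 3), (add x3, 4)
// is rebuilt as
//   add (build_vector x0, x1, x2, x3), (build_vector 1, 2, 3, 4)
// on the assumption that materializing the constant vector costs no more
// than the four scalar adds it replaces.
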
15512 static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
15513 const RISCVSubtarget &Subtarget,
15514 const RISCVTargetLowering &TLI) {
15515 SDValue InVec = N->getOperand(0);
15516 SDValue InVal = N->getOperand(1);
15517 SDValue EltNo = N->getOperand(2);
15518 SDLoc DL(N);
15519
15520 EVT VT = InVec.getValueType();
15521 if (VT.isScalableVector())
15522 return SDValue();
15523
15524 if (!InVec.hasOneUse())
15525 return SDValue();
15526
15527 // Given insert_vector_elt (binop a, VecC), (same_binop b, C2), Elt
15528 // move the insert_vector_elts into the arms of the binop. Note that
15529 // the new RHS must be a constant.
15530 const unsigned InVecOpcode = InVec->getOpcode();
15531 if (InVecOpcode == InVal->getOpcode() && TLI.isBinOp(InVecOpcode) &&
15532 InVal.hasOneUse()) {
15533 SDValue InVecLHS = InVec->getOperand(0);
15534 SDValue InVecRHS = InVec->getOperand(1);
15535 SDValue InValLHS = InVal->getOperand(0);
15536 SDValue InValRHS = InVal->getOperand(1);
15537
15538 if (!isa<ConstantSDNode>(InVecRHS) && !isa<ConstantFPSDNode>(InVecRHS))
15539 return SDValue();
15540 if (!isa<ConstantSDNode>(InValRHS) && !isa<ConstantFPSDNode>(InValRHS))
15541 return SDValue();
15542 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
15543 // have different LHS and RHS types.
15544 if (InVec.getOperand(0).getValueType() != InVec.getOperand(1).getValueType())
15545 return SDValue();
15546 SDValue LHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
15547 InVecLHS, InValLHS, EltNo);
15548 SDValue RHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
15549 InVecRHS, InValRHS, EltNo);
15550 return DAG.getNode(InVecOpcode, DL, VT, LHS, RHS);
15551 }
15552
15553 // Given insert_vector_elt (concat_vectors ...), InVal, Elt
15554 // move the insert_vector_elt to the source operand of the concat_vector.
15555 if (InVec.getOpcode() != ISD::CONCAT_VECTORS)
15556 return SDValue();
15557
15558 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
15559 if (!IndexC)
15560 return SDValue();
15561 unsigned Elt = IndexC->getZExtValue();
15562
15563 EVT ConcatVT = InVec.getOperand(0).getValueType();
15564 if (ConcatVT.getVectorElementType() != InVal.getValueType())
15565 return SDValue();
15566 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
15567 SDValue NewIdx = DAG.getVectorIdxConstant(Elt % ConcatNumElts, DL);
15568
15569 unsigned ConcatOpIdx = Elt / ConcatNumElts;
15570 SDValue ConcatOp = InVec.getOperand(ConcatOpIdx);
15571 ConcatOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ConcatVT,
15572 ConcatOp, InVal, NewIdx);
15573
15574 SmallVector<SDValue> ConcatOps;
15575 ConcatOps.append(InVec->op_begin(), InVec->op_end());
15576 ConcatOps[ConcatOpIdx] = ConcatOp;
15577 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
15578}
15579
15580// If we're concatenating a series of vector loads like
15581// concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ...
15582// Then we can turn this into a strided load by widening the vector elements
15583// vlse32 p, stride=n
15584 static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
15585 const RISCVSubtarget &Subtarget,
15586 const RISCVTargetLowering &TLI) {
15587 SDLoc DL(N);
15588 EVT VT = N->getValueType(0);
15589
15590 // Only perform this combine on legal MVTs.
15591 if (!TLI.isTypeLegal(VT))
15592 return SDValue();
15593
15594 // TODO: Potentially extend this to scalable vectors
15595 if (VT.isScalableVector())
15596 return SDValue();
15597
15598 auto *BaseLd = dyn_cast<LoadSDNode>(N->getOperand(0));
15599 if (!BaseLd || !BaseLd->isSimple() || !ISD::isNormalLoad(BaseLd) ||
15600 !SDValue(BaseLd, 0).hasOneUse())
15601 return SDValue();
15602
15603 EVT BaseLdVT = BaseLd->getValueType(0);
15604
15605 // Go through the loads and check that they're strided
15606 SmallVector<LoadSDNode *> Lds;
15607 Lds.push_back(BaseLd);
15608 Align Align = BaseLd->getAlign();
15609 for (SDValue Op : N->ops().drop_front()) {
15610 auto *Ld = dyn_cast<LoadSDNode>(Op);
15611 if (!Ld || !Ld->isSimple() || !Op.hasOneUse() ||
15612 Ld->getChain() != BaseLd->getChain() || !ISD::isNormalLoad(Ld) ||
15613 Ld->getValueType(0) != BaseLdVT)
15614 return SDValue();
15615
15616 Lds.push_back(Ld);
15617
15618 // The common alignment is the most restrictive (smallest) of all the loads
15619 Align = std::min(Align, Ld->getAlign());
15620 }
15621
15622 using PtrDiff = std::pair<std::variant<int64_t, SDValue>, bool>;
15623 auto GetPtrDiff = [&DAG](LoadSDNode *Ld1,
15624 LoadSDNode *Ld2) -> std::optional<PtrDiff> {
15625 // If the load ptrs can be decomposed into a common (Base + Index) with a
15626 // common constant stride, then return the constant stride.
15627 BaseIndexOffset BIO1 = BaseIndexOffset::match(Ld1, DAG);
15628 BaseIndexOffset BIO2 = BaseIndexOffset::match(Ld2, DAG);
15629 if (BIO1.equalBaseIndex(BIO2, DAG))
15630 return {{BIO2.getOffset() - BIO1.getOffset(), false}};
15631
15632 // Otherwise try to match (add LastPtr, Stride) or (add NextPtr, Stride)
15633 SDValue P1 = Ld1->getBasePtr();
15634 SDValue P2 = Ld2->getBasePtr();
15635 if (P2.getOpcode() == ISD::ADD && P2.getOperand(0) == P1)
15636 return {{P2.getOperand(1), false}};
15637 if (P1.getOpcode() == ISD::ADD && P1.getOperand(0) == P2)
15638 return {{P1.getOperand(1), true}};
15639
15640 return std::nullopt;
15641 };
15642
15643 // Get the distance between the first and second loads
15644 auto BaseDiff = GetPtrDiff(Lds[0], Lds[1]);
15645 if (!BaseDiff)
15646 return SDValue();
15647
15648 // Check all the loads are the same distance apart
15649 for (auto *It = Lds.begin() + 1; It != Lds.end() - 1; It++)
15650 if (GetPtrDiff(*It, *std::next(It)) != BaseDiff)
15651 return SDValue();
15652
15653 // TODO: At this point, we've successfully matched a generalized gather
15654 // load. Maybe we should emit that, and then move the specialized
15655 // matchers above and below into a DAG combine?
15656
15657 // Get the widened scalar type, e.g. v4i8 -> i32
15658 unsigned WideScalarBitWidth =
15659 BaseLdVT.getScalarSizeInBits() * BaseLdVT.getVectorNumElements();
15660 MVT WideScalarVT = MVT::getIntegerVT(WideScalarBitWidth);
15661
15662 // Get the vector type for the strided load, e.g. 4 x v4i8 -> v4i32
15663 MVT WideVecVT = MVT::getVectorVT(WideScalarVT, N->getNumOperands());
15664 if (!TLI.isTypeLegal(WideVecVT))
15665 return SDValue();
15666
15667 // Check that the operation is legal
15668 if (!TLI.isLegalStridedLoadStore(WideVecVT, Align))
15669 return SDValue();
15670
15671 auto [StrideVariant, MustNegateStride] = *BaseDiff;
15672 SDValue Stride = std::holds_alternative<SDValue>(StrideVariant)
15673 ? std::get<SDValue>(StrideVariant)
15674 : DAG.getConstant(std::get<int64_t>(StrideVariant), DL,
15675 Lds[0]->getOffset().getValueType());
15676 if (MustNegateStride)
15677 Stride = DAG.getNegative(Stride, DL, Stride.getValueType());
15678
15679 SDVTList VTs = DAG.getVTList({WideVecVT, MVT::Other});
15680 SDValue IntID =
15681 DAG.getTargetConstant(Intrinsic::riscv_masked_strided_load, DL,
15682 Subtarget.getXLenVT());
15683
15684 SDValue AllOneMask =
15685 DAG.getSplat(WideVecVT.changeVectorElementType(MVT::i1), DL,
15686 DAG.getConstant(1, DL, MVT::i1));
15687
15688 SDValue Ops[] = {BaseLd->getChain(), IntID, DAG.getUNDEF(WideVecVT),
15689 BaseLd->getBasePtr(), Stride, AllOneMask};
15690
15691 uint64_t MemSize;
15692 if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride);
15693 ConstStride && ConstStride->getSExtValue() >= 0)
15694 // total size = (elsize * n) + (stride - elsize) * (n-1)
15695 // = elsize + stride * (n-1)
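// e.g. elsize=8, stride=16, n=4: 8*4 + (16-8)*3 = 8 + 16*3 = 56.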
15696 MemSize = WideScalarVT.getSizeInBits() +
15697 ConstStride->getSExtValue() * (N->getNumOperands() - 1);
15698 else
15699 // If Stride isn't constant, then we can't know how much it will load
15700 MemSize = MemoryLocation::UnknownSize;
15701
15702 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
15703 BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(), MemSize,
15704 Align);
15705
15706 SDValue StridedLoad = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs,
15707 Ops, WideVecVT, MMO);
15708 for (SDValue Ld : N->ops())
15709 DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), StridedLoad);
15710
15711 return DAG.getBitcast(VT.getSimpleVT(), StridedLoad);
15712}
15713
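// Try to fold (add (vwmul x, y), z) or (add_vl (vwmul_vl x, y), z) into a
// single widening multiply-accumulate (VWMACC_VL / VWMACCU_VL / VWMACCSU_VL).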
15714 static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG,
15715 const RISCVSubtarget &Subtarget) {
15716
15717 assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD);
15718
15719 if (N->getValueType(0).isFixedLengthVector())
15720 return SDValue();
15721
15722 SDValue Addend = N->getOperand(0);
15723 SDValue MulOp = N->getOperand(1);
15724
15725 if (N->getOpcode() == RISCVISD::ADD_VL) {
15726 SDValue AddMergeOp = N->getOperand(2);
15727 if (!AddMergeOp.isUndef())
15728 return SDValue();
15729 }
15730
15731 auto IsVWMulOpc = [](unsigned Opc) {
15732 switch (Opc) {
15733 case RISCVISD::VWMUL_VL:
15734 case RISCVISD::VWMULU_VL:
15735 case RISCVISD::VWMULSU_VL:
15736 return true;
15737 default:
15738 return false;
15739 }
15740 };
15741
15742 if (!IsVWMulOpc(MulOp.getOpcode()))
15743 std::swap(Addend, MulOp);
15744
15745 if (!IsVWMulOpc(MulOp.getOpcode()))
15746 return SDValue();
15747
15748 SDValue MulMergeOp = MulOp.getOperand(2);
15749
15750 if (!MulMergeOp.isUndef())
15751 return SDValue();
15752
15753 auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG,
15754 const RISCVSubtarget &Subtarget) {
15755 if (N->getOpcode() == ISD::ADD) {
15756 SDLoc DL(N);
15757 return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG,
15758 Subtarget);
15759 }
15760 return std::make_pair(N->getOperand(3), N->getOperand(4));
15761 }(N, DAG, Subtarget);
15762
15763 SDValue MulMask = MulOp.getOperand(3);
15764 SDValue MulVL = MulOp.getOperand(4);
15765
15766 if (AddMask != MulMask || AddVL != MulVL)
15767 return SDValue();
15768
15769 unsigned Opc = RISCVISD::VWMACC_VL + MulOp.getOpcode() - RISCVISD::VWMUL_VL;
15770 static_assert(RISCVISD::VWMACC_VL + 1 == RISCVISD::VWMACCU_VL,
15771 "Unexpected opcode after VWMACC_VL");
15772 static_assert(RISCVISD::VWMACC_VL + 2 == RISCVISD::VWMACCSU_VL,
15773 "Unexpected opcode after VWMACC_VL!");
15774 static_assert(RISCVISD::VWMUL_VL + 1 == RISCVISD::VWMULU_VL,
15775 "Unexpected opcode after VWMUL_VL!");
15776 static_assert(RISCVISD::VWMUL_VL + 2 == RISCVISD::VWMULSU_VL,
15777 "Unexpected opcode after VWMUL_VL!");
15778
15779 SDLoc DL(N);
15780 EVT VT = N->getValueType(0);
15781 SDValue Ops[] = {MulOp.getOperand(0), MulOp.getOperand(1), Addend, AddMask,
15782 AddVL};
15783 return DAG.getNode(Opc, DL, VT, Ops);
15784}
15785
15786 static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index,
15787 ISD::MemIndexType &IndexType,
15788 RISCVTargetLowering::DAGCombinerInfo &DCI) {
15789 if (!DCI.isBeforeLegalize())
15790 return false;
15791
15792 SelectionDAG &DAG = DCI.DAG;
15793 const MVT XLenVT =
15794 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>().getXLenVT();
15795
15796 const EVT IndexVT = Index.getValueType();
15797
15798 // RISC-V indexed loads only support the "unsigned unscaled" addressing
15799 // mode, so anything else must be manually legalized.
15800 if (!isIndexTypeSigned(IndexType))
15801 return false;
15802
15803 if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
15804 // Any index legalization should first promote to XLenVT, so we don't lose
15805 // bits when scaling. This may create an illegal index type so we let
15806 // LLVM's legalization take care of the splitting.
15807 // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
15808 Index = DAG.getNode(ISD::SIGN_EXTEND, DL,
15809 IndexVT.changeVectorElementType(XLenVT), Index);
15810 }
15811 IndexType = ISD::UNSIGNED_SCALED;
15812 return true;
15813}
15814
15815/// Match the index vector of a scatter or gather node as the shuffle mask
15816/// which performs the rearrangement if possible. Will only match if
15817/// all lanes are touched, and thus replacing the scatter or gather with
15818/// a unit strided access and shuffle is legal.
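/// For example, an i32 gather with byte indices <4, 0, 12, 8> touches all four
/// lanes and corresponds to the shuffle mask <1, 0, 3, 2>.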
15819 static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask,
15820 SmallVector<int> &ShuffleMask) {
15821 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
15822 return false;
15823 if (Index.getOpcode() != ISD::BUILD_VECTOR)
15824 return false;
15825
15826 const unsigned ElementSize = VT.getScalarStoreSize();
15827 const unsigned NumElems = VT.getVectorNumElements();
15828
15829 // Create the shuffle mask and check all bits active
15830 assert(ShuffleMask.empty());
15831 BitVector ActiveLanes(NumElems);
15832 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
15833 // TODO: We've found an active bit of UB, and could be
15834 // more aggressive here if desired.
15835 if (Index->getOperand(i)->isUndef())
15836 return false;
15837 uint64_t C = Index->getConstantOperandVal(i);
15838 if (C % ElementSize != 0)
15839 return false;
15840 C = C / ElementSize;
15841 if (C >= NumElems)
15842 return false;
15843 ShuffleMask.push_back(C);
15844 ActiveLanes.set(C);
15845 }
15846 return ActiveLanes.all();
15847}
15848
15849/// Match the index of a gather or scatter operation as an operation
15850/// with twice the element width and half the number of elements. This is
15851/// generally profitable (if legal) because these operations are linear
15852/// in VL, so even if we cause some extract VTYPE/VL toggles, we still
15853/// come out ahead.
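/// For example, an i32 gather with byte indices <0, 4, 16, 20> can instead be
/// done as an i64 gather with byte indices <0, 16>.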
15854 static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask,
15855 Align BaseAlign, const RISCVSubtarget &ST) {
15856 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
15857 return false;
15858 if (Index.getOpcode() != ISD::BUILD_VECTOR)
15859 return false;
15860
15861 // Attempt a doubling. If we can use an element type 4x or 8x in
15862 // size, this will happen via multiple iterations of the transform.
15863 const unsigned NumElems = VT.getVectorNumElements();
15864 if (NumElems % 2 != 0)
15865 return false;
15866
15867 const unsigned ElementSize = VT.getScalarStoreSize();
15868 const unsigned WiderElementSize = ElementSize * 2;
15869 if (WiderElementSize > ST.getELen()/8)
15870 return false;
15871
15872 if (!ST.enableUnalignedVectorMem() && BaseAlign < WiderElementSize)
15873 return false;
15874
15875 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
15876 // TODO: We've found an active bit of UB, and could be
15877 // more aggressive here if desired.
15878 if (Index->getOperand(i)->isUndef())
15879 return false;
15880 // TODO: This offset check is too strict if we support fully
15881 // misaligned memory operations.
15882 uint64_t C = Index->getConstantOperandVal(i);
15883 if (i % 2 == 0) {
15884 if (C % WiderElementSize != 0)
15885 return false;
15886 continue;
15887 }
15888 uint64_t Last = Index->getConstantOperandVal(i-1);
15889 if (C != Last + ElementSize)
15890 return false;
15891 }
15892 return true;
15893}
15894
15895
15896 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
15897 DAGCombinerInfo &DCI) const {
15898 SelectionDAG &DAG = DCI.DAG;
15899 const MVT XLenVT = Subtarget.getXLenVT();
15900 SDLoc DL(N);
15901
15902 // Helper to call SimplifyDemandedBits on an operand of N where only some low
15903 // bits are demanded. N will be added to the Worklist if it was not deleted.
15904 // Caller should return SDValue(N, 0) if this returns true.
15905 auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) {
15906 SDValue Op = N->getOperand(OpNo);
15907 APInt Mask = APInt::getLowBitsSet(Op.getValueSizeInBits(), LowBits);
15908 if (!SimplifyDemandedBits(Op, Mask, DCI))
15909 return false;
15910
15911 if (N->getOpcode() != ISD::DELETED_NODE)
15912 DCI.AddToWorklist(N);
15913 return true;
15914 };
15915
15916 switch (N->getOpcode()) {
15917 default:
15918 break;
15919 case RISCVISD::SplitF64: {
15920 SDValue Op0 = N->getOperand(0);
15921 // If the input to SplitF64 is just BuildPairF64 then the operation is
15922 // redundant. Instead, use BuildPairF64's operands directly.
15923 if (Op0->getOpcode() == RISCVISD::BuildPairF64)
15924 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
15925
15926 if (Op0->isUndef()) {
15927 SDValue Lo = DAG.getUNDEF(MVT::i32);
15928 SDValue Hi = DAG.getUNDEF(MVT::i32);
15929 return DCI.CombineTo(N, Lo, Hi);
15930 }
15931
15932 // It's cheaper to materialise two 32-bit integers than to load a double
15933 // from the constant pool and transfer it to integer registers through the
15934 // stack.
15935 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
15936 APInt V = C->getValueAPF().bitcastToAPInt();
15937 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
15938 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
15939 return DCI.CombineTo(N, Lo, Hi);
15940 }
15941
15942 // This is a target-specific version of a DAGCombine performed in
15943 // DAGCombiner::visitBITCAST. It performs the equivalent of:
15944 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
15945 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
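// Since the f64 sign lives in bit 63, only the Hi word produced by SplitF64
// needs to be adjusted; the Lo word is unchanged.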
15946 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
15947 !Op0.getNode()->hasOneUse())
15948 break;
15949 SDValue NewSplitF64 =
15950 DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
15951 Op0.getOperand(0));
15952 SDValue Lo = NewSplitF64.getValue(0);
15953 SDValue Hi = NewSplitF64.getValue(1);
15954 APInt SignBit = APInt::getSignMask(32);
15955 if (Op0.getOpcode() == ISD::FNEG) {
15956 SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
15957 DAG.getConstant(SignBit, DL, MVT::i32));
15958 return DCI.CombineTo(N, Lo, NewHi);
15959 }
15960 assert(Op0.getOpcode() == ISD::FABS);
15961 SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
15962 DAG.getConstant(~SignBit, DL, MVT::i32));
15963 return DCI.CombineTo(N, Lo, NewHi);
15964 }
15965 case RISCVISD::SLLW:
15966 case RISCVISD::SRAW:
15967 case RISCVISD::SRLW:
15968 case RISCVISD::RORW:
15969 case RISCVISD::ROLW: {
15970 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
15971 if (SimplifyDemandedLowBitsHelper(0, 32) ||
15972 SimplifyDemandedLowBitsHelper(1, 5))
15973 return SDValue(N, 0);
15974
15975 break;
15976 }
15977 case RISCVISD::CLZW:
15978 case RISCVISD::CTZW: {
15979 // Only the lower 32 bits of the first operand are read
15980 if (SimplifyDemandedLowBitsHelper(0, 32))
15981 return SDValue(N, 0);
15982 break;
15983 }
15984 case RISCVISD::FMV_W_X_RV64: {
15985 // If the input to FMV_W_X_RV64 is just FMV_X_ANYEXTW_RV64 then the
15986 // conversion is unnecessary and can be replaced with the
15987 // FMV_X_ANYEXTW_RV64 operand.
15988 SDValue Op0 = N->getOperand(0);
15989 if (Op0->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64)
15990 return Op0.getOperand(0);
15991 break;
15992 }
15993 case RISCVISD::FMV_X_ANYEXTH:
15994 case RISCVISD::FMV_X_ANYEXTW_RV64: {
15995 SDLoc DL(N);
15996 SDValue Op0 = N->getOperand(0);
15997 MVT VT = N->getSimpleValueType(0);
15998 // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
15999 // conversion is unnecessary and can be replaced with the FMV_W_X_RV64
16000 // operand. Similar for FMV_X_ANYEXTH and FMV_H_X.
16001 if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 &&
16002 Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) ||
16003 (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
16004 Op0->getOpcode() == RISCVISD::FMV_H_X)) {
16005 assert(Op0.getOperand(0).getValueType() == VT &&
16006 "Unexpected value type!");
16007 return Op0.getOperand(0);
16008 }
16009
16010 // This is a target-specific version of a DAGCombine performed in
16011 // DAGCombiner::visitBITCAST. It performs the equivalent of:
16012 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
16013 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
16014 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
16015 !Op0.getNode()->hasOneUse())
16016 break;
16017 SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0));
16018 unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16;
16019 APInt SignBit = APInt::getSignMask(FPBits).sext(VT.getSizeInBits());
16020 if (Op0.getOpcode() == ISD::FNEG)
16021 return DAG.getNode(ISD::XOR, DL, VT, NewFMV,
16022 DAG.getConstant(SignBit, DL, VT));
16023
16024 assert(Op0.getOpcode() == ISD::FABS);
16025 return DAG.getNode(ISD::AND, DL, VT, NewFMV,
16026 DAG.getConstant(~SignBit, DL, VT));
16027 }
16028 case ISD::ABS: {
16029 EVT VT = N->getValueType(0);
16030 SDValue N0 = N->getOperand(0);
16031 // abs (sext) -> zext (abs)
16032 // abs (zext) -> zext (handled elsewhere)
16033 if (VT.isVector() && N0.hasOneUse() && N0.getOpcode() == ISD::SIGN_EXTEND) {
16034 SDValue Src = N0.getOperand(0);
16035 SDLoc DL(N);
16036 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
16037 DAG.getNode(ISD::ABS, DL, Src.getValueType(), Src));
16038 }
16039 break;
16040 }
16041 case ISD::ADD: {
16042 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16043 return V;
16044 if (SDValue V = combineToVWMACC(N, DAG, Subtarget))
16045 return V;
16046 return performADDCombine(N, DAG, Subtarget);
16047 }
16048 case ISD::SUB: {
16049 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16050 return V;
16051 return performSUBCombine(N, DAG, Subtarget);
16052 }
16053 case ISD::AND:
16054 return performANDCombine(N, DCI, Subtarget);
16055 case ISD::OR: {
16056 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16057 return V;
16058 return performORCombine(N, DCI, Subtarget);
16059 }
16060 case ISD::XOR:
16061 return performXORCombine(N, DAG, Subtarget);
16062 case ISD::MUL:
16063 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16064 return V;
16065 return performMULCombine(N, DAG, DCI, Subtarget);
16066 case ISD::SDIV:
16067 case ISD::UDIV:
16068 case ISD::SREM:
16069 case ISD::UREM:
16070 if (SDValue V = combineBinOpOfZExt(N, DAG))
16071 return V;
16072 break;
16073 case ISD::FADD:
16074 case ISD::UMAX:
16075 case ISD::UMIN:
16076 case ISD::SMAX:
16077 case ISD::SMIN:
16078 case ISD::FMAXNUM:
16079 case ISD::FMINNUM: {
16080 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
16081 return V;
16082 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
16083 return V;
16084 return SDValue();
16085 }
16086 case ISD::SETCC:
16087 return performSETCCCombine(N, DAG, Subtarget);
16088 case ISD::SIGN_EXTEND_INREG:
16089 return performSIGN_EXTEND_INREGCombine(N, DAG, Subtarget);
16090 case ISD::ZERO_EXTEND:
16091 // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during
16092 // type legalization. This is safe because fp_to_uint produces poison if
16093 // it overflows.
16094 if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) {
16095 SDValue Src = N->getOperand(0);
16096 if (Src.getOpcode() == ISD::FP_TO_UINT &&
16097 isTypeLegal(Src.getOperand(0).getValueType()))
16098 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64,
16099 Src.getOperand(0));
16100 if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() &&
16101 isTypeLegal(Src.getOperand(1).getValueType())) {
16102 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
16103 SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, SDLoc(N), VTs,
16104 Src.getOperand(0), Src.getOperand(1));
16105 DCI.CombineTo(N, Res);
16106 DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), Res.getValue(1));
16107 DCI.recursivelyDeleteUnusedNodes(Src.getNode());
16108 return SDValue(N, 0); // Return N so it doesn't get rechecked.
16109 }
16110 }
16111 return SDValue();
16112 case RISCVISD::TRUNCATE_VECTOR_VL: {
16113 // trunc (sra sext (X), zext (Y)) -> sra (X, smin (Y, scalarsize(Y) - 1))
16114 // This is beneficial for the cases where X and Y are both the same
16115 // low-precision vector value type. Since the truncate would be lowered
16116 // into n levels of TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW truncate
16117 // restriction, such a pattern would otherwise be expanded into a series of
16118 // "vsetvli" and "vnsrl" instructions before we reach this point.
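// For example, with v4i8 operands the combined pattern becomes
// sra X, (smin Y, 7), avoiding the widening sext/zext and the narrowing
// vnsrl sequence altogether.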
16119 auto IsTruncNode = [](SDValue V) {
16120 if (V.getOpcode() != RISCVISD::TRUNCATE_VECTOR_VL)
16121 return false;
16122 SDValue VL = V.getOperand(2);
16123 auto *C = dyn_cast<ConstantSDNode>(VL);
16124 // Assume all TRUNCATE_VECTOR_VL nodes use VLMAX for VMSET_VL operand
16125 bool IsVLMAXForVMSET = (C && C->isAllOnes()) ||
16126 (isa<RegisterSDNode>(VL) &&
16127 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0);
16128 return V.getOperand(1).getOpcode() == RISCVISD::VMSET_VL &&
16129 IsVLMAXForVMSET;
16130 };
16131
16132 SDValue Op = N->getOperand(0);
16133
16134 // We need to first find the inner level of TRUNCATE_VECTOR_VL node
16135 // to distinguish such pattern.
16136 while (IsTruncNode(Op)) {
16137 if (!Op.hasOneUse())
16138 return SDValue();
16139 Op = Op.getOperand(0);
16140 }
16141
16142 if (Op.getOpcode() == ISD::SRA && Op.hasOneUse()) {
16143 SDValue N0 = Op.getOperand(0);
16144 SDValue N1 = Op.getOperand(1);
16145 if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
16146 N1.getOpcode() == ISD::ZERO_EXTEND && N1.hasOneUse()) {
16147 SDValue N00 = N0.getOperand(0);
16148 SDValue N10 = N1.getOperand(0);
16149 if (N00.getValueType().isVector() &&
16150 N00.getValueType() == N10.getValueType() &&
16151 N->getValueType(0) == N10.getValueType()) {
16152 unsigned MaxShAmt = N10.getValueType().getScalarSizeInBits() - 1;
16153 SDValue SMin = DAG.getNode(
16154 ISD::SMIN, SDLoc(N1), N->getValueType(0), N10,
16155 DAG.getConstant(MaxShAmt, SDLoc(N1), N->getValueType(0)));
16156 return DAG.getNode(ISD::SRA, SDLoc(N), N->getValueType(0), N00, SMin);
16157 }
16158 }
16159 }
16160 break;
16161 }
16162 case ISD::TRUNCATE:
16163 return performTRUNCATECombine(N, DAG, Subtarget);
16164 case ISD::SELECT:
16165 return performSELECTCombine(N, DAG, Subtarget);
16166 case RISCVISD::CZERO_EQZ:
16167 case RISCVISD::CZERO_NEZ:
16168 // czero_eq X, (xor Y, 1) -> czero_ne X, Y if Y is 0 or 1.
16169 // czero_ne X, (xor Y, 1) -> czero_eq X, Y if Y is 0 or 1.
16170 if (N->getOperand(1).getOpcode() == ISD::XOR &&
16171 isOneConstant(N->getOperand(1).getOperand(1))) {
16172 SDValue Cond = N->getOperand(1).getOperand(0);
16173 APInt Mask = APInt::getBitsSetFrom(Cond.getValueSizeInBits(), 1);
16174 if (DAG.MaskedValueIsZero(Cond, Mask)) {
16175 unsigned NewOpc = N->getOpcode() == RISCVISD::CZERO_EQZ
16176 ? RISCVISD::CZERO_NEZ
16177 : RISCVISD::CZERO_EQZ;
16178 return DAG.getNode(NewOpc, SDLoc(N), N->getValueType(0),
16179 N->getOperand(0), Cond);
16180 }
16181 }
16182 return SDValue();
16183
16184 case RISCVISD::SELECT_CC: {
16185 // Transform
16186 SDValue LHS = N->getOperand(0);
16187 SDValue RHS = N->getOperand(1);
16188 SDValue CC = N->getOperand(2);
16189 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
16190 SDValue TrueV = N->getOperand(3);
16191 SDValue FalseV = N->getOperand(4);
16192 SDLoc DL(N);
16193 EVT VT = N->getValueType(0);
16194
16195 // If the True and False values are the same, we don't need a select_cc.
16196 if (TrueV == FalseV)
16197 return TrueV;
16198
16199 // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z
16200 // (select (x >= 0), y, z) -> x >> (XLEN - 1) & (z - y) + y
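// x >> (XLEN - 1) is all-ones when x < 0 and zero otherwise, so the AND
// selects either (y - z) or 0 before the final add of z.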
16201 if (!Subtarget.hasShortForwardBranchOpt() && isa<ConstantSDNode>(TrueV) &&
16202 isa<ConstantSDNode>(FalseV) && isNullConstant(RHS) &&
16203 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
16204 if (CCVal == ISD::CondCode::SETGE)
16205 std::swap(TrueV, FalseV);
16206
16207 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
16208 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
16209 // Only handle simm12; if the constant is not in this range, it must be
16210 // materialised in a register.
16211 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
16212 isInt<12>(TrueSImm - FalseSImm)) {
16213 SDValue SRA =
16214 DAG.getNode(ISD::SRA, DL, VT, LHS,
16215 DAG.getConstant(Subtarget.getXLen() - 1, DL, VT));
16216 SDValue AND =
16217 DAG.getNode(ISD::AND, DL, VT, SRA,
16218 DAG.getConstant(TrueSImm - FalseSImm, DL, VT));
16219 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
16220 }
16221
16222 if (CCVal == ISD::CondCode::SETGE)
16223 std::swap(TrueV, FalseV);
16224 }
16225
16226 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
16227 return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
16228 {LHS, RHS, CC, TrueV, FalseV});
16229
16230 if (!Subtarget.hasConditionalMoveFusion()) {
16231 // (select c, -1, y) -> -c | y
16232 if (isAllOnesConstant(TrueV)) {
16233 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
16234 SDValue Neg = DAG.getNegative(C, DL, VT);
16235 return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
16236 }
16237 // (select c, y, -1) -> -!c | y
16238 if (isAllOnesConstant(FalseV)) {
16239 SDValue C =
16240 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
16241 SDValue Neg = DAG.getNegative(C, DL, VT);
16242 return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
16243 }
16244
16245 // (select c, 0, y) -> -!c & y
16246 if (isNullConstant(TrueV)) {
16247 SDValue C =
16248 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
16249 SDValue Neg = DAG.getNegative(C, DL, VT);
16250 return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
16251 }
16252 // (select c, y, 0) -> -c & y
16253 if (isNullConstant(FalseV)) {
16254 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
16255 SDValue Neg = DAG.getNegative(C, DL, VT);
16256 return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
16257 }
16258 // (riscvisd::select_cc x, 0, ne, x, 1) -> (add x, (setcc x, 0, eq))
16259 // (riscvisd::select_cc x, 0, eq, 1, x) -> (add x, (setcc x, 0, eq))
16260 if (((isOneConstant(FalseV) && LHS == TrueV &&
16261 CCVal == ISD::CondCode::SETNE) ||
16262 (isOneConstant(TrueV) && LHS == FalseV &&
16263 CCVal == ISD::CondCode::SETEQ)) &&
16264 isNullConstant(RHS)) {
16265 // freeze it to be safe.
16266 LHS = DAG.getFreeze(LHS);
16267 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, ISD::CondCode::SETEQ);
16268 return DAG.getNode(ISD::ADD, DL, VT, LHS, C);
16269 }
16270 }
16271
16272 // If both true/false are an xor with 1, pull through the select.
16273 // This can occur after op legalization if both operands are setccs that
16274 // require an xor to invert.
16275 // FIXME: Generalize to other binary ops with identical operand?
16276 if (TrueV.getOpcode() == ISD::XOR && FalseV.getOpcode() == ISD::XOR &&
16277 TrueV.getOperand(1) == FalseV.getOperand(1) &&
16278 isOneConstant(TrueV.getOperand(1)) &&
16279 TrueV.hasOneUse() && FalseV.hasOneUse()) {
16280 SDValue NewSel = DAG.getNode(RISCVISD::SELECT_CC, DL, VT, LHS, RHS, CC,
16281 TrueV.getOperand(0), FalseV.getOperand(0));
16282 return DAG.getNode(ISD::XOR, DL, VT, NewSel, TrueV.getOperand(1));
16283 }
16284
16285 return SDValue();
16286 }
16287 case RISCVISD::BR_CC: {
16288 SDValue LHS = N->getOperand(1);
16289 SDValue RHS = N->getOperand(2);
16290 SDValue CC = N->getOperand(3);
16291 SDLoc DL(N);
16292
16293 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
16294 return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
16295 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
16296
16297 return SDValue();
16298 }
16299 case ISD::BITREVERSE:
16300 return performBITREVERSECombine(N, DAG, Subtarget);
16301 case ISD::FP_TO_SINT:
16302 case ISD::FP_TO_UINT:
16303 return performFP_TO_INTCombine(N, DCI, Subtarget);
16304 case ISD::FP_TO_SINT_SAT:
16305 case ISD::FP_TO_UINT_SAT:
16306 return performFP_TO_INT_SATCombine(N, DCI, Subtarget);
16307 case ISD::FCOPYSIGN: {
16308 EVT VT = N->getValueType(0);
16309 if (!VT.isVector())
16310 break;
16311 // There is a form of VFSGNJ which injects the negated sign of its second
16312 // operand. Try and bubble any FNEG up after the extend/round to produce
16313 // this optimized pattern. Avoid modifying cases where the FP_ROUND has
16314 // TRUNC=1.
16315 SDValue In2 = N->getOperand(1);
16316 // Avoid cases where the extend/round has multiple uses, as duplicating
16317 // those is typically more expensive than removing a fneg.
16318 if (!In2.hasOneUse())
16319 break;
16320 if (In2.getOpcode() != ISD::FP_EXTEND &&
16321 (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0))
16322 break;
16323 In2 = In2.getOperand(0);
16324 if (In2.getOpcode() != ISD::FNEG)
16325 break;
16326 SDLoc DL(N);
16327 SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT);
16328 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
16329 DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
16330 }
16331 case ISD::MGATHER: {
16332 const auto *MGN = dyn_cast<MaskedGatherSDNode>(N);
16333 const EVT VT = N->getValueType(0);
16334 SDValue Index = MGN->getIndex();
16335 SDValue ScaleOp = MGN->getScale();
16336 ISD::MemIndexType IndexType = MGN->getIndexType();
16337 assert(!MGN->isIndexScaled() &&
16338 "Scaled gather/scatter should not be formed");
16339
16340 SDLoc DL(N);
16341 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
16342 return DAG.getMaskedGather(
16343 N->getVTList(), MGN->getMemoryVT(), DL,
16344 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
16345 MGN->getBasePtr(), Index, ScaleOp},
16346 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
16347
16348 if (narrowIndex(Index, IndexType, DAG))
16349 return DAG.getMaskedGather(
16350 N->getVTList(), MGN->getMemoryVT(), DL,
16351 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
16352 MGN->getBasePtr(), Index, ScaleOp},
16353 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
16354
16355 if (Index.getOpcode() == ISD::BUILD_VECTOR &&
16356 MGN->getExtensionType() == ISD::NON_EXTLOAD && isTypeLegal(VT)) {
16357 // The sequence will be XLenVT, not the type of Index. Tell
16358 // isSimpleVIDSequence this so we avoid overflow.
16359 if (std::optional<VIDSequence> SimpleVID =
16360 isSimpleVIDSequence(Index, Subtarget.getXLen());
16361 SimpleVID && SimpleVID->StepDenominator == 1) {
16362 const int64_t StepNumerator = SimpleVID->StepNumerator;
16363 const int64_t Addend = SimpleVID->Addend;
16364
16365 // Note: We don't need to check alignment here since (by assumption
16366 // from the existence of the gather), our offsets must be sufficiently
16367 // aligned.
16368
16369 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
16370 assert(MGN->getBasePtr()->getValueType(0) == PtrVT);
16371 assert(IndexType == ISD::UNSIGNED_SCALED);
16372 SDValue BasePtr = DAG.getNode(ISD::ADD, DL, PtrVT, MGN->getBasePtr(),
16373 DAG.getConstant(Addend, DL, PtrVT));
16374
16375 SDVTList VTs = DAG.getVTList({VT, MVT::Other});
16376 SDValue IntID =
16377 DAG.getTargetConstant(Intrinsic::riscv_masked_strided_load, DL,
16378 XLenVT);
16379 SDValue Ops[] =
16380 {MGN->getChain(), IntID, MGN->getPassThru(), BasePtr,
16381 DAG.getConstant(StepNumerator, DL, XLenVT), MGN->getMask()};
16382 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs,
16383 Ops, VT, MGN->getMemOperand());
16384 }
16385 }
16386
16387 SmallVector<int> ShuffleMask;
16388 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
16389 matchIndexAsShuffle(VT, Index, MGN->getMask(), ShuffleMask)) {
16390 SDValue Load = DAG.getMaskedLoad(VT, DL, MGN->getChain(),
16391 MGN->getBasePtr(), DAG.getUNDEF(XLenVT),
16392 MGN->getMask(), DAG.getUNDEF(VT),
16393 MGN->getMemoryVT(), MGN->getMemOperand(),
16394 ISD::UNINDEXED, ISD::NON_EXTLOAD);
16395 SDValue Shuffle =
16396 DAG.getVectorShuffle(VT, DL, Load, DAG.getUNDEF(VT), ShuffleMask);
16397 return DAG.getMergeValues({Shuffle, Load.getValue(1)}, DL);
16398 }
16399
16400 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
16401 matchIndexAsWiderOp(VT, Index, MGN->getMask(),
16402 MGN->getMemOperand()->getBaseAlign(), Subtarget)) {
16403 SmallVector<SDValue> NewIndices;
16404 for (unsigned i = 0; i < Index->getNumOperands(); i += 2)
16405 NewIndices.push_back(Index.getOperand(i));
16406 EVT IndexVT = Index.getValueType()
16407 .getHalfNumVectorElementsVT(*DAG.getContext());
16408 Index = DAG.getBuildVector(IndexVT, DL, NewIndices);
16409
16410 unsigned ElementSize = VT.getScalarStoreSize();
16411 EVT WideScalarVT = MVT::getIntegerVT(ElementSize * 8 * 2);
16412 auto EltCnt = VT.getVectorElementCount();
16413 assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!");
16414 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), WideScalarVT,
16415 EltCnt.divideCoefficientBy(2));
16416 SDValue Passthru = DAG.getBitcast(WideVT, MGN->getPassThru());
16417 EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
16418 EltCnt.divideCoefficientBy(2));
16419 SDValue Mask = DAG.getSplat(MaskVT, DL, DAG.getConstant(1, DL, MVT::i1));
16420
16421 SDValue Gather =
16422 DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), WideVT, DL,
16423 {MGN->getChain(), Passthru, Mask, MGN->getBasePtr(),
16424 Index, ScaleOp},
16425 MGN->getMemOperand(), IndexType, ISD::NON_EXTLOAD);
16426 SDValue Result = DAG.getBitcast(VT, Gather.getValue(0));
16427 return DAG.getMergeValues({Result, Gather.getValue(1)}, DL);
16428 }
16429 break;
16430 }
16431 case ISD::MSCATTER: {
16432 const auto *MSN = dyn_cast<MaskedScatterSDNode>(N);
16433 SDValue Index = MSN->getIndex();
16434 SDValue ScaleOp = MSN->getScale();
16435 ISD::MemIndexType IndexType = MSN->getIndexType();
16436 assert(!MSN->isIndexScaled() &&
16437 "Scaled gather/scatter should not be formed");
16438
16439 SDLoc DL(N);
16440 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
16441 return DAG.getMaskedScatter(
16442 N->getVTList(), MSN->getMemoryVT(), DL,
16443 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
16444 Index, ScaleOp},
16445 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
16446
16447 if (narrowIndex(Index, IndexType, DAG))
16448 return DAG.getMaskedScatter(
16449 N->getVTList(), MSN->getMemoryVT(), DL,
16450 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
16451 Index, ScaleOp},
16452 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
16453
16454 EVT VT = MSN->getValue()->getValueType(0);
16455 SmallVector<int> ShuffleMask;
16456 if (!MSN->isTruncatingStore() &&
16457 matchIndexAsShuffle(VT, Index, MSN->getMask(), ShuffleMask)) {
16458 SDValue Shuffle = DAG.getVectorShuffle(VT, DL, MSN->getValue(),
16459 DAG.getUNDEF(VT), ShuffleMask);
16460 return DAG.getMaskedStore(MSN->getChain(), DL, Shuffle, MSN->getBasePtr(),
16461 DAG.getUNDEF(XLenVT), MSN->getMask(),
16462 MSN->getMemoryVT(), MSN->getMemOperand(),
16463 ISD::UNINDEXED, false);
16464 }
16465 break;
16466 }
16467 case ISD::VP_GATHER: {
16468 const auto *VPGN = dyn_cast<VPGatherSDNode>(N);
16469 SDValue Index = VPGN->getIndex();
16470 SDValue ScaleOp = VPGN->getScale();
16471 ISD::MemIndexType IndexType = VPGN->getIndexType();
16472 assert(!VPGN->isIndexScaled() &&
16473 "Scaled gather/scatter should not be formed");
16474
16475 SDLoc DL(N);
16476 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
16477 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
16478 {VPGN->getChain(), VPGN->getBasePtr(), Index,
16479 ScaleOp, VPGN->getMask(),
16480 VPGN->getVectorLength()},
16481 VPGN->getMemOperand(), IndexType);
16482
16483 if (narrowIndex(Index, IndexType, DAG))
16484 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
16485 {VPGN->getChain(), VPGN->getBasePtr(), Index,
16486 ScaleOp, VPGN->getMask(),
16487 VPGN->getVectorLength()},
16488 VPGN->getMemOperand(), IndexType);
16489
16490 break;
16491 }
16492 case ISD::VP_SCATTER: {
16493 const auto *VPSN = dyn_cast<VPScatterSDNode>(N);
16494 SDValue Index = VPSN->getIndex();
16495 SDValue ScaleOp = VPSN->getScale();
16496 ISD::MemIndexType IndexType = VPSN->getIndexType();
16497 assert(!VPSN->isIndexScaled() &&
16498 "Scaled gather/scatter should not be formed");
16499
16500 SDLoc DL(N);
16501 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
16502 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
16503 {VPSN->getChain(), VPSN->getValue(),
16504 VPSN->getBasePtr(), Index, ScaleOp,
16505 VPSN->getMask(), VPSN->getVectorLength()},
16506 VPSN->getMemOperand(), IndexType);
16507
16508 if (narrowIndex(Index, IndexType, DAG))
16509 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
16510 {VPSN->getChain(), VPSN->getValue(),
16511 VPSN->getBasePtr(), Index, ScaleOp,
16512 VPSN->getMask(), VPSN->getVectorLength()},
16513 VPSN->getMemOperand(), IndexType);
16514 break;
16515 }
16516 case RISCVISD::SHL_VL:
16517 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16518 return V;
16519 [[fallthrough]];
16520 case RISCVISD::SRA_VL:
16521 case RISCVISD::SRL_VL: {
16522 SDValue ShAmt = N->getOperand(1);
16523 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
16524 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
16525 SDLoc DL(N);
16526 SDValue VL = N->getOperand(4);
16527 EVT VT = N->getValueType(0);
16528 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
16529 ShAmt.getOperand(1), VL);
16530 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt,
16531 N->getOperand(2), N->getOperand(3), N->getOperand(4));
16532 }
16533 break;
16534 }
16535 case ISD::SRA:
16536 if (SDValue V = performSRACombine(N, DAG, Subtarget))
16537 return V;
16538 [[fallthrough]];
16539 case ISD::SRL:
16540 case ISD::SHL: {
16541 if (N->getOpcode() == ISD::SHL) {
16542 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16543 return V;
16544 }
16545 SDValue ShAmt = N->getOperand(1);
16546 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
16547 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
16548 SDLoc DL(N);
16549 EVT VT = N->getValueType(0);
16550 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
16551 ShAmt.getOperand(1),
16552 DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()));
16553 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt);
16554 }
16555 break;
16556 }
16557 case RISCVISD::ADD_VL:
16558 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16559 return V;
16560 return combineToVWMACC(N, DAG, Subtarget);
16561 case RISCVISD::VWADD_W_VL:
16562 case RISCVISD::VWADDU_W_VL:
16563 case RISCVISD::VWSUB_W_VL:
16564 case RISCVISD::VWSUBU_W_VL:
16565 return performVWADDSUBW_VLCombine(N, DCI, Subtarget);
16566 case RISCVISD::SUB_VL:
16567 case RISCVISD::MUL_VL:
16568 return combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget);
16569 case RISCVISD::VFMADD_VL:
16570 case RISCVISD::VFNMADD_VL:
16571 case RISCVISD::VFMSUB_VL:
16572 case RISCVISD::VFNMSUB_VL:
16573 case RISCVISD::STRICT_VFMADD_VL:
16574 case RISCVISD::STRICT_VFNMADD_VL:
16575 case RISCVISD::STRICT_VFMSUB_VL:
16576 case RISCVISD::STRICT_VFNMSUB_VL:
16577 return performVFMADD_VLCombine(N, DAG, Subtarget);
16578 case RISCVISD::FADD_VL:
16579 case RISCVISD::FSUB_VL:
16580 case RISCVISD::FMUL_VL:
16581 case RISCVISD::VFWADD_W_VL:
16582 case RISCVISD::VFWSUB_W_VL: {
16583 if (N->getValueType(0).isScalableVector() &&
16584 N->getValueType(0).getVectorElementType() == MVT::f32 &&
16585 (Subtarget.hasVInstructionsF16Minimal() &&
16586 !Subtarget.hasVInstructionsF16()))
16587 return SDValue();
16588 return combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget);
16589 }
16590 case ISD::LOAD:
16591 case ISD::STORE: {
16592 if (DCI.isAfterLegalizeDAG())
16593 if (SDValue V = performMemPairCombine(N, DCI))
16594 return V;
16595
16596 if (N->getOpcode() != ISD::STORE)
16597 break;
16598
16599 auto *Store = cast<StoreSDNode>(N);
16600 SDValue Chain = Store->getChain();
16601 EVT MemVT = Store->getMemoryVT();
16602 SDValue Val = Store->getValue();
16603 SDLoc DL(N);
16604
16605 bool IsScalarizable =
16606 MemVT.isFixedLengthVector() && ISD::isNormalStore(Store) &&
16607 Store->isSimple() &&
16608 MemVT.getVectorElementType().bitsLE(Subtarget.getXLenVT()) &&
16609 isPowerOf2_64(MemVT.getSizeInBits()) &&
16610 MemVT.getSizeInBits() <= Subtarget.getXLen();
16611
16612 // If sufficiently aligned we can scalarize stores of constant vectors of
16613 // any power-of-two size up to XLen bits, provided that they aren't too
16614 // expensive to materialize.
16615 // vsetivli zero, 2, e8, m1, ta, ma
16616 // vmv.v.i v8, 4
16617 // vse64.v v8, (a0)
16618 // ->
16619 // li a1, 1028
16620 // sh a1, 0(a0)
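// (1028 == 0x0404, i.e. the two e8 elements of value 4 packed into one
// halfword.)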
16621 if (DCI.isBeforeLegalize() && IsScalarizable &&
16622 DAG.isConstantValueOfAnyType(SDValue(Val.getNode(), 0))) {
16623 // Get the constant vector bits
16624 APInt NewC(Val.getValueSizeInBits(), 0);
16625 uint64_t EltSize = Val.getScalarValueSizeInBits();
16626 for (unsigned i = 0; i < Val.getNumOperands(); i++) {
16627 if (Val.getOperand(i).isUndef())
16628 continue;
16629 NewC.insertBits(Val.getConstantOperandAPInt(i).trunc(EltSize),
16630 i * EltSize);
16631 }
16632 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
16633
16634 if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(), Subtarget,
16635 true) <= 2 &&
16636 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
16637 NewVT, *Store->getMemOperand())) {
16638 SDValue NewV = DAG.getConstant(NewC, DL, NewVT);
16639 return DAG.getStore(Chain, DL, NewV, Store->getBasePtr(),
16640 Store->getPointerInfo(), Store->getOriginalAlign(),
16641 Store->getMemOperand()->getFlags());
16642 }
16643 }
16644
16645 // Similarly, if sufficiently aligned we can scalarize vector copies, e.g.
16646 // vsetivli zero, 2, e16, m1, ta, ma
16647 // vle16.v v8, (a0)
16648 // vse16.v v8, (a1)
16649 if (auto *L = dyn_cast<LoadSDNode>(Val);
16650 L && DCI.isBeforeLegalize() && IsScalarizable && L->isSimple() &&
16651 L->hasNUsesOfValue(1, 0) && L->hasNUsesOfValue(1, 1) &&
16652 Store->getChain() == SDValue(L, 1) && ISD::isNormalLoad(L) &&
16653 L->getMemoryVT() == MemVT) {
16654 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
16655 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
16656 NewVT, *Store->getMemOperand()) &&
16657 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
16658 NewVT, *L->getMemOperand())) {
16659 SDValue NewL = DAG.getLoad(NewVT, DL, L->getChain(), L->getBasePtr(),
16660 L->getPointerInfo(), L->getOriginalAlign(),
16661 L->getMemOperand()->getFlags());
16662 return DAG.getStore(Chain, DL, NewL, Store->getBasePtr(),
16663 Store->getPointerInfo(), Store->getOriginalAlign(),
16664 Store->getMemOperand()->getFlags());
16665 }
16666 }
16667
16668 // Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1.
16669 // vfmv.f.s is represented as extract element from 0. Match it late to avoid
16670 // any illegal types.
16671 if (Val.getOpcode() == RISCVISD::VMV_X_S ||
16672 (DCI.isAfterLegalizeDAG() &&
16673 Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
16674 isNullConstant(Val.getOperand(1)))) {
16675 SDValue Src = Val.getOperand(0);
16676 MVT VecVT = Src.getSimpleValueType();
16677 // VecVT should be scalable and memory VT should match the element type.
16678 if (!Store->isIndexed() && VecVT.isScalableVector() &&
16679 MemVT == VecVT.getVectorElementType()) {
16680 SDLoc DL(N);
16681 MVT MaskVT = getMaskTypeFor(VecVT);
16682 return DAG.getStoreVP(
16683 Store->getChain(), DL, Src, Store->getBasePtr(), Store->getOffset(),
16684 DAG.getConstant(1, DL, MaskVT),
16685 DAG.getConstant(1, DL, Subtarget.getXLenVT()), MemVT,
16686 Store->getMemOperand(), Store->getAddressingMode(),
16687 Store->isTruncatingStore(), /*IsCompress*/ false);
16688 }
16689 }
16690
16691 break;
16692 }
16693 case ISD::SPLAT_VECTOR: {
16694 EVT VT = N->getValueType(0);
16695 // Only perform this combine on legal MVT types.
16696 if (!isTypeLegal(VT))
16697 break;
16698 if (auto Gather = matchSplatAsGather(N->getOperand(0), VT.getSimpleVT(), N,
16699 DAG, Subtarget))
16700 return Gather;
16701 break;
16702 }
16703 case ISD::BUILD_VECTOR:
16704 if (SDValue V = performBUILD_VECTORCombine(N, DAG, Subtarget, *this))
16705 return V;
16706 break;
16707 case ISD::CONCAT_VECTORS:
16708 if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, *this))
16709 return V;
16710 break;
16711 case ISD::INSERT_VECTOR_ELT:
16712 if (SDValue V = performINSERT_VECTOR_ELTCombine(N, DAG, Subtarget, *this))
16713 return V;
16714 break;
16715 case RISCVISD::VFMV_V_F_VL: {
16716 const MVT VT = N->getSimpleValueType(0);
16717 SDValue Passthru = N->getOperand(0);
16718 SDValue Scalar = N->getOperand(1);
16719 SDValue VL = N->getOperand(2);
16720
16721 // If VL is 1, we can use vfmv.s.f.
16722 if (isOneConstant(VL))
16723 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL);
16724 break;
16725 }
16726 case RISCVISD::VMV_V_X_VL: {
16727 const MVT VT = N->getSimpleValueType(0);
16728 SDValue Passthru = N->getOperand(0);
16729 SDValue Scalar = N->getOperand(1);
16730 SDValue VL = N->getOperand(2);
16731
16732 // Tail agnostic VMV.V.X only demands the vector element bitwidth from the
16733 // scalar input.
16734 unsigned ScalarSize = Scalar.getValueSizeInBits();
16735 unsigned EltWidth = VT.getScalarSizeInBits();
16736 if (ScalarSize > EltWidth && Passthru.isUndef())
16737 if (SimplifyDemandedLowBitsHelper(1, EltWidth))
16738 return SDValue(N, 0);
16739
16740 // If VL is 1 and the scalar value won't benefit from immediate, we can
16741 // use vmv.s.x.
16742 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
16743 if (isOneConstant(VL) &&
16744 (!Const || Const->isZero() ||
16745 !Const->getAPIntValue().sextOrTrunc(EltWidth).isSignedIntN(5)))
16746 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL);
16747
16748 break;
16749 }
16750 case RISCVISD::VFMV_S_F_VL: {
16751 SDValue Src = N->getOperand(1);
16752 // Try to remove vector->scalar->vector if the scalar->vector is inserting
16753 // into an undef vector.
16754 // TODO: Could use a vslide or vmv.v.v for non-undef.
16755 if (N->getOperand(0).isUndef() &&
16756 Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
16757 isNullConstant(Src.getOperand(1)) &&
16758 Src.getOperand(0).getValueType().isScalableVector()) {
16759 EVT VT = N->getValueType(0);
16760 EVT SrcVT = Src.getOperand(0).getValueType();
16761 assert(SrcVT.getVectorElementType() == VT.getVectorElementType());
16762 // Widths match, just return the original vector.
16763 if (SrcVT == VT)
16764 return Src.getOperand(0);
16765 // TODO: Use insert_subvector/extract_subvector to change widen/narrow?
16766 }
16767 [[fallthrough]];
16768 }
16769 case RISCVISD::VMV_S_X_VL: {
16770 const MVT VT = N->getSimpleValueType(0);
16771 SDValue Passthru = N->getOperand(0);
16772 SDValue Scalar = N->getOperand(1);
16773 SDValue VL = N->getOperand(2);
16774
16775 // Use M1 or smaller to avoid over constraining register allocation
16776 const MVT M1VT = getLMUL1VT(VT);
16777 if (M1VT.bitsLT(VT)) {
16778 SDValue M1Passthru =
16779 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Passthru,
16780 DAG.getVectorIdxConstant(0, DL));
16781 SDValue Result =
16782 DAG.getNode(N->getOpcode(), DL, M1VT, M1Passthru, Scalar, VL);
16783 Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru, Result,
16784 DAG.getVectorIdxConstant(0, DL));
16785 return Result;
16786 }
16787
16788 // We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or
16789 // higher would involve overly constraining the register allocator for
16790 // no purpose.
16791 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
16792 Const && !Const->isZero() && isInt<5>(Const->getSExtValue()) &&
16793 VT.bitsLE(getLMUL1VT(VT)) && Passthru.isUndef())
16794 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
16795
16796 break;
16797 }
16798 case RISCVISD::VMV_X_S: {
16799 SDValue Vec = N->getOperand(0);
16800 MVT VecVT = N->getOperand(0).getSimpleValueType();
16801 const MVT M1VT = getLMUL1VT(VecVT);
16802 if (M1VT.bitsLT(VecVT)) {
16803 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec,
16804 DAG.getVectorIdxConstant(0, DL));
16805 return DAG.getNode(RISCVISD::VMV_X_S, DL, N->getSimpleValueType(0), Vec);
16806 }
16807 break;
16808 }
16809 case ISD::INTRINSIC_VOID:
16810 case ISD::INTRINSIC_W_CHAIN:
16811 case ISD::INTRINSIC_WO_CHAIN: {
16812 unsigned IntOpNo = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
16813 unsigned IntNo = N->getConstantOperandVal(IntOpNo);
16814 switch (IntNo) {
16815 // By default we do not combine any intrinsic.
16816 default:
16817 return SDValue();
16818 case Intrinsic::riscv_masked_strided_load: {
16819 MVT VT = N->getSimpleValueType(0);
16820 auto *Load = cast<MemIntrinsicSDNode>(N);
16821 SDValue PassThru = N->getOperand(2);
16822 SDValue Base = N->getOperand(3);
16823 SDValue Stride = N->getOperand(4);
16824 SDValue Mask = N->getOperand(5);
16825
16826 // If the stride is equal to the element size in bytes, we can use
16827 // a masked.load.
16828 const unsigned ElementSize = VT.getScalarStoreSize();
16829 if (auto *StrideC = dyn_cast<ConstantSDNode>(Stride);
16830 StrideC && StrideC->getZExtValue() == ElementSize)
16831 return DAG.getMaskedLoad(VT, DL, Load->getChain(), Base,
16832 DAG.getUNDEF(XLenVT), Mask, PassThru,
16833 Load->getMemoryVT(), Load->getMemOperand(),
16834 ISD::UNINDEXED, ISD::NON_EXTLOAD);
16835 return SDValue();
16836 }
16837 case Intrinsic::riscv_masked_strided_store: {
16838 auto *Store = cast<MemIntrinsicSDNode>(N);
16839 SDValue Value = N->getOperand(2);
16840 SDValue Base = N->getOperand(3);
16841 SDValue Stride = N->getOperand(4);
16842 SDValue Mask = N->getOperand(5);
16843
16844 // If the stride is equal to the element size in bytes, we can use
16845 // a masked.store.
16846 const unsigned ElementSize = Value.getValueType().getScalarStoreSize();
16847 if (auto *StrideC = dyn_cast<ConstantSDNode>(Stride);
16848 StrideC && StrideC->getZExtValue() == ElementSize)
16849 return DAG.getMaskedStore(Store->getChain(), DL, Value, Base,
16850 DAG.getUNDEF(XLenVT), Mask,
16851 Value.getValueType(), Store->getMemOperand(),
16852 ISD::UNINDEXED, false);
16853 return SDValue();
16854 }
16855 case Intrinsic::riscv_vcpop:
16856 case Intrinsic::riscv_vcpop_mask:
16857 case Intrinsic::riscv_vfirst:
16858 case Intrinsic::riscv_vfirst_mask: {
16859 SDValue VL = N->getOperand(2);
16860 if (IntNo == Intrinsic::riscv_vcpop_mask ||
16861 IntNo == Intrinsic::riscv_vfirst_mask)
16862 VL = N->getOperand(3);
16863 if (!isNullConstant(VL))
16864 return SDValue();
16865 // If VL is 0, vcpop -> li 0, vfirst -> li -1.
16866 SDLoc DL(N);
16867 EVT VT = N->getValueType(0);
16868 if (IntNo == Intrinsic::riscv_vfirst ||
16869 IntNo == Intrinsic::riscv_vfirst_mask)
16870 return DAG.getConstant(-1, DL, VT);
16871 return DAG.getConstant(0, DL, VT);
16872 }
16873 }
16874 }
16875 case ISD::BITCAST: {
16876 assert(Subtarget.useRVVForFixedLengthVectors());
16877 SDValue N0 = N->getOperand(0);
16878 EVT VT = N->getValueType(0);
16879 EVT SrcVT = N0.getValueType();
16880 // If this is a bitcast between a MVT::v4i1/v2i1/v1i1 and an illegal integer
16881 // type, widen both sides to avoid a trip through memory.
16882 if ((SrcVT == MVT::v1i1 || SrcVT == MVT::v2i1 || SrcVT == MVT::v4i1) &&
16883 VT.isScalarInteger()) {
16884 unsigned NumConcats = 8 / SrcVT.getVectorNumElements();
16885 SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT));
16886 Ops[0] = N0;
16887 SDLoc DL(N);
16888 N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i1, Ops);
16889 N0 = DAG.getBitcast(MVT::i8, N0);
16890 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0);
16891 }
16892
16893 return SDValue();
16894 }
16895 }
16896
16897 return SDValue();
16898}
16899
16900 bool RISCVTargetLowering::shouldTransformSignedTruncationCheck(
16901 EVT XVT, unsigned KeptBits) const {
16902 // For vectors, we don't have a preference.
16903 if (XVT.isVector())
16904 return false;
16905
16906 if (XVT != MVT::i32 && XVT != MVT::i64)
16907 return false;
16908
16909 // We can use sext.w for RV64 or an srai 31 on RV32.
16910 if (KeptBits == 32 || KeptBits == 64)
16911 return true;
16912
16913 // With Zbb we can use sext.h/sext.b.
16914 return Subtarget.hasStdExtZbb() &&
16915 ((KeptBits == 8 && XVT == MVT::i64 && !Subtarget.is64Bit()) ||
16916 KeptBits == 16);
16917}
16918
16919 bool RISCVTargetLowering::isDesirableToCommuteWithShift(
16920 const SDNode *N, CombineLevel Level) const {
16921 assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
16922 N->getOpcode() == ISD::SRL) &&
16923 "Expected shift op");
16924
16925 // The following folds are only desirable if `(OP _, c1 << c2)` can be
16926 // materialised in fewer instructions than `(OP _, c1)`:
16927 //
16928 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
16929 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
16930 SDValue N0 = N->getOperand(0);
16931 EVT Ty = N0.getValueType();
16932 if (Ty.isScalarInteger() &&
16933 (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
16934 auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
16935 auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
16936 if (C1 && C2) {
16937 const APInt &C1Int = C1->getAPIntValue();
16938 APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
16939
16940 // We can materialise `c1 << c2` into an add immediate, so it's "free",
16941 // and the combine should happen, to potentially allow further combines
16942 // later.
16943 if (ShiftedC1Int.getSignificantBits() <= 64 &&
16944 isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
16945 return true;
16946
16947 // We can materialise `c1` in an add immediate, so it's "free", and the
16948 // combine should be prevented.
16949 if (C1Int.getSignificantBits() <= 64 &&
16950 isLegalAddImmediate(C1Int.getSExtValue()))
16951 return false;
16952
16953 // Neither constant will fit into an immediate, so find materialisation
16954 // costs.
16955 int C1Cost =
16956 RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), Subtarget,
16957 /*CompressionCost*/ true);
16958 int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
16959 ShiftedC1Int, Ty.getSizeInBits(), Subtarget,
16960 /*CompressionCost*/ true);
16961
16962 // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
16963 // combine should be prevented.
16964 if (C1Cost < ShiftedC1Cost)
16965 return false;
16966 }
16967 }
16968 return true;
16969}
16970
16971 bool RISCVTargetLowering::targetShrinkDemandedConstant(
16972 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
16973 TargetLoweringOpt &TLO) const {
16974 // Delay this optimization as late as possible.
16975 if (!TLO.LegalOps)
16976 return false;
16977
16978 EVT VT = Op.getValueType();
16979 if (VT.isVector())
16980 return false;
16981
16982 unsigned Opcode = Op.getOpcode();
16983 if (Opcode != ISD::AND && Opcode != ISD::OR && Opcode != ISD::XOR)
16984 return false;
16985
16986 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
16987 if (!C)
16988 return false;
16989
16990 const APInt &Mask = C->getAPIntValue();
16991
16992 // Clear all non-demanded bits initially.
16993 APInt ShrunkMask = Mask & DemandedBits;
16994
16995 // Try to make a smaller immediate by setting undemanded bits.
16996
16997 APInt ExpandedMask = Mask | ~DemandedBits;
16998
16999 auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool {
17000 return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask);
17001 };
17002 auto UseMask = [Mask, Op, &TLO](const APInt &NewMask) -> bool {
17003 if (NewMask == Mask)
17004 return true;
17005 SDLoc DL(Op);
17006 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, Op.getValueType());
17007 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
17008 Op.getOperand(0), NewC);
17009 return TLO.CombineTo(Op, NewOp);
17010 };
17011
17012 // If the shrunk mask fits in sign extended 12 bits, let the target
17013 // independent code apply it.
17014 if (ShrunkMask.isSignedIntN(12))
17015 return false;
17016
17017 // And has a few special cases for zext.
17018 if (Opcode == ISD::AND) {
17019 // Preserve (and X, 0xffff), if zext.h exists use zext.h,
17020 // otherwise use SLLI + SRLI.
17021 APInt NewMask = APInt(Mask.getBitWidth(), 0xffff);
17022 if (IsLegalMask(NewMask))
17023 return UseMask(NewMask);
17024
17025 // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.
17026 if (VT == MVT::i64) {
17027 APInt NewMask = APInt(64, 0xffffffff);
17028 if (IsLegalMask(NewMask))
17029 return UseMask(NewMask);
17030 }
17031 }
17032
17033 // For the remaining optimizations, we need to be able to make a negative
17034 // number through a combination of mask and undemanded bits.
17035 if (!ExpandedMask.isNegative())
17036 return false;
17037
17038 // Compute the fewest number of bits needed to represent the negative number.
17039 unsigned MinSignedBits = ExpandedMask.getSignificantBits();
17040
17041 // Try to make a 12 bit negative immediate. If that fails try to make a 32
17042 // bit negative immediate unless the shrunk immediate already fits in 32 bits.
17043 // If we can't create a simm12, we shouldn't change opaque constants.
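// For example, with DemandedBits 0xffff and mask 0xff00, the mask can be
// widened to 0xffffffffffffff00 (-256), which fits in a simm12 and avoids
// materialising 0xff00 separately.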
17044 APInt NewMask = ShrunkMask;
17045 if (MinSignedBits <= 12)
17046 NewMask.setBitsFrom(11);
17047 else if (!C->isOpaque() && MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
17048 NewMask.setBitsFrom(31);
17049 else
17050 return false;
17051
17052 // Check that our new mask is a subset of the demanded mask.
17053 assert(IsLegalMask(NewMask));
17054 return UseMask(NewMask);
17055}
17056
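// Emulates a generalized bit-reverse (GREV) or generalized OR-combine (GORC)
// on a constant: each stage swaps (or, for GORC, ORs together) adjacent bit
// groups of size 1, 2, 4, 8, 16 and 32 as selected by ShAmt.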
17057static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC) {
17058 static const uint64_t GREVMasks[] = {
17059 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
17060 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
17061
17062 for (unsigned Stage = 0; Stage != 6; ++Stage) {
17063 unsigned Shift = 1 << Stage;
17064 if (ShAmt & Shift) {
17065 uint64_t Mask = GREVMasks[Stage];
17066 uint64_t Res = ((x & Mask) << Shift) | ((x >> Shift) & Mask);
17067 if (IsGORC)
17068 Res |= x;
17069 x = Res;
17070 }
17071 }
17072
17073 return x;
17074}
17075
17076 void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
17077 KnownBits &Known,
17078 const APInt &DemandedElts,
17079 const SelectionDAG &DAG,
17080 unsigned Depth) const {
17081 unsigned BitWidth = Known.getBitWidth();
17082 unsigned Opc = Op.getOpcode();
17083 assert((Opc >= ISD::BUILTIN_OP_END ||
17084 Opc == ISD::INTRINSIC_WO_CHAIN ||
17085 Opc == ISD::INTRINSIC_W_CHAIN ||
17086 Opc == ISD::INTRINSIC_VOID) &&
17087 "Should use MaskedValueIsZero if you don't know whether Op"
17088 " is a target node!");
17089
17090 Known.resetAll();
17091 switch (Opc) {
17092 default: break;
17093 case RISCVISD::SELECT_CC: {
17094 Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1);
17095 // If we don't know any bits, early out.
17096 if (Known.isUnknown())
17097 break;
17098 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1);
17099
17100 // Only known if known in both the LHS and RHS.
17101 Known = Known.intersectWith(Known2);
17102 break;
17103 }
17104 case RISCVISD::CZERO_EQZ:
17105 case RISCVISD::CZERO_NEZ:
17106 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
17107 // Result is either all zero or operand 0. We can propagate zeros, but not
17108 // ones.
17109 Known.One.clearAllBits();
17110 break;
17111 case RISCVISD::REMUW: {
17112 KnownBits Known2;
17113 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
17114 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
17115 // We only care about the lower 32 bits.
17116 Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32));
17117 // Restore the original width by sign extending.
17118 Known = Known.sext(BitWidth);
17119 break;
17120 }
17121 case RISCVISD::DIVUW: {
17122 KnownBits Known2;
17123 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
17124 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
17125 // We only care about the lower 32 bits.
17126 Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32));
17127 // Restore the original width by sign extending.
17128 Known = Known.sext(BitWidth);
17129 break;
17130 }
17131 case RISCVISD::SLLW: {
17132 KnownBits Known2;
17133 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
17134 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
17135 Known = KnownBits::shl(Known.trunc(32), Known2.trunc(5).zext(32));
17136 // Restore the original width by sign extending.
17137 Known = Known.sext(BitWidth);
17138 break;
17139 }
17140 case RISCVISD::CTZW: {
17141 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
17142 unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros();
17143 unsigned LowBits = llvm::bit_width(PossibleTZ);
17144 Known.Zero.setBitsFrom(LowBits);
17145 break;
17146 }
17147 case RISCVISD::CLZW: {
17148 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
17149 unsigned PossibleLZ = Known2.trunc(32).countMaxLeadingZeros();
17150 unsigned LowBits = llvm::bit_width(PossibleLZ);
17151 Known.Zero.setBitsFrom(LowBits);
17152 break;
17153 }
17154 case RISCVISD::BREV8:
17155 case RISCVISD::ORC_B: {
17156 // FIXME: This is based on the non-ratified Zbp GREV and GORC where a
17157 // control value of 7 is equivalent to brev8 and orc.b.
17158 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
17159 bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B;
17160 // To compute zeros, we need to invert the value and invert it back after.
17161 Known.Zero =
17162 ~computeGREVOrGORC(~Known.Zero.getZExtValue(), 7, IsGORC);
17163 Known.One = computeGREVOrGORC(Known.One.getZExtValue(), 7, IsGORC);
17164 break;
17165 }
17166 case RISCVISD::READ_VLENB: {
17167 // We can use the minimum and maximum VLEN values to bound VLENB. We
17168 // know VLEN must be a power of two.
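    // For example, if VLEN is known to be between 128 and 512 bits, VLENB is
    // between 16 and 64 bytes, so the low 4 bits and every bit above bit 6
    // are known to be zero.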
17169 const unsigned MinVLenB = Subtarget.getRealMinVLen() / 8;
17170 const unsigned MaxVLenB = Subtarget.getRealMaxVLen() / 8;
17171 assert(MinVLenB > 0 && "READ_VLENB without vector extension enabled?");
17172 Known.Zero.setLowBits(Log2_32(MinVLenB));
17173 Known.Zero.setBitsFrom(Log2_32(MaxVLenB)+1);
17174 if (MaxVLenB == MinVLenB)
17175 Known.One.setBit(Log2_32(MinVLenB));
17176 break;
17177 }
17178 case RISCVISD::FCLASS: {
17179 // fclass will only set one of the low 10 bits.
17180 Known.Zero.setBitsFrom(10);
17181 break;
17182 }
17183  case ISD::INTRINSIC_W_CHAIN:
17184  case ISD::INTRINSIC_WO_CHAIN: {
17185    unsigned IntNo =
17186 Op.getConstantOperandVal(Opc == ISD::INTRINSIC_WO_CHAIN ? 0 : 1);
17187 switch (IntNo) {
17188 default:
17189 // We can't do anything for most intrinsics.
17190 break;
17191 case Intrinsic::riscv_vsetvli:
17192 case Intrinsic::riscv_vsetvlimax: {
17193 bool HasAVL = IntNo == Intrinsic::riscv_vsetvli;
17194 unsigned VSEW = Op.getConstantOperandVal(HasAVL + 1);
17195 RISCVII::VLMUL VLMUL =
17196 static_cast<RISCVII::VLMUL>(Op.getConstantOperandVal(HasAVL + 2));
17197 unsigned SEW = RISCVVType::decodeVSEW(VSEW);
17198 auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMUL);
17199 uint64_t MaxVL = Subtarget.getRealMaxVLen() / SEW;
17200 MaxVL = (Fractional) ? MaxVL / LMul : MaxVL * LMul;
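    // For example, with a maximum VLEN of 512, SEW=8 and LMUL=8, MaxVL is
    // 512, so every bit from bit 10 upwards is known to be zero.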
17201
17202    // The result of vsetvli must not be larger than AVL.
17203 if (HasAVL && isa<ConstantSDNode>(Op.getOperand(1)))
17204 MaxVL = std::min(MaxVL, Op.getConstantOperandVal(1));
17205
17206 unsigned KnownZeroFirstBit = Log2_32(MaxVL) + 1;
17207 if (BitWidth > KnownZeroFirstBit)
17208 Known.Zero.setBitsFrom(KnownZeroFirstBit);
17209 break;
17210 }
17211 }
17212 break;
17213 }
17214 }
17215}
17216
17217unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
17218    SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
17219 unsigned Depth) const {
17220 switch (Op.getOpcode()) {
17221 default:
17222 break;
17223 case RISCVISD::SELECT_CC: {
17224 unsigned Tmp =
17225 DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1);
17226 if (Tmp == 1) return 1; // Early out.
17227 unsigned Tmp2 =
17228 DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1);
17229 return std::min(Tmp, Tmp2);
17230 }
17231  case RISCVISD::CZERO_EQZ:
17232  case RISCVISD::CZERO_NEZ:
17233    // Output is either all zero or operand 0. We can propagate sign bit count
17234 // from operand 0.
17235 return DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
17236 case RISCVISD::ABSW: {
17237 // We expand this at isel to negw+max. The result will have 33 sign bits
17238 // if the input has at least 33 sign bits.
17239 unsigned Tmp =
17240 DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
17241 if (Tmp < 33) return 1;
17242 return 33;
17243 }
17244 case RISCVISD::SLLW:
17245 case RISCVISD::SRAW:
17246 case RISCVISD::SRLW:
17247 case RISCVISD::DIVW:
17248 case RISCVISD::DIVUW:
17249 case RISCVISD::REMUW:
17250 case RISCVISD::ROLW:
17251 case RISCVISD::RORW:
17252  case RISCVISD::FCVT_W_RV64:
17253  case RISCVISD::FCVT_WU_RV64:
17254  case RISCVISD::STRICT_FCVT_W_RV64:
17255  case RISCVISD::STRICT_FCVT_WU_RV64:
17256    // TODO: As the result is sign-extended, this is conservatively correct. A
17257 // more precise answer could be calculated for SRAW depending on known
17258 // bits in the shift amount.
17259 return 33;
17260 case RISCVISD::VMV_X_S: {
17261 // The number of sign bits of the scalar result is computed by obtaining the
17262 // element type of the input vector operand, subtracting its width from the
17263 // XLEN, and then adding one (sign bit within the element type). If the
17264 // element type is wider than XLen, the least-significant XLEN bits are
17265 // taken.
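    // For example, extracting an i8 element on RV64 yields at least
    // 64 - 8 + 1 = 57 sign bits.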
17266 unsigned XLen = Subtarget.getXLen();
17267 unsigned EltBits = Op.getOperand(0).getScalarValueSizeInBits();
17268 if (EltBits <= XLen)
17269 return XLen - EltBits + 1;
17270 break;
17271 }
17272  case ISD::INTRINSIC_W_CHAIN: {
17273    unsigned IntNo = Op.getConstantOperandVal(1);
17274 switch (IntNo) {
17275 default:
17276 break;
17277 case Intrinsic::riscv_masked_atomicrmw_xchg_i64:
17278 case Intrinsic::riscv_masked_atomicrmw_add_i64:
17279 case Intrinsic::riscv_masked_atomicrmw_sub_i64:
17280 case Intrinsic::riscv_masked_atomicrmw_nand_i64:
17281 case Intrinsic::riscv_masked_atomicrmw_max_i64:
17282 case Intrinsic::riscv_masked_atomicrmw_min_i64:
17283 case Intrinsic::riscv_masked_atomicrmw_umax_i64:
17284 case Intrinsic::riscv_masked_atomicrmw_umin_i64:
17285 case Intrinsic::riscv_masked_cmpxchg_i64:
17286 // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
17287 // narrow atomic operation. These are implemented using atomic
17288 // operations at the minimum supported atomicrmw/cmpxchg width whose
17289 // result is then sign extended to XLEN. With +A, the minimum width is
17290      // 32 for both RV64 and RV32.
17291 assert(Subtarget.getXLen() == 64);
17293 assert(Subtarget.hasStdExtA());
17294 return 33;
17295 }
17296 break;
17297 }
17298 }
17299
17300 return 1;
17301}
17302
17303bool RISCVTargetLowering::canCreateUndefOrPoisonForTargetNode(
17304    SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
17305 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
17306
17307 // TODO: Add more target nodes.
17308 switch (Op.getOpcode()) {
17309  case RISCVISD::SELECT_CC:
17310    // Integer select_cc cannot create poison.
17311 // TODO: What are the FP poison semantics?
17312 // TODO: This instruction blocks poison from the unselected operand, can
17313 // we do anything with that?
17314 return !Op.getValueType().isInteger();
17315 }
17316  return TargetLowering::canCreateUndefOrPoisonForTargetNode(
17317      Op, DemandedElts, DAG, PoisonOnly, ConsiderFlags, Depth);
17318}
17319
17320const Constant *
17321RISCVTargetLowering::getTargetConstantFromLoad(LoadSDNode *Ld) const {
17322  assert(Ld && "Unexpected null LoadSDNode");
17323 if (!ISD::isNormalLoad(Ld))
17324 return nullptr;
17325
17326 SDValue Ptr = Ld->getBasePtr();
17327
17328 // Only constant pools with no offset are supported.
17329 auto GetSupportedConstantPool = [](SDValue Ptr) -> ConstantPoolSDNode * {
17330 auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
17331 if (!CNode || CNode->isMachineConstantPoolEntry() ||
17332 CNode->getOffset() != 0)
17333 return nullptr;
17334
17335 return CNode;
17336 };
17337
17338 // Simple case, LLA.
17339 if (Ptr.getOpcode() == RISCVISD::LLA) {
17340 auto *CNode = GetSupportedConstantPool(Ptr);
17341 if (!CNode || CNode->getTargetFlags() != 0)
17342 return nullptr;
17343
17344 return CNode->getConstVal();
17345 }
17346
17347 // Look for a HI and ADD_LO pair.
17348 if (Ptr.getOpcode() != RISCVISD::ADD_LO ||
17349 Ptr.getOperand(0).getOpcode() != RISCVISD::HI)
17350 return nullptr;
17351
17352 auto *CNodeLo = GetSupportedConstantPool(Ptr.getOperand(1));
17353 auto *CNodeHi = GetSupportedConstantPool(Ptr.getOperand(0).getOperand(0));
17354
17355 if (!CNodeLo || CNodeLo->getTargetFlags() != RISCVII::MO_LO ||
17356 !CNodeHi || CNodeHi->getTargetFlags() != RISCVII::MO_HI)
17357 return nullptr;
17358
17359 if (CNodeLo->getConstVal() != CNodeHi->getConstVal())
17360 return nullptr;
17361
17362 return CNodeLo->getConstVal();
17363}
17364
17365static MachineBasicBlock *emitReadCounterWidePseudo(MachineInstr &MI,
17366                                                    MachineBasicBlock *BB) {
17367 assert(MI.getOpcode() == RISCV::ReadCounterWide && "Unexpected instruction");
17368
17369 // To read a 64-bit counter CSR on a 32-bit target, we read the two halves.
17370 // Should the count have wrapped while it was being read, we need to try
17371 // again.
17372 // For example:
17373 // ```
17374 // read:
17375 // csrrs x3, counterh # load high word of counter
17376 // csrrs x2, counter # load low word of counter
17377 // csrrs x4, counterh # load high word of counter
17378 // bne x3, x4, read # check if high word reads match, otherwise try again
17379 // ```
17380
17381 MachineFunction &MF = *BB->getParent();
17382  const BasicBlock *LLVMBB = BB->getBasicBlock();
17383  MachineFunction::iterator It = ++BB->getIterator();
17384
17385 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVMBB);
17386 MF.insert(It, LoopMBB);
17387
17388 MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVMBB);
17389 MF.insert(It, DoneMBB);
17390
17391 // Transfer the remainder of BB and its successor edges to DoneMBB.
17392 DoneMBB->splice(DoneMBB->begin(), BB,
17393                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
17394  DoneMBB->transferSuccessorsAndUpdatePHIs(BB);
17395
17396 BB->addSuccessor(LoopMBB);
17397
17398  MachineRegisterInfo &RegInfo = MF.getRegInfo();
17399  Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
17400 Register LoReg = MI.getOperand(0).getReg();
17401 Register HiReg = MI.getOperand(1).getReg();
17402 int64_t LoCounter = MI.getOperand(2).getImm();
17403 int64_t HiCounter = MI.getOperand(3).getImm();
17404 DebugLoc DL = MI.getDebugLoc();
17405
17406  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
17407  BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
17408 .addImm(HiCounter)
17409 .addReg(RISCV::X0);
17410 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
17411 .addImm(LoCounter)
17412 .addReg(RISCV::X0);
17413 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
17414 .addImm(HiCounter)
17415 .addReg(RISCV::X0);
17416
17417 BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
17418 .addReg(HiReg)
17419 .addReg(ReadAgainReg)
17420 .addMBB(LoopMBB);
17421
17422 LoopMBB->addSuccessor(LoopMBB);
17423 LoopMBB->addSuccessor(DoneMBB);
17424
17425 MI.eraseFromParent();
17426
17427 return DoneMBB;
17428}
17429
17430static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
17431                                             MachineBasicBlock *BB,
17432                                             const RISCVSubtarget &Subtarget) {
17433 assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
17434
17435 MachineFunction &MF = *BB->getParent();
17436 DebugLoc DL = MI.getDebugLoc();
17439 Register LoReg = MI.getOperand(0).getReg();
17440 Register HiReg = MI.getOperand(1).getReg();
17441 Register SrcReg = MI.getOperand(2).getReg();
17442
17443 const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
17444 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
17445
17446 TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
17447 RI, Register());
17449 MachineMemOperand *MMOLo =
17453 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
17454 .addFrameIndex(FI)
17455 .addImm(0)
17456 .addMemOperand(MMOLo);
17457 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
17458 .addFrameIndex(FI)
17459 .addImm(4)
17460 .addMemOperand(MMOHi);
17461 MI.eraseFromParent(); // The pseudo instruction is gone now.
17462 return BB;
17463}
17464
17465static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
17466                                                 MachineBasicBlock *BB,
17467                                                 const RISCVSubtarget &Subtarget) {
17468 assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
17469 "Unexpected instruction");
17470
17471 MachineFunction &MF = *BB->getParent();
17472 DebugLoc DL = MI.getDebugLoc();
17475 Register DstReg = MI.getOperand(0).getReg();
17476 Register LoReg = MI.getOperand(1).getReg();
17477 Register HiReg = MI.getOperand(2).getReg();
17478
17479 const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
17480 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
17481
17483 MachineMemOperand *MMOLo =
17487 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
17488 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
17489 .addFrameIndex(FI)
17490 .addImm(0)
17491 .addMemOperand(MMOLo);
17492 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
17493 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
17494 .addFrameIndex(FI)
17495 .addImm(4)
17496 .addMemOperand(MMOHi);
17497 TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI, Register());
17498 MI.eraseFromParent(); // The pseudo instruction is gone now.
17499 return BB;
17500}
17501
17502static bool isSelectPseudo(MachineInstr &MI) {
17503  switch (MI.getOpcode()) {
17504 default:
17505 return false;
17506 case RISCV::Select_GPR_Using_CC_GPR:
17507 case RISCV::Select_FPR16_Using_CC_GPR:
17508 case RISCV::Select_FPR16INX_Using_CC_GPR:
17509 case RISCV::Select_FPR32_Using_CC_GPR:
17510 case RISCV::Select_FPR32INX_Using_CC_GPR:
17511 case RISCV::Select_FPR64_Using_CC_GPR:
17512 case RISCV::Select_FPR64INX_Using_CC_GPR:
17513 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
17514 return true;
17515 }
17516}
17517
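// Emit a "quiet" floating-point compare: save FFLAGS, perform the relational
// compare (FLT/FLE raise the invalid flag even for quiet NaNs), restore FFLAGS
// to discard that flag, then issue an FEQ on the same operands so that
// signaling NaNs still raise the invalid-operation exception.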
17518static MachineBasicBlock *emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB,
17519                                        unsigned RelOpcode, unsigned EqOpcode,
17520 const RISCVSubtarget &Subtarget) {
17521 DebugLoc DL = MI.getDebugLoc();
17522 Register DstReg = MI.getOperand(0).getReg();
17523 Register Src1Reg = MI.getOperand(1).getReg();
17524 Register Src2Reg = MI.getOperand(2).getReg();
17526 Register SavedFFlags = MRI.createVirtualRegister(&RISCV::GPRRegClass);
17528
17529 // Save the current FFLAGS.
17530 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFlags);
17531
17532 auto MIB = BuildMI(*BB, MI, DL, TII.get(RelOpcode), DstReg)
17533 .addReg(Src1Reg)
17534 .addReg(Src2Reg);
17537
17538 // Restore the FFLAGS.
17539 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
17540 .addReg(SavedFFlags, RegState::Kill);
17541
17542 // Issue a dummy FEQ opcode to raise exception for signaling NaNs.
17543 auto MIB2 = BuildMI(*BB, MI, DL, TII.get(EqOpcode), RISCV::X0)
17544 .addReg(Src1Reg, getKillRegState(MI.getOperand(1).isKill()))
17545 .addReg(Src2Reg, getKillRegState(MI.getOperand(2).isKill()));
17548
17549 // Erase the pseudoinstruction.
17550 MI.eraseFromParent();
17551 return BB;
17552}
17553
17554static MachineBasicBlock *
17555EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second,
17556                          MachineBasicBlock *ThisMBB,
17557 const RISCVSubtarget &Subtarget) {
17558 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5)
17559 // Without this, custom-inserter would have generated:
17560 //
17561 // A
17562 // | \
17563 // | B
17564 // | /
17565 // C
17566 // | \
17567 // | D
17568 // | /
17569 // E
17570 //
17571 // A: X = ...; Y = ...
17572 // B: empty
17573 // C: Z = PHI [X, A], [Y, B]
17574 // D: empty
17575 // E: PHI [X, C], [Z, D]
17576 //
17577 // If we lower both Select_FPRX_ in a single step, we can instead generate:
17578 //
17579 // A
17580 // | \
17581 // | C
17582 // | /|
17583 // |/ |
17584 // | |
17585 // | D
17586 // | /
17587 // E
17588 //
17589 // A: X = ...; Y = ...
17590 // D: empty
17591 // E: PHI [X, A], [X, C], [Y, D]
17592
17593 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
17594 const DebugLoc &DL = First.getDebugLoc();
17595 const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
17596 MachineFunction *F = ThisMBB->getParent();
17597 MachineBasicBlock *FirstMBB = F->CreateMachineBasicBlock(LLVM_BB);
17598 MachineBasicBlock *SecondMBB = F->CreateMachineBasicBlock(LLVM_BB);
17599 MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
17600 MachineFunction::iterator It = ++ThisMBB->getIterator();
17601 F->insert(It, FirstMBB);
17602 F->insert(It, SecondMBB);
17603 F->insert(It, SinkMBB);
17604
17605 // Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
17606 SinkMBB->splice(SinkMBB->begin(), ThisMBB,
17608 ThisMBB->end());
17609 SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB);
17610
17611 // Fallthrough block for ThisMBB.
17612 ThisMBB->addSuccessor(FirstMBB);
17613 // Fallthrough block for FirstMBB.
17614 FirstMBB->addSuccessor(SecondMBB);
17615 ThisMBB->addSuccessor(SinkMBB);
17616 FirstMBB->addSuccessor(SinkMBB);
17617 // This is fallthrough.
17618 SecondMBB->addSuccessor(SinkMBB);
17619
17620 auto FirstCC = static_cast<RISCVCC::CondCode>(First.getOperand(3).getImm());
17621 Register FLHS = First.getOperand(1).getReg();
17622 Register FRHS = First.getOperand(2).getReg();
17623 // Insert appropriate branch.
17624 BuildMI(FirstMBB, DL, TII.getBrCond(FirstCC))
17625 .addReg(FLHS)
17626 .addReg(FRHS)
17627 .addMBB(SinkMBB);
17628
17629 Register SLHS = Second.getOperand(1).getReg();
17630 Register SRHS = Second.getOperand(2).getReg();
17631 Register Op1Reg4 = First.getOperand(4).getReg();
17632 Register Op1Reg5 = First.getOperand(5).getReg();
17633
17634 auto SecondCC = static_cast<RISCVCC::CondCode>(Second.getOperand(3).getImm());
17635 // Insert appropriate branch.
17636 BuildMI(ThisMBB, DL, TII.getBrCond(SecondCC))
17637 .addReg(SLHS)
17638 .addReg(SRHS)
17639 .addMBB(SinkMBB);
17640
17641 Register DestReg = Second.getOperand(0).getReg();
17642 Register Op2Reg4 = Second.getOperand(4).getReg();
17643 BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII.get(RISCV::PHI), DestReg)
17644 .addReg(Op2Reg4)
17645 .addMBB(ThisMBB)
17646 .addReg(Op1Reg4)
17647 .addMBB(FirstMBB)
17648 .addReg(Op1Reg5)
17649 .addMBB(SecondMBB);
17650
17651 // Now remove the Select_FPRX_s.
17652 First.eraseFromParent();
17653 Second.eraseFromParent();
17654 return SinkMBB;
17655}
17656
17657static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
17658                                           MachineBasicBlock *BB,
17659                                           const RISCVSubtarget &Subtarget) {
17660 // To "insert" Select_* instructions, we actually have to insert the triangle
17661 // control-flow pattern. The incoming instructions know the destination vreg
17662 // to set, the condition code register to branch on, the true/false values to
17663 // select between, and the condcode to use to select the appropriate branch.
17664 //
17665 // We produce the following control flow:
17666 // HeadMBB
17667 // | \
17668 // | IfFalseMBB
17669 // | /
17670 // TailMBB
17671 //
17672 // When we find a sequence of selects we attempt to optimize their emission
17673 // by sharing the control flow. Currently we only handle cases where we have
17674 // multiple selects with the exact same condition (same LHS, RHS and CC).
17675 // The selects may be interleaved with other instructions if the other
17676 // instructions meet some requirements we deem safe:
17677  // - They are not pseudo instructions.
17678  // - They are debug instructions, or otherwise:
17679  // - They do not have side-effects, do not access memory, and their inputs do
17680  //   not depend on the results of the select pseudo-instructions.
17681 // The TrueV/FalseV operands of the selects cannot depend on the result of
17682 // previous selects in the sequence.
17683 // These conditions could be further relaxed. See the X86 target for a
17684 // related approach and more information.
17685 //
17686 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
17687 // is checked here and handled by a separate function -
17688 // EmitLoweredCascadedSelect.
17689 Register LHS = MI.getOperand(1).getReg();
17690 Register RHS = MI.getOperand(2).getReg();
17691 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
17692
17693 SmallVector<MachineInstr *, 4> SelectDebugValues;
17694 SmallSet<Register, 4> SelectDests;
17695 SelectDests.insert(MI.getOperand(0).getReg());
17696
17697 MachineInstr *LastSelectPseudo = &MI;
17698 auto Next = next_nodbg(MI.getIterator(), BB->instr_end());
17699 if (MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR && Next != BB->end() &&
17700 Next->getOpcode() == MI.getOpcode() &&
17701 Next->getOperand(5).getReg() == MI.getOperand(0).getReg() &&
17702 Next->getOperand(5).isKill()) {
17703 return EmitLoweredCascadedSelect(MI, *Next, BB, Subtarget);
17704 }
17705
17706 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
17707 SequenceMBBI != E; ++SequenceMBBI) {
17708 if (SequenceMBBI->isDebugInstr())
17709 continue;
17710 if (isSelectPseudo(*SequenceMBBI)) {
17711 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
17712 SequenceMBBI->getOperand(2).getReg() != RHS ||
17713 SequenceMBBI->getOperand(3).getImm() != CC ||
17714 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
17715 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
17716 break;
17717 LastSelectPseudo = &*SequenceMBBI;
17718 SequenceMBBI->collectDebugValues(SelectDebugValues);
17719 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
17720 continue;
17721 }
17722 if (SequenceMBBI->hasUnmodeledSideEffects() ||
17723 SequenceMBBI->mayLoadOrStore() ||
17724 SequenceMBBI->usesCustomInsertionHook())
17725 break;
17726 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
17727 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
17728 }))
17729 break;
17730 }
17731
17732 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
17733 const BasicBlock *LLVM_BB = BB->getBasicBlock();
17734  DebugLoc DL = MI.getDebugLoc();
17735  MachineFunction::iterator I = ++BB->getIterator();
17736
17737 MachineBasicBlock *HeadMBB = BB;
17738 MachineFunction *F = BB->getParent();
17739 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
17740 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
17741
17742 F->insert(I, IfFalseMBB);
17743 F->insert(I, TailMBB);
17744
17745 // Transfer debug instructions associated with the selects to TailMBB.
17746 for (MachineInstr *DebugInstr : SelectDebugValues) {
17747 TailMBB->push_back(DebugInstr->removeFromParent());
17748 }
17749
17750 // Move all instructions after the sequence to TailMBB.
17751 TailMBB->splice(TailMBB->end(), HeadMBB,
17752 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
17753 // Update machine-CFG edges by transferring all successors of the current
17754 // block to the new block which will contain the Phi nodes for the selects.
17755 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
17756 // Set the successors for HeadMBB.
17757 HeadMBB->addSuccessor(IfFalseMBB);
17758 HeadMBB->addSuccessor(TailMBB);
17759
17760 // Insert appropriate branch.
17761 BuildMI(HeadMBB, DL, TII.getBrCond(CC))
17762 .addReg(LHS)
17763 .addReg(RHS)
17764 .addMBB(TailMBB);
17765
17766 // IfFalseMBB just falls through to TailMBB.
17767 IfFalseMBB->addSuccessor(TailMBB);
17768
17769 // Create PHIs for all of the select pseudo-instructions.
17770 auto SelectMBBI = MI.getIterator();
17771 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
17772 auto InsertionPoint = TailMBB->begin();
17773 while (SelectMBBI != SelectEnd) {
17774 auto Next = std::next(SelectMBBI);
17775 if (isSelectPseudo(*SelectMBBI)) {
17776 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
17777 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
17778 TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
17779 .addReg(SelectMBBI->getOperand(4).getReg())
17780 .addMBB(HeadMBB)
17781 .addReg(SelectMBBI->getOperand(5).getReg())
17782 .addMBB(IfFalseMBB);
17783 SelectMBBI->eraseFromParent();
17784 }
17785 SelectMBBI = Next;
17786 }
17787
17788 F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
17789 return TailMBB;
17790}
17791
17792// Helper to find Masked Pseudo instruction from MC instruction, LMUL and SEW.
17793static const RISCV::RISCVMaskedPseudoInfo *
17794lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVII::VLMUL LMul, unsigned SEW) {
17795  const RISCVVInversePseudosTable::PseudoInfo *Inverse =
17796      RISCVVInversePseudosTable::getBaseInfo(MCOpcode, LMul, SEW);
17797 assert(Inverse && "Unexpected LMUL and SEW pair for instruction");
17798 const RISCV::RISCVMaskedPseudoInfo *Masked =
17799 RISCV::lookupMaskedIntrinsicByUnmasked(Inverse->Pseudo);
17800 assert(Masked && "Could not find masked instruction for LMUL and SEW pair");
17801 return Masked;
17802}
17803
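// Lower a PseudoVFROUND_NOEXCEPT_* pseudo: save FFLAGS, round by converting to
// integer and back (a masked VFCVT_X_F followed by the matching masked
// VFCVT_F_X, both with dynamic rounding mode), then restore FFLAGS so that no
// floating-point exception flags are observed.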
17804static MachineBasicBlock *emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI,
17805                                                    MachineBasicBlock *BB,
17806                                                    unsigned CVTXOpc) {
17807 DebugLoc DL = MI.getDebugLoc();
17808
17810
17812 Register SavedFFLAGS = MRI.createVirtualRegister(&RISCV::GPRRegClass);
17813
17814 // Save the old value of FFLAGS.
17815 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFLAGS);
17816
17817 assert(MI.getNumOperands() == 7);
17818
17819 // Emit a VFCVT_X_F
17820 const TargetRegisterInfo *TRI =
17822 const TargetRegisterClass *RC = MI.getRegClassConstraint(0, &TII, TRI);
17823 Register Tmp = MRI.createVirtualRegister(RC);
17824 BuildMI(*BB, MI, DL, TII.get(CVTXOpc), Tmp)
17825 .add(MI.getOperand(1))
17826 .add(MI.getOperand(2))
17827 .add(MI.getOperand(3))
17828 .add(MachineOperand::CreateImm(7)) // frm = DYN
17829 .add(MI.getOperand(4))
17830 .add(MI.getOperand(5))
17831 .add(MI.getOperand(6))
17832 .add(MachineOperand::CreateReg(RISCV::FRM,
17833 /*IsDef*/ false,
17834 /*IsImp*/ true));
17835
17836 // Emit a VFCVT_F_X
17837 RISCVII::VLMUL LMul = RISCVII::getLMul(MI.getDesc().TSFlags);
17838 unsigned Log2SEW = MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
17839 // There is no E8 variant for VFCVT_F_X.
17840 assert(Log2SEW >= 4);
17841 unsigned CVTFOpc =
17842 lookupMaskedIntrinsic(RISCV::VFCVT_F_X_V, LMul, 1 << Log2SEW)
17843 ->MaskedPseudo;
17844
17845 BuildMI(*BB, MI, DL, TII.get(CVTFOpc))
17846 .add(MI.getOperand(0))
17847 .add(MI.getOperand(1))
17848 .addReg(Tmp)
17849 .add(MI.getOperand(3))
17850 .add(MachineOperand::CreateImm(7)) // frm = DYN
17851 .add(MI.getOperand(4))
17852 .add(MI.getOperand(5))
17853 .add(MI.getOperand(6))
17854 .add(MachineOperand::CreateReg(RISCV::FRM,
17855 /*IsDef*/ false,
17856 /*IsImp*/ true));
17857
17858 // Restore FFLAGS.
17859 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
17860 .addReg(SavedFFLAGS, RegState::Kill);
17861
17862 // Erase the pseudoinstruction.
17863 MI.eraseFromParent();
17864 return BB;
17865}
17866
17867static MachineBasicBlock *emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB,
17868                                     const RISCVSubtarget &Subtarget) {
17869 unsigned CmpOpc, F2IOpc, I2FOpc, FSGNJOpc, FSGNJXOpc;
17870 const TargetRegisterClass *RC;
17871 switch (MI.getOpcode()) {
17872 default:
17873 llvm_unreachable("Unexpected opcode");
17874 case RISCV::PseudoFROUND_H:
17875 CmpOpc = RISCV::FLT_H;
17876 F2IOpc = RISCV::FCVT_W_H;
17877 I2FOpc = RISCV::FCVT_H_W;
17878 FSGNJOpc = RISCV::FSGNJ_H;
17879 FSGNJXOpc = RISCV::FSGNJX_H;
17880 RC = &RISCV::FPR16RegClass;
17881 break;
17882 case RISCV::PseudoFROUND_H_INX:
17883 CmpOpc = RISCV::FLT_H_INX;
17884 F2IOpc = RISCV::FCVT_W_H_INX;
17885 I2FOpc = RISCV::FCVT_H_W_INX;
17886 FSGNJOpc = RISCV::FSGNJ_H_INX;
17887 FSGNJXOpc = RISCV::FSGNJX_H_INX;
17888 RC = &RISCV::GPRF16RegClass;
17889 break;
17890 case RISCV::PseudoFROUND_S:
17891 CmpOpc = RISCV::FLT_S;
17892 F2IOpc = RISCV::FCVT_W_S;
17893 I2FOpc = RISCV::FCVT_S_W;
17894 FSGNJOpc = RISCV::FSGNJ_S;
17895 FSGNJXOpc = RISCV::FSGNJX_S;
17896 RC = &RISCV::FPR32RegClass;
17897 break;
17898 case RISCV::PseudoFROUND_S_INX:
17899 CmpOpc = RISCV::FLT_S_INX;
17900 F2IOpc = RISCV::FCVT_W_S_INX;
17901 I2FOpc = RISCV::FCVT_S_W_INX;
17902 FSGNJOpc = RISCV::FSGNJ_S_INX;
17903 FSGNJXOpc = RISCV::FSGNJX_S_INX;
17904 RC = &RISCV::GPRF32RegClass;
17905 break;
17906 case RISCV::PseudoFROUND_D:
17907 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
17908 CmpOpc = RISCV::FLT_D;
17909 F2IOpc = RISCV::FCVT_L_D;
17910 I2FOpc = RISCV::FCVT_D_L;
17911 FSGNJOpc = RISCV::FSGNJ_D;
17912 FSGNJXOpc = RISCV::FSGNJX_D;
17913 RC = &RISCV::FPR64RegClass;
17914 break;
17915 case RISCV::PseudoFROUND_D_INX:
17916 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
17917 CmpOpc = RISCV::FLT_D_INX;
17918 F2IOpc = RISCV::FCVT_L_D_INX;
17919 I2FOpc = RISCV::FCVT_D_L_INX;
17920 FSGNJOpc = RISCV::FSGNJ_D_INX;
17921 FSGNJXOpc = RISCV::FSGNJX_D_INX;
17922 RC = &RISCV::GPRRegClass;
17923 break;
17924 }
17925
17926 const BasicBlock *BB = MBB->getBasicBlock();
17927 DebugLoc DL = MI.getDebugLoc();
17929
17931 MachineBasicBlock *CvtMBB = F->CreateMachineBasicBlock(BB);
17932 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB);
17933
17934 F->insert(I, CvtMBB);
17935 F->insert(I, DoneMBB);
17936 // Move all instructions after the sequence to DoneMBB.
17937 DoneMBB->splice(DoneMBB->end(), MBB, MachineBasicBlock::iterator(MI),
17938 MBB->end());
17939 // Update machine-CFG edges by transferring all successors of the current
17940 // block to the new block which will contain the Phi nodes for the selects.
17941  DoneMBB->transferSuccessorsAndUpdatePHIs(MBB);
17942  // Set the successors for MBB.
17943 MBB->addSuccessor(CvtMBB);
17944 MBB->addSuccessor(DoneMBB);
17945
17946 Register DstReg = MI.getOperand(0).getReg();
17947 Register SrcReg = MI.getOperand(1).getReg();
17948 Register MaxReg = MI.getOperand(2).getReg();
17949 int64_t FRM = MI.getOperand(3).getImm();
17950
17951 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
17953
17954 Register FabsReg = MRI.createVirtualRegister(RC);
17955 BuildMI(MBB, DL, TII.get(FSGNJXOpc), FabsReg).addReg(SrcReg).addReg(SrcReg);
17956
17957 // Compare the FP value to the max value.
17958 Register CmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
17959 auto MIB =
17960 BuildMI(MBB, DL, TII.get(CmpOpc), CmpReg).addReg(FabsReg).addReg(MaxReg);
17963
17964 // Insert branch.
17965 BuildMI(MBB, DL, TII.get(RISCV::BEQ))
17966 .addReg(CmpReg)
17967 .addReg(RISCV::X0)
17968 .addMBB(DoneMBB);
17969
17970 CvtMBB->addSuccessor(DoneMBB);
17971
17972 // Convert to integer.
17973 Register F2IReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
17974 MIB = BuildMI(CvtMBB, DL, TII.get(F2IOpc), F2IReg).addReg(SrcReg).addImm(FRM);
17977
17978 // Convert back to FP.
17979 Register I2FReg = MRI.createVirtualRegister(RC);
17980 MIB = BuildMI(CvtMBB, DL, TII.get(I2FOpc), I2FReg).addReg(F2IReg).addImm(FRM);
17983
17984 // Restore the sign bit.
17985 Register CvtReg = MRI.createVirtualRegister(RC);
17986 BuildMI(CvtMBB, DL, TII.get(FSGNJOpc), CvtReg).addReg(I2FReg).addReg(SrcReg);
17987
17988 // Merge the results.
17989 BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(RISCV::PHI), DstReg)
17990 .addReg(SrcReg)
17991 .addMBB(MBB)
17992 .addReg(CvtReg)
17993 .addMBB(CvtMBB);
17994
17995 MI.eraseFromParent();
17996 return DoneMBB;
17997}
17998
17999MachineBasicBlock *
18000RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
18001                                                 MachineBasicBlock *BB) const {
18002 switch (MI.getOpcode()) {
18003 default:
18004 llvm_unreachable("Unexpected instr type to insert");
18005 case RISCV::ReadCounterWide:
18006 assert(!Subtarget.is64Bit() &&
18007 "ReadCounterWide is only to be used on riscv32");
18008 return emitReadCounterWidePseudo(MI, BB);
18009 case RISCV::Select_GPR_Using_CC_GPR:
18010 case RISCV::Select_FPR16_Using_CC_GPR:
18011 case RISCV::Select_FPR16INX_Using_CC_GPR:
18012 case RISCV::Select_FPR32_Using_CC_GPR:
18013 case RISCV::Select_FPR32INX_Using_CC_GPR:
18014 case RISCV::Select_FPR64_Using_CC_GPR:
18015 case RISCV::Select_FPR64INX_Using_CC_GPR:
18016 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
18017 return emitSelectPseudo(MI, BB, Subtarget);
18018 case RISCV::BuildPairF64Pseudo:
18019 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
18020 case RISCV::SplitF64Pseudo:
18021 return emitSplitF64Pseudo(MI, BB, Subtarget);
18022 case RISCV::PseudoQuietFLE_H:
18023 return emitQuietFCMP(MI, BB, RISCV::FLE_H, RISCV::FEQ_H, Subtarget);
18024 case RISCV::PseudoQuietFLE_H_INX:
18025 return emitQuietFCMP(MI, BB, RISCV::FLE_H_INX, RISCV::FEQ_H_INX, Subtarget);
18026 case RISCV::PseudoQuietFLT_H:
18027 return emitQuietFCMP(MI, BB, RISCV::FLT_H, RISCV::FEQ_H, Subtarget);
18028 case RISCV::PseudoQuietFLT_H_INX:
18029 return emitQuietFCMP(MI, BB, RISCV::FLT_H_INX, RISCV::FEQ_H_INX, Subtarget);
18030 case RISCV::PseudoQuietFLE_S:
18031 return emitQuietFCMP(MI, BB, RISCV::FLE_S, RISCV::FEQ_S, Subtarget);
18032 case RISCV::PseudoQuietFLE_S_INX:
18033 return emitQuietFCMP(MI, BB, RISCV::FLE_S_INX, RISCV::FEQ_S_INX, Subtarget);
18034 case RISCV::PseudoQuietFLT_S:
18035 return emitQuietFCMP(MI, BB, RISCV::FLT_S, RISCV::FEQ_S, Subtarget);
18036 case RISCV::PseudoQuietFLT_S_INX:
18037 return emitQuietFCMP(MI, BB, RISCV::FLT_S_INX, RISCV::FEQ_S_INX, Subtarget);
18038 case RISCV::PseudoQuietFLE_D:
18039 return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget);
18040 case RISCV::PseudoQuietFLE_D_INX:
18041 return emitQuietFCMP(MI, BB, RISCV::FLE_D_INX, RISCV::FEQ_D_INX, Subtarget);
18042 case RISCV::PseudoQuietFLE_D_IN32X:
18043 return emitQuietFCMP(MI, BB, RISCV::FLE_D_IN32X, RISCV::FEQ_D_IN32X,
18044 Subtarget);
18045 case RISCV::PseudoQuietFLT_D:
18046 return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget);
18047 case RISCV::PseudoQuietFLT_D_INX:
18048 return emitQuietFCMP(MI, BB, RISCV::FLT_D_INX, RISCV::FEQ_D_INX, Subtarget);
18049 case RISCV::PseudoQuietFLT_D_IN32X:
18050 return emitQuietFCMP(MI, BB, RISCV::FLT_D_IN32X, RISCV::FEQ_D_IN32X,
18051 Subtarget);
18052
18053 case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK:
18054 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK);
18055 case RISCV::PseudoVFROUND_NOEXCEPT_V_M2_MASK:
18056 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK);
18057 case RISCV::PseudoVFROUND_NOEXCEPT_V_M4_MASK:
18058 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M4_MASK);
18059 case RISCV::PseudoVFROUND_NOEXCEPT_V_M8_MASK:
18060 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK);
18061 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF2_MASK:
18062 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK);
18063 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF4_MASK:
18064 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK);
18065 case RISCV::PseudoFROUND_H:
18066 case RISCV::PseudoFROUND_H_INX:
18067 case RISCV::PseudoFROUND_S:
18068 case RISCV::PseudoFROUND_S_INX:
18069 case RISCV::PseudoFROUND_D:
18070 case RISCV::PseudoFROUND_D_INX:
18071 case RISCV::PseudoFROUND_D_IN32X:
18072 return emitFROUND(MI, BB, Subtarget);
18073 case TargetOpcode::STATEPOINT:
18074    // STATEPOINT is a pseudo instruction which has no implicit defs/uses,
18075    // while the jal call instruction (to which the statepoint is eventually
18076    // lowered) has an implicit def. This def is early-clobber as it is set at
18077    // the moment of the call, before any use is read.
18078    // Add this implicit dead def here as a workaround.
18079 MI.addOperand(*MI.getMF(),
18080                  MachineOperand::CreateReg(
18081                      RISCV::X1, /*isDef*/ true,
18082 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
18083 /*isUndef*/ false, /*isEarlyClobber*/ true));
18084 [[fallthrough]];
18085 case TargetOpcode::STACKMAP:
18086 case TargetOpcode::PATCHPOINT:
18087 if (!Subtarget.is64Bit())
18088 report_fatal_error("STACKMAP, PATCHPOINT and STATEPOINT are only "
18089 "supported on 64-bit targets");
18090 return emitPatchPoint(MI, BB);
18091 }
18092}
18093
18094void RISCVTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
18095                                                        SDNode *Node) const {
18096 // Add FRM dependency to any instructions with dynamic rounding mode.
18097 int Idx = RISCV::getNamedOperandIdx(MI.getOpcode(), RISCV::OpName::frm);
18098 if (Idx < 0) {
18099 // Vector pseudos have FRM index indicated by TSFlags.
18100 Idx = RISCVII::getFRMOpNum(MI.getDesc());
18101 if (Idx < 0)
18102 return;
18103 }
18104 if (MI.getOperand(Idx).getImm() != RISCVFPRndMode::DYN)
18105 return;
18106 // If the instruction already reads FRM, don't add another read.
18107 if (MI.readsRegister(RISCV::FRM, /*TRI=*/nullptr))
18108 return;
18109 MI.addOperand(
18110 MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false, /*isImp*/ true));
18111}
18112
18113// Calling Convention Implementation.
18114// The expectations for frontend ABI lowering vary from target to target.
18115// Ideally, an LLVM frontend would be able to avoid worrying about many ABI
18116// details, but this is a longer term goal. For now, we simply try to keep the
18117// role of the frontend as simple and well-defined as possible. The rules can
18118// be summarised as:
18119// * Never split up large scalar arguments. We handle them here.
18120// * If a hardfloat calling convention is being used, and the struct may be
18121// passed in a pair of registers (fp+fp, int+fp), and both registers are
18122// available, then pass as two separate arguments. If either the GPRs or FPRs
18123// are exhausted, then pass according to the rule below.
18124// * If a struct could never be passed in registers or directly in a stack
18125// slot (as it is larger than 2*XLEN and the floating point rules don't
18126// apply), then pass it using a pointer with the byval attribute.
18127// * If a struct is less than 2*XLEN, then coerce to either a two-element
18128// word-sized array or a 2*XLEN scalar (depending on alignment).
18129// * The frontend can determine whether a struct is returned by reference or
18130// not based on its size and fields. If it will be returned by reference, the
18131// frontend must modify the prototype so a pointer with the sret annotation is
18132// passed as the first argument. This is not necessary for large scalar
18133// returns.
18134// * Struct return values and varargs should be coerced to structs containing
18135// register-size fields in the same situations they would be for fixed
18136// arguments.
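// For example, under the ILP32D hard-float ABI a 'struct { double d; int i; }'
// argument would normally be passed as two separate arguments, the double in
// an FPR and the int in a GPR (register availability permitting), whereas a
// struct larger than 2*XLEN with no floating-point fields is passed by a
// pointer with the byval attribute.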
18137
18138static const MCPhysReg ArgFPR16s[] = {
18139 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H,
18140 RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H
18141};
18142static const MCPhysReg ArgFPR32s[] = {
18143 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F,
18144 RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F
18145};
18146static const MCPhysReg ArgFPR64s[] = {
18147 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D,
18148 RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D
18149};
18150// This is an interim calling convention and it may be changed in the future.
18151static const MCPhysReg ArgVRs[] = {
18152 RISCV::V8, RISCV::V9, RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13,
18153 RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19,
18154 RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23};
18155static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2, RISCV::V10M2, RISCV::V12M2,
18156 RISCV::V14M2, RISCV::V16M2, RISCV::V18M2,
18157 RISCV::V20M2, RISCV::V22M2};
18158static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4,
18159 RISCV::V20M4};
18160static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8};
18161
18162ArrayRef<MCPhysReg> RISCV::getArgGPRs(const RISCVABI::ABI ABI) {
18163  // The GPRs used for passing arguments in the ILP32* and LP64* ABIs, except
18164 // the ILP32E ABI.
18165 static const MCPhysReg ArgIGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
18166 RISCV::X13, RISCV::X14, RISCV::X15,
18167 RISCV::X16, RISCV::X17};
18168  // The GPRs used for passing arguments in the ILP32E/LP64E ABIs.
18169 static const MCPhysReg ArgEGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
18170 RISCV::X13, RISCV::X14, RISCV::X15};
18171
18172 if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E)
18173 return ArrayRef(ArgEGPRs);
18174
18175 return ArrayRef(ArgIGPRs);
18176}
18177
18178static ArrayRef<MCPhysReg> getFastCCArgGPRs(const RISCVABI::ABI ABI) {
18179  // The GPRs used for passing arguments in FastCC. X5 and X6 might be used by
18180  // the save-restore libcalls, so we don't use them.
18181 static const MCPhysReg FastCCIGPRs[] = {
18182 RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14,
18183 RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7, RISCV::X28,
18184 RISCV::X29, RISCV::X30, RISCV::X31};
18185
18186  // The GPRs used for passing arguments in FastCC when using ILP32E/LP64E.
18187 static const MCPhysReg FastCCEGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
18188 RISCV::X13, RISCV::X14, RISCV::X15,
18189 RISCV::X7};
18190
18191 if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E)
18192 return ArrayRef(FastCCEGPRs);
18193
18194 return ArrayRef(FastCCIGPRs);
18195}
18196
18197// Pass a 2*XLEN argument that has been split into two XLEN values through
18198// registers or the stack as necessary.
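// For example, an i64 argument on RV32 that was split into two i32 halves is
// passed in a pair of GPRs when two are free; if only one argument GPR
// remains, the low half goes in that register and the high half goes on the
// stack.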
18199static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
18200 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2,
18201 MVT ValVT2, MVT LocVT2,
18202 ISD::ArgFlagsTy ArgFlags2, bool EABI) {
18203 unsigned XLenInBytes = XLen / 8;
18204 const RISCVSubtarget &STI =
18207  ArrayRef<MCPhysReg> ArgGPRs = RISCV::getArgGPRs(STI.getTargetABI());
18208 if (Register Reg = State.AllocateReg(ArgGPRs)) {
18209 // At least one half can be passed via register.
18210 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
18211 VA1.getLocVT(), CCValAssign::Full));
18212 } else {
18213 // Both halves must be passed on the stack, with proper alignment.
18214 // TODO: To be compatible with GCC's behaviors, we force them to have 4-byte
18215 // alignment. This behavior may be changed when RV32E/ILP32E is ratified.
18216 Align StackAlign(XLenInBytes);
18217 if (!EABI || XLen != 32)
18218 StackAlign = std::max(StackAlign, ArgFlags1.getNonZeroOrigAlign());
18219 State.addLoc(
18221 State.AllocateStack(XLenInBytes, StackAlign),
18222 VA1.getLocVT(), CCValAssign::Full));
18224 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
18225 LocVT2, CCValAssign::Full));
18226 return false;
18227 }
18228
18229 if (Register Reg = State.AllocateReg(ArgGPRs)) {
18230 // The second half can also be passed via register.
18231 State.addLoc(
18232 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
18233 } else {
18234 // The second half is passed via the stack, without additional alignment.
18236 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
18237 LocVT2, CCValAssign::Full));
18238 }
18239
18240 return false;
18241}
18242
18243// Implements the RISC-V calling convention. Returns true upon failure.
18244bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
18245 MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
18246 ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
18247 bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
18248 RVVArgDispatcher &RVVDispatcher) {
18249 unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
18250 assert(XLen == 32 || XLen == 64);
18251 MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;
18252
18253 // Static chain parameter must not be passed in normal argument registers,
18254 // so we assign t2 for it as done in GCC's __builtin_call_with_static_chain
18255 if (ArgFlags.isNest()) {
18256 if (unsigned Reg = State.AllocateReg(RISCV::X7)) {
18257 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18258 return false;
18259 }
18260 }
18261
18262  // Any return value split into more than two values can't be returned
18263 // directly. Vectors are returned via the available vector registers.
18264 if (!LocVT.isVector() && IsRet && ValNo > 1)
18265 return true;
18266
18267 // UseGPRForF16_F32 if targeting one of the soft-float ABIs, if passing a
18268 // variadic argument, or if no F16/F32 argument registers are available.
18269 bool UseGPRForF16_F32 = true;
18270 // UseGPRForF64 if targeting soft-float ABIs or an FLEN=32 ABI, if passing a
18271 // variadic argument, or if no F64 argument registers are available.
18272 bool UseGPRForF64 = true;
18273
18274 switch (ABI) {
18275 default:
18276 llvm_unreachable("Unexpected ABI");
18279 case RISCVABI::ABI_LP64:
18281 break;
18284 UseGPRForF16_F32 = !IsFixed;
18285 break;
18288 UseGPRForF16_F32 = !IsFixed;
18289 UseGPRForF64 = !IsFixed;
18290 break;
18291 }
18292
18293 // FPR16, FPR32, and FPR64 alias each other.
18294 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s)) {
18295 UseGPRForF16_F32 = true;
18296 UseGPRForF64 = true;
18297 }
18298
18299 // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and
18300 // similar local variables rather than directly checking against the target
18301 // ABI.
18302
18303 if (UseGPRForF16_F32 &&
18304 (ValVT == MVT::f16 || ValVT == MVT::bf16 || ValVT == MVT::f32)) {
18305 LocVT = XLenVT;
18306 LocInfo = CCValAssign::BCvt;
18307 } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) {
18308 LocVT = MVT::i64;
18309 LocInfo = CCValAssign::BCvt;
18310 }
18311
18312  ArrayRef<MCPhysReg> ArgGPRs = RISCV::getArgGPRs(ABI);
18313
18314 // If this is a variadic argument, the RISC-V calling convention requires
18315 // that it is assigned an 'even' or 'aligned' register if it has 8-byte
18316 // alignment (RV32) or 16-byte alignment (RV64). An aligned register should
18317 // be used regardless of whether the original argument was split during
18318 // legalisation or not. The argument will not be passed by registers if the
18319 // original type is larger than 2*XLEN, so the register alignment rule does
18320 // not apply.
18321 // TODO: To be compatible with GCC's behaviors, we don't align registers
18322 // currently if we are using ILP32E calling convention. This behavior may be
18323 // changed when RV32E/ILP32E is ratified.
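  // For example, a variadic 'double' passed on RV32 is assigned an aligned
  // (even, odd) GPR pair such as a2/a3; if the next free register is
  // odd-numbered, it is skipped.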
18324 unsigned TwoXLenInBytes = (2 * XLen) / 8;
18325 if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes &&
18326 DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes &&
18327 ABI != RISCVABI::ABI_ILP32E) {
18328 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
18329 // Skip 'odd' register if necessary.
18330 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
18331 State.AllocateReg(ArgGPRs);
18332 }
18333
18334 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
18335 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
18336 State.getPendingArgFlags();
18337
18338 assert(PendingLocs.size() == PendingArgFlags.size() &&
18339 "PendingLocs and PendingArgFlags out of sync");
18340
18341 // Handle passing f64 on RV32D with a soft float ABI or when floating point
18342 // registers are exhausted.
18343 if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) {
18344 assert(PendingLocs.empty() && "Can't lower f64 if it is split");
18345 // Depending on available argument GPRS, f64 may be passed in a pair of
18346 // GPRs, split between a GPR and the stack, or passed completely on the
18347 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
18348 // cases.
18349 Register Reg = State.AllocateReg(ArgGPRs);
18350 if (!Reg) {
18351 unsigned StackOffset = State.AllocateStack(8, Align(8));
18352 State.addLoc(
18353 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
18354 return false;
18355 }
18356 LocVT = MVT::i32;
18357 State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18358 Register HiReg = State.AllocateReg(ArgGPRs);
18359 if (HiReg) {
18360 State.addLoc(
18361 CCValAssign::getCustomReg(ValNo, ValVT, HiReg, LocVT, LocInfo));
18362 } else {
18363 unsigned StackOffset = State.AllocateStack(4, Align(4));
18364 State.addLoc(
18365 CCValAssign::getCustomMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
18366 }
18367 return false;
18368 }
18369
18370 // Fixed-length vectors are located in the corresponding scalable-vector
18371 // container types.
18372 if (ValVT.isFixedLengthVector())
18373 LocVT = TLI.getContainerForFixedLengthVector(LocVT);
18374
18375 // Split arguments might be passed indirectly, so keep track of the pending
18376 // values. Split vectors are passed via a mix of registers and indirectly, so
18377 // treat them as we would any other argument.
18378 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
18379 LocVT = XLenVT;
18380 LocInfo = CCValAssign::Indirect;
18381 PendingLocs.push_back(
18382 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
18383 PendingArgFlags.push_back(ArgFlags);
18384 if (!ArgFlags.isSplitEnd()) {
18385 return false;
18386 }
18387 }
18388
18389 // If the split argument only had two elements, it should be passed directly
18390 // in registers or on the stack.
18391 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
18392 PendingLocs.size() <= 2) {
18393 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
18394 // Apply the normal calling convention rules to the first half of the
18395 // split argument.
18396 CCValAssign VA = PendingLocs[0];
18397 ISD::ArgFlagsTy AF = PendingArgFlags[0];
18398 PendingLocs.clear();
18399 PendingArgFlags.clear();
18400 return CC_RISCVAssign2XLen(
18401 XLen, State, VA, AF, ValNo, ValVT, LocVT, ArgFlags,
18402 ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E);
18403 }
18404
18405 // Allocate to a register if possible, or else a stack slot.
18406 Register Reg;
18407 unsigned StoreSizeBytes = XLen / 8;
18408 Align StackAlign = Align(XLen / 8);
18409
18410 if ((ValVT == MVT::f16 || ValVT == MVT::bf16) && !UseGPRForF16_F32)
18411 Reg = State.AllocateReg(ArgFPR16s);
18412 else if (ValVT == MVT::f32 && !UseGPRForF16_F32)
18413 Reg = State.AllocateReg(ArgFPR32s);
18414 else if (ValVT == MVT::f64 && !UseGPRForF64)
18415 Reg = State.AllocateReg(ArgFPR64s);
18416 else if (ValVT.isVector()) {
18417 Reg = RVVDispatcher.getNextPhysReg();
18418 if (!Reg) {
18419 // For return values, the vector must be passed fully via registers or
18420 // via the stack.
18421 // FIXME: The proposed vector ABI only mandates v8-v15 for return values,
18422 // but we're using all of them.
18423 if (IsRet)
18424 return true;
18425 // Try using a GPR to pass the address
18426 if ((Reg = State.AllocateReg(ArgGPRs))) {
18427 LocVT = XLenVT;
18428 LocInfo = CCValAssign::Indirect;
18429 } else if (ValVT.isScalableVector()) {
18430 LocVT = XLenVT;
18431 LocInfo = CCValAssign::Indirect;
18432 } else {
18433 // Pass fixed-length vectors on the stack.
18434 LocVT = ValVT;
18435 StoreSizeBytes = ValVT.getStoreSize();
18436 // Align vectors to their element sizes, being careful for vXi1
18437 // vectors.
18438 StackAlign = MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne();
18439 }
18440 }
18441 } else {
18442 Reg = State.AllocateReg(ArgGPRs);
18443 }
18444
18445 unsigned StackOffset =
18446 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
18447
18448 // If we reach this point and PendingLocs is non-empty, we must be at the
18449 // end of a split argument that must be passed indirectly.
18450 if (!PendingLocs.empty()) {
18451 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
18452 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
18453
18454 for (auto &It : PendingLocs) {
18455 if (Reg)
18456 It.convertToReg(Reg);
18457 else
18458 It.convertToMem(StackOffset);
18459 State.addLoc(It);
18460 }
18461 PendingLocs.clear();
18462 PendingArgFlags.clear();
18463 return false;
18464 }
18465
18466 assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT ||
18467 (TLI.getSubtarget().hasVInstructions() && ValVT.isVector())) &&
18468 "Expected an XLenVT or vector types at this stage");
18469
18470 if (Reg) {
18471 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18472 return false;
18473 }
18474
18475 // When a scalar floating-point value is passed on the stack, no
18476 // bit-conversion is needed.
18477 if (ValVT.isFloatingPoint() && LocInfo != CCValAssign::Indirect) {
18478 assert(!ValVT.isVector());
18479 LocVT = ValVT;
18480 LocInfo = CCValAssign::Full;
18481 }
18482 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
18483 return false;
18484}
18485
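// Return the index of the first vector argument whose element type is i1
// (i.e. a mask argument), if any.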
18486template <typename ArgTy>
18487static std::optional<unsigned> preAssignMask(const ArgTy &Args) {
18488 for (const auto &ArgIdx : enumerate(Args)) {
18489 MVT ArgVT = ArgIdx.value().VT;
18490 if (ArgVT.isVector() && ArgVT.getVectorElementType() == MVT::i1)
18491 return ArgIdx.index();
18492 }
18493 return std::nullopt;
18494}
18495
18496void RISCVTargetLowering::analyzeInputArgs(
18497 MachineFunction &MF, CCState &CCInfo,
18498 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
18499 RISCVCCAssignFn Fn) const {
18500 unsigned NumArgs = Ins.size();
18502
18503 RVVArgDispatcher Dispatcher;
18504 if (IsRet) {
18505 Dispatcher = RVVArgDispatcher{&MF, this, ArrayRef(Ins)};
18506 } else {
18507 SmallVector<Type *, 4> TypeList;
18508 for (const Argument &Arg : MF.getFunction().args())
18509 TypeList.push_back(Arg.getType());
18510 Dispatcher = RVVArgDispatcher{&MF, this, ArrayRef(TypeList)};
18511 }
18512
18513 for (unsigned i = 0; i != NumArgs; ++i) {
18514 MVT ArgVT = Ins[i].VT;
18515 ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
18516
18517 Type *ArgTy = nullptr;
18518 if (IsRet)
18519 ArgTy = FType->getReturnType();
18520 else if (Ins[i].isOrigArg())
18521 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
18522
18524 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
18525 ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this,
18526 Dispatcher)) {
18527 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
18528 << ArgVT << '\n');
18529 llvm_unreachable(nullptr);
18530 }
18531 }
18532}
18533
18534void RISCVTargetLowering::analyzeOutputArgs(
18535 MachineFunction &MF, CCState &CCInfo,
18536 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
18537 CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const {
18538 unsigned NumArgs = Outs.size();
18539
18540 SmallVector<Type *, 4> TypeList;
18541 if (IsRet)
18542 TypeList.push_back(MF.getFunction().getReturnType());
18543 else if (CLI)
18544 for (const TargetLowering::ArgListEntry &Arg : CLI->getArgs())
18545 TypeList.push_back(Arg.Ty);
18546 RVVArgDispatcher Dispatcher{&MF, this, ArrayRef(TypeList)};
18547
18548 for (unsigned i = 0; i != NumArgs; i++) {
18549 MVT ArgVT = Outs[i].VT;
18550 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
18551 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
18552
18554 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
18555 ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this,
18556 Dispatcher)) {
18557 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
18558 << ArgVT << "\n");
18559 llvm_unreachable(nullptr);
18560 }
18561 }
18562}
18563
18564// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
18565// values.
18566static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
18567                                   const CCValAssign &VA, const SDLoc &DL,
18568 const RISCVSubtarget &Subtarget) {
18569 switch (VA.getLocInfo()) {
18570 default:
18571 llvm_unreachable("Unexpected CCValAssign::LocInfo");
18572 case CCValAssign::Full:
18574 Val = convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget);
18575 break;
18576 case CCValAssign::BCvt:
18577 if (VA.getLocVT().isInteger() &&
18578 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
18579 Val = DAG.getNode(RISCVISD::FMV_H_X, DL, VA.getValVT(), Val);
18580 } else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) {
18581 if (RV64LegalI32) {
18582 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Val);
18583 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
18584 } else {
18585 Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
18586 }
18587 } else {
18588 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
18589 }
18590 break;
18591 }
18592 return Val;
18593}
18594
18595// The caller is responsible for loading the full value if the argument is
18596// passed with CCValAssign::Indirect.
18597static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
18598                                const CCValAssign &VA, const SDLoc &DL,
18599 const ISD::InputArg &In,
18600 const RISCVTargetLowering &TLI) {
18603 EVT LocVT = VA.getLocVT();
18604 SDValue Val;
18605 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
18606 Register VReg = RegInfo.createVirtualRegister(RC);
18607 RegInfo.addLiveIn(VA.getLocReg(), VReg);
18608 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
18609
18610 // If input is sign extended from 32 bits, note it for the SExtWRemoval pass.
18611 if (In.isOrigArg()) {
18612 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
18613 if (OrigArg->getType()->isIntegerTy()) {
18614 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
18615 // An input zero extended from i31 can also be considered sign extended.
18616 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
18617 (BitWidth < 32 && In.Flags.isZExt())) {
18619 RVFI->addSExt32Register(VReg);
18620 }
18621 }
18622 }
18623
18625 return Val;
18626
18627 return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget());
18628}
18629
18630static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
18631                                   const CCValAssign &VA, const SDLoc &DL,
18632 const RISCVSubtarget &Subtarget) {
18633 EVT LocVT = VA.getLocVT();
18634
18635 switch (VA.getLocInfo()) {
18636 default:
18637 llvm_unreachable("Unexpected CCValAssign::LocInfo");
18638 case CCValAssign::Full:
18639 if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector())
18640 Val = convertToScalableVector(LocVT, Val, DAG, Subtarget);
18641 break;
18642 case CCValAssign::BCvt:
18643 if (LocVT.isInteger() &&
18644 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
18645 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, LocVT, Val);
18646 } else if (LocVT == MVT::i64 && VA.getValVT() == MVT::f32) {
18647 if (RV64LegalI32) {
18648 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
18649 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Val);
18650 } else {
18651 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
18652 }
18653 } else {
18654 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
18655 }
18656 break;
18657 }
18658 return Val;
18659}
18660
18661// The caller is responsible for loading the full value if the argument is
18662// passed with CCValAssign::Indirect.
18663static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
18664                                const CCValAssign &VA, const SDLoc &DL) {
18666 MachineFrameInfo &MFI = MF.getFrameInfo();
18667 EVT LocVT = VA.getLocVT();
18668 EVT ValVT = VA.getValVT();
18670 if (ValVT.isScalableVector()) {
18671    // When the value is a scalable vector, what is stored on the stack is a
18672    // pointer to the scalable vector value, so ValVT is the pointer type
18673    // rather than the scalable vector type.
18674 ValVT = LocVT;
18675 }
18676 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
18677 /*IsImmutable=*/true);
18678 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
18679 SDValue Val;
18680
18681 ISD::LoadExtType ExtType;
18682 switch (VA.getLocInfo()) {
18683 default:
18684 llvm_unreachable("Unexpected CCValAssign::LocInfo");
18685 case CCValAssign::Full:
18687 case CCValAssign::BCvt:
18688 ExtType = ISD::NON_EXTLOAD;
18689 break;
18690 }
18691 Val = DAG.getExtLoad(
18692 ExtType, DL, LocVT, Chain, FIN,
18694 return Val;
18695}
18696
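// Reassemble an f64 that was split on RV32 because it could not be passed in
// an FPR: the low half arrives in a GPR and the high half arrives either in a
// second GPR or on the stack.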
18697static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
18698                                       const CCValAssign &VA,
18699 const CCValAssign &HiVA,
18700 const SDLoc &DL) {
18701 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
18702 "Unexpected VA");
18704 MachineFrameInfo &MFI = MF.getFrameInfo();
18706
18707 assert(VA.isRegLoc() && "Expected register VA assignment");
18708
18709 Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
18710 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
18711 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
18712 SDValue Hi;
18713 if (HiVA.isMemLoc()) {
18714 // Second half of f64 is passed on the stack.
18715 int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
18716 /*IsImmutable=*/true);
18717 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
18718 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
18720 } else {
18721 // Second half of f64 is passed in another GPR.
18722 Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
18723 RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
18724 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
18725 }
18726 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
18727}
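// Illustrative sketch (editorial, hedged): with an RV32 soft-float ABI such as
// ilp32 while the D extension is still available for codegen, an f64 argument
// is split across two 32-bit locations, typically a GPR pair or a GPR plus a
// stack slot, and recombined with BuildPairF64 as above:
//
//   double scale(double x) { return x * 2.0; }  /* x arrives as two i32 halves */
//
// This mirrors the SplitF64 handling on the caller side in LowerCall.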
18728
18729// FastCC gives less than a 1% performance improvement on some particular
18730// benchmarks, but in theory it may still benefit some cases.
18731bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI,
18732 unsigned ValNo, MVT ValVT, MVT LocVT,
18733 CCValAssign::LocInfo LocInfo,
18734 ISD::ArgFlagsTy ArgFlags, CCState &State,
18735 bool IsFixed, bool IsRet, Type *OrigTy,
18736 const RISCVTargetLowering &TLI,
18737 RVVArgDispatcher &RVVDispatcher) {
18738 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
18739 if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
18740 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18741 return false;
18742 }
18743 }
18744
18745 const RISCVSubtarget &Subtarget = TLI.getSubtarget();
18746
18747 if (LocVT == MVT::f16 &&
18748 (Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZfhmin())) {
18749 static const MCPhysReg FPR16List[] = {
18750 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H,
18751 RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H, RISCV::F1_H,
18752 RISCV::F2_H, RISCV::F3_H, RISCV::F4_H, RISCV::F5_H, RISCV::F6_H,
18753 RISCV::F7_H, RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H};
18754 if (unsigned Reg = State.AllocateReg(FPR16List)) {
18755 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18756 return false;
18757 }
18758 }
18759
18760 if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) {
18761 static const MCPhysReg FPR32List[] = {
18762 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F,
18763 RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F, RISCV::F1_F,
18764 RISCV::F2_F, RISCV::F3_F, RISCV::F4_F, RISCV::F5_F, RISCV::F6_F,
18765 RISCV::F7_F, RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F};
18766 if (unsigned Reg = State.AllocateReg(FPR32List)) {
18767 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18768 return false;
18769 }
18770 }
18771
18772 if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) {
18773 static const MCPhysReg FPR64List[] = {
18774 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D,
18775 RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D, RISCV::F1_D,
18776 RISCV::F2_D, RISCV::F3_D, RISCV::F4_D, RISCV::F5_D, RISCV::F6_D,
18777 RISCV::F7_D, RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D};
18778 if (unsigned Reg = State.AllocateReg(FPR64List)) {
18779 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18780 return false;
18781 }
18782 }
18783
18784 // Check if there is an available GPR before hitting the stack.
18785 if ((LocVT == MVT::f16 &&
18786 (Subtarget.hasStdExtZhinx() || Subtarget.hasStdExtZhinxmin())) ||
18787 (LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
18788 (LocVT == MVT::f64 && Subtarget.is64Bit() &&
18789 Subtarget.hasStdExtZdinx())) {
18790 if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
18791 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18792 return false;
18793 }
18794 }
18795
18796 if (LocVT == MVT::f16) {
18797 unsigned Offset2 = State.AllocateStack(2, Align(2));
18798 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset2, LocVT, LocInfo));
18799 return false;
18800 }
18801
18802 if (LocVT == MVT::i32 || LocVT == MVT::f32) {
18803 unsigned Offset4 = State.AllocateStack(4, Align(4));
18804 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo));
18805 return false;
18806 }
18807
18808 if (LocVT == MVT::i64 || LocVT == MVT::f64) {
18809 unsigned Offset5 = State.AllocateStack(8, Align(8));
18810 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo));
18811 return false;
18812 }
18813
18814 if (LocVT.isVector()) {
18815 MCPhysReg AllocatedVReg = RVVDispatcher.getNextPhysReg();
18816 if (AllocatedVReg) {
18817 // Fixed-length vectors are located in the corresponding scalable-vector
18818 // container types.
18819 if (ValVT.isFixedLengthVector())
18820 LocVT = TLI.getContainerForFixedLengthVector(LocVT);
18821 State.addLoc(
18822 CCValAssign::getReg(ValNo, ValVT, AllocatedVReg, LocVT, LocInfo));
18823 } else {
18824 // Try and pass the address via a "fast" GPR.
18825 if (unsigned GPRReg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
18826 LocInfo = CCValAssign::Indirect;
18827 LocVT = TLI.getSubtarget().getXLenVT();
18828 State.addLoc(CCValAssign::getReg(ValNo, ValVT, GPRReg, LocVT, LocInfo));
18829 } else if (ValVT.isFixedLengthVector()) {
18830 auto StackAlign =
18832 unsigned StackOffset =
18833 State.AllocateStack(ValVT.getStoreSize(), StackAlign);
18834 State.addLoc(
18835 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
18836 } else {
18837 // Can't pass scalable vectors on the stack.
18838 return true;
18839 }
18840 }
18841
18842 return false;
18843 }
18844
18845 return true; // CC didn't match.
18846}
18847
18848bool RISCV::CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
18849 CCValAssign::LocInfo LocInfo,
18850 ISD::ArgFlagsTy ArgFlags, CCState &State) {
18851 if (ArgFlags.isNest()) {
18853 "Attribute 'nest' is not supported in GHC calling convention");
18854 }
18855
18856 static const MCPhysReg GPRList[] = {
18857 RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22,
18858 RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27};
18859
18860 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
18861 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim
18862 // s1 s2 s3 s4 s5 s6 s7 s8 s9 s10 s11
18863 if (unsigned Reg = State.AllocateReg(GPRList)) {
18864 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18865 return false;
18866 }
18867 }
18868
18869 const RISCVSubtarget &Subtarget =
18871
18872 if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) {
18873 // Pass in STG registers: F1, ..., F6
18874 // fs0 ... fs5
18875 static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F,
18876 RISCV::F18_F, RISCV::F19_F,
18877 RISCV::F20_F, RISCV::F21_F};
18878 if (unsigned Reg = State.AllocateReg(FPR32List)) {
18879 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18880 return false;
18881 }
18882 }
18883
18884 if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) {
18885 // Pass in STG registers: D1, ..., D6
18886 // fs6 ... fs11
18887 static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D,
18888 RISCV::F24_D, RISCV::F25_D,
18889 RISCV::F26_D, RISCV::F27_D};
18890 if (unsigned Reg = State.AllocateReg(FPR64List)) {
18891 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18892 return false;
18893 }
18894 }
18895
18896 if ((LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
18897 (LocVT == MVT::f64 && Subtarget.hasStdExtZdinx() &&
18898 Subtarget.is64Bit())) {
18899 if (unsigned Reg = State.AllocateReg(GPRList)) {
18900 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18901 return false;
18902 }
18903 }
18904
18905 report_fatal_error("No registers left in GHC calling convention");
18906 return true;
18907}
18908
18909// Transform physical registers into virtual registers.
18910SDValue RISCVTargetLowering::LowerFormalArguments(
18911 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
18912 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
18913 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
18914
18916
18917 switch (CallConv) {
18918 default:
18919 report_fatal_error("Unsupported calling convention");
18920 case CallingConv::C:
18921 case CallingConv::Fast:
18923 case CallingConv::GRAAL:
18925 break;
18926 case CallingConv::GHC:
18927 if (Subtarget.hasStdExtE())
18928 report_fatal_error("GHC calling convention is not supported on RVE!");
18929 if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx())
18930 report_fatal_error("GHC calling convention requires the (Zfinx/F) and "
18931 "(Zdinx/D) instruction set extensions");
18932 }
18933
18934 const Function &Func = MF.getFunction();
18935 if (Func.hasFnAttribute("interrupt")) {
18936 if (!Func.arg_empty())
18938 "Functions with the interrupt attribute cannot have arguments!");
18939
18940 StringRef Kind =
18941 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
18942
18943 if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
18945 "Function interrupt attribute argument not supported!");
18946 }
18947
18948 EVT PtrVT = getPointerTy(DAG.getDataLayout());
18949 MVT XLenVT = Subtarget.getXLenVT();
18950 unsigned XLenInBytes = Subtarget.getXLen() / 8;
18951 // Used with varargs to accumulate store chains.
18952 std::vector<SDValue> OutChains;
18953
18954 // Assign locations to all of the incoming arguments.
18956 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
18957
18958 if (CallConv == CallingConv::GHC)
18960 else
18961 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false,
18963 : RISCV::CC_RISCV);
18964
18965 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
18966 CCValAssign &VA = ArgLocs[i];
18967 SDValue ArgValue;
18968 // Passing f64 on RV32D with a soft float ABI must be handled as a special
18969 // case.
18970 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
18971 assert(VA.needsCustom());
18972 ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
18973 } else if (VA.isRegLoc())
18974 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
18975 else
18976 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
18977
18978 if (VA.getLocInfo() == CCValAssign::Indirect) {
18979 // If the original argument was split and passed by reference (e.g. i128
18980 // on RV32), we need to load all parts of it here (using the same
18981 // address). Vectors may be partly split to registers and partly to the
18982 // stack, in which case the base address is partly offset and subsequent
18983 // loads are relative to that.
18984 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
18986 unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
18987 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
18988 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
18989 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
18990 CCValAssign &PartVA = ArgLocs[i + 1];
18991 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
18992 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
18993 if (PartVA.getValVT().isScalableVector())
18994 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
18995 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
18996 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
18998 ++i;
18999 ++InsIdx;
19000 }
19001 continue;
19002 }
19003 InVals.push_back(ArgValue);
19004 }
19005
19006 if (any_of(ArgLocs,
19007 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
19008 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
19009
19010 if (IsVarArg) {
19011 ArrayRef<MCPhysReg> ArgRegs = RISCV::getArgGPRs(Subtarget.getTargetABI());
19012 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
19013 const TargetRegisterClass *RC = &RISCV::GPRRegClass;
19014 MachineFrameInfo &MFI = MF.getFrameInfo();
19015 MachineRegisterInfo &RegInfo = MF.getRegInfo();
19017
19018 // Size of the vararg save area. For now, the varargs save area is either
19019 // zero or large enough to hold a0-a7.
19020 int VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
19021 int FI;
19022
19023 // If all registers are allocated, then all varargs must be passed on the
19024 // stack and we don't need to save any argregs.
19025 if (VarArgsSaveSize == 0) {
19026 int VaArgOffset = CCInfo.getStackSize();
19027 FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
19028 } else {
19029 int VaArgOffset = -VarArgsSaveSize;
19030 FI = MFI.CreateFixedObject(VarArgsSaveSize, VaArgOffset, true);
19031
19032 // If saving an odd number of registers, create an extra stack slot to
19033 // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
19034 // that offsets to even-numbered registers remain 2*XLEN-aligned.
19035 if (Idx % 2) {
19037 XLenInBytes, VaArgOffset - static_cast<int>(XLenInBytes), true);
19038 VarArgsSaveSize += XLenInBytes;
19039 }
19040
19041 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
19042
19043 // Copy the integer registers that may have been used for passing varargs
19044 // to the vararg save area.
19045 for (unsigned I = Idx; I < ArgRegs.size(); ++I) {
19046 const Register Reg = RegInfo.createVirtualRegister(RC);
19047 RegInfo.addLiveIn(ArgRegs[I], Reg);
19048 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
19049 SDValue Store = DAG.getStore(
19050 Chain, DL, ArgValue, FIN,
19051 MachinePointerInfo::getFixedStack(MF, FI, (I - Idx) * XLenInBytes));
19052 OutChains.push_back(Store);
19053 FIN =
19054 DAG.getMemBasePlusOffset(FIN, TypeSize::getFixed(XLenInBytes), DL);
19055 }
19056 }
19057
19058 // Record the frame index of the first variable argument,
19059 // which is needed to lower VASTART.
19060 RVFI->setVarArgsFrameIndex(FI);
19061 RVFI->setVarArgsSaveSize(VarArgsSaveSize);
19062 }
19063
19064 // All stores are grouped in one node to allow matching between the
19065 // sizes of Ins and InVals. This only happens for vararg functions.
19066 if (!OutChains.empty()) {
19067 OutChains.push_back(Chain);
19068 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
19069 }
19070
19071 return Chain;
19072}
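// Illustrative sketch (editorial, hedged): the varargs handling above spills
// the still-unallocated argument registers into the vararg save area so that
// va_arg can walk register and stack arguments uniformly, e.g.:
//
//   #include <stdarg.h>
//   int sum(int n, ...) {
//     va_list ap;
//     va_start(ap, n);
//     int s = 0;
//     for (int i = 0; i < n; ++i)
//       s += va_arg(ap, int);
//     va_end(ap);
//     return s;
//   }
//
// Here a1-a7 would typically be stored to the save area in the prologue, and
// VarArgsFrameIndex recorded for the later VASTART lowering.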
19073
19074/// isEligibleForTailCallOptimization - Check whether the call is eligible
19075/// for tail call optimization.
19076/// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
19077bool RISCVTargetLowering::isEligibleForTailCallOptimization(
19078 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
19079 const SmallVector<CCValAssign, 16> &ArgLocs) const {
19080
19081 auto CalleeCC = CLI.CallConv;
19082 auto &Outs = CLI.Outs;
19083 auto &Caller = MF.getFunction();
19084 auto CallerCC = Caller.getCallingConv();
19085
19086 // Exception-handling functions need a special set of instructions to
19087 // indicate a return to the hardware. Tail-calling another function would
19088 // probably break this.
19089 // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
19090 // should be expanded as new function attributes are introduced.
19091 if (Caller.hasFnAttribute("interrupt"))
19092 return false;
19093
19094 // Do not tail call opt if the stack is used to pass parameters.
19095 if (CCInfo.getStackSize() != 0)
19096 return false;
19097
19098 // Do not tail call opt if any parameters need to be passed indirectly.
19099 // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
19100 // passed indirectly. So the address of the value will be passed in a
19101 // register, or if not available, then the address is put on the stack. To
19102 // pass indirectly, space on the stack often needs to be allocated to store
19103 // the value, so the CCInfo.getStackSize() != 0 check alone is not enough;
19104 // we also need to check whether any CCValAssign in ArgLocs is passed as
19105 // CCValAssign::Indirect.
19106 for (auto &VA : ArgLocs)
19107 if (VA.getLocInfo() == CCValAssign::Indirect)
19108 return false;
19109
19110 // Do not tail call opt if either caller or callee uses struct return
19111 // semantics.
19112 auto IsCallerStructRet = Caller.hasStructRetAttr();
19113 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
19114 if (IsCallerStructRet || IsCalleeStructRet)
19115 return false;
19116
19117 // The callee has to preserve all registers the caller needs to preserve.
19118 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
19119 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
19120 if (CalleeCC != CallerCC) {
19121 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
19122 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
19123 return false;
19124 }
19125
19126 // Byval parameters hand the function a pointer directly into the stack area
19127 // we want to reuse during a tail call. Working around this *is* possible
19128 // but less efficient and uglier in LowerCall.
19129 for (auto &Arg : Outs)
19130 if (Arg.Flags.isByVal())
19131 return false;
19132
19133 return true;
19134}
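// Illustrative sketch (editorial, hedged): the checks above mean a simple
// wrapper whose outgoing arguments all fit in registers is a tail-call
// candidate, while any stack-passed or indirectly passed argument blocks the
// optimization, e.g.:
//
//   long helper(long);
//   long wrap(long x) { return helper(x + 1); }  /* may be emitted as a tail call */
//
//   long many(long, long, long, long, long, long, long, long, long);
//   long pass9(long x) {
//     return many(x, x, x, x, x, x, x, x, x);  /* ninth arg uses the stack,
//                                                 so no tail call */
//   }
//
// Any call site that needs stack space for its arguments
// (CCInfo.getStackSize() != 0) is rejected here regardless of other properties.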
19135
19137 return DAG.getDataLayout().getPrefTypeAlign(
19138 VT.getTypeForEVT(*DAG.getContext()));
19139}
19140
19141// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
19142// and output parameter nodes.
19143SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
19144 SmallVectorImpl<SDValue> &InVals) const {
19145 SelectionDAG &DAG = CLI.DAG;
19146 SDLoc &DL = CLI.DL;
19147 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
19148 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
19149 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
19150 SDValue Chain = CLI.Chain;
19151 SDValue Callee = CLI.Callee;
19152 bool &IsTailCall = CLI.IsTailCall;
19153 CallingConv::ID CallConv = CLI.CallConv;
19154 bool IsVarArg = CLI.IsVarArg;
19155 EVT PtrVT = getPointerTy(DAG.getDataLayout());
19156 MVT XLenVT = Subtarget.getXLenVT();
19157
19159
19160 // Analyze the operands of the call, assigning locations to each operand.
19162 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
19163
19164 if (CallConv == CallingConv::GHC) {
19165 if (Subtarget.hasStdExtE())
19166 report_fatal_error("GHC calling convention is not supported on RVE!");
19168 } else
19169 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI,
19171 : RISCV::CC_RISCV);
19172
19173 // Check if it's really possible to do a tail call.
19174 if (IsTailCall)
19175 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
19176
19177 if (IsTailCall)
19178 ++NumTailCalls;
19179 else if (CLI.CB && CLI.CB->isMustTailCall())
19180 report_fatal_error("failed to perform tail call elimination on a call "
19181 "site marked musttail");
19182
19183 // Get a count of how many bytes are to be pushed on the stack.
19184 unsigned NumBytes = ArgCCInfo.getStackSize();
19185
19186 // Create local copies for byval args
19187 SmallVector<SDValue, 8> ByValArgs;
19188 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
19189 ISD::ArgFlagsTy Flags = Outs[i].Flags;
19190 if (!Flags.isByVal())
19191 continue;
19192
19193 SDValue Arg = OutVals[i];
19194 unsigned Size = Flags.getByValSize();
19195 Align Alignment = Flags.getNonZeroByValAlign();
19196
19197 int FI =
19198 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
19199 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
19200 SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
19201
19202 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
19203 /*IsVolatile=*/false,
19204 /*AlwaysInline=*/false, IsTailCall,
19206 ByValArgs.push_back(FIPtr);
19207 }
19208
19209 if (!IsTailCall)
19210 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
19211
19212 // Copy argument values to their designated locations.
19214 SmallVector<SDValue, 8> MemOpChains;
19215 SDValue StackPtr;
19216 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
19217 ++i, ++OutIdx) {
19218 CCValAssign &VA = ArgLocs[i];
19219 SDValue ArgValue = OutVals[OutIdx];
19220 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
19221
19222 // Handle passing f64 on RV32D with a soft float ABI as a special case.
19223 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
19224 assert(VA.isRegLoc() && "Expected register VA assignment");
19225 assert(VA.needsCustom());
19226 SDValue SplitF64 = DAG.getNode(
19227 RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
19228 SDValue Lo = SplitF64.getValue(0);
19229 SDValue Hi = SplitF64.getValue(1);
19230
19231 Register RegLo = VA.getLocReg();
19232 RegsToPass.push_back(std::make_pair(RegLo, Lo));
19233
19234 // Get the CCValAssign for the Hi part.
19235 CCValAssign &HiVA = ArgLocs[++i];
19236
19237 if (HiVA.isMemLoc()) {
19238 // Second half of f64 is passed on the stack.
19239 if (!StackPtr.getNode())
19240 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
19242 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
19243 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
19244 // Emit the store.
19245 MemOpChains.push_back(
19246 DAG.getStore(Chain, DL, Hi, Address, MachinePointerInfo()));
19247 } else {
19248 // Second half of f64 is passed in another GPR.
19249 Register RegHigh = HiVA.getLocReg();
19250 RegsToPass.push_back(std::make_pair(RegHigh, Hi));
19251 }
19252 continue;
19253 }
19254
19255 // Promote the value if needed.
19256 // For now, only handle fully promoted and indirect arguments.
19257 if (VA.getLocInfo() == CCValAssign::Indirect) {
19258 // Store the argument in a stack slot and pass its address.
19259 Align StackAlign =
19260 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
19261 getPrefTypeAlign(ArgValue.getValueType(), DAG));
19262 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
19263 // If the original argument was split (e.g. i128), we need
19264 // to store the required parts of it here (and pass just one address).
19265 // Vectors may be partly split to registers and partly to the stack, in
19266 // which case the base address is partly offset and subsequent stores are
19267 // relative to that.
19268 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
19269 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
19270 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
19271 // Calculate the total size to store. We can only determine this by
19272 // walking the remaining parts in the loop below and accumulating their
19273 // sizes and alignments.
19274 SmallVector<std::pair<SDValue, SDValue>> Parts;
19275 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
19276 SDValue PartValue = OutVals[OutIdx + 1];
19277 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
19278 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
19279 EVT PartVT = PartValue.getValueType();
19280 if (PartVT.isScalableVector())
19281 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
19282 StoredSize += PartVT.getStoreSize();
19283 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
19284 Parts.push_back(std::make_pair(PartValue, Offset));
19285 ++i;
19286 ++OutIdx;
19287 }
19288 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
19289 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
19290 MemOpChains.push_back(
19291 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
19293 for (const auto &Part : Parts) {
19294 SDValue PartValue = Part.first;
19295 SDValue PartOffset = Part.second;
19297 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
19298 MemOpChains.push_back(
19299 DAG.getStore(Chain, DL, PartValue, Address,
19301 }
19302 ArgValue = SpillSlot;
19303 } else {
19304 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget);
19305 }
19306
19307 // Use local copy if it is a byval arg.
19308 if (Flags.isByVal())
19309 ArgValue = ByValArgs[j++];
19310
19311 if (VA.isRegLoc()) {
19312 // Queue up the argument copies and emit them at the end.
19313 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
19314 } else {
19315 assert(VA.isMemLoc() && "Argument not register or memory");
19316 assert(!IsTailCall && "Tail call not allowed if stack is used "
19317 "for passing parameters");
19318
19319 // Work out the address of the stack slot.
19320 if (!StackPtr.getNode())
19321 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
19323 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
19325
19326 // Emit the store.
19327 MemOpChains.push_back(
19328 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
19329 }
19330 }
19331
19332 // Join the stores, which are independent of one another.
19333 if (!MemOpChains.empty())
19334 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
19335
19336 SDValue Glue;
19337
19338 // Build a sequence of copy-to-reg nodes, chained and glued together.
19339 for (auto &Reg : RegsToPass) {
19340 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
19341 Glue = Chain.getValue(1);
19342 }
19343
19344 // Validate that none of the argument registers have been marked as
19345 // reserved; if so, report an error. Do the same for the return address if
19346 // this is not a tail call.
19347 validateCCReservedRegs(RegsToPass, MF);
19348 if (!IsTailCall &&
19351 MF.getFunction(),
19352 "Return address register required, but has been reserved."});
19353
19354 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
19355 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
19356 // split it, and so the direct call can be matched by PseudoCALL.
19357 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
19358 const GlobalValue *GV = S->getGlobal();
19359 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, RISCVII::MO_CALL);
19360 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
19361 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, RISCVII::MO_CALL);
19362 }
19363
19364 // The first call operand is the chain and the second is the target address.
19366 Ops.push_back(Chain);
19367 Ops.push_back(Callee);
19368
19369 // Add argument registers to the end of the list so that they are
19370 // known live into the call.
19371 for (auto &Reg : RegsToPass)
19372 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
19373
19374 if (!IsTailCall) {
19375 // Add a register mask operand representing the call-preserved registers.
19376 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
19377 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
19378 assert(Mask && "Missing call preserved mask for calling convention");
19379 Ops.push_back(DAG.getRegisterMask(Mask));
19380 }
19381
19382 // Glue the call to the argument copies, if any.
19383 if (Glue.getNode())
19384 Ops.push_back(Glue);
19385
19386 assert((!CLI.CFIType || CLI.CB->isIndirectCall()) &&
19387 "Unexpected CFI type for a direct call");
19388
19389 // Emit the call.
19390 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
19391
19392 if (IsTailCall) {
19394 SDValue Ret = DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
19395 if (CLI.CFIType)
19396 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
19397 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
19398 return Ret;
19399 }
19400
19401 Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
19402 if (CLI.CFIType)
19403 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
19404 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
19405 Glue = Chain.getValue(1);
19406
19407 // Mark the end of the call, which is glued to the call itself.
19408 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
19409 Glue = Chain.getValue(1);
19410
19411 // Assign locations to each value returned by this call.
19413 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
19414 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, RISCV::CC_RISCV);
19415
19416 // Copy all of the result registers out of their specified physreg.
19417 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
19418 auto &VA = RVLocs[i];
19419 // Copy the value out
19420 SDValue RetValue =
19421 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
19422 // Glue the RetValue to the end of the call sequence
19423 Chain = RetValue.getValue(1);
19424 Glue = RetValue.getValue(2);
19425
19426 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
19427 assert(VA.needsCustom());
19428 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
19429 MVT::i32, Glue);
19430 Chain = RetValue2.getValue(1);
19431 Glue = RetValue2.getValue(2);
19432 RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
19433 RetValue2);
19434 }
19435
19436 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget);
19437
19438 InVals.push_back(RetValue);
19439 }
19440
19441 return Chain;
19442}
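// Illustrative sketch (editorial, hedged): on the caller side, the special
// f64-on-RV32 handling above mirrors unpackF64OnRV32DSoftABI. With a
// soft-float RV32 ABI and the D extension available, an f64 argument is split
// with SplitF64 and its halves passed in a GPR pair or partly on the stack:
//
//   double scale(double x);
//   double call_scale(double x) { return scale(x); }  /* x leaves as two i32 halves */
//
// The return value travels back the same way and is rebuilt with BuildPairF64
// in the result-copy loop above.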
19443
19444bool RISCVTargetLowering::CanLowerReturn(
19445 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
19446 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
19448 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
19449
19450 RVVArgDispatcher Dispatcher{&MF, this, ArrayRef(Outs)};
19451
19452 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
19453 MVT VT = Outs[i].VT;
19454 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
19455 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
19456 if (RISCV::CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full,
19457 ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true,
19458 nullptr, *this, Dispatcher))
19459 return false;
19460 }
19461 return true;
19462}
19463
19464SDValue
19465RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
19466 bool IsVarArg,
19468 const SmallVectorImpl<SDValue> &OutVals,
19469 const SDLoc &DL, SelectionDAG &DAG) const {
19471 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
19472
19473 // Stores the assignment of the return value to a location.
19475
19476 // Info about the registers and stack slot.
19477 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
19478 *DAG.getContext());
19479
19480 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
19481 nullptr, RISCV::CC_RISCV);
19482
19483 if (CallConv == CallingConv::GHC && !RVLocs.empty())
19484 report_fatal_error("GHC functions return void only");
19485
19486 SDValue Glue;
19487 SmallVector<SDValue, 4> RetOps(1, Chain);
19488
19489 // Copy the result values into the output registers.
19490 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
19491 SDValue Val = OutVals[OutIdx];
19492 CCValAssign &VA = RVLocs[i];
19493 assert(VA.isRegLoc() && "Can only return in registers!");
19494
19495 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
19496 // Handle returning f64 on RV32D with a soft float ABI.
19497 assert(VA.isRegLoc() && "Expected return via registers");
19498 assert(VA.needsCustom());
19499 SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
19500 DAG.getVTList(MVT::i32, MVT::i32), Val);
19501 SDValue Lo = SplitF64.getValue(0);
19502 SDValue Hi = SplitF64.getValue(1);
19503 Register RegLo = VA.getLocReg();
19504 Register RegHi = RVLocs[++i].getLocReg();
19505
19506 if (STI.isRegisterReservedByUser(RegLo) ||
19507 STI.isRegisterReservedByUser(RegHi))
19509 MF.getFunction(),
19510 "Return value register required, but has been reserved."});
19511
19512 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
19513 Glue = Chain.getValue(1);
19514 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
19515 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
19516 Glue = Chain.getValue(1);
19517 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
19518 } else {
19519 // Handle a 'normal' return.
19520 Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget);
19521 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
19522
19523 if (STI.isRegisterReservedByUser(VA.getLocReg()))
19525 MF.getFunction(),
19526 "Return value register required, but has been reserved."});
19527
19528 // Guarantee that all emitted copies are stuck together.
19529 Glue = Chain.getValue(1);
19530 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
19531 }
19532 }
19533
19534 RetOps[0] = Chain; // Update chain.
19535
19536 // Add the glue node if we have it.
19537 if (Glue.getNode()) {
19538 RetOps.push_back(Glue);
19539 }
19540
19541 if (any_of(RVLocs,
19542 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
19543 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
19544
19545 unsigned RetOpc = RISCVISD::RET_GLUE;
19546 // Interrupt service routines use different return instructions.
19547 const Function &Func = DAG.getMachineFunction().getFunction();
19548 if (Func.hasFnAttribute("interrupt")) {
19549 if (!Func.getReturnType()->isVoidTy())
19551 "Functions with the interrupt attribute must have void return type!");
19552
19554 StringRef Kind =
19555 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
19556
19557 if (Kind == "supervisor")
19558 RetOpc = RISCVISD::SRET_GLUE;
19559 else
19560 RetOpc = RISCVISD::MRET_GLUE;
19561 }
19562
19563 return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
19564}
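// Illustrative sketch (editorial, hedged): the "interrupt" attribute handling
// above selects SRET_GLUE or MRET_GLUE instead of the normal RET_GLUE, so an
// interrupt handler returns with sret/mret, e.g.:
//
//   __attribute__((interrupt("machine")))
//   void timer_isr(void) {
//     /* handler body; must take no arguments and return void */
//   }
//
// A "supervisor" handler would use sret, and any non-void return type is
// rejected with a fatal error as shown above.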
19565
19566void RISCVTargetLowering::validateCCReservedRegs(
19567 const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
19568 MachineFunction &MF) const {
19569 const Function &F = MF.getFunction();
19570 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
19571
19572 if (llvm::any_of(Regs, [&STI](auto Reg) {
19573 return STI.isRegisterReservedByUser(Reg.first);
19574 }))
19575 F.getContext().diagnose(DiagnosticInfoUnsupported{
19576 F, "Argument register required, but has been reserved."});
19577}
19578
19579// Check if the result of the node is only used as a return value, as
19580// otherwise we can't perform a tail-call.
19581bool RISCVTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
19582 if (N->getNumValues() != 1)
19583 return false;
19584 if (!N->hasNUsesOfValue(1, 0))
19585 return false;
19586
19587 SDNode *Copy = *N->use_begin();
19588
19589 if (Copy->getOpcode() == ISD::BITCAST) {
19590 return isUsedByReturnOnly(Copy, Chain);
19591 }
19592
19593 // TODO: Handle additional opcodes in order to support tail-calling libcalls
19594 // with soft float ABIs.
19595 if (Copy->getOpcode() != ISD::CopyToReg) {
19596 return false;
19597 }
19598
19599 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
19600 // isn't safe to perform a tail call.
19601 if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == MVT::Glue)
19602 return false;
19603
19604 // The copy must be used by a RISCVISD::RET_GLUE, and nothing else.
19605 bool HasRet = false;
19606 for (SDNode *Node : Copy->uses()) {
19607 if (Node->getOpcode() != RISCVISD::RET_GLUE)
19608 return false;
19609 HasRet = true;
19610 }
19611 if (!HasRet)
19612 return false;
19613
19614 Chain = Copy->getOperand(0);
19615 return true;
19616}
19617
19618bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
19619 return CI->isTailCall();
19620}
19621
19622const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
19623#define NODE_NAME_CASE(NODE) \
19624 case RISCVISD::NODE: \
19625 return "RISCVISD::" #NODE;
19626 // clang-format off
19627 switch ((RISCVISD::NodeType)Opcode) {
19629 break;
19630 NODE_NAME_CASE(RET_GLUE)
19631 NODE_NAME_CASE(SRET_GLUE)
19632 NODE_NAME_CASE(MRET_GLUE)
19633 NODE_NAME_CASE(CALL)
19634 NODE_NAME_CASE(SELECT_CC)
19635 NODE_NAME_CASE(BR_CC)
19636 NODE_NAME_CASE(BuildPairF64)
19637 NODE_NAME_CASE(SplitF64)
19638 NODE_NAME_CASE(TAIL)
19639 NODE_NAME_CASE(ADD_LO)
19640 NODE_NAME_CASE(HI)
19641 NODE_NAME_CASE(LLA)
19642 NODE_NAME_CASE(ADD_TPREL)
19643 NODE_NAME_CASE(MULHSU)
19644 NODE_NAME_CASE(SHL_ADD)
19645 NODE_NAME_CASE(SLLW)
19646 NODE_NAME_CASE(SRAW)
19647 NODE_NAME_CASE(SRLW)
19648 NODE_NAME_CASE(DIVW)
19649 NODE_NAME_CASE(DIVUW)
19650 NODE_NAME_CASE(REMUW)
19651 NODE_NAME_CASE(ROLW)
19652 NODE_NAME_CASE(RORW)
19653 NODE_NAME_CASE(CLZW)
19654 NODE_NAME_CASE(CTZW)
19655 NODE_NAME_CASE(ABSW)
19656 NODE_NAME_CASE(FMV_H_X)
19657 NODE_NAME_CASE(FMV_X_ANYEXTH)
19658 NODE_NAME_CASE(FMV_X_SIGNEXTH)
19659 NODE_NAME_CASE(FMV_W_X_RV64)
19660 NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
19661 NODE_NAME_CASE(FCVT_X)
19662 NODE_NAME_CASE(FCVT_XU)
19663 NODE_NAME_CASE(FCVT_W_RV64)
19664 NODE_NAME_CASE(FCVT_WU_RV64)
19665 NODE_NAME_CASE(STRICT_FCVT_W_RV64)
19666 NODE_NAME_CASE(STRICT_FCVT_WU_RV64)
19667 NODE_NAME_CASE(FP_ROUND_BF16)
19668 NODE_NAME_CASE(FP_EXTEND_BF16)
19669 NODE_NAME_CASE(FROUND)
19670 NODE_NAME_CASE(FCLASS)
19671 NODE_NAME_CASE(FMAX)
19672 NODE_NAME_CASE(FMIN)
19673 NODE_NAME_CASE(READ_COUNTER_WIDE)
19674 NODE_NAME_CASE(BREV8)
19675 NODE_NAME_CASE(ORC_B)
19676 NODE_NAME_CASE(ZIP)
19677 NODE_NAME_CASE(UNZIP)
19678 NODE_NAME_CASE(CLMUL)
19679 NODE_NAME_CASE(CLMULH)
19680 NODE_NAME_CASE(CLMULR)
19681 NODE_NAME_CASE(MOPR)
19682 NODE_NAME_CASE(MOPRR)
19683 NODE_NAME_CASE(SHA256SIG0)
19684 NODE_NAME_CASE(SHA256SIG1)
19685 NODE_NAME_CASE(SHA256SUM0)
19686 NODE_NAME_CASE(SHA256SUM1)
19687 NODE_NAME_CASE(SM4KS)
19688 NODE_NAME_CASE(SM4ED)
19689 NODE_NAME_CASE(SM3P0)
19690 NODE_NAME_CASE(SM3P1)
19691 NODE_NAME_CASE(TH_LWD)
19692 NODE_NAME_CASE(TH_LWUD)
19693 NODE_NAME_CASE(TH_LDD)
19694 NODE_NAME_CASE(TH_SWD)
19695 NODE_NAME_CASE(TH_SDD)
19696 NODE_NAME_CASE(VMV_V_V_VL)
19697 NODE_NAME_CASE(VMV_V_X_VL)
19698 NODE_NAME_CASE(VFMV_V_F_VL)
19699 NODE_NAME_CASE(VMV_X_S)
19700 NODE_NAME_CASE(VMV_S_X_VL)
19701 NODE_NAME_CASE(VFMV_S_F_VL)
19702 NODE_NAME_CASE(SPLAT_VECTOR_SPLIT_I64_VL)
19703 NODE_NAME_CASE(READ_VLENB)
19704 NODE_NAME_CASE(TRUNCATE_VECTOR_VL)
19705 NODE_NAME_CASE(VSLIDEUP_VL)
19706 NODE_NAME_CASE(VSLIDE1UP_VL)
19707 NODE_NAME_CASE(VSLIDEDOWN_VL)
19708 NODE_NAME_CASE(VSLIDE1DOWN_VL)
19709 NODE_NAME_CASE(VFSLIDE1UP_VL)
19710 NODE_NAME_CASE(VFSLIDE1DOWN_VL)
19711 NODE_NAME_CASE(VID_VL)
19712 NODE_NAME_CASE(VFNCVT_ROD_VL)
19713 NODE_NAME_CASE(VECREDUCE_ADD_VL)
19714 NODE_NAME_CASE(VECREDUCE_UMAX_VL)
19715 NODE_NAME_CASE(VECREDUCE_SMAX_VL)
19716 NODE_NAME_CASE(VECREDUCE_UMIN_VL)
19717 NODE_NAME_CASE(VECREDUCE_SMIN_VL)
19718 NODE_NAME_CASE(VECREDUCE_AND_VL)
19719 NODE_NAME_CASE(VECREDUCE_OR_VL)
19720 NODE_NAME_CASE(VECREDUCE_XOR_VL)
19721 NODE_NAME_CASE(VECREDUCE_FADD_VL)
19722 NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL)
19723 NODE_NAME_CASE(VECREDUCE_FMIN_VL)
19724 NODE_NAME_CASE(VECREDUCE_FMAX_VL)
19725 NODE_NAME_CASE(ADD_VL)
19726 NODE_NAME_CASE(AND_VL)
19727 NODE_NAME_CASE(MUL_VL)
19728 NODE_NAME_CASE(OR_VL)
19729 NODE_NAME_CASE(SDIV_VL)
19730 NODE_NAME_CASE(SHL_VL)
19731 NODE_NAME_CASE(SREM_VL)
19732 NODE_NAME_CASE(SRA_VL)
19733 NODE_NAME_CASE(SRL_VL)
19734 NODE_NAME_CASE(ROTL_VL)
19735 NODE_NAME_CASE(ROTR_VL)
19736 NODE_NAME_CASE(SUB_VL)
19737 NODE_NAME_CASE(UDIV_VL)
19738 NODE_NAME_CASE(UREM_VL)
19739 NODE_NAME_CASE(XOR_VL)
19740 NODE_NAME_CASE(AVGFLOORU_VL)
19741 NODE_NAME_CASE(AVGCEILU_VL)
19742 NODE_NAME_CASE(SADDSAT_VL)
19743 NODE_NAME_CASE(UADDSAT_VL)
19744 NODE_NAME_CASE(SSUBSAT_VL)
19745 NODE_NAME_CASE(USUBSAT_VL)
19746 NODE_NAME_CASE(FADD_VL)
19747 NODE_NAME_CASE(FSUB_VL)
19748 NODE_NAME_CASE(FMUL_VL)
19749 NODE_NAME_CASE(FDIV_VL)
19750 NODE_NAME_CASE(FNEG_VL)
19751 NODE_NAME_CASE(FABS_VL)
19752 NODE_NAME_CASE(FSQRT_VL)
19753 NODE_NAME_CASE(FCLASS_VL)
19754 NODE_NAME_CASE(VFMADD_VL)
19755 NODE_NAME_CASE(VFNMADD_VL)
19756 NODE_NAME_CASE(VFMSUB_VL)
19757 NODE_NAME_CASE(VFNMSUB_VL)
19758 NODE_NAME_CASE(VFWMADD_VL)
19759 NODE_NAME_CASE(VFWNMADD_VL)
19760 NODE_NAME_CASE(VFWMSUB_VL)
19761 NODE_NAME_CASE(VFWNMSUB_VL)
19762 NODE_NAME_CASE(FCOPYSIGN_VL)
19763 NODE_NAME_CASE(SMIN_VL)
19764 NODE_NAME_CASE(SMAX_VL)
19765 NODE_NAME_CASE(UMIN_VL)
19766 NODE_NAME_CASE(UMAX_VL)
19767 NODE_NAME_CASE(BITREVERSE_VL)
19768 NODE_NAME_CASE(BSWAP_VL)
19769 NODE_NAME_CASE(CTLZ_VL)
19770 NODE_NAME_CASE(CTTZ_VL)
19771 NODE_NAME_CASE(CTPOP_VL)
19772 NODE_NAME_CASE(VFMIN_VL)
19773 NODE_NAME_CASE(VFMAX_VL)
19774 NODE_NAME_CASE(MULHS_VL)
19775 NODE_NAME_CASE(MULHU_VL)
19776 NODE_NAME_CASE(VFCVT_RTZ_X_F_VL)
19777 NODE_NAME_CASE(VFCVT_RTZ_XU_F_VL)
19778 NODE_NAME_CASE(VFCVT_RM_X_F_VL)
19779 NODE_NAME_CASE(VFCVT_RM_XU_F_VL)
19780 NODE_NAME_CASE(VFCVT_X_F_VL)
19781 NODE_NAME_CASE(VFCVT_XU_F_VL)
19782 NODE_NAME_CASE(VFROUND_NOEXCEPT_VL)
19783 NODE_NAME_CASE(SINT_TO_FP_VL)
19784 NODE_NAME_CASE(UINT_TO_FP_VL)
19785 NODE_NAME_CASE(VFCVT_RM_F_XU_VL)
19786 NODE_NAME_CASE(VFCVT_RM_F_X_VL)
19787 NODE_NAME_CASE(FP_EXTEND_VL)
19788 NODE_NAME_CASE(FP_ROUND_VL)
19789 NODE_NAME_CASE(STRICT_FADD_VL)
19790 NODE_NAME_CASE(STRICT_FSUB_VL)
19791 NODE_NAME_CASE(STRICT_FMUL_VL)
19792 NODE_NAME_CASE(STRICT_FDIV_VL)
19793 NODE_NAME_CASE(STRICT_FSQRT_VL)
19794 NODE_NAME_CASE(STRICT_VFMADD_VL)
19795 NODE_NAME_CASE(STRICT_VFNMADD_VL)
19796 NODE_NAME_CASE(STRICT_VFMSUB_VL)
19797 NODE_NAME_CASE(STRICT_VFNMSUB_VL)
19798 NODE_NAME_CASE(STRICT_FP_ROUND_VL)
19799 NODE_NAME_CASE(STRICT_FP_EXTEND_VL)
19800 NODE_NAME_CASE(STRICT_VFNCVT_ROD_VL)
19801 NODE_NAME_CASE(STRICT_SINT_TO_FP_VL)
19802 NODE_NAME_CASE(STRICT_UINT_TO_FP_VL)
19803 NODE_NAME_CASE(STRICT_VFCVT_RM_X_F_VL)
19804 NODE_NAME_CASE(STRICT_VFCVT_RTZ_X_F_VL)
19805 NODE_NAME_CASE(STRICT_VFCVT_RTZ_XU_F_VL)
19806 NODE_NAME_CASE(STRICT_FSETCC_VL)
19807 NODE_NAME_CASE(STRICT_FSETCCS_VL)
19808 NODE_NAME_CASE(STRICT_VFROUND_NOEXCEPT_VL)
19809 NODE_NAME_CASE(VWMUL_VL)
19810 NODE_NAME_CASE(VWMULU_VL)
19811 NODE_NAME_CASE(VWMULSU_VL)
19812 NODE_NAME_CASE(VWADD_VL)
19813 NODE_NAME_CASE(VWADDU_VL)
19814 NODE_NAME_CASE(VWSUB_VL)
19815 NODE_NAME_CASE(VWSUBU_VL)
19816 NODE_NAME_CASE(VWADD_W_VL)
19817 NODE_NAME_CASE(VWADDU_W_VL)
19818 NODE_NAME_CASE(VWSUB_W_VL)
19819 NODE_NAME_CASE(VWSUBU_W_VL)
19820 NODE_NAME_CASE(VWSLL_VL)
19821 NODE_NAME_CASE(VFWMUL_VL)
19822 NODE_NAME_CASE(VFWADD_VL)
19823 NODE_NAME_CASE(VFWSUB_VL)
19824 NODE_NAME_CASE(VFWADD_W_VL)
19825 NODE_NAME_CASE(VFWSUB_W_VL)
19826 NODE_NAME_CASE(VWMACC_VL)
19827 NODE_NAME_CASE(VWMACCU_VL)
19828 NODE_NAME_CASE(VWMACCSU_VL)
19829 NODE_NAME_CASE(VNSRL_VL)
19830 NODE_NAME_CASE(SETCC_VL)
19831 NODE_NAME_CASE(VMERGE_VL)
19832 NODE_NAME_CASE(VMAND_VL)
19833 NODE_NAME_CASE(VMOR_VL)
19834 NODE_NAME_CASE(VMXOR_VL)
19835 NODE_NAME_CASE(VMCLR_VL)
19836 NODE_NAME_CASE(VMSET_VL)
19837 NODE_NAME_CASE(VRGATHER_VX_VL)
19838 NODE_NAME_CASE(VRGATHER_VV_VL)
19839 NODE_NAME_CASE(VRGATHEREI16_VV_VL)
19840 NODE_NAME_CASE(VSEXT_VL)
19841 NODE_NAME_CASE(VZEXT_VL)
19842 NODE_NAME_CASE(VCPOP_VL)
19843 NODE_NAME_CASE(VFIRST_VL)
19844 NODE_NAME_CASE(READ_CSR)
19845 NODE_NAME_CASE(WRITE_CSR)
19846 NODE_NAME_CASE(SWAP_CSR)
19847 NODE_NAME_CASE(CZERO_EQZ)
19848 NODE_NAME_CASE(CZERO_NEZ)
19849 NODE_NAME_CASE(SF_VC_XV_SE)
19850 NODE_NAME_CASE(SF_VC_IV_SE)
19851 NODE_NAME_CASE(SF_VC_VV_SE)
19852 NODE_NAME_CASE(SF_VC_FV_SE)
19853 NODE_NAME_CASE(SF_VC_XVV_SE)
19854 NODE_NAME_CASE(SF_VC_IVV_SE)
19855 NODE_NAME_CASE(SF_VC_VVV_SE)
19856 NODE_NAME_CASE(SF_VC_FVV_SE)
19857 NODE_NAME_CASE(SF_VC_XVW_SE)
19858 NODE_NAME_CASE(SF_VC_IVW_SE)
19859 NODE_NAME_CASE(SF_VC_VVW_SE)
19860 NODE_NAME_CASE(SF_VC_FVW_SE)
19861 NODE_NAME_CASE(SF_VC_V_X_SE)
19862 NODE_NAME_CASE(SF_VC_V_I_SE)
19863 NODE_NAME_CASE(SF_VC_V_XV_SE)
19864 NODE_NAME_CASE(SF_VC_V_IV_SE)
19865 NODE_NAME_CASE(SF_VC_V_VV_SE)
19866 NODE_NAME_CASE(SF_VC_V_FV_SE)
19867 NODE_NAME_CASE(SF_VC_V_XVV_SE)
19868 NODE_NAME_CASE(SF_VC_V_IVV_SE)
19869 NODE_NAME_CASE(SF_VC_V_VVV_SE)
19870 NODE_NAME_CASE(SF_VC_V_FVV_SE)
19871 NODE_NAME_CASE(SF_VC_V_XVW_SE)
19872 NODE_NAME_CASE(SF_VC_V_IVW_SE)
19873 NODE_NAME_CASE(SF_VC_V_VVW_SE)
19874 NODE_NAME_CASE(SF_VC_V_FVW_SE)
19875 }
19876 // clang-format on
19877 return nullptr;
19878#undef NODE_NAME_CASE
19879}
19880
19881/// getConstraintType - Given a constraint letter, return the type of
19882/// constraint it is for this target.
19885 if (Constraint.size() == 1) {
19886 switch (Constraint[0]) {
19887 default:
19888 break;
19889 case 'f':
19890 return C_RegisterClass;
19891 case 'I':
19892 case 'J':
19893 case 'K':
19894 return C_Immediate;
19895 case 'A':
19896 return C_Memory;
19897 case 's':
19898 case 'S': // A symbolic address
19899 return C_Other;
19900 }
19901 } else {
19902 if (Constraint == "vr" || Constraint == "vm")
19903 return C_RegisterClass;
19904 }
19905 return TargetLowering::getConstraintType(Constraint);
19906}
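// Illustrative sketch (editorial, hedged): of the single-letter constraints
// classified above, 'f' selects a floating-point register, 'I'/'J'/'K' are
// immediates, and 'A' is a memory operand whose address sits in a register.
// For example, assuming the D extension is available:
//
//   double fma_d(double a, double b, double c) {
//     double d;
//     __asm__("fmadd.d %0, %1, %2, %3" : "=f"(d) : "f"(a), "f"(b), "f"(c));
//     return d;
//   }
//
// The multi-letter "vr"/"vm" constraints similarly map to vector register
// classes when the V extension is in use.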
19907
19908std::pair<unsigned, const TargetRegisterClass *>
19910 StringRef Constraint,
19911 MVT VT) const {
19912 // First, see if this is a constraint that directly corresponds to a RISC-V
19913 // register class.
19914 if (Constraint.size() == 1) {
19915 switch (Constraint[0]) {
19916 case 'r':
19917 // TODO: Support fixed vectors up to XLen for P extension?
19918 if (VT.isVector())
19919 break;
19920 if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
19921 return std::make_pair(0U, &RISCV::GPRF16RegClass);
19922 if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
19923 return std::make_pair(0U, &RISCV::GPRF32RegClass);
19924 if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
19925 return std::make_pair(0U, &RISCV::GPRPairRegClass);
19926 return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
19927 case 'f':
19928 if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16)
19929 return std::make_pair(0U, &RISCV::FPR16RegClass);
19930 if (Subtarget.hasStdExtF() && VT == MVT::f32)
19931 return std::make_pair(0U, &RISCV::FPR32RegClass);
19932 if (Subtarget.hasStdExtD() && VT == MVT::f64)
19933 return std::make_pair(0U, &RISCV::FPR64RegClass);
19934 break;
19935 default:
19936 break;
19937 }
19938 } else if (Constraint == "vr") {
19939 for (const auto *RC : {&RISCV::VRRegClass, &RISCV::VRM2RegClass,
19940 &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
19941 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
19942 return std::make_pair(0U, RC);
19943 }
19944 } else if (Constraint == "vm") {
19945 if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy))
19946 return std::make_pair(0U, &RISCV::VMV0RegClass);
19947 }
19948
19949 // Clang will correctly decode the usage of register name aliases into their
19950 // official names. However, other frontends like `rustc` do not. This allows
19951 // users of these frontends to use the ABI names for registers in LLVM-style
19952 // register constraints.
19953 unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
19954 .Case("{zero}", RISCV::X0)
19955 .Case("{ra}", RISCV::X1)
19956 .Case("{sp}", RISCV::X2)
19957 .Case("{gp}", RISCV::X3)
19958 .Case("{tp}", RISCV::X4)
19959 .Case("{t0}", RISCV::X5)
19960 .Case("{t1}", RISCV::X6)
19961 .Case("{t2}", RISCV::X7)
19962 .Cases("{s0}", "{fp}", RISCV::X8)
19963 .Case("{s1}", RISCV::X9)
19964 .Case("{a0}", RISCV::X10)
19965 .Case("{a1}", RISCV::X11)
19966 .Case("{a2}", RISCV::X12)
19967 .Case("{a3}", RISCV::X13)
19968 .Case("{a4}", RISCV::X14)
19969 .Case("{a5}", RISCV::X15)
19970 .Case("{a6}", RISCV::X16)
19971 .Case("{a7}", RISCV::X17)
19972 .Case("{s2}", RISCV::X18)
19973 .Case("{s3}", RISCV::X19)
19974 .Case("{s4}", RISCV::X20)
19975 .Case("{s5}", RISCV::X21)
19976 .Case("{s6}", RISCV::X22)
19977 .Case("{s7}", RISCV::X23)
19978 .Case("{s8}", RISCV::X24)
19979 .Case("{s9}", RISCV::X25)
19980 .Case("{s10}", RISCV::X26)
19981 .Case("{s11}", RISCV::X27)
19982 .Case("{t3}", RISCV::X28)
19983 .Case("{t4}", RISCV::X29)
19984 .Case("{t5}", RISCV::X30)
19985 .Case("{t6}", RISCV::X31)
19986 .Default(RISCV::NoRegister);
19987 if (XRegFromAlias != RISCV::NoRegister)
19988 return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
19989
19990 // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
19991 // TableGen record rather than the AsmName to choose registers for InlineAsm
19992 // constraints, and since we want to match those names to the widest floating
19993 // point register type available, manually select floating point registers here.
19994 //
19995 // The second case is the ABI name of the register, so that frontends can also
19996 // use the ABI names in register constraint lists.
19997 if (Subtarget.hasStdExtF()) {
19998 unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
19999 .Cases("{f0}", "{ft0}", RISCV::F0_F)
20000 .Cases("{f1}", "{ft1}", RISCV::F1_F)
20001 .Cases("{f2}", "{ft2}", RISCV::F2_F)
20002 .Cases("{f3}", "{ft3}", RISCV::F3_F)
20003 .Cases("{f4}", "{ft4}", RISCV::F4_F)
20004 .Cases("{f5}", "{ft5}", RISCV::F5_F)
20005 .Cases("{f6}", "{ft6}", RISCV::F6_F)
20006 .Cases("{f7}", "{ft7}", RISCV::F7_F)
20007 .Cases("{f8}", "{fs0}", RISCV::F8_F)
20008 .Cases("{f9}", "{fs1}", RISCV::F9_F)
20009 .Cases("{f10}", "{fa0}", RISCV::F10_F)
20010 .Cases("{f11}", "{fa1}", RISCV::F11_F)
20011 .Cases("{f12}", "{fa2}", RISCV::F12_F)
20012 .Cases("{f13}", "{fa3}", RISCV::F13_F)
20013 .Cases("{f14}", "{fa4}", RISCV::F14_F)
20014 .Cases("{f15}", "{fa5}", RISCV::F15_F)
20015 .Cases("{f16}", "{fa6}", RISCV::F16_F)
20016 .Cases("{f17}", "{fa7}", RISCV::F17_F)
20017 .Cases("{f18}", "{fs2}", RISCV::F18_F)
20018 .Cases("{f19}", "{fs3}", RISCV::F19_F)
20019 .Cases("{f20}", "{fs4}", RISCV::F20_F)
20020 .Cases("{f21}", "{fs5}", RISCV::F21_F)
20021 .Cases("{f22}", "{fs6}", RISCV::F22_F)
20022 .Cases("{f23}", "{fs7}", RISCV::F23_F)
20023 .Cases("{f24}", "{fs8}", RISCV::F24_F)
20024 .Cases("{f25}", "{fs9}", RISCV::F25_F)
20025 .Cases("{f26}", "{fs10}", RISCV::F26_F)
20026 .Cases("{f27}", "{fs11}", RISCV::F27_F)
20027 .Cases("{f28}", "{ft8}", RISCV::F28_F)
20028 .Cases("{f29}", "{ft9}", RISCV::F29_F)
20029 .Cases("{f30}", "{ft10}", RISCV::F30_F)
20030 .Cases("{f31}", "{ft11}", RISCV::F31_F)
20031 .Default(RISCV::NoRegister);
20032 if (FReg != RISCV::NoRegister) {
20033 assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
20034 if (Subtarget.hasStdExtD() && (VT == MVT::f64 || VT == MVT::Other)) {
20035 unsigned RegNo = FReg - RISCV::F0_F;
20036 unsigned DReg = RISCV::F0_D + RegNo;
20037 return std::make_pair(DReg, &RISCV::FPR64RegClass);
20038 }
20039 if (VT == MVT::f32 || VT == MVT::Other)
20040 return std::make_pair(FReg, &RISCV::FPR32RegClass);
20041 if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16) {
20042 unsigned RegNo = FReg - RISCV::F0_F;
20043 unsigned HReg = RISCV::F0_H + RegNo;
20044 return std::make_pair(HReg, &RISCV::FPR16RegClass);
20045 }
20046 }
20047 }
20048
20049 if (Subtarget.hasVInstructions()) {
20050 Register VReg = StringSwitch<Register>(Constraint.lower())
20051 .Case("{v0}", RISCV::V0)
20052 .Case("{v1}", RISCV::V1)
20053 .Case("{v2}", RISCV::V2)
20054 .Case("{v3}", RISCV::V3)
20055 .Case("{v4}", RISCV::V4)
20056 .Case("{v5}", RISCV::V5)
20057 .Case("{v6}", RISCV::V6)
20058 .Case("{v7}", RISCV::V7)
20059 .Case("{v8}", RISCV::V8)
20060 .Case("{v9}", RISCV::V9)
20061 .Case("{v10}", RISCV::V10)
20062 .Case("{v11}", RISCV::V11)
20063 .Case("{v12}", RISCV::V12)
20064 .Case("{v13}", RISCV::V13)
20065 .Case("{v14}", RISCV::V14)
20066 .Case("{v15}", RISCV::V15)
20067 .Case("{v16}", RISCV::V16)
20068 .Case("{v17}", RISCV::V17)
20069 .Case("{v18}", RISCV::V18)
20070 .Case("{v19}", RISCV::V19)
20071 .Case("{v20}", RISCV::V20)
20072 .Case("{v21}", RISCV::V21)
20073 .Case("{v22}", RISCV::V22)
20074 .Case("{v23}", RISCV::V23)
20075 .Case("{v24}", RISCV::V24)
20076 .Case("{v25}", RISCV::V25)
20077 .Case("{v26}", RISCV::V26)
20078 .Case("{v27}", RISCV::V27)
20079 .Case("{v28}", RISCV::V28)
20080 .Case("{v29}", RISCV::V29)
20081 .Case("{v30}", RISCV::V30)
20082 .Case("{v31}", RISCV::V31)
20083 .Default(RISCV::NoRegister);
20084 if (VReg != RISCV::NoRegister) {
20085 if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy))
20086 return std::make_pair(VReg, &RISCV::VMRegClass);
20087 if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy))
20088 return std::make_pair(VReg, &RISCV::VRRegClass);
20089 for (const auto *RC :
20090 {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
20091 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) {
20092 VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC);
20093 return std::make_pair(VReg, RC);
20094 }
20095 }
20096 }
20097 }
20098
20099 std::pair<Register, const TargetRegisterClass *> Res =
20101
20102 // If we picked one of the Zfinx register classes, remap it to the GPR class.
20103 // FIXME: When Zfinx is supported in CodeGen this will need to take the
20104 // Subtarget into account.
20105 if (Res.second == &RISCV::GPRF16RegClass ||
20106 Res.second == &RISCV::GPRF32RegClass ||
20107 Res.second == &RISCV::GPRPairRegClass)
20108 return std::make_pair(Res.first, &RISCV::GPRRegClass);
20109
20110 return Res;
20111}
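// Illustrative sketch (editorial, hedged): the alias tables above let inline
// asm name registers by their ABI names. With clang, explicit register
// variables end up as {a0}/{a7}-style constraints in the IR; a raw Linux
// system-call wrapper is a common example (the syscall number 93 here is
// purely illustrative):
//
//   long exit_raw(long code) {
//     register long a0 __asm__("a0") = code;
//     register long a7 __asm__("a7") = 93;
//     __asm__ volatile("ecall" : "+r"(a0) : "r"(a7) : "memory");
//     return a0;
//   }
//
// Frontends such as rustc may pass the ABI names directly, which is exactly
// what the StringSwitch tables above accept.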
20112
20115 // Currently only support length 1 constraints.
20116 if (ConstraintCode.size() == 1) {
20117 switch (ConstraintCode[0]) {
20118 case 'A':
20120 default:
20121 break;
20122 }
20123 }
20124
20125 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
20126}
20127
20129 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
20130 SelectionDAG &DAG) const {
20131 // Currently only support length 1 constraints.
20132 if (Constraint.size() == 1) {
20133 switch (Constraint[0]) {
20134 case 'I':
20135 // Validate & create a 12-bit signed immediate operand.
20136 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
20137 uint64_t CVal = C->getSExtValue();
20138 if (isInt<12>(CVal))
20139 Ops.push_back(
20140 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
20141 }
20142 return;
20143 case 'J':
20144 // Validate & create an integer zero operand.
20145 if (isNullConstant(Op))
20146 Ops.push_back(
20147 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
20148 return;
20149 case 'K':
20150 // Validate & create a 5-bit unsigned immediate operand.
20151 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
20152 uint64_t CVal = C->getZExtValue();
20153 if (isUInt<5>(CVal))
20154 Ops.push_back(
20155 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
20156 }
20157 return;
20158 case 'S':
20160 return;
20161 default:
20162 break;
20163 }
20164 }
20165 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
20166}
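// Illustrative sketch (editorial, hedged): the 'I' handling above accepts a
// 12-bit signed immediate and materializes it as a target constant, while 'K'
// accepts a 5-bit unsigned immediate (e.g. for CSR instruction immediates) and
// 'J' only the constant zero. For example:
//
//   long add_forty_two(long x) {
//     long r;
//     __asm__("addi %0, %1, %2" : "=r"(r) : "r"(x), "I"(42));
//     return r;
//   }
//
// An out-of-range constant simply fails the validation here and is rejected
// rather than silently truncated.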
20167
20168Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
20169 Instruction *Inst,
20170 AtomicOrdering Ord) const {
20171 if (Subtarget.hasStdExtZtso()) {
20172 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
20173 return Builder.CreateFence(Ord);
20174 return nullptr;
20175 }
20176
20177 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
20178 return Builder.CreateFence(Ord);
20179 if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
20180 return Builder.CreateFence(AtomicOrdering::Release);
20181 return nullptr;
20182}
20183
20184Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
20185 Instruction *Inst,
20186 AtomicOrdering Ord) const {
20187 if (Subtarget.hasStdExtZtso()) {
20188 if (isa<StoreInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
20189 return Builder.CreateFence(Ord);
20190 return nullptr;
20191 }
20192
20193 if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
20194 return Builder.CreateFence(AtomicOrdering::Acquire);
20195 if (Subtarget.enableTrailingSeqCstFence() && isa<StoreInst>(Inst) &&
20198 return nullptr;
20199}
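// Illustrative sketch (editorial, hedged): under the default WMO mapping these
// hooks bracket atomic loads and stores with fences, while Ztso suppresses
// most of them, e.g.:
//
//   #include <stdatomic.h>
//   void publish(atomic_int *flag) {
//     /* release-or-stronger store: leading "fence rw,w"; a trailing
//        "fence rw,rw" only if trailing seq_cst fences are enabled */
//     atomic_store_explicit(flag, 1, memory_order_seq_cst);
//   }
//   int wait_for(atomic_int *flag) {
//     /* acquire-or-stronger load: trailing "fence r,rw" */
//     return atomic_load_explicit(flag, memory_order_acquire);
//   }
//
// The exact sequences depend on the subtarget features checked above.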
20200
20203 // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
20204 // point operations can't be used in an lr/sc sequence without breaking the
20205 // forward-progress guarantee.
20206 if (AI->isFloatingPointOperation() ||
20210
20211 // Don't expand forced atomics; we want to have __sync libcalls instead.
20212 if (Subtarget.hasForcedAtomics())
20214
20215 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
20216 if (AI->getOperation() == AtomicRMWInst::Nand) {
20217 if (Subtarget.hasStdExtZacas() &&
20218 (Size >= 32 || Subtarget.hasStdExtZabha()))
20220 if (Size < 32)
20222 }
20223
20224 if (Size < 32 && !Subtarget.hasStdExtZabha())
20226
20228}
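// Illustrative sketch (editorial, hedged): the expansion choices above mean
// that, without Zabha, sub-word integer RMW operations take the masked LR/SC
// path, while floating-point RMW is always expanded via compare-exchange:
//
//   #include <stdatomic.h>
//   unsigned char bump(_Atomic unsigned char *c) {
//     /* 8-bit fetch_add: AtomicExpansionKind::MaskedIntrinsic when Zabha is
//        absent, a plain AMO when it is present */
//     return atomic_fetch_add_explicit(c, 1, memory_order_relaxed);
//   }
//
// With forced atomics the operation is instead left for the __sync libcalls.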
20229
20230static Intrinsic::ID
20232 if (XLen == 32) {
20233 switch (BinOp) {
20234 default:
20235 llvm_unreachable("Unexpected AtomicRMW BinOp");
20237 return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
20238 case AtomicRMWInst::Add:
20239 return Intrinsic::riscv_masked_atomicrmw_add_i32;
20240 case AtomicRMWInst::Sub:
20241 return Intrinsic::riscv_masked_atomicrmw_sub_i32;
20243 return Intrinsic::riscv_masked_atomicrmw_nand_i32;
20244 case AtomicRMWInst::Max:
20245 return Intrinsic::riscv_masked_atomicrmw_max_i32;
20246 case AtomicRMWInst::Min:
20247 return Intrinsic::riscv_masked_atomicrmw_min_i32;
20249 return Intrinsic::riscv_masked_atomicrmw_umax_i32;
20251 return Intrinsic::riscv_masked_atomicrmw_umin_i32;
20252 }
20253 }
20254
20255 if (XLen == 64) {
20256 switch (BinOp) {
20257 default:
20258 llvm_unreachable("Unexpected AtomicRMW BinOp");
20260 return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
20261 case AtomicRMWInst::Add:
20262 return Intrinsic::riscv_masked_atomicrmw_add_i64;
20263 case AtomicRMWInst::Sub:
20264 return Intrinsic::riscv_masked_atomicrmw_sub_i64;
20266 return Intrinsic::riscv_masked_atomicrmw_nand_i64;
20267 case AtomicRMWInst::Max:
20268 return Intrinsic::riscv_masked_atomicrmw_max_i64;
20269 case AtomicRMWInst::Min:
20270 return Intrinsic::riscv_masked_atomicrmw_min_i64;
20272 return Intrinsic::riscv_masked_atomicrmw_umax_i64;
20274 return Intrinsic::riscv_masked_atomicrmw_umin_i64;
20275 }
20276 }
20277
20278 llvm_unreachable("Unexpected XLen\n");
20279}
20280
20282 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
20283 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
20284 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
20285 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
20286 // mask, as this produces better code than the LR/SC loop emitted by
20287 // int_riscv_masked_atomicrmw_xchg.
20288 if (AI->getOperation() == AtomicRMWInst::Xchg &&
20289 isa<ConstantInt>(AI->getValOperand())) {
20290 ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
20291 if (CVal->isZero())
20292 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
20293 Builder.CreateNot(Mask, "Inv_Mask"),
20294 AI->getAlign(), Ord);
20295 if (CVal->isMinusOne())
20296 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
20297 AI->getAlign(), Ord);
20298 }
20299
20300 unsigned XLen = Subtarget.getXLen();
20301 Value *Ordering =
20302 Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
20303 Type *Tys[] = {AlignedAddr->getType()};
20304 Function *LrwOpScwLoop = Intrinsic::getDeclaration(
20305 AI->getModule(),
20306 getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);
20307
20308 if (XLen == 64) {
20309 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
20310 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
20311 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
20312 }
20313
20314 Value *Result;
20315
20316 // Must pass the shift amount needed to sign extend the loaded value prior
20317 // to performing a signed comparison for min/max. ShiftAmt is the number of
20318 // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
20319 // is the number of bits to left+right shift the value in order to
20320 // sign-extend.
20321 if (AI->getOperation() == AtomicRMWInst::Min ||
20322 AI->getOperation() == AtomicRMWInst::Max) {
20323 const DataLayout &DL = AI->getModule()->getDataLayout();
20324 unsigned ValWidth =
20325 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
20326 Value *SextShamt =
20327 Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
20328 Result = Builder.CreateCall(LrwOpScwLoop,
20329 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
20330 } else {
20331 Result =
20332 Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
20333 }
20334
20335 if (XLen == 64)
20336 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
20337 return Result;
20338}
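// Worked example for the SextShamt computation above: on RV64 an i8 min/max
// operand whose byte lives at bit offset 8 of the aligned word has XLen = 64,
// ValWidth = 8 and ShiftAmt = 8, so the intrinsic is passed 64 - 8 - 8 = 48,
// the number of bits to shift left and then right (arithmetically) to
// sign-extend the loaded byte before the signed comparison.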
20339
20340TargetLowering::AtomicExpansionKind
20341RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
20342 AtomicCmpXchgInst *CI) const {
20343 // Don't expand forced atomics, we want to have __sync libcalls instead.
20344 if (Subtarget.hasForcedAtomics())
20345 return AtomicExpansionKind::None;
20346
20347 unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
20348 if (!(Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas()) &&
20349 (Size == 8 || Size == 16))
20350 return AtomicExpansionKind::MaskedIntrinsic;
20351 return AtomicExpansionKind::None;
20352}
20353
20354Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
20355 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
20356 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
20357 unsigned XLen = Subtarget.getXLen();
20358 Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
20359 Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
20360 if (XLen == 64) {
20361 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
20362 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
20363 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
20364 CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
20365 }
20366 Type *Tys[] = {AlignedAddr->getType()};
20367 Function *MaskedCmpXchg =
20368 Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
20369 Value *Result = Builder.CreateCall(
20370 MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
20371 if (XLen == 64)
20372 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
20373 return Result;
20374}
20375
20376bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(SDValue Extend,
20377 EVT DataVT) const {
20378 // We have indexed loads for all supported EEW types. Indices are always
20379 // zero extended.
20380 return Extend.getOpcode() == ISD::ZERO_EXTEND &&
20381 isTypeLegal(Extend.getValueType()) &&
20382 isTypeLegal(Extend.getOperand(0).getValueType()) &&
20383 Extend.getOperand(0).getValueType().getVectorElementType() != MVT::i1;
20384}
20385
20386bool RISCVTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
20387 EVT VT) const {
20388 if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
20389 return false;
20390
20391 switch (FPVT.getSimpleVT().SimpleTy) {
20392 case MVT::f16:
20393 return Subtarget.hasStdExtZfhmin();
20394 case MVT::f32:
20395 return Subtarget.hasStdExtF();
20396 case MVT::f64:
20397 return Subtarget.hasStdExtD();
20398 default:
20399 return false;
20400 }
20401}
20402
20403unsigned RISCVTargetLowering::getJumpTableEncoding() const {
20404 // If we are using the small code model, we can reduce size of jump table
20405 // entry to 4 bytes.
20406 if (Subtarget.is64Bit() && !isPositionIndependent() &&
20407 getTargetMachine().getCodeModel() == CodeModel::Small) {
20408 return MachineJumpTableInfo::EK_Custom32;
20409 }
20410 return TargetLowering::getJumpTableEncoding();
20411}
20412
20413const MCExpr *RISCVTargetLowering::LowerCustomJumpTableEntry(
20414 const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
20415 unsigned uid, MCContext &Ctx) const {
20416 assert(Subtarget.is64Bit() && !isPositionIndependent() &&
20417 getTargetMachine().getCodeModel() == CodeModel::Small);
20418 return MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
20419}
20420
20421bool RISCVTargetLowering::isVScaleKnownToBeAPowerOfTwo() const {
20422 // We define vscale to be VLEN/RVVBitsPerBlock. VLEN is always a power
20423 // of two >= 64, and RVVBitsPerBlock is 64. Thus, vscale must be
20424 // a power of two as well.
20425 // FIXME: This doesn't work for zve32, but that's already broken
20426 // elsewhere for the same reason.
20427 assert(Subtarget.getRealMinVLen() >= 64 && "zve32* unsupported");
20428 static_assert(RISCV::RVVBitsPerBlock == 64,
20429 "RVVBitsPerBlock changed, audit needed");
20430 return true;
20431}
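// For example, VLEN = 128 gives vscale = 128 / 64 = 2 and VLEN = 256 gives
// vscale = 4; both are powers of two, as required here.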
20432
20433bool RISCVTargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base,
20434 SDValue &Offset,
20435 ISD::MemIndexedMode &AM,
20436 SelectionDAG &DAG) const {
20437 // Target does not support indexed loads.
20438 if (!Subtarget.hasVendorXTHeadMemIdx())
20439 return false;
20440
20441 if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
20442 return false;
20443
20444 Base = Op->getOperand(0);
20445 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
20446 int64_t RHSC = RHS->getSExtValue();
20447 if (Op->getOpcode() == ISD::SUB)
20448 RHSC = -(uint64_t)RHSC;
20449
20450 // The constants that can be encoded in the THeadMemIdx instructions
20451 // are of the form (sign_extend(imm5) << imm2).
20452 bool isLegalIndexedOffset = false;
20453 for (unsigned i = 0; i < 4; i++)
20454 if (isInt<5>(RHSC >> i) && ((RHSC % (1LL << i)) == 0)) {
20455 isLegalIndexedOffset = true;
20456 break;
20457 }
20458
20459 if (!isLegalIndexedOffset)
20460 return false;
20461
20462 Offset = Op->getOperand(1);
20463 return true;
20464 }
20465
20466 return false;
20467}
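// For illustration of the (sign_extend(imm5) << imm2) check above: an offset
// of 96 is legal because 96 >> 3 == 12 fits in a signed 5-bit immediate and 96
// is a multiple of 8, while 100 is rejected because no shift amount in 0..3
// yields a quotient that fits in imm5 with a zero remainder.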
20468
20469bool RISCVTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
20470 SDValue &Offset,
20471 ISD::MemIndexedMode &AM,
20472 SelectionDAG &DAG) const {
20473 EVT VT;
20474 SDValue Ptr;
20475 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
20476 VT = LD->getMemoryVT();
20477 Ptr = LD->getBasePtr();
20478 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
20479 VT = ST->getMemoryVT();
20480 Ptr = ST->getBasePtr();
20481 } else
20482 return false;
20483
20484 if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, DAG))
20485 return false;
20486
20487 AM = ISD::PRE_INC;
20488 return true;
20489}
20490
20491bool RISCVTargetLowering::getPostIndexedAddressParts(SDNode *N, SDValue Op,
20492 SDValue &Base,
20493 SDValue &Offset,
20494 ISD::MemIndexedMode &AM,
20495 SelectionDAG &DAG) const {
20496 EVT VT;
20497 SDValue Ptr;
20498 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
20499 VT = LD->getMemoryVT();
20500 Ptr = LD->getBasePtr();
20501 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
20502 VT = ST->getMemoryVT();
20503 Ptr = ST->getBasePtr();
20504 } else
20505 return false;
20506
20507 if (!getIndexedAddressParts(Op, Base, Offset, AM, DAG))
20508 return false;
20509 // Post-indexing updates the base, so it's not a valid transform
20510 // if that's not the same as the load's pointer.
20511 if (Ptr != Base)
20512 return false;
20513
20514 AM = ISD::POST_INC;
20515 return true;
20516}
20517
20518bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
20519 EVT VT) const {
20520 EVT SVT = VT.getScalarType();
20521
20522 if (!SVT.isSimple())
20523 return false;
20524
20525 switch (SVT.getSimpleVT().SimpleTy) {
20526 case MVT::f16:
20527 return VT.isVector() ? Subtarget.hasVInstructionsF16()
20528 : Subtarget.hasStdExtZfhOrZhinx();
20529 case MVT::f32:
20530 return Subtarget.hasStdExtFOrZfinx();
20531 case MVT::f64:
20532 return Subtarget.hasStdExtDOrZdinx();
20533 default:
20534 break;
20535 }
20536
20537 return false;
20538}
20539
20540ISD::NodeType RISCVTargetLowering::getExtendForAtomicCmpSwapArg() const {
20541 // Zacas will use amocas.w which does not require extension.
20542 return Subtarget.hasStdExtZacas() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
20543}
20544
20545Register RISCVTargetLowering::getExceptionPointerRegister(
20546 const Constant *PersonalityFn) const {
20547 return RISCV::X10;
20548}
20549
20550Register RISCVTargetLowering::getExceptionSelectorRegister(
20551 const Constant *PersonalityFn) const {
20552 return RISCV::X11;
20553}
20554
20555bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
20556 // Return false to suppress the unnecessary extensions if the LibCall
20557 // arguments or return value is a float narrower than XLEN on a soft FP ABI.
20558 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
20559 Type.getSizeInBits() < Subtarget.getXLen()))
20560 return false;
20561
20562 return true;
20563}
20564
20565bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const {
20566 if (Subtarget.is64Bit() && Type == MVT::i32)
20567 return true;
20568
20569 return IsSigned;
20570}
20571
20572bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
20573 SDValue C) const {
20574 // Check integral scalar types.
20575 const bool HasExtMOrZmmul =
20576 Subtarget.hasStdExtM() || Subtarget.hasStdExtZmmul();
20577 if (!VT.isScalarInteger())
20578 return false;
20579
20580 // Omit the optimization if the subtarget has the M extension and the data
20581 // size exceeds XLen.
20582 if (HasExtMOrZmmul && VT.getSizeInBits() > Subtarget.getXLen())
20583 return false;
20584
20585 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
20586 // Break the MUL to a SLLI and an ADD/SUB.
20587 const APInt &Imm = ConstNode->getAPIntValue();
20588 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
20589 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
20590 return true;
20591
20592 // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12.
20593 if (Subtarget.hasStdExtZba() && !Imm.isSignedIntN(12) &&
20594 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
20595 (Imm - 8).isPowerOf2()))
20596 return true;
20597
20598 // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
20599 // a pair of LUI/ADDI.
20600 if (!Imm.isSignedIntN(12) && Imm.countr_zero() < 12 &&
20601 ConstNode->hasOneUse()) {
20602 APInt ImmS = Imm.ashr(Imm.countr_zero());
20603 if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
20604 (1 - ImmS).isPowerOf2())
20605 return true;
20606 }
20607 }
20608
20609 return false;
20610}
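// For illustration: multiplying by 65 is decomposed because 65 - 1 == 64 is a
// power of two (an SLLI by 6 plus an ADD), and with Zba multiplying by 4098
// also qualifies because 4098 is not a simm12 and 4098 - 2 == 4096 is a power
// of two, giving SH1ADD x, (SLLI x, 12).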
20611
20612bool RISCVTargetLowering::isMulAddWithConstProfitable(SDValue AddNode,
20613 SDValue ConstNode) const {
20614 // Let the DAGCombiner decide for vectors.
20615 EVT VT = AddNode.getValueType();
20616 if (VT.isVector())
20617 return true;
20618
20619 // Let the DAGCombiner decide for larger types.
20620 if (VT.getScalarSizeInBits() > Subtarget.getXLen())
20621 return true;
20622
20623 // It is worse if c1 is simm12 while c1*c2 is not.
20624 ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1));
20625 ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode);
20626 const APInt &C1 = C1Node->getAPIntValue();
20627 const APInt &C2 = C2Node->getAPIntValue();
20628 if (C1.isSignedIntN(12) && !(C1 * C2).isSignedIntN(12))
20629 return false;
20630
20631 // Default to true and let the DAGCombiner decide.
20632 return true;
20633}
20634
20635bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
20636 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
20637 unsigned *Fast) const {
20638 if (!VT.isVector()) {
20639 if (Fast)
20640 *Fast = Subtarget.enableUnalignedScalarMem();
20641 return Subtarget.enableUnalignedScalarMem();
20642 }
20643
20644 // All vector implementations must support element alignment
20645 EVT ElemVT = VT.getVectorElementType();
20646 if (Alignment >= ElemVT.getStoreSize()) {
20647 if (Fast)
20648 *Fast = 1;
20649 return true;
20650 }
20651
20652 // Note: We lower an unmasked unaligned vector access to an equally sized
20653 // e8 element type access. Given this, we effectively support all unmasked
20654 // misaligned accesses. TODO: Work through the codegen implications of
20655 // allowing such accesses to be formed, and considered fast.
20656 if (Fast)
20657 *Fast = Subtarget.enableUnalignedVectorMem();
20658 return Subtarget.enableUnalignedVectorMem();
20659}
20660
20661
20662EVT RISCVTargetLowering::getOptimalMemOpType(const MemOp &Op,
20663 const AttributeList &FuncAttributes) const {
20664 if (!Subtarget.hasVInstructions())
20665 return MVT::Other;
20666
20667 if (FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat))
20668 return MVT::Other;
20669
20670 // We use LMUL1 memory operations here for a non-obvious reason. Our caller
20671 // has an expansion threshold, and we want the number of hardware memory
20672 // operations to correspond roughly to that threshold. LMUL>1 operations
20673 // are typically expanded linearly internally, and thus correspond to more
20674 // than one actual memory operation. Note that store merging and load
20675 // combining will typically form larger LMUL operations from the LMUL1
20676 // operations emitted here, and that's okay because combining isn't
20677 // introducing new memory operations; it's just merging existing ones.
20678 const unsigned MinVLenInBytes = Subtarget.getRealMinVLen()/8;
20679 if (Op.size() < MinVLenInBytes)
20680 // TODO: Figure out short memops. For the moment, do the default thing
20681 // which ends up using scalar sequences.
20682 return MVT::Other;
20683
20684 // Prefer i8 for non-zero memset as it allows us to avoid materializing
20685 // a large scalar constant and instead use vmv.v.x/i to do the
20686 // broadcast. For everything else, prefer ELenVT to minimize VL and thus
20687 // maximize the chance we can encode the size in the vsetvli.
20688 MVT ELenVT = MVT::getIntegerVT(Subtarget.getELen());
20689 MVT PreferredVT = (Op.isMemset() && !Op.isZeroMemset()) ? MVT::i8 : ELenVT;
20690
20691 // Do we have sufficient alignment for our preferred VT? If not, revert
20692 // to largest size allowed by our alignment criteria.
20693 if (PreferredVT != MVT::i8 && !Subtarget.enableUnalignedVectorMem()) {
20694 Align RequiredAlign(PreferredVT.getStoreSize());
20695 if (Op.isFixedDstAlign())
20696 RequiredAlign = std::min(RequiredAlign, Op.getDstAlign());
20697 if (Op.isMemcpy())
20698 RequiredAlign = std::min(RequiredAlign, Op.getSrcAlign());
20699 PreferredVT = MVT::getIntegerVT(RequiredAlign.value() * 8);
20700 }
20701 return MVT::getVectorVT(PreferredVT, MinVLenInBytes/PreferredVT.getStoreSize());
20702}
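// For illustration, assuming VLEN = 128 (MinVLenInBytes = 16) and ELEN = 64: a
// sufficiently aligned memcpy of at least 16 bytes is assigned v2i64, while a
// non-zero memset of the same size is assigned v16i8 so the byte value can be
// broadcast with vmv.v.x.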
20703
20704bool RISCVTargetLowering::splitValueIntoRegisterParts(
20705 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
20706 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
20707 bool IsABIRegCopy = CC.has_value();
20708 EVT ValueVT = Val.getValueType();
20709 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
20710 PartVT == MVT::f32) {
20711 // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
20712 // nan, and cast to f32.
20713 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
20714 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
20715 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
20716 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
20717 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
20718 Parts[0] = Val;
20719 return true;
20720 }
20721
20722 if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
20723 LLVMContext &Context = *DAG.getContext();
20724 EVT ValueEltVT = ValueVT.getVectorElementType();
20725 EVT PartEltVT = PartVT.getVectorElementType();
20726 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
20727 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
20728 if (PartVTBitSize % ValueVTBitSize == 0) {
20729 assert(PartVTBitSize >= ValueVTBitSize);
20730 // If the element types are different, bitcast to the same element type of
20731 // PartVT first.
20732 // For example, suppose we want to copy a <vscale x 1 x i8> value to
20733 // <vscale x 4 x i16>.
20734 // We need to convert <vscale x 1 x i8> to <vscale x 8 x i8> by insert
20735 // subvector, then we can bitcast to <vscale x 4 x i16>.
20736 if (ValueEltVT != PartEltVT) {
20737 if (PartVTBitSize > ValueVTBitSize) {
20738 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
20739 assert(Count != 0 && "The number of element should not be zero.");
20740 EVT SameEltTypeVT =
20741 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
20742 Val = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SameEltTypeVT,
20743 DAG.getUNDEF(SameEltTypeVT), Val,
20744 DAG.getVectorIdxConstant(0, DL));
20745 }
20746 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
20747 } else {
20748 Val =
20749 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
20750 Val, DAG.getVectorIdxConstant(0, DL));
20751 }
20752 Parts[0] = Val;
20753 return true;
20754 }
20755 }
20756 return false;
20757}
20758
20759SDValue RISCVTargetLowering::joinRegisterPartsIntoValue(
20760 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
20761 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
20762 bool IsABIRegCopy = CC.has_value();
20763 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
20764 PartVT == MVT::f32) {
20765 SDValue Val = Parts[0];
20766
20767 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
20768 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
20769 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
20770 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
20771 return Val;
20772 }
20773
20774 if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
20775 LLVMContext &Context = *DAG.getContext();
20776 SDValue Val = Parts[0];
20777 EVT ValueEltVT = ValueVT.getVectorElementType();
20778 EVT PartEltVT = PartVT.getVectorElementType();
20779 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
20780 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
20781 if (PartVTBitSize % ValueVTBitSize == 0) {
20782 assert(PartVTBitSize >= ValueVTBitSize);
20783 EVT SameEltTypeVT = ValueVT;
20784 // If the element types are different, convert it to the same element type
20785 // of PartVT.
20786 // For example, suppose we want to copy a <vscale x 1 x i8> value from
20787 // <vscale x 4 x i16>.
20788 // We need to convert <vscale x 4 x i16> to <vscale x 8 x i8> first,
20789 // then we can extract <vscale x 1 x i8>.
20790 if (ValueEltVT != PartEltVT) {
20791 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
20792 assert(Count != 0 && "The number of element should not be zero.");
20793 SameEltTypeVT =
20794 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
20795 Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val);
20796 }
20797 Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
20798 DAG.getVectorIdxConstant(0, DL));
20799 return Val;
20800 }
20801 }
20802 return SDValue();
20803}
20804
20805bool RISCVTargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
20806 // When aggressively optimizing for code size, we prefer to use a div
20807 // instruction, as it is usually smaller than the alternative sequence.
20808 // TODO: Add vector division?
20809 bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
20810 return OptSize && !VT.isVector();
20811}
20812
20813bool RISCVTargetLowering::preferScalarizeSplat(SDNode *N) const {
20814 // Scalarizing zero_ext and sign_ext might stop them from matching a widening
20815 // instruction in some situations.
20816 unsigned Opc = N->getOpcode();
20817 if (Opc == ISD::ZERO_EXTEND || Opc == ISD::SIGN_EXTEND)
20818 return false;
20819 return true;
20820}
20821
20822static Value *useTpOffset(IRBuilderBase &IRB, unsigned Offset) {
20823 Module *M = IRB.GetInsertBlock()->getParent()->getParent();
20824 Function *ThreadPointerFunc =
20825 Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
20826 return IRB.CreateConstGEP1_32(IRB.getInt8Ty(),
20827 IRB.CreateCall(ThreadPointerFunc), Offset);
20828}
20829
20830Value *RISCVTargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
20831 // Fuchsia provides a fixed TLS slot for the stack cookie.
20832 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
20833 if (Subtarget.isTargetFuchsia())
20834 return useTpOffset(IRB, -0x10);
20835
20836 return TargetLowering::getIRStackGuard(IRB);
20837}
20838
20839bool RISCVTargetLowering::isLegalInterleavedAccessType(
20840 VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace,
20841 const DataLayout &DL) const {
20842 EVT VT = getValueType(DL, VTy);
20843 // Don't lower vlseg/vsseg for vector types that can't be split.
20844 if (!isTypeLegal(VT))
20845 return false;
20846
20847 if (!isLegalElementTypeForRVV(VT.getVectorElementType()) ||
20848 !allowsMemoryAccessForAlignment(VTy->getContext(), DL, VT, AddrSpace,
20849 Alignment))
20850 return false;
20851
20852 MVT ContainerVT = VT.getSimpleVT();
20853
20854 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
20855 if (!Subtarget.useRVVForFixedLengthVectors())
20856 return false;
20857 // Sometimes the interleaved access pass picks up splats as interleaves of
20858 // one element. Don't lower these.
20859 if (FVTy->getNumElements() < 2)
20860 return false;
20861
20862 ContainerVT = getContainerForFixedLengthVector(VT.getSimpleVT());
20863 }
20864
20865 // Need to make sure that EMUL * NFIELDS ≤ 8
20866 auto [LMUL, Fractional] = RISCVVType::decodeVLMUL(getLMUL(ContainerVT));
20867 if (Fractional)
20868 return true;
20869 return Factor * LMUL <= 8;
20870}
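// Example of the EMUL * NFIELDS <= 8 rule above: a <vscale x 4 x i32> field is
// LMUL 2 (128 known-min bits at 64 bits per block), so interleave factors up
// to 4 are accepted (4 * 2 == 8) while a factor of 8 is rejected (8 * 2 > 8).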
20871
20872bool RISCVTargetLowering::isLegalStridedLoadStore(EVT DataType,
20873 Align Alignment) const {
20874 if (!Subtarget.hasVInstructions())
20875 return false;
20876
20877 // Only support fixed vectors if we know the minimum vector size.
20878 if (DataType.isFixedLengthVector() && !Subtarget.useRVVForFixedLengthVectors())
20879 return false;
20880
20881 EVT ScalarType = DataType.getScalarType();
20882 if (!isLegalElementTypeForRVV(ScalarType))
20883 return false;
20884
20885 if (!Subtarget.enableUnalignedVectorMem() &&
20886 Alignment < ScalarType.getStoreSize())
20887 return false;
20888
20889 return true;
20890}
20891
20892static const Intrinsic::ID FixedVlsegIntrIds[] = {
20893 Intrinsic::riscv_seg2_load, Intrinsic::riscv_seg3_load,
20894 Intrinsic::riscv_seg4_load, Intrinsic::riscv_seg5_load,
20895 Intrinsic::riscv_seg6_load, Intrinsic::riscv_seg7_load,
20896 Intrinsic::riscv_seg8_load};
20897
20898/// Lower an interleaved load into a vlsegN intrinsic.
20899///
20900/// E.g. Lower an interleaved load (Factor = 2):
20901/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr
20902/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
20903/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
20904///
20905/// Into:
20906/// %ld2 = { <4 x i32>, <4 x i32> } call llvm.riscv.seg2.load.v4i32.p0.i64(
20907/// %ptr, i64 4)
20908/// %vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0
20909/// %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1
20910bool RISCVTargetLowering::lowerInterleavedLoad(
20911 LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
20912 ArrayRef<unsigned> Indices, unsigned Factor) const {
20913 IRBuilder<> Builder(LI);
20914
20915 auto *VTy = cast<FixedVectorType>(Shuffles[0]->getType());
20916 if (!isLegalInterleavedAccessType(VTy, Factor, LI->getAlign(),
20917 LI->getPointerAddressSpace(),
20918 LI->getModule()->getDataLayout()))
20919 return false;
20920
20921 auto *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
20922
20923 Function *VlsegNFunc =
20924 Intrinsic::getDeclaration(LI->getModule(), FixedVlsegIntrIds[Factor - 2],
20925 {VTy, LI->getPointerOperandType(), XLenTy});
20926
20927 Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
20928
20929 CallInst *VlsegN =
20930 Builder.CreateCall(VlsegNFunc, {LI->getPointerOperand(), VL});
20931
20932 for (unsigned i = 0; i < Shuffles.size(); i++) {
20933 Value *SubVec = Builder.CreateExtractValue(VlsegN, Indices[i]);
20934 Shuffles[i]->replaceAllUsesWith(SubVec);
20935 }
20936
20937 return true;
20938}
20939
20940static const Intrinsic::ID FixedVssegIntrIds[] = {
20941 Intrinsic::riscv_seg2_store, Intrinsic::riscv_seg3_store,
20942 Intrinsic::riscv_seg4_store, Intrinsic::riscv_seg5_store,
20943 Intrinsic::riscv_seg6_store, Intrinsic::riscv_seg7_store,
20944 Intrinsic::riscv_seg8_store};
20945
20946/// Lower an interleaved store into a vssegN intrinsic.
20947///
20948/// E.g. Lower an interleaved store (Factor = 3):
20949/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
20950/// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
20951/// store <12 x i32> %i.vec, <12 x i32>* %ptr
20952///
20953/// Into:
20954/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
20955/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
20956/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
20957/// call void llvm.riscv.seg3.store.v4i32.p0.i64(%sub.v0, %sub.v1, %sub.v2,
20958/// %ptr, i32 4)
20959///
20960/// Note that the new shufflevectors will be removed and we'll only generate one
20961/// vsseg3 instruction in CodeGen.
20962bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
20963 ShuffleVectorInst *SVI,
20964 unsigned Factor) const {
20965 IRBuilder<> Builder(SI);
20966 auto *ShuffleVTy = cast<FixedVectorType>(SVI->getType());
20967 // Given SVI : <n*factor x ty>, then VTy : <n x ty>
20968 auto *VTy = FixedVectorType::get(ShuffleVTy->getElementType(),
20969 ShuffleVTy->getNumElements() / Factor);
20970 if (!isLegalInterleavedAccessType(VTy, Factor, SI->getAlign(),
20971 SI->getPointerAddressSpace(),
20972 SI->getModule()->getDataLayout()))
20973 return false;
20974
20975 auto *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
20976
20977 Function *VssegNFunc =
20978 Intrinsic::getDeclaration(SI->getModule(), FixedVssegIntrIds[Factor - 2],
20979 {VTy, SI->getPointerOperandType(), XLenTy});
20980
20981 auto Mask = SVI->getShuffleMask();
20982 SmallVector<Value *, 10> Ops;
20983
20984 for (unsigned i = 0; i < Factor; i++) {
20985 Value *Shuffle = Builder.CreateShuffleVector(
20986 SVI->getOperand(0), SVI->getOperand(1),
20987 createSequentialMask(Mask[i], VTy->getNumElements(), 0));
20988 Ops.push_back(Shuffle);
20989 }
20990 // This VL should be OK (should be executable in one vsseg instruction,
20991 // potentially under larger LMULs) because we checked that the fixed vector
20992 // type fits in isLegalInterleavedAccessType
20993 Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
20994 Ops.append({SI->getPointerOperand(), VL});
20995
20996 Builder.CreateCall(VssegNFunc, Ops);
20997
20998 return true;
20999}
21000
21001bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
21002 LoadInst *LI) const {
21003 assert(LI->isSimple());
21004 IRBuilder<> Builder(LI);
21005
21006 // Only deinterleave2 supported at present.
21007 if (DI->getIntrinsicID() != Intrinsic::experimental_vector_deinterleave2)
21008 return false;
21009
21010 unsigned Factor = 2;
21011
21012 VectorType *VTy = cast<VectorType>(DI->getOperand(0)->getType());
21013 VectorType *ResVTy = cast<VectorType>(DI->getType()->getContainedType(0));
21014
21015 if (!isLegalInterleavedAccessType(ResVTy, Factor, LI->getAlign(),
21016 LI->getPointerAddressSpace(),
21017 LI->getModule()->getDataLayout()))
21018 return false;
21019
21020 Function *VlsegNFunc;
21021 Value *VL;
21022 Type *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
21023 SmallVector<Value *, 10> Ops;
21024
21025 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
21026 VlsegNFunc = Intrinsic::getDeclaration(
21027 LI->getModule(), FixedVlsegIntrIds[Factor - 2],
21028 {ResVTy, LI->getPointerOperandType(), XLenTy});
21029 VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
21030 } else {
21031 static const Intrinsic::ID IntrIds[] = {
21032 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
21033 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
21034 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
21035 Intrinsic::riscv_vlseg8};
21036
21037 VlsegNFunc = Intrinsic::getDeclaration(LI->getModule(), IntrIds[Factor - 2],
21038 {ResVTy, XLenTy});
21039 VL = Constant::getAllOnesValue(XLenTy);
21040 Ops.append(Factor, PoisonValue::get(ResVTy));
21041 }
21042
21043 Ops.append({LI->getPointerOperand(), VL});
21044
21045 Value *Vlseg = Builder.CreateCall(VlsegNFunc, Ops);
21046 DI->replaceAllUsesWith(Vlseg);
21047
21048 return true;
21049}
21050
21051bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
21052 StoreInst *SI) const {
21053 assert(SI->isSimple());
21054 IRBuilder<> Builder(SI);
21055
21056 // Only interleave2 supported at present.
21057 if (II->getIntrinsicID() != Intrinsic::experimental_vector_interleave2)
21058 return false;
21059
21060 unsigned Factor = 2;
21061
21062 VectorType *VTy = cast<VectorType>(II->getType());
21063 VectorType *InVTy = cast<VectorType>(II->getOperand(0)->getType());
21064
21065 if (!isLegalInterleavedAccessType(InVTy, Factor, SI->getAlign(),
21066 SI->getPointerAddressSpace(),
21067 SI->getModule()->getDataLayout()))
21068 return false;
21069
21070 Function *VssegNFunc;
21071 Value *VL;
21072 Type *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
21073
21074 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
21075 VssegNFunc = Intrinsic::getDeclaration(
21076 SI->getModule(), FixedVssegIntrIds[Factor - 2],
21077 {InVTy, SI->getPointerOperandType(), XLenTy});
21078 VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
21079 } else {
21080 static const Intrinsic::ID IntrIds[] = {
21081 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
21082 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
21083 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
21084 Intrinsic::riscv_vsseg8};
21085
21086 VssegNFunc = Intrinsic::getDeclaration(SI->getModule(), IntrIds[Factor - 2],
21087 {InVTy, XLenTy});
21088 VL = Constant::getAllOnesValue(XLenTy);
21089 }
21090
21091 Builder.CreateCall(VssegNFunc, {II->getOperand(0), II->getOperand(1),
21092 SI->getPointerOperand(), VL});
21093
21094 return true;
21095}
21096
21097MachineInstr *
21098RISCVTargetLowering::EmitKCFICheck(MachineBasicBlock &MBB,
21099 MachineBasicBlock::iterator &MBBI,
21100 const TargetInstrInfo *TII) const {
21101 assert(MBBI->isCall() && MBBI->getCFIType() &&
21102 "Invalid call instruction for a KCFI check");
21103 assert(is_contained({RISCV::PseudoCALLIndirect, RISCV::PseudoTAILIndirect},
21104 MBBI->getOpcode()));
21105
21106 MachineOperand &Target = MBBI->getOperand(0);
21107 Target.setIsRenamable(false);
21108
21109 return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(RISCV::KCFI_CHECK))
21110 .addReg(Target.getReg())
21111 .addImm(MBBI->getCFIType())
21112 .getInstr();
21113}
21114
21115#define GET_REGISTER_MATCHER
21116#include "RISCVGenAsmMatcher.inc"
21117
21118Register
21119RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
21120 const MachineFunction &MF) const {
21121 Register Reg = MatchRegisterAltName(RegName);
21122 if (Reg == RISCV::NoRegister)
21123 Reg = MatchRegisterName(RegName);
21124 if (Reg == RISCV::NoRegister)
21125 report_fatal_error(
21126 Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
21127 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
21128 if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
21129 report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
21130 StringRef(RegName) + "\"."));
21131 return Reg;
21132}
21133
21134MachineMemOperand::Flags
21135RISCVTargetLowering::getTargetMMOFlags(const Instruction &I) const {
21136 const MDNode *NontemporalInfo = I.getMetadata(LLVMContext::MD_nontemporal);
21137
21138 if (NontemporalInfo == nullptr)
21139 return MachineMemOperand::MONone;
21140
21141 // 1 for default value work as __RISCV_NTLH_ALL
21142 // 2 -> __RISCV_NTLH_INNERMOST_PRIVATE
21143 // 3 -> __RISCV_NTLH_ALL_PRIVATE
21144 // 4 -> __RISCV_NTLH_INNERMOST_SHARED
21145 // 5 -> __RISCV_NTLH_ALL
21146 int NontemporalLevel = 5;
21147 const MDNode *RISCVNontemporalInfo =
21148 I.getMetadata("riscv-nontemporal-domain");
21149 if (RISCVNontemporalInfo != nullptr)
21150 NontemporalLevel =
21151 cast<ConstantInt>(
21152 cast<ConstantAsMetadata>(RISCVNontemporalInfo->getOperand(0))
21153 ->getValue())
21154 ->getZExtValue();
21155
21156 assert((1 <= NontemporalLevel && NontemporalLevel <= 5) &&
21157 "RISC-V target doesn't support this non-temporal domain.");
21158
21159 NontemporalLevel -= 2;
21160 MachineMemOperand::Flags Flags = MachineMemOperand::MONone;
21161 if (NontemporalLevel & 0b1)
21162 Flags |= MONontemporalBit0;
21163 if (NontemporalLevel & 0b10)
21164 Flags |= MONontemporalBit1;
21165
21166 return Flags;
21167}
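// For illustration of the mapping above: the default domain value 5
// (__RISCV_NTLH_ALL) becomes 5 - 2 = 3 and sets both MONontemporalBit0 and
// MONontemporalBit1, while domain 2 (__RISCV_NTLH_INNERMOST_PRIVATE) becomes 0
// and sets neither bit.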
21168
21169MachineMemOperand::Flags
21170RISCVTargetLowering::getTargetMMOFlags(const MemSDNode &Node) const {
21171
21172 MachineMemOperand::Flags NodeFlags = Node.getMemOperand()->getFlags();
21173 MachineMemOperand::Flags TargetFlags = MachineMemOperand::MONone;
21174 TargetFlags |= (NodeFlags & MONontemporalBit0);
21175 TargetFlags |= (NodeFlags & MONontemporalBit1);
21176 return TargetFlags;
21177}
21178
21179bool RISCVTargetLowering::areTwoSDNodeTargetMMOFlagsMergeable(
21180 const MemSDNode &NodeX, const MemSDNode &NodeY) const {
21181 return getTargetMMOFlags(NodeX) == getTargetMMOFlags(NodeY);
21182}
21183
21184bool RISCVTargetLowering::isCtpopFast(EVT VT) const {
21185 if (VT.isScalableVector())
21186 return isTypeLegal(VT) && Subtarget.hasStdExtZvbb();
21187 if (VT.isFixedLengthVector() && Subtarget.hasStdExtZvbb())
21188 return true;
21189 return Subtarget.hasStdExtZbb() &&
21190 (VT == MVT::i32 || VT == MVT::i64 || VT.isFixedLengthVector());
21191}
21192
21193unsigned RISCVTargetLowering::getCustomCtpopCost(EVT VT,
21194 ISD::CondCode Cond) const {
21195 return isCtpopFast(VT) ? 0 : 1;
21196}
21197
21198bool RISCVTargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
21199
21200 // GISel support is in progress or complete for these opcodes.
21201 unsigned Op = Inst.getOpcode();
21202 if (Op == Instruction::Add || Op == Instruction::Sub ||
21203 Op == Instruction::And || Op == Instruction::Or ||
21204 Op == Instruction::Xor || Op == Instruction::InsertElement ||
21205 Op == Instruction::ShuffleVector || Op == Instruction::Load)
21206 return false;
21207
21208 if (Inst.getType()->isScalableTy())
21209 return true;
21210
21211 for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
21212 if (Inst.getOperand(i)->getType()->isScalableTy() &&
21213 !isa<ReturnInst>(&Inst))
21214 return true;
21215
21216 if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
21217 if (AI->getAllocatedType()->isScalableTy())
21218 return true;
21219 }
21220
21221 return false;
21222}
21223
21224SDValue
21225RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
21226 SelectionDAG &DAG,
21227 SmallVectorImpl<SDNode *> &Created) const {
21228 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
21229 if (isIntDivCheap(N->getValueType(0), Attr))
21230 return SDValue(N, 0); // Lower SDIV as SDIV
21231
21232 // Only perform this transform if short forward branch opt is supported.
21233 if (!Subtarget.hasShortForwardBranchOpt())
21234 return SDValue();
21235 EVT VT = N->getValueType(0);
21236 if (!(VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit())))
21237 return SDValue();
21238
21239 // Ensure 2**k-1 < 2048 so that we can just emit a single addi/addiw.
21240 if (Divisor.sgt(2048) || Divisor.slt(-2048))
21241 return SDValue();
21242 return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created);
21243}
21244
21245bool RISCVTargetLowering::shouldFoldSelectWithSingleBitTest(
21246 EVT VT, const APInt &AndMask) const {
21247 if (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())
21248 return !Subtarget.hasStdExtZbs() && AndMask.ugt(1024);
21249 return TargetLowering::shouldFoldSelectWithSingleBitTest(VT, AndMask);
21250}
21251
21252unsigned RISCVTargetLowering::getMinimumJumpTableEntries() const {
21253 return Subtarget.getMinimumJumpTableEntries();
21254}
21255
21256// Handle single arg such as return value.
21257template <typename Arg>
21258void RVVArgDispatcher::constructArgInfos(ArrayRef<Arg> ArgList) {
21259 // This lambda determines whether an array of types are constructed by
21260 // homogeneous vector types.
21261 auto isHomogeneousScalableVectorType = [](ArrayRef<Arg> ArgList) {
21262 // First, extract the first element in the argument type.
21263 auto It = ArgList.begin();
21264 MVT FirstArgRegType = It->VT;
21265
21266 // Return if there is no return or the type needs split.
21267 if (It == ArgList.end() || It->Flags.isSplit())
21268 return false;
21269
21270 ++It;
21271
21272 // Return if this argument type contains only 1 element, or it's not a
21273 // vector type.
21274 if (It == ArgList.end() || !FirstArgRegType.isScalableVector())
21275 return false;
21276
21277 // Second, check if the following elements in this argument type are all the
21278 // same.
21279 for (; It != ArgList.end(); ++It)
21280 if (It->Flags.isSplit() || It->VT != FirstArgRegType)
21281 return false;
21282
21283 return true;
21284 };
21285
21286 if (isHomogeneousScalableVectorType(ArgList)) {
21287 // Handle as tuple type
21288 RVVArgInfos.push_back({(unsigned)ArgList.size(), ArgList[0].VT, false});
21289 } else {
21290 // Handle as normal vector type
21291 bool FirstVMaskAssigned = false;
21292 for (const auto &OutArg : ArgList) {
21293 MVT RegisterVT = OutArg.VT;
21294
21295 // Skip non-RVV register type
21296 if (!RegisterVT.isVector())
21297 continue;
21298
21299 if (RegisterVT.isFixedLengthVector())
21300 RegisterVT = TLI->getContainerForFixedLengthVector(RegisterVT);
21301
21302 if (!FirstVMaskAssigned && RegisterVT.getVectorElementType() == MVT::i1) {
21303 RVVArgInfos.push_back({1, RegisterVT, true});
21304 FirstVMaskAssigned = true;
21305 continue;
21306 }
21307
21308 RVVArgInfos.push_back({1, RegisterVT, false});
21309 }
21310 }
21311}
21312
21313// Handle multiple args.
21314template <>
21315void RVVArgDispatcher::constructArgInfos<Type *>(ArrayRef<Type *> TypeList) {
21316 const DataLayout &DL = MF->getDataLayout();
21317 const Function &F = MF->getFunction();
21318 LLVMContext &Context = F.getContext();
21319
21320 bool FirstVMaskAssigned = false;
21321 for (Type *Ty : TypeList) {
21322 StructType *STy = dyn_cast<StructType>(Ty);
21323 if (STy && STy->containsHomogeneousScalableVectorTypes()) {
21324 Type *ElemTy = STy->getTypeAtIndex(0U);
21325 EVT VT = TLI->getValueType(DL, ElemTy);
21326 MVT RegisterVT =
21327 TLI->getRegisterTypeForCallingConv(Context, F.getCallingConv(), VT);
21328 unsigned NumRegs =
21329 TLI->getNumRegistersForCallingConv(Context, F.getCallingConv(), VT);
21330
21331 RVVArgInfos.push_back(
21332 {NumRegs * STy->getNumElements(), RegisterVT, false});
21333 } else {
21334 SmallVector<EVT, 4> ValueVTs;
21335 ComputeValueVTs(*TLI, DL, Ty, ValueVTs);
21336
21337 for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues;
21338 ++Value) {
21339 EVT VT = ValueVTs[Value];
21340 MVT RegisterVT =
21341 TLI->getRegisterTypeForCallingConv(Context, F.getCallingConv(), VT);
21342 unsigned NumRegs =
21343 TLI->getNumRegistersForCallingConv(Context, F.getCallingConv(), VT);
21344
21345 // Skip non-RVV register type
21346 if (!RegisterVT.isVector())
21347 continue;
21348
21349 if (RegisterVT.isFixedLengthVector())
21350 RegisterVT = TLI->getContainerForFixedLengthVector(RegisterVT);
21351
21352 if (!FirstVMaskAssigned &&
21353 RegisterVT.getVectorElementType() == MVT::i1) {
21354 RVVArgInfos.push_back({1, RegisterVT, true});
21355 FirstVMaskAssigned = true;
21356 --NumRegs;
21357 }
21358
21359 RVVArgInfos.insert(RVVArgInfos.end(), NumRegs, {1, RegisterVT, false});
21360 }
21361 }
21362 }
21363}
21364
21365void RVVArgDispatcher::allocatePhysReg(unsigned NF, unsigned LMul,
21366 unsigned StartReg) {
21367 assert((StartReg % LMul) == 0 &&
21368 "Start register number should be multiple of lmul");
21369 const MCPhysReg *VRArrays;
21370 switch (LMul) {
21371 default:
21372 report_fatal_error("Invalid lmul");
21373 case 1:
21374 VRArrays = ArgVRs;
21375 break;
21376 case 2:
21377 VRArrays = ArgVRM2s;
21378 break;
21379 case 4:
21380 VRArrays = ArgVRM4s;
21381 break;
21382 case 8:
21383 VRArrays = ArgVRM8s;
21384 break;
21385 }
21386
21387 for (unsigned i = 0; i < NF; ++i)
21388 if (StartReg)
21389 AllocatedPhysRegs.push_back(VRArrays[(StartReg - 8) / LMul + i]);
21390 else
21391 AllocatedPhysRegs.push_back(MCPhysReg());
21392}
21393
21394/// This function determines whether each RVV argument is passed by register.
21395/// If the argument can be assigned to a VR, give it a specific register;
21396/// otherwise, assign it 0, which is an invalid MCPhysReg.
21397void RVVArgDispatcher::compute() {
21398 uint32_t AssignedMap = 0;
21399 auto allocate = [&](const RVVArgInfo &ArgInfo) {
21400 // Allocate first vector mask argument to V0.
21401 if (ArgInfo.FirstVMask) {
21402 AllocatedPhysRegs.push_back(RISCV::V0);
21403 return;
21404 }
21405
21406 unsigned RegsNeeded = divideCeil(
21407 ArgInfo.VT.getSizeInBits().getKnownMinValue(), RISCV::RVVBitsPerBlock);
21408 unsigned TotalRegsNeeded = ArgInfo.NF * RegsNeeded;
21409 for (unsigned StartReg = 0; StartReg + TotalRegsNeeded <= NumArgVRs;
21410 StartReg += RegsNeeded) {
21411 uint32_t Map = ((1 << TotalRegsNeeded) - 1) << StartReg;
21412 if ((AssignedMap & Map) == 0) {
21413 allocatePhysReg(ArgInfo.NF, RegsNeeded, StartReg + 8);
21414 AssignedMap |= Map;
21415 return;
21416 }
21417 }
21418
21419 allocatePhysReg(ArgInfo.NF, RegsNeeded, 0);
21420 };
21421
21422 for (unsigned i = 0; i < RVVArgInfos.size(); ++i)
21423 allocate(RVVArgInfos[i]);
21424}
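// For illustration: an LMUL-2 tuple argument with NF = 2 (two nxv4i32 fields)
// needs RegsNeeded = 128 / 64 = 2 and TotalRegsNeeded = 4, so with an empty
// AssignedMap it takes the first free window at an even register offset,
// i.e. v8-v11. If no such window is left, allocatePhysReg(..., 0) records an
// invalid MCPhysReg, meaning the argument is not passed in a VR.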
21425
21426MCPhysReg RVVArgDispatcher::getNextPhysReg() {
21427 assert(CurIdx < AllocatedPhysRegs.size() && "Index out of range");
21428 return AllocatedPhysRegs[CurIdx++];
21429}
21430
21431namespace llvm::RISCVVIntrinsicsTable {
21432
21433#define GET_RISCVVIntrinsicsTable_IMPL
21434#include "RISCVGenSearchableTables.inc"
21435
21436} // namespace llvm::RISCVVIntrinsicsTable
unsigned const MachineRegisterInfo * MRI
static MCRegister MatchRegisterName(StringRef Name)
static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT)
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget, const AArch64TargetLowering &TLI)
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
SmallVector< AArch64_IMM::ImmInsnModel, 4 > Insn
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
#define NODE_NAME_CASE(node)
static bool isConstant(const MachineInstr &MI)
amdgpu AMDGPU Register Bank Select
static bool isZeroOrAllOnes(SDValue N, bool AllOnes)
static SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, TargetLowering::DAGCombinerInfo &DCI, bool AllOnes=false)
static MCRegister MatchRegisterAltName(StringRef Name)
Maps from the set of all alternative registernames to a register number.
Function Alias Analysis Results
static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG, unsigned Flags)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static MachineBasicBlock * emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode)
static SDValue unpackFromRegLoc(const CSKYSubtarget &Subtarget, SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
#define NL
uint64_t Addr
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
#define Check(C,...)
#define im(i)
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
#define RegName(no)
const MCPhysReg ArgFPR32s[]
const MCPhysReg ArgVRs[]
static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG)
const MCPhysReg ArgFPR64s[]
const MCPhysReg ArgGPRs[]
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc=ISD::ANY_EXTEND)
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
loop Loop Strength Reduction
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
unsigned const TargetRegisterInfo * TRI
This file provides utility analysis objects describing memory locations.
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG, const TargetLowering::DAGCombinerInfo &DCI, const MipsSETargetLowering *TL, const MipsSubtarget &Subtarget)
static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, const MipsSubtarget &Subtarget)
static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
LLVMContext & Context
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
static bool IsSelect(MachineInstr &MI)
const char LLVMTargetMachineRef TM
R600 Clause Merge
static StringRef getExtensionType(StringRef Ext)
static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG)
static SDValue lowerSADDO_SSUBO(SDValue Op, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB, unsigned RelOpcode, unsigned EqOpcode, const RISCVSubtarget &Subtarget)
static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef< int > Mask)
Match shuffles that concatenate two vectors, rotate the concatenation, and then extract the original ...
static const Intrinsic::ID FixedVlsegIntrIds[]
static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MVT getLMUL1VT(MVT VT)
static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, MVT ValVT2, MVT LocVT2, ISD::ArgFlagsTy ArgFlags2, bool EABI)
static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
Match v(f)slide1up/down idioms.
static const MCPhysReg ArgVRM2s[]
static bool isInterleaveShuffle(ArrayRef< int > Mask, MVT VT, int &EvenSrc, int &OddSrc, const RISCVSubtarget &Subtarget)
Is this shuffle interleaving contiguous elements from one vector into the even elements and contiguou...
static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG)
According to the property that indexed load/store instructions zero-extend their indices,...
static void promoteVCIXScalar(const SDValue &Op, SmallVectorImpl< SDValue > &Operands, SelectionDAG &DAG)
static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, SDValue Scalar, SDValue VL, SelectionDAG &DAG)
static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode)
static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, SDValue Lo, SDValue Hi, SDValue VL, SelectionDAG &DAG)
static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL, SelectionDAG &DAG)
Creates an all ones mask suitable for masking a vector of type VecTy with vector length VL.
static cl::opt< int > FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden, cl::desc("Give the maximum number of instructions that we will " "use for creating a floating-point immediate value"), cl::init(2))
static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static const RISCV::RISCVMaskedPseudoInfo * lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVII::VLMUL LMul, unsigned SEW)
static SDValue expandMul(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue performVWADDSUBW_VLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask, Align BaseAlign, const RISCVSubtarget &ST)
Match the index of a gather or scatter operation as an operation with twice the element width and hal...
static bool isLegalBitRotate(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, MVT &RotateVT, unsigned &RotateAmt)
static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG)
static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1, SelectionDAG &DAG)
static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG)
static MachineBasicBlock * EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second, MachineBasicBlock *ThisMBB, const RISCVSubtarget &Subtarget)
static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG)
static std::optional< uint64_t > getExactInteger(const APFloat &APF, uint32_t BitWidth)
static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG)
static SDValue performMemPairCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG)
static bool isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static unsigned getRVVReductionOp(unsigned ISDOpcode)
static std::optional< bool > matchSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue Val)
static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG, unsigned Type)
static cl::opt< unsigned > NumRepeatedDivisors(DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden, cl::desc("Set the minimum number of repetitions of a divisor to allow " "transformation to multiplications by the reciprocal"), cl::init(2))
static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG)
static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool hasMaskOp(unsigned Opcode)
Return true if a RISC-V target specified op has a mask operand.
static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index, ISD::MemIndexType &IndexType, RISCVTargetLowering::DAGCombinerInfo &DCI)
static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG)
static unsigned getRISCVVLOp(SDValue Op)
Get a RISC-V target specified VL op for a given SDNode.
static unsigned getVecReduceOpcode(unsigned Opc)
Given a binary operator, return the associative generic ISD::VECREDUCE_OP which corresponds to it.
static std::pair< SDValue, SDValue > getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performFP_TO_INT_SATCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT, SDValue StartValue, SDValue Vec, SDValue Mask, SDValue VL, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Helper to lower a reduction sequence of the form: scalar = reduce_op vec, scalar_start.
static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::pair< SDValue, SDValue > getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< unsigned > preAssignMask(const ArgTy &Args)
static SDValue getVLOperand(SDValue Op)
static MachineBasicBlock * emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB, const RISCVSubtarget &Subtarget)
static cl::opt< bool > RV64LegalI32("riscv-experimental-rv64-legal-i32", cl::ReallyHidden, cl::desc("Make i32 a legal type for SelectionDAG on RV64."))
static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVectorXRINT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static cl::opt< unsigned > ExtensionMaxWebSize(DEBUG_TYPE "-ext-max-web-size", cl::Hidden, cl::desc("Give the maximum size (in number of nodes) of the web of " "instructions that we will consider for VW expansion"), cl::init(18))
static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG)
static SDValue getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Merge, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED)
static bool isSelectPseudo(MachineInstr &MI)
static std::optional< MVT > getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool useRVVForFixedLengthVectorVT(MVT VT, const RISCVSubtarget &Subtarget)
static Value * useTpOffset(IRBuilderBase &IRB, unsigned Offset)
static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG)
static MachineBasicBlock * emitSplitF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI, MachineBasicBlock *BB, unsigned CVTXOpc)
static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG)
static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc)
static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerSMULO(SDValue Op, SelectionDAG &DAG)
static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG, SDValue TrueVal, SDValue FalseVal, bool Swapped)
#define VP_CASE(NODE)
static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask, SmallVector< int > &ShuffleMask)
Match the index vector of a scatter or gather node as the shuffle mask which performs the rearrangeme...
static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG)
static bool hasMergeOp(unsigned Opcode)
Return true if a RISC-V target specified op has a merge operand.
static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static void processVCIXOperands(SDValue &OrigOp, SmallVectorImpl< SDValue > &Operands, SelectionDAG &DAG)
static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL, SelectionDAG &DAG)
static SDValue lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< VIDSequence > isSimpleVIDSequence(SDValue Op, unsigned EltSizeInBits)
static SDValue getDeinterleaveViaVNSRL(const SDLoc &DL, MVT VT, SDValue Src, bool EvenElts, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue lowerUADDSAT_USUBSAT(SDValue Op, SelectionDAG &DAG)
static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC)
static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc)
static SDValue lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG)
static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isValidEGW(int EGS, EVT VT, const RISCVSubtarget &Subtarget)
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isNonZeroAVL(SDValue AVL)
#define DEBUG_TYPE
static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
Combine a binary operation to its equivalent VW or VW_W form.
static SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG, unsigned Type)
static ArrayRef< MCPhysReg > getFastCCArgGPRs(const RISCVABI::ABI ABI)
static const MCPhysReg ArgVRM8s[]
static MachineBasicBlock * emitReadCounterWidePseudo(MachineInstr &MI, MachineBasicBlock *BB)
static const MCPhysReg ArgVRM4s[]
static cl::opt< bool > AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden, cl::desc("Allow the formation of VW_W operations (e.g., " "VWADD_W) with splat constants"), cl::init(false))
static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const CCValAssign &HiVA, const SDLoc &DL)
static SDValue lowerSADDSAT_SSUBSAT(SDValue Op, SelectionDAG &DAG)
static SDValue getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Merge, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED)
static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1, LSBaseSDNode *LSNode2, SDValue BasePtr, uint64_t Imm)
static std::tuple< unsigned, SDValue, SDValue > getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT, const RISCVSubtarget &Subtarget)
static SDValue performFP_TO_INTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static const MCPhysReg ArgFPR16s[]
static SDValue combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Perform two related transforms whose purpose is to incrementally recognize an explode_vector followed...
static SDValue performVFMADD_VLCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Try to optimize BUILD_VECTORs with "dominant values" - these are values which constitute a large proportion...
static SDValue getVLOp(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, ISD::CondCode &CC, SelectionDAG &DAG)
static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
If we have a build_vector where each lane is binop X, C, where C is a constant (but not necessarily t...
#define OP_CASE(NODE)
static const Intrinsic::ID FixedVssegIntrIds[]
static LLT getMaskTypeFor(LLT VecTy)
Return the mask type suitable for masking the provided vector type.
const SmallVectorImpl< MachineOperand > & Cond
static bool isCommutative(Instruction *I)
This file defines the SmallSet class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metrics...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
static constexpr int Concat[]
Value * RHS
Value * LHS
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition: APFloat.h:1193
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition: APFloat.h:1185
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Definition: APFloat.h:977
Class for arbitrary precision integers.
Definition: APInt.h:76
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition: APInt.h:207
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1491
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition: APInt.h:1364
uint64_t extractBitsAsZExtValue(unsigned numBits, unsigned bitPosition) const
Definition: APInt.cpp:489
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1463
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:906
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1308
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition: APInt.h:1179
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:349
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1160
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:358
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:187
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:307
void clearAllBits()
Set every bit to 0.
Definition: APInt.h:1375
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1589
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition: APInt.h:413
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:197
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1482
void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition: APInt.cpp:368
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:954
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1235
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:418
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:284
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition: APInt.h:1108
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:274
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition: APInt.h:1367
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition: APInt.h:264
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:217
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1513
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:829
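The APInt helpers above are what the lowering code leans on for constant analysis. A small hedged example of the style; the function name and its exact purpose are illustrative, not taken from this file.

#include "llvm/ADT/APInt.h"
using namespace llvm;

// Illustrative: is Imm a sign-extended N-bit immediate whose low N bits
// form a (non-zero) power of two?
static bool isSExtPow2Imm(const APInt &Imm, unsigned N) {
  if (!Imm.isSignedIntN(N)) // does it fit in N bits when sign extended?
    return false;
  APInt Low = Imm.trunc(N); // keep only the low N bits
  return Low.isPowerOf2();  // exactly one bit set
}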
An arbitrary precision integer that knows its signedness.
Definition: APSInt.h:23
an instruction to allocate memory on the stack
Definition: Instructions.h:59
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
iterator end() const
Definition: ArrayRef.h:154
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
iterator begin() const
Definition: ArrayRef.h:153
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:195
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:539
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:748
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
Definition: Instructions.h:867
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:760
@ Add
*p = old + v
Definition: Instructions.h:764
@ Min
*p = old <signed v ? old : v
Definition: Instructions.h:778
@ Or
*p = old | v
Definition: Instructions.h:772
@ Sub
*p = old - v
Definition: Instructions.h:766
@ And
*p = old & v
Definition: Instructions.h:768
@ UIncWrap
Increment one up to a maximum value.
Definition: Instructions.h:800
@ Max
*p = old >signed v ? old : v
Definition: Instructions.h:776
@ UMin
*p = old <unsigned v ? old : v
Definition: Instructions.h:782
@ UMax
*p = old >unsigned v ? old : v
Definition: Instructions.h:780
@ UDecWrap
Decrement one until a minimum value or zero.
Definition: Instructions.h:804
@ Nand
*p = ~(old & v)
Definition: Instructions.h:770
bool isFloatingPointOperation() const
Definition: Instructions.h:922
BinOp getOperation() const
Definition: Instructions.h:845
Value * getValOperand()
Definition: Instructions.h:914
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
Definition: Instructions.h:887
bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:349
static BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG)
Parses tree in N for base, index, offset addresses.
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:206
bool test(unsigned Idx) const
Definition: BitVector.h:461
BitVector & set()
Definition: BitVector.h:351
bool all() const
all - Returns true if all bits are set.
Definition: BitVector.h:175
CCState - This class holds information needed while lowering arguments and return values.
MachineFunction & getMachineFunction() const
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
SmallVectorImpl< ISD::ArgFlagsTy > & getPendingArgFlags()
MCRegister AllocateReg(MCPhysReg Reg)
AllocateReg - Attempt to allocate one register.
int64_t AllocateStack(unsigned Size, Align Alignment)
AllocateStack - Allocate a chunk of stack space with the specified size and alignment.
void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed values into this state.
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
SmallVectorImpl< CCValAssign > & getPendingLocs()
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals into this state.
void addLoc(const CCValAssign &V)
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
static CCValAssign getPending(unsigned ValNo, MVT ValVT, MVT LocVT, LocInfo HTP, unsigned ExtraInfo=0)
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getReg(unsigned ValNo, MVT ValVT, unsigned RegNo, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
bool isMemLoc() const
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, unsigned RegNo, MVT LocVT, LocInfo HTP)
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
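CCState and CCValAssign drive argument lowering: a CCAssignFn looks at each incoming or outgoing value and either allocates a register or reserves a stack slot, recording the decision with addLoc. The sketch below is a deliberately simplified assignment function, not the real RISC-V one; CC_Toy is an illustrative name, and it assumes the backend's generated RISCV::X* register enumerators are in scope, as they are in this file.

#include "llvm/CodeGen/CallingConvLower.h"
using namespace llvm;

// Illustrative only: integers go in a0-a7, everything else spills to an
// 8-byte stack slot. Returning false means the value has been assigned.
static bool CC_Toy(unsigned ValNo, MVT ValVT, MVT LocVT,
                   CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
                   CCState &State) {
  static const MCPhysReg ArgGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
                                      RISCV::X13, RISCV::X14, RISCV::X15,
                                      RISCV::X16, RISCV::X17};
  if (MCRegister Reg = State.AllocateReg(ArgGPRs)) {
    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    return false;
  }
  int64_t Offset = State.AllocateStack(8, Align(8));
  State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
  return false;
}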
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
bool isIndirectCall() const
Return true if the callsite is an indirect call.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
This is the shared class of boolean and integer constants.
Definition: Constants.h:80
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition: Constants.h:217
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:205
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:154
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
This is an important base class in LLVM.
Definition: Constant.h:41
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:417
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
unsigned getPointerSizeInBits(unsigned AS=0) const
Layout pointer size, in bits. FIXME: The defaults need to be removed once all of the backends/clients ...
Definition: DataLayout.h:410
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:874
A debug info location.
Definition: DebugLoc.h:33
unsigned size() const
Definition: DenseMap.h:99
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
Implements a dense probed hash-table based set.
Definition: DenseSet.h:271
Diagnostic information for unsupported feature in backend.
static constexpr ElementCount getScalable(ScalarTy MinVal)
Definition: TypeSize.h:299
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:296
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:692
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:202
iterator_range< arg_iterator > args()
Definition: Function.h:842
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:701
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:682
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this function.
Definition: Function.h:264
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:340
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:356
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:207
Argument * getArg(unsigned i) const
Definition: Function.h:836
bool isDSOLocal() const
Definition: GlobalValue.h:305
bool hasExternalWeakLinkage() const
Definition: GlobalValue.h:529
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
Store the specified register of the given register class to the specified stack frame index.
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
Load the specified register of the given register class from the specified stack frame index.
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:94
Value * CreateConstGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0, const Twine &Name="")
Definition: IRBuilder.h:1881
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2516
FenceInst * CreateFence(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System, const Twine &Name="")
Definition: IRBuilder.h:1834
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2033
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition: IRBuilder.h:526
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:174
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Definition: IRBuilder.h:531
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1749
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1344
ConstantInt * getIntN(unsigned N, uint64_t C)
Get a constant N-bit value, zero extended or truncated from a 64-bit value.
Definition: IRBuilder.h:497
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2494
AtomicRMWInst * CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val, MaybeAlign Align, AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Definition: IRBuilder.h:1854
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2007
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2412
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition: IRBuilder.h:516
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2666
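The atomic-lowering hooks further down in this listing build IR through these IRBuilder methods. A minimal hedged sketch of that style, assuming a target that expands a sequentially consistent RMW into explicit fences around a monotonic atomicrmw; the helper name is illustrative and not from this file.

#include "llvm/IR/IRBuilder.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/AtomicOrdering.h"
using namespace llvm;

// Illustrative only: an atomicrmw add bracketed by release/acquire fences.
static Value *emitFencedAtomicAdd(IRBuilderBase &Builder, Value *Ptr,
                                  Value *Incr, Align Alignment) {
  Builder.CreateFence(AtomicOrdering::Release);
  AtomicRMWInst *RMW =
      Builder.CreateAtomicRMW(AtomicRMWInst::Add, Ptr, Incr, Alignment,
                              AtomicOrdering::Monotonic);
  Builder.CreateFence(AtomicOrdering::Acquire);
  return RMW;
}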
static InstructionCost getInvalid(CostType Val=0)
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
Definition: Instruction.cpp:83
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:252
Class to represent integer types.
Definition: DerivedTypes.h:40
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:47
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:54
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Base class for LoadSDNode and StoreSDNode.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
An instruction for reading from memory.
Definition: Instructions.h:184
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:286
Value * getPointerOperand()
Definition: Instructions.h:280
bool isSimple() const
Definition: Instructions.h:272
Align getAlign() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:236
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
Context object for machine code objects.
Definition: MCContext.h:81
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:35
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:397
Metadata node.
Definition: Metadata.h:1067
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1428
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
static auto integer_fixedlen_vector_valuetypes()
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
bool bitsLE(MVT VT) const
Return true if this has no more bits than VT.
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
static MVT getScalableVectorVT(MVT VT, unsigned NumElements)
MVT changeTypeToInteger()
Return the type converted to an equivalently sized integer or vector with integer element type.
bool bitsLT(MVT VT) const
Return true if this has less bits than VT.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
uint64_t getScalarStoreSize() const
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
bool bitsGT(MVT VT) const
Return true if this has more bits than VT.
bool isFixedLengthVector() const
ElementCount getVectorElementCount() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool bitsGE(MVT VT) const
Return true if this has no less bits than VT.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
bool isValid() const
Return true if this is a valid simple valuetype.
static MVT getIntegerVT(unsigned BitWidth)
MVT getDoubleNumVectorElementsVT() const
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
static auto integer_scalable_vector_valuetypes()
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
static auto fp_fixedlen_vector_valuetypes()
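These MVT helpers compose when a fixed-length vector is mapped onto a scalable RVV container of the same element type. The sketch below is illustrative only: it assumes each LMUL=1 block contributes at least 64 bits of elements (RVV's usual convention) and ignores the subtarget VLEN and LMUL limits that the real getContainerForFixedLengthVector consults; the include path is assumed for this LLVM version.

#include "llvm/CodeGenTypes/MachineValueType.h"
#include <algorithm>
#include <cassert>
using namespace llvm;

// Illustrative only: choose a scalable type big enough to hold FixedVT.
static MVT toyContainerFor(MVT FixedVT) {
  assert(FixedVT.isFixedLengthVector() && "expected a fixed-length vector");
  MVT EltVT = FixedVT.getVectorElementType();
  unsigned EltBits = static_cast<unsigned>(EltVT.getFixedSizeInBits());
  unsigned MinEltsPerBlock = std::max(1u, 64u / EltBits);
  unsigned NumElts =
      std::max(FixedVT.getVectorNumElements(), MinEltsPerBlock);
  return MVT::getScalableVectorVT(EltVT, NumElts);
}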
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and updates PHI operands in the successor blocks...
MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
void push_back(MachineInstr *MI)
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
Instructions::iterator instr_iterator
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before 'Where'.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
Definition: MachineInstr.h:69
void collectDebugValues(SmallVectorImpl< MachineInstr * > &DbgValues)
Scan instructions immediately following MI and collect any matching DBG_VALUEs.
void setFlag(MIFlag Flag)
Set a MI flag.
Definition: MachineInstr.h:398
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:568
@ EK_Custom32
EK_Custom32 - Each entry is a 32-bit value that is custom lowered by the TargetLowering::LowerCustomJumpTableEntry hook.
A description of a memory reference used in the backend.
const MDNode * getRanges() const
Return the range tag for the memory reference.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
Flags getFlags() const
Return the raw flags of the source value,.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
Align getBaseAlign() const
Return the minimum known alignment in bytes of the base address, without the offset.
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified register class.
void addLiveIn(MCRegister Reg, Register vreg=Register())
addLiveIn - Add the specified register as a live-in.
This is an abstract virtual class for memory operations.
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.h:293
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition: Constants.cpp:1827
RISCVMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private RISCV-specific information for each MachineFunction.
RISCVABI::ABI getTargetABI() const
unsigned getMinimumJumpTableEntries() const
bool hasStdExtCOrZca() const
unsigned getMaxLMULForFixedLengthVectors() const
bool hasVInstructionsI64() const
bool hasVInstructionsF64() const
bool hasStdExtDOrZdinx() const
bool hasStdExtZfhOrZhinx() const
unsigned getRealMinVLen() const
bool useRVVForFixedLengthVectors() const
bool isTargetFuchsia() const
unsigned getDLenFactor() const
bool hasVInstructionsF16Minimal() const
unsigned getXLen() const
bool hasConditionalMoveFusion() const
bool isRegisterReservedByUser(Register i) const
bool hasVInstructionsF16() const
bool hasVInstructionsBF16() const
unsigned getMaxBuildIntsCost() const
Align getPrefLoopAlignment() const
bool hasVInstructions() const
std::optional< unsigned > getRealVLen() const
bool useConstantPoolForLargeInts() const
Align getPrefFunctionAlignment() const
bool hasStdExtZfhminOrZhinxmin() const
unsigned getRealMaxVLen() const
const RISCVRegisterInfo * getRegisterInfo() const override
const RISCVInstrInfo * getInstrInfo() const override
const RISCVTargetLowering * getTargetLowering() const override
bool hasVInstructionsF32() const
unsigned getELen() const
bool hasStdExtFOrZfinx() const
bool isSoftFPABI() const
unsigned getFLen() const
static std::pair< unsigned, unsigned > computeVLMAXBounds(MVT ContainerVT, const RISCVSubtarget &Subtarget)
static std::pair< unsigned, unsigned > decomposeSubvectorInsertExtractToSubRegs(MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx, const RISCVRegisterInfo *TRI)
InstructionCost getVRGatherVVCost(MVT VT) const
Return the cost of a vrgather.vv instruction for the type VT.
bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const
static unsigned getSubregIndexByMVT(MVT VT, unsigned Index)
Value * getIRStackGuard(IRBuilderBase &IRB) const override
If the target has a standard location for the stack protector cookie, returns the address of that loc...
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) saturation patterns.
bool shouldSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...
InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const override
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT) const override
Return true if pulling a binary operation into a select with an identity constant is profitable.
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able to emit the call instruction as a tail call.
std::pair< int, bool > getLegalZfaFPImm(const APFloat &Imm, EVT VT) const
RISCVTargetLowering(const TargetMachine &TM, const RISCVSubtarget &STI)
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Returns true if the target allows unaligned memory accesses of the specified type.
const Constant * getTargetConstantFromLoad(LoadSDNode *LD) const override
This method returns the constant pool value that will be loaded by LD.
const RISCVSubtarget & getSubtarget() const
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II, StoreInst *SI) const override
Lower an interleave intrinsic to a target specific store intrinsic.
bool preferScalarizeSplat(SDNode *N) const override
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
bool canSplatOperand(Instruction *I, int Operand) const
Return true if the (vector) instruction I will be lowered to an instruction with a scalar splat operand.
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
const MCExpr * LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned uid, MCContext &Ctx) const override
InstructionCost getVRGatherVICost(MVT VT) const
Return the cost of a vrgather.vi (or vx) instruction for the type VT.
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Return true if it is beneficial to convert a load of a constant to just the constant itself.
bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const override
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the register type for a given MVT, ensuring vectors are treated as a series of GPR-sized integers.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler operations...
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of the specified type.
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y -> (~X & Y) == 0 and (X & Y) != Y -> (~X & Y) != 0.
bool shouldScalarizeBinop(SDValue VecOp) const override
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to move this shift by a constant amount through its operand,...
bool areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX, const MemSDNode &NodeY) const override
Return true if it is valid to merge the TargetMMOFlags in two SDNodes.
bool hasBitTest(SDValue X, SDValue Y) const override
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0. This knowledge can be used...
static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize, unsigned MinSize)
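The helper above evaluates the V-extension relation VLMAX = LMUL * VLEN / SEW for a given container. A tiny standalone illustration of that arithmetic; the function and its fractional-LMUL encoding are illustrative, not the helper's actual parameters.

#include <cassert>

// VLMAX = (VLEN / SEW) * LMUL, with LMUL written as the fraction Num/Den.
static unsigned vlmaxFromSpec(unsigned VLen, unsigned Sew, unsigned LMulNum,
                              unsigned LMulDen = 1) {
  assert(VLen % Sew == 0 && "SEW must divide VLEN");
  return (VLen / Sew) * LMulNum / LMulDen;
}
// e.g. VLEN=128, SEW=32, LMUL=2   -> (128/32)*2   = 8
//      VLEN=128, SEW=64, LMUL=1/2 -> (128/64)*1/2 = 1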
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
InstructionCost getLMULCost(MVT VT) const
Return the cost of LMUL for linear operations.
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const override
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
InstructionCost getVSlideVICost(MVT VT) const
Return the cost of a vslidedown.vi or vslideup.vi instruction for the type VT.
bool fallBackToDAGISel(const Instruction &Inst) const override
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
bool lowerInterleavedLoad(LoadInst *LI, ArrayRef< ShuffleVectorInst * > Shuffles, ArrayRef< unsigned > Indices, unsigned Factor) const override
Lower an interleaved load into a vlsegN intrinsic.
bool isCtpopFast(EVT VT) const override
Return true if ctpop instruction is fast.
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
This method can be implemented by targets that want to expose additional information about sign bits ...
MVT getContainerForFixedLengthVector(MVT VT) const
static unsigned getRegClassIDForVecVT(MVT VT)
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const override
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand fl...
SDValue computeVLMax(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG) const
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const override
Should we transform the IR-optimal check for whether the given truncation down into KeptBits would be truncated...
bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const override
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Returns the register with the specified architectural or ABI name.
InstructionCost getVSlideVXCost(MVT VT) const
Return the cost of a vslidedown.vx or vslideup.vx instruction for the type VT.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
static unsigned getRegClassIDForLMUL(RISCVII::VLMUL LMul)
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const override
Return the maximum number of "x & (x - 1)" operations that can be done instead of deferring to a custom...
void AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const override
This method should be implemented by targets that mark instructions with the 'hasPostISelHook' flag.
bool isShuffleMaskLegal(ArrayRef< int > M, EVT VT) const override
Return true if the given shuffle mask can be codegen'd directly, or if it should be stack expanded.
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, unsigned Factor) const override
Lower an interleaved store into a vssegN intrinsic.
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (touches memory).
unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const override
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
bool isLegalElementTypeForRVV(EVT ScalarTy) const
bool isVScaleKnownToBeAPowerOfTwo() const override
Return true only if vscale must be a power of two.
bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *II, LoadInst *LI) const override
Lower a deinterleave intrinsic to a target specific load intrinsic.
static RISCVII::VLMUL getLMUL(MVT VT)
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the number of registers for a given MVT, ensuring vectors are treated as a series of gpr sized...
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint letter, return the type of constraint it is for this target.
MachineInstr * EmitKCFICheck(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator &MBBI, const TargetInstrInfo *TII) const override
bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace, const DataLayout &) const
Returns whether or not generating an interleaved load/store intrinsic for this type will be legal.
bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const override
Return true if Op can create undef or poison from non-undef & non-poison operands.
bool isIntDivCheap(EVT VT, AttributeList Attr) const override
Return true if integer divide is usually cheaper than a sequence of several shifts,...
bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if this node ...
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if the node's...
SDValue joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, std::optional< CallingConv::ID > CC) const override
Target-specific combining of register parts into its original value.
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override
Return if the target supports combining a chain like:
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
bool isLegalStridedLoadStore(EVT DataType, Align Alignment) const
Return true if a stride load store of the given result type and alignment is legal.
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
As per the spec, the rules for passing vector arguments are as follows:
static constexpr unsigned NumArgVRs
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
const SDValue & getOperand(unsigned Num) const
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
void setCFIType(uint32_t Type)
bool isUndef() const
Return true if the node represents an undefined value (ISD::UNDEF).
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
op_iterator op_end() const
op_iterator op_begin() const
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:225
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:722
unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
SDValue getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS)
Helper function to make it easier to build Select's if you just have operands and don't want to check...
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getNeutralElement(unsigned Opcode, const SDLoc &DL, EVT VT, SDNodeFlags Flags)
Get the (commutative) neutral element for the given opcode, if it exists.
SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SDValue.
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-zero...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getElementCount(const SDLoc &DL, EVT VT, ElementCount EC, bool ConstantFold=true)
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
bool shouldOptForSize() const
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
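Most lowering routines in this file are stitched together from SelectionDAG helpers like the ones listed here. A minimal hedged sketch of that style combining getNOT, getConstant, getSetCC and getSelect; the helper name and the chosen node pattern are illustrative, not code from this file.

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Illustrative: select between ~X and Y depending on whether Cond is zero.
static SDValue buildSelectOfNot(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
                                SDValue Cond, SDValue X, SDValue Y) {
  SDValue NotX = DAG.getNOT(DL, X, VT); // (xor X, -1)
  SDValue Zero = DAG.getConstant(0, DL, Cond.getValueType());
  SDValue CondIsZero =
      DAG.getSetCC(DL, MVT::i1, Cond, Zero, ISD::SETEQ); // pre-legalization i1
  return DAG.getSelect(DL, VT, CondIsZero, NotX, Y);
}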
SDValue getVPZExtOrTrunc(const SDLoc &DL, EVT VT, SDValue Op, SDValue Mask, SDValue EVL)
Convert a vector-predicated Op, which must be an integer vector, to the vector-type VT,...
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:478
bool NewNodesMustHaveLegalTypes
When true, additional steps are taken to ensure that getConstant() and similar functions return DAG n...
Definition: SelectionDAG.h:387
std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not necessarily...
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:732
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:828
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), AAResults *AA=nullptr)
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:472
SDValue getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
Definition: SelectionDAG.h:659
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
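A hedged sketch of the usual getLoad/getStore pairing (see the getLoad entry near the top of this list); Chain, Ptr, Dst, DL, and DAG are assumed to exist in the surrounding lowering code:
  SDValue Val = DAG.getLoad(MVT::i32, DL, Chain, Ptr, MachinePointerInfo());
  SDValue NewChain = Val.getValue(1);  // a load produces both the loaded value and an output chain
  SDValue St = DAG.getStore(NewChain, DL, Val, Dst, MachinePointerInfo(), Align(4));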
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
Definition: SelectionDAG.h:862
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, which starts a new call frame in which InSize bytes are set up inside ...
SDValue getRegister(unsigned Reg, EVT VT)
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
static const fltSemantics & EVTToAPFloatSemantics(EVT VT)
Returns an APFloat semantics tag appropriate for the given type.
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:473
std::pair< SDValue, SDValue > getStrictFPExtendOrRound(SDValue Op, SDValue Chain, const SDLoc &DL, EVT VT)
Convert Op, which must be a STRICT operation of float type, to the float type VT, by either extending...
std::pair< SDValue, SDValue > SplitEVL(SDValue N, EVT VecVT, const SDLoc &DL)
Split the explicit vector length parameter of a VP operation.
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:773
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
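For example, an unsigned maximum can be written with getSelectCC (a sketch; A, B, DL, and DAG are assumed):
  // Max = (A ugt B) ? A : B
  SDValue Max = DAG.getSelectCC(DL, A, B, A, B, ISD::SETUGT);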
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
bool isKnownNeverNaN(SDValue Op, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:676
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:768
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:469
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:799
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:845
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops)
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
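A one-line sketch: getZExtOrTrunc adapts an integer value to a required width without the caller checking which direction is needed (Idx, DL, DAG assumed):
  // Zero-extends if Idx is narrower than i64, truncates if it is wider, and is a no-op if equal.
  SDValue Idx64 = DAG.getZExtOrTrunc(Idx, DL, MVT::i64);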
SDValue getCondCode(ISD::CondCode Cond)
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
LLVMContext * getContext() const
Definition: SelectionDAG.h:485
SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL, bool LegalTypes=true)
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:739
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:554
SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
Definition: SelectionDAG.h:878
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
SDValue getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating=false)
This instruction constructs a fixed permutation of two input vectors.
static bool isBitRotateMask(ArrayRef< int > Mask, unsigned EltSizeInBits, unsigned MinSubElts, unsigned MaxSubElts, unsigned &NumSubElts, unsigned &RotateAmt)
Checks if the shuffle is a bit rotation of the first operand across multiple subelements,...
VectorType * getType() const
Overload to return most specific vector type.
static void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static bool isReverseMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask swaps the order of elements from exactly one source vector.
static bool isInsertSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &NumSubElts, int &Index)
Return true if this shuffle mask is an insert subvector mask.
static bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
static bool isSplatMask(const int *Mask, EVT VT)
ArrayRef< int > getMask() const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:427
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:166
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
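A self-contained sketch of the SmallSet interface described above (insert returning a pair, count returning 0 or 1):
  #include "llvm/ADT/ArrayRef.h"
  #include "llvm/ADT/SmallSet.h"

  // Count how many distinct values appear in Vals (illustrative only).
  unsigned countDistinct(llvm::ArrayRef<int> Vals) {
    llvm::SmallSet<int, 8> Seen;
    unsigned Distinct = 0;
    for (int V : Vals)
      if (Seen.insert(V).second)  // .second is true only on the first insertion of V
        ++Distinct;
    return Distinct;              // Seen.count(V) now returns 1 for every V in Vals
  }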
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void reserve(size_type N)
Definition: SmallVector.h:676
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:818
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
An instruction for storing to memory.
Definition: Instructions.h:317
This class is used to represent ISD::STORE nodes.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
std::string lower() const
Definition: StringRef.cpp:111
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
StringSwitch & Cases(StringLiteral S0, StringLiteral S1, T Value)
Definition: StringSwitch.h:90
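A sketch of the Case/Cases/Default chain described above; the Ext enum and the extension names used here are illustrative, not taken from this file:
  #include "llvm/ADT/StringRef.h"
  #include "llvm/ADT/StringSwitch.h"

  enum class Ext { F, D, V, Unknown };  // hypothetical enum for the example

  Ext parseExt(llvm::StringRef Name) {
    return llvm::StringSwitch<Ext>(Name)
        .Case("f", Ext::F)
        .Cases("d", "zdinx", Ext::D)   // several spellings mapping to one value
        .Case("v", Ext::V)
        .Default(Ext::Unknown);
  }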
Class to represent struct types.
Definition: DerivedTypes.h:216
bool containsHomogeneousScalableVectorTypes() const
Returns true if this struct contains homogeneous scalable vector types.
Definition: Type.cpp:435
unsigned getNumElements() const
Random access to the elements.
Definition: DerivedTypes.h:341
Type * getTypeAtIndex(const Value *V) const
Given an index value into the type, return the type of the element.
Definition: Type.cpp:612
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
Convenience method to set an operation to Promote and specify the type in a single call.
unsigned getMinCmpXchgSizeInBits() const
Returns the size of the smallest cmpxchg or ll/sc instruction the backend supports.
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual bool shouldFoldSelectWithSingleBitTest(EVT VT, const APInt &AndMask) const
virtual Value * getIRStackGuard(IRBuilderBase &IRB) const
If the target has a standard location for the stack protector guard, returns the address of that loca...
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.stacksave/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
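A hedged sketch of how the setters above are typically invoked from a target's TargetLowering constructor; the specific opcode and type choices below are illustrative, not this file's configuration:
  // Inside a hypothetical XYZTargetLowering constructor:
  setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);        // expand to compare + branch
  setLoadExtAction(ISD::SEXTLOAD, MVT::i32, MVT::i8, Legal);   // i8 sign-extending loads are native
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);             // no truncating f64 -> f32 stores
  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);            // unsupported FP condition code
  setOperationPromotedToType(ISD::CTLZ, MVT::i8, MVT::i32);    // perform the operation in i32 instead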
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
bool allowsMemoryAccessForAlignment(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
This function returns true if the memory access is aligned or if the target allows this specific unal...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT.
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions Ref: "Hacker's Delight" by Henry Warren 1...
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:76
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
Target - Wrapper for Target specific information.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:330
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
Type * getStructElementType(unsigned N) const
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isStructTy() const
True if this is an instance of StructType.
Definition: Type.h:249
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:129
bool isScalableTy() const
Return true if this is a type whose size is a known multiple of vscale.
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getContainedType(unsigned i) const
This method is used to implement the type iterator (defined at the end of the file).
Definition: Type.h:377
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:348
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:72
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1074
Base class of all SIMD vector types.
Definition: DerivedTypes.h:403
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:187
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
Definition: TypeSize.h:243
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
constexpr bool isZero() const
Definition: TypeSize.h:156
self_iterator getIterator()
Definition: ilist_node.h:109
#define INT64_MIN
Definition: DataTypes.h:74
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ RISCV_VectorCall
Calling convention used for RISC-V V-extension.
Definition: CallingConv.h:268
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
@ SPIR_KERNEL
Used for SPIR kernel functions.
Definition: CallingConv.h:144
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition: CallingConv.h:76
@ GRAAL
Used by GraalVM. Two additional registers are reserved.
Definition: CallingConv.h:255
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:750
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
Definition: ISDOpcodes.h:1132
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1128
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:723
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:476
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition: ISDOpcodes.h:44
@ VECREDUCE_SEQ_FADD
Generic reduction nodes.
Definition: ISDOpcodes.h:1345
@ VECREDUCE_SMIN
Definition: ISDOpcodes.h:1376
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:250
@ ATOMIC_LOAD_NAND
Definition: ISDOpcodes.h:1275
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:559
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:714
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1161
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ATOMIC_LOAD_MAX
Definition: ISDOpcodes.h:1277
@ STRICT_FCEIL
Definition: ISDOpcodes.h:426
@ ATOMIC_LOAD_UMIN
Definition: ISDOpcodes.h:1278
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:239
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1037
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:783
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:483
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:199
@ RETURNADDR
Definition: ISDOpcodes.h:95
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:790
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:543
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
Definition: ISDOpcodes.h:1361
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:390
@ VECREDUCE_FMAXIMUM
FMINIMUM/FMAXIMUM nodes propagate NaNs and signed zeroes using the llvm.minimum and llvm....
Definition: ISDOpcodes.h:1365
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:688
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
Definition: ISDOpcodes.h:1234
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
Definition: ISDOpcodes.h:1239
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:255
@ VECREDUCE_SMAX
Definition: ISDOpcodes.h:1375
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:477
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:913
@ ATOMIC_LOAD_OR
Definition: ISDOpcodes.h:1273
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:903
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:229
@ ATOMIC_LOAD_XOR
Definition: ISDOpcodes.h:1274
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition: ISDOpcodes.h:411
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1406
@ GlobalTLSAddress
Definition: ISDOpcodes.h:79
@ SET_ROUNDING
Set rounding mode.
Definition: ISDOpcodes.h:885
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:774
@ STRICT_UINT_TO_FP
Definition: ISDOpcodes.h:450
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:620
@ READSTEADYCOUNTER
READSTEADYCOUNTER - This corresponds to the readsteadycounter intrinsic.
Definition: ISDOpcodes.h:1194
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
Definition: ISDOpcodes.h:1358
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:722
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1227
@ VECREDUCE_FMIN
Definition: ISDOpcodes.h:1362
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:994
@ STRICT_LROUND
Definition: ISDOpcodes.h:431
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:930
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1083
@ SSUBO
Same for subtraction.
Definition: ISDOpcodes.h:327
@ ATOMIC_LOAD_MIN
Definition: ISDOpcodes.h:1276
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1062
@ VECTOR_INTERLEAVE
VECTOR_INTERLEAVE(VEC1, VEC2) - Returns two vectors with all input and output vectors having the same...
Definition: ISDOpcodes.h:586
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
Definition: ISDOpcodes.h:646
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition: ISDOpcodes.h:507
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:349
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:727
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:211
@ VECREDUCE_UMAX
Definition: ISDOpcodes.h:1377
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:627
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1157
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:323
@ STRICT_FTRUNC
Definition: ISDOpcodes.h:430
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1370
@ GET_ROUNDING
Returns the current rounding mode: -1 Undefined; 0 Round to 0; 1 Round to nearest, ties to even; 2 Round to ...
Definition: ISDOpcodes.h:880
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:651
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:705
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:600
@ ATOMIC_LOAD_AND
Definition: ISDOpcodes.h:1271
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:573
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:535
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:203
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:780
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
Definition: ISDOpcodes.h:1217
@ FP_TO_UINT_SAT
Definition: ISDOpcodes.h:856
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:742
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
Definition: ISDOpcodes.h:1335
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1254
@ ATOMIC_LOAD_UMAX
Definition: ISDOpcodes.h:1279
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:971
@ SMULO
Same for multiplication.
Definition: ISDOpcodes.h:331
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:1047
@ STRICT_LRINT
Definition: ISDOpcodes.h:433
@ ConstantPool
Definition: ISDOpcodes.h:82
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:798
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:674
@ VECTOR_REVERSE
VECTOR_REVERSE(VECTOR) - Returns a vector, of the same type as VECTOR, whose elements are shuffled us...
Definition: ISDOpcodes.h:591
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:888
@ STRICT_FROUND
Definition: ISDOpcodes.h:428
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:736
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition: ISDOpcodes.h:449
@ VECREDUCE_UMIN
Definition: ISDOpcodes.h:1378
@ STRICT_FFLOOR
Definition: ISDOpcodes.h:427
@ STRICT_FROUNDEVEN
Definition: ISDOpcodes.h:429
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition: ISDOpcodes.h:129
@ BF16_TO_FP
BF16_TO_FP, FP_TO_BF16 - These operators are used to perform promotions and truncation for bfloat16.
Definition: ISDOpcodes.h:922
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:94
@ ATOMIC_LOAD_ADD
Definition: ISDOpcodes.h:1269
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:443
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition: ISDOpcodes.h:465
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:442
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:990
@ ATOMIC_LOAD_SUB
Definition: ISDOpcodes.h:1270
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:836
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:1188
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:470
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:680
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1214
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:184
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:400
@ SPLAT_VECTOR_PARTS
SPLAT_VECTOR_PARTS(SCALAR1, SCALAR2, ...) - Returns a vector with the scalar values joined together a...
Definition: ISDOpcodes.h:636
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:524
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ STRICT_LLRINT
Definition: ISDOpcodes.h:434
@ VECTOR_SPLICE
VECTOR_SPLICE(VEC1, VEC2, IMM) - Returns a subvector of the same type as VEC1/VEC2 from CONCAT_VECTOR...
Definition: ISDOpcodes.h:612
@ ATOMIC_SWAP
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN,...
Definition: ISDOpcodes.h:1268
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:869
@ STRICT_LLROUND
Definition: ISDOpcodes.h:432
@ STRICT_FNEARBYINT
Definition: ISDOpcodes.h:423
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition: ISDOpcodes.h:855
@ VECREDUCE_FMINIMUM
Definition: ISDOpcodes.h:1366
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:786
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:1152
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1076
@ BlockAddress
Definition: ISDOpcodes.h:84
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:763
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:493
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:340
@ STRICT_FRINT
Definition: ISDOpcodes.h:422
@ VECTOR_DEINTERLEAVE
VECTOR_DEINTERLEAVE(VEC1, VEC2) - Returns two vectors with all input and output vectors having the sa...
Definition: ISDOpcodes.h:580
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:192
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:515
bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
std::optional< unsigned > getVPMaskIdx(unsigned Opcode)
The operand position of the vector mask.
std::optional< unsigned > getVPExplicitVectorLengthIdx(unsigned Opcode)
The operand position of the explicit vector length parameter.
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
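A small sketch of the condition-code helpers just listed; VT is assumed to be the EVT of the comparison operands:
  ISD::CondCode CC     = ISD::SETULT;
  ISD::CondCode InvCC  = ISD::getSetCCInverse(CC, VT);      // SETUGE: !(X ult Y)
  ISD::CondCode SwapCC = ISD::getSetCCSwappedOperands(CC);  // SETUGT: (X ult Y) == (Y ugt X)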
MemIndexType
MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's index parameter when calcula...
Definition: ISDOpcodes.h:1491
@ UNSIGNED_SCALED
Definition: ISDOpcodes.h:1491
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1478
bool isBuildVectorOfConstantFPSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantFPSDNode or undef.
static const int FIRST_TARGET_STRICTFP_OPCODE
FIRST_TARGET_STRICTFP_OPCODE - Target-specific pre-isel operations which cannot raise FP exceptions s...
Definition: ISDOpcodes.h:1412
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1529
bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1509
bool isVPOpcode(unsigned Opcode)
Whether this is a vector-predicated Opcode.
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
Definition: ISDOpcodes.h:1574
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1469
@ Bitcast
Perform the operation on a different, but equivalently sized type.
ABI getTargetABI(StringRef ABIName)
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
Definition: PatternMatch.h:560
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
auto m_Undef()
Match an arbitrary undef constant.
Definition: PatternMatch.h:152
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
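A sketch of the PatternMatch helpers above, matching an insertelement of some scalar into an undef base at index 0; V is an llvm::Value* assumed to be in scope:
  #include "llvm/IR/PatternMatch.h"
  using namespace llvm::PatternMatch;

  llvm::Value *Scalar = nullptr;
  if (match(V, m_InsertElt(m_Undef(), m_Value(Scalar), m_ZeroInt()))) {
    // V is (insertelement undef, Scalar, 0); Scalar now points at the inserted value.
  }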
@ TAIL_UNDISTURBED_MASK_UNDISTURBED
static VLMUL getLMul(uint64_t TSFlags)
static int getFRMOpNum(const MCInstrDesc &Desc)
static unsigned getSEWOpNum(const MCInstrDesc &Desc)
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #3 and #4) ...
int getLoadFPImm(APFloat FPImm)
getLoadFPImm - Return a 5-bit binary encoding of the floating-point immediate value.
InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI)
int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, bool CompressionCost)
InstSeq generateTwoRegInstSeq(int64_t Val, const MCSubtargetInfo &STI, unsigned &ShiftAmt, unsigned &AddOpc)
static unsigned decodeVSEW(unsigned VSEW)
std::pair< unsigned, bool > decodeVLMUL(RISCVII::VLMUL VLMUL)
static RISCVII::VLMUL encodeLMUL(unsigned LMUL, bool Fractional)
static unsigned encodeSEW(unsigned SEW)
static constexpr unsigned FPMASK_Negative_Zero
static constexpr unsigned FPMASK_Positive_Subnormal
static constexpr unsigned FPMASK_Positive_Normal
static constexpr unsigned FPMASK_Negative_Subnormal
static constexpr unsigned FPMASK_Negative_Normal
bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI, RVVArgDispatcher &RVVDispatcher)
bool CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI, RVVArgDispatcher &RVVDispatcher)
static constexpr unsigned FPMASK_Positive_Infinity
int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIndex)
static constexpr unsigned FPMASK_Negative_Infinity
static constexpr unsigned FPMASK_Quiet_NaN
ArrayRef< MCPhysReg > getArgGPRs(const RISCVABI::ABI ABI)
bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
static constexpr unsigned FPMASK_Signaling_NaN
static constexpr unsigned FPMASK_Positive_Zero
static constexpr unsigned RVVBitsPerBlock
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ Kill
The last use of a register.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition: LLVMContext.h:54
@ System
Synchronized with respect to all concurrently executing threads.
Definition: LLVMContext.h:57
@ GeneralDynamic
Definition: CodeGen.h:46
Reg
All possible values of the reg field in the ModR/M byte.
@ ReallyHidden
Definition: CommandLine.h:139
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
@ FalseVal
Definition: TGLexer.h:59
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
@ Offset
Definition: DWP.cpp:456
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
static const MachineMemOperand::Flags MONontemporalBit1
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition: MathExtras.h:428
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A,...
Definition: STLExtras.h:2406
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition: MCRegister.h:21
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition: bit.h:317
static const MachineMemOperand::Flags MONontemporalBit0
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:280
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition: Utils.cpp:1507
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:330
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition: MathExtras.h:372
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: bit.h:215
bool isReleaseOrStronger(AtomicOrdering AO)
static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition: STLExtras.h:1928
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:324
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:275
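A few of the MathExtras helpers listed in this section, shown together as a sketch:
  #include "llvm/Support/MathExtras.h"

  static_assert(llvm::isPowerOf2_64(64), "64 is a power of two");
  unsigned ShAmt = llvm::Log2_64(64);        // 6
  uint64_t Next  = llvm::PowerOf2Ceil(33);   // 64
  uint64_t Cnt   = llvm::divideCeil(10, 4);  // 3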
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:257
bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
CombineLevel
Definition: DAGCombine.h:15
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
unsigned getKillRegState(bool B)
DWARFExpression::Operation Op
RoundingMode
Rounding mode.
@ TowardZero
roundTowardZero.
@ NearestTiesToEven
roundTiesToEven.
@ TowardPositive
roundTowardPositive.
@ NearestTiesToAway
roundTiesToAway.
@ TowardNegative
roundTowardNegative.
void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty, SmallVectorImpl< EVT > &ValueVTs, SmallVectorImpl< EVT > *MemVTs, SmallVectorImpl< TypeSize > *Offsets=nullptr, TypeSize StartingOffset=TypeSize::getZero())
ComputeValueVTs - Given an LLVM IR type, compute a sequence of EVTs that represent all the individual...
Definition: Analysis.cpp:79
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1921
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition: MathExtras.h:465
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
InstructionCost Cost
llvm::SmallVector< int, 16 > createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
bool isNeutralConstant(unsigned Opc, SDNodeFlags Flags, SDValue V, unsigned OperandNo)
Returns true if V is a neutral element of Opc with Flags.
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
#define NC
Definition: regutils.h:42
unsigned StepDenominator
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:230
static unsigned int semanticsPrecision(const fltSemantics &)
Definition: APFloat.cpp:292
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Helper struct shared between Function Specialization and SCCP Solver.
Definition: SCCPSolver.h:41
Extended Value Type.
Definition: ValueTypes.h:34
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:93
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:380
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:136
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:73
uint64_t getScalarStoreSize() const
Definition: ValueTypes.h:387
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:274
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:290
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:340
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:358
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:370
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
Definition: ValueTypes.h:415
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:306
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:64
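A short sketch combining getIntegerVT and getVectorVT (listed a few entries above); Ctx is an LLVMContext assumed to be available, e.g. via *DAG.getContext():
  EVT I8     = EVT::getIntegerVT(Ctx, 8);
  EVT V16I8  = EVT::getVectorVT(Ctx, I8, 16);                      // fixed <16 x i8>
  EVT NXV2I8 = EVT::getVectorVT(Ctx, I8, 2, /*IsScalable=*/true);  // scalable <vscale x 2 x i8>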
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:366
bool isFixedLengthVector() const
Definition: ValueTypes.h:177
EVT getRoundIntegerType(LLVMContext &Context) const
Rounds the bit-width of the given integer EVT up to the nearest power of two (and at least to eight),...
Definition: ValueTypes.h:404
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:167
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:313
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:202
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition: ValueTypes.h:173
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:318
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:156
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition: ValueTypes.h:101
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:326
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:298
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:151
Helper struct to store a base, index and offset that forms an address.
Definition: LoadStoreOpt.h:38
Align getNonZeroOrigAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
static KnownBits urem(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for urem(LHS, RHS).
Definition: KnownBits.cpp:1030
bool isUnknown() const
Returns true if we don't know any bits.
Definition: KnownBits.h:63
unsigned countMaxTrailingZeros() const
Returns the maximum number of trailing zero bits possible.
Definition: KnownBits.h:270
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
Definition: KnownBits.h:157
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:40
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition: KnownBits.h:168
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:71
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition: KnownBits.h:292
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition: KnownBits.h:307
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition: KnownBits.h:176
static KnownBits udiv(const KnownBits &LHS, const KnownBits &RHS, bool Exact=false)
Compute known bits for udiv(LHS, RHS).
Definition: KnownBits.cpp:988
unsigned countMaxLeadingZeros() const
Returns the maximum number of leading zero bits possible.
Definition: KnownBits.h:276
static KnownBits shl(const KnownBits &LHS, const KnownBits &RHS, bool NUW=false, bool NSW=false, bool ShAmtNonZero=false)
Compute known bits for shl(LHS, RHS).
Definition: KnownBits.cpp:291
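A compact sketch of the KnownBits queries above; the bit values chosen are illustrative:
  #include "llvm/Support/KnownBits.h"

  llvm::KnownBits Known(32);              // 32-bit value, all bits unknown
  Known.Zero.setHighBits(24);             // record that the top 24 bits are zero
  unsigned MaxActive = Known.countMaxActiveBits();  // the value needs at most 8 bits
  llvm::KnownBits Wide = Known.zext(64);  // widen; the new high bits are known zero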
This class contains a discriminated union of information about pointers in memory operands,...
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
Definition: Alignment.h:141
BitVector getReservedRegs(const MachineFunction &MF) const override
Register getFrameRegister(const MachineFunction &MF) const override
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasDisjoint() const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT, bool Value=true)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
bool CombineTo(SDValue O, SDValue N)