LLVM 19.0.0git
RISCVISelLowering.cpp
1//===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that RISC-V uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "RISCVISelLowering.h"
16#include "RISCV.h"
18#include "RISCVRegisterInfo.h"
19#include "RISCVSubtarget.h"
20#include "RISCVTargetMachine.h"
21#include "llvm/ADT/SmallSet.h"
22#include "llvm/ADT/Statistic.h"
36#include "llvm/IR/IRBuilder.h"
38#include "llvm/IR/IntrinsicsRISCV.h"
41#include "llvm/Support/Debug.h"
47#include <optional>
48
49using namespace llvm;
50
51#define DEBUG_TYPE "riscv-lower"
52
53STATISTIC(NumTailCalls, "Number of tail calls");
54
56 DEBUG_TYPE "-ext-max-web-size", cl::Hidden,
57 cl::desc("Give the maximum size (in number of nodes) of the web of "
58 "instructions that we will consider for VW expansion"),
59 cl::init(18));
60
61static cl::opt<bool>
62 AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden,
63 cl::desc("Allow the formation of VW_W operations (e.g., "
64 "VWADD_W) with splat constants"),
65 cl::init(false));
66
68 DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden,
69 cl::desc("Set the minimum number of repetitions of a divisor to allow "
70 "transformation to multiplications by the reciprocal"),
71 cl::init(2));
72
73static cl::opt<int>
75 cl::desc("Give the maximum number of instructions that we will "
76 "use for creating a floating-point immediate value"),
77 cl::init(2));
78
79static cl::opt<bool>
80 RV64LegalI32("riscv-experimental-rv64-legal-i32", cl::ReallyHidden,
81 cl::desc("Make i32 a legal type for SelectionDAG on RV64."));
82
84 const RISCVSubtarget &STI)
85 : TargetLowering(TM), Subtarget(STI) {
86
87 RISCVABI::ABI ABI = Subtarget.getTargetABI();
88 assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
89
90 if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
91 !Subtarget.hasStdExtF()) {
92 errs() << "Hard-float 'f' ABI can't be used for a target that "
93 "doesn't support the F instruction set extension (ignoring "
94 "target-abi)\n";
96 } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
97 !Subtarget.hasStdExtD()) {
98 errs() << "Hard-float 'd' ABI can't be used for a target that "
99 "doesn't support the D instruction set extension (ignoring "
100 "target-abi)\n";
101 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
102 }
103
104 switch (ABI) {
105 default:
106 report_fatal_error("Don't know how to lower this ABI");
115 break;
116 }
117
118 MVT XLenVT = Subtarget.getXLenVT();
119
120 // Set up the register classes.
121 addRegisterClass(XLenVT, &RISCV::GPRRegClass);
122 if (Subtarget.is64Bit() && RV64LegalI32)
123 addRegisterClass(MVT::i32, &RISCV::GPRRegClass);
124
125 if (Subtarget.hasStdExtZfhmin())
126 addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
127 if (Subtarget.hasStdExtZfbfmin())
128 addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass);
129 if (Subtarget.hasStdExtF())
130 addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
131 if (Subtarget.hasStdExtD())
132 addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
133 if (Subtarget.hasStdExtZhinxmin())
134 addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass);
135 if (Subtarget.hasStdExtZfinx())
136 addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass);
137 if (Subtarget.hasStdExtZdinx()) {
138 if (Subtarget.is64Bit())
139 addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
140 else
141 addRegisterClass(MVT::f64, &RISCV::GPRPairRegClass);
142 }
143
144 static const MVT::SimpleValueType BoolVecVTs[] = {
145 MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1,
146 MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
147 static const MVT::SimpleValueType IntVecVTs[] = {
148 MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8,
149 MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16,
150 MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
151 MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
152 MVT::nxv4i64, MVT::nxv8i64};
153 static const MVT::SimpleValueType F16VecVTs[] = {
154 MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16,
155 MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
156 static const MVT::SimpleValueType BF16VecVTs[] = {
157 MVT::nxv1bf16, MVT::nxv2bf16, MVT::nxv4bf16,
158 MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16};
159 static const MVT::SimpleValueType F32VecVTs[] = {
160 MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
161 static const MVT::SimpleValueType F64VecVTs[] = {
162 MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
163
164 if (Subtarget.hasVInstructions()) {
165 auto addRegClassForRVV = [this](MVT VT) {
166 // Disable the smallest fractional LMUL types if ELEN is less than
167 // RVVBitsPerBlock.
168 unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELen();
169 if (VT.getVectorMinNumElements() < MinElts)
170 return;
171
172 unsigned Size = VT.getSizeInBits().getKnownMinValue();
173 const TargetRegisterClass *RC;
175 RC = &RISCV::VRRegClass;
176 else if (Size == 2 * RISCV::RVVBitsPerBlock)
177 RC = &RISCV::VRM2RegClass;
178 else if (Size == 4 * RISCV::RVVBitsPerBlock)
179 RC = &RISCV::VRM4RegClass;
180 else if (Size == 8 * RISCV::RVVBitsPerBlock)
181 RC = &RISCV::VRM8RegClass;
182 else
183 llvm_unreachable("Unexpected size");
184
185 addRegisterClass(VT, RC);
186 };
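// [Editorial note] A worked example of the size-to-register-class mapping
// above (a sketch based only on the surrounding code, assuming
// RISCV::RVVBitsPerBlock == 64):
//   * With ELEN == 64, MinElts = 64/64 = 1 and every scalable VT is kept;
//     with ELEN == 32 (e.g. Zve32x), MinElts = 2 and single-element VTs such
//     as nxv1i8 are skipped because their fractional LMUL is unavailable.
//   * nxv4i16  has a known-minimum size of  64 bits -> VRRegClass   (LMUL<=1)
//   * nxv4i32  has a known-minimum size of 128 bits -> VRM2RegClass (LMUL=2)
//   * nxv16i32 has a known-minimum size of 512 bits -> VRM8RegClass (LMUL=8)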
187
188 for (MVT VT : BoolVecVTs)
189 addRegClassForRVV(VT);
190 for (MVT VT : IntVecVTs) {
191 if (VT.getVectorElementType() == MVT::i64 &&
192 !Subtarget.hasVInstructionsI64())
193 continue;
194 addRegClassForRVV(VT);
195 }
196
197 if (Subtarget.hasVInstructionsF16Minimal())
198 for (MVT VT : F16VecVTs)
199 addRegClassForRVV(VT);
200
201 if (Subtarget.hasVInstructionsBF16())
202 for (MVT VT : BF16VecVTs)
203 addRegClassForRVV(VT);
204
205 if (Subtarget.hasVInstructionsF32())
206 for (MVT VT : F32VecVTs)
207 addRegClassForRVV(VT);
208
209 if (Subtarget.hasVInstructionsF64())
210 for (MVT VT : F64VecVTs)
211 addRegClassForRVV(VT);
212
213 if (Subtarget.useRVVForFixedLengthVectors()) {
214 auto addRegClassForFixedVectors = [this](MVT VT) {
215 MVT ContainerVT = getContainerForFixedLengthVector(VT);
216 unsigned RCID = getRegClassIDForVecVT(ContainerVT);
217 const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
218 addRegisterClass(VT, TRI.getRegClass(RCID));
219 };
221 if (useRVVForFixedLengthVectorVT(VT))
222 addRegClassForFixedVectors(VT);
223
225 if (useRVVForFixedLengthVectorVT(VT))
226 addRegClassForFixedVectors(VT);
227 }
228 }
229
230 // Compute derived properties from the register classes.
232
234
236 MVT::i1, Promote);
237 // DAGCombiner can call isLoadExtLegal for types that aren't legal.
239 MVT::i1, Promote);
240
241 // TODO: add all necessary setOperationAction calls.
243
246 if (RV64LegalI32 && Subtarget.is64Bit())
250 if (RV64LegalI32 && Subtarget.is64Bit())
252
259
260 if (RV64LegalI32 && Subtarget.is64Bit())
262
264
267 if (RV64LegalI32 && Subtarget.is64Bit())
269
271
273
274 if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb())
275 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);
276
277 if (Subtarget.is64Bit()) {
279
280 if (!RV64LegalI32) {
283 MVT::i32, Custom);
285 MVT::i32, Custom);
286 if (!Subtarget.hasStdExtZbb())
288 } else {
290 if (Subtarget.hasStdExtZbb()) {
293 }
294 }
296 } else {
298 {RTLIB::SHL_I128, RTLIB::SRL_I128, RTLIB::SRA_I128, RTLIB::MUL_I128},
299 nullptr);
300 setLibcallName(RTLIB::MULO_I64, nullptr);
301 }
302
303 if (!Subtarget.hasStdExtM() && !Subtarget.hasStdExtZmmul()) {
305 if (RV64LegalI32 && Subtarget.is64Bit())
307 } else if (Subtarget.is64Bit()) {
309 if (!RV64LegalI32)
311 else
313 } else {
315 }
316
317 if (!Subtarget.hasStdExtM()) {
319 XLenVT, Expand);
320 if (RV64LegalI32 && Subtarget.is64Bit())
322 Promote);
323 } else if (Subtarget.is64Bit()) {
324 if (!RV64LegalI32)
326 {MVT::i8, MVT::i16, MVT::i32}, Custom);
327 }
328
329 if (RV64LegalI32 && Subtarget.is64Bit()) {
333 Expand);
334 }
335
338 Expand);
339
341 Custom);
342
343 if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
344 if (!RV64LegalI32 && Subtarget.is64Bit())
346 } else if (Subtarget.hasVendorXTHeadBb()) {
347 if (Subtarget.is64Bit())
350 } else if (Subtarget.hasVendorXCVbitmanip()) {
352 } else {
354 if (RV64LegalI32 && Subtarget.is64Bit())
356 }
357
358 // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
359 // pattern match it directly in isel.
361 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
362 Subtarget.hasVendorXTHeadBb())
363 ? Legal
364 : Expand);
365 if (RV64LegalI32 && Subtarget.is64Bit())
367 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
368 Subtarget.hasVendorXTHeadBb())
369 ? Promote
370 : Expand);
371
372
373 if (Subtarget.hasVendorXCVbitmanip()) {
375 } else {
376 // Zbkb can use rev8+brev8 to implement bitreverse.
378 Subtarget.hasStdExtZbkb() ? Custom : Expand);
379 }
380
381 if (Subtarget.hasStdExtZbb()) {
383 Legal);
384 if (RV64LegalI32 && Subtarget.is64Bit())
386 Promote);
387
388 if (Subtarget.is64Bit()) {
389 if (RV64LegalI32)
391 else
393 }
394 } else if (!Subtarget.hasVendorXCVbitmanip()) {
396 if (RV64LegalI32 && Subtarget.is64Bit())
398 }
399
400 if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
401 Subtarget.hasVendorXCVbitmanip()) {
402 // We need the custom lowering to make sure that the resulting sequence
403 // for the 32bit case is efficient on 64bit targets.
404 if (Subtarget.is64Bit()) {
405 if (RV64LegalI32) {
407 Subtarget.hasStdExtZbb() ? Legal : Promote);
408 if (!Subtarget.hasStdExtZbb())
410 } else
412 }
413 } else {
415 if (RV64LegalI32 && Subtarget.is64Bit())
417 }
418
419 if (!RV64LegalI32 && Subtarget.is64Bit() &&
420 !Subtarget.hasShortForwardBranchOpt())
422
423 // We can use PseudoCCSUB to implement ABS.
424 if (Subtarget.hasShortForwardBranchOpt())
426
427 if (!Subtarget.hasVendorXTHeadCondMov()) {
429 if (RV64LegalI32 && Subtarget.is64Bit())
431 }
432
433 static const unsigned FPLegalNodeTypes[] = {
440
441 static const ISD::CondCode FPCCToExpand[] = {
445
446 static const unsigned FPOpToExpand[] = {
448 ISD::FREM};
449
450 static const unsigned FPRndMode[] = {
453
454 if (Subtarget.hasStdExtZfhminOrZhinxmin())
456
457 static const unsigned ZfhminZfbfminPromoteOps[] = {
467
468 if (Subtarget.hasStdExtZfbfmin()) {
477 setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote);
479 // FIXME: Need to promote bf16 FCOPYSIGN to f32, but the
480 // DAGCombiner::visitFP_ROUND probably needs improvements first.
482 }
483
484 if (Subtarget.hasStdExtZfhminOrZhinxmin()) {
485 if (Subtarget.hasStdExtZfhOrZhinx()) {
486 setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
487 setOperationAction(FPRndMode, MVT::f16,
488 Subtarget.hasStdExtZfa() ? Legal : Custom);
491 } else {
492 setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote);
495 MVT::f16, Legal);
496 // FIXME: Need to promote f16 FCOPYSIGN to f32, but the
497 // DAGCombiner::visitFP_ROUND probably needs improvements first.
499 }
500
503 setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
506
508 Subtarget.hasStdExtZfa() ? Legal : Promote);
513 MVT::f16, Promote);
514
515 // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
516 // complete support for all operations in LegalizeDAG.
521 MVT::f16, Promote);
522
523 // We need to custom promote this.
524 if (Subtarget.is64Bit())
526
528 Subtarget.hasStdExtZfa() ? Legal : Custom);
529 }
530
531 if (Subtarget.hasStdExtFOrZfinx()) {
532 setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);
533 setOperationAction(FPRndMode, MVT::f32,
534 Subtarget.hasStdExtZfa() ? Legal : Custom);
535 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
539 setOperationAction(FPOpToExpand, MVT::f32, Expand);
540 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
541 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
542 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
543 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
547 Subtarget.isSoftFPABI() ? LibCall : Custom);
550
551 if (Subtarget.hasStdExtZfa()) {
554 } else {
556 }
557 }
558
559 if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())
561
562 if (Subtarget.hasStdExtDOrZdinx()) {
563 setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);
564
565 if (!Subtarget.is64Bit())
567
568 if (Subtarget.hasStdExtZfa()) {
569 setOperationAction(FPRndMode, MVT::f64, Legal);
572 } else {
573 if (Subtarget.is64Bit())
574 setOperationAction(FPRndMode, MVT::f64, Custom);
575
577 }
578
581 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
585 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
586 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
587 setOperationAction(FPOpToExpand, MVT::f64, Expand);
588 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
589 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
590 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
591 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
595 Subtarget.isSoftFPABI() ? LibCall : Custom);
598 }
599
600 if (Subtarget.is64Bit()) {
603 MVT::i32, Custom);
605 }
606
607 if (Subtarget.hasStdExtFOrZfinx()) {
609 Custom);
610
613 XLenVT, Legal);
614
615 if (RV64LegalI32 && Subtarget.is64Bit())
618 MVT::i32, Legal);
619
622 }
623
626 XLenVT, Custom);
627
629
630 if (Subtarget.is64Bit())
632
633 // TODO: On M-mode only targets, the cycle[h]/time[h] CSR may not be present.
634 // Unfortunately this can't be determined just from the ISA naming string.
636 Subtarget.is64Bit() ? Legal : Custom);
638 Subtarget.is64Bit() ? Legal : Custom);
639
642 if (Subtarget.is64Bit())
644
645 if (Subtarget.hasStdExtZicbop()) {
647 }
648
649 if (Subtarget.hasStdExtA()) {
651 if (Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas())
653 else
655 } else if (Subtarget.hasForcedAtomics()) {
657 } else {
659 }
660
662
664
665 if (Subtarget.hasVInstructions()) {
667
669 if (RV64LegalI32 && Subtarget.is64Bit())
671
672 // RVV intrinsics may have illegal operands.
673 // We also need to custom legalize vmv.x.s.
676 {MVT::i8, MVT::i16}, Custom);
677 if (Subtarget.is64Bit())
679 MVT::i32, Custom);
680 else
682 MVT::i64, Custom);
683
685 MVT::Other, Custom);
686
687 static const unsigned IntegerVPOps[] = {
688 ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
689 ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,
690 ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,
691 ISD::VP_XOR, ISD::VP_ASHR, ISD::VP_LSHR,
692 ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
693 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
694 ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
695 ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FP_TO_SINT,
696 ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
697 ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN,
698 ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX,
699 ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE,
700 ISD::VP_SADDSAT, ISD::VP_UADDSAT, ISD::VP_SSUBSAT,
701 ISD::VP_USUBSAT, ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF};
702
703 static const unsigned FloatingPointVPOps[] = {
704 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
705 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
706 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
707 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
708 ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP,
709 ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND,
710 ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM,
711 ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
712 ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
713 ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS,
714 ISD::VP_FMINIMUM, ISD::VP_FMAXIMUM, ISD::VP_LRINT,
715 ISD::VP_LLRINT, ISD::EXPERIMENTAL_VP_REVERSE,
716 ISD::EXPERIMENTAL_VP_SPLICE, ISD::VP_REDUCE_FMINIMUM,
717 ISD::VP_REDUCE_FMAXIMUM};
718
719 static const unsigned IntegerVecReduceOps[] = {
723
724 static const unsigned FloatingPointVecReduceOps[] = {
727
728 if (!Subtarget.is64Bit()) {
729 // We must custom-lower certain vXi64 operations on RV32 due to the vector
730 // element type being illegal.
732 MVT::i64, Custom);
733
734 setOperationAction(IntegerVecReduceOps, MVT::i64, Custom);
735
736 setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
737 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
738 ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
739 ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
740 MVT::i64, Custom);
741 }
742
743 for (MVT VT : BoolVecVTs) {
744 if (!isTypeLegal(VT))
745 continue;
746
748
749 // Mask VTs are custom-expanded into a series of standard nodes
753 VT, Custom);
754
756 Custom);
757
760 {ISD::SELECT_CC, ISD::VSELECT, ISD::VP_MERGE, ISD::VP_SELECT}, VT,
761 Expand);
762
763 setOperationAction({ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF}, VT,
764 Custom);
765
766 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);
767
770 Custom);
771
773 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
774 Custom);
775
776 // RVV has native int->float & float->int conversions where the
777 // element type sizes are within one power-of-two of each other. Any
778 // wider distances between type sizes have to be lowered as sequences
779 // which progressively narrow the gap in stages.
784 VT, Custom);
786 Custom);
787
788 // Expand all extending loads to types larger than this, and truncating
789 // stores from types larger than this.
791 setTruncStoreAction(VT, OtherVT, Expand);
793 OtherVT, Expand);
794 }
795
796 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
797 ISD::VP_TRUNCATE, ISD::VP_SETCC},
798 VT, Custom);
799
802
804
805 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
806 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
807
810 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));
811 }
812
813 for (MVT VT : IntVecVTs) {
814 if (!isTypeLegal(VT))
815 continue;
816
819
820 // Vectors implement MULHS/MULHU.
822
823 // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
824 if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
826
828 Legal);
829
831
832 // Custom-lower extensions and truncations from/to mask types.
834 VT, Custom);
835
836 // RVV has native int->float & float->int conversions where the
837 // element type sizes are within one power-of-two of each other. Any
838 // wider distances between type sizes have to be lowered as sequences
839 // which progressively narrow the gap in stages.
844 VT, Custom);
846 Custom);
849 VT, Legal);
850
851 // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
852 // nodes which truncate by one power of two at a time.
854
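// [Editorial note] Concrete illustration of the two comments above (a
// sketch): converting nxv2i8 to nxv2f64 cannot use a single RVV conversion
// because the element widths (8 vs. 64 bits) are more than one power of two
// apart, so it is lowered as a staged sequence that closes the gap one power
// of two at a time. Likewise, ISD::TRUNCATE of nxv2i64 to nxv2i8 becomes a
// chain of RISCVISD::TRUNCATE_VECTOR_VL nodes halving the element width each
// step: i64 -> i32 -> i16 -> i8.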
855 // Custom-lower insert/extract operations to simplify patterns.
857 Custom);
858
859 // Custom-lower reduction operations to set up the corresponding custom
860 // nodes' operands.
861 setOperationAction(IntegerVecReduceOps, VT, Custom);
862
863 setOperationAction(IntegerVPOps, VT, Custom);
864
866
868 VT, Custom);
869
871 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
872 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
873 VT, Custom);
874
877 VT, Custom);
878
881
883
885 setTruncStoreAction(VT, OtherVT, Expand);
887 OtherVT, Expand);
888 }
889
892
893 // Splice
895
896 if (Subtarget.hasStdExtZvkb()) {
898 setOperationAction(ISD::VP_BSWAP, VT, Custom);
899 } else {
900 setOperationAction({ISD::BSWAP, ISD::VP_BSWAP}, VT, Expand);
902 }
903
904 if (Subtarget.hasStdExtZvbb()) {
906 setOperationAction(ISD::VP_BITREVERSE, VT, Custom);
907 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
908 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
909 VT, Custom);
910 } else {
911 setOperationAction({ISD::BITREVERSE, ISD::VP_BITREVERSE}, VT, Expand);
913 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
914 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
915 VT, Expand);
916
917 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the element type of VT is
918 // in the range of f32.
919 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
920 if (isTypeLegal(FloatVT)) {
922 ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,
923 ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
924 VT, Custom);
925 }
926 }
927 }
928
929 // Expand various CCs to best match the RVV ISA, which natively supports UNE
930 // but no other unordered comparisons, and supports all ordered comparisons
931 // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
932 // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
933 // and we pattern-match those back to the "original", swapping operands once
934 // more. This way we catch both operations and both "vf" and "fv" forms with
935 // fewer patterns.
936 static const ISD::CondCode VFPCCToExpand[] = {
940 };
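// [Editorial note] Example of the expansion strategy described above (a
// sketch): RVV provides vmfne for SETUNE but has no single compare for
// SETONE, so ONE is expanded. SETOGT %a, %b is likewise expanded to
// SETOLT %b, %a, and the swapped-operand form is pattern-matched back during
// isel, so one set of LT/LE patterns covers GT/GE as well as both the "vf"
// and "fv" scalar-operand forms.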
941
942 // TODO: support more ops.
943 static const unsigned ZvfhminPromoteOps[] = {
951
952 // TODO: support more vp ops.
953 static const unsigned ZvfhminPromoteVPOps[] = {
954 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
955 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
956 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
957 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_SQRT,
958 ISD::VP_FMINNUM, ISD::VP_FMAXNUM, ISD::VP_FCEIL,
959 ISD::VP_FFLOOR, ISD::VP_FROUND, ISD::VP_FROUNDEVEN,
960 ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO, ISD::VP_FRINT,
961 ISD::VP_FNEARBYINT, ISD::VP_SETCC, ISD::VP_FMINIMUM,
962 ISD::VP_FMAXIMUM, ISD::VP_REDUCE_FMINIMUM, ISD::VP_REDUCE_FMAXIMUM};
963
964 // Sets common operation actions on RVV floating-point vector types.
965 const auto SetCommonVFPActions = [&](MVT VT) {
967 // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
968 // sizes are within one power-of-two of each other. Therefore conversions
969 // between vXf16 and vXf64 must be lowered as sequences which convert via
970 // vXf32.
973 // Custom-lower insert/extract operations to simplify patterns.
975 Custom);
976 // Expand various condition codes (explained above).
977 setCondCodeAction(VFPCCToExpand, VT, Expand);
978
981
985 VT, Custom);
986
987 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
988
989 // Expand FP operations that need libcalls.
1001
1003
1005
1007 VT, Custom);
1008
1010 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1011 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
1012 VT, Custom);
1013
1016
1019 VT, Custom);
1020
1023
1025
1026 setOperationAction(FloatingPointVPOps, VT, Custom);
1027
1029 Custom);
1032 VT, Legal);
1037 VT, Custom);
1038 };
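// [Editorial note] Sketch of the FP_ROUND/FP_EXTEND staging mentioned at the
// top of SetCommonVFPActions: extending nxv2f16 to nxv2f64 crosses more than
// one power of two of element width, so it is lowered in two steps through
// nxv2f32 (f16 -> f32 -> f64); rounding nxv2f64 to nxv2f16 similarly goes
// f64 -> f32 -> f16.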
1039
1040 // Sets common extload/truncstore actions on RVV floating-point vector
1041 // types.
1042 const auto SetCommonVFPExtLoadTruncStoreActions =
1043 [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
1044 for (auto SmallVT : SmallerVTs) {
1045 setTruncStoreAction(VT, SmallVT, Expand);
1046 setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
1047 }
1048 };
1049
1050 if (Subtarget.hasVInstructionsF16()) {
1051 for (MVT VT : F16VecVTs) {
1052 if (!isTypeLegal(VT))
1053 continue;
1054 SetCommonVFPActions(VT);
1055 }
1056 } else if (Subtarget.hasVInstructionsF16Minimal()) {
1057 for (MVT VT : F16VecVTs) {
1058 if (!isTypeLegal(VT))
1059 continue;
1062 Custom);
1063 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1064 setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
1065 Custom);
1068 ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
1069 VT, Custom);
1072 VT, Custom);
1073 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1075 // load/store
1077
1078 // Custom-split nxv32f16, since nxv32f32 is not legal.
1079 if (VT == MVT::nxv32f16) {
1080 setOperationAction(ZvfhminPromoteOps, VT, Custom);
1081 setOperationAction(ZvfhminPromoteVPOps, VT, Custom);
1082 continue;
1083 }
1084 // Add more promote ops.
1085 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1086 setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
1087 setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);
1088 }
1089 }
1090
1091 // TODO: Could we merge some code with zvfhmin?
1092 if (Subtarget.hasVInstructionsBF16()) {
1093 for (MVT VT : BF16VecVTs) {
1094 if (!isTypeLegal(VT))
1095 continue;
1097 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1099 Custom);
1102 VT, Custom);
1104 // TODO: Promote to fp32.
1105 }
1106 }
1107
1108 if (Subtarget.hasVInstructionsF32()) {
1109 for (MVT VT : F32VecVTs) {
1110 if (!isTypeLegal(VT))
1111 continue;
1112 SetCommonVFPActions(VT);
1113 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1114 }
1115 }
1116
1117 if (Subtarget.hasVInstructionsF64()) {
1118 for (MVT VT : F64VecVTs) {
1119 if (!isTypeLegal(VT))
1120 continue;
1121 SetCommonVFPActions(VT);
1122 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1123 SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
1124 }
1125 }
1126
1127 if (Subtarget.useRVVForFixedLengthVectors()) {
1129 if (!useRVVForFixedLengthVectorVT(VT))
1130 continue;
1131
1132 // By default everything must be expanded.
1133 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1136 setTruncStoreAction(VT, OtherVT, Expand);
1138 OtherVT, Expand);
1139 }
1140
1141 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1142 // expansion to a build_vector of 0s.
1144
1145 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1147 Custom);
1148
1150 Custom);
1151
1153 VT, Custom);
1154
1156
1158
1160
1162
1164
1166
1169 Custom);
1170
1172 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
1173 Custom);
1174
1176 {
1185 },
1186 VT, Custom);
1188 Custom);
1189
1191
1192 // The operations below differ between mask vectors and other vectors.
1193 if (VT.getVectorElementType() == MVT::i1) {
1194 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
1195 ISD::OR, ISD::XOR},
1196 VT, Custom);
1197
1198 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
1199 ISD::VP_SETCC, ISD::VP_TRUNCATE},
1200 VT, Custom);
1201
1202 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1203 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1204 continue;
1205 }
1206
1207 // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
1208 // it before type legalization for i64 vectors on RV32. It will then be
1209 // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
1210 // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
1211 // improvements first.
1212 if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
1215 }
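// [Editorial note] For example (a sketch): on RV32 a build_vector that splats
// one i64 value across v2i64 is combined to SPLAT_VECTOR before type
// legalization; type legalization then splits the illegal i64 scalar into
// SPLAT_VECTOR_PARTS carrying the low and high i32 halves, which the Custom
// handler installed here materializes into an RVV splat sequence.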
1216
1219
1220 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1221 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1222 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1223 ISD::VP_SCATTER},
1224 VT, Custom);
1225
1229 VT, Custom);
1230
1233
1235
1236 // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
1237 if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
1239
1242 VT, Custom);
1243
1246
1249
1250 // Custom-lower reduction operations to set up the corresponding custom
1251 // nodes' operands.
1255 VT, Custom);
1256
1257 setOperationAction(IntegerVPOps, VT, Custom);
1258
1259 if (Subtarget.hasStdExtZvkb())
1261
1262 if (Subtarget.hasStdExtZvbb()) {
1265 VT, Custom);
1266 } else {
1267 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the element type of VT is
1268 // in the range of f32.
1269 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1270 if (isTypeLegal(FloatVT))
1273 Custom);
1274 }
1275 }
1276
1278 // There are no extending loads or truncating stores.
1279 for (MVT InnerVT : MVT::fp_fixedlen_vector_valuetypes()) {
1280 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1281 setTruncStoreAction(VT, InnerVT, Expand);
1282 }
1283
1284 if (!useRVVForFixedLengthVectorVT(VT))
1285 continue;
1286
1287 // By default everything must be expanded.
1288 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1290
1291 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1292 // expansion to a build_vector of 0s.
1294
1295 if (VT.getVectorElementType() == MVT::f16 &&
1296 !Subtarget.hasVInstructionsF16()) {
1299 Custom);
1300 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1302 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1303 Custom);
1305 ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
1306 VT, Custom);
1309 VT, Custom);
1312 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1313 // Don't promote f16 vector operations to f32 if f32 vector type is
1314 // not legal.
1315 // TODO: could split the f16 vector into two vectors and do promotion.
1316 if (!isTypeLegal(F32VecVT))
1317 continue;
1318 setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
1319 setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);
1320 continue;
1321 }
1322
1323 if (VT.getVectorElementType() == MVT::bf16) {
1325 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1327 Custom);
1330 VT, Custom);
1332 // TODO: Promote to fp32.
1333 continue;
1334 }
1335
1336 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1338 Custom);
1339
1343 VT, Custom);
1344
1347 VT, Custom);
1348
1349 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1350 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1351 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1352 ISD::VP_SCATTER},
1353 VT, Custom);
1354
1359 VT, Custom);
1360
1362
1365 VT, Custom);
1366
1367 setCondCodeAction(VFPCCToExpand, VT, Expand);
1368
1372
1374
1375 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1376
1377 setOperationAction(FloatingPointVPOps, VT, Custom);
1378
1380 Custom);
1387 VT, Custom);
1388 }
1389
1390 // Custom-legalize bitcasts from fixed-length vectors to scalar types.
1391 setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32, MVT::i64},
1392 Custom);
1393 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1395 if (Subtarget.hasStdExtFOrZfinx())
1397 if (Subtarget.hasStdExtDOrZdinx())
1399 }
1400 }
1401
1402 if (Subtarget.hasStdExtA()) {
1404 if (RV64LegalI32 && Subtarget.is64Bit())
1406 }
1407
1408 if (Subtarget.hasForcedAtomics()) {
1409 // Force __sync libcalls to be emitted for atomic rmw/cas operations.
1415 XLenVT, LibCall);
1416 }
1417
1418 if (Subtarget.hasVendorXTHeadMemIdx()) {
1419 for (unsigned im : {ISD::PRE_INC, ISD::POST_INC}) {
1420 setIndexedLoadAction(im, MVT::i8, Legal);
1421 setIndexedStoreAction(im, MVT::i8, Legal);
1422 setIndexedLoadAction(im, MVT::i16, Legal);
1423 setIndexedStoreAction(im, MVT::i16, Legal);
1424 setIndexedLoadAction(im, MVT::i32, Legal);
1425 setIndexedStoreAction(im, MVT::i32, Legal);
1426
1427 if (Subtarget.is64Bit()) {
1428 setIndexedLoadAction(im, MVT::i64, Legal);
1429 setIndexedStoreAction(im, MVT::i64, Legal);
1430 }
1431 }
1432 }
1433
1434 // Function alignments.
1435 const Align FunctionAlignment(Subtarget.hasStdExtCOrZca() ? 2 : 4);
1436 setMinFunctionAlignment(FunctionAlignment);
1437 // Set preferred alignments.
1440
1444 if (Subtarget.is64Bit())
1446
1447 if (Subtarget.hasStdExtFOrZfinx())
1449
1450 if (Subtarget.hasStdExtZbb())
1452
1453 if (Subtarget.hasStdExtZbs() && Subtarget.is64Bit())
1455
1456 if (Subtarget.hasStdExtZbkb())
1458 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1460 if (Subtarget.hasStdExtFOrZfinx())
1463 if (Subtarget.hasVInstructions())
1465 ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL,
1468 ISD::EXPERIMENTAL_VP_REVERSE, ISD::MUL,
1471 if (Subtarget.hasVendorXTHeadMemPair())
1473 if (Subtarget.useRVVForFixedLengthVectors())
1475
1476 setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
1477 setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
1478
1479 // Disable strict node mutation.
1480 IsStrictFPEnabled = true;
1481}
1482
1484 LLVMContext &Context,
1485 EVT VT) const {
1486 if (!VT.isVector())
1487 return getPointerTy(DL);
1488 if (Subtarget.hasVInstructions() &&
1489 (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
1490 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
1492}
1493
1494MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
1495 return Subtarget.getXLenVT();
1496}
1497
1498// Return false if we can lower get_vector_length to a vsetvli intrinsic.
1499bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
1500 unsigned VF,
1501 bool IsScalable) const {
1502 if (!Subtarget.hasVInstructions())
1503 return true;
1504
1505 if (!IsScalable)
1506 return true;
1507
1508 if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())
1509 return true;
1510
1511 // Don't allow VF=1 if those types aren't legal.
1512 if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELen())
1513 return true;
1514
1515 // VLEN=32 support is incomplete.
1516 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
1517 return true;
1518
1519 // The maximum VF is for the smallest element width with LMUL=8.
1520 // VF must be a power of 2.
1521 unsigned MaxVF = (RISCV::RVVBitsPerBlock / 8) * 8;
1522 return VF > MaxVF || !isPowerOf2_32(VF);
1523}
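// [Editorial note] Worked example for the checks above (a sketch, assuming
// RISCV::RVVBitsPerBlock == 64): MaxVF = (64 / 8) * 8 = 64, so a scalable
// llvm.experimental.get.vector.length call with an i32 or XLEN-sized trip
// count and a power-of-two VF no larger than 64 is not expanded and can be
// selected to a vsetvli whose SEW/LMUL give the matching VLMAX; everything
// else falls back to the generic min(trip count, VF * vscale) expansion.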
1524
1525bool RISCVTargetLowering::shouldExpandCttzElements(EVT VT) const {
1526 return !Subtarget.hasVInstructions() ||
1527 VT.getVectorElementType() != MVT::i1 || !isTypeLegal(VT);
1528}
1529
1531 const CallInst &I,
1532 MachineFunction &MF,
1533 unsigned Intrinsic) const {
1534 auto &DL = I.getModule()->getDataLayout();
1535
1536 auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore,
1537 bool IsUnitStrided, bool UsePtrVal = false) {
1539 // We can't use ptrVal if the intrinsic can access memory before the
1540 // pointer. This means we can't use it for strided or indexed intrinsics.
1541 if (UsePtrVal)
1542 Info.ptrVal = I.getArgOperand(PtrOp);
1543 else
1544 Info.fallbackAddressSpace =
1545 I.getArgOperand(PtrOp)->getType()->getPointerAddressSpace();
1546 Type *MemTy;
1547 if (IsStore) {
1548 // Store value is the first operand.
1549 MemTy = I.getArgOperand(0)->getType();
1550 } else {
1551 // Use return type. If it's segment load, return type is a struct.
1552 MemTy = I.getType();
1553 if (MemTy->isStructTy())
1554 MemTy = MemTy->getStructElementType(0);
1555 }
1556 if (!IsUnitStrided)
1557 MemTy = MemTy->getScalarType();
1558
1559 Info.memVT = getValueType(DL, MemTy);
1560 Info.align = Align(DL.getTypeSizeInBits(MemTy->getScalarType()) / 8);
1562 Info.flags |=
1564 return true;
1565 };
1566
1567 if (I.hasMetadata(LLVMContext::MD_nontemporal))
1569
1571 switch (Intrinsic) {
1572 default:
1573 return false;
1574 case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
1575 case Intrinsic::riscv_masked_atomicrmw_add_i32:
1576 case Intrinsic::riscv_masked_atomicrmw_sub_i32:
1577 case Intrinsic::riscv_masked_atomicrmw_nand_i32:
1578 case Intrinsic::riscv_masked_atomicrmw_max_i32:
1579 case Intrinsic::riscv_masked_atomicrmw_min_i32:
1580 case Intrinsic::riscv_masked_atomicrmw_umax_i32:
1581 case Intrinsic::riscv_masked_atomicrmw_umin_i32:
1582 case Intrinsic::riscv_masked_cmpxchg_i32:
1584 Info.memVT = MVT::i32;
1585 Info.ptrVal = I.getArgOperand(0);
1586 Info.offset = 0;
1587 Info.align = Align(4);
1590 return true;
1591 case Intrinsic::riscv_masked_strided_load:
1592 return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ false,
1593 /*IsUnitStrided*/ false);
1594 case Intrinsic::riscv_masked_strided_store:
1595 return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ true,
1596 /*IsUnitStrided*/ false);
1597 case Intrinsic::riscv_seg2_load:
1598 case Intrinsic::riscv_seg3_load:
1599 case Intrinsic::riscv_seg4_load:
1600 case Intrinsic::riscv_seg5_load:
1601 case Intrinsic::riscv_seg6_load:
1602 case Intrinsic::riscv_seg7_load:
1603 case Intrinsic::riscv_seg8_load:
1604 return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,
1605 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1606 case Intrinsic::riscv_seg2_store:
1607 case Intrinsic::riscv_seg3_store:
1608 case Intrinsic::riscv_seg4_store:
1609 case Intrinsic::riscv_seg5_store:
1610 case Intrinsic::riscv_seg6_store:
1611 case Intrinsic::riscv_seg7_store:
1612 case Intrinsic::riscv_seg8_store:
1613 // Operands are (vec, ..., vec, ptr, vl)
1614 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1615 /*IsStore*/ true,
1616 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1617 case Intrinsic::riscv_vle:
1618 case Intrinsic::riscv_vle_mask:
1619 case Intrinsic::riscv_vleff:
1620 case Intrinsic::riscv_vleff_mask:
1621 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1622 /*IsStore*/ false,
1623 /*IsUnitStrided*/ true,
1624 /*UsePtrVal*/ true);
1625 case Intrinsic::riscv_vse:
1626 case Intrinsic::riscv_vse_mask:
1627 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1628 /*IsStore*/ true,
1629 /*IsUnitStrided*/ true,
1630 /*UsePtrVal*/ true);
1631 case Intrinsic::riscv_vlse:
1632 case Intrinsic::riscv_vlse_mask:
1633 case Intrinsic::riscv_vloxei:
1634 case Intrinsic::riscv_vloxei_mask:
1635 case Intrinsic::riscv_vluxei:
1636 case Intrinsic::riscv_vluxei_mask:
1637 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1638 /*IsStore*/ false,
1639 /*IsUnitStrided*/ false);
1640 case Intrinsic::riscv_vsse:
1641 case Intrinsic::riscv_vsse_mask:
1642 case Intrinsic::riscv_vsoxei:
1643 case Intrinsic::riscv_vsoxei_mask:
1644 case Intrinsic::riscv_vsuxei:
1645 case Intrinsic::riscv_vsuxei_mask:
1646 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1647 /*IsStore*/ true,
1648 /*IsUnitStrided*/ false);
1649 case Intrinsic::riscv_vlseg2:
1650 case Intrinsic::riscv_vlseg3:
1651 case Intrinsic::riscv_vlseg4:
1652 case Intrinsic::riscv_vlseg5:
1653 case Intrinsic::riscv_vlseg6:
1654 case Intrinsic::riscv_vlseg7:
1655 case Intrinsic::riscv_vlseg8:
1656 case Intrinsic::riscv_vlseg2ff:
1657 case Intrinsic::riscv_vlseg3ff:
1658 case Intrinsic::riscv_vlseg4ff:
1659 case Intrinsic::riscv_vlseg5ff:
1660 case Intrinsic::riscv_vlseg6ff:
1661 case Intrinsic::riscv_vlseg7ff:
1662 case Intrinsic::riscv_vlseg8ff:
1663 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1664 /*IsStore*/ false,
1665 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1666 case Intrinsic::riscv_vlseg2_mask:
1667 case Intrinsic::riscv_vlseg3_mask:
1668 case Intrinsic::riscv_vlseg4_mask:
1669 case Intrinsic::riscv_vlseg5_mask:
1670 case Intrinsic::riscv_vlseg6_mask:
1671 case Intrinsic::riscv_vlseg7_mask:
1672 case Intrinsic::riscv_vlseg8_mask:
1673 case Intrinsic::riscv_vlseg2ff_mask:
1674 case Intrinsic::riscv_vlseg3ff_mask:
1675 case Intrinsic::riscv_vlseg4ff_mask:
1676 case Intrinsic::riscv_vlseg5ff_mask:
1677 case Intrinsic::riscv_vlseg6ff_mask:
1678 case Intrinsic::riscv_vlseg7ff_mask:
1679 case Intrinsic::riscv_vlseg8ff_mask:
1680 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1681 /*IsStore*/ false,
1682 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1683 case Intrinsic::riscv_vlsseg2:
1684 case Intrinsic::riscv_vlsseg3:
1685 case Intrinsic::riscv_vlsseg4:
1686 case Intrinsic::riscv_vlsseg5:
1687 case Intrinsic::riscv_vlsseg6:
1688 case Intrinsic::riscv_vlsseg7:
1689 case Intrinsic::riscv_vlsseg8:
1690 case Intrinsic::riscv_vloxseg2:
1691 case Intrinsic::riscv_vloxseg3:
1692 case Intrinsic::riscv_vloxseg4:
1693 case Intrinsic::riscv_vloxseg5:
1694 case Intrinsic::riscv_vloxseg6:
1695 case Intrinsic::riscv_vloxseg7:
1696 case Intrinsic::riscv_vloxseg8:
1697 case Intrinsic::riscv_vluxseg2:
1698 case Intrinsic::riscv_vluxseg3:
1699 case Intrinsic::riscv_vluxseg4:
1700 case Intrinsic::riscv_vluxseg5:
1701 case Intrinsic::riscv_vluxseg6:
1702 case Intrinsic::riscv_vluxseg7:
1703 case Intrinsic::riscv_vluxseg8:
1704 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1705 /*IsStore*/ false,
1706 /*IsUnitStrided*/ false);
1707 case Intrinsic::riscv_vlsseg2_mask:
1708 case Intrinsic::riscv_vlsseg3_mask:
1709 case Intrinsic::riscv_vlsseg4_mask:
1710 case Intrinsic::riscv_vlsseg5_mask:
1711 case Intrinsic::riscv_vlsseg6_mask:
1712 case Intrinsic::riscv_vlsseg7_mask:
1713 case Intrinsic::riscv_vlsseg8_mask:
1714 case Intrinsic::riscv_vloxseg2_mask:
1715 case Intrinsic::riscv_vloxseg3_mask:
1716 case Intrinsic::riscv_vloxseg4_mask:
1717 case Intrinsic::riscv_vloxseg5_mask:
1718 case Intrinsic::riscv_vloxseg6_mask:
1719 case Intrinsic::riscv_vloxseg7_mask:
1720 case Intrinsic::riscv_vloxseg8_mask:
1721 case Intrinsic::riscv_vluxseg2_mask:
1722 case Intrinsic::riscv_vluxseg3_mask:
1723 case Intrinsic::riscv_vluxseg4_mask:
1724 case Intrinsic::riscv_vluxseg5_mask:
1725 case Intrinsic::riscv_vluxseg6_mask:
1726 case Intrinsic::riscv_vluxseg7_mask:
1727 case Intrinsic::riscv_vluxseg8_mask:
1728 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
1729 /*IsStore*/ false,
1730 /*IsUnitStrided*/ false);
1731 case Intrinsic::riscv_vsseg2:
1732 case Intrinsic::riscv_vsseg3:
1733 case Intrinsic::riscv_vsseg4:
1734 case Intrinsic::riscv_vsseg5:
1735 case Intrinsic::riscv_vsseg6:
1736 case Intrinsic::riscv_vsseg7:
1737 case Intrinsic::riscv_vsseg8:
1738 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1739 /*IsStore*/ true,
1740 /*IsUnitStrided*/ false);
1741 case Intrinsic::riscv_vsseg2_mask:
1742 case Intrinsic::riscv_vsseg3_mask:
1743 case Intrinsic::riscv_vsseg4_mask:
1744 case Intrinsic::riscv_vsseg5_mask:
1745 case Intrinsic::riscv_vsseg6_mask:
1746 case Intrinsic::riscv_vsseg7_mask:
1747 case Intrinsic::riscv_vsseg8_mask:
1748 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1749 /*IsStore*/ true,
1750 /*IsUnitStrided*/ false);
1751 case Intrinsic::riscv_vssseg2:
1752 case Intrinsic::riscv_vssseg3:
1753 case Intrinsic::riscv_vssseg4:
1754 case Intrinsic::riscv_vssseg5:
1755 case Intrinsic::riscv_vssseg6:
1756 case Intrinsic::riscv_vssseg7:
1757 case Intrinsic::riscv_vssseg8:
1758 case Intrinsic::riscv_vsoxseg2:
1759 case Intrinsic::riscv_vsoxseg3:
1760 case Intrinsic::riscv_vsoxseg4:
1761 case Intrinsic::riscv_vsoxseg5:
1762 case Intrinsic::riscv_vsoxseg6:
1763 case Intrinsic::riscv_vsoxseg7:
1764 case Intrinsic::riscv_vsoxseg8:
1765 case Intrinsic::riscv_vsuxseg2:
1766 case Intrinsic::riscv_vsuxseg3:
1767 case Intrinsic::riscv_vsuxseg4:
1768 case Intrinsic::riscv_vsuxseg5:
1769 case Intrinsic::riscv_vsuxseg6:
1770 case Intrinsic::riscv_vsuxseg7:
1771 case Intrinsic::riscv_vsuxseg8:
1772 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1773 /*IsStore*/ true,
1774 /*IsUnitStrided*/ false);
1775 case Intrinsic::riscv_vssseg2_mask:
1776 case Intrinsic::riscv_vssseg3_mask:
1777 case Intrinsic::riscv_vssseg4_mask:
1778 case Intrinsic::riscv_vssseg5_mask:
1779 case Intrinsic::riscv_vssseg6_mask:
1780 case Intrinsic::riscv_vssseg7_mask:
1781 case Intrinsic::riscv_vssseg8_mask:
1782 case Intrinsic::riscv_vsoxseg2_mask:
1783 case Intrinsic::riscv_vsoxseg3_mask:
1784 case Intrinsic::riscv_vsoxseg4_mask:
1785 case Intrinsic::riscv_vsoxseg5_mask:
1786 case Intrinsic::riscv_vsoxseg6_mask:
1787 case Intrinsic::riscv_vsoxseg7_mask:
1788 case Intrinsic::riscv_vsoxseg8_mask:
1789 case Intrinsic::riscv_vsuxseg2_mask:
1790 case Intrinsic::riscv_vsuxseg3_mask:
1791 case Intrinsic::riscv_vsuxseg4_mask:
1792 case Intrinsic::riscv_vsuxseg5_mask:
1793 case Intrinsic::riscv_vsuxseg6_mask:
1794 case Intrinsic::riscv_vsuxseg7_mask:
1795 case Intrinsic::riscv_vsuxseg8_mask:
1796 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1797 /*IsStore*/ true,
1798 /*IsUnitStrided*/ false);
1799 }
1800}
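// [Editorial note] Example of how the helper above is driven (a sketch): for
// Intrinsic::riscv_vle the call is SetRVVLoadStoreInfo(/*PtrOp*/ 1,
// /*IsStore*/ false, /*IsUnitStrided*/ true, /*UsePtrVal*/ true), so
// Info.memVT is the full vector result type and Info.ptrVal is the pointer
// argument. For an indexed store such as riscv_vsoxei the access is not
// unit-strided, so only the scalar element type is recorded in memVT and the
// pointer is exposed via fallbackAddressSpace rather than ptrVal, because the
// intrinsic may access memory before the pointer.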
1801
1803 const AddrMode &AM, Type *Ty,
1804 unsigned AS,
1805 Instruction *I) const {
1806 // No global is ever allowed as a base.
1807 if (AM.BaseGV)
1808 return false;
1809
1810 // RVV instructions only support register addressing.
1811 if (Subtarget.hasVInstructions() && isa<VectorType>(Ty))
1812 return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;
1813
1814 // Require a 12-bit signed offset.
1815 if (!isInt<12>(AM.BaseOffs))
1816 return false;
1817
1818 switch (AM.Scale) {
1819 case 0: // "r+i" or just "i", depending on HasBaseReg.
1820 break;
1821 case 1:
1822 if (!AM.HasBaseReg) // allow "r+i".
1823 break;
1824 return false; // disallow "r+r" or "r+r+i".
1825 default:
1826 return false;
1827 }
1828
1829 return true;
1830}
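// [Editorial note] Examples of what the hook above accepts (a sketch):
//   accepted: a plain base register, or base register + offset with the
//             offset in [-2048, 2047]; Scale == 1 without a base register is
//             treated as the same "r+i" form.
//   rejected: any global as the base, offsets that do not fit in a signed
//             12-bit immediate, "r+r" (Scale == 1 with a base register), and
//             any other scale.
//   For RVV vector types only bare register addressing is legal: no offset
//   and no scaled index.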
1831
1833 return isInt<12>(Imm);
1834}
1835
1837 return isInt<12>(Imm);
1838}
1839
1840// On RV32, 64-bit integers are split into their high and low parts and held
1841// in two different registers, so the trunc is free since the low register can
1842// just be used.
1843// FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of
1844// isTruncateFree?
1846 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
1847 return false;
1848 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
1849 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
1850 return (SrcBits == 64 && DestBits == 32);
1851}
1852
1854 // We consider i64->i32 free on RV64 since we have good selection of W
1855 // instructions that make promoting operations back to i64 free in many cases.
1856 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
1857 !DstVT.isInteger())
1858 return false;
1859 unsigned SrcBits = SrcVT.getSizeInBits();
1860 unsigned DestBits = DstVT.getSizeInBits();
1861 return (SrcBits == 64 && DestBits == 32);
1862}
1863
1865 // Zexts are free if they can be combined with a load.
1866 // Don't advertise i32->i64 zextload as being free for RV64. It interacts
1867 // poorly with type legalization of compares preferring sext.
1868 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
1869 EVT MemVT = LD->getMemoryVT();
1870 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
1871 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
1872 LD->getExtensionType() == ISD::ZEXTLOAD))
1873 return true;
1874 }
1875
1876 return TargetLowering::isZExtFree(Val, VT2);
1877}
1878
1880 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
1881}
1882
1884 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
1885}
1886
1888 return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXCVbitmanip();
1889}
1890
1892 return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
1893 Subtarget.hasVendorXCVbitmanip();
1894}
1895
1897 const Instruction &AndI) const {
1898 // We expect to be able to match a bit extraction instruction if the Zbs
1899 // extension is supported and the mask is a power of two. However, we
1900 // conservatively return false if the mask would fit in an ANDI instruction,
1901 // on the basis that it's possible the sinking+duplication of the AND in
1902 // CodeGenPrepare triggered by this hook wouldn't decrease the instruction
1903 // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
1904 if (!Subtarget.hasStdExtZbs() && !Subtarget.hasVendorXTHeadBs())
1905 return false;
1906 ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
1907 if (!Mask)
1908 return false;
1909 return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2();
1910}
1911
1913 EVT VT = Y.getValueType();
1914
1915 // FIXME: Support vectors once we have tests.
1916 if (VT.isVector())
1917 return false;
1918
1919 return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
1920 !isa<ConstantSDNode>(Y);
1921}
1922
1924 // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test.
1925 if (Subtarget.hasStdExtZbs())
1926 return X.getValueType().isScalarInteger();
1927 auto *C = dyn_cast<ConstantSDNode>(Y);
1928 // XTheadBs provides th.tst (similar to bexti), if Y is a constant
1929 if (Subtarget.hasVendorXTHeadBs())
1930 return C != nullptr;
1931 // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
1932 return C && C->getAPIntValue().ule(10);
1933}
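// [Editorial note] Sketch of the bit tests referred to above: with Zbs,
// "is bit 20 of a0 set?" can be  bexti a0, a0, 20  followed by bnez/beqz.
// Without Zbs/XTheadBs the same test uses  andi a0, a0, (1 << N)  plus
// snez/seqz, which is why the constant bit position is limited to ule(10):
// 1 << 10 == 1024 still fits ANDI's signed 12-bit immediate, while
// 1 << 11 == 2048 does not.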
1934
1936 EVT VT) const {
1937 // Only enable for rvv.
1938 if (!VT.isVector() || !Subtarget.hasVInstructions())
1939 return false;
1940
1941 if (VT.isFixedLengthVector() && !isTypeLegal(VT))
1942 return false;
1943
1944 return true;
1945}
1946
1948 Type *Ty) const {
1949 assert(Ty->isIntegerTy());
1950
1951 unsigned BitSize = Ty->getIntegerBitWidth();
1952 if (BitSize > Subtarget.getXLen())
1953 return false;
1954
1955 // Fast path, assume 32-bit immediates are cheap.
1956 int64_t Val = Imm.getSExtValue();
1957 if (isInt<32>(Val))
1958 return true;
1959
1960 // A constant pool entry may be more aligned than the load we're trying to
1961 // replace. If we don't support unaligned scalar mem, prefer the constant
1962 // pool.
1963 // TODO: Can the caller pass down the alignment?
1964 if (!Subtarget.enableUnalignedScalarMem())
1965 return true;
1966
1967 // Prefer to keep the load if it would require many instructions.
1968 // This uses the same threshold we use for constant pools but doesn't
1969 // check useConstantPoolForLargeInts.
1970 // TODO: Should we keep the load only when we're definitely going to emit a
1971 // constant pool?
1972
1974 return Seq.size() <= Subtarget.getMaxBuildIntsCost();
1975}
1976
1980 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
1981 SelectionDAG &DAG) const {
1982 // One interesting pattern that we'd want to form is 'bit extract':
1983 // ((1 >> Y) & 1) ==/!= 0
1984 // But we also need to be careful not to try to reverse that fold.
1985
1986 // Is this '((1 >> Y) & 1)'?
1987 if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
1988 return false; // Keep the 'bit extract' pattern.
1989
1990 // Will this be '((1 >> Y) & 1)' after the transform?
1991 if (NewShiftOpcode == ISD::SRL && CC->isOne())
1992 return true; // Do form the 'bit extract' pattern.
1993
1994 // If 'X' is a constant, and we transform, then we will immediately
1995 // try to undo the fold, thus causing endless combine loop.
1996 // So only do the transform if X is not a constant. This matches the default
1997 // implementation of this function.
1998 return !XC;
1999}
2000
2001bool RISCVTargetLowering::canSplatOperand(unsigned Opcode, int Operand) const {
2002 switch (Opcode) {
2003 case Instruction::Add:
2004 case Instruction::Sub:
2005 case Instruction::Mul:
2006 case Instruction::And:
2007 case Instruction::Or:
2008 case Instruction::Xor:
2009 case Instruction::FAdd:
2010 case Instruction::FSub:
2011 case Instruction::FMul:
2012 case Instruction::FDiv:
2013 case Instruction::ICmp:
2014 case Instruction::FCmp:
2015 return true;
2016 case Instruction::Shl:
2017 case Instruction::LShr:
2018 case Instruction::AShr:
2019 case Instruction::UDiv:
2020 case Instruction::SDiv:
2021 case Instruction::URem:
2022 case Instruction::SRem:
2023 case Instruction::Select:
2024 return Operand == 1;
2025 default:
2026 return false;
2027 }
2028}
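// [Editorial note] Sketch of what the table above encodes: commutative or
// reversible operations such as add, mul and fadd have .vx/.vf (and reversed)
// instruction forms, so either operand may be a splat; shifts, divisions,
// remainders and select only have a scalar form for the second operand (the
// shift amount, the divisor, or the value taken when the condition is true),
// so only Operand == 1 is accepted for them.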
2029
2030
2032 if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
2033 return false;
2034
2035 if (canSplatOperand(I->getOpcode(), Operand))
2036 return true;
2037
2038 auto *II = dyn_cast<IntrinsicInst>(I);
2039 if (!II)
2040 return false;
2041
2042 switch (II->getIntrinsicID()) {
2043 case Intrinsic::fma:
2044 case Intrinsic::vp_fma:
2045 return Operand == 0 || Operand == 1;
2046 case Intrinsic::vp_shl:
2047 case Intrinsic::vp_lshr:
2048 case Intrinsic::vp_ashr:
2049 case Intrinsic::vp_udiv:
2050 case Intrinsic::vp_sdiv:
2051 case Intrinsic::vp_urem:
2052 case Intrinsic::vp_srem:
2053 case Intrinsic::ssub_sat:
2054 case Intrinsic::vp_ssub_sat:
2055 case Intrinsic::usub_sat:
2056 case Intrinsic::vp_usub_sat:
2057 return Operand == 1;
2058 // These intrinsics are commutative.
2059 case Intrinsic::vp_add:
2060 case Intrinsic::vp_mul:
2061 case Intrinsic::vp_and:
2062 case Intrinsic::vp_or:
2063 case Intrinsic::vp_xor:
2064 case Intrinsic::vp_fadd:
2065 case Intrinsic::vp_fmul:
2066 case Intrinsic::vp_icmp:
2067 case Intrinsic::vp_fcmp:
2068 case Intrinsic::smin:
2069 case Intrinsic::vp_smin:
2070 case Intrinsic::umin:
2071 case Intrinsic::vp_umin:
2072 case Intrinsic::smax:
2073 case Intrinsic::vp_smax:
2074 case Intrinsic::umax:
2075 case Intrinsic::vp_umax:
2076 case Intrinsic::sadd_sat:
2077 case Intrinsic::vp_sadd_sat:
2078 case Intrinsic::uadd_sat:
2079 case Intrinsic::vp_uadd_sat:
2080 // These intrinsics have 'vr' versions.
2081 case Intrinsic::vp_sub:
2082 case Intrinsic::vp_fsub:
2083 case Intrinsic::vp_fdiv:
2084 return Operand == 0 || Operand == 1;
2085 default:
2086 return false;
2087 }
2088}
2089
2090/// Check if sinking \p I's operands to I's basic block is profitable, because
2091/// the operands can be folded into a target instruction, e.g.
2092/// splats of scalars can fold into vector instructions.
2094 Instruction *I, SmallVectorImpl<Use *> &Ops) const {
2095 using namespace llvm::PatternMatch;
2096
2097 if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
2098 return false;
2099
2100 // Don't sink splat operands if the target prefers not to. Some targets
2101 // require S2V transfer buffers, and we can run out of them copying the same
2102 // value repeatedly.
2103 // FIXME: It could still be worth doing if it would improve vector register
2104 // pressure and prevent a vector spill.
2105 if (!Subtarget.sinkSplatOperands())
2106 return false;
2107
2108 for (auto OpIdx : enumerate(I->operands())) {
2109 if (!canSplatOperand(I, OpIdx.index()))
2110 continue;
2111
2112 Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
2113 // Make sure we are not already sinking this operand
2114 if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
2115 continue;
2116
2117 // We are looking for a splat that can be sunk.
2119 m_Undef(), m_ZeroMask())))
2120 continue;
2121
2122 // Don't sink i1 splats.
2123 if (cast<VectorType>(Op->getType())->getElementType()->isIntegerTy(1))
2124 continue;
2125
2126 // All uses of the shuffle should be sunk to avoid duplicating it across GPR
2127 // and vector registers.
2128 for (Use &U : Op->uses()) {
2129 Instruction *Insn = cast<Instruction>(U.getUser());
2130 if (!canSplatOperand(Insn, U.getOperandNo()))
2131 return false;
2132 }
2133
2134 Ops.push_back(&Op->getOperandUse(0));
2135 Ops.push_back(&OpIdx.value());
2136 }
2137 return true;
2138}
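// [Editorial note] Illustration of the splat sinking performed above (a
// sketch in LLVM IR; the value names are made up):
//
//   entry:
//     %ins   = insertelement <vscale x 4 x i32> poison, i32 %x, i64 0
//     %splat = shufflevector <vscale x 4 x i32> %ins,
//                            <vscale x 4 x i32> poison,
//                            <vscale x 4 x i32> zeroinitializer
//     br label %loop
//   loop:
//     ...
//     %v = mul <vscale x 4 x i32> %a, %splat
//
// Sinking %ins/%splat into %loop next to the mul lets instruction selection
// fold the splat away and emit vmul.vx with %x kept in a scalar register,
// instead of materializing the splat in a vector register up front.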
2139
2141 unsigned Opc = VecOp.getOpcode();
2142
2143 // Assume target opcodes can't be scalarized.
2144 // TODO - do we have any exceptions?
2145 if (Opc >= ISD::BUILTIN_OP_END)
2146 return false;
2147
2148 // If the vector op is not supported, try to convert to scalar.
2149 EVT VecVT = VecOp.getValueType();
2150 if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
2151 return true;
2152
2153 // If the vector op is supported, but the scalar op is not, the transform may
2154 // not be worthwhile.
2155 // Permit a vector binary operation to be converted to a scalar binary
2156 // operation that is custom-lowered with an illegal type.
2157 EVT ScalarVT = VecVT.getScalarType();
2158 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT) ||
2159 isOperationCustom(Opc, ScalarVT);
2160}
2161
2163 const GlobalAddressSDNode *GA) const {
2164 // In order to maximise the opportunity for common subexpression elimination,
2165 // keep a separate ADD node for the global address offset instead of folding
2166 // it in the global address node. Later peephole optimisations may choose to
2167 // fold it back in when profitable.
2168 return false;
2169}
2170
2171 // Return one of the following:
2172// (1) `{0-31 value, false}` if FLI is available for Imm's type and FP value.
2173// (2) `{0-31 value, true}` if Imm is negative and FLI is available for its
2174// positive counterpart, which will be materialized from the first returned
2175 // element. The second returned element indicates that an FNEG should
2176 // follow.
2177// (3) `{-1, _}` if there is no way FLI can be used to materialize Imm.
2178std::pair<int, bool> RISCVTargetLowering::getLegalZfaFPImm(const APFloat &Imm,
2179 EVT VT) const {
2180 if (!Subtarget.hasStdExtZfa())
2181 return std::make_pair(-1, false);
2182
2183 bool IsSupportedVT = false;
2184 if (VT == MVT::f16) {
2185 IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh();
2186 } else if (VT == MVT::f32) {
2187 IsSupportedVT = true;
2188 } else if (VT == MVT::f64) {
2189 assert(Subtarget.hasStdExtD() && "Expect D extension");
2190 IsSupportedVT = true;
2191 }
2192
2193 if (!IsSupportedVT)
2194 return std::make_pair(-1, false);
2195
2197 if (Index < 0 && Imm.isNegative())
2198 // Try the combination of its positive counterpart + FNEG.
2199 return std::make_pair(RISCVLoadFPImm::getLoadFPImm(-Imm), true);
2200 else
2201 return std::make_pair(Index, false);
2202}
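// [Editorial note] Example use of the result above (a sketch, with the exact
// encoding index left symbolic): with Zfa, +2.0f is one of the 32 FLI
// constants but -2.0f is not, so getLegalZfaFPImm(-2.0f, MVT::f32) returns
// {index_of(+2.0), true}; the caller materializes +2.0 with fli.s and then
// negates it with fneg.s. For a value where neither it nor its negation has
// an FLI encoding, the first element is -1 and the caller falls back to
// ordinary constant materialization.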
2203
2205 bool ForCodeSize) const {
2206 bool IsLegalVT = false;
2207 if (VT == MVT::f16)
2208 IsLegalVT = Subtarget.hasStdExtZfhminOrZhinxmin();
2209 else if (VT == MVT::f32)
2210 IsLegalVT = Subtarget.hasStdExtFOrZfinx();
2211 else if (VT == MVT::f64)
2212 IsLegalVT = Subtarget.hasStdExtDOrZdinx();
2213 else if (VT == MVT::bf16)
2214 IsLegalVT = Subtarget.hasStdExtZfbfmin();
2215
2216 if (!IsLegalVT)
2217 return false;
2218
2219 if (getLegalZfaFPImm(Imm, VT).first >= 0)
2220 return true;
2221
2222 // Cannot create a 64 bit floating-point immediate value for rv32.
2223 if (Subtarget.getXLen() < VT.getScalarSizeInBits()) {
2224 // td can handle +0.0 or -0.0 already.
2225 // -0.0 can be created by fmv + fneg.
2226 return Imm.isZero();
2227 }
2228
2229 // Special case: fmv + fneg
2230 if (Imm.isNegZero())
2231 return true;
2232
2233 // Building an integer and then converting requires a fmv at the end of
2234 // the integer sequence.
2235 const int Cost =
2236 1 + RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(), Subtarget.getXLen(),
2237 Subtarget);
2238 return Cost <= FPImmCost;
2239}
2240
2241// TODO: This is very conservative.
2242 bool RISCVTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
2243 unsigned Index) const {
2245 return false;
2246
2247 // Only support extracting a fixed from a fixed vector for now.
2248 if (ResVT.isScalableVector() || SrcVT.isScalableVector())
2249 return false;
2250
2251 EVT EltVT = ResVT.getVectorElementType();
2252 assert(EltVT == SrcVT.getVectorElementType() && "Should hold for node");
2253
2254 // The smallest type we can slide is i8.
2255 // TODO: We can extract index 0 from a mask vector without a slide.
2256 if (EltVT == MVT::i1)
2257 return false;
2258
2259 unsigned ResElts = ResVT.getVectorNumElements();
2260 unsigned SrcElts = SrcVT.getVectorNumElements();
2261
2262 unsigned MinVLen = Subtarget.getRealMinVLen();
2263 unsigned MinVLMAX = MinVLen / EltVT.getSizeInBits();
2264
2265 // If we're extracting only data from the first VLEN bits of the source
2266 // then we can always do this with an m1 vslidedown.vx. Restricting the
2267 // Index ensures we can use a vslidedown.vi.
2268 // TODO: We can generalize this when the exact VLEN is known.
2269 if (Index + ResElts <= MinVLMAX && Index < 31)
2270 return true;
2271
2272 // Conservatively only handle extracting half of a vector.
2273 // TODO: For sizes which aren't multiples of VLEN sizes, this may not be
2274 // a cheap extract. However, this case is important in practice for
2275 // shuffled extracts of longer vectors. How should this be resolved?
2276 if ((ResElts * 2) != SrcElts)
2277 return false;
2278
2279 // Slide can support arbitrary index, but we only treat vslidedown.vi as
2280 // cheap.
2281 if (Index >= 32)
2282 return false;
2283
2284 // TODO: We can do arbitrary slidedowns, but for now only support extracting
2285 // the upper half of a vector until we have more test coverage.
2286 return Index == 0 || Index == ResElts;
2287}
2288
2289 MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
2290 CallingConv::ID CC,
2291 EVT VT) const {
2292 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2293 // We might still end up using a GPR but that will be decided based on ABI.
2294 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2295 !Subtarget.hasStdExtZfhminOrZhinxmin())
2296 return MVT::f32;
2297
2298 MVT PartVT = TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
2299
2300 if (RV64LegalI32 && Subtarget.is64Bit() && PartVT == MVT::i32)
2301 return MVT::i64;
2302
2303 return PartVT;
2304}
2305
2308 EVT VT) const {
2309 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2310 // We might still end up using a GPR but that will be decided based on ABI.
2311 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2312 !Subtarget.hasStdExtZfhminOrZhinxmin())
2313 return 1;
2314
2315 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
2316}
2317
2318 unsigned RISCVTargetLowering::getVectorTypeBreakdownForCallingConv(
2319 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
2320 unsigned &NumIntermediates, MVT &RegisterVT) const {
2321 unsigned NumRegs = TargetLowering::getVectorTypeBreakdownForCallingConv(
2322 Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
2323
2324 if (RV64LegalI32 && Subtarget.is64Bit() && IntermediateVT == MVT::i32)
2325 IntermediateVT = MVT::i64;
2326
2327 if (RV64LegalI32 && Subtarget.is64Bit() && RegisterVT == MVT::i32)
2328 RegisterVT = MVT::i64;
2329
2330 return NumRegs;
2331}
2332
2333// Changes the condition code and swaps operands if necessary, so the SetCC
2334// operation matches one of the comparisons supported directly by branches
2335// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
2336// with 1/-1.
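// Illustrative example of the single-bit test below: on RV64,
//   (seteq (and X, 0x1000), 0)
// has a power-of-2 mask that does not fit in an ANDI immediate, so bit 12 is
// shifted up to the sign bit and the branch becomes a signed compare with 0:
//   (setge (shl X, 51), 0)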
2337static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
2338 ISD::CondCode &CC, SelectionDAG &DAG) {
2339 // If this is a single bit test that can't be handled by ANDI, shift the
2340 // bit to be tested to the MSB and perform a signed compare with 0.
2341 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
2342 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
2343 isa<ConstantSDNode>(LHS.getOperand(1))) {
2344 uint64_t Mask = LHS.getConstantOperandVal(1);
2345 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
2346 unsigned ShAmt = 0;
2347 if (isPowerOf2_64(Mask)) {
2348 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
2349 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
2350 } else {
2351 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
2352 }
2353
2354 LHS = LHS.getOperand(0);
2355 if (ShAmt != 0)
2356 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
2357 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
2358 return;
2359 }
2360 }
2361
2362 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
2363 int64_t C = RHSC->getSExtValue();
2364 switch (CC) {
2365 default: break;
2366 case ISD::SETGT:
2367 // Convert X > -1 to X >= 0.
2368 if (C == -1) {
2369 RHS = DAG.getConstant(0, DL, RHS.getValueType());
2370 CC = ISD::SETGE;
2371 return;
2372 }
2373 break;
2374 case ISD::SETLT:
2375 // Convert X < 1 to 0 >= X.
2376 if (C == 1) {
2377 RHS = LHS;
2378 LHS = DAG.getConstant(0, DL, RHS.getValueType());
2379 CC = ISD::SETGE;
2380 return;
2381 }
2382 break;
2383 }
2384 }
2385
2386 switch (CC) {
2387 default:
2388 break;
2389 case ISD::SETGT:
2390 case ISD::SETLE:
2391 case ISD::SETUGT:
2392 case ISD::SETULE:
2393 CC = ISD::getSetCCSwappedOperands(CC);
2394 std::swap(LHS, RHS);
2395 break;
2396 }
2397}
2398
2399 RISCVII::VLMUL RISCVTargetLowering::getLMUL(MVT VT) {
2400 assert(VT.isScalableVector() && "Expecting a scalable vector type");
2401 unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
2402 if (VT.getVectorElementType() == MVT::i1)
2403 KnownSize *= 8;
2404
2405 switch (KnownSize) {
2406 default:
2407 llvm_unreachable("Invalid LMUL.");
2408 case 8:
2409 return RISCVII::VLMUL::LMUL_F8;
2410 case 16:
2411 return RISCVII::VLMUL::LMUL_F4;
2412 case 32:
2413 return RISCVII::VLMUL::LMUL_F2;
2414 case 64:
2415 return RISCVII::VLMUL::LMUL_1;
2416 case 128:
2417 return RISCVII::VLMUL::LMUL_2;
2418 case 256:
2419 return RISCVII::VLMUL::LMUL_4;
2420 case 512:
2421 return RISCVII::VLMUL::LMUL_8;
2422 }
2423}
2424
2425 unsigned RISCVTargetLowering::getRegClassIDForLMUL(RISCVII::VLMUL LMul) {
2426 switch (LMul) {
2427 default:
2428 llvm_unreachable("Invalid LMUL.");
2429 case RISCVII::VLMUL::LMUL_F8:
2430 case RISCVII::VLMUL::LMUL_F4:
2431 case RISCVII::VLMUL::LMUL_F2:
2432 case RISCVII::VLMUL::LMUL_1:
2433 return RISCV::VRRegClassID;
2434 case RISCVII::VLMUL::LMUL_2:
2435 return RISCV::VRM2RegClassID;
2436 case RISCVII::VLMUL::LMUL_4:
2437 return RISCV::VRM4RegClassID;
2438 case RISCVII::VLMUL::LMUL_8:
2439 return RISCV::VRM8RegClassID;
2440 }
2441}
2442
2443 unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
2444 RISCVII::VLMUL LMUL = getLMUL(VT);
2445 if (LMUL == RISCVII::VLMUL::LMUL_F8 ||
2446 LMUL == RISCVII::VLMUL::LMUL_F4 ||
2447 LMUL == RISCVII::VLMUL::LMUL_F2 ||
2448 LMUL == RISCVII::VLMUL::LMUL_1) {
2449 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
2450 "Unexpected subreg numbering");
2451 return RISCV::sub_vrm1_0 + Index;
2452 }
2453 if (LMUL == RISCVII::VLMUL::LMUL_2) {
2454 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
2455 "Unexpected subreg numbering");
2456 return RISCV::sub_vrm2_0 + Index;
2457 }
2458 if (LMUL == RISCVII::VLMUL::LMUL_4) {
2459 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
2460 "Unexpected subreg numbering");
2461 return RISCV::sub_vrm4_0 + Index;
2462 }
2463 llvm_unreachable("Invalid vector type.");
2464}
2465
2466 unsigned RISCVTargetLowering::getRegClassIDForVecVT(MVT VT) {
2467 if (VT.getVectorElementType() == MVT::i1)
2468 return RISCV::VRRegClassID;
2469 return getRegClassIDForLMUL(getLMUL(VT));
2470}
2471
2472// Attempt to decompose a subvector insert/extract between VecVT and
2473// SubVecVT via subregister indices. Returns the subregister index that
2474// can perform the subvector insert/extract with the given element index, as
2475// well as the index corresponding to any leftover subvectors that must be
2476// further inserted/extracted within the register class for SubVecVT.
2477std::pair<unsigned, unsigned>
2478 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2479 MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
2480 const RISCVRegisterInfo *TRI) {
2481 static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
2482 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
2483 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
2484 "Register classes not ordered");
2485 unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
2486 unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
2487 // Try to compose a subregister index that takes us from the incoming
2488 // LMUL>1 register class down to the outgoing one. At each step we halve
2489 // the LMUL:
2490 // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
2491 // Note that this is not guaranteed to find a subregister index, such as
2492 // when we are extracting from one VR type to another.
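// Illustrative walk-through of the example above: VecVT = nxv16i32 (m8),
// SubVecVT = nxv2i32 (m1), InsertExtractIdx = 12.
//   VRM4 step: halve to nxv8i32; 12 >= 8, so compose sub_vrm4_1, index -> 4.
//   VRM2 step: halve to nxv4i32; 4 >= 4, so compose sub_vrm2_1, index -> 0.
//   VR step:   halve to nxv2i32; 0 < 2, so compose sub_vrm1_0.
// Result: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0 with leftover index 0.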
2493 unsigned SubRegIdx = RISCV::NoSubRegister;
2494 for (const unsigned RCID :
2495 {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
2496 if (VecRegClassID > RCID && SubRegClassID <= RCID) {
2497 VecVT = VecVT.getHalfNumVectorElementsVT();
2498 bool IsHi =
2499 InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
2500 SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
2501 getSubregIndexByMVT(VecVT, IsHi));
2502 if (IsHi)
2503 InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
2504 }
2505 return {SubRegIdx, InsertExtractIdx};
2506}
2507
2508// Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
2509// stores for those types.
2510bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
2511 return !Subtarget.useRVVForFixedLengthVectors() ||
2512 (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
2513}
2514
2515 bool RISCVTargetLowering::isLegalElementTypeForRVV(EVT ScalarTy) const {
2516 if (!ScalarTy.isSimple())
2517 return false;
2518 switch (ScalarTy.getSimpleVT().SimpleTy) {
2519 case MVT::iPTR:
2520 return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true;
2521 case MVT::i8:
2522 case MVT::i16:
2523 case MVT::i32:
2524 return true;
2525 case MVT::i64:
2526 return Subtarget.hasVInstructionsI64();
2527 case MVT::f16:
2528 return Subtarget.hasVInstructionsF16();
2529 case MVT::f32:
2530 return Subtarget.hasVInstructionsF32();
2531 case MVT::f64:
2532 return Subtarget.hasVInstructionsF64();
2533 default:
2534 return false;
2535 }
2536}
2537
2538
2539unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const {
2540 return NumRepeatedDivisors;
2541}
2542
2543 static SDValue getVLOperand(SDValue Op) {
2544 assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2545 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
2546 "Unexpected opcode");
2547 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
2548 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
2549 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
2550 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
2551 if (!II)
2552 return SDValue();
2553 return Op.getOperand(II->VLOperand + 1 + HasChain);
2554}
2555
2556 static bool useRVVForFixedLengthVectorVT(MVT VT,
2557 const RISCVSubtarget &Subtarget) {
2558 assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
2559 if (!Subtarget.useRVVForFixedLengthVectors())
2560 return false;
2561
2562 // We only support a set of vector types with a consistent maximum fixed size
2563 // across all supported vector element types to avoid legalization issues.
2564 // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
2565 // fixed-length vector type we support is 1024 bytes.
2566 if (VT.getFixedSizeInBits() > 1024 * 8)
2567 return false;
2568
2569 unsigned MinVLen = Subtarget.getRealMinVLen();
2570
2571 MVT EltVT = VT.getVectorElementType();
2572
2573 // Don't use RVV for vectors we cannot scalarize if required.
2574 switch (EltVT.SimpleTy) {
2575 // i1 is supported but has different rules.
2576 default:
2577 return false;
2578 case MVT::i1:
2579 // Masks can only use a single register.
2580 if (VT.getVectorNumElements() > MinVLen)
2581 return false;
2582 MinVLen /= 8;
2583 break;
2584 case MVT::i8:
2585 case MVT::i16:
2586 case MVT::i32:
2587 break;
2588 case MVT::i64:
2589 if (!Subtarget.hasVInstructionsI64())
2590 return false;
2591 break;
2592 case MVT::f16:
2593 if (!Subtarget.hasVInstructionsF16Minimal())
2594 return false;
2595 break;
2596 case MVT::bf16:
2597 if (!Subtarget.hasVInstructionsBF16())
2598 return false;
2599 break;
2600 case MVT::f32:
2601 if (!Subtarget.hasVInstructionsF32())
2602 return false;
2603 break;
2604 case MVT::f64:
2605 if (!Subtarget.hasVInstructionsF64())
2606 return false;
2607 break;
2608 }
2609
2610 // Reject elements larger than ELEN.
2611 if (EltVT.getSizeInBits() > Subtarget.getELen())
2612 return false;
2613
2614 unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
2615 // Don't use RVV for types that don't fit.
2616 if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
2617 return false;
2618
2619 // TODO: Perhaps an artificial restriction, but worth having whilst getting
2620 // the base fixed length RVV support in place.
2621 if (!VT.isPow2VectorType())
2622 return false;
2623
2624 return true;
2625}
2626
2627bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
2628 return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
2629}
2630
2631// Return the largest legal scalable vector type that matches VT's element type.
2632 static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT,
2633 const RISCVSubtarget &Subtarget) {
2634 // This may be called before legal types are setup.
2635 assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
2636 useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
2637 "Expected legal fixed length vector!");
2638
2639 unsigned MinVLen = Subtarget.getRealMinVLen();
2640 unsigned MaxELen = Subtarget.getELen();
2641
2642 MVT EltVT = VT.getVectorElementType();
2643 switch (EltVT.SimpleTy) {
2644 default:
2645 llvm_unreachable("unexpected element type for RVV container");
2646 case MVT::i1:
2647 case MVT::i8:
2648 case MVT::i16:
2649 case MVT::i32:
2650 case MVT::i64:
2651 case MVT::bf16:
2652 case MVT::f16:
2653 case MVT::f32:
2654 case MVT::f64: {
2655 // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
2656 // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
2657 // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
2658 unsigned NumElts =
2659 (VT.getVectorNumElements() * RISCV::RVVBitsPerBlock) / MinVLen;
2660 NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
2661 assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
2662 return MVT::getScalableVectorVT(EltVT, NumElts);
2663 }
2664 }
2665}
2666
2667 static MVT getContainerForFixedLengthVector(SelectionDAG &DAG, MVT VT,
2668 const RISCVSubtarget &Subtarget) {
2669 return getContainerForFixedLengthVector(DAG.getTargetLoweringInfo(), VT,
2670 Subtarget);
2671}
2672
2673 MVT RISCVTargetLowering::getContainerForFixedLengthVector(MVT VT) const {
2674 return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());
2675}
2676
2677// Grow V to consume an entire RVV register.
2678 static SDValue convertToScalableVector(MVT VT, SDValue V, SelectionDAG &DAG,
2679 const RISCVSubtarget &Subtarget) {
2680 assert(VT.isScalableVector() &&
2681 "Expected to convert into a scalable vector!");
2682 assert(V.getValueType().isFixedLengthVector() &&
2683 "Expected a fixed length vector operand!");
2684 SDLoc DL(V);
2685 SDValue Zero = DAG.getVectorIdxConstant(0, DL);
2686 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
2687}
2688
2689// Shrink V so it's just big enough to maintain a VT's worth of data.
2690 static SDValue convertFromScalableVector(MVT VT, SDValue V, SelectionDAG &DAG,
2691 const RISCVSubtarget &Subtarget) {
2692 assert(VT.isFixedLengthVector() &&
2693 "Expected to convert into a fixed length vector!");
2694 assert(V.getValueType().isScalableVector() &&
2695 "Expected a scalable vector operand!");
2696 SDLoc DL(V);
2697 SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
2698 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
2699}
2700
2701 /// Return the mask type suitable for masking the provided
2702/// vector type. This is simply an i1 element type vector of the same
2703/// (possibly scalable) length.
2704static MVT getMaskTypeFor(MVT VecVT) {
2705 assert(VecVT.isVector());
2706 ElementCount EC = VecVT.getVectorElementCount();
2707 return MVT::getVectorVT(MVT::i1, EC);
2708}
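// For example (illustrative): getMaskTypeFor maps nxv4i32 to nxv4i1 and a
// fixed-length v8i16 to v8i1.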
2709
2710/// Creates an all ones mask suitable for masking a vector of type VecTy with
2711 /// vector length VL.
2712static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL,
2713 SelectionDAG &DAG) {
2714 MVT MaskVT = getMaskTypeFor(VecVT);
2715 return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
2716}
2717
2718static SDValue getVLOp(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
2719 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2720 // If we know the exact VLEN, and our VL is exactly equal to VLMAX,
2721 // canonicalize the representation. InsertVSETVLI will pick the immediate
2722 // encoding later if profitable.
2723 const auto [MinVLMAX, MaxVLMAX] =
2724 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
2725 if (MinVLMAX == MaxVLMAX && NumElts == MinVLMAX)
2726 return DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
2727
2728 return DAG.getConstant(NumElts, DL, Subtarget.getXLenVT());
2729}
2730
2731static std::pair<SDValue, SDValue>
2732 getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG,
2733 const RISCVSubtarget &Subtarget) {
2734 assert(VecVT.isScalableVector() && "Expecting a scalable vector");
2735 SDValue VL = DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
2736 SDValue Mask = getAllOnesMask(VecVT, VL, DL, DAG);
2737 return {Mask, VL};
2738}
2739
2740static std::pair<SDValue, SDValue>
2741getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
2742 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2743 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2744 SDValue VL = getVLOp(NumElts, ContainerVT, DL, DAG, Subtarget);
2745 SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
2746 return {Mask, VL};
2747}
2748
2749// Gets the two common "VL" operands: an all-ones mask and the vector length.
2750// VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
2751// the vector type that the fixed-length vector is contained in. Otherwise if
2752// VecVT is scalable, then ContainerVT should be the same as VecVT.
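// For example (illustrative): for a fixed-length v4i32 contained in nxv2i32,
// the VL operand is the constant 4 (or the VLMAX/X0 form when the exact VLEN
// is known and 4 == VLMAX) and the mask is an all-ones nxv2i1 built with
// VMSET_VL.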
2753static std::pair<SDValue, SDValue>
2754getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG,
2755 const RISCVSubtarget &Subtarget) {
2756 if (VecVT.isFixedLengthVector())
2757 return getDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT, DL, DAG,
2758 Subtarget);
2759 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2760 return getDefaultScalableVLOps(ContainerVT, DL, DAG, Subtarget);
2761}
2762
2763 SDValue RISCVTargetLowering::computeVLMax(MVT VecVT, const SDLoc &DL,
2764 SelectionDAG &DAG) const {
2765 assert(VecVT.isScalableVector() && "Expected scalable vector");
2766 return DAG.getElementCount(DL, Subtarget.getXLenVT(),
2767 VecVT.getVectorElementCount());
2768}
2769
2770std::pair<unsigned, unsigned>
2771 RISCVTargetLowering::computeVLMAXBounds(MVT VecVT,
2772 const RISCVSubtarget &Subtarget) {
2773 assert(VecVT.isScalableVector() && "Expected scalable vector");
2774
2775 unsigned EltSize = VecVT.getScalarSizeInBits();
2776 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
2777
2778 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
2779 unsigned MaxVLMAX =
2780 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
2781
2782 unsigned VectorBitsMin = Subtarget.getRealMinVLen();
2783 unsigned MinVLMAX =
2784 RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize);
2785
2786 return std::make_pair(MinVLMAX, MaxVLMAX);
2787}
2788
2789// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
2790// of either is (currently) supported. This can get us into an infinite loop
2791// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
2792// as a ..., etc.
2793// Until either (or both) of these can reliably lower any node, reporting that
2794// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
2795// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
2796// which is not desirable.
2797 bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles(
2798 EVT VT, unsigned DefinedValues) const {
2799 return false;
2800}
2801
2802 InstructionCost RISCVTargetLowering::getLMULCost(MVT VT) const {
2803 // TODO: Here assume reciprocal throughput is 1 for LMUL_1; it is
2804 // implementation-defined.
2805 if (!VT.isVector())
2806 return InstructionCost::getInvalid();
2807 unsigned DLenFactor = Subtarget.getDLenFactor();
2808 unsigned Cost;
2809 if (VT.isScalableVector()) {
2810 unsigned LMul;
2811 bool Fractional;
2812 std::tie(LMul, Fractional) =
2814 if (Fractional)
2815 Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
2816 else
2817 Cost = (LMul * DLenFactor);
2818 } else {
2819 Cost = divideCeil(VT.getSizeInBits(), Subtarget.getRealMinVLen() / DLenFactor);
2820 }
2821 return Cost;
2822}
2823
2824
2825/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
2826 /// is generally quadratic in the number of vregs implied by LMUL. Note that
2827 /// the operands (index and possibly mask) are handled separately.
2828 InstructionCost RISCVTargetLowering::getVRGatherVVCost(MVT VT) const {
2829 return getLMULCost(VT) * getLMULCost(VT);
2830}
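// For example (illustrative): with DLEN == VLEN (DLenFactor == 1), an LMUL=4
// type has getLMULCost == 4, so the vrgather.vv estimate above is 4 * 4 = 16,
// while the vrgather.vi/vx and vslide estimates below remain linear at 4.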
2831
2832/// Return the cost of a vrgather.vi (or vx) instruction for the type VT.
2833/// vrgather.vi/vx may be linear in the number of vregs implied by LMUL,
2834/// or may track the vrgather.vv cost. It is implementation-dependent.
2835 InstructionCost RISCVTargetLowering::getVRGatherVXCost(MVT VT) const {
2836 return getLMULCost(VT);
2837}
2838
2839/// Return the cost of a vslidedown.vx or vslideup.vx instruction
2840/// for the type VT. (This does not cover the vslide1up or vslide1down
2841/// variants.) Slides may be linear in the number of vregs implied by LMUL,
2842/// or may track the vrgather.vv cost. It is implementation-dependent.
2843 InstructionCost RISCVTargetLowering::getVSlideVXCost(MVT VT) const {
2844 return getLMULCost(VT);
2845}
2846
2847/// Return the cost of a vslidedown.vi or vslideup.vi instruction
2848/// for the type VT. (This does not cover the vslide1up or vslide1down
2849/// variants.) Slides may be linear in the number of vregs implied by LMUL,
2850/// or may track the vrgather.vv cost. It is implementation-dependent.
2851 InstructionCost RISCVTargetLowering::getVSlideVICost(MVT VT) const {
2852 return getLMULCost(VT);
2853}
2854
2855 static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,
2856 const RISCVSubtarget &Subtarget) {
2857 // RISC-V FP-to-int conversions saturate to the destination register size, but
2858 // don't produce 0 for nan. We can use a conversion instruction and fix the
2859 // nan case with a compare and a select.
2860 SDValue Src = Op.getOperand(0);
2861
2862 MVT DstVT = Op.getSimpleValueType();
2863 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2864
2865 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
2866
2867 if (!DstVT.isVector()) {
2868 // For bf16, or for f16 in the absence of Zfh, promote to f32, then saturate
2869 // the result.
2870 if ((Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
2871 Src.getValueType() == MVT::bf16) {
2872 Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src);
2873 }
2874
2875 unsigned Opc;
2876 if (SatVT == DstVT)
2877 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
2878 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
2879 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
2880 else
2881 return SDValue();
2882 // FIXME: Support other SatVTs by clamping before or after the conversion.
2883
2884 SDLoc DL(Op);
2885 SDValue FpToInt = DAG.getNode(
2886 Opc, DL, DstVT, Src,
2888
2889 if (Opc == RISCVISD::FCVT_WU_RV64)
2890 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
2891
2892 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
2893 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt,
2894 ISD::CondCode::SETUO);
2895 }
2896
2897 // Vectors.
2898
2899 MVT DstEltVT = DstVT.getVectorElementType();
2900 MVT SrcVT = Src.getSimpleValueType();
2901 MVT SrcEltVT = SrcVT.getVectorElementType();
2902 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
2903 unsigned DstEltSize = DstEltVT.getSizeInBits();
2904
2905 // Only handle saturating to the destination type.
2906 if (SatVT != DstEltVT)
2907 return SDValue();
2908
2909 // FIXME: Don't support narrowing by more than 1 step for now.
2910 if (SrcEltSize > (2 * DstEltSize))
2911 return SDValue();
2912
2913 MVT DstContainerVT = DstVT;
2914 MVT SrcContainerVT = SrcVT;
2915 if (DstVT.isFixedLengthVector()) {
2916 DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
2917 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
2918 assert(DstContainerVT.getVectorElementCount() ==
2919 SrcContainerVT.getVectorElementCount() &&
2920 "Expected same element count");
2921 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
2922 }
2923
2924 SDLoc DL(Op);
2925
2926 auto [Mask, VL] = getDefaultVLOps(DstVT, DstContainerVT, DL, DAG, Subtarget);
2927
2928 SDValue IsNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
2929 {Src, Src, DAG.getCondCode(ISD::SETNE),
2930 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
2931
2932 // Need to widen by more than 1 step, promote the FP type, then do a widening
2933 // convert.
2934 if (DstEltSize > (2 * SrcEltSize)) {
2935 assert(SrcContainerVT.getVectorElementType() == MVT::f16 && "Unexpected VT!");
2936 MVT InterVT = SrcContainerVT.changeVectorElementType(MVT::f32);
2937 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterVT, Src, Mask, VL);
2938 }
2939
2940 unsigned RVVOpc =
2941 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
2942 SDValue Res = DAG.getNode(RVVOpc, DL, DstContainerVT, Src, Mask, VL);
2943
2944 SDValue SplatZero = DAG.getNode(
2945 RISCVISD::VMV_V_X_VL, DL, DstContainerVT, DAG.getUNDEF(DstContainerVT),
2946 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
2947 Res = DAG.getNode(RISCVISD::VMERGE_VL, DL, DstContainerVT, IsNan, SplatZero,
2948 Res, DAG.getUNDEF(DstContainerVT), VL);
2949
2950 if (DstVT.isFixedLengthVector())
2951 Res = convertFromScalableVector(DstVT, Res, DAG, Subtarget);
2952
2953 return Res;
2954}
2955
2956 static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc) {
2957 switch (Opc) {
2958 case ISD::FROUNDEVEN:
2959 case ISD::STRICT_FROUNDEVEN:
2960 case ISD::VP_FROUNDEVEN:
2961 return RISCVFPRndMode::RNE;
2962 case ISD::FTRUNC:
2963 case ISD::STRICT_FTRUNC:
2964 case ISD::VP_FROUNDTOZERO:
2965 return RISCVFPRndMode::RTZ;
2966 case ISD::FFLOOR:
2967 case ISD::STRICT_FFLOOR:
2968 case ISD::VP_FFLOOR:
2969 return RISCVFPRndMode::RDN;
2970 case ISD::FCEIL:
2971 case ISD::STRICT_FCEIL:
2972 case ISD::VP_FCEIL:
2973 return RISCVFPRndMode::RUP;
2974 case ISD::FROUND:
2975 case ISD::STRICT_FROUND:
2976 case ISD::VP_FROUND:
2977 return RISCVFPRndMode::RMM;
2978 case ISD::FRINT:
2979 return RISCVFPRndMode::DYN;
2980 }
2981
2982 return RISCVFPRndMode::Invalid;
2983}
2984
2985// Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND
2986// VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to
2987// the integer domain and back. Taking care to avoid converting values that are
2988// nan or already correct.
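// Illustrative walk-through for FFLOOR on f64 <2.7, -1.5, NaN, 1.0e30>: the
// lanes NaN and 1.0e30 fail the "abs(x) < 2^52" check and are masked off, so
// only 2.7 and -1.5 are converted to integers with RDN rounding (2 and -2),
// converted back to FP, and have their original signs restored, giving
// <2.0, -2.0, NaN, 1.0e30>.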
2989static SDValue
2990 lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
2991 const RISCVSubtarget &Subtarget) {
2992 MVT VT = Op.getSimpleValueType();
2993 assert(VT.isVector() && "Unexpected type");
2994
2995 SDLoc DL(Op);
2996
2997 SDValue Src = Op.getOperand(0);
2998
2999 MVT ContainerVT = VT;
3000 if (VT.isFixedLengthVector()) {
3001 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3002 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3003 }
3004
3005 SDValue Mask, VL;
3006 if (Op->isVPOpcode()) {
3007 Mask = Op.getOperand(1);
3008 if (VT.isFixedLengthVector())
3009 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
3010 Subtarget);
3011 VL = Op.getOperand(2);
3012 } else {
3013 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3014 }
3015
3016 // Freeze the source since we are increasing the number of uses.
3017 Src = DAG.getFreeze(Src);
3018
3019 // We do the conversion on the absolute value and fix the sign at the end.
3020 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3021
3022 // Determine the largest integer that can be represented exactly. This and
3023 // values larger than it don't have any fractional bits so don't need to
3024 // be converted.
3025 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT);
3026 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3027 APFloat MaxVal = APFloat(FltSem);
3028 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3029 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3030 SDValue MaxValNode =
3031 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3032 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3033 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3034
3035 // If abs(Src) was larger than MaxVal or nan, keep it.
3036 MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
3037 Mask =
3038 DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT,
3039 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT),
3040 Mask, Mask, VL});
3041
3042 // Truncate to integer and convert back to FP.
3043 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3044 MVT XLenVT = Subtarget.getXLenVT();
3045 SDValue Truncated;
3046
3047 switch (Op.getOpcode()) {
3048 default:
3049 llvm_unreachable("Unexpected opcode");
3050 case ISD::FCEIL:
3051 case ISD::VP_FCEIL:
3052 case ISD::FFLOOR:
3053 case ISD::VP_FFLOOR:
3054 case ISD::FROUND:
3055 case ISD::FROUNDEVEN:
3056 case ISD::VP_FROUND:
3057 case ISD::VP_FROUNDEVEN:
3058 case ISD::VP_FROUNDTOZERO: {
3059 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
3060 assert(FRM != RISCVFPRndMode::Invalid);
3061 Truncated = DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, IntVT, Src, Mask,
3062 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
3063 break;
3064 }
3065 case ISD::FTRUNC:
3066 Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src,
3067 Mask, VL);
3068 break;
3069 case ISD::FRINT:
3070 case ISD::VP_FRINT:
3071 Truncated = DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, IntVT, Src, Mask, VL);
3072 break;
3073 case ISD::FNEARBYINT:
3074 case ISD::VP_FNEARBYINT:
3075 Truncated = DAG.getNode(RISCVISD::VFROUND_NOEXCEPT_VL, DL, ContainerVT, Src,
3076 Mask, VL);
3077 break;
3078 }
3079
3080 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3081 if (Truncated.getOpcode() != RISCVISD::VFROUND_NOEXCEPT_VL)
3082 Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
3083 Mask, VL);
3084
3085 // Restore the original sign so that -0.0 is preserved.
3086 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3087 Src, Src, Mask, VL);
3088
3089 if (!VT.isFixedLengthVector())
3090 return Truncated;
3091
3092 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3093}
3094
3095// Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND
3096 // STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting sNaNs in the source to
3097 // qNaNs and converting the new source to integer and back to FP.
3098static SDValue
3099 lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
3100 const RISCVSubtarget &Subtarget) {
3101 SDLoc DL(Op);
3102 MVT VT = Op.getSimpleValueType();
3103 SDValue Chain = Op.getOperand(0);
3104 SDValue Src = Op.getOperand(1);
3105
3106 MVT ContainerVT = VT;
3107 if (VT.isFixedLengthVector()) {
3108 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3109 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3110 }
3111
3112 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3113
3114 // Freeze the source since we are increasing the number of uses.
3115 Src = DAG.getFreeze(Src);
3116
3117 // Convert sNaN to qNaN by executing x + x for every unordered element x in Src.
3118 MVT MaskVT = Mask.getSimpleValueType();
3119 SDValue Unorder = DAG.getNode(RISCVISD::STRICT_FSETCC_VL, DL,
3120 DAG.getVTList(MaskVT, MVT::Other),
3121 {Chain, Src, Src, DAG.getCondCode(ISD::SETUNE),
3122 DAG.getUNDEF(MaskVT), Mask, VL});
3123 Chain = Unorder.getValue(1);
3124 Src = DAG.getNode(RISCVISD::STRICT_FADD_VL, DL,
3125 DAG.getVTList(ContainerVT, MVT::Other),
3126 {Chain, Src, Src, DAG.getUNDEF(ContainerVT), Unorder, VL});
3127 Chain = Src.getValue(1);
3128
3129 // We do the conversion on the absolute value and fix the sign at the end.
3130 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3131
3132 // Determine the largest integer that can be represented exactly. This and
3133 // values larger than it don't have any fractional bits so don't need to
3134 // be converted.
3135 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT);
3136 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3137 APFloat MaxVal = APFloat(FltSem);
3138 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3139 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3140 SDValue MaxValNode =
3141 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3142 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3143 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3144
3145 // If abs(Src) was larger than MaxVal or nan, keep it.
3146 Mask = DAG.getNode(
3147 RISCVISD::SETCC_VL, DL, MaskVT,
3148 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});
3149
3150 // Truncate to integer and convert back to FP.
3151 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3152 MVT XLenVT = Subtarget.getXLenVT();
3153 SDValue Truncated;
3154
3155 switch (Op.getOpcode()) {
3156 default:
3157 llvm_unreachable("Unexpected opcode");
3158 case ISD::STRICT_FCEIL:
3159 case ISD::STRICT_FFLOOR:
3160 case ISD::STRICT_FROUND:
3161 case ISD::STRICT_FROUNDEVEN: {
3162 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
3163 assert(FRM != RISCVFPRndMode::Invalid);
3164 Truncated = DAG.getNode(
3165 RISCVISD::STRICT_VFCVT_RM_X_F_VL, DL, DAG.getVTList(IntVT, MVT::Other),
3166 {Chain, Src, Mask, DAG.getTargetConstant(FRM, DL, XLenVT), VL});
3167 break;
3168 }
3169 case ISD::STRICT_FTRUNC:
3170 Truncated =
3172 DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL);
3173 break;
3176 DAG.getVTList(ContainerVT, MVT::Other), Chain, Src,
3177 Mask, VL);
3178 break;
3179 }
3180 Chain = Truncated.getValue(1);
3181
3182 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3183 if (Op.getOpcode() != ISD::STRICT_FNEARBYINT) {
3184 Truncated = DAG.getNode(RISCVISD::STRICT_SINT_TO_FP_VL, DL,
3185 DAG.getVTList(ContainerVT, MVT::Other), Chain,
3186 Truncated, Mask, VL);
3187 Chain = Truncated.getValue(1);
3188 }
3189
3190 // Restore the original sign so that -0.0 is preserved.
3191 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3192 Src, Src, Mask, VL);
3193
3194 if (VT.isFixedLengthVector())
3195 Truncated = convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3196 return DAG.getMergeValues({Truncated, Chain}, DL);
3197}
3198
3199static SDValue
3200 lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
3201 const RISCVSubtarget &Subtarget) {
3202 MVT VT = Op.getSimpleValueType();
3203 if (VT.isVector())
3204 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
3205
3206 if (DAG.shouldOptForSize())
3207 return SDValue();
3208
3209 SDLoc DL(Op);
3210 SDValue Src = Op.getOperand(0);
3211
3212 // Create an integer the size of the mantissa with the MSB set. This and all
3213 // values larger than it don't have any fractional bits so don't need to be
3214 // converted.
3215 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
3216 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3217 APFloat MaxVal = APFloat(FltSem);
3218 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3219 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3220 SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
3221
3222 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
3223 return DAG.getNode(RISCVISD::FROUND, DL, VT, Src, MaxValNode,
3224 DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT()));
3225}
3226
3227// Expand vector LRINT and LLRINT by converting to the integer domain.
3228 static SDValue lowerVectorXRINT(SDValue Op, SelectionDAG &DAG,
3229 const RISCVSubtarget &Subtarget) {
3230 MVT VT = Op.getSimpleValueType();
3231 assert(VT.isVector() && "Unexpected type");
3232
3233 SDLoc DL(Op);
3234 SDValue Src = Op.getOperand(0);
3235 MVT ContainerVT = VT;
3236
3237 if (VT.isFixedLengthVector()) {
3238 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3239 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3240 }
3241
3242 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3243 SDValue Truncated =
3244 DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, ContainerVT, Src, Mask, VL);
3245
3246 if (!VT.isFixedLengthVector())
3247 return Truncated;
3248
3249 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3250}
3251
3252static SDValue
3253 getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget,
3254 const SDLoc &DL, EVT VT, SDValue Merge, SDValue Op,
3255 SDValue Offset, SDValue Mask, SDValue VL,
3256 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED) {
3257 if (Merge.isUndef())
3258 Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
3259 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3260 SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp};
3261 return DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops);
3262}
3263
3264static SDValue
3265getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL,
3266 EVT VT, SDValue Merge, SDValue Op, SDValue Offset, SDValue Mask,
3267 SDValue VL,
3268 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED) {
3269 if (Merge.isUndef())
3270 Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
3271 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3272 SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp};
3273 return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VT, Ops);
3274}
3275
3276static MVT getLMUL1VT(MVT VT) {
3278 "Unexpected vector MVT");
3282}
3283
3284 struct VIDSequence {
3285 int64_t StepNumerator;
3286 unsigned StepDenominator;
3287 int64_t Addend;
3288};
3289
3290static std::optional<uint64_t> getExactInteger(const APFloat &APF,
3291 uint32_t BitWidth) {
3292 // We will use a SINT_TO_FP to materialize this constant so we should use a
3293 // signed APSInt here.
3294 APSInt ValInt(BitWidth, /*IsUnsigned*/ false);
3295 // We use an arbitrary rounding mode here. If a floating-point value is an exact
3296 // integer (e.g., 1.0), the rounding mode does not affect the output value. If
3297 // the rounding mode changes the output value, then it is not an exact
3298 // integer.
3300 bool IsExact;
3301 // If it is out of signed integer range, it will return an invalid operation.
3302 // If it is not an exact integer, IsExact is false.
3303 if ((APF.convertToInteger(ValInt, ArbitraryRM, &IsExact) ==
3305 !IsExact)
3306 return std::nullopt;
3307 return ValInt.extractBitsAsZExtValue(BitWidth, 0);
3308}
3309
3310// Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
3311// to the (non-zero) step S and start value X. This can be then lowered as the
3312// RVV sequence (VID * S) + X, for example.
3313// The step S is represented as an integer numerator divided by a positive
3314// denominator. Note that the implementation currently only identifies
3315// sequences in which either the numerator is +/- 1 or the denominator is 1. It
3316// cannot detect 2/3, for example.
3317// Note that this method will also match potentially unappealing index
3318// sequences, like <i32 0, i32 50939494>, however it is left to the caller to
3319// determine whether this is worth generating code for.
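// Illustrative examples: <1, 3, 5, 7> matches with StepNumerator = 2,
// StepDenominator = 1, Addend = 1, while <0, 0, 1, 1> matches with
// StepNumerator = 1, StepDenominator = 2, Addend = 0.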
3320static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op,
3321 unsigned EltSizeInBits) {
3322 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
3323 if (!cast<BuildVectorSDNode>(Op)->isConstant())
3324 return std::nullopt;
3325 bool IsInteger = Op.getValueType().isInteger();
3326
3327 std::optional<unsigned> SeqStepDenom;
3328 std::optional<int64_t> SeqStepNum, SeqAddend;
3329 std::optional<std::pair<uint64_t, unsigned>> PrevElt;
3330 assert(EltSizeInBits >= Op.getValueType().getScalarSizeInBits());
3331
3332 // First extract the ops into a list of constant integer values. This may not
3333 // be possible for floats if they're not all representable as integers.
3334 SmallVector<std::optional<uint64_t>> Elts(Op.getNumOperands());
3335 const unsigned OpSize = Op.getScalarValueSizeInBits();
3336 for (auto [Idx, Elt] : enumerate(Op->op_values())) {
3337 if (Elt.isUndef()) {
3338 Elts[Idx] = std::nullopt;
3339 continue;
3340 }
3341 if (IsInteger) {
3342 Elts[Idx] = Elt->getAsZExtVal() & maskTrailingOnes<uint64_t>(OpSize);
3343 } else {
3344 auto ExactInteger =
3345 getExactInteger(cast<ConstantFPSDNode>(Elt)->getValueAPF(), OpSize);
3346 if (!ExactInteger)
3347 return std::nullopt;
3348 Elts[Idx] = *ExactInteger;
3349 }
3350 }
3351
3352 for (auto [Idx, Elt] : enumerate(Elts)) {
3353 // Assume undef elements match the sequence; we just have to be careful
3354 // when interpolating across them.
3355 if (!Elt)
3356 continue;
3357
3358 if (PrevElt) {
3359 // Calculate the step since the last non-undef element, and ensure
3360 // it's consistent across the entire sequence.
3361 unsigned IdxDiff = Idx - PrevElt->second;
3362 int64_t ValDiff = SignExtend64(*Elt - PrevElt->first, EltSizeInBits);
3363
3364 // A zero value difference means that we're somewhere in the middle
3365 // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
3366 // step change before evaluating the sequence.
3367 if (ValDiff == 0)
3368 continue;
3369
3370 int64_t Remainder = ValDiff % IdxDiff;
3371 // Normalize the step if it's greater than 1.
3372 if (Remainder != ValDiff) {
3373 // The difference must cleanly divide the element span.
3374 if (Remainder != 0)
3375 return std::nullopt;
3376 ValDiff /= IdxDiff;
3377 IdxDiff = 1;
3378 }
3379
3380 if (!SeqStepNum)
3381 SeqStepNum = ValDiff;
3382 else if (ValDiff != SeqStepNum)
3383 return std::nullopt;
3384
3385 if (!SeqStepDenom)
3386 SeqStepDenom = IdxDiff;
3387 else if (IdxDiff != *SeqStepDenom)
3388 return std::nullopt;
3389 }
3390
3391 // Record this non-undef element for later.
3392 if (!PrevElt || PrevElt->first != *Elt)
3393 PrevElt = std::make_pair(*Elt, Idx);
3394 }
3395
3396 // We need to have logged a step for this to count as a legal index sequence.
3397 if (!SeqStepNum || !SeqStepDenom)
3398 return std::nullopt;
3399
3400 // Loop back through the sequence and validate elements we might have skipped
3401 // while waiting for a valid step. While doing this, log any sequence addend.
3402 for (auto [Idx, Elt] : enumerate(Elts)) {
3403 if (!Elt)
3404 continue;
3405 uint64_t ExpectedVal =
3406 (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom;
3407 int64_t Addend = SignExtend64(*Elt - ExpectedVal, EltSizeInBits);
3408 if (!SeqAddend)
3409 SeqAddend = Addend;
3410 else if (Addend != SeqAddend)
3411 return std::nullopt;
3412 }
3413
3414 assert(SeqAddend && "Must have an addend if we have a step");
3415
3416 return VIDSequence{*SeqStepNum, *SeqStepDenom, *SeqAddend};
3417}
3418
3419// Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
3420// and lower it as a VRGATHER_VX_VL from the source vector.
3421static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
3422 SelectionDAG &DAG,
3423 const RISCVSubtarget &Subtarget) {
3424 if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
3425 return SDValue();
3426 SDValue Vec = SplatVal.getOperand(0);
3427 // Only perform this optimization on vectors of the same size for simplicity.
3428 // Don't perform this optimization for i1 vectors.
3429 // FIXME: Support i1 vectors, maybe by promoting to i8?
3430 if (Vec.getValueType() != VT || VT.getVectorElementType() == MVT::i1)
3431 return SDValue();
3432 SDValue Idx = SplatVal.getOperand(1);
3433 // The index must be a legal type.
3434 if (Idx.getValueType() != Subtarget.getXLenVT())
3435 return SDValue();
3436
3437 MVT ContainerVT = VT;
3438 if (VT.isFixedLengthVector()) {
3439 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3440 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3441 }
3442
3443 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3444
3445 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Vec,
3446 Idx, DAG.getUNDEF(ContainerVT), Mask, VL);
3447
3448 if (!VT.isFixedLengthVector())
3449 return Gather;
3450
3451 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
3452}
3453
3454
3455/// Try and optimize BUILD_VECTORs with "dominant values" - these are values
3456/// which constitute a large proportion of the elements. In such cases we can
3457/// splat a vector with the dominant element and make up the shortfall with
3458 /// INSERT_VECTOR_ELTs. Returns an empty SDValue if not profitable.
3459/// Note that this includes vectors of 2 elements by association. The
3460/// upper-most element is the "dominant" one, allowing us to use a splat to
3461/// "insert" the upper element, and an insert of the lower element at position
3462/// 0, which improves codegen.
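// Illustrative example: v4i32 <3, 3, 3, 7> is lowered as a splat of the
// dominant value 3 plus a single insert of 7 into lane 3 (the last-element
// case below uses a v(f)slide1down rather than a vslideup-based insert).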
3463 static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG,
3464 const RISCVSubtarget &Subtarget) {
3465 MVT VT = Op.getSimpleValueType();
3466 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3467
3468 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3469
3470 SDLoc DL(Op);
3471 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3472
3473 MVT XLenVT = Subtarget.getXLenVT();
3474 unsigned NumElts = Op.getNumOperands();
3475
3476 SDValue DominantValue;
3477 unsigned MostCommonCount = 0;
3478 DenseMap<SDValue, unsigned> ValueCounts;
3479 unsigned NumUndefElts =
3480 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
3481
3482 // Track the number of scalar loads we know we'd be inserting, estimated as
3483 // any non-zero floating-point constant. Other kinds of element are either
3484 // already in registers or are materialized on demand. The threshold at which
3485 // a vector load is more desirable than several scalar materialization and
3486 // vector-insertion instructions is not known.
3487 unsigned NumScalarLoads = 0;
3488
3489 for (SDValue V : Op->op_values()) {
3490 if (V.isUndef())
3491 continue;
3492
3493 ValueCounts.insert(std::make_pair(V, 0));
3494 unsigned &Count = ValueCounts[V];
3495 if (0 == Count)
3496 if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
3497 NumScalarLoads += !CFP->isExactlyValue(+0.0);
3498
3499 // Is this value dominant? In case of a tie, prefer the highest element as
3500 // it's cheaper to insert near the beginning of a vector than it is at the
3501 // end.
3502 if (++Count >= MostCommonCount) {
3503 DominantValue = V;
3504 MostCommonCount = Count;
3505 }
3506 }
3507
3508 assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
3509 unsigned NumDefElts = NumElts - NumUndefElts;
3510 unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
3511
3512 // Don't perform this optimization when optimizing for size, since
3513 // materializing elements and inserting them tends to cause code bloat.
3514 if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
3515 (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) &&
3516 ((MostCommonCount > DominantValueCountThreshold) ||
3517 (ValueCounts.size() <= Log2_32(NumDefElts)))) {
3518 // Start by splatting the most common element.
3519 SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
3520
3521 DenseSet<SDValue> Processed{DominantValue};
3522
3523 // We can handle an insert into the last element (of a splat) via
3524 // v(f)slide1down. This is slightly better than the vslideup insert
3525 // lowering as it avoids the need for a vector group temporary. It
3526 // is also better than using vmerge.vx as it avoids the need to
3527 // materialize the mask in a vector register.
3528 if (SDValue LastOp = Op->getOperand(Op->getNumOperands() - 1);
3529 !LastOp.isUndef() && ValueCounts[LastOp] == 1 &&
3530 LastOp != DominantValue) {
3531 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3532 auto OpCode =
3533 VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
3534 if (!VT.isFloatingPoint())
3535 LastOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, LastOp);
3536 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
3537 LastOp, Mask, VL);
3538 Vec = convertFromScalableVector(VT, Vec, DAG, Subtarget);
3539 Processed.insert(LastOp);
3540 }
3541
3542 MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
3543 for (const auto &OpIdx : enumerate(Op->ops())) {
3544 const SDValue &V = OpIdx.value();
3545 if (V.isUndef() || !Processed.insert(V).second)
3546 continue;
3547 if (ValueCounts[V] == 1) {
3548 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
3549 DAG.getVectorIdxConstant(OpIdx.index(), DL));
3550 } else {
3551 // Blend in all instances of this value using a VSELECT, using a
3552 // mask where each bit signals whether that element is the one
3553 // we're after.
3554 SmallVector<SDValue> Ops;
3555 transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
3556 return DAG.getConstant(V == V1, DL, XLenVT);
3557 });
3558 Vec = DAG.getNode(ISD::VSELECT, DL, VT,
3559 DAG.getBuildVector(SelMaskTy, DL, Ops),
3560 DAG.getSplatBuildVector(VT, DL, V), Vec);
3561 }
3562 }
3563
3564 return Vec;
3565 }
3566
3567 return SDValue();
3568}
3569
3570 static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG,
3571 const RISCVSubtarget &Subtarget) {
3572 MVT VT = Op.getSimpleValueType();
3573 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3574
3575 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3576
3577 SDLoc DL(Op);
3578 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3579
3580 MVT XLenVT = Subtarget.getXLenVT();
3581 unsigned NumElts = Op.getNumOperands();
3582
3583 if (VT.getVectorElementType() == MVT::i1) {
3584 if (ISD::isBuildVectorAllZeros(Op.getNode())) {
3585 SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
3586 return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
3587 }
3588
3589 if (ISD::isBuildVectorAllOnes(Op.getNode())) {
3590 SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
3591 return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
3592 }
3593
3594 // Lower constant mask BUILD_VECTORs via an integer vector type, in
3595 // scalar integer chunks whose bit-width depends on the number of mask
3596 // bits and XLEN.
3597 // First, determine the most appropriate scalar integer type to use. This
3598 // is at most XLenVT, but may be shrunk to a smaller vector element type
3599 // according to the size of the final vector - use i8 chunks rather than
3600 // XLenVT if we're producing a v8i1. This results in more consistent
3601 // codegen across RV32 and RV64.
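// Illustrative example: v8i1 <1,0,1,1,0,0,1,0> uses i8 chunks; element i maps
// to bit i, giving the byte 0b01001101 (0x4d), which is built as a v1i8
// BUILD_VECTOR and bitcast back to v8i1.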
3602 unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
3603 NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELen());
3604 // If we have to use more than one INSERT_VECTOR_ELT then this
3605 // optimization is likely to increase code size; avoid performing it in
3606 // such a case. We can use a load from a constant pool in this case.
3607 if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
3608 return SDValue();
3609 // Now we can create our integer vector type. Note that it may be larger
3610 // than the resulting mask type: v4i1 would use v1i8 as its integer type.
3611 unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits);
3612 MVT IntegerViaVecVT =
3613 MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
3614 IntegerViaVecElts);
3615
3616 uint64_t Bits = 0;
3617 unsigned BitPos = 0, IntegerEltIdx = 0;
3618 SmallVector<SDValue, 8> Elts(IntegerViaVecElts);
3619
3620 for (unsigned I = 0; I < NumElts;) {
3621 SDValue V = Op.getOperand(I);
3622 bool BitValue = !V.isUndef() && V->getAsZExtVal();
3623 Bits |= ((uint64_t)BitValue << BitPos);
3624 ++BitPos;
3625 ++I;
3626
3627 // Once we accumulate enough bits to fill our scalar type or process the
3628 // last element, insert into our vector and clear our accumulated data.
3629 if (I % NumViaIntegerBits == 0 || I == NumElts) {
3630 if (NumViaIntegerBits <= 32)
3631 Bits = SignExtend64<32>(Bits);
3632 SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
3633 Elts[IntegerEltIdx] = Elt;
3634 Bits = 0;
3635 BitPos = 0;
3636 IntegerEltIdx++;
3637 }
3638 }
3639
3640 SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts);
3641
3642 if (NumElts < NumViaIntegerBits) {
3643 // If we're producing a smaller vector than our minimum legal integer
3644 // type, bitcast to the equivalent (known-legal) mask type, and extract
3645 // our final mask.
3646 assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
3647 Vec = DAG.getBitcast(MVT::v8i1, Vec);
3648 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
3649 DAG.getConstant(0, DL, XLenVT));
3650 } else {
3651 // Else we must have produced an integer type with the same size as the
3652 // mask type; bitcast for the final result.
3653 assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
3654 Vec = DAG.getBitcast(VT, Vec);
3655 }
3656
3657 return Vec;
3658 }
3659
3660 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3661 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
3662 : RISCVISD::VMV_V_X_VL;
3663 if (!VT.isFloatingPoint())
3664 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
3665 Splat =
3666 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
3667 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
3668 }
3669
3670 // Try and match index sequences, which we can lower to the vid instruction
3671 // with optional modifications. An all-undef vector is matched by
3672 // getSplatValue, above.
3673 if (auto SimpleVID = isSimpleVIDSequence(Op, Op.getScalarValueSizeInBits())) {
3674 int64_t StepNumerator = SimpleVID->StepNumerator;
3675 unsigned StepDenominator = SimpleVID->StepDenominator;
3676 int64_t Addend = SimpleVID->Addend;
3677
3678 assert(StepNumerator != 0 && "Invalid step");
3679 bool Negate = false;
3680 int64_t SplatStepVal = StepNumerator;
3681 unsigned StepOpcode = ISD::MUL;
3682 // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
3683 // anyway as the shift of 63 won't fit in uimm5.
3684 if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
3685 isPowerOf2_64(std::abs(StepNumerator))) {
3686 Negate = StepNumerator < 0;
3687 StepOpcode = ISD::SHL;
3688 SplatStepVal = Log2_64(std::abs(StepNumerator));
3689 }
3690
3691 // Only emit VIDs with suitably-small steps/addends. We use imm5 as the
3692 // threshold since it's the immediate value many RVV instructions accept.
3693 // There is no vmul.vi instruction so ensure multiply constant can fit in
3694 // a single addi instruction.
3695 if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
3696 (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
3697 isPowerOf2_32(StepDenominator) &&
3698 (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
3699 MVT VIDVT =
3700 VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
3701 MVT VIDContainerVT =
3702 getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
3703 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
3704 // Convert right out of the scalable type so we can use standard ISD
3705 // nodes for the rest of the computation. If we used scalable types with
3706 // these, we'd lose the fixed-length vector info and generate worse
3707 // vsetvli code.
3708 VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
3709 if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
3710 (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
3711 SDValue SplatStep = DAG.getConstant(SplatStepVal, DL, VIDVT);
3712 VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
3713 }
3714 if (StepDenominator != 1) {
3715 SDValue SplatStep =
3716 DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT);
3717 VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
3718 }
3719 if (Addend != 0 || Negate) {
3720 SDValue SplatAddend = DAG.getConstant(Addend, DL, VIDVT);
3721 VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
3722 VID);
3723 }
3724 if (VT.isFloatingPoint()) {
3725 // TODO: Use vfwcvt to reduce register pressure.
3726 VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
3727 }
3728 return VID;
3729 }
3730 }
3731
3732 // For very small build_vectors, use a single scalar insert of a constant.
3733 // TODO: Base this on constant rematerialization cost, not size.
3734 const unsigned EltBitSize = VT.getScalarSizeInBits();
3735 if (VT.getSizeInBits() <= 32 &&
3736 ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
3737 MVT ViaIntVT = MVT::getIntegerVT(VT.getSizeInBits());
3738 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) &&
3739 "Unexpected sequence type");
3740 // If we can use the original VL with the modified element type, this
3741 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3742 // be moved into InsertVSETVLI?
3743 unsigned ViaVecLen =
3744 (Subtarget.getRealMinVLen() >= VT.getSizeInBits() * NumElts) ? NumElts : 1;
3745 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3746
3747 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3748 uint64_t SplatValue = 0;
3749 // Construct the amalgamated value at this larger vector type.
3750 for (const auto &OpIdx : enumerate(Op->op_values())) {
3751 const auto &SeqV = OpIdx.value();
3752 if (!SeqV.isUndef())
3753 SplatValue |=
3754 ((SeqV->getAsZExtVal() & EltMask) << (OpIdx.index() * EltBitSize));
3755 }
3756
3757 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3758 // achieve better constant materialization.
3759 if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
3760 SplatValue = SignExtend64<32>(SplatValue);
3761
3762 SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ViaVecVT,
3763 DAG.getUNDEF(ViaVecVT),
3764 DAG.getConstant(SplatValue, DL, XLenVT),
3765 DAG.getVectorIdxConstant(0, DL));
3766 if (ViaVecLen != 1)
3768 MVT::getVectorVT(ViaIntVT, 1), Vec,
3769 DAG.getConstant(0, DL, XLenVT));
3770 return DAG.getBitcast(VT, Vec);
3771 }
3772
3773
3774 // Attempt to detect "hidden" splats, which only reveal themselves as splats
3775 // when re-interpreted as a vector with a larger element type. For example,
3776 // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
3777 // could be instead splat as
3778 // v2i32 = build_vector i32 0x00010000, i32 0x00010000
3779 // TODO: This optimization could also work on non-constant splats, but it
3780 // would require bit-manipulation instructions to construct the splat value.
3781 SmallVector<SDValue> Sequence;
3782 const auto *BV = cast<BuildVectorSDNode>(Op);
3783 if (VT.isInteger() && EltBitSize < Subtarget.getELen() &&
3784 ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
3785 BV->getRepeatedSequence(Sequence) &&
3786 (Sequence.size() * EltBitSize) <= Subtarget.getELen()) {
3787 unsigned SeqLen = Sequence.size();
3788 MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
3789 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
3790 ViaIntVT == MVT::i64) &&
3791 "Unexpected sequence type");
3792
3793 // If we can use the original VL with the modified element type, this
3794 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3795 // be moved into InsertVSETVLI?
3796 const unsigned RequiredVL = NumElts / SeqLen;
3797 const unsigned ViaVecLen =
3798 (Subtarget.getRealMinVLen() >= ViaIntVT.getSizeInBits() * NumElts) ?
3799 NumElts : RequiredVL;
3800 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3801
3802 unsigned EltIdx = 0;
3803 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3804 uint64_t SplatValue = 0;
3805 // Construct the amalgamated value which can be splatted as this larger
3806 // vector type.
3807 for (const auto &SeqV : Sequence) {
3808 if (!SeqV.isUndef())
3809 SplatValue |=
3810 ((SeqV->getAsZExtVal() & EltMask) << (EltIdx * EltBitSize));
3811 EltIdx++;
3812 }
3813
3814 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3815     // achieve better constant materialization.
3816 if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
3817 SplatValue = SignExtend64<32>(SplatValue);
3818
3819 // Since we can't introduce illegal i64 types at this stage, we can only
3820 // perform an i64 splat on RV32 if it is its own sign-extended value. That
3821 // way we can use RVV instructions to splat.
3822 assert((ViaIntVT.bitsLE(XLenVT) ||
3823 (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
3824 "Unexpected bitcast sequence");
3825 if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
3826 SDValue ViaVL =
3827 DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
3828 MVT ViaContainerVT =
3829 getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
3830 SDValue Splat =
3831 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
3832 DAG.getUNDEF(ViaContainerVT),
3833 DAG.getConstant(SplatValue, DL, XLenVT), ViaVL);
3834 Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
3835 if (ViaVecLen != RequiredVL)
3836         Splat = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
3837                             MVT::getVectorVT(ViaIntVT, RequiredVL), Splat,
3838 DAG.getConstant(0, DL, XLenVT));
3839 return DAG.getBitcast(VT, Splat);
3840 }
3841 }
3842
3843 // If the number of signbits allows, see if we can lower as a <N x i8>.
3844 // Our main goal here is to reduce LMUL (and thus work) required to
3845 // build the constant, but we will also narrow if the resulting
3846 // narrow vector is known to materialize cheaply.
3847 // TODO: We really should be costing the smaller vector. There are
3848 // profitable cases this misses.
3849 if (EltBitSize > 8 && VT.isInteger() &&
3850 (NumElts <= 4 || VT.getSizeInBits() > Subtarget.getRealMinVLen())) {
3851 unsigned SignBits = DAG.ComputeNumSignBits(Op);
3852 if (EltBitSize - SignBits < 8) {
3853 SDValue Source = DAG.getBuildVector(VT.changeVectorElementType(MVT::i8),
3854 DL, Op->ops());
3855 Source = convertToScalableVector(ContainerVT.changeVectorElementType(MVT::i8),
3856 Source, DAG, Subtarget);
3857 SDValue Res = DAG.getNode(RISCVISD::VSEXT_VL, DL, ContainerVT, Source, Mask, VL);
3858 return convertFromScalableVector(VT, Res, DAG, Subtarget);
3859 }
3860 }
3861
3862 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
3863 return Res;
3864
3865 // For constant vectors, use generic constant pool lowering. Otherwise,
3866 // we'd have to materialize constants in GPRs just to move them into the
3867 // vector.
3868 return SDValue();
3869}
3870
3871 static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
3872                                  const RISCVSubtarget &Subtarget) {
3873 MVT VT = Op.getSimpleValueType();
3874 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3875
3876 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
3877       ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
3878     return lowerBuildVectorOfConstants(Op, DAG, Subtarget);
3879
3880 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3881
3882 SDLoc DL(Op);
3883 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3884
3885 MVT XLenVT = Subtarget.getXLenVT();
3886
3887 if (VT.getVectorElementType() == MVT::i1) {
3888 // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
3889 // vector type, we have a legal equivalently-sized i8 type, so we can use
3890 // that.
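    // For instance (illustrative only): v4i1 {1, 0, 1, 1} is built as the v4i8
    // vector {1, 0, 1, 1}, masked with AND 1, and compared SETNE against zero
    // to recover the i1 lanes.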
3891 MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
3892 SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
3893
3894 SDValue WideVec;
3895 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3896 // For a splat, perform a scalar truncate before creating the wider
3897 // vector.
3898 Splat = DAG.getNode(ISD::AND, DL, Splat.getValueType(), Splat,
3899 DAG.getConstant(1, DL, Splat.getValueType()));
3900 WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
3901 } else {
3902 SmallVector<SDValue, 8> Ops(Op->op_values());
3903 WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
3904 SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
3905 WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
3906 }
3907
3908 return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
3909 }
3910
3911 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3912 if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
3913 return Gather;
3914 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
3915                                           : RISCVISD::VMV_V_X_VL;
3916     if (!VT.isFloatingPoint())
3917 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
3918 Splat =
3919 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
3920 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
3921 }
3922
3923 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
3924 return Res;
3925
3926 // If we're compiling for an exact VLEN value, we can split our work per
3927 // register in the register group.
3928 if (const auto VLen = Subtarget.getRealVLen();
3929 VLen && VT.getSizeInBits().getKnownMinValue() > *VLen) {
3930 MVT ElemVT = VT.getVectorElementType();
3931 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
3932 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3933 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
3934 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
3935 assert(M1VT == getLMUL1VT(M1VT));
3936
3937 // The following semantically builds up a fixed length concat_vector
3938 // of the component build_vectors. We eagerly lower to scalable and
3939 // insert_subvector here to avoid DAG combining it back to a large
3940 // build_vector.
3941 SmallVector<SDValue> BuildVectorOps(Op->op_begin(), Op->op_end());
3942 unsigned NumOpElts = M1VT.getVectorMinNumElements();
3943 SDValue Vec = DAG.getUNDEF(ContainerVT);
3944 for (unsigned i = 0; i < VT.getVectorNumElements(); i += ElemsPerVReg) {
3945 auto OneVRegOfOps = ArrayRef(BuildVectorOps).slice(i, ElemsPerVReg);
3946 SDValue SubBV =
3947 DAG.getNode(ISD::BUILD_VECTOR, DL, OneRegVT, OneVRegOfOps);
3948 SubBV = convertToScalableVector(M1VT, SubBV, DAG, Subtarget);
3949 unsigned InsertIdx = (i / ElemsPerVReg) * NumOpElts;
3950 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubBV,
3951 DAG.getVectorIdxConstant(InsertIdx, DL));
3952 }
3953 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
3954 }
3955
3956 // For m1 vectors, if we have non-undef values in both halves of our vector,
3957 // split the vector into low and high halves, build them separately, then
3958 // use a vselect to combine them. For long vectors, this cuts the critical
3959 // path of the vslide1down sequence in half, and gives us an opportunity
3960 // to special case each half independently. Note that we don't change the
3961 // length of the sub-vectors here, so if both halves fall back to the generic
3962 // vslide1down path, we should be able to fold the vselect into the final
3963 // vslidedown (for the undef tail) for the first half w/ masking.
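  // As an illustrative example, an 8-element build_vector {a,b,c,d,e,f,g,h}
  // becomes SubVecA = {a,b,c,d,u,u,u,u} and SubVecB = {u,u,u,u,e,f,g,h}
  // (u = undef), which are then combined with the select mask {1,1,1,1,0,0,0,0}.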
3964 unsigned NumElts = VT.getVectorNumElements();
3965 unsigned NumUndefElts =
3966 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
3967 unsigned NumDefElts = NumElts - NumUndefElts;
3968 if (NumDefElts >= 8 && NumDefElts > NumElts / 2 &&
3969 ContainerVT.bitsLE(getLMUL1VT(ContainerVT))) {
3970 SmallVector<SDValue> SubVecAOps, SubVecBOps;
3971 SmallVector<SDValue> MaskVals;
3972 SDValue UndefElem = DAG.getUNDEF(Op->getOperand(0)->getValueType(0));
3973 SubVecAOps.reserve(NumElts);
3974 SubVecBOps.reserve(NumElts);
3975 for (unsigned i = 0; i < NumElts; i++) {
3976 SDValue Elem = Op->getOperand(i);
3977 if (i < NumElts / 2) {
3978 SubVecAOps.push_back(Elem);
3979 SubVecBOps.push_back(UndefElem);
3980 } else {
3981 SubVecAOps.push_back(UndefElem);
3982 SubVecBOps.push_back(Elem);
3983 }
3984 bool SelectMaskVal = (i < NumElts / 2);
3985 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
3986 }
3987 assert(SubVecAOps.size() == NumElts && SubVecBOps.size() == NumElts &&
3988 MaskVals.size() == NumElts);
3989
3990 SDValue SubVecA = DAG.getBuildVector(VT, DL, SubVecAOps);
3991 SDValue SubVecB = DAG.getBuildVector(VT, DL, SubVecBOps);
3992 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
3993 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
3994 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, SubVecA, SubVecB);
3995 }
3996
3997 // Cap the cost at a value linear to the number of elements in the vector.
3998 // The default lowering is to use the stack. The vector store + scalar loads
3999 // is linear in VL. However, at high LMULs vslide1down and vslidedown end up
4000 // being (at least) linear in LMUL. As a result, using the vslidedown
4001 // lowering for every element ends up being VL*LMUL.
4002 // TODO: Should we be directly costing the stack alternative? Doing so might
4003 // give us a more accurate upper bound.
4004 InstructionCost LinearBudget = VT.getVectorNumElements() * 2;
4005
4006 // TODO: unify with TTI getSlideCost.
4007 InstructionCost PerSlideCost = 1;
4008   switch (RISCVTargetLowering::getLMUL(ContainerVT)) {
4009   default: break;
4010   case RISCVII::VLMUL::LMUL_2:
4011     PerSlideCost = 2;
4012     break;
4013   case RISCVII::VLMUL::LMUL_4:
4014     PerSlideCost = 4;
4015     break;
4016   case RISCVII::VLMUL::LMUL_8:
4017     PerSlideCost = 8;
4018     break;
4019   }
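  // For instance (numbers are illustrative), a fully defined 16-element vector
  // at LMUL=2 has LinearBudget = 32 and PerSlideCost = 2, so all 16 slide
  // operations just fit within the budget before we fall back to the stack.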
4020
4021 // TODO: Should we be using the build instseq then cost + evaluate scheme
4022 // we use for integer constants here?
4023 unsigned UndefCount = 0;
4024 for (const SDValue &V : Op->ops()) {
4025 if (V.isUndef()) {
4026 UndefCount++;
4027 continue;
4028 }
4029 if (UndefCount) {
4030 LinearBudget -= PerSlideCost;
4031 UndefCount = 0;
4032 }
4033 LinearBudget -= PerSlideCost;
4034 }
4035 if (UndefCount) {
4036 LinearBudget -= PerSlideCost;
4037 }
4038
4039 if (LinearBudget < 0)
4040 return SDValue();
4041
4042 assert((!VT.isFloatingPoint() ||
4043 VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) &&
4044 "Illegal type which will result in reserved encoding");
4045
4046 const unsigned Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
4047
4048 SDValue Vec;
4049 UndefCount = 0;
4050 for (SDValue V : Op->ops()) {
4051 if (V.isUndef()) {
4052 UndefCount++;
4053 continue;
4054 }
4055
4056 // Start our sequence with a TA splat in the hopes that hardware is able to
4057 // recognize there's no dependency on the prior value of our temporary
4058 // register.
4059 if (!Vec) {
4060 Vec = DAG.getSplatVector(VT, DL, V);
4061 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
4062 UndefCount = 0;
4063 continue;
4064 }
4065
4066 if (UndefCount) {
4067 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4068 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4069 Vec, Offset, Mask, VL, Policy);
4070 UndefCount = 0;
4071 }
4072     auto OpCode =
4073         VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
4074     if (!VT.isFloatingPoint())
4075 V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V);
4076 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
4077 V, Mask, VL);
4078 }
4079 if (UndefCount) {
4080 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4081 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4082 Vec, Offset, Mask, VL, Policy);
4083 }
4084 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4085}
4086
4087static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4088                                    SDValue Lo, SDValue Hi, SDValue VL,
4089                                    SelectionDAG &DAG) {
4090 if (!Passthru)
4091 Passthru = DAG.getUNDEF(VT);
4092 if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
4093 int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
4094 int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
4095 // If Hi constant is all the same sign bit as Lo, lower this as a custom
4096 // node in order to try and match RVV vector/scalar instructions.
4097 if ((LoC >> 31) == HiC)
4098 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4099
4100 // If vl is equal to VLMAX or fits in 4 bits and Hi constant is equal to Lo,
4101 // we could use vmv.v.x whose EEW = 32 to lower it. This allows us to use
4102 // vlmax vsetvli or vsetivli to change the VL.
4103 // FIXME: Support larger constants?
4104 // FIXME: Support non-constant VLs by saturating?
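    // As an illustrative example: splatting the i64 constant 0x0000000500000005
    // with VL=2 can instead be done as a vmv.v.x of the i32 constant 5 with
    // EEW=32 and VL=4, since both halves of the constant are identical.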
4105 if (LoC == HiC) {
4106 SDValue NewVL;
4107 if (isAllOnesConstant(VL) ||
4108 (isa<RegisterSDNode>(VL) &&
4109 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0))
4110 NewVL = DAG.getRegister(RISCV::X0, MVT::i32);
4111 else if (isa<ConstantSDNode>(VL) && isUInt<4>(VL->getAsZExtVal()))
4112 NewVL = DAG.getNode(ISD::ADD, DL, VL.getValueType(), VL, VL);
4113
4114 if (NewVL) {
4115 MVT InterVT =
4116 MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
4117 auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT,
4118 DAG.getUNDEF(InterVT), Lo, NewVL);
4119 return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
4120 }
4121 }
4122 }
4123
4124 // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
4125 if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
4126 isa<ConstantSDNode>(Hi.getOperand(1)) &&
4127 Hi.getConstantOperandVal(1) == 31)
4128 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4129
4130 // If the hi bits of the splat are undefined, then it's fine to just splat Lo
4131 // even if it might be sign extended.
4132 if (Hi.isUndef())
4133 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4134
4135 // Fall back to a stack store and stride x0 vector load.
4136 return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo,
4137 Hi, VL);
4138}
4139
4140// Called by type legalization to handle splat of i64 on RV32.
4141// FIXME: We can optimize this when the type has sign or zero bits in one
4142// of the halves.
4143static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4144 SDValue Scalar, SDValue VL,
4145 SelectionDAG &DAG) {
4146 assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
4147 SDValue Lo, Hi;
4148 std::tie(Lo, Hi) = DAG.SplitScalar(Scalar, DL, MVT::i32, MVT::i32);
4149 return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG);
4150}
4151
4152// This function lowers a splat of a scalar operand Splat with the vector
4153// length VL. It ensures the final sequence is type legal, which is useful when
4154// lowering a splat after type legalization.
4155static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
4156 MVT VT, const SDLoc &DL, SelectionDAG &DAG,
4157 const RISCVSubtarget &Subtarget) {
4158 bool HasPassthru = Passthru && !Passthru.isUndef();
4159 if (!HasPassthru && !Passthru)
4160 Passthru = DAG.getUNDEF(VT);
4161 if (VT.isFloatingPoint())
4162 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL);
4163
4164 MVT XLenVT = Subtarget.getXLenVT();
4165
4166 // Simplest case is that the operand needs to be promoted to XLenVT.
4167 if (Scalar.getValueType().bitsLE(XLenVT)) {
4168 // If the operand is a constant, sign extend to increase our chances
4169 // of being able to use a .vi instruction. ANY_EXTEND would become a
4170 // a zero extend and the simm5 check in isel would fail.
4171 // FIXME: Should we ignore the upper bits in isel instead?
4172 unsigned ExtOpc =
4173 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4174 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4175 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
4176 }
4177
4178 assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
4179 "Unexpected scalar for splat lowering!");
4180
4181 if (isOneConstant(VL) && isNullConstant(Scalar))
4182 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru,
4183 DAG.getConstant(0, DL, XLenVT), VL);
4184
4185 // Otherwise use the more complicated splatting algorithm.
4186 return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
4187}
4188
4189// This function lowers an insert of a scalar operand Scalar into lane
4190// 0 of the vector regardless of the value of VL. The contents of the
4191// remaining lanes of the result vector are unspecified. VL is assumed
4192// to be non-zero.
4193 static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
4194                                  const SDLoc &DL, SelectionDAG &DAG,
4195 const RISCVSubtarget &Subtarget) {
4196 assert(VT.isScalableVector() && "Expect VT is scalable vector type.");
4197
4198 const MVT XLenVT = Subtarget.getXLenVT();
4199 SDValue Passthru = DAG.getUNDEF(VT);
4200
4201 if (Scalar.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4202 isNullConstant(Scalar.getOperand(1))) {
4203 SDValue ExtractedVal = Scalar.getOperand(0);
4204 // The element types must be the same.
4205 if (ExtractedVal.getValueType().getVectorElementType() ==
4206 VT.getVectorElementType()) {
4207 MVT ExtractedVT = ExtractedVal.getSimpleValueType();
4208 MVT ExtractedContainerVT = ExtractedVT;
4209 if (ExtractedContainerVT.isFixedLengthVector()) {
4210 ExtractedContainerVT = getContainerForFixedLengthVector(
4211 DAG, ExtractedContainerVT, Subtarget);
4212 ExtractedVal = convertToScalableVector(ExtractedContainerVT,
4213 ExtractedVal, DAG, Subtarget);
4214 }
4215 if (ExtractedContainerVT.bitsLE(VT))
4216 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru,
4217 ExtractedVal, DAG.getVectorIdxConstant(0, DL));
4218 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtractedVal,
4219 DAG.getVectorIdxConstant(0, DL));
4220 }
4221 }
4222
4223
4224 if (VT.isFloatingPoint())
4225 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT,
4226 DAG.getUNDEF(VT), Scalar, VL);
4227
4228 // Avoid the tricky legalization cases by falling back to using the
4229 // splat code which already handles it gracefully.
4230 if (!Scalar.getValueType().bitsLE(XLenVT))
4231 return lowerScalarSplat(DAG.getUNDEF(VT), Scalar,
4232 DAG.getConstant(1, DL, XLenVT),
4233 VT, DL, DAG, Subtarget);
4234
4235 // If the operand is a constant, sign extend to increase our chances
4236 // of being able to use a .vi instruction. ANY_EXTEND would become a
4237 // a zero extend and the simm5 check in isel would fail.
4238 // FIXME: Should we ignore the upper bits in isel instead?
4239 unsigned ExtOpc =
4240 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4241 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4242 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT,
4243 DAG.getUNDEF(VT), Scalar, VL);
4244}
4245
4246 // Is this a shuffle that extracts either the even or odd elements of a vector?
4247// That is, specifically, either (a) or (b) below.
4248// t34: v8i8 = extract_subvector t11, Constant:i64<0>
4249// t33: v8i8 = extract_subvector t11, Constant:i64<8>
4250// a) t35: v8i8 = vector_shuffle<0,2,4,6,8,10,12,14> t34, t33
4251// b) t35: v8i8 = vector_shuffle<1,3,5,7,9,11,13,15> t34, t33
4252 // Returns {Src Vector, Even Elements} on success.
4253static bool isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1,
4254 SDValue V2, ArrayRef<int> Mask,
4255 const RISCVSubtarget &Subtarget) {
4256 // Need to be able to widen the vector.
4257 if (VT.getScalarSizeInBits() >= Subtarget.getELen())
4258 return false;
4259
4260   // Both inputs must be extracts.
4261 if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
4262 V2.getOpcode() != ISD::EXTRACT_SUBVECTOR)
4263 return false;
4264
4265 // Extracting from the same source.
4266 SDValue Src = V1.getOperand(0);
4267 if (Src != V2.getOperand(0))
4268 return false;
4269
4270 // Src needs to have twice the number of elements.
4271 if (Src.getValueType().getVectorNumElements() != (Mask.size() * 2))
4272 return false;
4273
4274 // The extracts must extract the two halves of the source.
4275 if (V1.getConstantOperandVal(1) != 0 ||
4276 V2.getConstantOperandVal(1) != Mask.size())
4277 return false;
4278
4279 // First index must be the first even or odd element from V1.
4280 if (Mask[0] != 0 && Mask[0] != 1)
4281 return false;
4282
4283 // The others must increase by 2 each time.
4284 // TODO: Support undef elements?
4285 for (unsigned i = 1; i != Mask.size(); ++i)
4286 if (Mask[i] != Mask[i - 1] + 2)
4287 return false;
4288
4289 return true;
4290}
4291
4292/// Is this shuffle interleaving contiguous elements from one vector into the
4293/// even elements and contiguous elements from another vector into the odd
4294/// elements. \p EvenSrc will contain the element that should be in the first
4295/// even element. \p OddSrc will contain the element that should be in the first
4296/// odd element. These can be the first element in a source or the element half
4297/// way through the source.
4298static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
4299 int &OddSrc, const RISCVSubtarget &Subtarget) {
4300 // We need to be able to widen elements to the next larger integer type.
4301 if (VT.getScalarSizeInBits() >= Subtarget.getELen())
4302 return false;
4303
4304 int Size = Mask.size();
4305 int NumElts = VT.getVectorNumElements();
4306 assert(Size == (int)NumElts && "Unexpected mask size");
4307
4308 SmallVector<unsigned, 2> StartIndexes;
4309 if (!ShuffleVectorInst::isInterleaveMask(Mask, 2, Size * 2, StartIndexes))
4310 return false;
4311
4312 EvenSrc = StartIndexes[0];
4313 OddSrc = StartIndexes[1];
4314
4315 // One source should be low half of first vector.
4316 if (EvenSrc != 0 && OddSrc != 0)
4317 return false;
4318
4319   // Subvectors will be extracted either at the start of the two input
4320   // vectors, or at the start and middle of the first vector if it's a unary
4321   // interleave.
4322 // In both cases, HalfNumElts will be extracted.
4323 // We need to ensure that the extract indices are 0 or HalfNumElts otherwise
4324 // we'll create an illegal extract_subvector.
4325 // FIXME: We could support other values using a slidedown first.
4326 int HalfNumElts = NumElts / 2;
4327 return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0);
4328}
4329
4330/// Match shuffles that concatenate two vectors, rotate the concatenation,
4331/// and then extract the original number of elements from the rotated result.
4332/// This is equivalent to vector.splice or X86's PALIGNR instruction. The
4333/// returned rotation amount is for a rotate right, where elements move from
4334/// higher elements to lower elements. \p LoSrc indicates the first source
4335/// vector of the rotate or -1 for undef. \p HiSrc indicates the second vector
4336/// of the rotate or -1 for undef. At least one of \p LoSrc and \p HiSrc will be
4337/// 0 or 1 if a rotation is found.
4338///
4339/// NOTE: We talk about rotate to the right which matches how bit shift and
4340/// rotate instructions are described where LSBs are on the right, but LLVM IR
4341/// and the table below write vectors with the lowest elements on the left.
4342static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef<int> Mask) {
4343 int Size = Mask.size();
4344
4345 // We need to detect various ways of spelling a rotation:
4346 // [11, 12, 13, 14, 15, 0, 1, 2]
4347 // [-1, 12, 13, 14, -1, -1, 1, -1]
4348 // [-1, -1, -1, -1, -1, -1, 1, 2]
4349 // [ 3, 4, 5, 6, 7, 8, 9, 10]
4350 // [-1, 4, 5, 6, -1, -1, 9, -1]
4351 // [-1, 4, 5, 6, -1, -1, -1, -1]
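  // As a worked example (illustrative), the first mask above yields
  // Rotation = 3 with HiSrc = 1 and LoSrc = 0: the second source is slid down
  // by 3 and the first source is slid up by Size - 3 = 5 to form the result.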
4352 int Rotation = 0;
4353 LoSrc = -1;
4354 HiSrc = -1;
4355 for (int i = 0; i != Size; ++i) {
4356 int M = Mask[i];
4357 if (M < 0)
4358 continue;
4359
4360 // Determine where a rotate vector would have started.
4361 int StartIdx = i - (M % Size);
4362 // The identity rotation isn't interesting, stop.
4363 if (StartIdx == 0)
4364 return -1;
4365
4366 // If we found the tail of a vector the rotation must be the missing
4367 // front. If we found the head of a vector, it must be how much of the
4368 // head.
4369 int CandidateRotation = StartIdx < 0 ? -StartIdx : Size - StartIdx;
4370
4371 if (Rotation == 0)
4372 Rotation = CandidateRotation;
4373 else if (Rotation != CandidateRotation)
4374 // The rotations don't match, so we can't match this mask.
4375 return -1;
4376
4377 // Compute which value this mask is pointing at.
4378 int MaskSrc = M < Size ? 0 : 1;
4379
4380 // Compute which of the two target values this index should be assigned to.
4381     // This reflects whether the high elements are remaining or the low elements
4382 // are remaining.
4383 int &TargetSrc = StartIdx < 0 ? HiSrc : LoSrc;
4384
4385 // Either set up this value if we've not encountered it before, or check
4386 // that it remains consistent.
4387 if (TargetSrc < 0)
4388 TargetSrc = MaskSrc;
4389 else if (TargetSrc != MaskSrc)
4390 // This may be a rotation, but it pulls from the inputs in some
4391 // unsupported interleaving.
4392 return -1;
4393 }
4394
4395 // Check that we successfully analyzed the mask, and normalize the results.
4396 assert(Rotation != 0 && "Failed to locate a viable rotation!");
4397 assert((LoSrc >= 0 || HiSrc >= 0) &&
4398 "Failed to find a rotated input vector!");
4399
4400 return Rotation;
4401}
4402
4403// Lower a deinterleave shuffle to vnsrl.
4404// [a, p, b, q, c, r, d, s] -> [a, b, c, d] (EvenElts == true)
4405// -> [p, q, r, s] (EvenElts == false)
4406// VT is the type of the vector to return, <[vscale x ]n x ty>
4407// Src is the vector to deinterleave of type <[vscale x ]n*2 x ty>
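// E.g. deinterleaving a v8i8 {a, p, b, q, c, r, d, s} might look like
// (VLEN=128 assumed; register choices illustrative):
//   vsetivli zero, 4, e8, mf4, ta, ma
//   vnsrl.wi v9, v8, 0    ; even elements -> {a, b, c, d}
//   vnsrl.wi v10, v8, 8   ; odd elements  -> {p, q, r, s}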
4408 static SDValue getDeinterleaveViaVNSRL(const SDLoc &DL, MVT VT, SDValue Src,
4409                                        bool EvenElts,
4410 const RISCVSubtarget &Subtarget,
4411 SelectionDAG &DAG) {
4412 // The result is a vector of type <m x n x ty>
4413 MVT ContainerVT = VT;
4414 // Convert fixed vectors to scalable if needed
4415 if (ContainerVT.isFixedLengthVector()) {
4416 assert(Src.getSimpleValueType().isFixedLengthVector());
4417 ContainerVT = getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
4418
4419 // The source is a vector of type <m x n*2 x ty>
4420     MVT SrcContainerVT =
4421         MVT::getVectorVT(ContainerVT.getVectorElementType(),
4422                          ContainerVT.getVectorElementCount() * 2);
4423 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
4424 }
4425
4426 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4427
4428 // Bitcast the source vector from <m x n*2 x ty> -> <m x n x ty*2>
4429 // This also converts FP to int.
4430 unsigned EltBits = ContainerVT.getScalarSizeInBits();
4431 MVT WideSrcContainerVT = MVT::getVectorVT(
4432 MVT::getIntegerVT(EltBits * 2), ContainerVT.getVectorElementCount());
4433 Src = DAG.getBitcast(WideSrcContainerVT, Src);
4434
4435 // The integer version of the container type.
4436 MVT IntContainerVT = ContainerVT.changeVectorElementTypeToInteger();
4437
4438 // If we want even elements, then the shift amount is 0. Otherwise, shift by
4439 // the original element size.
4440 unsigned Shift = EvenElts ? 0 : EltBits;
4441 SDValue SplatShift = DAG.getNode(
4442 RISCVISD::VMV_V_X_VL, DL, IntContainerVT, DAG.getUNDEF(ContainerVT),
4443 DAG.getConstant(Shift, DL, Subtarget.getXLenVT()), VL);
4444 SDValue Res =
4445 DAG.getNode(RISCVISD::VNSRL_VL, DL, IntContainerVT, Src, SplatShift,
4446 DAG.getUNDEF(IntContainerVT), TrueMask, VL);
4447 // Cast back to FP if needed.
4448 Res = DAG.getBitcast(ContainerVT, Res);
4449
4450 if (VT.isFixedLengthVector())
4451 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
4452 return Res;
4453}
4454
4455// Lower the following shuffle to vslidedown.
4456// a)
4457// t49: v8i8 = extract_subvector t13, Constant:i64<0>
4458// t109: v8i8 = extract_subvector t13, Constant:i64<8>
4459// t108: v8i8 = vector_shuffle<1,2,3,4,5,6,7,8> t49, t106
4460// b)
4461// t69: v16i16 = extract_subvector t68, Constant:i64<0>
4462// t23: v8i16 = extract_subvector t69, Constant:i64<0>
4463// t29: v4i16 = extract_subvector t23, Constant:i64<4>
4464// t26: v8i16 = extract_subvector t69, Constant:i64<8>
4465// t30: v4i16 = extract_subvector t26, Constant:i64<0>
4466// t54: v4i16 = vector_shuffle<1,2,3,4> t29, t30
4467 static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT,
4468                                                SDValue V1, SDValue V2,
4469 ArrayRef<int> Mask,
4470 const RISCVSubtarget &Subtarget,
4471 SelectionDAG &DAG) {
4472 auto findNonEXTRACT_SUBVECTORParent =
4473 [](SDValue Parent) -> std::pair<SDValue, uint64_t> {
4474 uint64_t Offset = 0;
4475 while (Parent.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
4476 // EXTRACT_SUBVECTOR can be used to extract a fixed-width vector from
4477 // a scalable vector. But we don't want to match the case.
4478 Parent.getOperand(0).getSimpleValueType().isFixedLengthVector()) {
4479 Offset += Parent.getConstantOperandVal(1);
4480 Parent = Parent.getOperand(0);
4481 }
4482 return std::make_pair(Parent, Offset);
4483 };
4484
4485 auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1);
4486 auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2);
4487
4488 // Extracting from the same source.
4489 SDValue Src = V1Src;
4490 if (Src != V2Src)
4491 return SDValue();
4492
4493 // Rebuild mask because Src may be from multiple EXTRACT_SUBVECTORs.
4494 SmallVector<int, 16> NewMask(Mask);
4495 for (size_t i = 0; i != NewMask.size(); ++i) {
4496 if (NewMask[i] == -1)
4497 continue;
4498
4499 if (static_cast<size_t>(NewMask[i]) < NewMask.size()) {
4500 NewMask[i] = NewMask[i] + V1IndexOffset;
4501 } else {
4502 // Minus NewMask.size() is needed. Otherwise, the b case would be
4503 // <5,6,7,12> instead of <5,6,7,8>.
4504 NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset;
4505 }
4506 }
4507
4508 // First index must be known and non-zero. It will be used as the slidedown
4509 // amount.
4510 if (NewMask[0] <= 0)
4511 return SDValue();
4512
4513 // NewMask is also continuous.
4514 for (unsigned i = 1; i != NewMask.size(); ++i)
4515 if (NewMask[i - 1] + 1 != NewMask[i])
4516 return SDValue();
4517
4518 MVT XLenVT = Subtarget.getXLenVT();
4519 MVT SrcVT = Src.getSimpleValueType();
4520 MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
4521 auto [TrueMask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
4522 SDValue Slidedown =
4523 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4524 convertToScalableVector(ContainerVT, Src, DAG, Subtarget),
4525 DAG.getConstant(NewMask[0], DL, XLenVT), TrueMask, VL);
4526   return DAG.getNode(
4527       ISD::EXTRACT_SUBVECTOR, DL, VT,
4528       convertFromScalableVector(SrcVT, Slidedown, DAG, Subtarget),
4529 DAG.getConstant(0, DL, XLenVT));
4530}
4531
4532// Because vslideup leaves the destination elements at the start intact, we can
4533// use it to perform shuffles that insert subvectors:
4534//
4535// vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 2, 3, 8, 9, 10, 11>
4536// ->
4537// vsetvli zero, 8, e8, mf2, ta, ma
4538// vslideup.vi v8, v9, 4
4539//
4540// vector_shuffle v8:v8i8, v9:v8i8 <0, 1, 8, 9, 10, 5, 6, 7>
4541// ->
4542// vsetvli zero, 5, e8, mf2, tu, ma
4543 // vslideup.vi v8, v9, 2
4544 static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT,
4545                                              SDValue V1, SDValue V2,
4546 ArrayRef<int> Mask,
4547 const RISCVSubtarget &Subtarget,
4548 SelectionDAG &DAG) {
4549 unsigned NumElts = VT.getVectorNumElements();
4550 int NumSubElts, Index;
4551 if (!ShuffleVectorInst::isInsertSubvectorMask(Mask, NumElts, NumSubElts,
4552 Index))
4553 return SDValue();
4554
4555 bool OpsSwapped = Mask[Index] < (int)NumElts;
4556 SDValue InPlace = OpsSwapped ? V2 : V1;
4557 SDValue ToInsert = OpsSwapped ? V1 : V2;
4558
4559 MVT XLenVT = Subtarget.getXLenVT();
4560 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4561 auto TrueMask = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).first;
4562 // We slide up by the index that the subvector is being inserted at, and set
4563   // VL to the index + the number of elements being inserted.
4564   unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED | RISCVII::MASK_AGNOSTIC;
4565   // If we're adding a suffix to the in place vector, i.e. inserting right
4566 // up to the very end of it, then we don't actually care about the tail.
4567 if (NumSubElts + Index >= (int)NumElts)
4568 Policy |= RISCVII::TAIL_AGNOSTIC;
4569
4570 InPlace = convertToScalableVector(ContainerVT, InPlace, DAG, Subtarget);
4571 ToInsert = convertToScalableVector(ContainerVT, ToInsert, DAG, Subtarget);
4572 SDValue VL = DAG.getConstant(NumSubElts + Index, DL, XLenVT);
4573
4574 SDValue Res;
4575 // If we're inserting into the lowest elements, use a tail undisturbed
4576 // vmv.v.v.
4577 if (Index == 0)
4578 Res = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, InPlace, ToInsert,
4579 VL);
4580 else
4581 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, InPlace, ToInsert,
4582 DAG.getConstant(Index, DL, XLenVT), TrueMask, VL, Policy);
4583 return convertFromScalableVector(VT, Res, DAG, Subtarget);
4584}
4585
4586/// Match v(f)slide1up/down idioms. These operations involve sliding
4587/// N-1 elements to make room for an inserted scalar at one end.
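/// For example (illustrative), the shuffle
///   v4i32 = vector_shuffle<4,0,1,2> t0, (splat x)
/// inserts x in front of the first three elements of t0 and can be matched as
///   vslide1up.vx vd, t0, x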
4588 static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
4589                                             SDValue V1, SDValue V2,
4590 ArrayRef<int> Mask,
4591 const RISCVSubtarget &Subtarget,
4592 SelectionDAG &DAG) {
4593 bool OpsSwapped = false;
4594 if (!isa<BuildVectorSDNode>(V1)) {
4595 if (!isa<BuildVectorSDNode>(V2))
4596 return SDValue();
4597 std::swap(V1, V2);
4598 OpsSwapped = true;
4599 }
4600 SDValue Splat = cast<BuildVectorSDNode>(V1)->getSplatValue();
4601 if (!Splat)
4602 return SDValue();
4603
4604 // Return true if the mask could describe a slide of Mask.size() - 1
4605 // elements from concat_vector(V1, V2)[Base:] to [Offset:].
4606 auto isSlideMask = [](ArrayRef<int> Mask, unsigned Base, int Offset) {
4607 const unsigned S = (Offset > 0) ? 0 : -Offset;
4608 const unsigned E = Mask.size() - ((Offset > 0) ? Offset : 0);
4609 for (unsigned i = S; i != E; ++i)
4610 if (Mask[i] >= 0 && (unsigned)Mask[i] != Base + i + Offset)
4611 return false;
4612 return true;
4613 };
4614
4615 const unsigned NumElts = VT.getVectorNumElements();
4616 bool IsVSlidedown = isSlideMask(Mask, OpsSwapped ? 0 : NumElts, 1);
4617 if (!IsVSlidedown && !isSlideMask(Mask, OpsSwapped ? 0 : NumElts, -1))
4618 return SDValue();
4619
4620 const int InsertIdx = Mask[IsVSlidedown ? (NumElts - 1) : 0];
4621   // The inserted lane must come from the splat; an undef scalar is legal but not profitable.
4622 if (InsertIdx < 0 || InsertIdx / NumElts != (unsigned)OpsSwapped)
4623 return SDValue();
4624
4625 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4626 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4627   auto OpCode = IsVSlidedown ?
4628     (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL) :
4629     (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL);
4630   if (!VT.isFloatingPoint())
4631 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Splat);
4632 auto Vec = DAG.getNode(OpCode, DL, ContainerVT,
4633 DAG.getUNDEF(ContainerVT),
4634 convertToScalableVector(ContainerVT, V2, DAG, Subtarget),
4635 Splat, TrueMask, VL);
4636 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4637}
4638
4639// Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx
4640// to create an interleaved vector of <[vscale x] n*2 x ty>.
4641// This requires that the size of ty is less than the subtarget's maximum ELEN.
4642 static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
4643                                      const SDLoc &DL, SelectionDAG &DAG,
4644 const RISCVSubtarget &Subtarget) {
4645 MVT VecVT = EvenV.getSimpleValueType();
4646 MVT VecContainerVT = VecVT; // <vscale x n x ty>
4647 // Convert fixed vectors to scalable if needed
4648 if (VecContainerVT.isFixedLengthVector()) {
4649 VecContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
4650 EvenV = convertToScalableVector(VecContainerVT, EvenV, DAG, Subtarget);
4651 OddV = convertToScalableVector(VecContainerVT, OddV, DAG, Subtarget);
4652 }
4653
4654 assert(VecVT.getScalarSizeInBits() < Subtarget.getELen());
4655
4656 // We're working with a vector of the same size as the resulting
4657 // interleaved vector, but with half the number of elements and
4658 // twice the SEW (Hence the restriction on not using the maximum
4659 // ELEN)
4660   MVT WideVT =
4661       MVT::getVectorVT(MVT::getIntegerVT(VecVT.getScalarSizeInBits() * 2),
4662                        VecVT.getVectorElementCount());
4663 MVT WideContainerVT = WideVT; // <vscale x n x ty*2>
4664 if (WideContainerVT.isFixedLengthVector())
4665 WideContainerVT = getContainerForFixedLengthVector(DAG, WideVT, Subtarget);
4666
4667 // Bitcast the input vectors to integers in case they are FP
4668 VecContainerVT = VecContainerVT.changeTypeToInteger();
4669 EvenV = DAG.getBitcast(VecContainerVT, EvenV);
4670 OddV = DAG.getBitcast(VecContainerVT, OddV);
4671
4672 auto [Mask, VL] = getDefaultVLOps(VecVT, VecContainerVT, DL, DAG, Subtarget);
4673 SDValue Passthru = DAG.getUNDEF(WideContainerVT);
4674
4675 SDValue Interleaved;
4676 if (OddV.isUndef()) {
4677 // If OddV is undef, this is a zero extend.
4678 // FIXME: Not only does this optimize the code, it fixes some correctness
4679 // issues because MIR does not have freeze.
4680 Interleaved =
4681 DAG.getNode(RISCVISD::VZEXT_VL, DL, WideContainerVT, EvenV, Mask, VL);
4682 } else if (Subtarget.hasStdExtZvbb()) {
4683 // Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV.
4684 SDValue OffsetVec =
4685 DAG.getConstant(VecVT.getScalarSizeInBits(), DL, VecContainerVT);
4686 Interleaved = DAG.getNode(RISCVISD::VWSLL_VL, DL, WideContainerVT, OddV,
4687 OffsetVec, Passthru, Mask, VL);
4688 if (!EvenV.isUndef())
4689 Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
4690 Interleaved, EvenV, Passthru, Mask, VL);
4691 } else if (EvenV.isUndef()) {
4692 Interleaved =
4693 DAG.getNode(RISCVISD::VZEXT_VL, DL, WideContainerVT, OddV, Mask, VL);
4694
4695 SDValue OffsetVec =
4696 DAG.getConstant(VecVT.getScalarSizeInBits(), DL, WideContainerVT);
4697 Interleaved = DAG.getNode(RISCVISD::SHL_VL, DL, WideContainerVT,
4698 Interleaved, OffsetVec, Passthru, Mask, VL);
4699 } else {
4700 // FIXME: We should freeze the odd vector here. We already handled the case
4701 // of provably undef/poison above.
4702
4703 // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with
4704 // vwaddu.vv
4705 Interleaved = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideContainerVT, EvenV,
4706 OddV, Passthru, Mask, VL);
4707
4708     // Then multiply OddV by (2^VecVT.getScalarSizeInBits() - 1), i.e. all ones.
4709 SDValue AllOnesVec = DAG.getSplatVector(
4710 VecContainerVT, DL, DAG.getAllOnesConstant(DL, Subtarget.getXLenVT()));
4711 SDValue OddsMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideContainerVT,
4712 OddV, AllOnesVec, Passthru, Mask, VL);
4713
4714 // Add the two together so we get
4715 // (OddV * 0xff...ff) + (OddV + EvenV)
4716 // = (OddV * 0x100...00) + EvenV
4717 // = (OddV << VecVT.getScalarSizeInBits()) + EvenV
4718     // Note the ADD_VL and VWMULU_VL should get selected as vwmaccu.vx
4719 Interleaved = DAG.getNode(RISCVISD::ADD_VL, DL, WideContainerVT,
4720 Interleaved, OddsMul, Passthru, Mask, VL);
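    // As a concrete (illustrative) SEW=8 example: for an even element 0x34 and
    // an odd element 0x12, vwaddu produces 0x0046 and the vwmaccu term adds
    // 0x12 * 0xff = 0x11ee, giving the widened element 0x1234, i.e. the pair
    // {0x34, 0x12} once the result is bitcast back to SEW=8.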
4721 }
4722
4723 // Bitcast from <vscale x n * ty*2> to <vscale x 2*n x ty>
4724 MVT ResultContainerVT = MVT::getVectorVT(
4725 VecVT.getVectorElementType(), // Make sure to use original type
4726 VecContainerVT.getVectorElementCount().multiplyCoefficientBy(2));
4727 Interleaved = DAG.getBitcast(ResultContainerVT, Interleaved);
4728
4729 // Convert back to a fixed vector if needed
4730   MVT ResultVT =
4731       MVT::getVectorVT(VecVT.getVectorElementType(),
4732                        VecVT.getVectorElementCount().multiplyCoefficientBy(2));
4733   if (ResultVT.isFixedLengthVector())
4734 Interleaved =
4735 convertFromScalableVector(ResultVT, Interleaved, DAG, Subtarget);
4736
4737 return Interleaved;
4738}
4739
4740// If we have a vector of bits that we want to reverse, we can use a vbrev on a
4741// larger element type, e.g. v32i1 can be reversed with a v1i32 bitreverse.
4742 static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN,
4743                                       SelectionDAG &DAG,
4744 const RISCVSubtarget &Subtarget) {
4745 SDLoc DL(SVN);
4746 MVT VT = SVN->getSimpleValueType(0);
4747 SDValue V = SVN->getOperand(0);
4748 unsigned NumElts = VT.getVectorNumElements();
4749
4750 assert(VT.getVectorElementType() == MVT::i1);
4751
4752   if (!ShuffleVectorInst::isReverseMask(SVN->getMask(),
4753                                         SVN->getMask().size()) ||
4754 !SVN->getOperand(1).isUndef())
4755 return SDValue();
4756
4757 unsigned ViaEltSize = std::max((uint64_t)8, PowerOf2Ceil(NumElts));
4758 EVT ViaVT = EVT::getVectorVT(
4759 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), ViaEltSize), 1);
4760 EVT ViaBitVT =
4761 EVT::getVectorVT(*DAG.getContext(), MVT::i1, ViaVT.getScalarSizeInBits());
4762
4763 // If we don't have zvbb or the larger element type > ELEN, the operation will
4764 // be illegal.
4765   if (!Subtarget.getTargetLowering()->isOperationLegalOrCustom(ISD::BITREVERSE,
4766                                                                ViaVT) ||
4767 !Subtarget.getTargetLowering()->isTypeLegal(ViaBitVT))
4768 return SDValue();
4769
4770 // If the bit vector doesn't fit exactly into the larger element type, we need
4771 // to insert it into the larger vector and then shift up the reversed bits
4772 // afterwards to get rid of the gap introduced.
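  // E.g. (illustrative) a v4i1 reverse inserts the 4 bits into a v8i1, bit
  // reverses it as a single i8, and then shifts the result right by 8 - 4 = 4
  // to remove the gap introduced by widening.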
4773 if (ViaEltSize > NumElts)
4774 V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ViaBitVT, DAG.getUNDEF(ViaBitVT),
4775 V, DAG.getVectorIdxConstant(0, DL));
4776
4777 SDValue Res =
4778 DAG.getNode(ISD::BITREVERSE, DL, ViaVT, DAG.getBitcast(ViaVT, V));
4779
4780 // Shift up the reversed bits if the vector didn't exactly fit into the larger
4781 // element type.
4782 if (ViaEltSize > NumElts)
4783 Res = DAG.getNode(ISD::SRL, DL, ViaVT, Res,
4784 DAG.getConstant(ViaEltSize - NumElts, DL, ViaVT));
4785
4786 Res = DAG.getBitcast(ViaBitVT, Res);
4787
4788 if (ViaEltSize > NumElts)
4789 Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
4790 DAG.getVectorIdxConstant(0, DL));
4791 return Res;
4792}
4793
4794 static bool isLegalBitRotate(ShuffleVectorSDNode *SVN,
4795                              SelectionDAG &DAG,
4796 const RISCVSubtarget &Subtarget,
4797 MVT &RotateVT, unsigned &RotateAmt) {
4798 SDLoc DL(SVN);
4799
4800 EVT VT = SVN->getValueType(0);
4801 unsigned NumElts = VT.getVectorNumElements();
4802 unsigned EltSizeInBits = VT.getScalarSizeInBits();
4803 unsigned NumSubElts;
4804 if (!ShuffleVectorInst::isBitRotateMask(SVN->getMask(), EltSizeInBits, 2,
4805 NumElts, NumSubElts, RotateAmt))
4806 return false;
4807 RotateVT = MVT::getVectorVT(MVT::getIntegerVT(EltSizeInBits * NumSubElts),
4808 NumElts / NumSubElts);
4809
4810 // We might have a RotateVT that isn't legal, e.g. v4i64 on zve32x.
4811 return Subtarget.getTargetLowering()->isTypeLegal(RotateVT);
4812}
4813
4814// Given a shuffle mask like <3, 0, 1, 2, 7, 4, 5, 6> for v8i8, we can
4815// reinterpret it as a v2i32 and rotate it right by 8 instead. We can lower this
4816// as a vror.vi if we have Zvkb, or otherwise as a vsll, vsrl and vor.
4817 static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN,
4818                                            SelectionDAG &DAG,
4819 const RISCVSubtarget &Subtarget) {
4820 SDLoc DL(SVN);
4821
4822 EVT VT = SVN->getValueType(0);
4823 unsigned RotateAmt;
4824 MVT RotateVT;
4825 if (!isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt))
4826 return SDValue();
4827
4828 SDValue Op = DAG.getBitcast(RotateVT, SVN->getOperand(0));
4829
4830 SDValue Rotate;
4831 // A rotate of an i16 by 8 bits either direction is equivalent to a byteswap,
4832 // so canonicalize to vrev8.
4833 if (RotateVT.getScalarType() == MVT::i16 && RotateAmt == 8)
4834 Rotate = DAG.getNode(ISD::BSWAP, DL, RotateVT, Op);
4835 else
4836 Rotate = DAG.getNode(ISD::ROTL, DL, RotateVT, Op,
4837 DAG.getConstant(RotateAmt, DL, RotateVT));
4838
4839 return DAG.getBitcast(VT, Rotate);
4840}
4841
4842// If compiling with an exactly known VLEN, see if we can split a
4843// shuffle on m2 or larger into a small number of m1 sized shuffles
4844 // which write each destination register exactly once.
4845 static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN,
4846                                             SelectionDAG &DAG,
4847 const RISCVSubtarget &Subtarget) {
4848 SDLoc DL(SVN);
4849 MVT VT = SVN->getSimpleValueType(0);
4850 SDValue V1 = SVN->getOperand(0);
4851 SDValue V2 = SVN->getOperand(1);
4852 ArrayRef<int> Mask = SVN->getMask();
4853 unsigned NumElts = VT.getVectorNumElements();
4854
4855 // If we don't know exact data layout, not much we can do. If this
4856 // is already m1 or smaller, no point in splitting further.
4857 const auto VLen = Subtarget.getRealVLen();
4858 if (!VLen || VT.getSizeInBits().getFixedValue() <= *VLen)
4859 return SDValue();
4860
4861 // Avoid picking up bitrotate patterns which we have a linear-in-lmul
4862 // expansion for.
4863 unsigned RotateAmt;
4864 MVT RotateVT;
4865 if (isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt))
4866 return SDValue();
4867
4868 MVT ElemVT = VT.getVectorElementType();
4869 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
4870 unsigned VRegsPerSrc = NumElts / ElemsPerVReg;
4871
4872   SmallVector<std::pair<int, SmallVector<int>>>
4873     OutMasks(VRegsPerSrc, {-1, {}});
4874
4875 // Check if our mask can be done as a 1-to-1 mapping from source
4876 // to destination registers in the group without needing to
4877 // write each destination more than once.
4878 for (unsigned DstIdx = 0; DstIdx < Mask.size(); DstIdx++) {
4879 int DstVecIdx = DstIdx / ElemsPerVReg;
4880 int DstSubIdx = DstIdx % ElemsPerVReg;
4881 int SrcIdx = Mask[DstIdx];
4882 if (SrcIdx < 0 || (unsigned)SrcIdx >= 2 * NumElts)
4883 continue;
4884 int SrcVecIdx = SrcIdx / ElemsPerVReg;
4885 int SrcSubIdx = SrcIdx % ElemsPerVReg;
4886 if (OutMasks[DstVecIdx].first == -1)
4887 OutMasks[DstVecIdx].first = SrcVecIdx;
4888 if (OutMasks[DstVecIdx].first != SrcVecIdx)
4889 // Note: This case could easily be handled by keeping track of a chain
4890 // of source values and generating two element shuffles below. This is
4891 // less an implementation question, and more a profitability one.
4892 return SDValue();
4893
4894 OutMasks[DstVecIdx].second.resize(ElemsPerVReg, -1);
4895 OutMasks[DstVecIdx].second[DstSubIdx] = SrcSubIdx;
4896 }
4897
4898 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4899 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
4900 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
4901 assert(M1VT == getLMUL1VT(M1VT));
4902 unsigned NumOpElts = M1VT.getVectorMinNumElements();
4903 SDValue Vec = DAG.getUNDEF(ContainerVT);
4904 // The following semantically builds up a fixed length concat_vector
4905 // of the component shuffle_vectors. We eagerly lower to scalable here
4906 // to avoid DAG combining it back to a large shuffle_vector again.
4907 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
4908 V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
4909 for (unsigned DstVecIdx = 0 ; DstVecIdx < OutMasks.size(); DstVecIdx++) {
4910 auto &[SrcVecIdx, SrcSubMask] = OutMasks[DstVecIdx];
4911 if (SrcVecIdx == -1)
4912 continue;
4913 unsigned ExtractIdx = (SrcVecIdx % VRegsPerSrc) * NumOpElts;
4914 SDValue SrcVec = (unsigned)SrcVecIdx >= VRegsPerSrc ? V2 : V1;
4915 SDValue SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, SrcVec,
4916 DAG.getVectorIdxConstant(ExtractIdx, DL));
4917 SubVec = convertFromScalableVector(OneRegVT, SubVec, DAG, Subtarget);
4918 SubVec = DAG.getVectorShuffle(OneRegVT, DL, SubVec, SubVec, SrcSubMask);
4919 SubVec = convertToScalableVector(M1VT, SubVec, DAG, Subtarget);
4920 unsigned InsertIdx = DstVecIdx * NumOpElts;
4921 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubVec,
4922 DAG.getVectorIdxConstant(InsertIdx, DL));
4923 }
4924 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4925}
4926
4927 static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
4928                                    const RISCVSubtarget &Subtarget) {
4929 SDValue V1 = Op.getOperand(0);
4930 SDValue V2 = Op.getOperand(1);
4931 SDLoc DL(Op);
4932 MVT XLenVT = Subtarget.getXLenVT();
4933 MVT VT = Op.getSimpleValueType();
4934 unsigned NumElts = VT.getVectorNumElements();
4935 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
4936
4937 if (VT.getVectorElementType() == MVT::i1) {
4938 // Lower to a vror.vi of a larger element type if possible before we promote
4939 // i1s to i8s.
4940 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
4941 return V;
4942 if (SDValue V = lowerBitreverseShuffle(SVN, DAG, Subtarget))
4943 return V;
4944
4945 // Promote i1 shuffle to i8 shuffle.
4946 MVT WidenVT = MVT::getVectorVT(MVT::i8, VT.getVectorElementCount());
4947 V1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V1);
4948 V2 = V2.isUndef() ? DAG.getUNDEF(WidenVT)
4949 : DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V2);
4950 SDValue Shuffled = DAG.getVectorShuffle(WidenVT, DL, V1, V2, SVN->getMask());
4951 return DAG.getSetCC(DL, VT, Shuffled, DAG.getConstant(0, DL, WidenVT),
4952 ISD::SETNE);
4953 }
4954
4955 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4956
4957 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4958
4959 if (SVN->isSplat()) {
4960 const int Lane = SVN->getSplatIndex();
4961 if (Lane >= 0) {
4962 MVT SVT = VT.getVectorElementType();
4963
4964 // Turn splatted vector load into a strided load with an X0 stride.
4965 SDValue V = V1;
4966 // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
4967 // with undef.
4968 // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
4969 int Offset = Lane;
4970 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
4971 int OpElements =
4972 V.getOperand(0).getSimpleValueType().getVectorNumElements();
4973 V = V.getOperand(Offset / OpElements);
4974 Offset %= OpElements;
4975 }
4976
4977 // We need to ensure the load isn't atomic or volatile.
4978 if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
4979 auto *Ld = cast<LoadSDNode>(V);
4980 Offset *= SVT.getStoreSize();
4981 SDValue NewAddr = DAG.getMemBasePlusOffset(
4982 Ld->getBasePtr(), TypeSize::getFixed(Offset), DL);
4983
4984 // If this is SEW=64 on RV32, use a strided load with a stride of x0.
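        // For instance (illustrative): splatting element 1 of a loaded v2i64
        // on RV32 becomes a stride-zero load of that element,
        //   addi     a0, a0, 8
        //   vlse64.v v8, (a0), zero
        // rather than materializing the i64 scalar in a pair of GPRs.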
4985 if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
4986 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
4987 SDValue IntID =
4988 DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
4989 SDValue Ops[] = {Ld->getChain(),
4990 IntID,
4991 DAG.getUNDEF(ContainerVT),
4992 NewAddr,
4993 DAG.getRegister(RISCV::X0, XLenVT),
4994 VL};
4995 SDValue NewLoad = DAG.getMemIntrinsicNode(
4996 ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
4997             DAG.getMachineFunction().getMachineMemOperand(
4998                 Ld->getMemOperand(), Offset, SVT.getStoreSize()));
4999 DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
5000 return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
5001 }
5002
5003 // Otherwise use a scalar load and splat. This will give the best
5004 // opportunity to fold a splat into the operation. ISel can turn it into
5005 // the x0 strided load if we aren't able to fold away the select.
5006 if (SVT.isFloatingPoint())
5007 V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
5008 Ld->getPointerInfo().getWithOffset(Offset),
5009 Ld->getOriginalAlign(),
5010 Ld->getMemOperand()->getFlags());
5011 else
5012 V = DAG.getExtLoad(ISD::SEXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,
5013 Ld->getPointerInfo().getWithOffset(Offset), SVT,
5014 Ld->getOriginalAlign(),
5015                             Ld->getMemOperand()->getFlags());
5016         DAG.makeEquivalentMemoryOrdering(Ld, V);
5017
5018         unsigned Opc =
5019             VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
5020         SDValue Splat =
5021 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), V, VL);
5022 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
5023 }
5024
5025 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5026 assert(Lane < (int)NumElts && "Unexpected lane!");
5027 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT,
5028 V1, DAG.getConstant(Lane, DL, XLenVT),
5029 DAG.getUNDEF(ContainerVT), TrueMask, VL);
5030 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
5031 }
5032 }
5033
5034 // For exact VLEN m2 or greater, try to split to m1 operations if we
5035 // can split cleanly.
5036 if (SDValue V = lowerShuffleViaVRegSplitting(SVN, DAG, Subtarget))
5037 return V;
5038
5039 ArrayRef<int> Mask = SVN->getMask();
5040
5041 if (SDValue V =
5042 lowerVECTOR_SHUFFLEAsVSlide1(DL, VT, V1, V2, Mask, Subtarget, DAG))
5043 return V;
5044
5045 if (SDValue V =
5046 lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG))
5047 return V;
5048
5049 // A bitrotate will be one instruction on Zvkb, so try to lower to it first if
5050 // available.
5051 if (Subtarget.hasStdExtZvkb())
5052 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5053 return V;
5054
5055 // Lower rotations to a SLIDEDOWN and a SLIDEUP. One of the source vectors may
5056 // be undef which can be handled with a single SLIDEDOWN/UP.
5057 int LoSrc, HiSrc;
5058 int Rotation = isElementRotate(LoSrc, HiSrc, Mask);
5059 if (Rotation > 0) {
5060 SDValue LoV, HiV;
5061 if (LoSrc >= 0) {
5062 LoV = LoSrc == 0 ? V1 : V2;
5063 LoV = convertToScalableVector(ContainerVT, LoV, DAG, Subtarget);
5064 }
5065 if (HiSrc >= 0) {
5066 HiV = HiSrc == 0 ? V1 : V2;
5067 HiV = convertToScalableVector(ContainerVT, HiV, DAG, Subtarget);
5068 }
5069
5070 // We found a rotation. We need to slide HiV down by Rotation. Then we need
5071 // to slide LoV up by (NumElts - Rotation).
5072 unsigned InvRotate = NumElts - Rotation;
5073
5074 SDValue Res = DAG.getUNDEF(ContainerVT);
5075 if (HiV) {
5076 // Even though we could use a smaller VL, don't to avoid a vsetivli
5077 // toggle.
5078 Res = getVSlidedown(DAG, Subtarget, DL, ContainerVT, Res, HiV,
5079 DAG.getConstant(Rotation, DL, XLenVT), TrueMask, VL);
5080 }
5081 if (LoV)
5082 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, Res, LoV,
5083 DAG.getConstant(InvRotate, DL, XLenVT), TrueMask, VL,
5084                       RISCVII::TAIL_AGNOSTIC);
5085
5086 return convertFromScalableVector(VT, Res, DAG, Subtarget);
5087 }
5088
5089 // If this is a deinterleave and we can widen the vector, then we can use
5090 // vnsrl to deinterleave.
5091 if (isDeinterleaveShuffle(VT, ContainerVT, V1, V2, Mask, Subtarget)) {
5092 return getDeinterleaveViaVNSRL(DL, VT, V1.getOperand(0), Mask[0] == 0,
5093 Subtarget, DAG);
5094 }
5095
5096 if (SDValue V =
5097 lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG))
5098 return V;
5099
5100 // Detect an interleave shuffle and lower to
5101 // (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
5102 int EvenSrc, OddSrc;
5103 if (isInterleaveShuffle(Mask, VT, EvenSrc, OddSrc, Subtarget)) {
5104 // Extract the halves of the vectors.
5105 MVT HalfVT = VT.getHalfNumVectorElementsVT();
5106
5107 int Size = Mask.size();
5108 SDValue EvenV, OddV;
5109 assert(EvenSrc >= 0 && "Undef source?");
5110 EvenV = (EvenSrc / Size) == 0 ? V1 : V2;
5111 EvenV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, EvenV,
5112 DAG.getVectorIdxConstant(EvenSrc % Size, DL));
5113
5114 assert(OddSrc >= 0 && "Undef source?");
5115 OddV = (OddSrc / Size) == 0 ? V1 : V2;
5116 OddV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, OddV,
5117 DAG.getVectorIdxConstant(OddSrc % Size, DL));
5118
5119 return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget);
5120 }
5121
5122
5123 // Handle any remaining single source shuffles
5124 assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
5125 if (V2.isUndef()) {
5126 // We might be able to express the shuffle as a bitrotate. But even if we
5127 // don't have Zvkb and have to expand, the expanded sequence of approx. 2
5128 // shifts and a vor will have a higher throughput than a vrgather.
5129 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5130 return V;
5131
5132 if (VT.getScalarSizeInBits() == 8 &&
5133 any_of(Mask, [&](const auto &Idx) { return Idx > 255; })) {
5134 // On such a vector we're unable to use i8 as the index type.
5135 // FIXME: We could promote the index to i16 and use vrgatherei16, but that
5136 // may involve vector splitting if we're already at LMUL=8, or our
5137 // user-supplied maximum fixed-length LMUL.
5138 return SDValue();
5139 }
5140
5141 // Base case for the two operand recursion below - handle the worst case
5142 // single source shuffle.
5143 unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
5144 MVT IndexVT = VT.changeTypeToInteger();
5145 // Since we can't introduce illegal index types at this stage, use i16 and
5146 // vrgatherei16 if the corresponding index type for plain vrgather is greater
5147 // than XLenVT.
5148 if (IndexVT.getScalarType().bitsGT(XLenVT)) {
5149 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
5150 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
5151 }
5152
5153 // If the mask allows, we can do all the index computation in 16 bits. This
5154 // requires less work and less register pressure at high LMUL, and creates
5155 // smaller constants which may be cheaper to materialize.
5156 if (IndexVT.getScalarType().bitsGT(MVT::i16) && isUInt<16>(NumElts - 1) &&
5157 (IndexVT.getSizeInBits() / Subtarget.getRealMinVLen()) > 1) {
5158 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
5159 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
5160 }
5161
5162 MVT IndexContainerVT =
5163 ContainerVT.changeVectorElementType(IndexVT.getScalarType());
5164
5165 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5166 SmallVector<SDValue> GatherIndicesLHS;
5167 for (int MaskIndex : Mask) {
5168 bool IsLHSIndex = MaskIndex < (int)NumElts && MaskIndex >= 0;
5169 GatherIndicesLHS.push_back(IsLHSIndex
5170 ? DAG.getConstant(MaskIndex, DL, XLenVT)
5171 : DAG.getUNDEF(XLenVT));
5172 }
5173 SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
5174 LHSIndices = convertToScalableVector(IndexContainerVT, LHSIndices, DAG,
5175 Subtarget);
5176 SDValue Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
5177 DAG.getUNDEF(ContainerVT), TrueMask, VL);
5178 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
5179 }
5180
5181 // By default we preserve the original operand order, and use a mask to
5182 // select LHS as true and RHS as false. However, since RVV vector selects may
5183 // feature splats but only on the LHS, we may choose to invert our mask and
5184 // instead select between RHS and LHS.
5185 bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
5186
5187 // Detect shuffles which can be re-expressed as vector selects; these are
5188 // shuffles in which each element in the destination is taken from an element
5189 // at the corresponding index in either source vector.
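// Illustrative example: with two v4i32 sources, the mask <0,5,2,7> takes
// element i from either V1 or V2 at the same position i, so it can be
// lowered as a vselect with the i1 mask <1,0,1,0> (true selects V1).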
5190 bool IsSelect = all_of(enumerate(Mask), [&](const auto &MaskIdx) {
5191 int MaskIndex = MaskIdx.value();
5192 return MaskIndex < 0 || MaskIdx.index() == (unsigned)MaskIndex % NumElts;
5193 });
5194 if (IsSelect) {
5195 // Now construct the mask that will be used by the vselect operation.
5196 SmallVector<SDValue> MaskVals;
5197 for (int MaskIndex : Mask) {
5198 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ SwapOps;
5199 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
5200 }
5201
5202 if (SwapOps)
5203 std::swap(V1, V2);
5204
5205 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
5206 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
5207 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
5208 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
5209 }
5210
5211 // As a backup, shuffles can be lowered via a vrgather instruction, possibly
5212 // merged with a second vrgather.
5213 SmallVector<int> ShuffleMaskLHS, ShuffleMaskRHS;
5214 SmallVector<SDValue> MaskVals;
5215
5216 // Now construct the mask that will be used by the blended vrgather operation.
5217 // Also construct the appropriate indices into each vector.
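// Illustrative example: for the v4i32 mask <0,7,1,6>, ShuffleMaskLHS becomes
// <0,-1,1,-1> and ShuffleMaskRHS becomes <-1,3,-1,2>; the select mask then
// picks the RHS gather result at positions 1 and 3.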
5218 for (int MaskIndex : Mask) {
5219 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ !SwapOps;
5220 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
5221 bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
5222 ShuffleMaskLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
5223 ? MaskIndex : -1);
5224 ShuffleMaskRHS.push_back(IsLHSOrUndefIndex ? -1 : (MaskIndex - NumElts));
5225 }
5226
5227 if (SwapOps) {
5228 std::swap(V1, V2);
5229 std::swap(ShuffleMaskLHS, ShuffleMaskRHS);
5230 }
5231
5232 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
5233 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
5234 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
5235
5236 // Recursively invoke lowering for each operand if we had two
5237 // independent single source shuffles, and then combine the result via a
5238 // vselect. Note that the vselect will likely be folded back into the
5239 // second permute (vrgather, or other) by the post-isel combine.
5240 V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), ShuffleMaskLHS);
5241 V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), ShuffleMaskRHS);
5242 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V2, V1);
5243}
5244
5245 bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
5246 // Support splats for any type. These should type legalize well.
5247 if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
5248 return true;
5249
5250 // Only support legal VTs for other shuffles for now.
5251 if (!isTypeLegal(VT))
5252 return false;
5253
5254 MVT SVT = VT.getSimpleVT();
5255
5256 // Not for i1 vectors.
5257 if (SVT.getScalarType() == MVT::i1)
5258 return false;
5259
5260 int Dummy1, Dummy2;
5261 return (isElementRotate(Dummy1, Dummy2, M) > 0) ||
5262 isInterleaveShuffle(M, SVT, Dummy1, Dummy2, Subtarget);
5263}
5264
5265// Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
5266// the exponent.
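// Worked example (illustrative): cttz_zero_undef(i16 0x0050): x & -x isolates
// 0x0010 = 2^4; converting that to f32 gives a biased exponent field of
// 127 + 4, and subtracting the bias (127) recovers the trailing zero count 4.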
5267SDValue
5268RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
5269 SelectionDAG &DAG) const {
5270 MVT VT = Op.getSimpleValueType();
5271 unsigned EltSize = VT.getScalarSizeInBits();
5272 SDValue Src = Op.getOperand(0);
5273 SDLoc DL(Op);
5274 MVT ContainerVT = VT;
5275
5276 SDValue Mask, VL;
5277 if (Op->isVPOpcode()) {
5278 Mask = Op.getOperand(1);
5279 if (VT.isFixedLengthVector())
5280 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
5281 Subtarget);
5282 VL = Op.getOperand(2);
5283 }
5284
5285 // We choose an FP type that can represent the value if possible. Otherwise, we
5286 // use a round-toward-zero conversion so the exponent of the result is correct.
5287 // TODO: Use f16 for i8 when possible?
5288 MVT FloatEltVT = (EltSize >= 32) ? MVT::f64 : MVT::f32;
5289 if (!isTypeLegal(MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount())))
5290 FloatEltVT = MVT::f32;
5291 MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
5292
5293 // Legal types should have been checked in the RISCVTargetLowering
5294 // constructor.
5295 // TODO: Splitting may make sense in some cases.
5296 assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) &&
5297 "Expected legal float type!");
5298
5299 // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X.
5300 // The trailing zero count is equal to log2 of this single bit value.
5301 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
5302 SDValue Neg = DAG.getNegative(Src, DL, VT);
5303 Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg);
5304 } else if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) {
5305 SDValue Neg = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(0, DL, VT),
5306 Src, Mask, VL);
5307 Src = DAG.getNode(ISD::VP_AND, DL, VT, Src, Neg, Mask, VL);
5308 }
5309
5310 // We have a legal FP type, convert to it.
5311 SDValue FloatVal;
5312 if (FloatVT.bitsGT(VT)) {
5313 if (Op->isVPOpcode())
5314 FloatVal = DAG.getNode(ISD::VP_UINT_TO_FP, DL, FloatVT, Src, Mask, VL);
5315 else
5316 FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
5317 } else {
5318 // Use RTZ to avoid rounding influencing exponent of FloatVal.
5319 if (VT.isFixedLengthVector()) {
5320 ContainerVT = getContainerForFixedLengthVector(VT);
5321 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
5322 }
5323 if (!Op->isVPOpcode())
5324 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5325 SDValue RTZRM =
5327 MVT ContainerFloatVT =
5328 MVT::getVectorVT(FloatEltVT, ContainerVT.getVectorElementCount());
5329 FloatVal = DAG.getNode(RISCVISD::VFCVT_RM_F_XU_VL, DL, ContainerFloatVT,
5330 Src, Mask, RTZRM, VL);
5331 if (VT.isFixedLengthVector())
5332 FloatVal = convertFromScalableVector(FloatVT, FloatVal, DAG, Subtarget);
5333 }
5334 // Bitcast to integer and shift the exponent to the LSB.
5335 EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
5336 SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);
5337 unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
5338
5339 SDValue Exp;
5340 // Restore back to original type. Truncation after SRL is to generate vnsrl.
5341 if (Op->isVPOpcode()) {
5342 Exp = DAG.getNode(ISD::VP_LSHR, DL, IntVT, Bitcast,
5343 DAG.getConstant(ShiftAmt, DL, IntVT), Mask, VL);
5344 Exp = DAG.getVPZExtOrTrunc(DL, VT, Exp, Mask, VL);
5345 } else {
5346 Exp = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
5347 DAG.getConstant(ShiftAmt, DL, IntVT));
5348 if (IntVT.bitsLT(VT))
5349 Exp = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Exp);
5350 else if (IntVT.bitsGT(VT))
5351 Exp = DAG.getNode(ISD::TRUNCATE, DL, VT, Exp);
5352 }
5353
5354 // The exponent contains log2 of the value in biased form.
5355 unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
5356 // For trailing zeros, we just need to subtract the bias.
5357 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
5358 return DAG.getNode(ISD::SUB, DL, VT, Exp,
5359 DAG.getConstant(ExponentBias, DL, VT));
5360 if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF)
5361 return DAG.getNode(ISD::VP_SUB, DL, VT, Exp,
5362 DAG.getConstant(ExponentBias, DL, VT), Mask, VL);
5363
5364 // For leading zeros, we need to remove the bias and convert from log2 to
5365 // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
5366 unsigned Adjust = ExponentBias + (EltSize - 1);
5367 SDValue Res;
5368 if (Op->isVPOpcode())
5369 Res = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp,
5370 Mask, VL);
5371 else
5372 Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp);
5373
5374 // With a zero input, the result above equals Adjust, which is greater than
5375 // EltSize. Hence, we can take min(Res, EltSize) for CTLZ.
5376 if (Op.getOpcode() == ISD::CTLZ)
5377 Res = DAG.getNode(ISD::UMIN, DL, VT, Res, DAG.getConstant(EltSize, DL, VT));
5378 else if (Op.getOpcode() == ISD::VP_CTLZ)
5379 Res = DAG.getNode(ISD::VP_UMIN, DL, VT, Res,
5380 DAG.getConstant(EltSize, DL, VT), Mask, VL);
5381 return Res;
5382}
5383
5384SDValue RISCVTargetLowering::lowerVPCttzElements(SDValue Op,
5385 SelectionDAG &DAG) const {
5386 SDLoc DL(Op);
5387 MVT XLenVT = Subtarget.getXLenVT();
5388 SDValue Source = Op->getOperand(0);
5389 MVT SrcVT = Source.getSimpleValueType();
5390 SDValue Mask = Op->getOperand(1);
5391 SDValue EVL = Op->getOperand(2);
5392
5393 if (SrcVT.isFixedLengthVector()) {
5394 MVT ContainerVT = getContainerForFixedLengthVector(SrcVT);
5395 Source = convertToScalableVector(ContainerVT, Source, DAG, Subtarget);
5396 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
5397 Subtarget);
5398 SrcVT = ContainerVT;
5399 }
5400
5401 // Convert to boolean vector.
5402 if (SrcVT.getScalarType() != MVT::i1) {
5403 SDValue AllZero = DAG.getConstant(0, DL, SrcVT);
5404 SrcVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorElementCount());
5405 Source = DAG.getNode(RISCVISD::SETCC_VL, DL, SrcVT,
5406 {Source, AllZero, DAG.getCondCode(ISD::SETNE),
5407 DAG.getUNDEF(SrcVT), Mask, EVL});
5408 }
5409
5410 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Source, Mask, EVL);
5411 if (Op->getOpcode() == ISD::VP_CTTZ_ELTS_ZERO_UNDEF)
5412 // In this case, we can interpret poison as -1, so nothing to do further.
5413 return Res;
5414
5415 // Convert -1 to VL.
5416 SDValue SetCC =
5417 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
5418 Res = DAG.getSelect(DL, XLenVT, SetCC, EVL, Res);
5419 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
5420}
5421
5422 // While RVV has alignment restrictions, we should always be able to load as a
5423 // legal equivalently-sized byte-typed vector instead. This method is
5424 // responsible for re-expressing an ISD::LOAD via a correctly-aligned type. If
5425 // the load is already correctly aligned, it returns SDValue().
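// For example, an align-1 load of nxv2i32 is re-expressed as an nxv8i8 load
// followed by a bitcast back to nxv2i32.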
5426SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
5427 SelectionDAG &DAG) const {
5428 auto *Load = cast<LoadSDNode>(Op);
5429 assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
5430
5432 Load->getMemoryVT(),
5433 *Load->getMemOperand()))
5434 return SDValue();
5435
5436 SDLoc DL(Op);
5437 MVT VT = Op.getSimpleValueType();
5438 unsigned EltSizeBits = VT.getScalarSizeInBits();
5439 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
5440 "Unexpected unaligned RVV load type");
5441 MVT NewVT =
5442 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
5443 assert(NewVT.isValid() &&
5444 "Expecting equally-sized RVV vector types to be legal");
5445 SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(),
5446 Load->getPointerInfo(), Load->getOriginalAlign(),
5447 Load->getMemOperand()->getFlags());
5448 return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
5449}
5450
5451 // While RVV has alignment restrictions, we should always be able to store as a
5452 // legal equivalently-sized byte-typed vector instead. This method is
5453 // responsible for re-expressing an ISD::STORE via a correctly-aligned type. It
5454 // returns SDValue() if the store is already correctly aligned.
5455SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
5456 SelectionDAG &DAG) const {
5457 auto *Store = cast<StoreSDNode>(Op);
5458 assert(Store && Store->getValue().getValueType().isVector() &&
5459 "Expected vector store");
5460
5462 Store->getMemoryVT(),
5463 *Store->getMemOperand()))
5464 return SDValue();
5465
5466 SDLoc DL(Op);
5467 SDValue StoredVal = Store->getValue();
5468 MVT VT = StoredVal.getSimpleValueType();
5469 unsigned EltSizeBits = VT.getScalarSizeInBits();
5470 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
5471 "Unexpected unaligned RVV store type");
5472 MVT NewVT =
5473 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
5474 assert(NewVT.isValid() &&
5475 "Expecting equally-sized RVV vector types to be legal");
5476 StoredVal = DAG.getBitcast(NewVT, StoredVal);
5477 return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
5478 Store->getPointerInfo(), Store->getOriginalAlign(),
5479 Store->getMemOperand()->getFlags());
5480}
5481
5482 static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,
5483 const RISCVSubtarget &Subtarget) {
5484 assert(Op.getValueType() == MVT::i64 && "Unexpected VT");
5485
5486 int64_t Imm = cast<ConstantSDNode>(Op)->getSExtValue();
5487
5488 // All simm32 constants should be handled by isel.
5489 // NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making
5490 // this check redundant, but small immediates are common, so keeping this
5491 // check improves compile time.
5492 if (isInt<32>(Imm))
5493 return Op;
5494
5495 // We only need to cost the immediate, if constant pool lowering is enabled.
5496 if (!Subtarget.useConstantPoolForLargeInts())
5497 return Op;
5498
5500 if (Seq.size() <= Subtarget.getMaxBuildIntsCost())
5501 return Op;
5502
5503 // Optimizations below are disabled for opt size. If we're optimizing for
5504 // size, use a constant pool.
5505 if (DAG.shouldOptForSize())
5506 return SDValue();
5507
5508 // Special case: see if we can build the constant as (ADD (SLLI X, C), X);
5509 // do that if it avoids a constant pool, though it requires an extra
5510 // temporary register.
5511 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
5512 // the low and high 32 bits are the same and bits 31 and 63 are set.
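// Illustrative example: 0x8000000080000000 has identical low and high halves
// with bits 31 and 63 set; with Zba it can be built from a single LUI plus
// (ADD_UW X, (SLLI X, 32)).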
5513 unsigned ShiftAmt, AddOpc;
5514 RISCVMatInt::InstSeq SeqLo =
5515 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
5516 if (!SeqLo.empty() && (SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost())
5517 return Op;
5518
5519 return SDValue();
5520}
5521
5522 static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
5523 const RISCVSubtarget &Subtarget) {
5524 SDLoc dl(Op);
5525 AtomicOrdering FenceOrdering =
5526 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
5527 SyncScope::ID FenceSSID =
5528 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
5529
5530 if (Subtarget.hasStdExtZtso()) {
5531 // The only fence that needs an instruction is a sequentially-consistent
5532 // cross-thread fence.
5533 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
5534 FenceSSID == SyncScope::System)
5535 return Op;
5536
5537 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
5538 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
5539 }
5540
5541 // Single-thread fences only synchronize with signal handlers on the same
5542 // thread and thus only need to preserve instruction order, not actually
5543 // enforce memory ordering.
5544 if (FenceSSID == SyncScope::SingleThread)
5545 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
5546 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
5547
5548 return Op;
5549}
5550
5551 static SDValue lowerSADDSAT_SSUBSAT(SDValue Op, SelectionDAG &DAG) {
5552 assert(Op.getValueType() == MVT::i32 && RV64LegalI32 &&
5553 "Unexpected custom legalisation");
5554
5555 // With Zbb, we can widen to i64 and smin/smax with INT32_MAX/MIN.
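// For example, saddsat(i32 0x7fffffff, 1) computes 0x80000000 in i64; the
// smin with INT32_MAX clamps it back to 0x7fffffff before truncating.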
5556 bool IsAdd = Op.getOpcode() == ISD::SADDSAT;
5557 SDLoc DL(Op);
5558 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(0));
5559 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(1));
5560 SDValue Result =
5561 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
5562
5563 APInt MinVal = APInt::getSignedMinValue(32).sext(64);
5564 APInt MaxVal = APInt::getSignedMaxValue(32).sext(64);
5565 SDValue SatMin = DAG.getConstant(MinVal, DL, MVT::i64);
5566 SDValue SatMax = DAG.getConstant(MaxVal, DL, MVT::i64);
5567 Result = DAG.getNode(ISD::SMIN, DL, MVT::i64, Result, SatMax);
5568 Result = DAG.getNode(ISD::SMAX, DL, MVT::i64, Result, SatMin);
5569 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
5570}
5571
5572 static SDValue lowerUADDSAT_USUBSAT(SDValue Op, SelectionDAG &DAG) {
5573 assert(Op.getValueType() == MVT::i32 && RV64LegalI32 &&
5574 "Unexpected custom legalisation");
5575
5576 // With Zbb we can sign extend and let LegalizeDAG use minu/maxu. Using
5577 // sign extend allows overflow of the lower 32 bits to be detected on
5578 // the promoted size.
5579 SDLoc DL(Op);
5580 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(0));
5581 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(1));
5582 SDValue WideOp = DAG.getNode(Op.getOpcode(), DL, MVT::i64, LHS, RHS);
5583 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, WideOp);
5584}
5585
5586// Custom lower i32 SADDO/SSUBO with RV64LegalI32 so we take advantage of addw.
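// For example, saddo(i32 0x7fffffff, 1): the i64 sum is 0x80000000, but
// sign-extending its low 32 bits gives 0xffffffff80000000, so the SETNE
// compare reports overflow.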
5587 static SDValue lowerSADDO_SSUBO(SDValue Op, SelectionDAG &DAG) {
5588 assert(Op.getValueType() == MVT::i32 && RV64LegalI32 &&
5589 "Unexpected custom legalisation");
5590 if (isa<ConstantSDNode>(Op.getOperand(1)))
5591 return SDValue();
5592
5593 bool IsAdd = Op.getOpcode() == ISD::SADDO;
5594 SDLoc DL(Op);
5595 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(0));
5596 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(1));
5597 SDValue WideOp =
5598 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
5599 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, WideOp);
5600 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, WideOp,
5601 DAG.getValueType(MVT::i32));
5602 SDValue Ovf = DAG.getSetCC(DL, Op.getValue(1).getValueType(), WideOp, SExt,
5603 ISD::SETNE);
5604 return DAG.getMergeValues({Res, Ovf}, DL);
5605}
5606
5607// Custom lower i32 SMULO with RV64LegalI32 so we take advantage of mulw.
5608 static SDValue lowerSMULO(SDValue Op, SelectionDAG &DAG) {
5609 assert(Op.getValueType() == MVT::i32 && RV64LegalI32 &&
5610 "Unexpected custom legalisation");
5611 SDLoc DL(Op);
5612 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(0));
5613 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(1));
5614 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
5615 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
5616 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Mul,
5617 DAG.getValueType(MVT::i32));
5618 SDValue Ovf = DAG.getSetCC(DL, Op.getValue(1).getValueType(), Mul, SExt,
5619 ISD::SETNE);
5620 return DAG.getMergeValues({Res, Ovf}, DL);
5621}
5622
5623SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
5624 SelectionDAG &DAG) const {
5625 SDLoc DL(Op);
5626 MVT VT = Op.getSimpleValueType();
5627 MVT XLenVT = Subtarget.getXLenVT();
5628 unsigned Check = Op.getConstantOperandVal(1);
5629 unsigned TDCMask = 0;
5630 if (Check & fcSNan)
5631 TDCMask |= RISCV::FPMASK_Signaling_NaN;
5632 if (Check & fcQNan)
5633 TDCMask |= RISCV::FPMASK_Quiet_NaN;
5634 if (Check & fcPosInf)
5636 if (Check & fcNegInf)
5638 if (Check & fcPosNormal)
5640 if (Check & fcNegNormal)
5642 if (Check & fcPosSubnormal)
5644 if (Check & fcNegSubnormal)
5646 if (Check & fcPosZero)
5647 TDCMask |= RISCV::FPMASK_Positive_Zero;
5648 if (Check & fcNegZero)
5649 TDCMask |= RISCV::FPMASK_Negative_Zero;
5650
5651 bool IsOneBitMask = isPowerOf2_32(TDCMask);
5652
5653 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, XLenVT);
5654
5655 if (VT.isVector()) {
5656 SDValue Op0 = Op.getOperand(0);
5657 MVT VT0 = Op.getOperand(0).getSimpleValueType();
5658
5659 if (VT.isScalableVector()) {
5661 auto [Mask, VL] = getDefaultScalableVLOps(VT0, DL, DAG, Subtarget);
5662 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
5663 Mask = Op.getOperand(2);
5664 VL = Op.getOperand(3);
5665 }
5666 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, DstVT, Op0, Mask,
5667 VL, Op->getFlags());
5668 if (IsOneBitMask)
5669 return DAG.getSetCC(DL, VT, FPCLASS,
5670 DAG.getConstant(TDCMask, DL, DstVT),
5672 SDValue AND = DAG.getNode(ISD::AND, DL, DstVT, FPCLASS,
5673 DAG.getConstant(TDCMask, DL, DstVT));
5674 return DAG.getSetCC(DL, VT, AND, DAG.getConstant(0, DL, DstVT),
5675 ISD::SETNE);
5676 }
5677
5678 MVT ContainerVT0 = getContainerForFixedLengthVector(VT0);
5679 MVT ContainerVT = getContainerForFixedLengthVector(VT);
5680 MVT ContainerDstVT = ContainerVT0.changeVectorElementTypeToInteger();
5681 auto [Mask, VL] = getDefaultVLOps(VT0, ContainerVT0, DL, DAG, Subtarget);
5682 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
5683 Mask = Op.getOperand(2);
5684 MVT MaskContainerVT =
5685 getContainerForFixedLengthVector(Mask.getSimpleValueType());
5686 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
5687 VL = Op.getOperand(3);
5688 }
5689 Op0 = convertToScalableVector(ContainerVT0, Op0, DAG, Subtarget);
5690
5691 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, ContainerDstVT, Op0,
5692 Mask, VL, Op->getFlags());
5693
5694 TDCMaskV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
5695 DAG.getUNDEF(ContainerDstVT), TDCMaskV, VL);
5696 if (IsOneBitMask) {
5697 SDValue VMSEQ =
5698 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
5699 {FPCLASS, TDCMaskV, DAG.getCondCode(ISD::SETEQ),
5700 DAG.getUNDEF(ContainerVT), Mask, VL});
5701 return convertFromScalableVector(VT, VMSEQ, DAG, Subtarget);
5702 }
5703 SDValue AND = DAG.getNode(RISCVISD::AND_VL, DL, ContainerDstVT, FPCLASS,
5704 TDCMaskV, DAG.getUNDEF(ContainerDstVT), Mask, VL);
5705
5706 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
5707 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
5708 DAG.getUNDEF(ContainerDstVT), SplatZero, VL);
5709
5710 SDValue VMSNE = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
5711 {AND, SplatZero, DAG.getCondCode(ISD::SETNE),
5712 DAG.getUNDEF(ContainerVT), Mask, VL});
5713 return convertFromScalableVector(VT, VMSNE, DAG, Subtarget);
5714 }
5715
5716 SDValue FCLASS = DAG.getNode(RISCVISD::FCLASS, DL, XLenVT, Op.getOperand(0));
5717 SDValue AND = DAG.getNode(ISD::AND, DL, XLenVT, FCLASS, TDCMaskV);
5718 SDValue Res = DAG.getSetCC(DL, XLenVT, AND, DAG.getConstant(0, DL, XLenVT),
5720 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
5721}
5722
5723 // Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these
5724 // operations propagate NaNs.
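// For example, fmaximum(NaN, 1.0): X fails the ordered self-compare, so Y is
// replaced with X and fmax sees two NaN operands, returning NaN as required.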
5725 static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG,
5726 const RISCVSubtarget &Subtarget) {
5727 SDLoc DL(Op);
5728 MVT VT = Op.getSimpleValueType();
5729
5730 SDValue X = Op.getOperand(0);
5731 SDValue Y = Op.getOperand(1);
5732
5733 if (!VT.isVector()) {
5734 MVT XLenVT = Subtarget.getXLenVT();
5735
5736 // If X is a NaN, replace Y with X. If Y is a NaN, replace X with Y. This
5737 // ensures that when one input is a NaN, the other will also be a NaN,
5738 // allowing the NaN to propagate. If both inputs are NaN, this merely swaps
5739 // the inputs, which is harmless.
5740
5741 SDValue NewY = Y;
5742 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(X)) {
5743 SDValue XIsNonNan = DAG.getSetCC(DL, XLenVT, X, X, ISD::SETOEQ);
5744 NewY = DAG.getSelect(DL, VT, XIsNonNan, Y, X);
5745 }
5746
5747 SDValue NewX = X;
5748 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(Y)) {
5749 SDValue YIsNonNan = DAG.getSetCC(DL, XLenVT, Y, Y, ISD::SETOEQ);
5750 NewX = DAG.getSelect(DL, VT, YIsNonNan, X, Y);
5751 }
5752
5753 unsigned Opc =
5754 Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN;
5755 return DAG.getNode(Opc, DL, VT, NewX, NewY);
5756 }
5757
5758 // Check for the absence of NaNs before converting the fixed-length vectors to scalable.
5759 bool XIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(X);
5760 bool YIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(Y);
5761
5762 MVT ContainerVT = VT;
5763 if (VT.isFixedLengthVector()) {
5764 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5765 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
5766 Y = convertToScalableVector(ContainerVT, Y, DAG, Subtarget);
5767 }
5768
5769 SDValue Mask, VL;
5770 if (Op->isVPOpcode()) {
5771 Mask = Op.getOperand(2);
5772 if (VT.isFixedLengthVector())
5773 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
5774 Subtarget);
5775 VL = Op.getOperand(3);
5776 } else {
5777 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5778 }
5779
5780 SDValue NewY = Y;
5781 if (!XIsNeverNan) {
5782 SDValue XIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
5783 {X, X, DAG.getCondCode(ISD::SETOEQ),
5784 DAG.getUNDEF(ContainerVT), Mask, VL});
5785 NewY = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, XIsNonNan, Y, X,
5786 DAG.getUNDEF(ContainerVT), VL);
5787 }
5788
5789 SDValue NewX = X;
5790 if (!YIsNeverNan) {
5791 SDValue YIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
5792 {Y, Y, DAG.getCondCode(ISD::SETOEQ),
5793 DAG.getUNDEF(ContainerVT), Mask, VL});
5794 NewX = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, YIsNonNan, X, Y,
5795 DAG.getUNDEF(ContainerVT), VL);
5796 }
5797
5798 unsigned Opc =
5799 Op.getOpcode() == ISD::FMAXIMUM || Op->getOpcode() == ISD::VP_FMAXIMUM
5802 SDValue Res = DAG.getNode(Opc, DL, ContainerVT, NewX, NewY,
5803 DAG.getUNDEF(ContainerVT), Mask, VL);
5804 if (VT.isFixedLengthVector())
5805 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
5806 return Res;
5807}
5808
5809 /// Get the RISC-V target-specific VL op for a given SDNode.
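/// For instance, ISD::ADD and ISD::VP_ADD both map to RISCVISD::ADD_VL via
/// the OP_CASE/VP_CASE expansions below.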
5810static unsigned getRISCVVLOp(SDValue Op) {
5811#define OP_CASE(NODE) \
5812 case ISD::NODE: \
5813 return RISCVISD::NODE##_VL;
5814#define VP_CASE(NODE) \
5815 case ISD::VP_##NODE: \
5816 return RISCVISD::NODE##_VL;
5817 // clang-format off
5818 switch (Op.getOpcode()) {
5819 default:
5820 llvm_unreachable("don't have RISC-V specified VL op for this SDNode");
5821 OP_CASE(ADD)
5822 OP_CASE(SUB)
5823 OP_CASE(MUL)
5824 OP_CASE(MULHS)
5825 OP_CASE(MULHU)
5826 OP_CASE(SDIV)
5827 OP_CASE(SREM)
5828 OP_CASE(UDIV)
5829 OP_CASE(UREM)
5830 OP_CASE(SHL)
5831 OP_CASE(SRA)
5832 OP_CASE(SRL)
5833 OP_CASE(ROTL)
5834 OP_CASE(ROTR)
5835 OP_CASE(BSWAP)
5836 OP_CASE(CTTZ)
5837 OP_CASE(CTLZ)
5838 OP_CASE(CTPOP)
5839 OP_CASE(BITREVERSE)
5840 OP_CASE(SADDSAT)
5841 OP_CASE(UADDSAT)
5842 OP_CASE(SSUBSAT)
5843 OP_CASE(USUBSAT)
5844 OP_CASE(AVGFLOORU)
5845 OP_CASE(AVGCEILU)
5846 OP_CASE(FADD)
5847 OP_CASE(FSUB)
5848 OP_CASE(FMUL)
5849 OP_CASE(FDIV)
5850 OP_CASE(FNEG)
5851 OP_CASE(FABS)
5852 OP_CASE(FSQRT)
5853 OP_CASE(SMIN)
5854 OP_CASE(SMAX)
5855 OP_CASE(UMIN)
5856 OP_CASE(UMAX)
5857 OP_CASE(STRICT_FADD)
5858 OP_CASE(STRICT_FSUB)
5859 OP_CASE(STRICT_FMUL)
5860 OP_CASE(STRICT_FDIV)
5861 OP_CASE(STRICT_FSQRT)
5862 VP_CASE(ADD) // VP_ADD
5863 VP_CASE(SUB) // VP_SUB
5864 VP_CASE(MUL) // VP_MUL
5865 VP_CASE(SDIV) // VP_SDIV
5866 VP_CASE(SREM) // VP_SREM
5867 VP_CASE(UDIV) // VP_UDIV
5868 VP_CASE(UREM) // VP_UREM
5869 VP_CASE(SHL) // VP_SHL
5870 VP_CASE(FADD) // VP_FADD
5871 VP_CASE(FSUB) // VP_FSUB
5872 VP_CASE(FMUL) // VP_FMUL
5873 VP_CASE(FDIV) // VP_FDIV
5874 VP_CASE(FNEG) // VP_FNEG
5875 VP_CASE(FABS) // VP_FABS
5876 VP_CASE(SMIN) // VP_SMIN
5877 VP_CASE(SMAX) // VP_SMAX
5878 VP_CASE(UMIN) // VP_UMIN
5879 VP_CASE(UMAX) // VP_UMAX
5880 VP_CASE(FCOPYSIGN) // VP_FCOPYSIGN
5881 VP_CASE(SETCC) // VP_SETCC
5882 VP_CASE(SINT_TO_FP) // VP_SINT_TO_FP
5883 VP_CASE(UINT_TO_FP) // VP_UINT_TO_FP
5884 VP_CASE(BITREVERSE) // VP_BITREVERSE
5885 VP_CASE(SADDSAT) // VP_SADDSAT
5886 VP_CASE(UADDSAT) // VP_UADDSAT
5887 VP_CASE(SSUBSAT) // VP_SSUBSAT
5888 VP_CASE(USUBSAT) // VP_USUBSAT
5889 VP_CASE(BSWAP) // VP_BSWAP
5890 VP_CASE(CTLZ) // VP_CTLZ
5891 VP_CASE(CTTZ) // VP_CTTZ
5892 VP_CASE(CTPOP) // VP_CTPOP
5894 case ISD::VP_CTLZ_ZERO_UNDEF:
5895 return RISCVISD::CTLZ_VL;
5897 case ISD::VP_CTTZ_ZERO_UNDEF:
5898 return RISCVISD::CTTZ_VL;
5899 case ISD::FMA:
5900 case ISD::VP_FMA:
5901 return RISCVISD::VFMADD_VL;
5902 case ISD::STRICT_FMA:
5904 case ISD::AND:
5905 case ISD::VP_AND:
5906 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
5907 return RISCVISD::VMAND_VL;
5908 return RISCVISD::AND_VL;
5909 case ISD::OR:
5910 case ISD::VP_OR:
5911 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
5912 return RISCVISD::VMOR_VL;
5913 return RISCVISD::OR_VL;
5914 case ISD::XOR:
5915 case ISD::VP_XOR:
5916 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
5917 return RISCVISD::VMXOR_VL;
5918 return RISCVISD::XOR_VL;
5919 case ISD::VP_SELECT:
5920 case ISD::VP_MERGE:
5921 return RISCVISD::VMERGE_VL;
5922 case ISD::VP_ASHR:
5923 return RISCVISD::SRA_VL;
5924 case ISD::VP_LSHR:
5925 return RISCVISD::SRL_VL;
5926 case ISD::VP_SQRT:
5927 return RISCVISD::FSQRT_VL;
5928 case ISD::VP_SIGN_EXTEND:
5929 return RISCVISD::VSEXT_VL;
5930 case ISD::VP_ZERO_EXTEND:
5931 return RISCVISD::VZEXT_VL;
5932 case ISD::VP_FP_TO_SINT:
5934 case ISD::VP_FP_TO_UINT:
5936 case ISD::FMINNUM:
5937 case ISD::VP_FMINNUM:
5938 return RISCVISD::VFMIN_VL;
5939 case ISD::FMAXNUM:
5940 case ISD::VP_FMAXNUM:
5941 return RISCVISD::VFMAX_VL;
5942 case ISD::LRINT:
5943 case ISD::VP_LRINT:
5944 case ISD::LLRINT:
5945 case ISD::VP_LLRINT:
5947 }
5948 // clang-format on
5949#undef OP_CASE
5950#undef VP_CASE
5951}
5952
5953 /// Return true if a RISC-V target-specific op has a merge operand.
5954static bool hasMergeOp(unsigned Opcode) {
5955 assert(Opcode > RISCVISD::FIRST_NUMBER &&
5957 "not a RISC-V target specific op");
5959 126 &&
5962 21 &&
5963 "adding target specific op should update this function");
5964 if (Opcode >= RISCVISD::ADD_VL && Opcode <= RISCVISD::VFMAX_VL)
5965 return true;
5966 if (Opcode == RISCVISD::FCOPYSIGN_VL)
5967 return true;
5968 if (Opcode >= RISCVISD::VWMUL_VL && Opcode <= RISCVISD::VFWSUB_W_VL)
5969 return true;
5970 if (Opcode == RISCVISD::SETCC_VL)
5971 return true;
5972 if (Opcode >= RISCVISD::STRICT_FADD_VL && Opcode <= RISCVISD::STRICT_FDIV_VL)
5973 return true;
5974 if (Opcode == RISCVISD::VMERGE_VL)
5975 return true;
5976 return false;
5977}
5978
5979 /// Return true if a RISC-V target-specific op has a mask operand.
5980static bool hasMaskOp(unsigned Opcode) {
5981 assert(Opcode > RISCVISD::FIRST_NUMBER &&
5983 "not a RISC-V target specific op");
5985 126 &&
5988 21 &&
5989 "adding target specific op should update this function");
5990 if (Opcode >= RISCVISD::TRUNCATE_VECTOR_VL && Opcode <= RISCVISD::SETCC_VL)
5991 return true;
5992 if (Opcode >= RISCVISD::VRGATHER_VX_VL && Opcode <= RISCVISD::VFIRST_VL)
5993 return true;
5994 if (Opcode >= RISCVISD::STRICT_FADD_VL &&
5996 return true;
5997 return false;
5998}
5999
6001 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
6002 SDLoc DL(Op);
6003
6006
6007 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
6008 if (!Op.getOperand(j).getValueType().isVector()) {
6009 LoOperands[j] = Op.getOperand(j);
6010 HiOperands[j] = Op.getOperand(j);
6011 continue;
6012 }
6013 std::tie(LoOperands[j], HiOperands[j]) =
6014 DAG.SplitVector(Op.getOperand(j), DL);
6015 }
6016
6017 SDValue LoRes =
6018 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
6019 SDValue HiRes =
6020 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
6021
6022 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
6023}
6024
6026 assert(ISD::isVPOpcode(Op.getOpcode()) && "Not a VP op");
6027 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
6028 SDLoc DL(Op);
6029
6032
6033 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
6034 if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == j) {
6035 std::tie(LoOperands[j], HiOperands[j]) =
6036 DAG.SplitEVL(Op.getOperand(j), Op.getValueType(), DL);
6037 continue;
6038 }
6039 if (!Op.getOperand(j).getValueType().isVector()) {
6040 LoOperands[j] = Op.getOperand(j);
6041 HiOperands[j] = Op.getOperand(j);
6042 continue;
6043 }
6044 std::tie(LoOperands[j], HiOperands[j]) =
6045 DAG.SplitVector(Op.getOperand(j), DL);
6046 }
6047
6048 SDValue LoRes =
6049 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
6050 SDValue HiRes =
6051 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
6052
6053 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
6054}
6055
6057 SDLoc DL(Op);
6058
6059 auto [Lo, Hi] = DAG.SplitVector(Op.getOperand(1), DL);
6060 auto [MaskLo, MaskHi] = DAG.SplitVector(Op.getOperand(2), DL);
6061 auto [EVLLo, EVLHi] =
6062 DAG.SplitEVL(Op.getOperand(3), Op.getOperand(1).getValueType(), DL);
6063
6064 SDValue ResLo =
6065 DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
6066 {Op.getOperand(0), Lo, MaskLo, EVLLo}, Op->getFlags());
6067 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
6068 {ResLo, Hi, MaskHi, EVLHi}, Op->getFlags());
6069}
6070
6072
6073 assert(Op->isStrictFPOpcode());
6074
6075 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op->getValueType(0));
6076
6077 SDVTList LoVTs = DAG.getVTList(LoVT, Op->getValueType(1));
6078 SDVTList HiVTs = DAG.getVTList(HiVT, Op->getValueType(1));
6079
6080 SDLoc DL(Op);
6081
6084
6085 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
6086 if (!Op.getOperand(j).getValueType().isVector()) {
6087 LoOperands[j] = Op.getOperand(j);
6088 HiOperands[j] = Op.getOperand(j);
6089 continue;
6090 }
6091 std::tie(LoOperands[j], HiOperands[j]) =
6092 DAG.SplitVector(Op.getOperand(j), DL);
6093 }
6094
6095 SDValue LoRes =
6096 DAG.getNode(Op.getOpcode(), DL, LoVTs, LoOperands, Op->getFlags());
6097 HiOperands[0] = LoRes.getValue(1);
6098 SDValue HiRes =
6099 DAG.getNode(Op.getOpcode(), DL, HiVTs, HiOperands, Op->getFlags());
6100
6101 SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, DL, Op->getValueType(0),
6102 LoRes.getValue(0), HiRes.getValue(0));
6103 return DAG.getMergeValues({V, HiRes.getValue(1)}, DL);
6104}
6105
6106 SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
6107 SelectionDAG &DAG) const {
6108 switch (Op.getOpcode()) {
6109 default:
6110 report_fatal_error("unimplemented operand");
6111 case ISD::ATOMIC_FENCE:
6112 return LowerATOMIC_FENCE(Op, DAG, Subtarget);
6113 case ISD::GlobalAddress:
6114 return lowerGlobalAddress(Op, DAG);
6115 case ISD::BlockAddress:
6116 return lowerBlockAddress(Op, DAG);
6117 case ISD::ConstantPool:
6118 return lowerConstantPool(Op, DAG);
6119 case ISD::JumpTable:
6120 return lowerJumpTable(Op, DAG);
6121 case ISD::GlobalTLSAddress:
6122 return lowerGlobalTLSAddress(Op, DAG);
6123 case ISD::Constant:
6124 return lowerConstant(Op, DAG, Subtarget);
6125 case ISD::SELECT:
6126 return lowerSELECT(Op, DAG);
6127 case ISD::BRCOND:
6128 return lowerBRCOND(Op, DAG);
6129 case ISD::VASTART:
6130 return lowerVASTART(Op, DAG);
6131 case ISD::FRAMEADDR:
6132 return lowerFRAMEADDR(Op, DAG);
6133 case ISD::RETURNADDR:
6134 return lowerRETURNADDR(Op, DAG);
6135 case ISD::SADDO:
6136 case ISD::SSUBO:
6137 return lowerSADDO_SSUBO(Op, DAG);
6138 case ISD::SMULO:
6139 return lowerSMULO(Op, DAG);
6140 case ISD::SHL_PARTS:
6141 return lowerShiftLeftParts(Op, DAG);
6142 case ISD::SRA_PARTS:
6143 return lowerShiftRightParts(Op, DAG, true);
6144 case ISD::SRL_PARTS:
6145 return lowerShiftRightParts(Op, DAG, false);
6146 case ISD::ROTL:
6147 case ISD::ROTR:
6148 if (Op.getValueType().isFixedLengthVector()) {
6149 assert(Subtarget.hasStdExtZvkb());
6150 return lowerToScalableOp(Op, DAG);
6151 }
6152 assert(Subtarget.hasVendorXTHeadBb() &&
6153 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
6154 "Unexpected custom legalization");
6155 // XTHeadBb only supports rotate by constant.
6156 if (!isa<ConstantSDNode>(Op.getOperand(1)))
6157 return SDValue();
6158 return Op;
6159 case ISD::BITCAST: {
6160 SDLoc DL(Op);
6161 EVT VT = Op.getValueType();
6162 SDValue Op0 = Op.getOperand(0);
6163 EVT Op0VT = Op0.getValueType();
6164 MVT XLenVT = Subtarget.getXLenVT();
6165 if (VT == MVT::f16 && Op0VT == MVT::i16 &&
6166 Subtarget.hasStdExtZfhminOrZhinxmin()) {
6167 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
6168 SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
6169 return FPConv;
6170 }
6171 if (VT == MVT::bf16 && Op0VT == MVT::i16 &&
6172 Subtarget.hasStdExtZfbfmin()) {
6173 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
6174 SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::bf16, NewOp0);
6175 return FPConv;
6176 }
6177 if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
6178 Subtarget.hasStdExtFOrZfinx()) {
6179 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
6180 SDValue FPConv =
6181 DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
6182 return FPConv;
6183 }
6184 if (VT == MVT::f64 && Op0VT == MVT::i64 && XLenVT == MVT::i32) {
6185 SDValue Lo, Hi;
6186 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
6187 SDValue RetReg =
6188 DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
6189 return RetReg;
6190 }
6191
6192 // Consider other scalar<->scalar casts as legal if the types are legal.
6193 // Otherwise expand them.
6194 if (!VT.isVector() && !Op0VT.isVector()) {
6195 if (isTypeLegal(VT) && isTypeLegal(Op0VT))
6196 return Op;
6197 return SDValue();
6198 }
6199
6200 assert(!VT.isScalableVector() && !Op0VT.isScalableVector() &&
6201 "Unexpected types");
6202
6203 if (VT.isFixedLengthVector()) {
6204 // We can handle fixed length vector bitcasts with a simple replacement
6205 // in isel.
6206 if (Op0VT.isFixedLengthVector())
6207 return Op;
6208 // When bitcasting from scalar to fixed-length vector, insert the scalar
6209 // into a one-element vector of the result type, and perform a vector
6210 // bitcast.
6211 if (!Op0VT.isVector()) {
6212 EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
6213 if (!isTypeLegal(BVT))
6214 return SDValue();
6215 return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT,
6216 DAG.getUNDEF(BVT), Op0,
6217 DAG.getVectorIdxConstant(0, DL)));
6218 }
6219 return SDValue();
6220 }
6221 // Custom-legalize bitcasts from fixed-length vector types to scalar types
6222 // thus: bitcast the vector to a one-element vector type whose element type
6223 // is the same as the result type, and extract the first element.
6224 if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
6225 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
6226 if (!isTypeLegal(BVT))
6227 return SDValue();
6228 SDValue BVec = DAG.getBitcast(BVT, Op0);
6229 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
6230 DAG.getVectorIdxConstant(0, DL));
6231 }
6232 return SDValue();
6233 }
6234 case ISD::INTRINSIC_WO_CHAIN:
6235 return LowerINTRINSIC_WO_CHAIN(Op, DAG);
6236 case ISD::INTRINSIC_W_CHAIN:
6237 return LowerINTRINSIC_W_CHAIN(Op, DAG);
6238 case ISD::INTRINSIC_VOID:
6239 return LowerINTRINSIC_VOID(Op, DAG);
6240 case ISD::IS_FPCLASS:
6241 return LowerIS_FPCLASS(Op, DAG);
6242 case ISD::BITREVERSE: {
6243 MVT VT = Op.getSimpleValueType();
6244 if (VT.isFixedLengthVector()) {
6245 assert(Subtarget.hasStdExtZvbb());
6246 return lowerToScalableOp(Op, DAG);
6247 }
6248 SDLoc DL(Op);
6249 assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
6250 assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");
6251 // Expand bitreverse to a bswap(rev8) followed by brev8.
6252 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0));
6253 return DAG.getNode(RISCVISD::BREV8, DL, VT, BSwap);
6254 }
6255 case ISD::TRUNCATE:
6256 // Only custom-lower vector truncates
6257 if (!Op.getSimpleValueType().isVector())
6258 return Op;
6259 return lowerVectorTruncLike(Op, DAG);
6260 case ISD::ANY_EXTEND:
6261 case ISD::ZERO_EXTEND:
6262 if (Op.getOperand(0).getValueType().isVector() &&
6263 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6264 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
6265 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL);
6266 case ISD::SIGN_EXTEND:
6267 if (Op.getOperand(0).getValueType().isVector() &&
6268 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6269 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
6270 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL);
6271 case ISD::SPLAT_VECTOR_PARTS:
6272 return lowerSPLAT_VECTOR_PARTS(Op, DAG);
6273 case ISD::INSERT_VECTOR_ELT:
6274 return lowerINSERT_VECTOR_ELT(Op, DAG);
6275 case ISD::EXTRACT_VECTOR_ELT:
6276 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
6277 case ISD::SCALAR_TO_VECTOR: {
6278 MVT VT = Op.getSimpleValueType();
6279 SDLoc DL(Op);
6280 SDValue Scalar = Op.getOperand(0);
6281 if (VT.getVectorElementType() == MVT::i1) {
6282 MVT WideVT = VT.changeVectorElementType(MVT::i8);
6283 SDValue V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, WideVT, Scalar);
6284 return DAG.getNode(ISD::TRUNCATE, DL, VT, V);
6285 }
6286 MVT ContainerVT = VT;
6287 if (VT.isFixedLengthVector())
6288 ContainerVT = getContainerForFixedLengthVector(VT);
6289 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
6290 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Scalar);
6291 SDValue V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT,
6292 DAG.getUNDEF(ContainerVT), Scalar, VL);
6293 if (VT.isFixedLengthVector())
6294 V = convertFromScalableVector(VT, V, DAG, Subtarget);
6295 return V;
6296 }
6297 case ISD::VSCALE: {
6298 MVT XLenVT = Subtarget.getXLenVT();
6299 MVT VT = Op.getSimpleValueType();
6300 SDLoc DL(Op);
6301 SDValue Res = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
6302 // We define our scalable vector types for LMUL=1 to use a 64-bit known
6303 // minimum size, e.g. <vscale x 2 x i32>. VLENB is in bytes, so we calculate
6304 // vscale as VLENB / 8.
6305 static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
6306 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
6307 report_fatal_error("Support for VLEN==32 is incomplete.");
6308 // We assume VLENB is a multiple of 8. We manually choose the best shift
6309 // here because SimplifyDemandedBits isn't always able to simplify it.
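// For example, (vscale * 4) becomes (srl VLENB, 1), since vscale = VLENB / 8,
// while (vscale * 24) becomes (mul VLENB, 3) to avoid the extra shift.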
6310 uint64_t Val = Op.getConstantOperandVal(0);
6311 if (isPowerOf2_64(Val)) {
6312 uint64_t Log2 = Log2_64(Val);
6313 if (Log2 < 3)
6314 Res = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
6315 DAG.getConstant(3 - Log2, DL, VT));
6316 else if (Log2 > 3)
6317 Res = DAG.getNode(ISD::SHL, DL, XLenVT, Res,
6318 DAG.getConstant(Log2 - 3, DL, XLenVT));
6319 } else if ((Val % 8) == 0) {
6320 // If the multiplier is a multiple of 8, scale it down to avoid needing
6321 // to shift the VLENB value.
6322 Res = DAG.getNode(ISD::MUL, DL, XLenVT, Res,
6323 DAG.getConstant(Val / 8, DL, XLenVT));
6324 } else {
6325 SDValue VScale = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
6326 DAG.getConstant(3, DL, XLenVT));
6327 Res = DAG.getNode(ISD::MUL, DL, XLenVT, VScale,
6328 DAG.getConstant(Val, DL, XLenVT));
6329 }
6330 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
6331 }
6332 case ISD::FPOWI: {
6333 // Custom promote f16 powi with illegal i32 integer type on RV64. Once
6334 // promoted this will be legalized into a libcall by LegalizeIntegerTypes.
6335 if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() &&
6336 Op.getOperand(1).getValueType() == MVT::i32) {
6337 SDLoc DL(Op);
6338 SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
6339 SDValue Powi =
6340 DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1));
6341 return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi,
6342 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6343 }
6344 return SDValue();
6345 }
6346 case ISD::FMAXIMUM:
6347 case ISD::FMINIMUM:
6348 if (Op.getValueType() == MVT::nxv32f16 &&
6349 (Subtarget.hasVInstructionsF16Minimal() &&
6350 !Subtarget.hasVInstructionsF16()))
6351 return SplitVectorOp(Op, DAG);
6352 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
6353 case ISD::FP_EXTEND: {
6354 SDLoc DL(Op);
6355 EVT VT = Op.getValueType();
6356 SDValue Op0 = Op.getOperand(0);
6357 EVT Op0VT = Op0.getValueType();
6358 if (VT == MVT::f32 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin())
6359 return DAG.getNode(RISCVISD::FP_EXTEND_BF16, DL, MVT::f32, Op0);
6360 if (VT == MVT::f64 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) {
6361 SDValue FloatVal =
6362 DAG.getNode(RISCVISD::FP_EXTEND_BF16, DL, MVT::f32, Op0);
6363 return DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, FloatVal);
6364 }
6365
6366 if (!Op.getValueType().isVector())
6367 return Op;
6368 return lowerVectorFPExtendOrRoundLike(Op, DAG);
6369 }
6370 case ISD::FP_ROUND: {
6371 SDLoc DL(Op);
6372 EVT VT = Op.getValueType();
6373 SDValue Op0 = Op.getOperand(0);
6374 EVT Op0VT = Op0.getValueType();
6375 if (VT == MVT::bf16 && Op0VT == MVT::f32 && Subtarget.hasStdExtZfbfmin())
6376 return DAG.getNode(RISCVISD::FP_ROUND_BF16, DL, MVT::bf16, Op0);
6377 if (VT == MVT::bf16 && Op0VT == MVT::f64 && Subtarget.hasStdExtZfbfmin() &&
6378 Subtarget.hasStdExtDOrZdinx()) {
6379 SDValue FloatVal =
6380 DAG.getNode(ISD::FP_ROUND, DL, MVT::f32, Op0,
6381 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6382 return DAG.getNode(RISCVISD::FP_ROUND_BF16, DL, MVT::bf16, FloatVal);
6383 }
6384
6385 if (!Op.getValueType().isVector())
6386 return Op;
6387 return lowerVectorFPExtendOrRoundLike(Op, DAG);
6388 }
6389 case ISD::STRICT_FP_EXTEND:
6390 case ISD::STRICT_FP_ROUND:
6391 return lowerStrictFPExtendOrRoundLike(Op, DAG);
6392 case ISD::SINT_TO_FP:
6393 case ISD::UINT_TO_FP:
6394 if (Op.getValueType().isVector() &&
6395 Op.getValueType().getScalarType() == MVT::f16 &&
6396 (Subtarget.hasVInstructionsF16Minimal() &&
6397 !Subtarget.hasVInstructionsF16())) {
6398 if (Op.getValueType() == MVT::nxv32f16)
6399 return SplitVectorOp(Op, DAG);
6400 // int -> f32
6401 SDLoc DL(Op);
6402 MVT NVT =
6403 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
6404 SDValue NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
6405 // f32 -> f16
6406 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
6407 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6408 }
6409 [[fallthrough]];
6410 case ISD::FP_TO_SINT:
6411 case ISD::FP_TO_UINT:
6412 if (SDValue Op1 = Op.getOperand(0);
6413 Op1.getValueType().isVector() &&
6414 Op1.getValueType().getScalarType() == MVT::f16 &&
6415 (Subtarget.hasVInstructionsF16Minimal() &&
6416 !Subtarget.hasVInstructionsF16())) {
6417 if (Op1.getValueType() == MVT::nxv32f16)
6418 return SplitVectorOp(Op, DAG);
6419 // f16 -> f32
6420 SDLoc DL(Op);
6421 MVT NVT = MVT::getVectorVT(MVT::f32,
6422 Op1.getValueType().getVectorElementCount());
6423 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
6424 // f32 -> int
6425 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), WidenVec);
6426 }
6427 [[fallthrough]];
6428 case ISD::STRICT_FP_TO_SINT:
6429 case ISD::STRICT_FP_TO_UINT:
6430 case ISD::STRICT_SINT_TO_FP:
6431 case ISD::STRICT_UINT_TO_FP: {
6432 // RVV can only do fp<->int conversions to types half/double the size of
6433 // the source. We custom-lower any conversions that do two hops into
6434 // sequences.
6435 MVT VT = Op.getSimpleValueType();
6436 if (!VT.isVector())
6437 return Op;
6438 SDLoc DL(Op);
6439 bool IsStrict = Op->isStrictFPOpcode();
6440 SDValue Src = Op.getOperand(0 + IsStrict);
6441 MVT EltVT = VT.getVectorElementType();
6442 MVT SrcVT = Src.getSimpleValueType();
6443 MVT SrcEltVT = SrcVT.getVectorElementType();
6444 unsigned EltSize = EltVT.getSizeInBits();
6445 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
6446 assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
6447 "Unexpected vector element types");
6448
6449 bool IsInt2FP = SrcEltVT.isInteger();
6450 // Widening conversions
6451 if (EltSize > (2 * SrcEltSize)) {
6452 if (IsInt2FP) {
6453 // Do a regular integer sign/zero extension then convert to float.
6454 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize / 2),
6456 unsigned ExtOpcode = (Op.getOpcode() == ISD::UINT_TO_FP ||
6457 Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
6460 SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
6461 if (IsStrict)
6462 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(),
6463 Op.getOperand(0), Ext);
6464 return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
6465 }
6466 // FP2Int
6467 assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
6468 // Do one doubling fp_extend then complete the operation by converting
6469 // to int.
6470 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
6471 if (IsStrict) {
6472 auto [FExt, Chain] =
6473 DAG.getStrictFPExtendOrRound(Src, Op.getOperand(0), DL, InterimFVT);
6474 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(), Chain, FExt);
6475 }
6476 SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
6477 return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
6478 }
6479
6480 // Narrowing conversions
6481 if (SrcEltSize > (2 * EltSize)) {
6482 if (IsInt2FP) {
6483 // One narrowing int_to_fp, then an fp_round.
6484 assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
6485 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
6486 if (IsStrict) {
6487 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL,
6488 DAG.getVTList(InterimFVT, MVT::Other),
6489 Op.getOperand(0), Src);
6490 SDValue Chain = Int2FP.getValue(1);
6491 return DAG.getStrictFPExtendOrRound(Int2FP, Chain, DL, VT).first;
6492 }
6493 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
6494 return DAG.getFPExtendOrRound(Int2FP, DL, VT);
6495 }
6496 // FP2Int
6497 // One narrowing fp_to_int, then truncate the integer. If the float isn't
6498 // representable by the integer, the result is poison.
6499 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
6501 if (IsStrict) {
6502 SDValue FP2Int =
6503 DAG.getNode(Op.getOpcode(), DL, DAG.getVTList(IVecVT, MVT::Other),
6504 Op.getOperand(0), Src);
6505 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
6506 return DAG.getMergeValues({Res, FP2Int.getValue(1)}, DL);
6507 }
6508 SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
6509 return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
6510 }
6511
6512 // Scalable vectors can exit here. Patterns will handle equally-sized
6513 // conversions halving/doubling ones.
6514 if (!VT.isFixedLengthVector())
6515 return Op;
6516
6517 // For fixed-length vectors we lower to a custom "VL" node.
6518 unsigned RVVOpc = 0;
6519 switch (Op.getOpcode()) {
6520 default:
6521 llvm_unreachable("Impossible opcode");
6522 case ISD::FP_TO_SINT:
6524 break;
6525 case ISD::FP_TO_UINT:
6527 break;
6528 case ISD::SINT_TO_FP:
6529 RVVOpc = RISCVISD::SINT_TO_FP_VL;
6530 break;
6531 case ISD::UINT_TO_FP:
6532 RVVOpc = RISCVISD::UINT_TO_FP_VL;
6533 break;
6536 break;
6539 break;
6542 break;
6545 break;
6546 }
6547
6548 MVT ContainerVT = getContainerForFixedLengthVector(VT);
6549 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
6550 assert(ContainerVT.getVectorElementCount() == SrcContainerVT.getVectorElementCount() &&
6551 "Expected same element count");
6552
6553 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
6554
6555 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
6556 if (IsStrict) {
6557 Src = DAG.getNode(RVVOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
6558 Op.getOperand(0), Src, Mask, VL);
6559 SDValue SubVec = convertFromScalableVector(VT, Src, DAG, Subtarget);
6560 return DAG.getMergeValues({SubVec, Src.getValue(1)}, DL);
6561 }
6562 Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
6563 return convertFromScalableVector(VT, Src, DAG, Subtarget);
6564 }
6565 case ISD::FP_TO_SINT_SAT:
6566 case ISD::FP_TO_UINT_SAT:
6567 return lowerFP_TO_INT_SAT(Op, DAG, Subtarget);
6568 case ISD::FP_TO_BF16: {
6569 // Custom lower to ensure the libcall return is passed in an FPR on hard
6570 // float ABIs.
6571 assert(!Subtarget.isSoftFPABI() && "Unexpected custom legalization");
6572 SDLoc DL(Op);
6573 MakeLibCallOptions CallOptions;
6574 RTLIB::Libcall LC =
6575 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
6576 SDValue Res =
6577 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
6578 if (Subtarget.is64Bit() && !RV64LegalI32)
6579 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
6580 return DAG.getBitcast(MVT::i32, Res);
6581 }
6582 case ISD::BF16_TO_FP: {
6583 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalization");
6584 MVT VT = Op.getSimpleValueType();
6585 SDLoc DL(Op);
6586 Op = DAG.getNode(
6587 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
6588 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
6589 SDValue Res = Subtarget.is64Bit()
6590 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op)
6591 : DAG.getBitcast(MVT::f32, Op);
6592 // fp_extend if the target VT is bigger than f32.
6593 if (VT != MVT::f32)
6594 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
6595 return Res;
6596 }
6597 case ISD::FP_TO_FP16: {
6598 // Custom lower to ensure the libcall return is passed in an FPR on hard
6599 // float ABIs.
6600 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
6601 SDLoc DL(Op);
6602 MakeLibCallOptions CallOptions;
6603 RTLIB::Libcall LC =
6604 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::f16);
6605 SDValue Res =
6606 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
6607 if (Subtarget.is64Bit() && !RV64LegalI32)
6608 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
6609 return DAG.getBitcast(MVT::i32, Res);
6610 }
6611 case ISD::FP16_TO_FP: {
6612 // Custom lower to ensure the libcall argument is passed in an FPR on hard
6613 // float ABIs.
6614 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
6615 SDLoc DL(Op);
6616 MakeLibCallOptions CallOptions;
6617 SDValue Arg = Subtarget.is64Bit()
6618 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32,
6619 Op.getOperand(0))
6620 : DAG.getBitcast(MVT::f32, Op.getOperand(0));
6621 SDValue Res =
6622 makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg, CallOptions, DL)
6623 .first;
6624 return Res;
6625 }
6626 case ISD::FTRUNC:
6627 case ISD::FCEIL:
6628 case ISD::FFLOOR:
6629 case ISD::FNEARBYINT:
6630 case ISD::FRINT:
6631 case ISD::FROUND:
6632 case ISD::FROUNDEVEN:
6633 return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
6634 case ISD::LRINT:
6635 case ISD::LLRINT:
6636 return lowerVectorXRINT(Op, DAG, Subtarget);
6637 case ISD::VECREDUCE_ADD:
6638 case ISD::VECREDUCE_UMAX:
6639 case ISD::VECREDUCE_SMAX:
6640 case ISD::VECREDUCE_UMIN:
6641 case ISD::VECREDUCE_SMIN:
6642 return lowerVECREDUCE(Op, DAG);
6643 case ISD::VECREDUCE_AND:
6644 case ISD::VECREDUCE_OR:
6645 case ISD::VECREDUCE_XOR:
6646 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6647 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false);
6648 return lowerVECREDUCE(Op, DAG);
6649 case ISD::VECREDUCE_FADD:
6650 case ISD::VECREDUCE_SEQ_FADD:
6651 case ISD::VECREDUCE_FMIN:
6652 case ISD::VECREDUCE_FMAX:
6653 case ISD::VECREDUCE_FMAXIMUM:
6654 case ISD::VECREDUCE_FMINIMUM:
6655 return lowerFPVECREDUCE(Op, DAG);
6656 case ISD::VP_REDUCE_ADD:
6657 case ISD::VP_REDUCE_UMAX:
6658 case ISD::VP_REDUCE_SMAX:
6659 case ISD::VP_REDUCE_UMIN:
6660 case ISD::VP_REDUCE_SMIN:
6661 case ISD::VP_REDUCE_FADD:
6662 case ISD::VP_REDUCE_SEQ_FADD:
6663 case ISD::VP_REDUCE_FMIN:
6664 case ISD::VP_REDUCE_FMAX:
6665 case ISD::VP_REDUCE_FMINIMUM:
6666 case ISD::VP_REDUCE_FMAXIMUM:
6667 if (Op.getOperand(1).getValueType() == MVT::nxv32f16 &&
6668 (Subtarget.hasVInstructionsF16Minimal() &&
6669 !Subtarget.hasVInstructionsF16()))
6670 return SplitVectorReductionOp(Op, DAG);
6671 return lowerVPREDUCE(Op, DAG);
6672 case ISD::VP_REDUCE_AND:
6673 case ISD::VP_REDUCE_OR:
6674 case ISD::VP_REDUCE_XOR:
6675 if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
6676 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
6677 return lowerVPREDUCE(Op, DAG);
6678 case ISD::VP_CTTZ_ELTS:
6679 case ISD::VP_CTTZ_ELTS_ZERO_UNDEF:
6680 return lowerVPCttzElements(Op, DAG);
6681 case ISD::UNDEF: {
6682 MVT ContainerVT = getContainerForFixedLengthVector(Op.getSimpleValueType());
6683 return convertFromScalableVector(Op.getSimpleValueType(),
6684 DAG.getUNDEF(ContainerVT), DAG, Subtarget);
6685 }
6686 case ISD::INSERT_SUBVECTOR:
6687 return lowerINSERT_SUBVECTOR(Op, DAG);
6688 case ISD::EXTRACT_SUBVECTOR:
6689 return lowerEXTRACT_SUBVECTOR(Op, DAG);
6690 case ISD::VECTOR_DEINTERLEAVE:
6691 return lowerVECTOR_DEINTERLEAVE(Op, DAG);
6692 case ISD::VECTOR_INTERLEAVE:
6693 return lowerVECTOR_INTERLEAVE(Op, DAG);
6694 case ISD::STEP_VECTOR:
6695 return lowerSTEP_VECTOR(Op, DAG);
6696 case ISD::VECTOR_REVERSE:
6697 return lowerVECTOR_REVERSE(Op, DAG);
6698 case ISD::VECTOR_SPLICE:
6699 return lowerVECTOR_SPLICE(Op, DAG);
6700 case ISD::BUILD_VECTOR:
6701 return lowerBUILD_VECTOR(Op, DAG, Subtarget);
6702 case ISD::SPLAT_VECTOR:
6703 if (Op.getValueType().getScalarType() == MVT::f16 &&
6704 (Subtarget.hasVInstructionsF16Minimal() &&
6705 !Subtarget.hasVInstructionsF16())) {
6706 if (Op.getValueType() == MVT::nxv32f16)
6707 return SplitVectorOp(Op, DAG);
6708 SDLoc DL(Op);
6709 SDValue NewScalar =
6710 DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
6711 SDValue NewSplat = DAG.getNode(
6713 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount()),
6714 NewScalar);
6715 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NewSplat,
6716 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6717 }
6718 if (Op.getValueType().getVectorElementType() == MVT::i1)
6719 return lowerVectorMaskSplat(Op, DAG);
6720 return SDValue();
6721 case ISD::VECTOR_SHUFFLE:
6722 return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
6723 case ISD::CONCAT_VECTORS: {
6724 // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
6725 // better than going through the stack, which is what the default expansion does.
6726 SDLoc DL(Op);
6727 MVT VT = Op.getSimpleValueType();
6728 MVT ContainerVT = VT;
6729 if (VT.isFixedLengthVector())
6730 ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
6731
6732 // Recursively split concat_vectors with more than 2 operands:
6733 //
6734 // concat_vector op1, op2, op3, op4
6735 // ->
6736 // concat_vector (concat_vector op1, op2), (concat_vector op3, op4)
6737 //
6738 // This reduces the length of the chain of vslideups and allows us to
6739 // perform the vslideups at a smaller LMUL, limited to MF2.
6740 if (Op.getNumOperands() > 2 &&
6741 ContainerVT.bitsGE(getLMUL1VT(ContainerVT))) {
6742 MVT HalfVT = VT.getHalfNumVectorElementsVT();
6743 assert(isPowerOf2_32(Op.getNumOperands()));
6744 size_t HalfNumOps = Op.getNumOperands() / 2;
6745 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
6746 Op->ops().take_front(HalfNumOps));
6747 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
6748 Op->ops().drop_front(HalfNumOps));
6749 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
6750 }
6751
6752 unsigned NumOpElts =
6753 Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
6754 SDValue Vec = DAG.getUNDEF(VT);
6755 for (const auto &OpIdx : enumerate(Op->ops())) {
6756 SDValue SubVec = OpIdx.value();
6757 // Don't insert undef subvectors.
6758 if (SubVec.isUndef())
6759 continue;
6760 Vec =
6761 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, SubVec,
6762 DAG.getVectorIdxConstant(OpIdx.index() * NumOpElts, DL));
6763 }
6764 return Vec;
6765 }
6766 case ISD::LOAD:
6767 if (auto V = expandUnalignedRVVLoad(Op, DAG))
6768 return V;
6769 if (Op.getValueType().isFixedLengthVector())
6770 return lowerFixedLengthVectorLoadToRVV(Op, DAG);
6771 return Op;
6772 case ISD::STORE:
6773 if (auto V = expandUnalignedRVVStore(Op, DAG))
6774 return V;
6775 if (Op.getOperand(1).getValueType().isFixedLengthVector())
6776 return lowerFixedLengthVectorStoreToRVV(Op, DAG);
6777 return Op;
6778 case ISD::MLOAD:
6779 case ISD::VP_LOAD:
6780 return lowerMaskedLoad(Op, DAG);
6781 case ISD::MSTORE:
6782 case ISD::VP_STORE:
6783 return lowerMaskedStore(Op, DAG);
6784 case ISD::SELECT_CC: {
6785 // This occurs because we custom legalize SETGT and SETUGT for setcc. That
6786 // causes LegalizeDAG to think we need to custom legalize select_cc. Expand
6787 // into separate SETCC+SELECT just like LegalizeDAG.
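// For example, (select_cc lhs, rhs, tv, fv, setgt) becomes
// (select (setcc lhs, rhs, setgt), tv, fv).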
6788 SDValue Tmp1 = Op.getOperand(0);
6789 SDValue Tmp2 = Op.getOperand(1);
6790 SDValue True = Op.getOperand(2);
6791 SDValue False = Op.getOperand(3);
6792 EVT VT = Op.getValueType();
6793 SDValue CC = Op.getOperand(4);
6794 EVT CmpVT = Tmp1.getValueType();
6795 EVT CCVT =
6796 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT);
6797 SDLoc DL(Op);
6798 SDValue Cond =
6799 DAG.getNode(ISD::SETCC, DL, CCVT, Tmp1, Tmp2, CC, Op->getFlags());
6800 return DAG.getSelect(DL, VT, Cond, True, False);
6801 }
6802 case ISD::SETCC: {
6803 MVT OpVT = Op.getOperand(0).getSimpleValueType();
6804 if (OpVT.isScalarInteger()) {
6805 MVT VT = Op.getSimpleValueType();
6806 SDValue LHS = Op.getOperand(0);
6807 SDValue RHS = Op.getOperand(1);
6808 ISD::CondCode CCVal = cast<CondCodeSDNode>(Op.getOperand(2))->get();
6809 assert((CCVal == ISD::SETGT || CCVal == ISD::SETUGT) &&
6810 "Unexpected CondCode");
6811
6812 SDLoc DL(Op);
6813
6814 // If the RHS is a constant in the range [-2049, 0) or (0, 2046], we can
6815 // convert this to the equivalent of (set(u)ge X, C+1) by using
6816 // (xori (slti(u) X, C+1), 1). This avoids materializing a small constant
6817 // in a register.
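// For example, (setugt X, 7) becomes (xori (sltiu X, 8), 1), and
// (setgt X, -5) becomes (xori (slti X, -4), 1).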
6818 if (isa<ConstantSDNode>(RHS)) {
6819 int64_t Imm = cast<ConstantSDNode>(RHS)->getSExtValue();
6820 if (Imm != 0 && isInt<12>((uint64_t)Imm + 1)) {
6821 // If this is an unsigned compare and the constant is -1, incrementing
6822 // the constant would change behavior. The result should be false.
6823 if (CCVal == ISD::SETUGT && Imm == -1)
6824 return DAG.getConstant(0, DL, VT);
6825 // Using getSetCCSwappedOperands will convert SET(U)GT->SET(U)LT.
6826 CCVal = ISD::getSetCCSwappedOperands(CCVal);
6827 SDValue SetCC = DAG.getSetCC(
6828 DL, VT, LHS, DAG.getConstant(Imm + 1, DL, OpVT), CCVal);
6829 return DAG.getLogicalNOT(DL, SetCC, VT);
6830 }
6831 }
6832
6833 // Not a constant we could handle, swap the operands and condition code to
6834 // SETLT/SETULT.
6835 CCVal = ISD::getSetCCSwappedOperands(CCVal);
6836 return DAG.getSetCC(DL, VT, RHS, LHS, CCVal);
6837 }
6838
6839 if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 &&
6840 (Subtarget.hasVInstructionsF16Minimal() &&
6841 !Subtarget.hasVInstructionsF16()))
6842 return SplitVectorOp(Op, DAG);
6843
6844 return lowerFixedLengthVectorSetccToRVV(Op, DAG);
6845 }
6846 case ISD::ADD:
6847 case ISD::SUB:
6848 case ISD::MUL:
6849 case ISD::MULHS:
6850 case ISD::MULHU:
6851 case ISD::AND:
6852 case ISD::OR:
6853 case ISD::XOR:
6854 case ISD::SDIV:
6855 case ISD::SREM:
6856 case ISD::UDIV:
6857 case ISD::UREM:
6858 case ISD::BSWAP:
6859 case ISD::CTPOP:
6860 return lowerToScalableOp(Op, DAG);
6861 case ISD::SHL:
6862 case ISD::SRA:
6863 case ISD::SRL:
6864 if (Op.getSimpleValueType().isFixedLengthVector())
6865 return lowerToScalableOp(Op, DAG);
6866 // This can be called for an i32 shift amount that needs to be promoted.
6867 assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
6868 "Unexpected custom legalisation");
6869 return SDValue();
6870 case ISD::FADD:
6871 case ISD::FSUB:
6872 case ISD::FMUL:
6873 case ISD::FDIV:
6874 case ISD::FNEG:
6875 case ISD::FABS:
6876 case ISD::FSQRT:
6877 case ISD::FMA:
6878 case ISD::FMINNUM:
6879 case ISD::FMAXNUM:
6880 if (Op.getValueType() == MVT::nxv32f16 &&
6881 (Subtarget.hasVInstructionsF16Minimal() &&
6882 !Subtarget.hasVInstructionsF16()))
6883 return SplitVectorOp(Op, DAG);
6884 [[fallthrough]];
6885 case ISD::AVGFLOORU:
6886 case ISD::AVGCEILU:
6887 case ISD::SMIN:
6888 case ISD::SMAX:
6889 case ISD::UMIN:
6890 case ISD::UMAX:
6891 return lowerToScalableOp(Op, DAG);
6892 case ISD::UADDSAT:
6893 case ISD::USUBSAT:
6894 if (!Op.getValueType().isVector())
6895 return lowerUADDSAT_USUBSAT(Op, DAG);
6896 return lowerToScalableOp(Op, DAG);
6897 case ISD::SADDSAT:
6898 case ISD::SSUBSAT:
6899 if (!Op.getValueType().isVector())
6900 return lowerSADDSAT_SSUBSAT(Op, DAG);
6901 return lowerToScalableOp(Op, DAG);
6902 case ISD::ABDS:
6903 case ISD::ABDU: {
6904 SDLoc dl(Op);
6905 EVT VT = Op->getValueType(0);
6906 SDValue LHS = DAG.getFreeze(Op->getOperand(0));
6907 SDValue RHS = DAG.getFreeze(Op->getOperand(1));
6908 bool IsSigned = Op->getOpcode() == ISD::ABDS;
6909
6910 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
6911 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
6912 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
6913 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
6914 SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
6915 SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
6916 return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
6917 }
6918 case ISD::ABS:
6919 case ISD::VP_ABS:
6920 return lowerABS(Op, DAG);
6921 case ISD::CTLZ:
6922 case ISD::CTLZ_ZERO_UNDEF:
6923 case ISD::CTTZ:
6924 case ISD::CTTZ_ZERO_UNDEF:
6925 if (Subtarget.hasStdExtZvbb())
6926 return lowerToScalableOp(Op, DAG);
6927 assert(Op.getOpcode() != ISD::CTTZ);
6928 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
6929 case ISD::VSELECT:
6930 return lowerFixedLengthVectorSelectToRVV(Op, DAG);
6931 case ISD::FCOPYSIGN:
6932 if (Op.getValueType() == MVT::nxv32f16 &&
6933 (Subtarget.hasVInstructionsF16Minimal() &&
6934 !Subtarget.hasVInstructionsF16()))
6935 return SplitVectorOp(Op, DAG);
6936 return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
6937 case ISD::STRICT_FADD:
6938 case ISD::STRICT_FSUB:
6939 case ISD::STRICT_FMUL:
6940 case ISD::STRICT_FDIV:
6941 case ISD::STRICT_FSQRT:
6942 case ISD::STRICT_FMA:
6943 if (Op.getValueType() == MVT::nxv32f16 &&
6944 (Subtarget.hasVInstructionsF16Minimal() &&
6945 !Subtarget.hasVInstructionsF16()))
6946 return SplitStrictFPVectorOp(Op, DAG);
6947 return lowerToScalableOp(Op, DAG);
6948 case ISD::STRICT_FSETCC:
6949 case ISD::STRICT_FSETCCS:
6950 return lowerVectorStrictFSetcc(Op, DAG);
6951 case ISD::STRICT_FCEIL:
6952 case ISD::STRICT_FRINT:
6953 case ISD::STRICT_FFLOOR:
6954 case ISD::STRICT_FTRUNC:
6955 case ISD::STRICT_FNEARBYINT:
6956 case ISD::STRICT_FROUND:
6957 case ISD::STRICT_FROUNDEVEN:
6958 return lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
6959 case ISD::MGATHER:
6960 case ISD::VP_GATHER:
6961 return lowerMaskedGather(Op, DAG);
6962 case ISD::MSCATTER:
6963 case ISD::VP_SCATTER:
6964 return lowerMaskedScatter(Op, DAG);
6965 case ISD::GET_ROUNDING:
6966 return lowerGET_ROUNDING(Op, DAG);
6967 case ISD::SET_ROUNDING:
6968 return lowerSET_ROUNDING(Op, DAG);
6969 case ISD::EH_DWARF_CFA:
6970 return lowerEH_DWARF_CFA(Op, DAG);
6971 case ISD::VP_SELECT:
6972 case ISD::VP_MERGE:
6973 case ISD::VP_ADD:
6974 case ISD::VP_SUB:
6975 case ISD::VP_MUL:
6976 case ISD::VP_SDIV:
6977 case ISD::VP_UDIV:
6978 case ISD::VP_SREM:
6979 case ISD::VP_UREM:
6980 case ISD::VP_UADDSAT:
6981 case ISD::VP_USUBSAT:
6982 case ISD::VP_SADDSAT:
6983 case ISD::VP_SSUBSAT:
6984 case ISD::VP_LRINT:
6985 case ISD::VP_LLRINT:
6986 return lowerVPOp(Op, DAG);
6987 case ISD::VP_AND:
6988 case ISD::VP_OR:
6989 case ISD::VP_XOR:
6990 return lowerLogicVPOp(Op, DAG);
6991 case ISD::VP_FADD:
6992 case ISD::VP_FSUB:
6993 case ISD::VP_FMUL:
6994 case ISD::VP_FDIV:
6995 case ISD::VP_FNEG:
6996 case ISD::VP_FABS:
6997 case ISD::VP_SQRT:
6998 case ISD::VP_FMA:
6999 case ISD::VP_FMINNUM:
7000 case ISD::VP_FMAXNUM:
7001 case ISD::VP_FCOPYSIGN:
7002 if (Op.getValueType() == MVT::nxv32f16 &&
7003 (Subtarget.hasVInstructionsF16Minimal() &&
7004 !Subtarget.hasVInstructionsF16()))
7005 return SplitVPOp(Op, DAG);
7006 [[fallthrough]];
7007 case ISD::VP_ASHR:
7008 case ISD::VP_LSHR:
7009 case ISD::VP_SHL:
7010 return lowerVPOp(Op, DAG);
7011 case ISD::VP_IS_FPCLASS:
7012 return LowerIS_FPCLASS(Op, DAG);
7013 case ISD::VP_SIGN_EXTEND:
7014 case ISD::VP_ZERO_EXTEND:
7015 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
7016 return lowerVPExtMaskOp(Op, DAG);
7017 return lowerVPOp(Op, DAG);
7018 case ISD::VP_TRUNCATE:
7019 return lowerVectorTruncLike(Op, DAG);
7020 case ISD::VP_FP_EXTEND:
7021 case ISD::VP_FP_ROUND:
7022 return lowerVectorFPExtendOrRoundLike(Op, DAG);
7023 case ISD::VP_SINT_TO_FP:
7024 case ISD::VP_UINT_TO_FP:
7025 if (Op.getValueType().isVector() &&
7026 Op.getValueType().getScalarType() == MVT::f16 &&
7027 (Subtarget.hasVInstructionsF16Minimal() &&
7028 !Subtarget.hasVInstructionsF16())) {
7029 if (Op.getValueType() == MVT::nxv32f16)
7030 return SplitVPOp(Op, DAG);
7031 // int -> f32
7032 SDLoc DL(Op);
7033 MVT NVT =
7034 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
7035 auto NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
7036 // f32 -> f16
7037 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
7038 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
7039 }
7040 [[fallthrough]];
7041 case ISD::VP_FP_TO_SINT:
7042 case ISD::VP_FP_TO_UINT:
7043 if (SDValue Op1 = Op.getOperand(0);
7044 Op1.getValueType().isVector() &&
7045 Op1.getValueType().getScalarType() == MVT::f16 &&
7046 (Subtarget.hasVInstructionsF16Minimal() &&
7047 !Subtarget.hasVInstructionsF16())) {
7048 if (Op1.getValueType() == MVT::nxv32f16)
7049 return SplitVPOp(Op, DAG);
7050 // f16 -> f32
7051 SDLoc DL(Op);
7052 MVT NVT = MVT::getVectorVT(MVT::f32,
7053 Op1.getValueType().getVectorElementCount());
7054 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
7055 // f32 -> int
7056 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
7057 {WidenVec, Op.getOperand(1), Op.getOperand(2)});
7058 }
7059 return lowerVPFPIntConvOp(Op, DAG);
7060 case ISD::VP_SETCC:
7061 if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 &&
7062 (Subtarget.hasVInstructionsF16Minimal() &&
7063 !Subtarget.hasVInstructionsF16()))
7064 return SplitVPOp(Op, DAG);
7065 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
7066 return lowerVPSetCCMaskOp(Op, DAG);
7067 [[fallthrough]];
7068 case ISD::VP_SMIN:
7069 case ISD::VP_SMAX:
7070 case ISD::VP_UMIN:
7071 case ISD::VP_UMAX:
7072 case ISD::VP_BITREVERSE:
7073 case ISD::VP_BSWAP:
7074 return lowerVPOp(Op, DAG);
7075 case ISD::VP_CTLZ:
7076 case ISD::VP_CTLZ_ZERO_UNDEF:
7077 if (Subtarget.hasStdExtZvbb())
7078 return lowerVPOp(Op, DAG);
7079 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
7080 case ISD::VP_CTTZ:
7081 case ISD::VP_CTTZ_ZERO_UNDEF:
7082 if (Subtarget.hasStdExtZvbb())
7083 return lowerVPOp(Op, DAG);
7084 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
7085 case ISD::VP_CTPOP:
7086 return lowerVPOp(Op, DAG);
7087 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
7088 return lowerVPStridedLoad(Op, DAG);
7089 case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
7090 return lowerVPStridedStore(Op, DAG);
7091 case ISD::VP_FCEIL:
7092 case ISD::VP_FFLOOR:
7093 case ISD::VP_FRINT:
7094 case ISD::VP_FNEARBYINT:
7095 case ISD::VP_FROUND:
7096 case ISD::VP_FROUNDEVEN:
7097 case ISD::VP_FROUNDTOZERO:
7098 if (Op.getValueType() == MVT::nxv32f16 &&
7099 (Subtarget.hasVInstructionsF16Minimal() &&
7100 !Subtarget.hasVInstructionsF16()))
7101 return SplitVPOp(Op, DAG);
7102 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
7103 case ISD::VP_FMAXIMUM:
7104 case ISD::VP_FMINIMUM:
7105 if (Op.getValueType() == MVT::nxv32f16 &&
7106 (Subtarget.hasVInstructionsF16Minimal() &&
7107 !Subtarget.hasVInstructionsF16()))
7108 return SplitVPOp(Op, DAG);
7109 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
7110 case ISD::EXPERIMENTAL_VP_SPLICE:
7111 return lowerVPSpliceExperimental(Op, DAG);
7112 case ISD::EXPERIMENTAL_VP_REVERSE:
7113 return lowerVPReverseExperimental(Op, DAG);
7114 }
7115}
7116
7117 static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty,
7118 SelectionDAG &DAG, unsigned Flags) {
7119 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
7120}
7121
7122 static SDValue getTargetNode(BlockAddressSDNode *N, const SDLoc &DL, EVT Ty,
7123 SelectionDAG &DAG, unsigned Flags) {
7124 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
7125 Flags);
7126}
7127
7128 static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty,
7129 SelectionDAG &DAG, unsigned Flags) {
7130 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
7131 N->getOffset(), Flags);
7132}
7133
7134 static SDValue getTargetNode(JumpTableSDNode *N, const SDLoc &DL, EVT Ty,
7135 SelectionDAG &DAG, unsigned Flags) {
7136 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
7137}
7138
7139template <class NodeTy>
7140SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
7141 bool IsLocal, bool IsExternWeak) const {
7142 SDLoc DL(N);
7143 EVT Ty = getPointerTy(DAG.getDataLayout());
7144
7145 // When HWASAN is used and tagging of global variables is enabled,
7146 // they should be accessed via the GOT, since the tagged address of a global
7147 // is incompatible with existing code models. This also applies to non-pic
7148 // mode.
7149 if (isPositionIndependent() || Subtarget.allowTaggedGlobals()) {
7150 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
7151 if (IsLocal && !Subtarget.allowTaggedGlobals())
7152 // Use PC-relative addressing to access the symbol. This generates the
7153 // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
7154 // %pcrel_lo(auipc)).
7155 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
7156
7157 // Use PC-relative addressing to access the GOT for this symbol, then load
7158 // the address from the GOT. This generates the pattern (PseudoLGA sym),
7159 // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
7160 SDValue Load =
7161 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
7162 MachineFunction &MF = DAG.getMachineFunction();
7163 MachineMemOperand *MemOp = MF.getMachineMemOperand(
7164 MachinePointerInfo::getGOT(MF),
7165 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
7166 MachineMemOperand::MOInvariant,
7167 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
7168 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
7169 return Load;
7170 }
7171
7172 switch (getTargetMachine().getCodeModel()) {
7173 default:
7174 report_fatal_error("Unsupported code model for lowering");
7175 case CodeModel::Small: {
7176 // Generate a sequence for accessing addresses within the first 2 GiB of
7177 // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
7178 SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
7179 SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
7180 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
7181 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNHi, AddrLo);
7182 }
7183 case CodeModel::Medium: {
7184 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
7185 if (IsExternWeak) {
7186 // An extern weak symbol may be undefined, i.e. have value 0, which may
7187 // not be within 2GiB of PC, so use GOT-indirect addressing to access the
7188 // symbol. This generates the pattern (PseudoLGA sym), which expands to
7189 // (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
7190 SDValue Load =
7191 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
7192 MachineFunction &MF = DAG.getMachineFunction();
7193 MachineMemOperand *MemOp = MF.getMachineMemOperand(
7194 MachinePointerInfo::getGOT(MF),
7195 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
7196 MachineMemOperand::MOInvariant,
7197 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
7198 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
7199 return Load;
7200 }
7201
7202 // Generate a sequence for accessing addresses within any 2GiB range within
7203 // the address space. This generates the pattern (PseudoLLA sym), which
7204 // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
7205 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
7206 }
7207 }
7208}
7209
7210SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
7211 SelectionDAG &DAG) const {
7212 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
7213 assert(N->getOffset() == 0 && "unexpected offset in global node");
7214 const GlobalValue *GV = N->getGlobal();
7215 return getAddr(N, DAG, GV->isDSOLocal(), GV->hasExternalWeakLinkage());
7216}
7217
7218SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
7219 SelectionDAG &DAG) const {
7220 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
7221
7222 return getAddr(N, DAG);
7223}
7224
7225SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
7226 SelectionDAG &DAG) const {
7227 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
7228
7229 return getAddr(N, DAG);
7230}
7231
7232SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
7233 SelectionDAG &DAG) const {
7234 JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
7235
7236 return getAddr(N, DAG);
7237}
7238
7239SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
7240 SelectionDAG &DAG,
7241 bool UseGOT) const {
7242 SDLoc DL(N);
7243 EVT Ty = getPointerTy(DAG.getDataLayout());
7244 const GlobalValue *GV = N->getGlobal();
7245 MVT XLenVT = Subtarget.getXLenVT();
7246
7247 if (UseGOT) {
7248 // Use PC-relative addressing to access the GOT for this TLS symbol, then
7249 // load the address from the GOT and add the thread pointer. This generates
7250 // the pattern (PseudoLA_TLS_IE sym), which expands to
7251 // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
7252 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
7253 SDValue Load =
7254 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
7255 MachineFunction &MF = DAG.getMachineFunction();
7256 MachineMemOperand *MemOp = MF.getMachineMemOperand(
7257 MachinePointerInfo::getGOT(MF),
7258 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
7259 MachineMemOperand::MOInvariant,
7260 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
7261 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
7262
7263 // Add the thread pointer.
7264 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
7265 return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
7266 }
7267
7268 // Generate a sequence for accessing the address relative to the thread
7269 // pointer, with the appropriate adjustment for the thread pointer offset.
7270 // This generates the pattern
7271 // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
7272 SDValue AddrHi =
7273 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
7274 SDValue AddrAdd =
7275 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
7276 SDValue AddrLo =
7277 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);
7278
7279 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
7280 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
7281 SDValue MNAdd =
7282 DAG.getNode(RISCVISD::ADD_TPREL, DL, Ty, MNHi, TPReg, AddrAdd);
7283 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNAdd, AddrLo);
7284}
7285
7286SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
7287 SelectionDAG &DAG) const {
7288 SDLoc DL(N);
7289 EVT Ty = getPointerTy(DAG.getDataLayout());
7290 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
7291 const GlobalValue *GV = N->getGlobal();
7292
7293 // Use a PC-relative addressing mode to access the global dynamic GOT address.
7294 // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
7295 // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
7296 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
7297 SDValue Load =
7298 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
7299
7300 // Prepare argument list to generate call.
7301 ArgListTy Args;
7302 ArgListEntry Entry;
7303 Entry.Node = Load;
7304 Entry.Ty = CallTy;
7305 Args.push_back(Entry);
7306
7307 // Set up the call to __tls_get_addr.
7308 TargetLowering::CallLoweringInfo CLI(DAG);
7309 CLI.setDebugLoc(DL)
7310 .setChain(DAG.getEntryNode())
7311 .setLibCallee(CallingConv::C, CallTy,
7312 DAG.getExternalSymbol("__tls_get_addr", Ty),
7313 std::move(Args));
7314
7315 return LowerCallTo(CLI).first;
7316}
7317
7318SDValue RISCVTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
7319 SelectionDAG &DAG) const {
7320 SDLoc DL(N);
7321 EVT Ty = getPointerTy(DAG.getDataLayout());
7322 const GlobalValue *GV = N->getGlobal();
7323
7324 // Use a PC-relative addressing mode to access the global dynamic GOT address.
7325 // This generates the pattern (PseudoLA_TLSDESC sym), which expands to
7326 //
7327 // auipc tX, %tlsdesc_hi(symbol) // R_RISCV_TLSDESC_HI20(symbol)
7328 // lw tY, tX, %tlsdesc_load_lo(label) // R_RISCV_TLSDESC_LOAD_LO12(label)
7329 // addi a0, tX, %tlsdesc_add_lo(label) // R_RISCV_TLSDESC_ADD_LO12(label)
7330 // jalr t0, tY // R_RISCV_TLSDESC_CALL(label)
7331 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
7332 return SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLSDESC, DL, Ty, Addr), 0);
7333}
7334
7335SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
7336 SelectionDAG &DAG) const {
7337 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
7338 assert(N->getOffset() == 0 && "unexpected offset in global node");
7339
7340 if (DAG.getTarget().useEmulatedTLS())
7341 return LowerToTLSEmulatedModel(N, DAG);
7342
7343 TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
7344
7345 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
7346 CallingConv::GHC)
7347 report_fatal_error("In GHC calling convention TLS is not supported");
7348
7349 SDValue Addr;
7350 switch (Model) {
7351 case TLSModel::LocalExec:
7352 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
7353 break;
7354 case TLSModel::InitialExec:
7355 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
7356 break;
7357 case TLSModel::LocalDynamic:
7358 case TLSModel::GeneralDynamic:
7359 Addr = DAG.getTarget().useTLSDESC() ? getTLSDescAddr(N, DAG)
7360 : getDynamicTLSAddr(N, DAG);
7361 break;
7362 }
7363
7364 return Addr;
7365}
7366
7367// Return true if Val is equal to (setcc LHS, RHS, CC).
7368// Return false if Val is the inverse of (setcc LHS, RHS, CC).
7369// Otherwise, return std::nullopt.
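// For example, with Val = (setcc a, b, setlt): matchSetCC(a, b, setlt, Val)
// returns true, matchSetCC(a, b, setge, Val) returns false, and
// matchSetCC(a, c, setlt, Val) returns std::nullopt.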
7370static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
7371 ISD::CondCode CC, SDValue Val) {
7372 assert(Val->getOpcode() == ISD::SETCC);
7373 SDValue LHS2 = Val.getOperand(0);
7374 SDValue RHS2 = Val.getOperand(1);
7375 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
7376
7377 if (LHS == LHS2 && RHS == RHS2) {
7378 if (CC == CC2)
7379 return true;
7380 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
7381 return false;
7382 } else if (LHS == RHS2 && RHS == LHS2) {
7383 CC2 = ISD::getSetCCSwappedOperands(CC2);
7384 if (CC == CC2)
7385 return true;
7386 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
7387 return false;
7388 }
7389
7390 return std::nullopt;
7391}
7392
7393 static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
7394 const RISCVSubtarget &Subtarget) {
7395 SDValue CondV = N->getOperand(0);
7396 SDValue TrueV = N->getOperand(1);
7397 SDValue FalseV = N->getOperand(2);
7398 MVT VT = N->getSimpleValueType(0);
7399 SDLoc DL(N);
7400
7401 if (!Subtarget.hasConditionalMoveFusion()) {
7402 // (select c, -1, y) -> -c | y
7403 if (isAllOnesConstant(TrueV)) {
7404 SDValue Neg = DAG.getNegative(CondV, DL, VT);
7405 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
7406 }
7407 // (select c, y, -1) -> (c-1) | y
7408 if (isAllOnesConstant(FalseV)) {
7409 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
7410 DAG.getAllOnesConstant(DL, VT));
7411 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
7412 }
7413
7414 // (select c, 0, y) -> (c-1) & y
7415 if (isNullConstant(TrueV)) {
7416 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
7417 DAG.getAllOnesConstant(DL, VT));
7418 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
7419 }
7420 // (select c, y, 0) -> -c & y
7421 if (isNullConstant(FalseV)) {
7422 SDValue Neg = DAG.getNegative(CondV, DL, VT);
7423 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
7424 }
7425 }
7426
7427 // select c, ~x, x --> xor -c, x
7428 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
7429 const APInt &TrueVal = TrueV->getAsAPIntVal();
7430 const APInt &FalseVal = FalseV->getAsAPIntVal();
7431 if (~TrueVal == FalseVal) {
7432 SDValue Neg = DAG.getNegative(CondV, DL, VT);
7433 return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
7434 }
7435 }
7436
7437 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
7438 // when both truev and falsev are also setcc.
7439 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
7440 FalseV.getOpcode() == ISD::SETCC) {
7441 SDValue LHS = CondV.getOperand(0);
7442 SDValue RHS = CondV.getOperand(1);
7443 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
7444
7445 // (select x, x, y) -> x | y
7446 // (select !x, x, y) -> x & y
7447 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
7448 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
7449 DAG.getFreeze(FalseV));
7450 }
7451 // (select x, y, x) -> x & y
7452 // (select !x, y, x) -> x | y
7453 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
7454 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
7455 DAG.getFreeze(TrueV), FalseV);
7456 }
7457 }
7458
7459 return SDValue();
7460}
7461
7462// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
7463// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
7464// For now we only consider transformation profitable if `binOp(c0, c1)` ends up
7465// being `0` or `-1`. In such cases we can replace `select` with `and`.
7466// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
7467// than `c0`?
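// For example, (add (select cond, x, -1), 1) becomes
// (select cond, (add x, 1), 0), which can then be lowered without a branch.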
7468static SDValue
7470 const RISCVSubtarget &Subtarget) {
7471 if (Subtarget.hasShortForwardBranchOpt())
7472 return SDValue();
7473
7474 unsigned SelOpNo = 0;
7475 SDValue Sel = BO->getOperand(0);
7476 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
7477 SelOpNo = 1;
7478 Sel = BO->getOperand(1);
7479 }
7480
7481 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
7482 return SDValue();
7483
7484 unsigned ConstSelOpNo = 1;
7485 unsigned OtherSelOpNo = 2;
7486 if (!dyn_cast<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
7487 ConstSelOpNo = 2;
7488 OtherSelOpNo = 1;
7489 }
7490 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
7491 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
7492 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
7493 return SDValue();
7494
7495 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
7496 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
7497 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
7498 return SDValue();
7499
7500 SDLoc DL(Sel);
7501 EVT VT = BO->getValueType(0);
7502
7503 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
7504 if (SelOpNo == 1)
7505 std::swap(NewConstOps[0], NewConstOps[1]);
7506
7507 SDValue NewConstOp =
7508 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
7509 if (!NewConstOp)
7510 return SDValue();
7511
7512 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
7513 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
7514 return SDValue();
7515
7516 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
7517 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
7518 if (SelOpNo == 1)
7519 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
7520 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
7521
7522 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
7523 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
7524 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
7525}
7526
7527SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
7528 SDValue CondV = Op.getOperand(0);
7529 SDValue TrueV = Op.getOperand(1);
7530 SDValue FalseV = Op.getOperand(2);
7531 SDLoc DL(Op);
7532 MVT VT = Op.getSimpleValueType();
7533 MVT XLenVT = Subtarget.getXLenVT();
7534
7535 // Lower vector SELECTs to VSELECTs by splatting the condition.
7536 if (VT.isVector()) {
7537 MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
7538 SDValue CondSplat = DAG.getSplat(SplatCondVT, DL, CondV);
7539 return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
7540 }
7541
7542 // When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ
7543 // nodes to implement the SELECT. Performing the lowering here allows for
7544 // greater control over when CZERO_{EQZ/NEZ} are used vs another branchless
7545 // sequence or RISCVISD::SELECT_CC node (branch-based select).
7546 if ((Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) &&
7547 VT.isScalarInteger()) {
7548 // (select c, t, 0) -> (czero_eqz t, c)
7549 if (isNullConstant(FalseV))
7550 return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV);
7551 // (select c, 0, f) -> (czero_nez f, c)
7552 if (isNullConstant(TrueV))
7553 return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV);
7554
7555 // (select c, (and f, x), f) -> (or (and f, x), (czero_nez f, c))
7556 if (TrueV.getOpcode() == ISD::AND &&
7557 (TrueV.getOperand(0) == FalseV || TrueV.getOperand(1) == FalseV))
7558 return DAG.getNode(
7559 ISD::OR, DL, VT, TrueV,
7560 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
7561 // (select c, t, (and t, x)) -> (or (czero_eqz t, c), (and t, x))
7562 if (FalseV.getOpcode() == ISD::AND &&
7563 (FalseV.getOperand(0) == TrueV || FalseV.getOperand(1) == TrueV))
7564 return DAG.getNode(
7565 ISD::OR, DL, VT, FalseV,
7566 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV));
7567
7568 // Try some other optimizations before falling back to generic lowering.
7569 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
7570 return V;
7571
7572 // (select c, c1, c2) -> (add (czero_nez c2 - c1, c), c1)
7573 // (select c, c1, c2) -> (add (czero_eqz c1 - c2, c), c2)
7574 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
7575 const APInt &TrueVal = TrueV->getAsAPIntVal();
7576 const APInt &FalseVal = FalseV->getAsAPIntVal();
7577 const int TrueValCost = RISCVMatInt::getIntMatCost(
7578 TrueVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
7579 const int FalseValCost = RISCVMatInt::getIntMatCost(
7580 FalseVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
7581 bool IsCZERO_NEZ = TrueValCost <= FalseValCost;
7582 SDValue LHSVal = DAG.getConstant(
7583 IsCZERO_NEZ ? FalseVal - TrueVal : TrueVal - FalseVal, DL, VT);
7584 SDValue RHSVal =
7585 DAG.getConstant(IsCZERO_NEZ ? TrueVal : FalseVal, DL, VT);
7586 SDValue CMOV =
7587 DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
7588 DL, VT, LHSVal, CondV);
7589 return DAG.getNode(ISD::ADD, DL, VT, CMOV, RHSVal);
7590 }
7591
7592 // (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c))
7593 // Unless we have the short forward branch optimization.
7594 if (!Subtarget.hasConditionalMoveFusion())
7595 return DAG.getNode(
7596 ISD::OR, DL, VT,
7597 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV),
7598 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
7599 }
7600
7601 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
7602 return V;
7603
7604 if (Op.hasOneUse()) {
7605 unsigned UseOpc = Op->use_begin()->getOpcode();
7606 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
7607 SDNode *BinOp = *Op->use_begin();
7608 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->use_begin(),
7609 DAG, Subtarget)) {
7610 DAG.ReplaceAllUsesWith(BinOp, &NewSel);
7611 return lowerSELECT(NewSel, DAG);
7612 }
7613 }
7614 }
7615
7616 // (select cc, 1.0, 0.0) -> (sint_to_fp (zext cc))
7617 // (select cc, 0.0, 1.0) -> (sint_to_fp (zext (xor cc, 1)))
7618 const ConstantFPSDNode *FPTV = dyn_cast<ConstantFPSDNode>(TrueV);
7619 const ConstantFPSDNode *FPFV = dyn_cast<ConstantFPSDNode>(FalseV);
7620 if (FPTV && FPFV) {
7621 if (FPTV->isExactlyValue(1.0) && FPFV->isExactlyValue(0.0))
7622 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, CondV);
7623 if (FPTV->isExactlyValue(0.0) && FPFV->isExactlyValue(1.0)) {
7624 SDValue XOR = DAG.getNode(ISD::XOR, DL, XLenVT, CondV,
7625 DAG.getConstant(1, DL, XLenVT));
7626 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, XOR);
7627 }
7628 }
7629
7630 // If the condition is not an integer SETCC which operates on XLenVT, we need
7631 // to emit a RISCVISD::SELECT_CC comparing the condition to zero. i.e.:
7632 // (select condv, truev, falsev)
7633 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
7634 if (CondV.getOpcode() != ISD::SETCC ||
7635 CondV.getOperand(0).getSimpleValueType() != XLenVT) {
7636 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
7637 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
7638
7639 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
7640
7641 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
7642 }
7643
7644 // If the CondV is the output of a SETCC node which operates on XLenVT inputs,
7645 // then merge the SETCC node into the lowered RISCVISD::SELECT_CC to take
7646 // advantage of the integer compare+branch instructions. i.e.:
7647 // (select (setcc lhs, rhs, cc), truev, falsev)
7648 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
7649 SDValue LHS = CondV.getOperand(0);
7650 SDValue RHS = CondV.getOperand(1);
7651 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
7652
7653 // Special case for a select of 2 constants that have a difference of 1.
7654 // Normally this is done by DAGCombine, but if the select is introduced by
7655 // type legalization or op legalization, we miss it. Restricting to the SETLT
7655 // type legalization or op legalization, we miss it. Restricting to SETLT
7656 // case for now because that is what signed saturating add/sub need.
7657 // FIXME: We don't need the condition to be SETLT or even a SETCC,
7658 // but we would probably want to swap the true/false values if the condition
7659 // is SETGE/SETLE to avoid an XORI.
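// For example, (select (setlt a, b), 5, 4) can be lowered to
// (add (setlt a, b), 4).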
7660 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
7661 CCVal == ISD::SETLT) {
7662 const APInt &TrueVal = TrueV->getAsAPIntVal();
7663 const APInt &FalseVal = FalseV->getAsAPIntVal();
7664 if (TrueVal - 1 == FalseVal)
7665 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
7666 if (TrueVal + 1 == FalseVal)
7667 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
7668 }
7669
7670 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
7671 // 1 < x ? x : 1 -> 0 < x ? x : 1
7672 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
7673 RHS == TrueV && LHS == FalseV) {
7674 LHS = DAG.getConstant(0, DL, VT);
7675 // 0 <u x is the same as x != 0.
7676 if (CCVal == ISD::SETULT) {
7677 std::swap(LHS, RHS);
7678 CCVal = ISD::SETNE;
7679 }
7680 }
7681
7682 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
7683 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
7684 RHS == FalseV) {
7685 RHS = DAG.getConstant(0, DL, VT);
7686 }
7687
7688 SDValue TargetCC = DAG.getCondCode(CCVal);
7689
7690 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
7691 // (select (setcc lhs, rhs, CC), constant, falsev)
7692 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
7693 std::swap(TrueV, FalseV);
7694 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
7695 }
7696
7697 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
7698 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
7699}
7700
7701SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
7702 SDValue CondV = Op.getOperand(1);
7703 SDLoc DL(Op);
7704 MVT XLenVT = Subtarget.getXLenVT();
7705
7706 if (CondV.getOpcode() == ISD::SETCC &&
7707 CondV.getOperand(0).getValueType() == XLenVT) {
7708 SDValue LHS = CondV.getOperand(0);
7709 SDValue RHS = CondV.getOperand(1);
7710 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
7711
7712 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
7713
7714 SDValue TargetCC = DAG.getCondCode(CCVal);
7715 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
7716 LHS, RHS, TargetCC, Op.getOperand(2));
7717 }
7718
7719 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
7720 CondV, DAG.getConstant(0, DL, XLenVT),
7721 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
7722}
7723
7724SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
7725 MachineFunction &MF = DAG.getMachineFunction();
7726 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
7727
7728 SDLoc DL(Op);
7729 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
7730 getPointerTy(MF.getDataLayout()));
7731
7732 // vastart just stores the address of the VarArgsFrameIndex slot into the
7733 // memory location argument.
7734 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
7735 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
7736 MachinePointerInfo(SV));
7737}
7738
7739SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
7740 SelectionDAG &DAG) const {
7741 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
7742 MachineFunction &MF = DAG.getMachineFunction();
7743 MachineFrameInfo &MFI = MF.getFrameInfo();
7744 MFI.setFrameAddressIsTaken(true);
7745 Register FrameReg = RI.getFrameRegister(MF);
7746 int XLenInBytes = Subtarget.getXLen() / 8;
7747
7748 EVT VT = Op.getValueType();
7749 SDLoc DL(Op);
7750 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
7751 unsigned Depth = Op.getConstantOperandVal(0);
7752 while (Depth--) {
7753 int Offset = -(XLenInBytes * 2);
7754 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
7755 DAG.getIntPtrConstant(Offset, DL));
7756 FrameAddr =
7757 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
7758 }
7759 return FrameAddr;
7760}
7761
7762SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
7763 SelectionDAG &DAG) const {
7764 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
7765 MachineFunction &MF = DAG.getMachineFunction();
7766 MachineFrameInfo &MFI = MF.getFrameInfo();
7767 MFI.setReturnAddressIsTaken(true);
7768 MVT XLenVT = Subtarget.getXLenVT();
7769 int XLenInBytes = Subtarget.getXLen() / 8;
7770
7771 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
7772 return SDValue();
7773
7774 EVT VT = Op.getValueType();
7775 SDLoc DL(Op);
7776 unsigned Depth = Op.getConstantOperandVal(0);
7777 if (Depth) {
7778 int Off = -XLenInBytes;
7779 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
7780 SDValue Offset = DAG.getConstant(Off, DL, VT);
7781 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
7782 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
7783 MachinePointerInfo());
7784 }
7785
7786 // Return the value of the return address register, marking it an implicit
7787 // live-in.
7788 Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
7789 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
7790}
7791
7792SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
7793 SelectionDAG &DAG) const {
7794 SDLoc DL(Op);
7795 SDValue Lo = Op.getOperand(0);
7796 SDValue Hi = Op.getOperand(1);
7797 SDValue Shamt = Op.getOperand(2);
7798 EVT VT = Lo.getValueType();
7799
7800 // if Shamt-XLEN < 0: // Shamt < XLEN
7801 // Lo = Lo << Shamt
7802 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
7803 // else:
7804 // Lo = 0
7805 // Hi = Lo << (Shamt-XLEN)
7806
7807 SDValue Zero = DAG.getConstant(0, DL, VT);
7808 SDValue One = DAG.getConstant(1, DL, VT);
7809 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
7810 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
7811 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
7812 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
7813
7814 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
7815 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
7816 SDValue ShiftRightLo =
7817 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
7818 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
7819 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
7820 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
7821
7822 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
7823
7824 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
7825 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
7826
7827 SDValue Parts[2] = {Lo, Hi};
7828 return DAG.getMergeValues(Parts, DL);
7829}
7830
7831SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
7832 bool IsSRA) const {
7833 SDLoc DL(Op);
7834 SDValue Lo = Op.getOperand(0);
7835 SDValue Hi = Op.getOperand(1);
7836 SDValue Shamt = Op.getOperand(2);
7837 EVT VT = Lo.getValueType();
7838
7839 // SRA expansion:
7840 // if Shamt-XLEN < 0: // Shamt < XLEN
7841 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
7842 // Hi = Hi >>s Shamt
7843 // else:
7844 // Lo = Hi >>s (Shamt-XLEN);
7845 // Hi = Hi >>s (XLEN-1)
7846 //
7847 // SRL expansion:
7848 // if Shamt-XLEN < 0: // Shamt < XLEN
7849 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
7850 // Hi = Hi >>u Shamt
7851 // else:
7852 // Lo = Hi >>u (Shamt-XLEN);
7853 // Hi = 0;
7854
7855 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
7856
7857 SDValue Zero = DAG.getConstant(0, DL, VT);
7858 SDValue One = DAG.getConstant(1, DL, VT);
7859 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
7860 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
7861 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
7862 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
7863
7864 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
7865 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
7866 SDValue ShiftLeftHi =
7867 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
7868 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
7869 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
7870 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
7871 SDValue HiFalse =
7872 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
7873
7874 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
7875
7876 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
7877 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
7878
7879 SDValue Parts[2] = {Lo, Hi};
7880 return DAG.getMergeValues(Parts, DL);
7881}
7882
7883// Lower splats of i1 types to SETCC. For each mask vector type, we have a
7884// legal equivalently-sized i8 type, so we can use that as a go-between.
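// For example, a non-constant (splat_vector vXi1, %x) becomes
// (setcc (splat_vector vXi8, (and %x, 1)), (splat_vector vXi8, 0), ne),
// while all-ones and all-zeros splats are emitted as VMSET_VL and VMCLR_VL.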
7885SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
7886 SelectionDAG &DAG) const {
7887 SDLoc DL(Op);
7888 MVT VT = Op.getSimpleValueType();
7889 SDValue SplatVal = Op.getOperand(0);
7890 // All-zeros or all-ones splats are handled specially.
7891 if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) {
7892 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
7893 return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL);
7894 }
7895 if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) {
7896 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
7897 return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
7898 }
7899 MVT InterVT = VT.changeVectorElementType(MVT::i8);
7900 SplatVal = DAG.getNode(ISD::AND, DL, SplatVal.getValueType(), SplatVal,
7901 DAG.getConstant(1, DL, SplatVal.getValueType()));
7902 SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
7903 SDValue Zero = DAG.getConstant(0, DL, InterVT);
7904 return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
7905}
7906
7907// Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
7908// illegal (currently only vXi64 RV32).
7909// FIXME: We could also catch non-constant sign-extended i32 values and lower
7910// them to VMV_V_X_VL.
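// For example, on RV32 a splat of the i64 constant 0x100000002 reaches here
// with Lo = 2 and Hi = 1.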
7911SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
7912 SelectionDAG &DAG) const {
7913 SDLoc DL(Op);
7914 MVT VecVT = Op.getSimpleValueType();
7915 assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
7916 "Unexpected SPLAT_VECTOR_PARTS lowering");
7917
7918 assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
7919 SDValue Lo = Op.getOperand(0);
7920 SDValue Hi = Op.getOperand(1);
7921
7922 MVT ContainerVT = VecVT;
7923 if (VecVT.isFixedLengthVector())
7924 ContainerVT = getContainerForFixedLengthVector(VecVT);
7925
7926 auto VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
7927
7928 SDValue Res =
7929 splatPartsI64WithVL(DL, ContainerVT, SDValue(), Lo, Hi, VL, DAG);
7930
7931 if (VecVT.isFixedLengthVector())
7932 Res = convertFromScalableVector(VecVT, Res, DAG, Subtarget);
7933
7934 return Res;
7935}
7936
7937// Custom-lower extensions from mask vectors by using a vselect either with 1
7938// for zero/any-extension or -1 for sign-extension:
7939// (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
7940// Note that any-extension is lowered identically to zero-extension.
7941SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
7942 int64_t ExtTrueVal) const {
7943 SDLoc DL(Op);
7944 MVT VecVT = Op.getSimpleValueType();
7945 SDValue Src = Op.getOperand(0);
7946 // Only custom-lower extensions from mask types
7947 assert(Src.getValueType().isVector() &&
7948 Src.getValueType().getVectorElementType() == MVT::i1);
7949
7950 if (VecVT.isScalableVector()) {
7951 SDValue SplatZero = DAG.getConstant(0, DL, VecVT);
7952 SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, VecVT);
7953 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
7954 }
7955
7956 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
7957 MVT I1ContainerVT =
7958 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
7959
7960 SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);
7961
7962 SDValue VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
7963
7964 MVT XLenVT = Subtarget.getXLenVT();
7965 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
7966 SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, XLenVT);
7967
7968 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
7969 DAG.getUNDEF(ContainerVT), SplatZero, VL);
7970 SplatTrueVal = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
7971 DAG.getUNDEF(ContainerVT), SplatTrueVal, VL);
7972 SDValue Select =
7973 DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, SplatTrueVal,
7974 SplatZero, DAG.getUNDEF(ContainerVT), VL);
7975
7976 return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
7977}
7978
7979SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
7980 SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const {
7981 MVT ExtVT = Op.getSimpleValueType();
7982 // Only custom-lower extensions from fixed-length vector types.
7983 if (!ExtVT.isFixedLengthVector())
7984 return Op;
7985 MVT VT = Op.getOperand(0).getSimpleValueType();
7986 // Grab the canonical container type for the extended type. Infer the smaller
7987 // type from that to ensure the same number of vector elements, as we know
7988 // the LMUL will be sufficient to hold the smaller type.
7989 MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT);
7990 // Get the extended container type manually to ensure the same number of
7991 // vector elements between source and dest.
7992 MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
7993 ContainerExtVT.getVectorElementCount());
7994
7995 SDValue Op1 =
7996 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
7997
7998 SDLoc DL(Op);
7999 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
8000
8001 SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL);
8002
8003 return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget);
8004}
8005
8006// Custom-lower truncations from vectors to mask vectors by using a mask and a
8007// setcc operation:
8008// (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
8009SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op,
8010 SelectionDAG &DAG) const {
8011 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
8012 SDLoc DL(Op);
8013 EVT MaskVT = Op.getValueType();
8014 // Only expect to custom-lower truncations to mask types
8015 assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
8016 "Unexpected type for vector mask lowering");
8017 SDValue Src = Op.getOperand(0);
8018 MVT VecVT = Src.getSimpleValueType();
8019 SDValue Mask, VL;
8020 if (IsVPTrunc) {
8021 Mask = Op.getOperand(1);
8022 VL = Op.getOperand(2);
8023 }
8024 // If this is a fixed vector, we need to convert it to a scalable vector.
8025 MVT ContainerVT = VecVT;
8026
8027 if (VecVT.isFixedLengthVector()) {
8028 ContainerVT = getContainerForFixedLengthVector(VecVT);
8029 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
8030 if (IsVPTrunc) {
8031 MVT MaskContainerVT =
8032 getContainerForFixedLengthVector(Mask.getSimpleValueType());
8033 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
8034 }
8035 }
8036
8037 if (!IsVPTrunc) {
8038 std::tie(Mask, VL) =
8039 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
8040 }
8041
8042 SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
8043 SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
8044
8045 SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8046 DAG.getUNDEF(ContainerVT), SplatOne, VL);
8047 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8048 DAG.getUNDEF(ContainerVT), SplatZero, VL);
8049
8050 MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
8051 SDValue Trunc = DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne,
8052 DAG.getUNDEF(ContainerVT), Mask, VL);
8053 Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT,
8054 {Trunc, SplatZero, DAG.getCondCode(ISD::SETNE),
8055 DAG.getUNDEF(MaskContainerVT), Mask, VL});
8056 if (MaskVT.isFixedLengthVector())
8057 Trunc = convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
8058 return Trunc;
8059}
8060
8061SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
8062 SelectionDAG &DAG) const {
8063 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
8064 SDLoc DL(Op);
8065
8066 MVT VT = Op.getSimpleValueType();
8067 // Only custom-lower vector truncates
8068 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
8069
8070 // Truncates to mask types are handled differently
8071 if (VT.getVectorElementType() == MVT::i1)
8072 return lowerVectorMaskTruncLike(Op, DAG);
8073
8074 // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
8075 // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
8076 // truncate by one power of two at a time.
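// For example, truncating vXi64 down to vXi8 emits three such nodes:
// i64->i32, i32->i16 and i16->i8.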
8077 MVT DstEltVT = VT.getVectorElementType();
8078
8079 SDValue Src = Op.getOperand(0);
8080 MVT SrcVT = Src.getSimpleValueType();
8081 MVT SrcEltVT = SrcVT.getVectorElementType();
8082
8083 assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) &&
8084 isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
8085 "Unexpected vector truncate lowering");
8086
8087 MVT ContainerVT = SrcVT;
8088 SDValue Mask, VL;
8089 if (IsVPTrunc) {
8090 Mask = Op.getOperand(1);
8091 VL = Op.getOperand(2);
8092 }
8093 if (SrcVT.isFixedLengthVector()) {
8094 ContainerVT = getContainerForFixedLengthVector(SrcVT);
8095 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
8096 if (IsVPTrunc) {
8097 MVT MaskVT = getMaskTypeFor(ContainerVT);
8098 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
8099 }
8100 }
8101
8102 SDValue Result = Src;
8103 if (!IsVPTrunc) {
8104 std::tie(Mask, VL) =
8105 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
8106 }
8107
8108 LLVMContext &Context = *DAG.getContext();
8109 const ElementCount Count = ContainerVT.getVectorElementCount();
8110 do {
8111 SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
8112 EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count);
8113 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result,
8114 Mask, VL);
8115 } while (SrcEltVT != DstEltVT);
8116
8117 if (SrcVT.isFixedLengthVector())
8118 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
8119
8120 return Result;
8121}
8122
8123SDValue
8124RISCVTargetLowering::lowerStrictFPExtendOrRoundLike(SDValue Op,
8125 SelectionDAG &DAG) const {
8126 SDLoc DL(Op);
8127 SDValue Chain = Op.getOperand(0);
8128 SDValue Src = Op.getOperand(1);
8129 MVT VT = Op.getSimpleValueType();
8130 MVT SrcVT = Src.getSimpleValueType();
8131 MVT ContainerVT = VT;
8132 if (VT.isFixedLengthVector()) {
8133 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
8134 ContainerVT =
8135 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
8136 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
8137 }
8138
8139 auto [Mask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
8140
8141 // RVV can only widen/truncate fp to types double or half the size of the source.
8142 if ((VT.getVectorElementType() == MVT::f64 &&
8143 (SrcVT.getVectorElementType() == MVT::f16 ||
8144 SrcVT.getVectorElementType() == MVT::bf16)) ||
8145 ((VT.getVectorElementType() == MVT::f16 ||
8146 VT.getVectorElementType() == MVT::bf16) &&
8147 SrcVT.getVectorElementType() == MVT::f64)) {
8148 // For double rounding, the intermediate rounding should be round-to-odd.
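// For example, a strict f64->f16 round first narrows f64->f32 with
// round-to-odd and then performs the final f32->f16 round.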
8149 unsigned InterConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
8150 ? RISCVISD::STRICT_FP_EXTEND_VL
8151 : RISCVISD::STRICT_VFNCVT_ROD_VL;
8152 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
8153 Src = DAG.getNode(InterConvOpc, DL, DAG.getVTList(InterVT, MVT::Other),
8154 Chain, Src, Mask, VL);
8155 Chain = Src.getValue(1);
8156 }
8157
8158 unsigned ConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
8159 ? RISCVISD::STRICT_FP_EXTEND_VL
8160 : RISCVISD::STRICT_FP_ROUND_VL;
8161 SDValue Res = DAG.getNode(ConvOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
8162 Chain, Src, Mask, VL);
8163 if (VT.isFixedLengthVector()) {
8164 // StrictFP operations have two result values. Their lowered result should
8165 // have the same result count.
8166 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
8167 Res = DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
8168 }
8169 return Res;
8170}
8171
8172SDValue
8173RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
8174 SelectionDAG &DAG) const {
8175 bool IsVP =
8176 Op.getOpcode() == ISD::VP_FP_ROUND || Op.getOpcode() == ISD::VP_FP_EXTEND;
8177 bool IsExtend =
8178 Op.getOpcode() == ISD::VP_FP_EXTEND || Op.getOpcode() == ISD::FP_EXTEND;
8179 // RVV can only truncate fp to types half the size of the source. We
8180 // custom-lower f64->f16 rounds via RVV's round-to-odd float
8181 // conversion instruction.
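// For example, (fp_round vXf64 to vXf16) is emitted as a round-to-odd
// narrowing to f32 followed by an ordinary f32->f16 round.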
8182 SDLoc DL(Op);
8183 MVT VT = Op.getSimpleValueType();
8184
8185 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
8186
8187 SDValue Src = Op.getOperand(0);
8188 MVT SrcVT = Src.getSimpleValueType();
8189
8190 bool IsDirectExtend =
8191 IsExtend && (VT.getVectorElementType() != MVT::f64 ||
8192 (SrcVT.getVectorElementType() != MVT::f16 &&
8193 SrcVT.getVectorElementType() != MVT::bf16));
8194 bool IsDirectTrunc = !IsExtend && ((VT.getVectorElementType() != MVT::f16 &&
8195 VT.getVectorElementType() != MVT::bf16) ||
8196 SrcVT.getVectorElementType() != MVT::f64);
8197
8198 bool IsDirectConv = IsDirectExtend || IsDirectTrunc;
8199
8200 // Prepare any fixed-length vector operands.
8201 MVT ContainerVT = VT;
8202 SDValue Mask, VL;
8203 if (IsVP) {
8204 Mask = Op.getOperand(1);
8205 VL = Op.getOperand(2);
8206 }
8207 if (VT.isFixedLengthVector()) {
8208 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
8209 ContainerVT =
8210 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
8211 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
8212 if (IsVP) {
8213 MVT MaskVT = getMaskTypeFor(ContainerVT);
8214 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
8215 }
8216 }
8217
8218 if (!IsVP)
8219 std::tie(Mask, VL) =
8220 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
8221
8222 unsigned ConvOpc = IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::FP_ROUND_VL;
8223
8224 if (IsDirectConv) {
8225 Src = DAG.getNode(ConvOpc, DL, ContainerVT, Src, Mask, VL);
8226 if (VT.isFixedLengthVector())
8227 Src = convertFromScalableVector(VT, Src, DAG, Subtarget);
8228 return Src;
8229 }
8230
8231 unsigned InterConvOpc =
8232 IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::VFNCVT_ROD_VL;
8233
8234 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
8235 SDValue IntermediateConv =
8236 DAG.getNode(InterConvOpc, DL, InterVT, Src, Mask, VL);
8237 SDValue Result =
8238 DAG.getNode(ConvOpc, DL, ContainerVT, IntermediateConv, Mask, VL);
8239 if (VT.isFixedLengthVector())
8240 return convertFromScalableVector(VT, Result, DAG, Subtarget);
8241 return Result;
8242}
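The two conversion paths above share one decision: conversions between adjacent element widths (16<->32, 32<->64 bits) map to a single widening or narrowing RVV conversion, while f16/bf16 <-> f64 must hop through f32, with the narrowing leg rounding to odd so the second rounding cannot compound the error. A minimal standalone sketch of that decision, independent of LLVM types (the function name and plain-integer widths are illustrative only):

#include <cstdio>
#include <optional>

// Returns the width of the required intermediate element type, if any.
static std::optional<unsigned> fpConvIntermediate(unsigned SrcBits,
                                                  unsigned DstBits) {
  unsigned Wide = SrcBits > DstBits ? SrcBits : DstBits;
  unsigned Narrow = SrcBits > DstBits ? DstBits : SrcBits;
  if (Wide == 64 && Narrow == 16)
    return 32;         // f16/bf16 <-> f64 needs an f32 hop
  return std::nullopt; // adjacent widths convert directly
}

int main() {
  if (auto W = fpConvIntermediate(64, 16))
    std::printf("f64 -> f16 via f%u, round-to-odd on the narrowing step\n", *W);
  if (!fpConvIntermediate(32, 64))
    std::printf("f32 -> f64 is a single widening conversion\n");
}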
8243
8244// Given a scalable vector type and an index into it, returns the type for the
8245// smallest subvector that the index fits in. This can be used to reduce LMUL
8246// for operations like vslidedown.
8247//
8248// E.g. With Zvl128b, index 3 in a nxv4i32 fits within the first nxv2i32.
8249static std::optional<MVT>
8250getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG,
8251 const RISCVSubtarget &Subtarget) {
8252 assert(VecVT.isScalableVector());
8253 const unsigned EltSize = VecVT.getScalarSizeInBits();
8254 const unsigned VectorBitsMin = Subtarget.getRealMinVLen();
8255 const unsigned MinVLMAX = VectorBitsMin / EltSize;
8256 MVT SmallerVT;
8257 if (MaxIdx < MinVLMAX)
8258 SmallerVT = getLMUL1VT(VecVT);
8259 else if (MaxIdx < MinVLMAX * 2)
8260 SmallerVT = getLMUL1VT(VecVT).getDoubleNumVectorElementsVT();
8261 else if (MaxIdx < MinVLMAX * 4)
8262 SmallerVT = getLMUL1VT(VecVT)
8263 .getDoubleNumVectorElementsVT()
8264 .getDoubleNumVectorElementsVT();
8265 if (!SmallerVT.isValid() || !VecVT.bitsGT(SmallerVT))
8266 return std::nullopt;
8267 return SmallerVT;
8268}
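A standalone sketch of the index-to-LMUL arithmetic in getSmallestVTForIndex, using plain integers instead of MVTs (the helper name is illustrative; it assumes the LMUL1 element count is RealMinVLen / EltSize, as above):

#include <cstdio>
#include <optional>

static std::optional<unsigned> smallestLMULForIndex(unsigned OrigLMUL,
                                                    unsigned EltBits,
                                                    unsigned MinVLen,
                                                    unsigned MaxIdx) {
  unsigned MinVLMAX = MinVLen / EltBits; // elements per register at LMUL1
  unsigned NeededLMUL;
  if (MaxIdx < MinVLMAX)
    NeededLMUL = 1;
  else if (MaxIdx < MinVLMAX * 2)
    NeededLMUL = 2;
  else if (MaxIdx < MinVLMAX * 4)
    NeededLMUL = 4;
  else
    return std::nullopt;
  if (NeededLMUL >= OrigLMUL) // no narrowing below the original LMUL helps
    return std::nullopt;
  return NeededLMUL;
}

int main() {
  // Zvl128b, nxv4i32 is LMUL2; index 3 fits in LMUL1, as in the comment above.
  if (auto L = smallestLMULForIndex(2, 32, 128, 3))
    std::printf("index 3 -> LMUL%u\n", *L);
  // Index 5 already needs LMUL2, so the original type is kept.
  if (!smallestLMULForIndex(2, 32, 128, 5))
    std::printf("index 5 -> keep the original LMUL\n");
}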
8269
8270// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
8271// first position of a vector, and that vector is slid up to the insert index.
8272// By limiting the active vector length to index+1 and merging with the
8273// original vector (with an undisturbed tail policy for elements >= VL), we
8274// achieve the desired result of leaving all elements untouched except the one
8275// at VL-1, which is replaced with the desired value.
8276SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
8277 SelectionDAG &DAG) const {
8278 SDLoc DL(Op);
8279 MVT VecVT = Op.getSimpleValueType();
8280 SDValue Vec = Op.getOperand(0);
8281 SDValue Val = Op.getOperand(1);
8282 SDValue Idx = Op.getOperand(2);
8283
8284 if (VecVT.getVectorElementType() == MVT::i1) {
8285 // FIXME: For now we just promote to an i8 vector and insert into that,
8286 // but this is probably not optimal.
8287 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
8288 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
8289 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx);
8290 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec);
8291 }
8292
8293 MVT ContainerVT = VecVT;
8294 // If the operand is a fixed-length vector, convert to a scalable one.
8295 if (VecVT.isFixedLengthVector()) {
8296 ContainerVT = getContainerForFixedLengthVector(VecVT);
8297 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
8298 }
8299
8300 // If we know the index we're going to insert at, we can shrink Vec so that
8301 // we're performing the scalar inserts and slideup on a smaller LMUL.
8302 MVT OrigContainerVT = ContainerVT;
8303 SDValue OrigVec = Vec;
8304 SDValue AlignedIdx;
8305 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx)) {
8306 const unsigned OrigIdx = IdxC->getZExtValue();
8307 // Do we know an upper bound on LMUL?
8308 if (auto ShrunkVT = getSmallestVTForIndex(ContainerVT, OrigIdx,
8309 DL, DAG, Subtarget)) {
8310 ContainerVT = *ShrunkVT;
8311 AlignedIdx = DAG.getVectorIdxConstant(0, DL);
8312 }
8313
8314 // If we're compiling for an exact VLEN value, we can always perform
8315 // the insert in m1 as we can determine the register corresponding to
8316 // the index in the register group.
8317 const MVT M1VT = getLMUL1VT(ContainerVT);
8318 if (auto VLEN = Subtarget.getRealVLen();
8319 VLEN && ContainerVT.bitsGT(M1VT)) {
8320 EVT ElemVT = VecVT.getVectorElementType();
8321 unsigned ElemsPerVReg = *VLEN / ElemVT.getFixedSizeInBits();
8322 unsigned RemIdx = OrigIdx % ElemsPerVReg;
8323 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
8324 unsigned ExtractIdx =
8325 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
8326 AlignedIdx = DAG.getVectorIdxConstant(ExtractIdx, DL);
8327 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
8328 ContainerVT = M1VT;
8329 }
8330
8331 if (AlignedIdx)
8332 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
8333 AlignedIdx);
8334 }
8335
8336 MVT XLenVT = Subtarget.getXLenVT();
8337
8338 bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
8339 // Even i64-element vectors on RV32 can be lowered without scalar
8340 // legalization if the most-significant 32 bits of the value are not affected
8341 // by the sign-extension of the lower 32 bits.
8342 // TODO: We could also catch sign extensions of a 32-bit value.
8343 if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
8344 const auto *CVal = cast<ConstantSDNode>(Val);
8345 if (isInt<32>(CVal->getSExtValue())) {
8346 IsLegalInsert = true;
8347 Val = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32);
8348 }
8349 }
8350
8351 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
8352
8353 SDValue ValInVec;
8354
8355 if (IsLegalInsert) {
8356 unsigned Opc =
8357 VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
8358 if (isNullConstant(Idx)) {
8359 if (!VecVT.isFloatingPoint())
8360 Val = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Val);
8361 Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
8362
8363 if (AlignedIdx)
8364 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
8365 Vec, AlignedIdx);
8366 if (!VecVT.isFixedLengthVector())
8367 return Vec;
8368 return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
8369 }
8370 ValInVec = lowerScalarInsert(Val, VL, ContainerVT, DL, DAG, Subtarget);
8371 } else {
8372 // On RV32, i64-element vectors must be specially handled to place the
8373 // value at element 0, by using two vslide1down instructions in sequence on
8374 // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
8375 // this.
8376 SDValue ValLo, ValHi;
8377 std::tie(ValLo, ValHi) = DAG.SplitScalar(Val, DL, MVT::i32, MVT::i32);
8378 MVT I32ContainerVT =
8379 MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
8380 SDValue I32Mask =
8381 getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
8382 // Limit the active VL to two.
8383 SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
8384 // If the Idx is 0 we can insert directly into the vector.
8385 if (isNullConstant(Idx)) {
8386 // First slide in the lo value, then the hi value above it. We use slide1down
8387 // to avoid the register group overlap constraint of vslide1up.
8388 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
8389 Vec, Vec, ValLo, I32Mask, InsertI64VL);
8390 // If the source vector is undef don't pass along the tail elements from
8391 // the previous slide1down.
8392 SDValue Tail = Vec.isUndef() ? Vec : ValInVec;
8393 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
8394 Tail, ValInVec, ValHi, I32Mask, InsertI64VL);
8395 // Bitcast back to the right container type.
8396 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
8397
8398 if (AlignedIdx)
8399 ValInVec =
8400 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
8401 ValInVec, AlignedIdx);
8402 if (!VecVT.isFixedLengthVector())
8403 return ValInVec;
8404 return convertFromScalableVector(VecVT, ValInVec, DAG, Subtarget);
8405 }
8406
8407 // First slide in the lo value, then the hi value above it. We use slide1down
8408 // to avoid the register group overlap constraint of vslide1up.
8409 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
8410 DAG.getUNDEF(I32ContainerVT),
8411 DAG.getUNDEF(I32ContainerVT), ValLo,
8412 I32Mask, InsertI64VL);
8413 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
8414 DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi,
8415 I32Mask, InsertI64VL);
8416 // Bitcast back to the right container type.
8417 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
8418 }
8419
8420 // Now that the value is in a vector, slide it into position.
8421 SDValue InsertVL =
8422 DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));
8423
8424 // Use tail agnostic policy if Idx is the last index of Vec.
8425 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
8426 if (VecVT.isFixedLengthVector() && isa<ConstantSDNode>(Idx) &&
8427 Idx->getAsZExtVal() + 1 == VecVT.getVectorNumElements())
8428 Policy = RISCVII::TAIL_AGNOSTIC;
8429 SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, ValInVec,
8430 Idx, Mask, InsertVL, Policy);
8431
8432 if (AlignedIdx)
8433 Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
8434 Slideup, AlignedIdx);
8435 if (!VecVT.isFixedLengthVector())
8436 return Slideup;
8437 return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
8438}
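A standalone sketch of the slide-up insert strategy described in the comment before lowerINSERT_VECTOR_ELT, modeled on a plain array rather than on SelectionDAG nodes (names and the array model are illustrative only):

#include <cstdio>
#include <vector>

// The value is placed at element 0 of a temporary "vector", then slid up by
// Idx with the active length limited to Idx + 1 and a tail-undisturbed
// policy, so only element Idx of Dst changes.
static void insertViaSlideup(std::vector<int> &Dst, int Val, unsigned Idx) {
  std::vector<int> ValInVec(Dst.size(), 0);
  ValInVec[0] = Val;            // vmv.s.x-style scalar insert at lane 0
  unsigned VL = Idx + 1;        // limit the active vector length
  // vslideup with offset Idx: lanes [Idx, VL) take ValInVec[0, VL - Idx);
  // lanes >= VL stay untouched (tail undisturbed).
  for (unsigned I = Idx; I < VL; ++I)
    Dst[I] = ValInVec[I - Idx]; // only lane Idx is written here
}

int main() {
  std::vector<int> V{10, 11, 12, 13};
  insertViaSlideup(V, 99, 2);
  for (int X : V)
    std::printf("%d ", X); // 10 11 99 13
  std::printf("\n");
}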
8439
8440// Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
8441// extract the first element: (extractelt (slidedown vec, idx), 0). For integer
8442// types this is done using VMV_X_S to allow us to glean information about the
8443// sign bits of the result.
8444SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
8445 SelectionDAG &DAG) const {
8446 SDLoc DL(Op);
8447 SDValue Idx = Op.getOperand(1);
8448 SDValue Vec = Op.getOperand(0);
8449 EVT EltVT = Op.getValueType();
8450 MVT VecVT = Vec.getSimpleValueType();
8451 MVT XLenVT = Subtarget.getXLenVT();
8452
8453 if (VecVT.getVectorElementType() == MVT::i1) {
8454 // Use vfirst.m to extract the first bit.
8455 if (isNullConstant(Idx)) {
8456 MVT ContainerVT = VecVT;
8457 if (VecVT.isFixedLengthVector()) {
8458 ContainerVT = getContainerForFixedLengthVector(VecVT);
8459 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
8460 }
8461 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
8462 SDValue Vfirst =
8463 DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Vec, Mask, VL);
8464 SDValue Res = DAG.getSetCC(DL, XLenVT, Vfirst,
8465 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
8466 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
8467 }
8468 if (VecVT.isFixedLengthVector()) {
8469 unsigned NumElts = VecVT.getVectorNumElements();
8470 if (NumElts >= 8) {
8471 MVT WideEltVT;
8472 unsigned WidenVecLen;
8473 SDValue ExtractElementIdx;
8474 SDValue ExtractBitIdx;
8475 unsigned MaxEEW = Subtarget.getELen();
8476 MVT LargestEltVT = MVT::getIntegerVT(
8477 std::min(MaxEEW, unsigned(XLenVT.getSizeInBits())));
8478 if (NumElts <= LargestEltVT.getSizeInBits()) {
8479 assert(isPowerOf2_32(NumElts) &&
8480 "the number of elements should be power of 2");
8481 WideEltVT = MVT::getIntegerVT(NumElts);
8482 WidenVecLen = 1;
8483 ExtractElementIdx = DAG.getConstant(0, DL, XLenVT);
8484 ExtractBitIdx = Idx;
8485 } else {
8486 WideEltVT = LargestEltVT;
8487 WidenVecLen = NumElts / WideEltVT.getSizeInBits();
8488 // extract element index = index / element width
8489 ExtractElementIdx = DAG.getNode(
8490 ISD::SRL, DL, XLenVT, Idx,
8491 DAG.getConstant(Log2_64(WideEltVT.getSizeInBits()), DL, XLenVT));
8492 // mask bit index = index % element width
8493 ExtractBitIdx = DAG.getNode(
8494 ISD::AND, DL, XLenVT, Idx,
8495 DAG.getConstant(WideEltVT.getSizeInBits() - 1, DL, XLenVT));
8496 }
8497 MVT WideVT = MVT::getVectorVT(WideEltVT, WidenVecLen);
8498 Vec = DAG.getNode(ISD::BITCAST, DL, WideVT, Vec);
8499 SDValue ExtractElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT,
8500 Vec, ExtractElementIdx);
8501 // Extract the bit from GPR.
8502 SDValue ShiftRight =
8503 DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx);
8504 SDValue Res = DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight,
8505 DAG.getConstant(1, DL, XLenVT));
8506 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
8507 }
8508 }
8509 // Otherwise, promote to an i8 vector and extract from that.
8510 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
8511 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
8512 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
8513 }
8514
8515 // If this is a fixed vector, we need to convert it to a scalable vector.
8516 MVT ContainerVT = VecVT;
8517 if (VecVT.isFixedLengthVector()) {
8518 ContainerVT = getContainerForFixedLengthVector(VecVT);
8519 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
8520 }
8521
8522 // If we're compiling for an exact VLEN value and we have a known
8523 // constant index, we can always perform the extract in m1 (or
8524 // smaller) as we can determine the register corresponding to
8525 // the index in the register group.
8526 const auto VLen = Subtarget.getRealVLen();
8527 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
8528 IdxC && VLen && VecVT.getSizeInBits().getKnownMinValue() > *VLen) {
8529 MVT M1VT = getLMUL1VT(ContainerVT);
8530 unsigned OrigIdx = IdxC->getZExtValue();
8531 EVT ElemVT = VecVT.getVectorElementType();
8532 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
8533 unsigned RemIdx = OrigIdx % ElemsPerVReg;
8534 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
8535 unsigned ExtractIdx =
8536 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
8537 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec,
8538 DAG.getVectorIdxConstant(ExtractIdx, DL));
8539 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
8540 ContainerVT = M1VT;
8541 }
8542
8543 // Reduce the LMUL of our slidedown and vmv.x.s to the smallest LMUL which
8544 // contains our index.
8545 std::optional<uint64_t> MaxIdx;
8546 if (VecVT.isFixedLengthVector())
8547 MaxIdx = VecVT.getVectorNumElements() - 1;
8548 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx))
8549 MaxIdx = IdxC->getZExtValue();
8550 if (MaxIdx) {
8551 if (auto SmallerVT =
8552 getSmallestVTForIndex(ContainerVT, *MaxIdx, DL, DAG, Subtarget)) {
8553 ContainerVT = *SmallerVT;
8554 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
8555 DAG.getConstant(0, DL, XLenVT));
8556 }
8557 }
8558
8559 // If after narrowing, the required slide is still greater than LMUL2,
8560 // fallback to generic expansion and go through the stack. This is done
8561 // for a subtle reason: extracting *all* elements out of a vector is
8562 // widely expected to be linear in vector size, but because vslidedown
8563 // is linear in LMUL, performing N extracts using vslidedown becomes
8564 // O(n^2) / (VLEN/ETYPE) work. On the surface, going through the stack
8565 // seems to have the same problem (the store is linear in LMUL), but the
8566 // generic expansion *memoizes* the store, and thus for many extracts of
8567 // the same vector we end up with one store and a bunch of loads.
8568 // TODO: We don't have the same code for insert_vector_elt because we
8569 // have BUILD_VECTOR and handle the degenerate case there. Should we
8570 // consider adding an inverse BUILD_VECTOR node?
8571 MVT LMUL2VT = getLMUL1VT(ContainerVT).getDoubleNumVectorElementsVT();
8572 if (ContainerVT.bitsGT(LMUL2VT) && VecVT.isFixedLengthVector())
8573 return SDValue();
8574
8575 // If the index is 0, the vector is already in the right position.
8576 if (!isNullConstant(Idx)) {
8577 // Use a VL of 1 to avoid processing more elements than we need.
8578 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
8579 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
8580 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
8581 }
8582
8583 if (!EltVT.isInteger()) {
8584 // Floating-point extracts are handled in TableGen.
8585 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
8586 DAG.getVectorIdxConstant(0, DL));
8587 }
8588
8589 SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
8590 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
8591}
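For the fixed-length i1 path above (NumElts >= 8), the mask is reinterpreted as a vector of wide integer elements, the containing element is selected with Idx divided by the element width, and the bit within it with the remainder; the lowering implements this as SRL and AND because the element width is a power of two. A standalone sketch of that arithmetic (illustrative names, not LLVM code):

#include <cstdint>
#include <cstdio>
#include <vector>

static unsigned extractMaskBit(const std::vector<uint64_t> &WideVec,
                               unsigned EltBits, unsigned Idx) {
  // Plain division and modulo express the same arithmetic as the SRL/AND pair.
  unsigned ElementIdx = Idx / EltBits;
  unsigned BitIdx = Idx % EltBits;
  return (WideVec[ElementIdx] >> BitIdx) & 1;
}

int main() {
  // A v128i1 mask viewed as two i64 lanes; only mask bit 70 is set.
  std::vector<uint64_t> Mask{0x0, 0x40};
  std::printf("%u\n", extractMaskBit(Mask, 64, 70)); // prints 1
}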
8592
8593// Some RVV intrinsics may claim that they want an integer operand to be
8594 // promoted or expanded.
8595 static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG,
8596 const RISCVSubtarget &Subtarget) {
8597 assert((Op.getOpcode() == ISD::INTRINSIC_VOID ||
8598 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
8599 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
8600 "Unexpected opcode");
8601
8602 if (!Subtarget.hasVInstructions())
8603 return SDValue();
8604
8605 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
8606 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
8607 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
8608
8609 SDLoc DL(Op);
8610
8611 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
8612 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
8613 if (!II || !II->hasScalarOperand())
8614 return SDValue();
8615
8616 unsigned SplatOp = II->ScalarOperand + 1 + HasChain;
8617 assert(SplatOp < Op.getNumOperands());
8618
8619 SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
8620 SDValue &ScalarOp = Operands[SplatOp];
8621 MVT OpVT = ScalarOp.getSimpleValueType();
8622 MVT XLenVT = Subtarget.getXLenVT();
8623
8624 // If this isn't a scalar, or its type is XLenVT we're done.
8625 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
8626 return SDValue();
8627
8628 // Simplest case is that the operand needs to be promoted to XLenVT.
8629 if (OpVT.bitsLT(XLenVT)) {
8630 // If the operand is a constant, sign extend to increase our chances
8631 // of being able to use a .vi instruction. ANY_EXTEND would become a
8632 // zero extend and the simm5 check in isel would fail.
8633 // FIXME: Should we ignore the upper bits in isel instead?
8634 unsigned ExtOpc =
8635 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
8636 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
8637 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
8638 }
8639
8640 // Use the previous operand to get the vXi64 VT. The result might be a mask
8641 // VT for compares. Using the previous operand assumes that the previous
8642 // operand will never have a smaller element size than a scalar operand and
8643 // that a widening operation never uses SEW=64.
8644 // NOTE: If this fails the below assert, we can probably just find the
8645 // element count from any operand or result and use it to construct the VT.
8646 assert(II->ScalarOperand > 0 && "Unexpected splat operand!");
8647 MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();
8648
8649 // The more complex case is when the scalar is larger than XLenVT.
8650 assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
8651 VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");
8652
8653 // If this is a sign-extended 32-bit value, we can truncate it and rely on the
8654 // instruction to sign-extend since SEW>XLEN.
8655 if (DAG.ComputeNumSignBits(ScalarOp) > 32) {
8656 ScalarOp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ScalarOp);
8657 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
8658 }
8659
8660 switch (IntNo) {
8661 case Intrinsic::riscv_vslide1up:
8662 case Intrinsic::riscv_vslide1down:
8663 case Intrinsic::riscv_vslide1up_mask:
8664 case Intrinsic::riscv_vslide1down_mask: {
8665 // We need to special case these when the scalar is larger than XLen.
8666 unsigned NumOps = Op.getNumOperands();
8667 bool IsMasked = NumOps == 7;
8668
8669 // Convert the vector source to the equivalent nxvXi32 vector.
8670 MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
8671 SDValue Vec = DAG.getBitcast(I32VT, Operands[2]);
8672 SDValue ScalarLo, ScalarHi;
8673 std::tie(ScalarLo, ScalarHi) =
8674 DAG.SplitScalar(ScalarOp, DL, MVT::i32, MVT::i32);
8675
8676 // Double the VL since we halved SEW.
8677 SDValue AVL = getVLOperand(Op);
8678 SDValue I32VL;
8679
8680 // Optimize for constant AVL
8681 if (isa<ConstantSDNode>(AVL)) {
8682 const auto [MinVLMAX, MaxVLMAX] =
8683 RISCVTargetLowering::computeVLMAXBounds(VT, Subtarget);
8684
8685 uint64_t AVLInt = AVL->getAsZExtVal();
8686 if (AVLInt <= MinVLMAX) {
8687 I32VL = DAG.getConstant(2 * AVLInt, DL, XLenVT);
8688 } else if (AVLInt >= 2 * MaxVLMAX) {
8689 // Just set vl to VLMAX in this situation
8690 RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(I32VT);
8691 SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
8692 unsigned Sew = RISCVVType::encodeSEW(I32VT.getScalarSizeInBits());
8693 SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
8694 SDValue SETVLMAX = DAG.getTargetConstant(
8695 Intrinsic::riscv_vsetvlimax, DL, MVT::i32);
8696 I32VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVLMAX, SEW,
8697 LMUL);
8698 } else {
8699 // For AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working vl
8700 // is related to the hardware implementation.
8701 // So let the following code handle it.
8702 }
8703 }
8704 if (!I32VL) {
8705 RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(VT);
8706 SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
8707 unsigned Sew = RISCVVType::encodeSEW(VT.getScalarSizeInBits());
8708 SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
8709 SDValue SETVL =
8710 DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, MVT::i32);
8711 // Use a vsetvli instruction to get the actually-used length, which is
8712 // related to the hardware implementation.
8713 SDValue VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVL, AVL,
8714 SEW, LMUL);
8715 I32VL =
8716 DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
8717 }
8718
8719 SDValue I32Mask = getAllOnesMask(I32VT, I32VL, DL, DAG);
8720
8721 // Shift the two scalar parts in using SEW=32 slide1up/slide1down
8722 // instructions.
8723 SDValue Passthru;
8724 if (IsMasked)
8725 Passthru = DAG.getUNDEF(I32VT);
8726 else
8727 Passthru = DAG.getBitcast(I32VT, Operands[1]);
8728
8729 if (IntNo == Intrinsic::riscv_vslide1up ||
8730 IntNo == Intrinsic::riscv_vslide1up_mask) {
8731 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
8732 ScalarHi, I32Mask, I32VL);
8733 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
8734 ScalarLo, I32Mask, I32VL);
8735 } else {
8736 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
8737 ScalarLo, I32Mask, I32VL);
8738 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
8739 ScalarHi, I32Mask, I32VL);
8740 }
8741
8742 // Convert back to nxvXi64.
8743 Vec = DAG.getBitcast(VT, Vec);
8744
8745 if (!IsMasked)
8746 return Vec;
8747 // Apply mask after the operation.
8748 SDValue Mask = Operands[NumOps - 3];
8749 SDValue MaskedOff = Operands[1];
8750 // Assume Policy operand is the last operand.
8751 uint64_t Policy = Operands[NumOps - 1]->getAsZExtVal();
8752 // We don't need to select maskedoff if it's undef.
8753 if (MaskedOff.isUndef())
8754 return Vec;
8755 // TAMU
8756 if (Policy == RISCVII::TAIL_AGNOSTIC)
8757 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
8758 DAG.getUNDEF(VT), AVL);
8759 // TUMA or TUMU: Currently we always emit tumu policy regardless of tuma.
8760 // It's fine because vmerge does not care about the mask policy.
8761 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
8762 MaskedOff, AVL);
8763 }
8764 }
8765
8766 // We need to convert the scalar to a splat vector.
8767 SDValue VL = getVLOperand(Op);
8768 assert(VL.getValueType() == XLenVT);
8769 ScalarOp = splatSplitI64WithVL(DL, VT, SDValue(), ScalarOp, VL, DAG);
8770 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
8771}
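For the vslide1up/vslide1down special case above, an i64 scalar on RV32 is pushed in as two SEW=32 halves on a vector of doubled length (hence the doubled VL), low half first for slide1down. A standalone sketch of the slide1down half of that scheme on a plain array (illustrative, not LLVM code):

#include <cstdint>
#include <cstdio>
#include <vector>

// vslide1down.vx: every lane takes its right neighbour, the last lane gets Val.
static void slide1down(std::vector<uint32_t> &V, uint32_t Val) {
  for (size_t I = 0; I + 1 < V.size(); ++I)
    V[I] = V[I + 1];
  V.back() = Val;
}

int main() {
  // A v2i64 source viewed as v4i32 (little-endian lane order): i64 lanes {1, 2}.
  std::vector<uint32_t> Vec{0x1, 0x0, 0x2, 0x0};
  uint64_t Scalar = 0x0000000500000004ULL; // the i64 value being slid in
  slide1down(Vec, uint32_t(Scalar));       // low half first
  slide1down(Vec, uint32_t(Scalar >> 32)); // then the high half
  // Vec now holds the i64 lanes {2, 0x0000000500000004}.
  for (uint32_t X : Vec)
    std::printf("0x%x ", unsigned(X));
  std::printf("\n");
}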
8772
8773// Lower the llvm.get.vector.length intrinsic to vsetvli. We only support
8774// scalable vector llvm.get.vector.length for now.
8775//
8776// We need to convert from a scalable VF to a vsetvli with VLMax equal to
8777// (vscale * VF). The vscale and VF are independent of element width. We use
8778// SEW=8 for the vsetvli because it is the only element width that supports all
8779 // fractional LMULs. The LMUL is chosen so that with SEW=8 the VLMax is
8780 // (vscale * VF), where vscale is defined as VLEN/RVVBitsPerBlock. The
8781// InsertVSETVLI pass can fix up the vtype of the vsetvli if a different
8782// SEW and LMUL are better for the surrounding vector instructions.
8783 static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,
8784 const RISCVSubtarget &Subtarget) {
8785 MVT XLenVT = Subtarget.getXLenVT();
8786
8787 // The smallest LMUL is only valid for the smallest element width.
8788 const unsigned ElementWidth = 8;
8789
8790 // Determine the VF that corresponds to LMUL 1 for ElementWidth.
8791 unsigned LMul1VF = RISCV::RVVBitsPerBlock / ElementWidth;
8792 // We don't support VF==1 with ELEN==32.
8793 [[maybe_unused]] unsigned MinVF =
8794 RISCV::RVVBitsPerBlock / Subtarget.getELen();
8795
8796 [[maybe_unused]] unsigned VF = N->getConstantOperandVal(2);
8797 assert(VF >= MinVF && VF <= (LMul1VF * 8) && isPowerOf2_32(VF) &&
8798 "Unexpected VF");
8799
8800 bool Fractional = VF < LMul1VF;
8801 unsigned LMulVal = Fractional ? LMul1VF / VF : VF / LMul1VF;
8802 unsigned VLMUL = (unsigned)RISCVVType::encodeLMUL(LMulVal, Fractional);
8803 unsigned VSEW = RISCVVType::encodeSEW(ElementWidth);
8804
8805 SDLoc DL(N);
8806
8807 SDValue LMul = DAG.getTargetConstant(VLMUL, DL, XLenVT);
8808 SDValue Sew = DAG.getTargetConstant(VSEW, DL, XLenVT);
8809
8810 SDValue AVL = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1));
8811
8812 SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT);
8813 SDValue Res =
8814 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul);
8815 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res);
8816}
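A standalone sketch of the LMUL selection performed above, assuming RVVBitsPerBlock == 64 so that SEW=8 with LMUL=1 gives VLMax == vscale * 8 (the helper name is illustrative):

#include <cstdio>
#include <string>

static std::string lmulForVF(unsigned VF) {
  const unsigned LMul1VF = 64 / 8; // RVVBitsPerBlock / ElementWidth
  bool Fractional = VF < LMul1VF;
  unsigned LMulVal = Fractional ? LMul1VF / VF : VF / LMul1VF;
  return (Fractional ? "mf" : "m") + std::to_string(LMulVal);
}

int main() {
  std::printf("vscale x 2  -> e8, %s\n", lmulForVF(2).c_str());  // e8, mf4
  std::printf("vscale x 8  -> e8, %s\n", lmulForVF(8).c_str());  // e8, m1
  std::printf("vscale x 32 -> e8, %s\n", lmulForVF(32).c_str()); // e8, m4
}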
8817
8818 static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG,
8819 const RISCVSubtarget &Subtarget) {
8820 SDValue Op0 = N->getOperand(1);
8821 MVT OpVT = Op0.getSimpleValueType();
8822 MVT ContainerVT = OpVT;
8823 if (OpVT.isFixedLengthVector()) {
8824 ContainerVT = getContainerForFixedLengthVector(DAG, OpVT, Subtarget);
8825 Op0 = convertToScalableVector(ContainerVT, Op0, DAG, Subtarget);
8826 }
8827 MVT XLenVT = Subtarget.getXLenVT();
8828 SDLoc DL(N);
8829 auto [Mask, VL] = getDefaultVLOps(OpVT, ContainerVT, DL, DAG, Subtarget);
8830 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Op0, Mask, VL);
8831 if (isOneConstant(N->getOperand(2)))
8832 return Res;
8833
8834 // Convert -1 to VL.
8835 SDValue Setcc =
8836 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
8837 VL = DAG.getElementCount(DL, XLenVT, OpVT.getVectorElementCount());
8838 return DAG.getSelect(DL, XLenVT, Setcc, VL, Res);
8839}
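A standalone sketch of the cttz.elts lowering above: vfirst.m reports the index of the first active element, or -1 when the mask is empty, and the -1 is replaced by the element count unless the intrinsic's second operand says zero-is-poison (illustrative model, not LLVM code):

#include <cstdio>
#include <vector>

static long cttzElts(const std::vector<bool> &Mask, bool ZeroIsPoison) {
  long Res = -1; // vfirst.m result when no bit is set
  for (size_t I = 0; I < Mask.size(); ++I)
    if (Mask[I]) {
      Res = long(I);
      break;
    }
  if (!ZeroIsPoison && Res < 0)
    Res = long(Mask.size()); // select VL when vfirst.m reported "none"
  return Res;
}

int main() {
  std::printf("%ld\n", cttzElts({false, false, true, false}, false));  // 2
  std::printf("%ld\n", cttzElts({false, false, false, false}, false)); // 4
}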
8840
8841 static inline void promoteVCIXScalar(const SDValue &Op,
8842 SmallVectorImpl<SDValue> &Operands,
8843 SelectionDAG &DAG) {
8844 const RISCVSubtarget &Subtarget =
8845 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
8846
8847 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
8848 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
8849 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
8850 SDLoc DL(Op);
8851
8852 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
8853 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
8854 if (!II || !II->hasScalarOperand())
8855 return;
8856
8857 unsigned SplatOp = II->ScalarOperand + 1;
8858 assert(SplatOp < Op.getNumOperands());
8859
8860 SDValue &ScalarOp = Operands[SplatOp];
8861 MVT OpVT = ScalarOp.getSimpleValueType();
8862 MVT XLenVT = Subtarget.getXLenVT();
8863
8864 // The code below is partially copied from lowerVectorIntrinsicScalars.
8865 // If this isn't a scalar, or its type is XLenVT we're done.
8866 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
8867 return;
8868
8869 // Manually emit promote operation for scalar operation.
8870 if (OpVT.bitsLT(XLenVT)) {
8871 unsigned ExtOpc =
8872 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
8873 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
8874 }
8875
8876 return;
8877}
8878
8879 static void processVCIXOperands(SDValue &OrigOp,
8880 SmallVectorImpl<SDValue> &Operands,
8881 SelectionDAG &DAG) {
8882 promoteVCIXScalar(OrigOp, Operands, DAG);
8883 const RISCVSubtarget &Subtarget =
8884 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
8885 for (SDValue &V : Operands) {
8886 EVT ValType = V.getValueType();
8887 if (ValType.isVector() && ValType.isFloatingPoint()) {
8888 MVT InterimIVT =
8889 MVT::getVectorVT(MVT::getIntegerVT(ValType.getScalarSizeInBits()),
8890 ValType.getVectorElementCount());
8891 V = DAG.getBitcast(InterimIVT, V);
8892 }
8893 if (ValType.isFixedLengthVector()) {
8894 MVT OpContainerVT = getContainerForFixedLengthVector(
8895 DAG, V.getSimpleValueType(), Subtarget);
8896 V = convertToScalableVector(OpContainerVT, V, DAG, Subtarget);
8897 }
8898 }
8899}
8900
8901// LMUL * VLEN should be greater than or equal to EGS * SEW
8902static inline bool isValidEGW(int EGS, EVT VT,
8903 const RISCVSubtarget &Subtarget) {
8904 return (Subtarget.getRealMinVLen() *
8905 VT.getSizeInBits().getKnownMinValue()) / RISCV::RVVBitsPerBlock >=
8906 EGS * VT.getScalarSizeInBits();
8907}
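A standalone sketch of the EGW legality check above, with LMUL written out as KnownMinBits / RVVBitsPerBlock and RVVBitsPerBlock assumed to be 64 (illustrative, not LLVM code):

#include <cstdio>

static bool isValidEGWSketch(unsigned EGS, unsigned KnownMinBits, unsigned SEW,
                             unsigned MinVLen) {
  // LMUL * VLEN >= EGS * SEW, with LMUL expressed as KnownMinBits / 64.
  return MinVLen * KnownMinBits / 64 >= EGS * SEW;
}

int main() {
  // Zvl128b, nxv4i32 (KnownMin 128 bits => LMUL 2): 2 * 128 >= 4 * 32.
  std::printf("%d\n", isValidEGWSketch(4, 128, 32, 128)); // 1
  // Zvl64b, nxv2i32 (LMUL 1): 1 * 64 < 4 * 32, so the intrinsic is rejected.
  std::printf("%d\n", isValidEGWSketch(4, 64, 32, 64));   // 0
}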
8908
8909SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
8910 SelectionDAG &DAG) const {
8911 unsigned IntNo = Op.getConstantOperandVal(0);
8912 SDLoc DL(Op);
8913 MVT XLenVT = Subtarget.getXLenVT();
8914
8915 switch (IntNo) {
8916 default:
8917 break; // Don't custom lower most intrinsics.
8918 case Intrinsic::thread_pointer: {
8919 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8920 return DAG.getRegister(RISCV::X4, PtrVT);
8921 }
8922 case Intrinsic::riscv_orc_b:
8923 case Intrinsic::riscv_brev8:
8924 case Intrinsic::riscv_sha256sig0:
8925 case Intrinsic::riscv_sha256sig1:
8926 case Intrinsic::riscv_sha256sum0:
8927 case Intrinsic::riscv_sha256sum1:
8928 case Intrinsic::riscv_sm3p0:
8929 case Intrinsic::riscv_sm3p1: {
8930 unsigned Opc;
8931 switch (IntNo) {
8932 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
8933 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
8934 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
8935 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
8936 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
8937 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
8938 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
8939 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
8940 }
8941
8942 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8943 SDValue NewOp =
8944 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8945 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
8946 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8947 }
8948
8949 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
8950 }
8951 case Intrinsic::riscv_sm4ks:
8952 case Intrinsic::riscv_sm4ed: {
8953 unsigned Opc =
8954 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
8955
8956 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8957 SDValue NewOp0 =
8958 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8959 SDValue NewOp1 =
8960 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
8961 SDValue Res =
8962 DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, Op.getOperand(3));
8963 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8964 }
8965
8966 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2),
8967 Op.getOperand(3));
8968 }
8969 case Intrinsic::riscv_zip:
8970 case Intrinsic::riscv_unzip: {
8971 unsigned Opc =
8972 IntNo == Intrinsic::riscv_zip ? RISCVISD::ZIP : RISCVISD::UNZIP;
8973 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
8974 }
8975 case Intrinsic::riscv_mopr: {
8976 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8977 SDValue NewOp =
8978 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8979 SDValue Res = DAG.getNode(
8980 RISCVISD::MOPR, DL, MVT::i64, NewOp,
8981 DAG.getTargetConstant(Op.getConstantOperandVal(2), DL, MVT::i64));
8982 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8983 }
8984 return DAG.getNode(RISCVISD::MOPR, DL, XLenVT, Op.getOperand(1),
8985 Op.getOperand(2));
8986 }
8987
8988 case Intrinsic::riscv_moprr: {
8989 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8990 SDValue NewOp0 =
8991 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8992 SDValue NewOp1 =
8993 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
8994 SDValue Res = DAG.getNode(
8995 RISCVISD::MOPRR, DL, MVT::i64, NewOp0, NewOp1,
8996 DAG.getTargetConstant(Op.getConstantOperandVal(3), DL, MVT::i64));
8997 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8998 }
8999 return DAG.getNode(RISCVISD::MOPRR, DL, XLenVT, Op.getOperand(1),
9000 Op.getOperand(2), Op.getOperand(3));
9001 }
9002 case Intrinsic::riscv_clmul:
9003 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
9004 SDValue NewOp0 =
9005 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
9006 SDValue NewOp1 =
9007 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
9008 SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
9009 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
9010 }
9011 return DAG.getNode(RISCVISD::CLMUL, DL, XLenVT, Op.getOperand(1),
9012 Op.getOperand(2));
9013 case Intrinsic::riscv_clmulh:
9014 case Intrinsic::riscv_clmulr: {
9015 unsigned Opc =
9016 IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH : RISCVISD::CLMULR;
9017 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
9018 SDValue NewOp0 =
9019 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
9020 SDValue NewOp1 =
9021 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
9022 NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
9023 DAG.getConstant(32, DL, MVT::i64));
9024 NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
9025 DAG.getConstant(32, DL, MVT::i64));
9026 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
9027 Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
9028 DAG.getConstant(32, DL, MVT::i64));
9029 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
9030 }
9031
9032 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
9033 }
9034 case Intrinsic::experimental_get_vector_length:
9035 return lowerGetVectorLength(Op.getNode(), DAG, Subtarget);
9036 case Intrinsic::experimental_cttz_elts:
9037 return lowerCttzElts(Op.getNode(), DAG, Subtarget);
9038 case Intrinsic::riscv_vmv_x_s: {
9039 SDValue Res = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Op.getOperand(1));
9040 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
9041 }
9042 case Intrinsic::riscv_vfmv_f_s:
9043 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
9044 Op.getOperand(1), DAG.getVectorIdxConstant(0, DL));
9045 case Intrinsic::riscv_vmv_v_x:
9046 return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2),
9047 Op.getOperand(3), Op.getSimpleValueType(), DL, DAG,
9048 Subtarget);
9049 case Intrinsic::riscv_vfmv_v_f:
9050 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
9051 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
9052 case Intrinsic::riscv_vmv_s_x: {
9053 SDValue Scalar = Op.getOperand(2);
9054
9055 if (Scalar.getValueType().bitsLE(XLenVT)) {
9056 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
9057 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(),
9058 Op.getOperand(1), Scalar, Op.getOperand(3));
9059 }
9060
9061 assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");
9062
9063 // This is an i64 value that lives in two scalar registers. We have to
9064 // insert this in a convoluted way. First we build vXi64 splat containing
9065 // the two values that we assemble using some bit math. Next we'll use
9066 // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
9067 // to merge element 0 from our splat into the source vector.
9068 // FIXME: This is probably not the best way to do this, but it is
9069 // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
9070 // point.
9071 // sw lo, (a0)
9072 // sw hi, 4(a0)
9073 // vlse vX, (a0)
9074 //
9075 // vid.v vVid
9076 // vmseq.vx mMask, vVid, 0
9077 // vmerge.vvm vDest, vSrc, vVal, mMask
9078 MVT VT = Op.getSimpleValueType();
9079 SDValue Vec = Op.getOperand(1);
9080 SDValue VL = getVLOperand(Op);
9081
9082 SDValue SplattedVal = splatSplitI64WithVL(DL, VT, SDValue(), Scalar, VL, DAG);
9083 if (Op.getOperand(1).isUndef())
9084 return SplattedVal;
9085 SDValue SplattedIdx =
9086 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
9087 DAG.getConstant(0, DL, MVT::i32), VL);
9088
9089 MVT MaskVT = getMaskTypeFor(VT);
9090 SDValue Mask = getAllOnesMask(VT, VL, DL, DAG);
9091 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
9092 SDValue SelectCond =
9093 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
9094 {VID, SplattedIdx, DAG.getCondCode(ISD::SETEQ),
9095 DAG.getUNDEF(MaskVT), Mask, VL});
9096 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, SelectCond, SplattedVal,
9097 Vec, DAG.getUNDEF(VT), VL);
9098 }
9099 case Intrinsic::riscv_vfmv_s_f:
9100 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, Op.getSimpleValueType(),
9101 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
9102 // EGS * EEW >= 128 bits
9103 case Intrinsic::riscv_vaesdf_vv:
9104 case Intrinsic::riscv_vaesdf_vs:
9105 case Intrinsic::riscv_vaesdm_vv:
9106 case Intrinsic::riscv_vaesdm_vs:
9107 case Intrinsic::riscv_vaesef_vv:
9108 case Intrinsic::riscv_vaesef_vs:
9109 case Intrinsic::riscv_vaesem_vv:
9110 case Intrinsic::riscv_vaesem_vs:
9111 case Intrinsic::riscv_vaeskf1:
9112 case Intrinsic::riscv_vaeskf2:
9113 case Intrinsic::riscv_vaesz_vs:
9114 case Intrinsic::riscv_vsm4k:
9115 case Intrinsic::riscv_vsm4r_vv:
9116 case Intrinsic::riscv_vsm4r_vs: {
9117 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
9118 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
9119 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
9120 report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
9121 return Op;
9122 }
9123 // EGS * EEW >= 256 bits
9124 case Intrinsic::riscv_vsm3c:
9125 case Intrinsic::riscv_vsm3me: {
9126 if (!isValidEGW(8, Op.getSimpleValueType(), Subtarget) ||
9127 !isValidEGW(8, Op->getOperand(1).getSimpleValueType(), Subtarget))
9128 report_fatal_error("EGW should be greater than or equal to 8 * SEW.");
9129 return Op;
9130 }
9131 // zvknha(SEW=32)/zvknhb(SEW=[32|64])
9132 case Intrinsic::riscv_vsha2ch:
9133 case Intrinsic::riscv_vsha2cl:
9134 case Intrinsic::riscv_vsha2ms: {
9135 if (Op->getSimpleValueType(0).getScalarSizeInBits() == 64 &&
9136 !Subtarget.hasStdExtZvknhb())
9137 report_fatal_error("SEW=64 needs Zvknhb to be enabled.");
9138 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
9139 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
9140 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
9141 report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
9142 return Op;
9143 }
9144 case Intrinsic::riscv_sf_vc_v_x:
9145 case Intrinsic::riscv_sf_vc_v_i:
9146 case Intrinsic::riscv_sf_vc_v_xv:
9147 case Intrinsic::riscv_sf_vc_v_iv:
9148 case Intrinsic::riscv_sf_vc_v_vv:
9149 case Intrinsic::riscv_sf_vc_v_fv:
9150 case Intrinsic::riscv_sf_vc_v_xvv:
9151 case Intrinsic::riscv_sf_vc_v_ivv:
9152 case Intrinsic::riscv_sf_vc_v_vvv:
9153 case Intrinsic::riscv_sf_vc_v_fvv:
9154 case Intrinsic::riscv_sf_vc_v_xvw:
9155 case Intrinsic::riscv_sf_vc_v_ivw:
9156 case Intrinsic::riscv_sf_vc_v_vvw:
9157 case Intrinsic::riscv_sf_vc_v_fvw: {
9158 MVT VT = Op.getSimpleValueType();
9159
9160 SmallVector<SDValue> Operands{Op->op_values()};
9161 processVCIXOperands(Op, Operands, DAG);
9162
9163 MVT RetVT = VT;
9164 if (VT.isFixedLengthVector())
9165 RetVT = getContainerForFixedLengthVector(VT);
9166 else if (VT.isFloatingPoint())
9167 RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
9168 VT.getVectorElementCount());
9169
9170 SDValue NewNode = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, RetVT, Operands);
9171
9172 if (VT.isFixedLengthVector())
9173 NewNode = convertFromScalableVector(VT, NewNode, DAG, Subtarget);
9174 else if (VT.isFloatingPoint())
9175 NewNode = DAG.getBitcast(VT, NewNode);
9176
9177 if (Op == NewNode)
9178 break;
9179
9180 return NewNode;
9181 }
9182 }
9183
9184 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
9185}
9186
9187 static SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG,
9188 unsigned Type) {
9189 SDLoc DL(Op);
9190 SmallVector<SDValue> Operands{Op->op_values()};
9191 Operands.erase(Operands.begin() + 1);
9192
9193 const RISCVSubtarget &Subtarget =
9194 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
9195 MVT VT = Op.getSimpleValueType();
9196 MVT RetVT = VT;
9197 MVT FloatVT = VT;
9198
9199 if (VT.isFloatingPoint()) {
9200 RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
9201 VT.getVectorElementCount());
9202 FloatVT = RetVT;
9203 }
9204 if (VT.isFixedLengthVector())
9205 RetVT = getContainerForFixedLengthVector(DAG, RetVT,
9206 Subtarget);
9207
9208 processVCIXOperands(Op, Operands, DAG);
9209
9210 SDVTList VTs = DAG.getVTList({RetVT, MVT::Other});
9211 SDValue NewNode = DAG.getNode(Type, DL, VTs, Operands);
9212 SDValue Chain = NewNode.getValue(1);
9213
9214 if (VT.isFixedLengthVector())
9215 NewNode = convertFromScalableVector(FloatVT, NewNode, DAG, Subtarget);
9216 if (VT.isFloatingPoint())
9217 NewNode = DAG.getBitcast(VT, NewNode);
9218
9219 NewNode = DAG.getMergeValues({NewNode, Chain}, DL);
9220
9221 return NewNode;
9222}
9223
9224 static SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG,
9225 unsigned Type) {
9226 SmallVector<SDValue> Operands{Op->op_values()};
9227 Operands.erase(Operands.begin() + 1);
9228 processVCIXOperands(Op, Operands, DAG);
9229
9230 return DAG.getNode(Type, SDLoc(Op), Op.getValueType(), Operands);
9231}
9232
9233SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
9234 SelectionDAG &DAG) const {
9235 unsigned IntNo = Op.getConstantOperandVal(1);
9236 switch (IntNo) {
9237 default:
9238 break;
9239 case Intrinsic::riscv_masked_strided_load: {
9240 SDLoc DL(Op);
9241 MVT XLenVT = Subtarget.getXLenVT();
9242
9243 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
9244 // the selection of the masked intrinsics doesn't do this for us.
9245 SDValue Mask = Op.getOperand(5);
9246 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
9247
9248 MVT VT = Op->getSimpleValueType(0);
9249 MVT ContainerVT = VT;
9250 if (VT.isFixedLengthVector())
9251 ContainerVT = getContainerForFixedLengthVector(VT);
9252
9253 SDValue PassThru = Op.getOperand(2);
9254 if (!IsUnmasked) {
9255 MVT MaskVT = getMaskTypeFor(ContainerVT);
9256 if (VT.isFixedLengthVector()) {
9257 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
9258 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
9259 }
9260 }
9261
9262 auto *Load = cast<MemIntrinsicSDNode>(Op);
9263 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
9264 SDValue Ptr = Op.getOperand(3);
9265 SDValue Stride = Op.getOperand(4);
9266 SDValue Result, Chain;
9267
9268 // TODO: We restrict this to unmasked loads currently in consideration of
9269 // the complexity of handling all-false masks.
9270 MVT ScalarVT = ContainerVT.getVectorElementType();
9271 if (IsUnmasked && isNullConstant(Stride) && ContainerVT.isInteger()) {
9272 SDValue ScalarLoad =
9273 DAG.getExtLoad(ISD::ZEXTLOAD, DL, XLenVT, Load->getChain(), Ptr,
9274 ScalarVT, Load->getMemOperand());
9275 Chain = ScalarLoad.getValue(1);
9276 Result = lowerScalarSplat(SDValue(), ScalarLoad, VL, ContainerVT, DL, DAG,
9277 Subtarget);
9278 } else if (IsUnmasked && isNullConstant(Stride) && isTypeLegal(ScalarVT)) {
9279 SDValue ScalarLoad = DAG.getLoad(ScalarVT, DL, Load->getChain(), Ptr,
9280 Load->getMemOperand());
9281 Chain = ScalarLoad.getValue(1);
9282 Result = DAG.getSplat(ContainerVT, DL, ScalarLoad);
9283 } else {
9284 SDValue IntID = DAG.getTargetConstant(
9285 IsUnmasked ? Intrinsic::riscv_vlse : Intrinsic::riscv_vlse_mask, DL,
9286 XLenVT);
9287
9288 SmallVector<SDValue, 8> Ops{Load->getChain(), IntID};
9289 if (IsUnmasked)
9290 Ops.push_back(DAG.getUNDEF(ContainerVT));
9291 else
9292 Ops.push_back(PassThru);
9293 Ops.push_back(Ptr);
9294 Ops.push_back(Stride);
9295 if (!IsUnmasked)
9296 Ops.push_back(Mask);
9297 Ops.push_back(VL);
9298 if (!IsUnmasked) {
9299 SDValue Policy =
9300 DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
9301 Ops.push_back(Policy);
9302 }
9303
9304 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
9305 Result =
9306 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
9307 Load->getMemoryVT(), Load->getMemOperand());
9308 Chain = Result.getValue(1);
9309 }
9310 if (VT.isFixedLengthVector())
9311 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
9312 return DAG.getMergeValues({Result, Chain}, DL);
9313 }
9314 case Intrinsic::riscv_seg2_load:
9315 case Intrinsic::riscv_seg3_load:
9316 case Intrinsic::riscv_seg4_load:
9317 case Intrinsic::riscv_seg5_load:
9318 case Intrinsic::riscv_seg6_load:
9319 case Intrinsic::riscv_seg7_load:
9320 case Intrinsic::riscv_seg8_load: {
9321 SDLoc DL(Op);
9322 static const Intrinsic::ID VlsegInts[7] = {
9323 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
9324 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
9325 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
9326 Intrinsic::riscv_vlseg8};
9327 unsigned NF = Op->getNumValues() - 1;
9328 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
9329 MVT XLenVT = Subtarget.getXLenVT();
9330 MVT VT = Op->getSimpleValueType(0);
9331 MVT ContainerVT = getContainerForFixedLengthVector(VT);
9332
9333 SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG,
9334 Subtarget);
9335 SDValue IntID = DAG.getTargetConstant(VlsegInts[NF - 2], DL, XLenVT);
9336 auto *Load = cast<MemIntrinsicSDNode>(Op);
9337 SmallVector<EVT, 9> ContainerVTs(NF, ContainerVT);
9338 ContainerVTs.push_back(MVT::Other);
9339 SDVTList VTs = DAG.getVTList(ContainerVTs);
9340 SmallVector<SDValue, 12> Ops = {Load->getChain(), IntID};
9341 Ops.insert(Ops.end(), NF, DAG.getUNDEF(ContainerVT));
9342 Ops.push_back(Op.getOperand(2));
9343 Ops.push_back(VL);
9344 SDValue Result =
9345 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
9346 Load->getMemoryVT(), Load->getMemOperand());
9347 SmallVector<SDValue, 9> Results;
9348 for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++)
9349 Results.push_back(convertFromScalableVector(VT, Result.getValue(RetIdx),
9350 DAG, Subtarget));
9351 Results.push_back(Result.getValue(NF));
9352 return DAG.getMergeValues(Results, DL);
9353 }
9354 case Intrinsic::riscv_sf_vc_v_x_se:
9355 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_X_SE);
9356 case Intrinsic::riscv_sf_vc_v_i_se:
9357 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_I_SE);
9358 case Intrinsic::riscv_sf_vc_v_xv_se:
9359 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XV_SE);
9360 case Intrinsic::riscv_sf_vc_v_iv_se:
9361 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IV_SE);
9362 case Intrinsic::riscv_sf_vc_v_vv_se:
9363 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VV_SE);
9364 case Intrinsic::riscv_sf_vc_v_fv_se:
9365 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FV_SE);
9366 case Intrinsic::riscv_sf_vc_v_xvv_se:
9367 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVV_SE);
9368 case Intrinsic::riscv_sf_vc_v_ivv_se:
9369 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVV_SE);
9370 case Intrinsic::riscv_sf_vc_v_vvv_se:
9371 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVV_SE);
9372 case Intrinsic::riscv_sf_vc_v_fvv_se:
9373 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVV_SE);
9374 case Intrinsic::riscv_sf_vc_v_xvw_se:
9375 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVW_SE);
9376 case Intrinsic::riscv_sf_vc_v_ivw_se:
9377 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVW_SE);
9378 case Intrinsic::riscv_sf_vc_v_vvw_se:
9379 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVW_SE);
9380 case Intrinsic::riscv_sf_vc_v_fvw_se:
9381 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVW_SE);
9382 }
9383
9384 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
9385}
9386
9387SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
9388 SelectionDAG &DAG) const {
9389 unsigned IntNo = Op.getConstantOperandVal(1);
9390 switch (IntNo) {
9391 default:
9392 break;
9393 case Intrinsic::riscv_masked_strided_store: {
9394 SDLoc DL(Op);
9395 MVT XLenVT = Subtarget.getXLenVT();
9396
9397 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
9398 // the selection of the masked intrinsics doesn't do this for us.
9399 SDValue Mask = Op.getOperand(5);
9400 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
9401
9402 SDValue Val = Op.getOperand(2);
9403 MVT VT = Val.getSimpleValueType();
9404 MVT ContainerVT = VT;
9405 if (VT.isFixedLengthVector()) {
9406 ContainerVT = getContainerForFixedLengthVector(VT);
9407 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
9408 }
9409 if (!IsUnmasked) {
9410 MVT MaskVT = getMaskTypeFor(ContainerVT);
9411 if (VT.isFixedLengthVector())
9412 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
9413 }
9414
9415 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
9416
9417 SDValue IntID = DAG.getTargetConstant(
9418 IsUnmasked ? Intrinsic::riscv_vsse : Intrinsic::riscv_vsse_mask, DL,
9419 XLenVT);
9420
9421 auto *Store = cast<MemIntrinsicSDNode>(Op);
9422 SmallVector<SDValue, 8> Ops{Store->getChain(), IntID};
9423 Ops.push_back(Val);
9424 Ops.push_back(Op.getOperand(3)); // Ptr
9425 Ops.push_back(Op.getOperand(4)); // Stride
9426 if (!IsUnmasked)
9427 Ops.push_back(Mask);
9428 Ops.push_back(VL);
9429
9430 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, Store->getVTList(),
9431 Ops, Store->getMemoryVT(),
9432 Store->getMemOperand());
9433 }
9434 case Intrinsic::riscv_seg2_store:
9435 case Intrinsic::riscv_seg3_store:
9436 case Intrinsic::riscv_seg4_store:
9437 case Intrinsic::riscv_seg5_store:
9438 case Intrinsic::riscv_seg6_store:
9439 case Intrinsic::riscv_seg7_store:
9440 case Intrinsic::riscv_seg8_store: {
9441 SDLoc DL(Op);
9442 static const Intrinsic::ID VssegInts[] = {
9443 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
9444 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
9445 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
9446 Intrinsic::riscv_vsseg8};
9447 // Operands are (chain, int_id, vec*, ptr, vl)
9448 unsigned NF = Op->getNumOperands() - 4;
9449 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
9450 MVT XLenVT = Subtarget.getXLenVT();
9451 MVT VT = Op->getOperand(2).getSimpleValueType();
9452 MVT ContainerVT = getContainerForFixedLengthVector(VT);
9453
9454 SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG,
9455 Subtarget);
9456 SDValue IntID = DAG.getTargetConstant(VssegInts[NF - 2], DL, XLenVT);
9457 SDValue Ptr = Op->getOperand(NF + 2);
9458
9459 auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op);
9460 SmallVector<SDValue, 12> Ops = {FixedIntrinsic->getChain(), IntID};
9461 for (unsigned i = 0; i < NF; i++)
9462 Ops.push_back(convertToScalableVector(
9463 ContainerVT, FixedIntrinsic->getOperand(2 + i), DAG, Subtarget));
9464 Ops.append({Ptr, VL});
9465
9466 return DAG.getMemIntrinsicNode(
9467 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
9468 FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
9469 }
9470 case Intrinsic::riscv_sf_vc_xv_se:
9471 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XV_SE);
9472 case Intrinsic::riscv_sf_vc_iv_se:
9473 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IV_SE);
9474 case Intrinsic::riscv_sf_vc_vv_se:
9475 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VV_SE);
9476 case Intrinsic::riscv_sf_vc_fv_se:
9477 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FV_SE);
9478 case Intrinsic::riscv_sf_vc_xvv_se:
9479 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVV_SE);
9480 case Intrinsic::riscv_sf_vc_ivv_se:
9481 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVV_SE);
9482 case Intrinsic::riscv_sf_vc_vvv_se:
9483 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVV_SE);
9484 case Intrinsic::riscv_sf_vc_fvv_se:
9485 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVV_SE);
9486 case Intrinsic::riscv_sf_vc_xvw_se:
9487 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVW_SE);
9488 case Intrinsic::riscv_sf_vc_ivw_se:
9489 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVW_SE);
9490 case Intrinsic::riscv_sf_vc_vvw_se:
9491 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVW_SE);
9492 case Intrinsic::riscv_sf_vc_fvw_se:
9493 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVW_SE);
9494 }
9495
9496 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
9497}
9498
9499static unsigned getRVVReductionOp(unsigned ISDOpcode) {
9500 switch (ISDOpcode) {
9501 default:
9502 llvm_unreachable("Unhandled reduction");
9503 case ISD::VP_REDUCE_ADD:
9504 case ISD::VECREDUCE_ADD:
9505 return RISCVISD::VECREDUCE_ADD_VL;
9506 case ISD::VP_REDUCE_UMAX:
9507 case ISD::VECREDUCE_UMAX:
9508 return RISCVISD::VECREDUCE_UMAX_VL;
9509 case ISD::VP_REDUCE_SMAX:
9510 case ISD::VECREDUCE_SMAX:
9511 return RISCVISD::VECREDUCE_SMAX_VL;
9512 case ISD::VP_REDUCE_UMIN:
9513 case ISD::VECREDUCE_UMIN:
9514 return RISCVISD::VECREDUCE_UMIN_VL;
9515 case ISD::VP_REDUCE_SMIN:
9516 case ISD::VECREDUCE_SMIN:
9517 return RISCVISD::VECREDUCE_SMIN_VL;
9518 case ISD::VP_REDUCE_AND:
9519 case ISD::VECREDUCE_AND:
9520 return RISCVISD::VECREDUCE_AND_VL;
9521 case ISD::VP_REDUCE_OR:
9522 case ISD::VECREDUCE_OR:
9523 return RISCVISD::VECREDUCE_OR_VL;
9524 case ISD::VP_REDUCE_XOR:
9525 case ISD::VECREDUCE_XOR:
9526 return RISCVISD::VECREDUCE_XOR_VL;
9527 case ISD::VP_REDUCE_FADD:
9528 return RISCVISD::VECREDUCE_FADD_VL;
9529 case ISD::VP_REDUCE_SEQ_FADD:
9530 return RISCVISD::VECREDUCE_SEQ_FADD_VL;
9531 case ISD::VP_REDUCE_FMAX:
9532 case ISD::VP_REDUCE_FMAXIMUM:
9533 return RISCVISD::VECREDUCE_FMAX_VL;
9534 case ISD::VP_REDUCE_FMIN:
9535 case ISD::VP_REDUCE_FMINIMUM:
9536 return RISCVISD::VECREDUCE_FMIN_VL;
9537 }
9538
9539}
9540
9541SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
9542 SelectionDAG &DAG,
9543 bool IsVP) const {
9544 SDLoc DL(Op);
9545 SDValue Vec = Op.getOperand(IsVP ? 1 : 0);
9546 MVT VecVT = Vec.getSimpleValueType();
9547 assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
9548 Op.getOpcode() == ISD::VECREDUCE_OR ||
9549 Op.getOpcode() == ISD::VECREDUCE_XOR ||
9550 Op.getOpcode() == ISD::VP_REDUCE_AND ||
9551 Op.getOpcode() == ISD::VP_REDUCE_OR ||
9552 Op.getOpcode() == ISD::VP_REDUCE_XOR) &&
9553 "Unexpected reduction lowering");
9554
9555 MVT XLenVT = Subtarget.getXLenVT();
9556
9557 MVT ContainerVT = VecVT;
9558 if (VecVT.isFixedLengthVector()) {
9559 ContainerVT = getContainerForFixedLengthVector(VecVT);
9560 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9561 }
9562
9563 SDValue Mask, VL;
9564 if (IsVP) {
9565 Mask = Op.getOperand(2);
9566 VL = Op.getOperand(3);
9567 } else {
9568 std::tie(Mask, VL) =
9569 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9570 }
9571
9572 unsigned BaseOpc;
9573 ISD::CondCode CC;
9574 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
9575
9576 switch (Op.getOpcode()) {
9577 default:
9578 llvm_unreachable("Unhandled reduction");
9579 case ISD::VECREDUCE_AND:
9580 case ISD::VP_REDUCE_AND: {
9581 // vcpop ~x == 0
9582 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
9583 Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL);
9584 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
9585 CC = ISD::SETEQ;
9586 BaseOpc = ISD::AND;
9587 break;
9588 }
9589 case ISD::VECREDUCE_OR:
9590 case ISD::VP_REDUCE_OR:
9591 // vcpop x != 0
9592 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
9593 CC = ISD::SETNE;
9594 BaseOpc = ISD::OR;
9595 break;
9596 case ISD::VECREDUCE_XOR:
9597 case ISD::VP_REDUCE_XOR: {
9598 // ((vcpop x) & 1) != 0
9599 SDValue One = DAG.getConstant(1, DL, XLenVT);
9600 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
9601 Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One);
9602 CC = ISD::SETNE;
9603 BaseOpc = ISD::XOR;
9604 break;
9605 }
9606 }
9607
9608 SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC);
9609 SetCC = DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), SetCC);
9610
9611 if (!IsVP)
9612 return SetCC;
9613
9614 // Now include the start value in the operation.
9615 // Note that we must return the start value when no elements are operated
9616 // upon. The vcpop instructions we've emitted in each case above will return
9617 // 0 for an inactive vector, and so we've already received the neutral value:
9618 // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we
9619 // can simply include the start value.
9620 return DAG.getNode(BaseOpc, DL, Op.getValueType(), SetCC, Op.getOperand(0));
9621}
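A standalone sketch of the vcpop.m reductions built above: AND is "popcount of the complemented mask is zero", OR is "popcount is non-zero", XOR is "popcount is odd" (illustrative model, not LLVM code):

#include <cstdio>
#include <vector>

static unsigned vcpop(const std::vector<bool> &M) {
  unsigned N = 0;
  for (bool B : M)
    N += B;
  return N;
}

int main() {
  std::vector<bool> M{true, true, false, true};
  std::vector<bool> NotM{false, false, true, false}; // vmxor with all-ones
  bool RedAnd = vcpop(NotM) == 0; // vcpop(~m) == 0
  bool RedOr = vcpop(M) != 0;     // vcpop(m) != 0
  bool RedXor = vcpop(M) & 1;     // (vcpop(m) & 1) != 0
  std::printf("and=%d or=%d xor=%d\n", RedAnd, RedOr, RedXor); // 0 1 1
}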
9622
9623static bool isNonZeroAVL(SDValue AVL) {
9624 auto *RegisterAVL = dyn_cast<RegisterSDNode>(AVL);
9625 auto *ImmAVL = dyn_cast<ConstantSDNode>(AVL);
9626 return (RegisterAVL && RegisterAVL->getReg() == RISCV::X0) ||
9627 (ImmAVL && ImmAVL->getZExtValue() >= 1);
9628}
9629
9630/// Helper to lower a reduction sequence of the form:
9631/// scalar = reduce_op vec, scalar_start
9632static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT,
9633 SDValue StartValue, SDValue Vec, SDValue Mask,
9634 SDValue VL, const SDLoc &DL, SelectionDAG &DAG,
9635 const RISCVSubtarget &Subtarget) {
9636 const MVT VecVT = Vec.getSimpleValueType();
9637 const MVT M1VT = getLMUL1VT(VecVT);
9638 const MVT XLenVT = Subtarget.getXLenVT();
9639 const bool NonZeroAVL = isNonZeroAVL(VL);
9640
9641 // The reduction needs an LMUL1 input; do the splat at either LMUL1
9642 // or the original VT if fractional.
9643 auto InnerVT = VecVT.bitsLE(M1VT) ? VecVT : M1VT;
9644 // We reuse the VL of the reduction to reduce vsetvli toggles if we can
9645 // prove it is non-zero. For the AVL=0 case, we need the scalar to
9646 // be the result of the reduction operation.
9647 auto InnerVL = NonZeroAVL ? VL : DAG.getConstant(1, DL, XLenVT);
9648 SDValue InitialValue = lowerScalarInsert(StartValue, InnerVL, InnerVT, DL,
9649 DAG, Subtarget);
9650 if (M1VT != InnerVT)
9651 InitialValue =
9652 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, M1VT, DAG.getUNDEF(M1VT),
9653 InitialValue, DAG.getVectorIdxConstant(0, DL));
9654 SDValue PassThru = NonZeroAVL ? DAG.getUNDEF(M1VT) : InitialValue;
9655 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
9656 SDValue Ops[] = {PassThru, Vec, InitialValue, Mask, VL, Policy};
9657 SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, Ops);
9658 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Reduction,
9659 DAG.getVectorIdxConstant(0, DL));
9660}
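A standalone sketch of the reduction sequence assembled above: the start value becomes element 0 of an LMUL1 vector, a vredop.vs-style operation folds the source into it, and the scalar result is read back from element 0 (illustrative model, not LLVM code):

#include <cstdio>
#include <vector>

static int reduceAddSeq(const std::vector<int> &Vec, int Start) {
  int Acc = Start;  // element 0 of the LMUL1 "InitialValue" vector
  for (int X : Vec) // vredsum.vs folds every active element into it
    Acc += X;
  return Acc;       // vmv.x.s / extractelement of lane 0
}

int main() {
  std::printf("%d\n", reduceAddSeq({1, 2, 3, 4}, 10)); // 20
}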
9661
9662SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
9663 SelectionDAG &DAG) const {
9664 SDLoc DL(Op);
9665 SDValue Vec = Op.getOperand(0);
9666 EVT VecEVT = Vec.getValueType();
9667
9668 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
9669
9670 // Due to ordering in legalize types we may have a vector type that needs to
9671 // be split. Do that manually so we can get down to a legal type.
9672 while (getTypeAction(*DAG.getContext(), VecEVT) ==
9673 TargetLowering::TypeSplitVector) {
9674 auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
9675 VecEVT = Lo.getValueType();
9676 Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi);
9677 }
9678
9679 // TODO: The type may need to be widened rather than split. Or widened before
9680 // it can be split.
9681 if (!isTypeLegal(VecEVT))
9682 return SDValue();
9683
9684 MVT VecVT = VecEVT.getSimpleVT();
9685 MVT VecEltVT = VecVT.getVectorElementType();
9686 unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
9687
9688 MVT ContainerVT = VecVT;
9689 if (VecVT.isFixedLengthVector()) {
9690 ContainerVT = getContainerForFixedLengthVector(VecVT);
9691 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9692 }
9693
9694 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9695
9696 SDValue StartV = DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
9697 switch (BaseOpc) {
9698 case ISD::AND:
9699 case ISD::OR:
9700 case ISD::UMAX:
9701 case ISD::UMIN:
9702 case ISD::SMAX:
9703 case ISD::SMIN:
9704 StartV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Vec,
9705 DAG.getVectorIdxConstant(0, DL));
9706 }
9707 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), StartV, Vec,
9708 Mask, VL, DL, DAG, Subtarget);
9709}
9710
9711// Given a reduction op, this function returns the matching reduction opcode,
9712// the vector SDValue and the scalar SDValue required to lower this to a
9713// RISCVISD node.
9714static std::tuple<unsigned, SDValue, SDValue>
9715 getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT,
9716 const RISCVSubtarget &Subtarget) {
9717 SDLoc DL(Op);
9718 auto Flags = Op->getFlags();
9719 unsigned Opcode = Op.getOpcode();
9720 switch (Opcode) {
9721 default:
9722 llvm_unreachable("Unhandled reduction");
9723 case ISD::VECREDUCE_FADD: {
9724 // Use positive zero if we can. It is cheaper to materialize.
9725 SDValue Zero =
9726 DAG.getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, EltVT);
9727 return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), Zero);
9728 }
9729 case ISD::VECREDUCE_SEQ_FADD:
9730 return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
9731 Op.getOperand(0));
9732 case ISD::VECREDUCE_FMINIMUM:
9733 case ISD::VECREDUCE_FMAXIMUM:
9734 case ISD::VECREDUCE_FMIN:
9735 case ISD::VECREDUCE_FMAX: {
9736 SDValue Front =
9737 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Op.getOperand(0),
9738 DAG.getVectorIdxConstant(0, DL));
9739 unsigned RVVOpc =
9740 (Opcode == ISD::VECREDUCE_FMIN || Opcode == ISD::VECREDUCE_FMINIMUM)
9741 ? RISCVISD::VECREDUCE_FMIN_VL
9742 : RISCVISD::VECREDUCE_FMAX_VL;
9743 return std::make_tuple(RVVOpc, Op.getOperand(0), Front);
9744 }
9745 }
9746}
9747
9748SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
9749 SelectionDAG &DAG) const {
9750 SDLoc DL(Op);
9751 MVT VecEltVT = Op.getSimpleValueType();
9752
9753 unsigned RVVOpcode;
9754 SDValue VectorVal, ScalarVal;
9755 std::tie(RVVOpcode, VectorVal, ScalarVal) =
9756 getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT, Subtarget);
9757 MVT VecVT = VectorVal.getSimpleValueType();
9758
9759 MVT ContainerVT = VecVT;
9760 if (VecVT.isFixedLengthVector()) {
9761 ContainerVT = getContainerForFixedLengthVector(VecVT);
9762 VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);
9763 }
9764
9765 MVT ResVT = Op.getSimpleValueType();
9766 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9767 SDValue Res = lowerReductionSeq(RVVOpcode, ResVT, ScalarVal, VectorVal, Mask,
9768 VL, DL, DAG, Subtarget);
9769 if (Op.getOpcode() != ISD::VECREDUCE_FMINIMUM &&
9770 Op.getOpcode() != ISD::VECREDUCE_FMAXIMUM)
9771 return Res;
9772
9773 if (Op->getFlags().hasNoNaNs())
9774 return Res;
9775
9776 // Force output to NaN if any element is NaN.
9777 SDValue IsNan =
9778 DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
9779 {VectorVal, VectorVal, DAG.getCondCode(ISD::SETNE),
9780 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
9781 MVT XLenVT = Subtarget.getXLenVT();
9782 SDValue CPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNan, Mask, VL);
9783 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, CPop,
9784 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
9785 return DAG.getSelect(
9786 DL, ResVT, NoNaNs, Res,
9787 DAG.getConstantFP(APFloat::getNaN(DAG.EVTToAPFloatSemantics(ResVT)), DL,
9788 ResVT));
9789}
9790
9791SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
9792 SelectionDAG &DAG) const {
9793 SDLoc DL(Op);
9794 unsigned Opc = Op.getOpcode();
9795 SDValue Start = Op.getOperand(0);
9796 SDValue Vec = Op.getOperand(1);
9797 EVT VecEVT = Vec.getValueType();
9798 MVT XLenVT = Subtarget.getXLenVT();
9799
9800 // TODO: The type may need to be widened rather than split. Or widened before
9801 // it can be split.
9802 if (!isTypeLegal(VecEVT))
9803 return SDValue();
9804
9805 MVT VecVT = VecEVT.getSimpleVT();
9806 unsigned RVVOpcode = getRVVReductionOp(Opc);
9807
9808 if (VecVT.isFixedLengthVector()) {
9809 auto ContainerVT = getContainerForFixedLengthVector(VecVT);
9810 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9811 }
9812
9813 SDValue VL = Op.getOperand(3);
9814 SDValue Mask = Op.getOperand(2);
9815 SDValue Res =
9816 lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), Op.getOperand(0),
9817 Vec, Mask, VL, DL, DAG, Subtarget);
9818 if ((Opc != ISD::VP_REDUCE_FMINIMUM && Opc != ISD::VP_REDUCE_FMAXIMUM) ||
9819 Op->getFlags().hasNoNaNs())
9820 return Res;
9821
9822 // Propagate NaNs.
9823 MVT PredVT = getMaskTypeFor(Vec.getSimpleValueType());
9824 // Check if any of the elements in Vec is NaN.
9825 SDValue IsNaN = DAG.getNode(
9826 RISCVISD::SETCC_VL, DL, PredVT,
9827 {Vec, Vec, DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(PredVT), Mask, VL});
9828 SDValue VCPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNaN, Mask, VL);
9829 // Check if the start value is NaN.
9830 SDValue StartIsNaN = DAG.getSetCC(DL, XLenVT, Start, Start, ISD::SETUO);
9831 VCPop = DAG.getNode(ISD::OR, DL, XLenVT, VCPop, StartIsNaN);
9832 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, VCPop,
9833 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
9834 MVT ResVT = Res.getSimpleValueType();
9835 return DAG.getSelect(
9836 DL, ResVT, NoNaNs, Res,
9837 DAG.getConstantFP(APFloat::getNaN(DAG.EVTToAPFloatSemantics(ResVT)), DL,
9838 ResVT));
9839}
9840
9841SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
9842 SelectionDAG &DAG) const {
9843 SDValue Vec = Op.getOperand(0);
9844 SDValue SubVec = Op.getOperand(1);
9845 MVT VecVT = Vec.getSimpleValueType();
9846 MVT SubVecVT = SubVec.getSimpleValueType();
9847
9848 SDLoc DL(Op);
9849 MVT XLenVT = Subtarget.getXLenVT();
9850 unsigned OrigIdx = Op.getConstantOperandVal(2);
9851 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
9852
9853 // We don't have the ability to slide mask vectors up indexed by their i1
9854 // elements; the smallest we can do is i8. Often we are able to bitcast to
9855 // equivalent i8 vectors. Note that when inserting a fixed-length vector
9856 // into a scalable one, we might not necessarily have enough scalable
9857 // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
9858 if (SubVecVT.getVectorElementType() == MVT::i1 &&
9859 (OrigIdx != 0 || !Vec.isUndef())) {
9860 if (VecVT.getVectorMinNumElements() >= 8 &&
9861 SubVecVT.getVectorMinNumElements() >= 8) {
9862 assert(OrigIdx % 8 == 0 && "Invalid index");
9863 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
9864 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
9865 "Unexpected mask vector lowering");
9866 OrigIdx /= 8;
9867 SubVecVT =
9868 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
9869 SubVecVT.isScalableVector());
9870 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
9871 VecVT.isScalableVector());
9872 Vec = DAG.getBitcast(VecVT, Vec);
9873 SubVec = DAG.getBitcast(SubVecVT, SubVec);
9874 } else {
9875 // We can't slide this mask vector up indexed by its i1 elements.
9876 // This poses a problem when we wish to insert a scalable vector which
9877 // can't be re-expressed as a larger type. Just choose the slow path and
9878 // extend to a larger type, then truncate back down.
9879 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
9880 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
9881 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
9882 SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
9883 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
9884 Op.getOperand(2));
9885 SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
9886 return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
9887 }
9888 }
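// For example, inserting a v16i1 subvector at index 32 of an nxv64i1 vector
// can instead be done as inserting v2i8 at index 4 of nxv8i8, since the index
// and both element counts are divisible by 8.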
9889
9890 // If the subvector is a fixed-length type and we don't know VLEN
9891 // exactly, we cannot use subregister manipulation to simplify the codegen; we
9892 // don't know which register of an LMUL group contains the specific subvector
9893 // as we only know the minimum register size. Therefore we must slide the
9894 // vector group up the full amount.
9895 const auto VLen = Subtarget.getRealVLen();
9896 if (SubVecVT.isFixedLengthVector() && !VLen) {
9897 if (OrigIdx == 0 && Vec.isUndef() && !VecVT.isFixedLengthVector())
9898 return Op;
9899 MVT ContainerVT = VecVT;
9900 if (VecVT.isFixedLengthVector()) {
9901 ContainerVT = getContainerForFixedLengthVector(VecVT);
9902 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9903 }
9904
9905 if (OrigIdx == 0 && Vec.isUndef() && VecVT.isFixedLengthVector()) {
9906 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
9907 DAG.getUNDEF(ContainerVT), SubVec,
9908 DAG.getVectorIdxConstant(0, DL));
9909 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
9910 return DAG.getBitcast(Op.getValueType(), SubVec);
9911 }
9912
9913 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
9914 DAG.getUNDEF(ContainerVT), SubVec,
9915 DAG.getVectorIdxConstant(0, DL));
9916 SDValue Mask =
9917 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
9918 // Set the vector length to only the number of elements we care about. Note
9919 // that for slideup this includes the offset.
9920 unsigned EndIndex = OrigIdx + SubVecVT.getVectorNumElements();
9921 SDValue VL = getVLOp(EndIndex, ContainerVT, DL, DAG, Subtarget);
9922
9923 // Use tail agnostic policy if we're inserting over Vec's tail.
9924 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
9925 if (VecVT.isFixedLengthVector() && EndIndex == VecVT.getVectorNumElements())
9926 Policy = RISCVII::TAIL_AGNOSTIC;
9927
9928 // If we're inserting into the lowest elements, use a tail undisturbed
9929 // vmv.v.v.
9930 if (OrigIdx == 0) {
9931 SubVec =
9932 DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, Vec, SubVec, VL);
9933 } else {
9934 SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
9935 SubVec = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, SubVec,
9936 SlideupAmt, Mask, VL, Policy);
9937 }
9938
9939 if (VecVT.isFixedLengthVector())
9940 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
9941 return DAG.getBitcast(Op.getValueType(), SubVec);
9942 }
9943
9944 MVT ContainerVecVT = VecVT;
9945 if (VecVT.isFixedLengthVector()) {
9946 ContainerVecVT = getContainerForFixedLengthVector(VecVT);
9947 Vec = convertToScalableVector(ContainerVecVT, Vec, DAG, Subtarget);
9948 }
9949
9950 MVT ContainerSubVecVT = SubVecVT;
9951 if (SubVecVT.isFixedLengthVector()) {
9952 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
9953 SubVec = convertToScalableVector(ContainerSubVecVT, SubVec, DAG, Subtarget);
9954 }
9955
9956 unsigned SubRegIdx;
9957 ElementCount RemIdx;
9958 // insert_subvector scales the index by vscale if the subvector is scalable,
9959 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
9960 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
9961 if (SubVecVT.isFixedLengthVector()) {
9962 assert(VLen);
9963 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
9964 auto Decompose =
9965 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
9966 ContainerVecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
9967 SubRegIdx = Decompose.first;
9968 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
9969 (OrigIdx % Vscale));
9970 } else {
9971 auto Decompose =
9972 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
9973 ContainerVecVT, ContainerSubVecVT, OrigIdx, TRI);
9974 SubRegIdx = Decompose.first;
9975 RemIdx = ElementCount::getScalable(Decompose.second);
9976 }
9977
9978 TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
9979 assert(isPowerOf2_64(
9980 Subtarget.expandVScale(SubVecVT.getSizeInBits()).getKnownMinValue()));
9981 bool ExactlyVecRegSized =
9982 Subtarget.expandVScale(SubVecVT.getSizeInBits())
9983 .isKnownMultipleOf(Subtarget.expandVScale(VecRegSize));
9984
9985 // 1. If the Idx has been completely eliminated and this subvector's size is
9986 // a vector register or a multiple thereof, or the surrounding elements are
9987 // undef, then this is a subvector insert which naturally aligns to a vector
9988 // register. These can easily be handled using subregister manipulation.
9989 // 2. If the subvector isn't an exact multiple of a valid register group size,
9990 // then the insertion must preserve the undisturbed elements of the register.
9991 // We do this by lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1
9992 // vector type (which resolves to a subregister copy), performing a VSLIDEUP
9993 // to place the subvector within the vector register, and an INSERT_SUBVECTOR
9994 // of that LMUL=1 type back into the larger vector (resolving to another
9995 // subregister operation). See below for how our VSLIDEUP works. We go via a
9996 // LMUL=1 type to avoid allocating a large register group to hold our
9997 // subvector.
9998 if (RemIdx.isZero() && (ExactlyVecRegSized || Vec.isUndef())) {
9999 if (SubVecVT.isFixedLengthVector()) {
10000 // We may get NoSubRegister if inserting at index 0 and the subvec
10001 // container is the same as the vector, e.g. vec=v4i32,subvec=v4i32,idx=0
10002 if (SubRegIdx == RISCV::NoSubRegister) {
10003 assert(OrigIdx == 0);
10004 return Op;
10005 }
10006
10007 SDValue Insert =
10008 DAG.getTargetInsertSubreg(SubRegIdx, DL, ContainerVecVT, Vec, SubVec);
10009 if (VecVT.isFixedLengthVector())
10010 Insert = convertFromScalableVector(VecVT, Insert, DAG, Subtarget);
10011 return Insert;
10012 }
10013 return Op;
10014 }
10015
10016 // VSLIDEUP works by leaving elements 0<=i<OFFSET undisturbed, elements
10017 // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
10018 // (in our case undisturbed). This means we can set up a subvector insertion
10019 // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
10020 // size of the subvector.
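// For instance, with RemIdx == 2 and a 4-element subvector, the slideup below
// uses OFFSET = 2 and VL = 6: lanes 0-1 of the destination are left
// undisturbed, lanes 2-5 receive the subvector, and lanes >= 6 follow the
// tail policy.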
10021 MVT InterSubVT = ContainerVecVT;
10022 SDValue AlignedExtract = Vec;
10023 unsigned AlignedIdx = OrigIdx - RemIdx.getKnownMinValue();
10024 if (SubVecVT.isFixedLengthVector())
10025 AlignedIdx /= *VLen / RISCV::RVVBitsPerBlock;
10026 if (ContainerVecVT.bitsGT(getLMUL1VT(ContainerVecVT))) {
10027 InterSubVT = getLMUL1VT(ContainerVecVT);
10028 // Extract a subvector equal to the nearest full vector register type. This
10029 // should resolve to a EXTRACT_SUBREG instruction.
10030 AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
10031 DAG.getVectorIdxConstant(AlignedIdx, DL));
10032 }
10033
10034 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InterSubVT,
10035 DAG.getUNDEF(InterSubVT), SubVec,
10036 DAG.getVectorIdxConstant(0, DL));
10037
10038 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVecVT, DL, DAG, Subtarget);
10039
10040 ElementCount EndIndex = RemIdx + SubVecVT.getVectorElementCount();
10041 VL = DAG.getElementCount(DL, XLenVT, SubVecVT.getVectorElementCount());
10042
10043 // Use tail agnostic policy if we're inserting over InterSubVT's tail.
10044 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
10045 if (Subtarget.expandVScale(EndIndex) ==
10046 Subtarget.expandVScale(InterSubVT.getVectorElementCount()))
10047 Policy = RISCVII::TAIL_AGNOSTIC;
10048
10049 // If we're inserting into the lowest elements, use a tail undisturbed
10050 // vmv.v.v.
10051 if (RemIdx.isZero()) {
10052 SubVec = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, InterSubVT, AlignedExtract,
10053 SubVec, VL);
10054 } else {
10055 SDValue SlideupAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
10056
10057 // Construct the vector length corresponding to RemIdx + length(SubVecVT).
10058 VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);
10059
10060 SubVec = getVSlideup(DAG, Subtarget, DL, InterSubVT, AlignedExtract, SubVec,
10061 SlideupAmt, Mask, VL, Policy);
10062 }
10063
10064 // If required, insert this subvector back into the correct vector register.
10065 // This should resolve to an INSERT_SUBREG instruction.
10066 if (ContainerVecVT.bitsGT(InterSubVT))
10067 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVecVT, Vec, SubVec,
10068 DAG.getVectorIdxConstant(AlignedIdx, DL));
10069
10070 if (VecVT.isFixedLengthVector())
10071 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
10072
10073 // We might have bitcast from a mask type: cast back to the original type if
10074 // required.
10075 return DAG.getBitcast(Op.getSimpleValueType(), SubVec);
10076}
10077
10078SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
10079 SelectionDAG &DAG) const {
10080 SDValue Vec = Op.getOperand(0);
10081 MVT SubVecVT = Op.getSimpleValueType();
10082 MVT VecVT = Vec.getSimpleValueType();
10083
10084 SDLoc DL(Op);
10085 MVT XLenVT = Subtarget.getXLenVT();
10086 unsigned OrigIdx = Op.getConstantOperandVal(1);
10087 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
10088
10089 // We don't have the ability to slide mask vectors down indexed by their i1
10090 // elements; the smallest we can do is i8. Often we are able to bitcast to
10091 // equivalent i8 vectors. Note that when extracting a fixed-length vector
10092 // from a scalable one, we might not necessarily have enough scalable
10093 // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
10094 if (SubVecVT.getVectorElementType() == MVT::i1 && OrigIdx != 0) {
10095 if (VecVT.getVectorMinNumElements() >= 8 &&
10096 SubVecVT.getVectorMinNumElements() >= 8) {
10097 assert(OrigIdx % 8 == 0 && "Invalid index");
10098 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
10099 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
10100 "Unexpected mask vector lowering");
10101 OrigIdx /= 8;
10102 SubVecVT =
10103 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
10104 SubVecVT.isScalableVector());
10105 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
10106 VecVT.isScalableVector());
10107 Vec = DAG.getBitcast(VecVT, Vec);
10108 } else {
10109 // We can't slide this mask vector down, indexed by its i1 elements.
10110 // This poses a problem when we wish to extract a scalable vector which
10111 // can't be re-expressed as a larger type. Just choose the slow path and
10112 // extend to a larger type, then truncate back down.
10113 // TODO: We could probably improve this when extracting certain fixed
10114 // from fixed, where we can extract as i8 and shift the correct element
10115 // right to reach the desired subvector?
10116 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
10117 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
10118 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
10119 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
10120 Op.getOperand(1));
10121 SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
10122 return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
10123 }
10124 }
10125
10126 // With an index of 0 this is a cast-like subvector, which can be performed
10127 // with subregister operations.
10128 if (OrigIdx == 0)
10129 return Op;
10130
10131 const auto VLen = Subtarget.getRealVLen();
10132
10133 // If the subvector is a fixed-length type and we don't know VLEN
10134 // exactly, we cannot use subregister manipulation to simplify the codegen; we
10135 // don't know which register of an LMUL group contains the specific subvector
10136 // as we only know the minimum register size. Therefore we must slide the
10137 // vector group down the full amount.
10138 if (SubVecVT.isFixedLengthVector() && !VLen) {
10139 MVT ContainerVT = VecVT;
10140 if (VecVT.isFixedLengthVector()) {
10141 ContainerVT = getContainerForFixedLengthVector(VecVT);
10142 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10143 }
10144
10145 // Shrink down Vec so we're performing the slidedown on a smaller LMUL.
10146 unsigned LastIdx = OrigIdx + SubVecVT.getVectorNumElements() - 1;
10147 if (auto ShrunkVT =
10148 getSmallestVTForIndex(ContainerVT, LastIdx, DL, DAG, Subtarget)) {
10149 ContainerVT = *ShrunkVT;
10150 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
10151 DAG.getVectorIdxConstant(0, DL));
10152 }
10153
10154 SDValue Mask =
10155 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
10156 // Set the vector length to only the number of elements we care about. This
10157 // avoids sliding down elements we're going to discard straight away.
10158 SDValue VL = getVLOp(SubVecVT.getVectorNumElements(), ContainerVT, DL, DAG,
10159 Subtarget);
10160 SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
10161 SDValue Slidedown =
10162 getVSlidedown(DAG, Subtarget, DL, ContainerVT,
10163 DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
10164 // Now we can use a cast-like subvector extract to get the result.
10165 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
10166 DAG.getVectorIdxConstant(0, DL));
10167 return DAG.getBitcast(Op.getValueType(), Slidedown);
10168 }
10169
10170 if (VecVT.isFixedLengthVector()) {
10171 VecVT = getContainerForFixedLengthVector(VecVT);
10172 Vec = convertToScalableVector(VecVT, Vec, DAG, Subtarget);
10173 }
10174
10175 MVT ContainerSubVecVT = SubVecVT;
10176 if (SubVecVT.isFixedLengthVector())
10177 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
10178
10179 unsigned SubRegIdx;
10180 ElementCount RemIdx;
10181 // extract_subvector scales the index by vscale if the subvector is scalable,
10182 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
10183 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
10184 if (SubVecVT.isFixedLengthVector()) {
10185 assert(VLen);
10186 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
10187 auto Decompose =
10188 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10189 VecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
10190 SubRegIdx = Decompose.first;
10191 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
10192 (OrigIdx % Vscale));
10193 } else {
10194 auto Decompose =
10195 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10196 VecVT, ContainerSubVecVT, OrigIdx, TRI);
10197 SubRegIdx = Decompose.first;
10198 RemIdx = ElementCount::getScalable(Decompose.second);
10199 }
10200
10201 // If the Idx has been completely eliminated then this is a subvector extract
10202 // which naturally aligns to a vector register. These can easily be handled
10203 // using subregister manipulation.
10204 if (RemIdx.isZero()) {
10205 if (SubVecVT.isFixedLengthVector()) {
10206 Vec = DAG.getTargetExtractSubreg(SubRegIdx, DL, ContainerSubVecVT, Vec);
10207 return convertFromScalableVector(SubVecVT, Vec, DAG, Subtarget);
10208 }
10209 return Op;
10210 }
10211
10212 // Else SubVecVT is M1 or smaller and may need to be slid down: if SubVecVT
10213 // was > M1 then the index would need to be a multiple of VLMAX, and so would
10214 // divide exactly.
10215 assert(RISCVVType::decodeVLMUL(getLMUL(ContainerSubVecVT)).second ||
10216 getLMUL(ContainerSubVecVT) == RISCVII::VLMUL::LMUL_1);
10217
10218 // If the vector type is an LMUL-group type, extract a subvector equal to the
10219 // nearest full vector register type.
10220 MVT InterSubVT = VecVT;
10221 if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
10222 // If VecVT has an LMUL > 1, then SubVecVT should have a smaller LMUL, and
10223 // we should have successfully decomposed the extract into a subregister.
10224 assert(SubRegIdx != RISCV::NoSubRegister);
10225 InterSubVT = getLMUL1VT(VecVT);
10226 Vec = DAG.getTargetExtractSubreg(SubRegIdx, DL, InterSubVT, Vec);
10227 }
10228
10229 // Slide this vector register down by the desired number of elements in order
10230 // to place the desired subvector starting at element 0.
10231 SDValue SlidedownAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
10232 auto [Mask, VL] = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
10233 if (SubVecVT.isFixedLengthVector())
10234 VL = getVLOp(SubVecVT.getVectorNumElements(), InterSubVT, DL, DAG,
10235 Subtarget);
10236 SDValue Slidedown =
10237 getVSlidedown(DAG, Subtarget, DL, InterSubVT, DAG.getUNDEF(InterSubVT),
10238 Vec, SlidedownAmt, Mask, VL);
10239
10240 // Now the vector is in the right position, extract our final subvector. This
10241 // should resolve to a COPY.
10242 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
10243 DAG.getVectorIdxConstant(0, DL));
10244
10245 // We might have bitcast from a mask type: cast back to the original type if
10246 // required.
10247 return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
10248}
10249
10250// Widen a vector's operands to i8, then truncate its results back to the
10251 // original type, typically i1. All operand and result types must be the same.
10252 static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL,
10253 SelectionDAG &DAG) {
10254 MVT VT = N.getSimpleValueType();
10255 MVT WideVT = VT.changeVectorElementType(MVT::i8);
10256 SmallVector<SDValue, 4> WideOps;
10257 for (SDValue Op : N->ops()) {
10258 assert(Op.getSimpleValueType() == VT &&
10259 "Operands and result must be same type");
10260 WideOps.push_back(DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Op));
10261 }
10262
10263 unsigned NumVals = N->getNumValues();
10264
10265 SDVTList VTs = DAG.getVTList(SmallVector<EVT, 4>(
10266 NumVals, N.getValueType().changeVectorElementType(MVT::i8)));
10267 SDValue WideN = DAG.getNode(N.getOpcode(), DL, VTs, WideOps);
10268 SmallVector<SDValue, 4> TruncVals;
10269 for (unsigned I = 0; I < NumVals; I++) {
10270 TruncVals.push_back(
10271 DAG.getSetCC(DL, N->getSimpleValueType(I), WideN.getValue(I),
10272 DAG.getConstant(0, DL, WideVT), ISD::SETNE));
10273 }
10274
10275 if (TruncVals.size() > 1)
10276 return DAG.getMergeValues(TruncVals, DL);
10277 return TruncVals.front();
10278}
10279
10280SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
10281 SelectionDAG &DAG) const {
10282 SDLoc DL(Op);
10283 MVT VecVT = Op.getSimpleValueType();
10284
10285 assert(VecVT.isScalableVector() &&
10286 "vector_interleave on non-scalable vector!");
10287
10288 // 1 bit element vectors need to be widened to e8
10289 if (VecVT.getVectorElementType() == MVT::i1)
10290 return widenVectorOpsToi8(Op, DL, DAG);
10291
10292 // If the VT is LMUL=8, we need to split and reassemble.
10293 if (VecVT.getSizeInBits().getKnownMinValue() ==
10294 (8 * RISCV::RVVBitsPerBlock)) {
10295 auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
10296 auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
10297 EVT SplitVT = Op0Lo.getValueType();
10298
10299 SDValue ResLo = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
10300 DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op0Hi);
10301 SDValue ResHi = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
10302 DAG.getVTList(SplitVT, SplitVT), Op1Lo, Op1Hi);
10303
10304 SDValue Even = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
10305 ResLo.getValue(0), ResHi.getValue(0));
10306 SDValue Odd = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, ResLo.getValue(1),
10307 ResHi.getValue(1));
10308 return DAG.getMergeValues({Even, Odd}, DL);
10309 }
10310
10311 // Concatenate the two vectors as one vector to deinterleave
10312 MVT ConcatVT =
10313 MVT::getVectorVT(VecVT.getVectorElementType(),
10314 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
10315 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
10316 Op.getOperand(0), Op.getOperand(1));
10317
10318 // We want to operate on all lanes, so get the mask and VL for it
10319 auto [Mask, VL] = getDefaultScalableVLOps(ConcatVT, DL, DAG, Subtarget);
10320 SDValue Passthru = DAG.getUNDEF(ConcatVT);
10321
10322 // We can deinterleave through vnsrl.wi if the element type is smaller than
10323 // ELEN
10324 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
10325 SDValue Even =
10326 getDeinterleaveViaVNSRL(DL, VecVT, Concat, true, Subtarget, DAG);
10327 SDValue Odd =
10328 getDeinterleaveViaVNSRL(DL, VecVT, Concat, false, Subtarget, DAG);
10329 return DAG.getMergeValues({Even, Odd}, DL);
10330 }
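// The vnsrl path above works by viewing each adjacent pair of SEW-bit
// elements as a single element of twice the width: a narrowing shift right by
// 0 keeps the even lanes and a shift right by SEW keeps the odd lanes (e.g.
// vnsrl.wi with shift 0 or 8 for SEW=8).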
10331
10332 // For the indices, use the same SEW to avoid an extra vsetvli
10333 MVT IdxVT = ConcatVT.changeVectorElementTypeToInteger();
10334 // Create a vector of even indices {0, 2, 4, ...}
10335 SDValue EvenIdx =
10336 DAG.getStepVector(DL, IdxVT, APInt(IdxVT.getScalarSizeInBits(), 2));
10337 // Create a vector of odd indices {1, 3, 5, ... }
10338 SDValue OddIdx =
10339 DAG.getNode(ISD::ADD, DL, IdxVT, EvenIdx, DAG.getConstant(1, DL, IdxVT));
10340
10341 // Gather the even and odd elements into two separate vectors
10342 SDValue EvenWide = DAG.getNode(RISCVISD::VRGATHER_VV_VL, DL, ConcatVT,
10343 Concat, EvenIdx, Passthru, Mask, VL);
10344 SDValue OddWide = DAG.getNode(RISCVISD::VRGATHER_VV_VL, DL, ConcatVT,
10345 Concat, OddIdx, Passthru, Mask, VL);
10346
10347 // Extract the result half of the gather for even and odd
10348 SDValue Even = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, EvenWide,
10349 DAG.getVectorIdxConstant(0, DL));
10350 SDValue Odd = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, OddWide,
10351 DAG.getVectorIdxConstant(0, DL));
10352
10353 return DAG.getMergeValues({Even, Odd}, DL);
10354}
10355
10356SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op,
10357 SelectionDAG &DAG) const {
10358 SDLoc DL(Op);
10359 MVT VecVT = Op.getSimpleValueType();
10360
10361 assert(VecVT.isScalableVector() &&
10362 "vector_interleave on non-scalable vector!");
10363
10364 // i1 vectors need to be widened to i8
10365 if (VecVT.getVectorElementType() == MVT::i1)
10366 return widenVectorOpsToi8(Op, DL, DAG);
10367
10368 MVT XLenVT = Subtarget.getXLenVT();
10369 SDValue VL = DAG.getRegister(RISCV::X0, XLenVT);
10370
10371 // If the VT is LMUL=8, we need to split and reassemble.
10372 if (VecVT.getSizeInBits().getKnownMinValue() == (8 * RISCV::RVVBitsPerBlock)) {
10373 auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
10374 auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
10375 EVT SplitVT = Op0Lo.getValueType();
10376
10377 SDValue ResLo = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
10378 DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op1Lo);
10379 SDValue ResHi = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
10380 DAG.getVTList(SplitVT, SplitVT), Op0Hi, Op1Hi);
10381
10382 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
10383 ResLo.getValue(0), ResLo.getValue(1));
10384 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
10385 ResHi.getValue(0), ResHi.getValue(1));
10386 return DAG.getMergeValues({Lo, Hi}, DL);
10387 }
10388
10389 SDValue Interleaved;
10390
10391 // If the element type is smaller than ELEN, then we can interleave with
10392 // vwaddu.vv and vwmaccu.vx
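// Conceptually this computes a + 2^SEW * b in elements of twice the width;
// reinterpreted as pairs of SEW-bit lanes, that value is the interleave of a
// and b.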
10393 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
10394 Interleaved = getWideningInterleave(Op.getOperand(0), Op.getOperand(1), DL,
10395 DAG, Subtarget);
10396 } else {
10397 // Otherwise, fall back to using vrgatherei16.vv
10398 MVT ConcatVT =
10399 MVT::getVectorVT(VecVT.getVectorElementType(),
10400 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
10401 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
10402 Op.getOperand(0), Op.getOperand(1));
10403
10404 MVT IdxVT = ConcatVT.changeVectorElementType(MVT::i16);
10405
10406 // 0 1 2 3 4 5 6 7 ...
10407 SDValue StepVec = DAG.getStepVector(DL, IdxVT);
10408
10409 // 1 1 1 1 1 1 1 1 ...
10410 SDValue Ones = DAG.getSplatVector(IdxVT, DL, DAG.getConstant(1, DL, XLenVT));
10411
10412 // 1 0 1 0 1 0 1 0 ...
10413 SDValue OddMask = DAG.getNode(ISD::AND, DL, IdxVT, StepVec, Ones);
10414 OddMask = DAG.getSetCC(
10415 DL, IdxVT.changeVectorElementType(MVT::i1), OddMask,
10416 DAG.getSplatVector(IdxVT, DL, DAG.getConstant(0, DL, XLenVT)),
10418
10419 SDValue VLMax = DAG.getSplatVector(IdxVT, DL, computeVLMax(VecVT, DL, DAG));
10420
10421 // Build up the index vector for interleaving the concatenated vector
10422 // 0 0 1 1 2 2 3 3 ...
10423 SDValue Idx = DAG.getNode(ISD::SRL, DL, IdxVT, StepVec, Ones);
10424 // 0 n 1 n+1 2 n+2 3 n+3 ...
10425 Idx =
10426 DAG.getNode(RISCVISD::ADD_VL, DL, IdxVT, Idx, VLMax, Idx, OddMask, VL);
10427
10428 // Then perform the interleave
10429 // v[0] v[n] v[1] v[n+1] v[2] v[n+2] v[3] v[n+3] ...
10430 SDValue TrueMask = getAllOnesMask(IdxVT, VL, DL, DAG);
10431 Interleaved = DAG.getNode(RISCVISD::VRGATHEREI16_VV_VL, DL, ConcatVT,
10432 Concat, Idx, DAG.getUNDEF(ConcatVT), TrueMask, VL);
10433 }
10434
10435 // Extract the two halves from the interleaved result
10436 SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
10437 DAG.getVectorIdxConstant(0, DL));
10438 SDValue Hi = DAG.getNode(
10439 ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
10440 DAG.getVectorIdxConstant(VecVT.getVectorMinNumElements(), DL));
10441
10442 return DAG.getMergeValues({Lo, Hi}, DL);
10443}
10444
10445// Lower step_vector to the vid instruction. Any non-identity step value must
10446 // be accounted for by manual expansion.
10447SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
10448 SelectionDAG &DAG) const {
10449 SDLoc DL(Op);
10450 MVT VT = Op.getSimpleValueType();
10451 assert(VT.isScalableVector() && "Expected scalable vector");
10452 MVT XLenVT = Subtarget.getXLenVT();
10453 auto [Mask, VL] = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
10454 SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
10455 uint64_t StepValImm = Op.getConstantOperandVal(0);
10456 if (StepValImm != 1) {
10457 if (isPowerOf2_64(StepValImm)) {
10458 SDValue StepVal =
10459 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
10460 DAG.getConstant(Log2_64(StepValImm), DL, XLenVT), VL);
10461 StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal);
10462 } else {
10463 SDValue StepVal = lowerScalarSplat(
10464 SDValue(), DAG.getConstant(StepValImm, DL, VT.getVectorElementType()),
10465 VL, VT, DL, DAG, Subtarget);
10466 StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal);
10467 }
10468 }
10469 return StepVec;
10470}
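// For example, a step_vector with step 8 becomes vid.v followed by a vector
// shift left by 3, while a non-power-of-two step such as 6 becomes vid.v
// followed by a multiply with a splat of 6.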
10471
10472// Implement vector_reverse using vrgather.vv with indices determined by
10473// subtracting the id of each element from (VLMAX-1). This will convert
10474// the indices like so:
10475// (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
10476// TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
10477SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
10478 SelectionDAG &DAG) const {
10479 SDLoc DL(Op);
10480 MVT VecVT = Op.getSimpleValueType();
10481 if (VecVT.getVectorElementType() == MVT::i1) {
10482 MVT WidenVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
10483 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, Op.getOperand(0));
10484 SDValue Op2 = DAG.getNode(ISD::VECTOR_REVERSE, DL, WidenVT, Op1);
10485 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Op2);
10486 }
10487 unsigned EltSize = VecVT.getScalarSizeInBits();
10488 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
10489 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
10490 unsigned MaxVLMAX =
10491 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
10492
10493 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
10494 MVT IntVT = VecVT.changeVectorElementTypeToInteger();
10495
10496 // If this is SEW=8 and VLMAX is potentially more than 256, we need
10497 // to use vrgatherei16.vv.
10498 // TODO: It's also possible to use vrgatherei16.vv for other types to
10499 // decrease register width for the index calculation.
10500 if (MaxVLMAX > 256 && EltSize == 8) {
10501 // If this is LMUL=8, we have to split before we can use vrgatherei16.vv.
10502 // Reverse each half, then reassemble them in reverse order.
10503 // NOTE: It's also possible that after splitting, VLMAX no longer requires
10504 // vrgatherei16.vv.
10505 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
10506 auto [Lo, Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
10507 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VecVT);
10508 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
10509 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
10510 // Reassemble the low and high pieces reversed.
10511 // FIXME: This is a CONCAT_VECTORS.
10512 SDValue Res =
10513 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, DAG.getUNDEF(VecVT), Hi,
10514 DAG.getVectorIdxConstant(0, DL));
10515 return DAG.getNode(
10516 ISD::INSERT_SUBVECTOR, DL, VecVT, Res, Lo,
10517 DAG.getVectorIdxConstant(LoVT.getVectorMinNumElements(), DL));
10518 }
10519
10520 // Just promote the int type to i16 which will double the LMUL.
10521 IntVT = MVT::getVectorVT(MVT::i16, VecVT.getVectorElementCount());
10522 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
10523 }
10524
10525 MVT XLenVT = Subtarget.getXLenVT();
10526 auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
10527
10528 // Calculate VLMAX-1 for the desired SEW.
10529 SDValue VLMinus1 = DAG.getNode(ISD::SUB, DL, XLenVT,
10530 computeVLMax(VecVT, DL, DAG),
10531 DAG.getConstant(1, DL, XLenVT));
10532
10533 // Splat VLMAX-1 taking care to handle SEW==64 on RV32.
10534 bool IsRV32E64 =
10535 !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;
10536 SDValue SplatVL;
10537 if (!IsRV32E64)
10538 SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1);
10539 else
10540 SplatVL = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, DAG.getUNDEF(IntVT),
10541 VLMinus1, DAG.getRegister(RISCV::X0, XLenVT));
10542
10543 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL);
10544 SDValue Indices = DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID,
10545 DAG.getUNDEF(IntVT), Mask, VL);
10546
10547 return DAG.getNode(GatherOpc, DL, VecVT, Op.getOperand(0), Indices,
10548 DAG.getUNDEF(VecVT), Mask, VL);
10549}
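// For example, reversing an nxv4i32 vector computes indices (VLMAX-1) - vid
// and performs a single vrgather.vv with them; the vrgatherei16 path above is
// only needed when SEW=8 and VLMAX could exceed 256.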
10550
10551SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op,
10552 SelectionDAG &DAG) const {
10553 SDLoc DL(Op);
10554 SDValue V1 = Op.getOperand(0);
10555 SDValue V2 = Op.getOperand(1);
10556 MVT XLenVT = Subtarget.getXLenVT();
10557 MVT VecVT = Op.getSimpleValueType();
10558
10559 SDValue VLMax = computeVLMax(VecVT, DL, DAG);
10560
10561 int64_t ImmValue = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
10562 SDValue DownOffset, UpOffset;
10563 if (ImmValue >= 0) {
10564 // The operand is a TargetConstant, we need to rebuild it as a regular
10565 // constant.
10566 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
10567 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DownOffset);
10568 } else {
10569 // The operand is a TargetConstant, we need to rebuild it as a regular
10570 // constant rather than negating the original operand.
10571 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
10572 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, UpOffset);
10573 }
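// For example, a splice immediate of -2 gives UpOffset = 2 and
// DownOffset = VLMAX - 2, so the last two elements of V1 end up in lanes 0-1
// and V2 fills the remaining lanes.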
10574
10575 SDValue TrueMask = getAllOnesMask(VecVT, VLMax, DL, DAG);
10576
10577 SDValue SlideDown =
10578 getVSlidedown(DAG, Subtarget, DL, VecVT, DAG.getUNDEF(VecVT), V1,
10579 DownOffset, TrueMask, UpOffset);
10580 return getVSlideup(DAG, Subtarget, DL, VecVT, SlideDown, V2, UpOffset,
10581 TrueMask, DAG.getRegister(RISCV::X0, XLenVT),
10582 RISCVII::TAIL_AGNOSTIC);
10583}
10584
10585SDValue
10586RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
10587 SelectionDAG &DAG) const {
10588 SDLoc DL(Op);
10589 auto *Load = cast<LoadSDNode>(Op);
10590
10591 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
10592 Load->getMemoryVT(),
10593 *Load->getMemOperand()) &&
10594 "Expecting a correctly-aligned load");
10595
10596 MVT VT = Op.getSimpleValueType();
10597 MVT XLenVT = Subtarget.getXLenVT();
10598 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10599
10600 // If we know the exact VLEN and our fixed length vector completely fills
10601 // the container, use a whole register load instead.
10602 const auto [MinVLMAX, MaxVLMAX] =
10603 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
10604 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
10605 getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
10606 MachineMemOperand *MMO = Load->getMemOperand();
10607 SDValue NewLoad =
10608 DAG.getLoad(ContainerVT, DL, Load->getChain(), Load->getBasePtr(),
10609 MMO->getPointerInfo(), MMO->getBaseAlign(), MMO->getFlags(),
10610 MMO->getAAInfo(), MMO->getRanges());
10611 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
10612 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
10613 }
10614
10615 SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG, Subtarget);
10616
10617 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
10618 SDValue IntID = DAG.getTargetConstant(
10619 IsMaskOp ? Intrinsic::riscv_vlm : Intrinsic::riscv_vle, DL, XLenVT);
10620 SmallVector<SDValue, 4> Ops{Load->getChain(), IntID};
10621 if (!IsMaskOp)
10622 Ops.push_back(DAG.getUNDEF(ContainerVT));
10623 Ops.push_back(Load->getBasePtr());
10624 Ops.push_back(VL);
10625 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
10626 SDValue NewLoad =
10627 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
10628 Load->getMemoryVT(), Load->getMemOperand());
10629
10630 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
10631 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
10632}
10633
10634SDValue
10635RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
10636 SelectionDAG &DAG) const {
10637 SDLoc DL(Op);
10638 auto *Store = cast<StoreSDNode>(Op);
10639
10640 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
10641 Store->getMemoryVT(),
10642 *Store->getMemOperand()) &&
10643 "Expecting a correctly-aligned store");
10644
10645 SDValue StoreVal = Store->getValue();
10646 MVT VT = StoreVal.getSimpleValueType();
10647 MVT XLenVT = Subtarget.getXLenVT();
10648
10649 // If the size is less than a byte, we need to pad with zeros to make a byte.
10650 if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) {
10651 VT = MVT::v8i1;
10652 StoreVal =
10653 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getConstant(0, DL, VT),
10654 StoreVal, DAG.getVectorIdxConstant(0, DL));
10655 }
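// For example, a v4i1 store is widened here to v8i1 with the upper four bits
// zeroed, so the mask store below writes a full byte.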
10656
10657 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10658
10659 SDValue NewValue =
10660 convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
10661
10662
10663 // If we know the exact VLEN and our fixed length vector completely fills
10664 // the container, use a whole register store instead.
10665 const auto [MinVLMAX, MaxVLMAX] =
10666 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
10667 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
10668 getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
10669 MachineMemOperand *MMO = Store->getMemOperand();
10670 return DAG.getStore(Store->getChain(), DL, NewValue, Store->getBasePtr(),
10671 MMO->getPointerInfo(), MMO->getBaseAlign(),
10672 MMO->getFlags(), MMO->getAAInfo());
10673 }
10674
10675 SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG,
10676 Subtarget);
10677
10678 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
10679 SDValue IntID = DAG.getTargetConstant(
10680 IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse, DL, XLenVT);
10681 return DAG.getMemIntrinsicNode(
10682 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other),
10683 {Store->getChain(), IntID, NewValue, Store->getBasePtr(), VL},
10684 Store->getMemoryVT(), Store->getMemOperand());
10685}
10686
10687SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
10688 SelectionDAG &DAG) const {
10689 SDLoc DL(Op);
10690 MVT VT = Op.getSimpleValueType();
10691
10692 const auto *MemSD = cast<MemSDNode>(Op);
10693 EVT MemVT = MemSD->getMemoryVT();
10694 MachineMemOperand *MMO = MemSD->getMemOperand();
10695 SDValue Chain = MemSD->getChain();
10696 SDValue BasePtr = MemSD->getBasePtr();
10697
10698 SDValue Mask, PassThru, VL;
10699 if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Op)) {
10700 Mask = VPLoad->getMask();
10701 PassThru = DAG.getUNDEF(VT);
10702 VL = VPLoad->getVectorLength();
10703 } else {
10704 const auto *MLoad = cast<MaskedLoadSDNode>(Op);
10705 Mask = MLoad->getMask();
10706 PassThru = MLoad->getPassThru();
10707 }
10708
10709 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
10710
10711 MVT XLenVT = Subtarget.getXLenVT();
10712
10713 MVT ContainerVT = VT;
10714 if (VT.isFixedLengthVector()) {
10715 ContainerVT = getContainerForFixedLengthVector(VT);
10716 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
10717 if (!IsUnmasked) {
10718 MVT MaskVT = getMaskTypeFor(ContainerVT);
10719 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
10720 }
10721 }
10722
10723 if (!VL)
10724 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
10725
10726 unsigned IntID =
10727 IsUnmasked ? Intrinsic::riscv_vle : Intrinsic::riscv_vle_mask;
10728 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
10729 if (IsUnmasked)
10730 Ops.push_back(DAG.getUNDEF(ContainerVT));
10731 else
10732 Ops.push_back(PassThru);
10733 Ops.push_back(BasePtr);
10734 if (!IsUnmasked)
10735 Ops.push_back(Mask);
10736 Ops.push_back(VL);
10737 if (!IsUnmasked)
10738 Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
10739
10740 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
10741
10742 SDValue Result =
10743 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
10744 Chain = Result.getValue(1);
10745
10746 if (VT.isFixedLengthVector())
10747 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
10748
10749 return DAG.getMergeValues({Result, Chain}, DL);
10750}
10751
10752SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
10753 SelectionDAG &DAG) const {
10754 SDLoc DL(Op);
10755
10756 const auto *MemSD = cast<MemSDNode>(Op);
10757 EVT MemVT = MemSD->getMemoryVT();
10758 MachineMemOperand *MMO = MemSD->getMemOperand();
10759 SDValue Chain = MemSD->getChain();
10760 SDValue BasePtr = MemSD->getBasePtr();
10761 SDValue Val, Mask, VL;
10762
10763 bool IsCompressingStore = false;
10764 if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) {
10765 Val = VPStore->getValue();
10766 Mask = VPStore->getMask();
10767 VL = VPStore->getVectorLength();
10768 } else {
10769 const auto *MStore = cast<MaskedStoreSDNode>(Op);
10770 Val = MStore->getValue();
10771 Mask = MStore->getMask();
10772 IsCompressingStore = MStore->isCompressingStore();
10773 }
10774
10775 bool IsUnmasked =
10776 ISD::isConstantSplatVectorAllOnes(Mask.getNode()) || IsCompressingStore;
10777
10778 MVT VT = Val.getSimpleValueType();
10779 MVT XLenVT = Subtarget.getXLenVT();
10780
10781 MVT ContainerVT = VT;
10782 if (VT.isFixedLengthVector()) {
10783 ContainerVT = getContainerForFixedLengthVector(VT);
10784
10785 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
10786 if (!IsUnmasked || IsCompressingStore) {
10787 MVT MaskVT = getMaskTypeFor(ContainerVT);
10788 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
10789 }
10790 }
10791
10792 if (!VL)
10793 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
10794
10795 if (IsCompressingStore) {
10796 Val = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
10797 DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
10798 DAG.getUNDEF(ContainerVT), Val, Mask, VL);
10799 VL =
10800 DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
10801 getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
10802 }
10803
10804 unsigned IntID =
10805 IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
10806 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
10807 Ops.push_back(Val);
10808 Ops.push_back(BasePtr);
10809 if (!IsUnmasked)
10810 Ops.push_back(Mask);
10811 Ops.push_back(VL);
10812
10813 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
10814 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
10815}
10816
10817SDValue
10818RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
10819 SelectionDAG &DAG) const {
10820 MVT InVT = Op.getOperand(0).getSimpleValueType();
10821 MVT ContainerVT = getContainerForFixedLengthVector(InVT);
10822
10823 MVT VT = Op.getSimpleValueType();
10824
10825 SDValue Op1 =
10826 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
10827 SDValue Op2 =
10828 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
10829
10830 SDLoc DL(Op);
10831 auto [Mask, VL] = getDefaultVLOps(VT.getVectorNumElements(), ContainerVT, DL,
10832 DAG, Subtarget);
10833 MVT MaskVT = getMaskTypeFor(ContainerVT);
10834
10835 SDValue Cmp =
10836 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
10837 {Op1, Op2, Op.getOperand(2), DAG.getUNDEF(MaskVT), Mask, VL});
10838
10839 return convertFromScalableVector(VT, Cmp, DAG, Subtarget);
10840}
10841
10842SDValue RISCVTargetLowering::lowerVectorStrictFSetcc(SDValue Op,
10843 SelectionDAG &DAG) const {
10844 unsigned Opc = Op.getOpcode();
10845 SDLoc DL(Op);
10846 SDValue Chain = Op.getOperand(0);
10847 SDValue Op1 = Op.getOperand(1);
10848 SDValue Op2 = Op.getOperand(2);
10849 SDValue CC = Op.getOperand(3);
10850 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
10851 MVT VT = Op.getSimpleValueType();
10852 MVT InVT = Op1.getSimpleValueType();
10853
10854 // RVV VMFEQ/VMFNE ignore qNaN, so we expand strict_fsetccs with OEQ/UNE
10855 // condition codes.
10856 if (Opc == ISD::STRICT_FSETCCS) {
10857 // Expand strict_fsetccs(x, oeq) to
10858 // (and strict_fsetccs(x, y, oge), strict_fsetccs(x, y, ole))
10859 SDVTList VTList = Op->getVTList();
10860 if (CCVal == ISD::SETEQ || CCVal == ISD::SETOEQ) {
10861 SDValue OLECCVal = DAG.getCondCode(ISD::SETOLE);
10862 SDValue Tmp1 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
10863 Op2, OLECCVal);
10864 SDValue Tmp2 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op2,
10865 Op1, OLECCVal);
10866 SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
10867 Tmp1.getValue(1), Tmp2.getValue(1));
10868 // Tmp1 and Tmp2 might be the same node.
10869 if (Tmp1 != Tmp2)
10870 Tmp1 = DAG.getNode(ISD::AND, DL, VT, Tmp1, Tmp2);
10871 return DAG.getMergeValues({Tmp1, OutChain}, DL);
10872 }
10873
10874 // Expand (strict_fsetccs x, y, une) to (not (strict_fsetccs x, y, oeq))
10875 if (CCVal == ISD::SETNE || CCVal == ISD::SETUNE) {
10876 SDValue OEQCCVal = DAG.getCondCode(ISD::SETOEQ);
10877 SDValue OEQ = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
10878 Op2, OEQCCVal);
10879 SDValue Res = DAG.getNOT(DL, OEQ, VT);
10880 return DAG.getMergeValues({Res, OEQ.getValue(1)}, DL);
10881 }
10882 }
10883
10884 MVT ContainerInVT = InVT;
10885 if (InVT.isFixedLengthVector()) {
10886 ContainerInVT = getContainerForFixedLengthVector(InVT);
10887 Op1 = convertToScalableVector(ContainerInVT, Op1, DAG, Subtarget);
10888 Op2 = convertToScalableVector(ContainerInVT, Op2, DAG, Subtarget);
10889 }
10890 MVT MaskVT = getMaskTypeFor(ContainerInVT);
10891
10892 auto [Mask, VL] = getDefaultVLOps(InVT, ContainerInVT, DL, DAG, Subtarget);
10893
10894 SDValue Res;
10895 if (Opc == ISD::STRICT_FSETCC &&
10896 (CCVal == ISD::SETLT || CCVal == ISD::SETOLT || CCVal == ISD::SETLE ||
10897 CCVal == ISD::SETOLE)) {
10898 // VMFLT/VMFLE/VMFGT/VMFGE raise an exception for qNaN. Generate a mask that
10899 // is only active when both input elements are ordered.
10900 SDValue True = getAllOnesMask(ContainerInVT, VL, DL, DAG);
10901 SDValue OrderMask1 = DAG.getNode(
10902 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
10903 {Chain, Op1, Op1, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
10904 True, VL});
10905 SDValue OrderMask2 = DAG.getNode(
10906 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
10907 {Chain, Op2, Op2, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
10908 True, VL});
10909 Mask =
10910 DAG.getNode(RISCVISD::VMAND_VL, DL, MaskVT, OrderMask1, OrderMask2, VL);
10911 // Use Mask as the merge operand to let the result be 0 if either of the
10912 // inputs is unordered.
10913 Res = DAG.getNode(RISCVISD::STRICT_FSETCCS_VL, DL,
10914 DAG.getVTList(MaskVT, MVT::Other),
10915 {Chain, Op1, Op2, CC, Mask, Mask, VL});
10916 } else {
10917 unsigned RVVOpc = Opc == ISD::STRICT_FSETCC ? RISCVISD::STRICT_FSETCC_VL
10918 : RISCVISD::STRICT_FSETCCS_VL;
10919 Res = DAG.getNode(RVVOpc, DL, DAG.getVTList(MaskVT, MVT::Other),
10920 {Chain, Op1, Op2, CC, DAG.getUNDEF(MaskVT), Mask, VL});
10921 }
10922
10923 if (VT.isFixedLengthVector()) {
10924 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
10925 return DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
10926 }
10927 return Res;
10928}
10929
10930// Lower vector ABS to smax(X, sub(0, X)).
10931SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
10932 SDLoc DL(Op);
10933 MVT VT = Op.getSimpleValueType();
10934 SDValue X = Op.getOperand(0);
10935
10936 assert((Op.getOpcode() == ISD::VP_ABS || VT.isFixedLengthVector()) &&
10937 "Unexpected type for ISD::ABS");
10938
10939 MVT ContainerVT = VT;
10940 if (VT.isFixedLengthVector()) {
10941 ContainerVT = getContainerForFixedLengthVector(VT);
10942 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
10943 }
10944
10945 SDValue Mask, VL;
10946 if (Op->getOpcode() == ISD::VP_ABS) {
10947 Mask = Op->getOperand(1);
10948 if (VT.isFixedLengthVector())
10949 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
10950 Subtarget);
10951 VL = Op->getOperand(2);
10952 } else
10953 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
10954
10955 SDValue SplatZero = DAG.getNode(
10956 RISCVISD::VMV_V_X_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
10957 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
10958 SDValue NegX = DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X,
10959 DAG.getUNDEF(ContainerVT), Mask, VL);
10960 SDValue Max = DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX,
10961 DAG.getUNDEF(ContainerVT), Mask, VL);
10962
10963 if (VT.isFixedLengthVector())
10964 Max = convertFromScalableVector(VT, Max, DAG, Subtarget);
10965 return Max;
10966}
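// As a rough sketch, for a fixed-length vector this becomes a vrsub with
// immediate 0 (computing 0 - x) followed by a vmax.vv against the original
// value.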
10967
10968SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV(
10969 SDValue Op, SelectionDAG &DAG) const {
10970 SDLoc DL(Op);
10971 MVT VT = Op.getSimpleValueType();
10972 SDValue Mag = Op.getOperand(0);
10973 SDValue Sign = Op.getOperand(1);
10974 assert(Mag.getValueType() == Sign.getValueType() &&
10975 "Can only handle COPYSIGN with matching types.");
10976
10977 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10978 Mag = convertToScalableVector(ContainerVT, Mag, DAG, Subtarget);
10979 Sign = convertToScalableVector(ContainerVT, Sign, DAG, Subtarget);
10980
10981 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
10982
10983 SDValue CopySign = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Mag,
10984 Sign, DAG.getUNDEF(ContainerVT), Mask, VL);
10985
10986 return convertFromScalableVector(VT, CopySign, DAG, Subtarget);
10987}
10988
10989SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV(
10990 SDValue Op, SelectionDAG &DAG) const {
10991 MVT VT = Op.getSimpleValueType();
10992 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10993
10994 MVT I1ContainerVT =
10995 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
10996
10997 SDValue CC =
10998 convertToScalableVector(I1ContainerVT, Op.getOperand(0), DAG, Subtarget);
10999 SDValue Op1 =
11000 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
11001 SDValue Op2 =
11002 convertToScalableVector(ContainerVT, Op.getOperand(2), DAG, Subtarget);
11003
11004 SDLoc DL(Op);
11005 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11006
11007 SDValue Select = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, Op1,
11008 Op2, DAG.getUNDEF(ContainerVT), VL);
11009
11010 return convertFromScalableVector(VT, Select, DAG, Subtarget);
11011}
11012
11013SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op,
11014 SelectionDAG &DAG) const {
11015 unsigned NewOpc = getRISCVVLOp(Op);
11016 bool HasMergeOp = hasMergeOp(NewOpc);
11017 bool HasMask = hasMaskOp(NewOpc);
11018
11019 MVT VT = Op.getSimpleValueType();
11020 MVT ContainerVT = getContainerForFixedLengthVector(VT);
11021
11022 // Create list of operands by converting existing ones to scalable types.
11023 SmallVector<SDValue, 6> Ops;
11024 for (const SDValue &V : Op->op_values()) {
11025 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
11026
11027 // Pass through non-vector operands.
11028 if (!V.getValueType().isVector()) {
11029 Ops.push_back(V);
11030 continue;
11031 }
11032
11033 // "cast" fixed length vector to a scalable vector.
11034 assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
11035 "Only fixed length vectors are supported!");
11036 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
11037 }
11038
11039 SDLoc DL(Op);
11040 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
11041 if (HasMergeOp)
11042 Ops.push_back(DAG.getUNDEF(ContainerVT));
11043 if (HasMask)
11044 Ops.push_back(Mask);
11045 Ops.push_back(VL);
11046
11047 // StrictFP operations have two result values. Their lowered result should
11048 // have the same result count.
11049 if (Op->isStrictFPOpcode()) {
11050 SDValue ScalableRes =
11051 DAG.getNode(NewOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), Ops,
11052 Op->getFlags());
11053 SDValue SubVec = convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
11054 return DAG.getMergeValues({SubVec, ScalableRes.getValue(1)}, DL);
11055 }
11056
11057 SDValue ScalableRes =
11058 DAG.getNode(NewOpc, DL, ContainerVT, Ops, Op->getFlags());
11059 return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
11060}
11061
11062// Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node:
11063// * Operands of each node are assumed to be in the same order.
11064// * The EVL operand is promoted from i32 to i64 on RV64.
11065// * Fixed-length vectors are converted to their scalable-vector container
11066// types.
11067SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG) const {
11068 unsigned RISCVISDOpc = getRISCVVLOp(Op);
11069 bool HasMergeOp = hasMergeOp(RISCVISDOpc);
11070
11071 SDLoc DL(Op);
11072 MVT VT = Op.getSimpleValueType();
11073 SmallVector<SDValue, 16> Ops;
11074
11075 MVT ContainerVT = VT;
11076 if (VT.isFixedLengthVector())
11077 ContainerVT = getContainerForFixedLengthVector(VT);
11078
11079 for (const auto &OpIdx : enumerate(Op->ops())) {
11080 SDValue V = OpIdx.value();
11081 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
11082 // Add dummy merge value before the mask. Or if there isn't a mask, before
11083 // EVL.
11084 if (HasMergeOp) {
11085 auto MaskIdx = ISD::getVPMaskIdx(Op.getOpcode());
11086 if (MaskIdx) {
11087 if (*MaskIdx == OpIdx.index())
11088 Ops.push_back(DAG.getUNDEF(ContainerVT));
11089 } else if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) ==
11090 OpIdx.index()) {
11091 if (Op.getOpcode() == ISD::VP_MERGE) {
11092 // For VP_MERGE, copy the false operand instead of an undef value.
11093 Ops.push_back(Ops.back());
11094 } else {
11095 assert(Op.getOpcode() == ISD::VP_SELECT);
11096 // For VP_SELECT, add an undef value.
11097 Ops.push_back(DAG.getUNDEF(ContainerVT));
11098 }
11099 }
11100 }
11101 // Pass through operands which aren't fixed-length vectors.
11102 if (!V.getValueType().isFixedLengthVector()) {
11103 Ops.push_back(V);
11104 continue;
11105 }
11106 // "cast" fixed length vector to a scalable vector.
11107 MVT OpVT = V.getSimpleValueType();
11108 MVT ContainerVT = getContainerForFixedLengthVector(OpVT);
11109 assert(useRVVForFixedLengthVectorVT(OpVT) &&
11110 "Only fixed length vectors are supported!");
11111 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
11112 }
11113
11114 if (!VT.isFixedLengthVector())
11115 return DAG.getNode(RISCVISDOpc, DL, VT, Ops, Op->getFlags());
11116
11117 SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops, Op->getFlags());
11118
11119 return convertFromScalableVector(VT, VPOp, DAG, Subtarget);
11120}
11121
11122SDValue RISCVTargetLowering::lowerVPExtMaskOp(SDValue Op,
11123 SelectionDAG &DAG) const {
11124 SDLoc DL(Op);
11125 MVT VT = Op.getSimpleValueType();
11126
11127 SDValue Src = Op.getOperand(0);
11128 // NOTE: Mask is dropped.
11129 SDValue VL = Op.getOperand(2);
11130
11131 MVT ContainerVT = VT;
11132 if (VT.isFixedLengthVector()) {
11133 ContainerVT = getContainerForFixedLengthVector(VT);
11134 MVT SrcVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
11135 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
11136 }
11137
11138 MVT XLenVT = Subtarget.getXLenVT();
11139 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
11140 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11141 DAG.getUNDEF(ContainerVT), Zero, VL);
11142
11143 SDValue SplatValue = DAG.getConstant(
11144 Op.getOpcode() == ISD::VP_ZERO_EXTEND ? 1 : -1, DL, XLenVT);
11145 SDValue Splat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11146 DAG.getUNDEF(ContainerVT), SplatValue, VL);
11147
11148 SDValue Result = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Src, Splat,
11149 ZeroSplat, DAG.getUNDEF(ContainerVT), VL);
11150 if (!VT.isFixedLengthVector())
11151 return Result;
11152 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11153}
11154
11155SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op,
11156 SelectionDAG &DAG) const {
11157 SDLoc DL(Op);
11158 MVT VT = Op.getSimpleValueType();
11159
11160 SDValue Op1 = Op.getOperand(0);
11161 SDValue Op2 = Op.getOperand(1);
11162 ISD::CondCode Condition = cast<CondCodeSDNode>(Op.getOperand(2))->get();
11163 // NOTE: Mask is dropped.
11164 SDValue VL = Op.getOperand(4);
11165
11166 MVT ContainerVT = VT;
11167 if (VT.isFixedLengthVector()) {
11168 ContainerVT = getContainerForFixedLengthVector(VT);
11169 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
11170 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
11171 }
11172
11173   SDValue Result;
11174   SDValue AllOneMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
11175
11176 switch (Condition) {
11177 default:
11178 break;
11179 // X != Y --> (X^Y)
11180 case ISD::SETNE:
11181 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
11182 break;
11183 // X == Y --> ~(X^Y)
11184 case ISD::SETEQ: {
11185 SDValue Temp =
11186 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
11187 Result =
11188 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, AllOneMask, VL);
11189 break;
11190 }
11191 // X >s Y --> X == 0 & Y == 1 --> ~X & Y
11192 // X <u Y --> X == 0 & Y == 1 --> ~X & Y
11193 case ISD::SETGT:
11194 case ISD::SETULT: {
11195 SDValue Temp =
11196 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
11197 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Temp, Op2, VL);
11198 break;
11199 }
11200 // X <s Y --> X == 1 & Y == 0 --> ~Y & X
11201 // X >u Y --> X == 1 & Y == 0 --> ~Y & X
11202 case ISD::SETLT:
11203 case ISD::SETUGT: {
11204 SDValue Temp =
11205 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
11206 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Op1, Temp, VL);
11207 break;
11208 }
11209 // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
11210 // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
11211 case ISD::SETGE:
11212 case ISD::SETULE: {
11213 SDValue Temp =
11214 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
11215 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op2, VL);
11216 break;
11217 }
11218 // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
11219 // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
11220 case ISD::SETLE:
11221 case ISD::SETUGE: {
11222 SDValue Temp =
11223 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
11224 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op1, VL);
11225 break;
11226 }
11227 }
11228
11229 if (!VT.isFixedLengthVector())
11230 return Result;
11231 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11232}
11233
11234// Lower Floating-Point/Integer Type-Convert VP SDNodes
11235SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op,
11236 SelectionDAG &DAG) const {
11237 SDLoc DL(Op);
11238
11239 SDValue Src = Op.getOperand(0);
11240 SDValue Mask = Op.getOperand(1);
11241 SDValue VL = Op.getOperand(2);
11242 unsigned RISCVISDOpc = getRISCVVLOp(Op);
11243
11244 MVT DstVT = Op.getSimpleValueType();
11245 MVT SrcVT = Src.getSimpleValueType();
11246 if (DstVT.isFixedLengthVector()) {
11247 DstVT = getContainerForFixedLengthVector(DstVT);
11248 SrcVT = getContainerForFixedLengthVector(SrcVT);
11249 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
11250 MVT MaskVT = getMaskTypeFor(DstVT);
11251 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11252 }
11253
11254 unsigned DstEltSize = DstVT.getScalarSizeInBits();
11255 unsigned SrcEltSize = SrcVT.getScalarSizeInBits();
11256
11258 if (DstEltSize >= SrcEltSize) { // Single-width and widening conversion.
11259 if (SrcVT.isInteger()) {
11260 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
11261
11262       unsigned RISCVISDExtOpc = RISCVISDOpc == RISCVISD::SINT_TO_FP_VL
11263                                     ? RISCVISD::VSEXT_VL
11264                                     : RISCVISD::VZEXT_VL;
11265
11266 // Do we need to do any pre-widening before converting?
11267 if (SrcEltSize == 1) {
11268 MVT IntVT = DstVT.changeVectorElementTypeToInteger();
11269 MVT XLenVT = Subtarget.getXLenVT();
11270 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
11271 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
11272 DAG.getUNDEF(IntVT), Zero, VL);
11273 SDValue One = DAG.getConstant(
11274 RISCVISDExtOpc == RISCVISD::VZEXT_VL ? 1 : -1, DL, XLenVT);
11275 SDValue OneSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
11276 DAG.getUNDEF(IntVT), One, VL);
11277 Src = DAG.getNode(RISCVISD::VMERGE_VL, DL, IntVT, Src, OneSplat,
11278 ZeroSplat, DAG.getUNDEF(IntVT), VL);
11279 } else if (DstEltSize > (2 * SrcEltSize)) {
11280 // Widen before converting.
11281 MVT IntVT = MVT::getVectorVT(MVT::getIntegerVT(DstEltSize / 2),
11282 DstVT.getVectorElementCount());
11283 Src = DAG.getNode(RISCVISDExtOpc, DL, IntVT, Src, Mask, VL);
11284 }
11285
11286 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
11287 } else {
11288 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
11289 "Wrong input/output vector types");
11290
11291 // Convert f16 to f32 then convert f32 to i64.
11292 if (DstEltSize > (2 * SrcEltSize)) {
11293 assert(SrcVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
11294 MVT InterimFVT =
11295 MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
11296 Src =
11297 DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterimFVT, Src, Mask, VL);
11298 }
11299
11300 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
11301 }
11302 } else { // Narrowing + Conversion
11303 if (SrcVT.isInteger()) {
11304 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
11305       // First do a narrowing conversion to an FP type half the size, then
11306       // round the FP type to a smaller FP type if needed.
11307
11308 MVT InterimFVT = DstVT;
11309 if (SrcEltSize > (2 * DstEltSize)) {
11310 assert(SrcEltSize == (4 * DstEltSize) && "Unexpected types!");
11311 assert(DstVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
11312 InterimFVT = MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
11313 }
11314
11315 Result = DAG.getNode(RISCVISDOpc, DL, InterimFVT, Src, Mask, VL);
11316
11317 if (InterimFVT != DstVT) {
11318 Src = Result;
11319 Result = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, DstVT, Src, Mask, VL);
11320 }
11321 } else {
11322 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
11323 "Wrong input/output vector types");
11324 // First do a narrowing conversion to an integer half the size, then
11325 // truncate if needed.
11326
11327 if (DstEltSize == 1) {
11328 // First convert to the same size integer, then convert to mask using
11329 // setcc.
11330 assert(SrcEltSize >= 16 && "Unexpected FP type!");
11331 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize),
11332 DstVT.getVectorElementCount());
11333 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
11334
11335 // Compare the integer result to 0. The integer should be 0 or 1/-1,
11336 // otherwise the conversion was undefined.
11337 MVT XLenVT = Subtarget.getXLenVT();
11338 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
11339 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterimIVT,
11340 DAG.getUNDEF(InterimIVT), SplatZero, VL);
11341 Result = DAG.getNode(RISCVISD::SETCC_VL, DL, DstVT,
11342 {Result, SplatZero, DAG.getCondCode(ISD::SETNE),
11343 DAG.getUNDEF(DstVT), Mask, VL});
11344 } else {
11345 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
11346 DstVT.getVectorElementCount());
11347
11348 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
11349
11350 while (InterimIVT != DstVT) {
11351 SrcEltSize /= 2;
11352 Src = Result;
11353 InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
11354 DstVT.getVectorElementCount());
11355 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, InterimIVT,
11356 Src, Mask, VL);
11357 }
11358 }
11359 }
11360 }
11361
11362 MVT VT = Op.getSimpleValueType();
11363 if (!VT.isFixedLengthVector())
11364 return Result;
11365 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11366}
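// For example (roughly): for a widening conversion such as vp_sitofp from
// v4i16 to v4f64, DstEltSize (64) exceeds 2 * SrcEltSize (16), so the source
// is first sign-extended to an i32 vector with VSEXT_VL and then converted
// with a single widening SINT_TO_FP_VL, since each vector FP convert step
// only doubles the element width.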
11367
11368SDValue
11369RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op,
11370 SelectionDAG &DAG) const {
11371 SDLoc DL(Op);
11372
11373 SDValue Op1 = Op.getOperand(0);
11374 SDValue Op2 = Op.getOperand(1);
11375 SDValue Offset = Op.getOperand(2);
11376 SDValue Mask = Op.getOperand(3);
11377 SDValue EVL1 = Op.getOperand(4);
11378 SDValue EVL2 = Op.getOperand(5);
11379
11380 const MVT XLenVT = Subtarget.getXLenVT();
11381 MVT VT = Op.getSimpleValueType();
11382 MVT ContainerVT = VT;
11383 if (VT.isFixedLengthVector()) {
11384 ContainerVT = getContainerForFixedLengthVector(VT);
11385 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
11386 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
11387 MVT MaskVT = getMaskTypeFor(ContainerVT);
11388 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11389 }
11390
11391 // EVL1 may need to be extended to XLenVT with RV64LegalI32.
11392 EVL1 = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, EVL1);
11393
11394 bool IsMaskVector = VT.getVectorElementType() == MVT::i1;
11395 if (IsMaskVector) {
11396 ContainerVT = ContainerVT.changeVectorElementType(MVT::i8);
11397
11398 // Expand input operands
11399 SDValue SplatOneOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11400 DAG.getUNDEF(ContainerVT),
11401 DAG.getConstant(1, DL, XLenVT), EVL1);
11402 SDValue SplatZeroOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11403 DAG.getUNDEF(ContainerVT),
11404 DAG.getConstant(0, DL, XLenVT), EVL1);
11405 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op1, SplatOneOp1,
11406 SplatZeroOp1, DAG.getUNDEF(ContainerVT), EVL1);
11407
11408 SDValue SplatOneOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11409 DAG.getUNDEF(ContainerVT),
11410 DAG.getConstant(1, DL, XLenVT), EVL2);
11411 SDValue SplatZeroOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11412 DAG.getUNDEF(ContainerVT),
11413 DAG.getConstant(0, DL, XLenVT), EVL2);
11414 Op2 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op2, SplatOneOp2,
11415 SplatZeroOp2, DAG.getUNDEF(ContainerVT), EVL2);
11416 }
11417
11418 int64_t ImmValue = cast<ConstantSDNode>(Offset)->getSExtValue();
11419 SDValue DownOffset, UpOffset;
11420 if (ImmValue >= 0) {
11421     // The operand is a TargetConstant; we need to rebuild it as a regular
11422 // constant.
11423 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
11424 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, DownOffset);
11425 } else {
11426     // The operand is a TargetConstant; we need to rebuild it as a regular
11427 // constant rather than negating the original operand.
11428 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
11429 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, UpOffset);
11430 }
11431
11432 SDValue SlideDown =
11433 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
11434 Op1, DownOffset, Mask, UpOffset);
11435 SDValue Result = getVSlideup(DAG, Subtarget, DL, ContainerVT, SlideDown, Op2,
11436 UpOffset, Mask, EVL2, RISCVII::TAIL_AGNOSTIC);
11437
11438 if (IsMaskVector) {
11439 // Truncate Result back to a mask vector (Result has same EVL as Op2)
11440 Result = DAG.getNode(
11441 RISCVISD::SETCC_VL, DL, ContainerVT.changeVectorElementType(MVT::i1),
11442 {Result, DAG.getConstant(0, DL, ContainerVT),
11443 DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(getMaskTypeFor(ContainerVT)),
11444 Mask, EVL2});
11445 }
11446
11447 if (!VT.isFixedLengthVector())
11448 return Result;
11449 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11450}
11451
11452SDValue
11453RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op,
11454 SelectionDAG &DAG) const {
11455 SDLoc DL(Op);
11456 MVT VT = Op.getSimpleValueType();
11457 MVT XLenVT = Subtarget.getXLenVT();
11458
11459 SDValue Op1 = Op.getOperand(0);
11460 SDValue Mask = Op.getOperand(1);
11461 SDValue EVL = Op.getOperand(2);
11462
11463 MVT ContainerVT = VT;
11464 if (VT.isFixedLengthVector()) {
11465 ContainerVT = getContainerForFixedLengthVector(VT);
11466 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
11467 MVT MaskVT = getMaskTypeFor(ContainerVT);
11468 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11469 }
11470
11471 MVT GatherVT = ContainerVT;
11472 MVT IndicesVT = ContainerVT.changeVectorElementTypeToInteger();
11473 // Check if we are working with mask vectors
11474 bool IsMaskVector = ContainerVT.getVectorElementType() == MVT::i1;
11475 if (IsMaskVector) {
11476 GatherVT = IndicesVT = ContainerVT.changeVectorElementType(MVT::i8);
11477
11478 // Expand input operand
11479 SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
11480 DAG.getUNDEF(IndicesVT),
11481 DAG.getConstant(1, DL, XLenVT), EVL);
11482 SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
11483 DAG.getUNDEF(IndicesVT),
11484 DAG.getConstant(0, DL, XLenVT), EVL);
11485 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, IndicesVT, Op1, SplatOne,
11486 SplatZero, DAG.getUNDEF(IndicesVT), EVL);
11487 }
11488
11489 unsigned EltSize = GatherVT.getScalarSizeInBits();
11490 unsigned MinSize = GatherVT.getSizeInBits().getKnownMinValue();
11491 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
11492 unsigned MaxVLMAX =
11493 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
11494
11495 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
11496 // If this is SEW=8 and VLMAX is unknown or more than 256, we need
11497 // to use vrgatherei16.vv.
11498 // TODO: It's also possible to use vrgatherei16.vv for other types to
11499 // decrease register width for the index calculation.
11500 // NOTE: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
11501 if (MaxVLMAX > 256 && EltSize == 8) {
11502 // If this is LMUL=8, we have to split before using vrgatherei16.vv.
11503 // Split the vector in half and reverse each half using a full register
11504 // reverse.
11505 // Swap the halves and concatenate them.
11506 // Slide the concatenated result by (VLMax - VL).
11507 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
11508 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(GatherVT);
11509 auto [Lo, Hi] = DAG.SplitVector(Op1, DL);
11510
11511 SDValue LoRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
11512 SDValue HiRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
11513
11514 // Reassemble the low and high pieces reversed.
11515 // NOTE: this Result is unmasked (because we do not need masks for
11516 // shuffles). If in the future this has to change, we can use a SELECT_VL
11517 // between Result and UNDEF using the mask originally passed to VP_REVERSE
11518 SDValue Result =
11519 DAG.getNode(ISD::CONCAT_VECTORS, DL, GatherVT, HiRev, LoRev);
11520
11521 // Slide off any elements from past EVL that were reversed into the low
11522 // elements.
11523 unsigned MinElts = GatherVT.getVectorMinNumElements();
11524 SDValue VLMax =
11525 DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), MinElts));
11526 SDValue Diff = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, EVL);
11527
11528 Result = getVSlidedown(DAG, Subtarget, DL, GatherVT,
11529 DAG.getUNDEF(GatherVT), Result, Diff, Mask, EVL);
11530
11531 if (IsMaskVector) {
11532 // Truncate Result back to a mask vector
11533 Result =
11534 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
11535 {Result, DAG.getConstant(0, DL, GatherVT),
11536                          DAG.getCondCode(ISD::SETNE),
11537                          DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
11538 }
11539
11540 if (!VT.isFixedLengthVector())
11541 return Result;
11542 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11543 }
11544
11545 // Just promote the int type to i16 which will double the LMUL.
11546 IndicesVT = MVT::getVectorVT(MVT::i16, IndicesVT.getVectorElementCount());
11547 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
11548 }
11549
11550 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IndicesVT, Mask, EVL);
11551 SDValue VecLen =
11552 DAG.getNode(ISD::SUB, DL, XLenVT, EVL, DAG.getConstant(1, DL, XLenVT));
11553 SDValue VecLenSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
11554 DAG.getUNDEF(IndicesVT), VecLen, EVL);
11555 SDValue VRSUB = DAG.getNode(RISCVISD::SUB_VL, DL, IndicesVT, VecLenSplat, VID,
11556 DAG.getUNDEF(IndicesVT), Mask, EVL);
11557 SDValue Result = DAG.getNode(GatherOpc, DL, GatherVT, Op1, VRSUB,
11558 DAG.getUNDEF(GatherVT), Mask, EVL);
11559
11560 if (IsMaskVector) {
11561 // Truncate Result back to a mask vector
11562 Result = DAG.getNode(
11563 RISCVISD::SETCC_VL, DL, ContainerVT,
11564 {Result, DAG.getConstant(0, DL, GatherVT), DAG.getCondCode(ISD::SETNE),
11565 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
11566 }
11567
11568 if (!VT.isFixedLengthVector())
11569 return Result;
11570 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11571}
11572
11573SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op,
11574 SelectionDAG &DAG) const {
11575 MVT VT = Op.getSimpleValueType();
11576 if (VT.getVectorElementType() != MVT::i1)
11577 return lowerVPOp(Op, DAG);
11578
11579 // It is safe to drop mask parameter as masked-off elements are undef.
11580 SDValue Op1 = Op->getOperand(0);
11581 SDValue Op2 = Op->getOperand(1);
11582 SDValue VL = Op->getOperand(3);
11583
11584 MVT ContainerVT = VT;
11585 const bool IsFixed = VT.isFixedLengthVector();
11586 if (IsFixed) {
11587 ContainerVT = getContainerForFixedLengthVector(VT);
11588 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
11589 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
11590 }
11591
11592 SDLoc DL(Op);
11593 SDValue Val = DAG.getNode(getRISCVVLOp(Op), DL, ContainerVT, Op1, Op2, VL);
11594 if (!IsFixed)
11595 return Val;
11596 return convertFromScalableVector(VT, Val, DAG, Subtarget);
11597}
11598
11599SDValue RISCVTargetLowering::lowerVPStridedLoad(SDValue Op,
11600 SelectionDAG &DAG) const {
11601 SDLoc DL(Op);
11602 MVT XLenVT = Subtarget.getXLenVT();
11603 MVT VT = Op.getSimpleValueType();
11604 MVT ContainerVT = VT;
11605 if (VT.isFixedLengthVector())
11606 ContainerVT = getContainerForFixedLengthVector(VT);
11607
11608 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
11609
11610 auto *VPNode = cast<VPStridedLoadSDNode>(Op);
11611 // Check if the mask is known to be all ones
11612 SDValue Mask = VPNode->getMask();
11613 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11614
11615 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vlse
11616 : Intrinsic::riscv_vlse_mask,
11617 DL, XLenVT);
11618 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID,
11619 DAG.getUNDEF(ContainerVT), VPNode->getBasePtr(),
11620 VPNode->getStride()};
11621 if (!IsUnmasked) {
11622 if (VT.isFixedLengthVector()) {
11623 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
11624 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11625 }
11626 Ops.push_back(Mask);
11627 }
11628 Ops.push_back(VPNode->getVectorLength());
11629 if (!IsUnmasked) {
11630 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
11631 Ops.push_back(Policy);
11632 }
11633
11634 SDValue Result =
11635       DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
11636                               VPNode->getMemoryVT(), VPNode->getMemOperand());
11637 SDValue Chain = Result.getValue(1);
11638
11639 if (VT.isFixedLengthVector())
11640 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
11641
11642 return DAG.getMergeValues({Result, Chain}, DL);
11643}
11644
11645SDValue RISCVTargetLowering::lowerVPStridedStore(SDValue Op,
11646 SelectionDAG &DAG) const {
11647 SDLoc DL(Op);
11648 MVT XLenVT = Subtarget.getXLenVT();
11649
11650 auto *VPNode = cast<VPStridedStoreSDNode>(Op);
11651 SDValue StoreVal = VPNode->getValue();
11652 MVT VT = StoreVal.getSimpleValueType();
11653 MVT ContainerVT = VT;
11654 if (VT.isFixedLengthVector()) {
11655 ContainerVT = getContainerForFixedLengthVector(VT);
11656 StoreVal = convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
11657 }
11658
11659 // Check if the mask is known to be all ones
11660 SDValue Mask = VPNode->getMask();
11661 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11662
11663 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vsse
11664 : Intrinsic::riscv_vsse_mask,
11665 DL, XLenVT);
11666 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, StoreVal,
11667 VPNode->getBasePtr(), VPNode->getStride()};
11668 if (!IsUnmasked) {
11669 if (VT.isFixedLengthVector()) {
11670 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
11671 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11672 }
11673 Ops.push_back(Mask);
11674 }
11675 Ops.push_back(VPNode->getVectorLength());
11676
11677 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VPNode->getVTList(),
11678 Ops, VPNode->getMemoryVT(),
11679 VPNode->getMemOperand());
11680}
11681
11682// Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be
11683// matched to a RVV indexed load. The RVV indexed load instructions only
11684// support the "unsigned unscaled" addressing mode; indices are implicitly
11685// zero-extended or truncated to XLEN and are treated as byte offsets. Any
11686// signed or scaled indexing is extended to the XLEN value type and scaled
11687// accordingly.
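// For example (roughly): a masked gather of i32 elements with i64 indices on
// RV64 is lowered to the riscv_vluxei_mask intrinsic built below, i.e. an
// indexed-unordered load such as vluxei64.v whose index vector holds plain
// byte offsets; any scaling or sign extension of the indices is expected to
// have been applied to the index operand already.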
11688SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,
11689 SelectionDAG &DAG) const {
11690 SDLoc DL(Op);
11691 MVT VT = Op.getSimpleValueType();
11692
11693 const auto *MemSD = cast<MemSDNode>(Op.getNode());
11694 EVT MemVT = MemSD->getMemoryVT();
11695 MachineMemOperand *MMO = MemSD->getMemOperand();
11696 SDValue Chain = MemSD->getChain();
11697 SDValue BasePtr = MemSD->getBasePtr();
11698
11699 [[maybe_unused]] ISD::LoadExtType LoadExtType;
11700 SDValue Index, Mask, PassThru, VL;
11701
11702 if (auto *VPGN = dyn_cast<VPGatherSDNode>(Op.getNode())) {
11703 Index = VPGN->getIndex();
11704 Mask = VPGN->getMask();
11705 PassThru = DAG.getUNDEF(VT);
11706 VL = VPGN->getVectorLength();
11707 // VP doesn't support extending loads.
11708     LoadExtType = ISD::NON_EXTLOAD;
11709   } else {
11710 // Else it must be a MGATHER.
11711 auto *MGN = cast<MaskedGatherSDNode>(Op.getNode());
11712 Index = MGN->getIndex();
11713 Mask = MGN->getMask();
11714 PassThru = MGN->getPassThru();
11715 LoadExtType = MGN->getExtensionType();
11716 }
11717
11718 MVT IndexVT = Index.getSimpleValueType();
11719 MVT XLenVT = Subtarget.getXLenVT();
11720
11722 "Unexpected VTs!");
11723 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
11724 // Targets have to explicitly opt-in for extending vector loads.
11725 assert(LoadExtType == ISD::NON_EXTLOAD &&
11726 "Unexpected extending MGATHER/VP_GATHER");
11727
11728 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
11729 // the selection of the masked intrinsics doesn't do this for us.
11730 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11731
11732 MVT ContainerVT = VT;
11733 if (VT.isFixedLengthVector()) {
11734 ContainerVT = getContainerForFixedLengthVector(VT);
11735 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
11736 ContainerVT.getVectorElementCount());
11737
11738 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
11739
11740 if (!IsUnmasked) {
11741 MVT MaskVT = getMaskTypeFor(ContainerVT);
11742 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11743 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
11744 }
11745 }
11746
11747 if (!VL)
11748 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11749
11750 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
11751 IndexVT = IndexVT.changeVectorElementType(XLenVT);
11752 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
11753 }
11754
11755 unsigned IntID =
11756 IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask;
11757 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
11758 if (IsUnmasked)
11759 Ops.push_back(DAG.getUNDEF(ContainerVT));
11760 else
11761 Ops.push_back(PassThru);
11762 Ops.push_back(BasePtr);
11763 Ops.push_back(Index);
11764 if (!IsUnmasked)
11765 Ops.push_back(Mask);
11766 Ops.push_back(VL);
11767 if (!IsUnmasked)
11768     Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
11769
11770 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
11771 SDValue Result =
11772 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
11773 Chain = Result.getValue(1);
11774
11775 if (VT.isFixedLengthVector())
11776 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
11777
11778 return DAG.getMergeValues({Result, Chain}, DL);
11779}
11780
11781// Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be
11782// matched to a RVV indexed store. The RVV indexed store instructions only
11783// support the "unsigned unscaled" addressing mode; indices are implicitly
11784// zero-extended or truncated to XLEN and are treated as byte offsets. Any
11785// signed or scaled indexing is extended to the XLEN value type and scaled
11786// accordingly.
11787SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op,
11788 SelectionDAG &DAG) const {
11789 SDLoc DL(Op);
11790 const auto *MemSD = cast<MemSDNode>(Op.getNode());
11791 EVT MemVT = MemSD->getMemoryVT();
11792 MachineMemOperand *MMO = MemSD->getMemOperand();
11793 SDValue Chain = MemSD->getChain();
11794 SDValue BasePtr = MemSD->getBasePtr();
11795
11796 [[maybe_unused]] bool IsTruncatingStore = false;
11797 SDValue Index, Mask, Val, VL;
11798
11799 if (auto *VPSN = dyn_cast<VPScatterSDNode>(Op.getNode())) {
11800 Index = VPSN->getIndex();
11801 Mask = VPSN->getMask();
11802 Val = VPSN->getValue();
11803 VL = VPSN->getVectorLength();
11804 // VP doesn't support truncating stores.
11805 IsTruncatingStore = false;
11806 } else {
11807 // Else it must be a MSCATTER.
11808 auto *MSN = cast<MaskedScatterSDNode>(Op.getNode());
11809 Index = MSN->getIndex();
11810 Mask = MSN->getMask();
11811 Val = MSN->getValue();
11812 IsTruncatingStore = MSN->isTruncatingStore();
11813 }
11814
11815 MVT VT = Val.getSimpleValueType();
11816 MVT IndexVT = Index.getSimpleValueType();
11817 MVT XLenVT = Subtarget.getXLenVT();
11818
11820 "Unexpected VTs!");
11821 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
11822 // Targets have to explicitly opt-in for extending vector loads and
11823 // truncating vector stores.
11824 assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER");
11825
11826 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
11827 // the selection of the masked intrinsics doesn't do this for us.
11828 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11829
11830 MVT ContainerVT = VT;
11831 if (VT.isFixedLengthVector()) {
11832 ContainerVT = getContainerForFixedLengthVector(VT);
11833 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
11834 ContainerVT.getVectorElementCount());
11835
11836 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
11837 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
11838
11839 if (!IsUnmasked) {
11840 MVT MaskVT = getMaskTypeFor(ContainerVT);
11841 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11842 }
11843 }
11844
11845 if (!VL)
11846 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11847
11848 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
11849 IndexVT = IndexVT.changeVectorElementType(XLenVT);
11850 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
11851 }
11852
11853 unsigned IntID =
11854 IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
11855 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
11856 Ops.push_back(Val);
11857 Ops.push_back(BasePtr);
11858 Ops.push_back(Index);
11859 if (!IsUnmasked)
11860 Ops.push_back(Mask);
11861 Ops.push_back(VL);
11862
11863   return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
11864                                  DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
11865}
11866
11867SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op,
11868 SelectionDAG &DAG) const {
11869 const MVT XLenVT = Subtarget.getXLenVT();
11870 SDLoc DL(Op);
11871 SDValue Chain = Op->getOperand(0);
11872 SDValue SysRegNo = DAG.getTargetConstant(
11873 RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
11874 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
11875 SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
11876
11877 // Encoding used for rounding mode in RISC-V differs from that used in
11878 // FLT_ROUNDS. To convert it the RISC-V rounding mode is used as an index in a
11879 // table, which consists of a sequence of 4-bit fields, each representing
11880 // corresponding FLT_ROUNDS mode.
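  // For example, if FRM holds RTZ (encoded as 1), the lookup below computes
  // (Table >> 4) & 7, which is the FLT_ROUNDS value for round-toward-zero (0).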
11881   static const int Table =
11882       (int(RoundingMode::NearestTiesToEven) << 4 * RISCVFPRndMode::RNE) |
11883       (int(RoundingMode::TowardZero) << 4 * RISCVFPRndMode::RTZ) |
11884       (int(RoundingMode::TowardNegative) << 4 * RISCVFPRndMode::RDN) |
11885       (int(RoundingMode::TowardPositive) << 4 * RISCVFPRndMode::RUP) |
11886       (int(RoundingMode::NearestTiesToAway) << 4 * RISCVFPRndMode::RMM);
11887
11888 SDValue Shift =
11889 DAG.getNode(ISD::SHL, DL, XLenVT, RM, DAG.getConstant(2, DL, XLenVT));
11890 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
11891 DAG.getConstant(Table, DL, XLenVT), Shift);
11892 SDValue Masked = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
11893 DAG.getConstant(7, DL, XLenVT));
11894
11895 return DAG.getMergeValues({Masked, Chain}, DL);
11896}
11897
11898SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
11899 SelectionDAG &DAG) const {
11900 const MVT XLenVT = Subtarget.getXLenVT();
11901 SDLoc DL(Op);
11902 SDValue Chain = Op->getOperand(0);
11903 SDValue RMValue = Op->getOperand(1);
11904 SDValue SysRegNo = DAG.getTargetConstant(
11905 RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
11906
11907 // Encoding used for rounding mode in RISC-V differs from that used in
11908 // FLT_ROUNDS. To convert it the C rounding mode is used as an index in
11909 // a table, which consists of a sequence of 4-bit fields, each representing
11910 // corresponding RISC-V mode.
11911   static const unsigned Table =
11912       (RISCVFPRndMode::RNE << 4 * int(RoundingMode::NearestTiesToEven)) |
11913       (RISCVFPRndMode::RTZ << 4 * int(RoundingMode::TowardZero)) |
11914       (RISCVFPRndMode::RDN << 4 * int(RoundingMode::TowardNegative)) |
11915       (RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) |
11916       (RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway));
11917
11918 RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, RMValue);
11919
11920 SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue,
11921 DAG.getConstant(2, DL, XLenVT));
11922 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
11923 DAG.getConstant(Table, DL, XLenVT), Shift);
11924 RMValue = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
11925 DAG.getConstant(0x7, DL, XLenVT));
11926 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
11927 RMValue);
11928}
11929
11930SDValue RISCVTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
11931 SelectionDAG &DAG) const {
11932   MachineFunction &MF = DAG.getMachineFunction();
11933
11934 bool isRISCV64 = Subtarget.is64Bit();
11935 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11936
11937 int FI = MF.getFrameInfo().CreateFixedObject(isRISCV64 ? 8 : 4, 0, false);
11938 return DAG.getFrameIndex(FI, PtrVT);
11939}
11940
11941// Returns the opcode of the target-specific SDNode that implements the 32-bit
11942// form of the given Opcode.
11943static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
11944 switch (Opcode) {
11945 default:
11946 llvm_unreachable("Unexpected opcode");
11947 case ISD::SHL:
11948 return RISCVISD::SLLW;
11949 case ISD::SRA:
11950 return RISCVISD::SRAW;
11951 case ISD::SRL:
11952 return RISCVISD::SRLW;
11953 case ISD::SDIV:
11954 return RISCVISD::DIVW;
11955 case ISD::UDIV:
11956 return RISCVISD::DIVUW;
11957 case ISD::UREM:
11958 return RISCVISD::REMUW;
11959 case ISD::ROTL:
11960 return RISCVISD::ROLW;
11961 case ISD::ROTR:
11962 return RISCVISD::RORW;
11963 }
11964}
11965
11966// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
11967// node. Because i8/i16/i32 isn't a legal type for RV64, these operations would
11968// otherwise be promoted to i64, making it difficult to select the
11969 // SLLW/DIVUW/.../*W later on, because the fact that the operation was originally of
11970// type i8/i16/i32 is lost.
11971 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
11972                                    unsigned ExtOpc = ISD::ANY_EXTEND) {
11973 SDLoc DL(N);
11974 RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
11975 SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
11976 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
11977 SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
11978 // ReplaceNodeResults requires we maintain the same type for the return value.
11979 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
11980}
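// For example (roughly): on RV64 an illegal
//   t: i32 = srl x, y
// becomes
//   t: i32 = truncate (i64 RISCVISD::SRLW (any_extend x), (any_extend y))
// so that isel can still pick srlw rather than a promoted 64-bit shift that
// would need the upper bits of x explicitly zeroed.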
11981
11982 // Converts the given 32-bit operation to an i64 operation with sign-extension
11983 // semantics, to reduce the number of sign-extension instructions.
11984 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
11985   SDLoc DL(N);
11986 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
11987 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
11988 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
11989 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
11990 DAG.getValueType(MVT::i32));
11991 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
11992}
11993
11994 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
11995                                              SmallVectorImpl<SDValue> &Results,
11996                                              SelectionDAG &DAG) const {
11997 SDLoc DL(N);
11998 switch (N->getOpcode()) {
11999 default:
12000 llvm_unreachable("Don't know how to custom type legalize this operation!");
12001   case ISD::STRICT_FP_TO_SINT:
12002   case ISD::STRICT_FP_TO_UINT:
12003   case ISD::FP_TO_SINT:
12004 case ISD::FP_TO_UINT: {
12005 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12006 "Unexpected custom legalisation");
12007 bool IsStrict = N->isStrictFPOpcode();
12008 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
12009 N->getOpcode() == ISD::STRICT_FP_TO_SINT;
12010 SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
12011 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
12012         TargetLowering::TypeSoftenFloat) {
12013       if (!isTypeLegal(Op0.getValueType()))
12014 return;
12015 if (IsStrict) {
12016 SDValue Chain = N->getOperand(0);
12017         // In the absence of Zfh, promote f16 to f32, then convert.
12018 if (Op0.getValueType() == MVT::f16 &&
12019 !Subtarget.hasStdExtZfhOrZhinx()) {
12020 Op0 = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
12021 {Chain, Op0});
12022 Chain = Op0.getValue(1);
12023 }
12024         unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RV64
12025                                 : RISCVISD::STRICT_FCVT_WU_RV64;
12026 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
12027 SDValue Res = DAG.getNode(
12028 Opc, DL, VTs, Chain, Op0,
12029 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
12030 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12031 Results.push_back(Res.getValue(1));
12032 return;
12033 }
12034       // For bf16, or f16 in the absence of Zfh, promote [b]f16 to f32 and then
12035 // convert.
12036 if ((Op0.getValueType() == MVT::f16 &&
12037 !Subtarget.hasStdExtZfhOrZhinx()) ||
12038 Op0.getValueType() == MVT::bf16)
12039 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
12040
12041 unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
12042 SDValue Res =
12043 DAG.getNode(Opc, DL, MVT::i64, Op0,
12044 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
12045 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12046 return;
12047 }
12048 // If the FP type needs to be softened, emit a library call using the 'si'
12049 // version. If we left it to default legalization we'd end up with 'di'. If
12050 // the FP type doesn't need to be softened just let generic type
12051 // legalization promote the result type.
12052 RTLIB::Libcall LC;
12053 if (IsSigned)
12054 LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
12055 else
12056 LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
12057 MakeLibCallOptions CallOptions;
12058 EVT OpVT = Op0.getValueType();
12059 CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
12060 SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
12061 SDValue Result;
12062 std::tie(Result, Chain) =
12063 makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
12064 Results.push_back(Result);
12065 if (IsStrict)
12066 Results.push_back(Chain);
12067 break;
12068 }
12069 case ISD::LROUND: {
12070 SDValue Op0 = N->getOperand(0);
12071 EVT Op0VT = Op0.getValueType();
12072 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
12073         TargetLowering::TypeSoftenFloat) {
12074       if (!isTypeLegal(Op0VT))
12075 return;
12076
12077       // In the absence of Zfh, promote f16 to f32, then convert.
12078 if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx())
12079 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
12080
12081 SDValue Res =
12082 DAG.getNode(RISCVISD::FCVT_W_RV64, DL, MVT::i64, Op0,
12083 DAG.getTargetConstant(RISCVFPRndMode::RMM, DL, MVT::i64));
12084 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12085 return;
12086 }
12087 // If the FP type needs to be softened, emit a library call to lround. We'll
12088 // need to truncate the result. We assume any value that doesn't fit in i32
12089 // is allowed to return an unspecified value.
12090 RTLIB::Libcall LC =
12091 Op0.getValueType() == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
12092 MakeLibCallOptions CallOptions;
12093 EVT OpVT = Op0.getValueType();
12094 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64, true);
12095 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
12096 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
12097 Results.push_back(Result);
12098 break;
12099 }
12100   case ISD::READCYCLECOUNTER:
12101   case ISD::READSTEADYCOUNTER: {
12102     assert(!Subtarget.is64Bit() && "READCYCLECOUNTER/READSTEADYCOUNTER only "
12103 "has custom type legalization on riscv32");
12104
12105 SDValue LoCounter, HiCounter;
12106 MVT XLenVT = Subtarget.getXLenVT();
12107 if (N->getOpcode() == ISD::READCYCLECOUNTER) {
12108 LoCounter = DAG.getTargetConstant(
12109 RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding, DL, XLenVT);
12110 HiCounter = DAG.getTargetConstant(
12111 RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding, DL, XLenVT);
12112 } else {
12113 LoCounter = DAG.getTargetConstant(
12114 RISCVSysReg::lookupSysRegByName("TIME")->Encoding, DL, XLenVT);
12115 HiCounter = DAG.getTargetConstant(
12116 RISCVSysReg::lookupSysRegByName("TIMEH")->Encoding, DL, XLenVT);
12117 }
12118 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
12119     SDValue RCW = DAG.getNode(RISCVISD::READ_COUNTER_WIDE, DL, VTs,
12120                               N->getOperand(0), LoCounter, HiCounter);
12121
12122 Results.push_back(
12123 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
12124 Results.push_back(RCW.getValue(2));
12125 break;
12126 }
12127 case ISD::LOAD: {
12128 if (!ISD::isNON_EXTLoad(N))
12129 return;
12130
12131 // Use a SEXTLOAD instead of the default EXTLOAD. Similar to the
12132 // sext_inreg we emit for ADD/SUB/MUL/SLLI.
12133 LoadSDNode *Ld = cast<LoadSDNode>(N);
12134
12135 SDLoc dl(N);
12136 SDValue Res = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Ld->getChain(),
12137 Ld->getBasePtr(), Ld->getMemoryVT(),
12138 Ld->getMemOperand());
12139 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Res));
12140 Results.push_back(Res.getValue(1));
12141 return;
12142 }
12143 case ISD::MUL: {
12144 unsigned Size = N->getSimpleValueType(0).getSizeInBits();
12145 unsigned XLen = Subtarget.getXLen();
12146 // This multiply needs to be expanded, try to use MULHSU+MUL if possible.
12147 if (Size > XLen) {
12148 assert(Size == (XLen * 2) && "Unexpected custom legalisation");
12149 SDValue LHS = N->getOperand(0);
12150 SDValue RHS = N->getOperand(1);
12151 APInt HighMask = APInt::getHighBitsSet(Size, XLen);
12152
12153 bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask);
12154 bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask);
12155 // We need exactly one side to be unsigned.
12156 if (LHSIsU == RHSIsU)
12157 return;
12158
12159 auto MakeMULPair = [&](SDValue S, SDValue U) {
12160 MVT XLenVT = Subtarget.getXLenVT();
12161 S = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, S);
12162 U = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, U);
12163 SDValue Lo = DAG.getNode(ISD::MUL, DL, XLenVT, S, U);
12164 SDValue Hi = DAG.getNode(RISCVISD::MULHSU, DL, XLenVT, S, U);
12165 return DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0), Lo, Hi);
12166 };
12167
12168 bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen;
12169 bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen;
12170
12171 // The other operand should be signed, but still prefer MULH when
12172 // possible.
12173 if (RHSIsU && LHSIsS && !RHSIsS)
12174 Results.push_back(MakeMULPair(LHS, RHS));
12175 else if (LHSIsU && RHSIsS && !LHSIsS)
12176 Results.push_back(MakeMULPair(RHS, LHS));
12177
12178 return;
12179 }
12180 [[fallthrough]];
12181 }
12182 case ISD::ADD:
12183 case ISD::SUB:
12184 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12185 "Unexpected custom legalisation");
12186 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
12187 break;
12188 case ISD::SHL:
12189 case ISD::SRA:
12190 case ISD::SRL:
12191 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12192 "Unexpected custom legalisation");
12193 if (N->getOperand(1).getOpcode() != ISD::Constant) {
12194 // If we can use a BSET instruction, allow default promotion to apply.
12195 if (N->getOpcode() == ISD::SHL && Subtarget.hasStdExtZbs() &&
12196 isOneConstant(N->getOperand(0)))
12197 break;
12198 Results.push_back(customLegalizeToWOp(N, DAG));
12199 break;
12200 }
12201
12202 // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is
12203 // similar to customLegalizeToWOpWithSExt, but we must zero_extend the
12204 // shift amount.
12205 if (N->getOpcode() == ISD::SHL) {
12206 SDLoc DL(N);
12207 SDValue NewOp0 =
12208 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12209 SDValue NewOp1 =
12210 DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1));
12211 SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1);
12212 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
12213 DAG.getValueType(MVT::i32));
12214 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
12215 }
12216
12217 break;
12218 case ISD::ROTL:
12219 case ISD::ROTR:
12220 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12221 "Unexpected custom legalisation");
12222 assert((Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
12223 Subtarget.hasVendorXTHeadBb()) &&
12224 "Unexpected custom legalization");
12225 if (!isa<ConstantSDNode>(N->getOperand(1)) &&
12226 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()))
12227 return;
12228 Results.push_back(customLegalizeToWOp(N, DAG));
12229 break;
12230 case ISD::CTTZ:
12231   case ISD::CTTZ_ZERO_UNDEF:
12232   case ISD::CTLZ:
12233 case ISD::CTLZ_ZERO_UNDEF: {
12234 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12235 "Unexpected custom legalisation");
12236
12237 SDValue NewOp0 =
12238 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12239 bool IsCTZ =
12240 N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF;
12241 unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW;
12242 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0);
12243 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12244 return;
12245 }
12246 case ISD::SDIV:
12247 case ISD::UDIV:
12248 case ISD::UREM: {
12249 MVT VT = N->getSimpleValueType(0);
12250 assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
12251 Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
12252 "Unexpected custom legalisation");
12253 // Don't promote division/remainder by constant since we should expand those
12254 // to multiply by magic constant.
12255     AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
12256     if (N->getOperand(1).getOpcode() == ISD::Constant &&
12257 !isIntDivCheap(N->getValueType(0), Attr))
12258 return;
12259
12260 // If the input is i32, use ANY_EXTEND since the W instructions don't read
12261 // the upper 32 bits. For other types we need to sign or zero extend
12262 // based on the opcode.
12263 unsigned ExtOpc = ISD::ANY_EXTEND;
12264 if (VT != MVT::i32)
12265 ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
12266                                            : ISD::ZERO_EXTEND;
12267
12268 Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
12269 break;
12270 }
12271 case ISD::SADDO: {
12272 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12273 "Unexpected custom legalisation");
12274
12275 // If the RHS is a constant, we can simplify ConditionRHS below. Otherwise
12276 // use the default legalization.
12277 if (!isa<ConstantSDNode>(N->getOperand(1)))
12278 return;
12279
12280 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
12281 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
12282 SDValue Res = DAG.getNode(ISD::ADD, DL, MVT::i64, LHS, RHS);
12283 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
12284 DAG.getValueType(MVT::i32));
12285
12286 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
12287
12288 // For an addition, the result should be less than one of the operands (LHS)
12289 // if and only if the other operand (RHS) is negative, otherwise there will
12290 // be overflow.
12291 // For a subtraction, the result should be less than one of the operands
12292 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
12293 // otherwise there will be overflow.
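    // Worked example for the addition case: INT32_MAX + 1 gives Res = INT32_MIN,
    // so Res < LHS is true while RHS < 0 is false; the XOR below is 1 and
    // overflow is reported. By contrast, 5 + (-1) gives 4 < 5 with RHS < 0 true,
    // so both setccs are 1 and the XOR is 0 (no overflow).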
12294 EVT OType = N->getValueType(1);
12295 SDValue ResultLowerThanLHS = DAG.getSetCC(DL, OType, Res, LHS, ISD::SETLT);
12296 SDValue ConditionRHS = DAG.getSetCC(DL, OType, RHS, Zero, ISD::SETLT);
12297
12298 SDValue Overflow =
12299 DAG.getNode(ISD::XOR, DL, OType, ConditionRHS, ResultLowerThanLHS);
12300 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12301 Results.push_back(Overflow);
12302 return;
12303 }
12304 case ISD::UADDO:
12305 case ISD::USUBO: {
12306 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12307 "Unexpected custom legalisation");
12308 bool IsAdd = N->getOpcode() == ISD::UADDO;
12309 // Create an ADDW or SUBW.
12310 SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12311 SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12312 SDValue Res =
12313 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
12314 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
12315 DAG.getValueType(MVT::i32));
12316
12317 SDValue Overflow;
12318 if (IsAdd && isOneConstant(RHS)) {
12319 // Special case uaddo X, 1 overflowed if the addition result is 0.
12320 // The general case (X + C) < C is not necessarily beneficial. Although we
12321 // reduce the live range of X, we may introduce the materialization of
12322     // constant C, especially when the setcc result is used by a branch, since
12323     // we have no compare-with-constant-and-branch instructions.
12324 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res,
12325 DAG.getConstant(0, DL, MVT::i64), ISD::SETEQ);
12326 } else if (IsAdd && isAllOnesConstant(RHS)) {
12327 // Special case uaddo X, -1 overflowed if X != 0.
12328 Overflow = DAG.getSetCC(DL, N->getValueType(1), N->getOperand(0),
12329 DAG.getConstant(0, DL, MVT::i32), ISD::SETNE);
12330 } else {
12331 // Sign extend the LHS and perform an unsigned compare with the ADDW
12332 // result. Since the inputs are sign extended from i32, this is equivalent
12333 // to comparing the lower 32 bits.
12334 LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
12335 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,
12336 IsAdd ? ISD::SETULT : ISD::SETUGT);
12337 }
12338
12339 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12340 Results.push_back(Overflow);
12341 return;
12342 }
12343 case ISD::UADDSAT:
12344 case ISD::USUBSAT: {
12345 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12346 "Unexpected custom legalisation");
12347 if (Subtarget.hasStdExtZbb()) {
12348 // With Zbb we can sign extend and let LegalizeDAG use minu/maxu. Using
12349 // sign extend allows overflow of the lower 32 bits to be detected on
12350 // the promoted size.
12351 SDValue LHS =
12352 DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
12353 SDValue RHS =
12354 DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
12355 SDValue Res = DAG.getNode(N->getOpcode(), DL, MVT::i64, LHS, RHS);
12356 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12357 return;
12358 }
12359
12360 // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom
12361 // promotion for UADDO/USUBO.
12362 Results.push_back(expandAddSubSat(N, DAG));
12363 return;
12364 }
12365 case ISD::SADDSAT:
12366 case ISD::SSUBSAT: {
12367 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12368 "Unexpected custom legalisation");
12369 Results.push_back(expandAddSubSat(N, DAG));
12370 return;
12371 }
12372 case ISD::ABS: {
12373 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12374 "Unexpected custom legalisation");
12375
12376 if (Subtarget.hasStdExtZbb()) {
12377 // Emit a special ABSW node that will be expanded to NEGW+MAX at isel.
12378 // This allows us to remember that the result is sign extended. Expanding
12379 // to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits.
12380 SDValue Src = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64,
12381 N->getOperand(0));
12382 SDValue Abs = DAG.getNode(RISCVISD::ABSW, DL, MVT::i64, Src);
12383 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs));
12384 return;
12385 }
12386
12387 // Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y)
12388 SDValue Src = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12389
12390     // Freeze the source so we can increase its use count.
12391 Src = DAG.getFreeze(Src);
12392
12393 // Copy sign bit to all bits using the sraiw pattern.
12394 SDValue SignFill = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Src,
12395 DAG.getValueType(MVT::i32));
12396 SignFill = DAG.getNode(ISD::SRA, DL, MVT::i64, SignFill,
12397 DAG.getConstant(31, DL, MVT::i64));
12398
12399 SDValue NewRes = DAG.getNode(ISD::XOR, DL, MVT::i64, Src, SignFill);
12400 NewRes = DAG.getNode(ISD::SUB, DL, MVT::i64, NewRes, SignFill);
12401
12402 // NOTE: The result is only required to be anyextended, but sext is
12403 // consistent with type legalization of sub.
12404 NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewRes,
12405 DAG.getValueType(MVT::i32));
12406 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
12407 return;
12408 }
12409 case ISD::BITCAST: {
12410 EVT VT = N->getValueType(0);
12411 assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!");
12412 SDValue Op0 = N->getOperand(0);
12413 EVT Op0VT = Op0.getValueType();
12414 MVT XLenVT = Subtarget.getXLenVT();
12415 if (VT == MVT::i16 && Op0VT == MVT::f16 &&
12416 Subtarget.hasStdExtZfhminOrZhinxmin()) {
12417 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
12418 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
12419 } else if (VT == MVT::i16 && Op0VT == MVT::bf16 &&
12420 Subtarget.hasStdExtZfbfmin()) {
12421 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
12422 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
12423 } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() &&
12424 Subtarget.hasStdExtFOrZfinx()) {
12425 SDValue FPConv =
12426 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
12427 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
12428 } else if (VT == MVT::i64 && Op0VT == MVT::f64 && XLenVT == MVT::i32) {
12429 SDValue NewReg = DAG.getNode(RISCVISD::SplitF64, DL,
12430 DAG.getVTList(MVT::i32, MVT::i32), Op0);
12431 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
12432 NewReg.getValue(0), NewReg.getValue(1));
12433 Results.push_back(RetReg);
12434 } else if (!VT.isVector() && Op0VT.isFixedLengthVector() &&
12435 isTypeLegal(Op0VT)) {
12436 // Custom-legalize bitcasts from fixed-length vector types to illegal
12437 // scalar types in order to improve codegen. Bitcast the vector to a
12438 // one-element vector type whose element type is the same as the result
12439 // type, and extract the first element.
12440 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
12441 if (isTypeLegal(BVT)) {
12442 SDValue BVec = DAG.getBitcast(BVT, Op0);
12443 Results.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
12444 DAG.getVectorIdxConstant(0, DL)));
12445 }
12446 }
12447 break;
12448 }
12449 case RISCVISD::BREV8: {
12450 MVT VT = N->getSimpleValueType(0);
12451 MVT XLenVT = Subtarget.getXLenVT();
12452 assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
12453 "Unexpected custom legalisation");
12454 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
12455 SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
12456 SDValue NewRes = DAG.getNode(N->getOpcode(), DL, XLenVT, NewOp);
12457 // ReplaceNodeResults requires we maintain the same type for the return
12458 // value.
12459 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NewRes));
12460 break;
12461 }
12462   case ISD::EXTRACT_VECTOR_ELT: {
12463     // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
12464 // type is illegal (currently only vXi64 RV32).
12465 // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
12466 // transferred to the destination register. We issue two of these from the
12467 // upper- and lower- halves of the SEW-bit vector element, slid down to the
12468 // first element.
12469 SDValue Vec = N->getOperand(0);
12470 SDValue Idx = N->getOperand(1);
12471
12472 // The vector type hasn't been legalized yet so we can't issue target
12473 // specific nodes if it needs legalization.
12474 // FIXME: We would manually legalize if it's important.
12475 if (!isTypeLegal(Vec.getValueType()))
12476 return;
12477
12478 MVT VecVT = Vec.getSimpleValueType();
12479
12480 assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
12481 VecVT.getVectorElementType() == MVT::i64 &&
12482 "Unexpected EXTRACT_VECTOR_ELT legalization");
12483
12484 // If this is a fixed vector, we need to convert it to a scalable vector.
12485 MVT ContainerVT = VecVT;
12486 if (VecVT.isFixedLengthVector()) {
12487 ContainerVT = getContainerForFixedLengthVector(VecVT);
12488 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
12489 }
12490
12491 MVT XLenVT = Subtarget.getXLenVT();
12492
12493 // Use a VL of 1 to avoid processing more elements than we need.
12494 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
12495
12496 // Unless the index is known to be 0, we must slide the vector down to get
12497 // the desired element into index 0.
12498 if (!isNullConstant(Idx)) {
12499 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
12500 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
12501 }
12502
12503 // Extract the lower XLEN bits of the correct vector element.
12504 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
12505
12506 // To extract the upper XLEN bits of the vector element, shift the first
12507 // element right by 32 bits and re-extract the lower XLEN bits.
12508 SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
12509 DAG.getUNDEF(ContainerVT),
12510 DAG.getConstant(32, DL, XLenVT), VL);
12511 SDValue LShr32 =
12512 DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec, ThirtyTwoV,
12513 DAG.getUNDEF(ContainerVT), Mask, VL);
12514
12515 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
12516
12517 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
12518 break;
12519 }
12520   case ISD::INTRINSIC_WO_CHAIN: {
12521     unsigned IntNo = N->getConstantOperandVal(0);
12522 switch (IntNo) {
12523 default:
12525 "Don't know how to custom type legalize this intrinsic!");
12526 case Intrinsic::experimental_get_vector_length: {
12527 SDValue Res = lowerGetVectorLength(N, DAG, Subtarget);
12528 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12529 return;
12530 }
12531 case Intrinsic::experimental_cttz_elts: {
12532 SDValue Res = lowerCttzElts(N, DAG, Subtarget);
12533 Results.push_back(
12534 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res));
12535 return;
12536 }
12537 case Intrinsic::riscv_orc_b:
12538 case Intrinsic::riscv_brev8:
12539 case Intrinsic::riscv_sha256sig0:
12540 case Intrinsic::riscv_sha256sig1:
12541 case Intrinsic::riscv_sha256sum0:
12542 case Intrinsic::riscv_sha256sum1:
12543 case Intrinsic::riscv_sm3p0:
12544 case Intrinsic::riscv_sm3p1: {
12545 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12546 return;
12547 unsigned Opc;
12548 switch (IntNo) {
12549 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
12550 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
12551 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
12552 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
12553 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
12554 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
12555 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
12556 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
12557 }
12558
12559 SDValue NewOp =
12560 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12561 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
12562 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12563 return;
12564 }
12565 case Intrinsic::riscv_sm4ks:
12566 case Intrinsic::riscv_sm4ed: {
12567 unsigned Opc =
12568 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
12569 SDValue NewOp0 =
12570 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12571 SDValue NewOp1 =
12572 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
12573 SDValue Res =
12574 DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, N->getOperand(3));
12575 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12576 return;
12577 }
12578 case Intrinsic::riscv_mopr: {
12579 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12580 return;
12581 SDValue NewOp =
12582 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12583 SDValue Res = DAG.getNode(
12584 RISCVISD::MOPR, DL, MVT::i64, NewOp,
12585 DAG.getTargetConstant(N->getConstantOperandVal(2), DL, MVT::i64));
12586 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12587 return;
12588 }
12589 case Intrinsic::riscv_moprr: {
12590 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12591 return;
12592 SDValue NewOp0 =
12593 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12594 SDValue NewOp1 =
12595 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
12596 SDValue Res = DAG.getNode(
12597 RISCVISD::MOPRR, DL, MVT::i64, NewOp0, NewOp1,
12598 DAG.getTargetConstant(N->getConstantOperandVal(3), DL, MVT::i64));
12599 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12600 return;
12601 }
12602 case Intrinsic::riscv_clmul: {
12603 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12604 return;
12605
12606 SDValue NewOp0 =
12607 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12608 SDValue NewOp1 =
12609 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
12610 SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
12611 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12612 return;
12613 }
12614 case Intrinsic::riscv_clmulh:
12615 case Intrinsic::riscv_clmulr: {
12616 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12617 return;
12618
12619 // Extend inputs to XLen, and shift by 32. This will add 64 trailing zeros
12620 // to the full 128-bit clmul result of multiplying two xlen values.
12621 // Perform clmulr or clmulh on the shifted values. Finally, extract the
12622 // upper 32 bits.
12623 //
12624 // The alternative is to mask the inputs to 32 bits and use clmul, but
12625 // that requires two shifts to mask each input without zext.w.
12626 // FIXME: If the inputs are known zero extended or could be freely
12627 // zero extended, the mask form would be better.
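 // E.g. for riscv_clmulh on i32 a, b (a sketch of the reasoning): the carry-less
 // product of (a << 32) and (b << 32) equals clmul(a, b) << 64, so clmulh of the
 // shifted inputs produces clmul(a, b)[63:0]; shifting that right by 32 leaves
 // clmulh32(a, b) in the low 32 bits, which the final truncate extracts.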
12628 SDValue NewOp0 =
12629 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12630 SDValue NewOp1 =
12631 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
12632 NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
12633 DAG.getConstant(32, DL, MVT::i64));
12634 NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
12635 DAG.getConstant(32, DL, MVT::i64));
12636 unsigned Opc = IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH
12637 : RISCVISD::CLMULR;
12638 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
12639 Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
12640 DAG.getConstant(32, DL, MVT::i64));
12641 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12642 return;
12643 }
12644 case Intrinsic::riscv_vmv_x_s: {
12645 EVT VT = N->getValueType(0);
12646 MVT XLenVT = Subtarget.getXLenVT();
12647 if (VT.bitsLT(XLenVT)) {
12648 // Simple case just extract using vmv.x.s and truncate.
12649 SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
12650 Subtarget.getXLenVT(), N->getOperand(1));
12651 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
12652 return;
12653 }
12654
12655 assert(VT == MVT::i64 && !Subtarget.is64Bit() &&
12656 "Unexpected custom legalization");
12657
12658 // We need to do the move in two steps.
12659 SDValue Vec = N->getOperand(1);
12660 MVT VecVT = Vec.getSimpleValueType();
12661
12662 // First extract the lower XLEN bits of the element.
12663 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
12664
12665 // To extract the upper XLEN bits of the vector element, shift the first
12666 // element right by 32 bits and re-extract the lower XLEN bits.
12667 auto [Mask, VL] = getDefaultVLOps(1, VecVT, DL, DAG, Subtarget);
12668
12669 SDValue ThirtyTwoV =
12670 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT),
12671 DAG.getConstant(32, DL, XLenVT), VL);
12672 SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV,
12673 DAG.getUNDEF(VecVT), Mask, VL);
12674 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
12675
12676 Results.push_back(
12677 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
12678 break;
12679 }
12680 }
12681 break;
12682 }
12683 case ISD::VECREDUCE_ADD:
12684 case ISD::VECREDUCE_AND:
12685 case ISD::VECREDUCE_OR:
12686 case ISD::VECREDUCE_XOR:
12687 case ISD::VECREDUCE_SMAX:
12688 case ISD::VECREDUCE_UMAX:
12689 case ISD::VECREDUCE_SMIN:
12690 case ISD::VECREDUCE_UMIN:
12691 if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))
12692 Results.push_back(V);
12693 break;
12694 case ISD::VP_REDUCE_ADD:
12695 case ISD::VP_REDUCE_AND:
12696 case ISD::VP_REDUCE_OR:
12697 case ISD::VP_REDUCE_XOR:
12698 case ISD::VP_REDUCE_SMAX:
12699 case ISD::VP_REDUCE_UMAX:
12700 case ISD::VP_REDUCE_SMIN:
12701 case ISD::VP_REDUCE_UMIN:
12702 if (SDValue V = lowerVPREDUCE(SDValue(N, 0), DAG))
12703 Results.push_back(V);
12704 break;
12705 case ISD::GET_ROUNDING: {
12706 SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other);
12707 SDValue Res = DAG.getNode(ISD::GET_ROUNDING, DL, VTs, N->getOperand(0));
12708 Results.push_back(Res.getValue(0));
12709 Results.push_back(Res.getValue(1));
12710 break;
12711 }
12712 }
12713}
12714
12715/// Given a binary operator, return the *associative* generic ISD::VECREDUCE_OP
12716/// which corresponds to it.
12717static unsigned getVecReduceOpcode(unsigned Opc) {
12718 switch (Opc) {
12719 default:
12720 llvm_unreachable("Unhandled binary to transfrom reduction");
12721 case ISD::ADD:
12722 return ISD::VECREDUCE_ADD;
12723 case ISD::UMAX:
12724 return ISD::VECREDUCE_UMAX;
12725 case ISD::SMAX:
12726 return ISD::VECREDUCE_SMAX;
12727 case ISD::UMIN:
12728 return ISD::VECREDUCE_UMIN;
12729 case ISD::SMIN:
12730 return ISD::VECREDUCE_SMIN;
12731 case ISD::AND:
12732 return ISD::VECREDUCE_AND;
12733 case ISD::OR:
12734 return ISD::VECREDUCE_OR;
12735 case ISD::XOR:
12736 return ISD::VECREDUCE_XOR;
12737 case ISD::FADD:
12738 // Note: This is the associative form of the generic reduction opcode.
12739 return ISD::VECREDUCE_FADD;
12740 }
12741}
12742
12743/// Perform two related transforms whose purpose is to incrementally recognize
12744/// an explode_vector followed by scalar reduction as a vector reduction node.
12745/// This exists to recover from a deficiency in SLP which can't handle
12746/// forests with multiple roots sharing common nodes. In some cases, one
12747/// of the trees will be vectorized, and the other will remain (unprofitably)
12748/// scalarized.
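/// For instance (a hypothetical input), the scalar chain
///   t = add (extractelt v, 0), (extractelt v, 1)
///   u = add t, (extractelt v, 2)
/// is recognized one step at a time and rebuilt as
///   u = vecreduce_add (extract_subvector v, elements 0..2)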
12749static SDValue
12750 combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG,
12751 const RISCVSubtarget &Subtarget) {
12752
12753 // These transforms need to run before all integer types have been legalized
12754 // to i64 (so that the vector element type matches the add type), and while
12755 // it's safe to introduce odd sized vector types.
12756 if (DAG.NewNodesMustHaveLegalTypes)
12757 return SDValue();
12758
12759 // Without V, this transform isn't useful. We could form the (illegal)
12760 // operations and let them be scalarized again, but there's really no point.
12761 if (!Subtarget.hasVInstructions())
12762 return SDValue();
12763
12764 const SDLoc DL(N);
12765 const EVT VT = N->getValueType(0);
12766 const unsigned Opc = N->getOpcode();
12767
12768 // For FADD, we only handle the case with reassociation allowed. We
12769 // could handle strict reduction order, but at the moment, there's no
12770 // known reason to, and the complexity isn't worth it.
12771 // TODO: Handle fminnum and fmaxnum here
12772 if (!VT.isInteger() &&
12773 (Opc != ISD::FADD || !N->getFlags().hasAllowReassociation()))
12774 return SDValue();
12775
12776 const unsigned ReduceOpc = getVecReduceOpcode(Opc);
12777 assert(Opc == ISD::getVecReduceBaseOpcode(ReduceOpc) &&
12778 "Inconsistent mappings");
12779 SDValue LHS = N->getOperand(0);
12780 SDValue RHS = N->getOperand(1);
12781
12782 if (!LHS.hasOneUse() || !RHS.hasOneUse())
12783 return SDValue();
12784
12785 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
12786 std::swap(LHS, RHS);
12787
12788 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
12789 !isa<ConstantSDNode>(RHS.getOperand(1)))
12790 return SDValue();
12791
12792 uint64_t RHSIdx = cast<ConstantSDNode>(RHS.getOperand(1))->getLimitedValue();
12793 SDValue SrcVec = RHS.getOperand(0);
12794 EVT SrcVecVT = SrcVec.getValueType();
12795 assert(SrcVecVT.getVectorElementType() == VT);
12796 if (SrcVecVT.isScalableVector())
12797 return SDValue();
12798
12799 if (SrcVecVT.getScalarSizeInBits() > Subtarget.getELen())
12800 return SDValue();
12801
12802 // match binop (extract_vector_elt V, 0), (extract_vector_elt V, 1) to
12803 // reduce_op (extract_subvector [2 x VT] from V). This will form the
12804 // root of our reduction tree. TODO: We could extend this to any two
12805 // adjacent aligned constant indices if desired.
12806 if (LHS.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
12807 LHS.getOperand(0) == SrcVec && isa<ConstantSDNode>(LHS.getOperand(1))) {
12808 uint64_t LHSIdx =
12809 cast<ConstantSDNode>(LHS.getOperand(1))->getLimitedValue();
12810 if (0 == std::min(LHSIdx, RHSIdx) && 1 == std::max(LHSIdx, RHSIdx)) {
12811 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, 2);
12812 SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,
12813 DAG.getVectorIdxConstant(0, DL));
12814 return DAG.getNode(ReduceOpc, DL, VT, Vec, N->getFlags());
12815 }
12816 }
12817
12818 // Match (binop (reduce (extract_subvector V, 0),
12819 // (extract_vector_elt V, sizeof(SubVec))))
12820 // into a reduction of one more element from the original vector V.
12821 if (LHS.getOpcode() != ReduceOpc)
12822 return SDValue();
12823
12824 SDValue ReduceVec = LHS.getOperand(0);
12825 if (ReduceVec.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
12826 ReduceVec.hasOneUse() && ReduceVec.getOperand(0) == RHS.getOperand(0) &&
12827 isNullConstant(ReduceVec.getOperand(1)) &&
12828 ReduceVec.getValueType().getVectorNumElements() == RHSIdx) {
12829 // For illegal types (e.g. 3xi32), most will be combined again into a
12830 // wider (hopefully legal) type. If this is a terminal state, we are
12831 // relying on type legalization here to produce something reasonable
12832 // and this lowering quality could probably be improved. (TODO)
12833 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, RHSIdx + 1);
12834 SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,
12835 DAG.getVectorIdxConstant(0, DL));
12836 auto Flags = ReduceVec->getFlags();
12837 Flags.intersectWith(N->getFlags());
12838 return DAG.getNode(ReduceOpc, DL, VT, Vec, Flags);
12839 }
12840
12841 return SDValue();
12842}
12843
12844
12845// Try to fold (<bop> x, (reduction.<bop> vec, start))
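// Roughly: (add X, (extractelt (vredsum start=neutral, vec, ...), 0))
//          -> (extractelt (vredsum start=X, vec, ...), 0),
// i.e. fold X into the reduction's start value when the original start is the
// neutral element of the operation.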
12846 static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG,
12847 const RISCVSubtarget &Subtarget) {
12848 auto BinOpToRVVReduce = [](unsigned Opc) {
12849 switch (Opc) {
12850 default:
12851 llvm_unreachable("Unhandled binary to transfrom reduction");
12852 case ISD::ADD:
12854 case ISD::UMAX:
12856 case ISD::SMAX:
12858 case ISD::UMIN:
12860 case ISD::SMIN:
12862 case ISD::AND:
12864 case ISD::OR:
12866 case ISD::XOR:
12868 case ISD::FADD:
12870 case ISD::FMAXNUM:
12872 case ISD::FMINNUM:
12874 }
12875 };
12876
12877 auto IsReduction = [&BinOpToRVVReduce](SDValue V, unsigned Opc) {
12878 return V.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
12879 isNullConstant(V.getOperand(1)) &&
12880 V.getOperand(0).getOpcode() == BinOpToRVVReduce(Opc);
12881 };
12882
12883 unsigned Opc = N->getOpcode();
12884 unsigned ReduceIdx;
12885 if (IsReduction(N->getOperand(0), Opc))
12886 ReduceIdx = 0;
12887 else if (IsReduction(N->getOperand(1), Opc))
12888 ReduceIdx = 1;
12889 else
12890 return SDValue();
12891
12892 // Skip if FADD disallows reassociation but the combiner needs it.
12893 if (Opc == ISD::FADD && !N->getFlags().hasAllowReassociation())
12894 return SDValue();
12895
12896 SDValue Extract = N->getOperand(ReduceIdx);
12897 SDValue Reduce = Extract.getOperand(0);
12898 if (!Extract.hasOneUse() || !Reduce.hasOneUse())
12899 return SDValue();
12900
12901 SDValue ScalarV = Reduce.getOperand(2);
12902 EVT ScalarVT = ScalarV.getValueType();
12903 if (ScalarV.getOpcode() == ISD::INSERT_SUBVECTOR &&
12904 ScalarV.getOperand(0)->isUndef() &&
12905 isNullConstant(ScalarV.getOperand(2)))
12906 ScalarV = ScalarV.getOperand(1);
12907
12908 // Make sure that ScalarV is a splat with VL=1.
12909 if (ScalarV.getOpcode() != RISCVISD::VFMV_S_F_VL &&
12910 ScalarV.getOpcode() != RISCVISD::VMV_S_X_VL &&
12911 ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL)
12912 return SDValue();
12913
12914 if (!isNonZeroAVL(ScalarV.getOperand(2)))
12915 return SDValue();
12916
12917 // Check the scalar of ScalarV is neutral element
12918 // TODO: Deal with value other than neutral element.
12919 if (!isNeutralConstant(N->getOpcode(), N->getFlags(), ScalarV.getOperand(1),
12920 0))
12921 return SDValue();
12922
12923 // If the AVL is zero, operand 0 will be returned. So it's not safe to fold.
12924 // FIXME: We might be able to improve this if operand 0 is undef.
12925 if (!isNonZeroAVL(Reduce.getOperand(5)))
12926 return SDValue();
12927
12928 SDValue NewStart = N->getOperand(1 - ReduceIdx);
12929
12930 SDLoc DL(N);
12931 SDValue NewScalarV =
12932 lowerScalarInsert(NewStart, ScalarV.getOperand(2),
12933 ScalarV.getSimpleValueType(), DL, DAG, Subtarget);
12934
12935 // If we looked through an INSERT_SUBVECTOR we need to restore it.
12936 if (ScalarVT != ScalarV.getValueType())
12937 NewScalarV =
12938 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ScalarVT, DAG.getUNDEF(ScalarVT),
12939 NewScalarV, DAG.getVectorIdxConstant(0, DL));
12940
12941 SDValue Ops[] = {Reduce.getOperand(0), Reduce.getOperand(1),
12942 NewScalarV, Reduce.getOperand(3),
12943 Reduce.getOperand(4), Reduce.getOperand(5)};
12944 SDValue NewReduce =
12945 DAG.getNode(Reduce.getOpcode(), DL, Reduce.getValueType(), Ops);
12946 return DAG.getNode(Extract.getOpcode(), DL, Extract.getValueType(), NewReduce,
12947 Extract.getOperand(1));
12948}
12949
12950// Optimize (add (shl x, c0), (shl y, c1)) ->
12951 // (SLLI (SH*ADD x, y), c0), if c1-c0 equals [1|2|3].
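// Example: (add (shl x, 5), (shl y, 8)) -> (shl (sh3add y, x), 5),
// since 8 - 5 == 3 and (y*8 + x)*32 == (y<<8) + (x<<5).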
12952 static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,
12953 const RISCVSubtarget &Subtarget) {
12954 // Perform this optimization only in the zba extension.
12955 if (!Subtarget.hasStdExtZba())
12956 return SDValue();
12957
12958 // Skip for vector types and larger types.
12959 EVT VT = N->getValueType(0);
12960 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
12961 return SDValue();
12962
12963 // The two operand nodes must be SHL and have no other use.
12964 SDValue N0 = N->getOperand(0);
12965 SDValue N1 = N->getOperand(1);
12966 if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL ||
12967 !N0->hasOneUse() || !N1->hasOneUse())
12968 return SDValue();
12969
12970 // Check c0 and c1.
12971 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
12972 auto *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(1));
12973 if (!N0C || !N1C)
12974 return SDValue();
12975 int64_t C0 = N0C->getSExtValue();
12976 int64_t C1 = N1C->getSExtValue();
12977 if (C0 <= 0 || C1 <= 0)
12978 return SDValue();
12979
12980 // Skip if SH1ADD/SH2ADD/SH3ADD are not applicable.
12981 int64_t Bits = std::min(C0, C1);
12982 int64_t Diff = std::abs(C0 - C1);
12983 if (Diff != 1 && Diff != 2 && Diff != 3)
12984 return SDValue();
12985
12986 // Build nodes.
12987 SDLoc DL(N);
12988 SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0);
12989 SDValue NL = (C0 > C1) ? N0->getOperand(0) : N1->getOperand(0);
12990 SDValue NA0 =
12991 DAG.getNode(ISD::SHL, DL, VT, NL, DAG.getConstant(Diff, DL, VT));
12992 SDValue NA1 = DAG.getNode(ISD::ADD, DL, VT, NA0, NS);
12993 return DAG.getNode(ISD::SHL, DL, VT, NA1, DAG.getConstant(Bits, DL, VT));
12994}
12995
12996// Combine a constant select operand into its use:
12997//
12998// (and (select cond, -1, c), x)
12999// -> (select cond, x, (and x, c)) [AllOnes=1]
13000// (or (select cond, 0, c), x)
13001// -> (select cond, x, (or x, c)) [AllOnes=0]
13002// (xor (select cond, 0, c), x)
13003// -> (select cond, x, (xor x, c)) [AllOnes=0]
13004// (add (select cond, 0, c), x)
13005// -> (select cond, x, (add x, c)) [AllOnes=0]
13006// (sub x, (select cond, 0, c))
13007// -> (select cond, x, (sub x, c)) [AllOnes=0]
13008 static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
13009 SelectionDAG &DAG, bool AllOnes,
13010 const RISCVSubtarget &Subtarget) {
13011 EVT VT = N->getValueType(0);
13012
13013 // Skip vectors.
13014 if (VT.isVector())
13015 return SDValue();
13016
13017 if (!Subtarget.hasConditionalMoveFusion()) {
13018 // (select cond, x, (and x, c)) has custom lowering with Zicond.
13019 if ((!Subtarget.hasStdExtZicond() &&
13020 !Subtarget.hasVendorXVentanaCondOps()) ||
13021 N->getOpcode() != ISD::AND)
13022 return SDValue();
13023
13024 // Maybe harmful when the condition code has multiple uses.
13025 if (Slct.getOpcode() == ISD::SELECT && !Slct.getOperand(0).hasOneUse())
13026 return SDValue();
13027
13028 // Maybe harmful when VT is wider than XLen.
13029 if (VT.getSizeInBits() > Subtarget.getXLen())
13030 return SDValue();
13031 }
13032
13033 if ((Slct.getOpcode() != ISD::SELECT &&
13034 Slct.getOpcode() != RISCVISD::SELECT_CC) ||
13035 !Slct.hasOneUse())
13036 return SDValue();
13037
13038 auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) {
13039 return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
13040 };
13041
13042 bool SwapSelectOps;
13043 unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0;
13044 SDValue TrueVal = Slct.getOperand(1 + OpOffset);
13045 SDValue FalseVal = Slct.getOperand(2 + OpOffset);
13046 SDValue NonConstantVal;
13047 if (isZeroOrAllOnes(TrueVal, AllOnes)) {
13048 SwapSelectOps = false;
13049 NonConstantVal = FalseVal;
13050 } else if (isZeroOrAllOnes(FalseVal, AllOnes)) {
13051 SwapSelectOps = true;
13052 NonConstantVal = TrueVal;
13053 } else
13054 return SDValue();
13055
13056 // Slct is now known to be the desired identity constant when CC is true.
13057 TrueVal = OtherOp;
13058 FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal);
13059 // Unless SwapSelectOps says the condition should be false.
13060 if (SwapSelectOps)
13061 std::swap(TrueVal, FalseVal);
13062
13063 if (Slct.getOpcode() == RISCVISD::SELECT_CC)
13064 return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT,
13065 {Slct.getOperand(0), Slct.getOperand(1),
13066 Slct.getOperand(2), TrueVal, FalseVal});
13067
13068 return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
13069 {Slct.getOperand(0), TrueVal, FalseVal});
13070}
13071
13072// Attempt combineSelectAndUse on each operand of a commutative operator N.
13073 static SDValue combineSelectAndUseCommutative(SDNode *N, SelectionDAG &DAG,
13074 bool AllOnes,
13075 const RISCVSubtarget &Subtarget) {
13076 SDValue N0 = N->getOperand(0);
13077 SDValue N1 = N->getOperand(1);
13078 if (SDValue Result = combineSelectAndUse(N, N0, N1, DAG, AllOnes, Subtarget))
13079 return Result;
13080 if (SDValue Result = combineSelectAndUse(N, N1, N0, DAG, AllOnes, Subtarget))
13081 return Result;
13082 return SDValue();
13083}
13084
13085// Transform (add (mul x, c0), c1) ->
13086// (add (mul (add x, c1/c0), c0), c1%c0).
13087// if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case
13088// that should be excluded is when c0*(c1/c0) is simm12, which will lead
13089// to an infinite loop in DAGCombine if transformed.
13090// Or transform (add (mul x, c0), c1) ->
13091// (add (mul (add x, c1/c0+1), c0), c1%c0-c0),
13092// if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner
13093// case that should be excluded is when c0*(c1/c0+1) is simm12, which will
13094// lead to an infinite loop in DAGCombine if transformed.
13095// Or transform (add (mul x, c0), c1) ->
13096// (add (mul (add x, c1/c0-1), c0), c1%c0+c0),
13097// if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner
13098// case that should be excluded is when c0*(c1/c0-1) is simm12, which will
13099// lead to an infinite loop in DAGCombine if transformed.
13100// Or transform (add (mul x, c0), c1) ->
13101// (mul (add x, c1/c0), c0).
13102// if c1%c0 is zero, and c1/c0 is simm12 while c1 is not.
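// Worked example with hypothetical constants: (add (mul x, 100), 4099).
// Here c1/c0 = 40 and c1%c0 = 99 are simm12 while 4099 and 100*40 = 4000 are
// not, so the node becomes (add (mul (add x, 40), 100), 99).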
13103 static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG,
13104 const RISCVSubtarget &Subtarget) {
13105 // Skip for vector types and larger types.
13106 EVT VT = N->getValueType(0);
13107 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
13108 return SDValue();
13109 // The first operand node must be a MUL and has no other use.
13110 SDValue N0 = N->getOperand(0);
13111 if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL)
13112 return SDValue();
13113 // Check if c0 and c1 match above conditions.
13114 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
13115 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
13116 if (!N0C || !N1C)
13117 return SDValue();
13118 // If N0C has multiple uses it's possible one of the cases in
13119 // DAGCombiner::isMulAddWithConstProfitable will be true, which would result
13120 // in an infinite loop.
13121 if (!N0C->hasOneUse())
13122 return SDValue();
13123 int64_t C0 = N0C->getSExtValue();
13124 int64_t C1 = N1C->getSExtValue();
13125 int64_t CA, CB;
13126 if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(C1))
13127 return SDValue();
13128 // Search for proper CA (non-zero) and CB that both are simm12.
13129 if ((C1 / C0) != 0 && isInt<12>(C1 / C0) && isInt<12>(C1 % C0) &&
13130 !isInt<12>(C0 * (C1 / C0))) {
13131 CA = C1 / C0;
13132 CB = C1 % C0;
13133 } else if ((C1 / C0 + 1) != 0 && isInt<12>(C1 / C0 + 1) &&
13134 isInt<12>(C1 % C0 - C0) && !isInt<12>(C0 * (C1 / C0 + 1))) {
13135 CA = C1 / C0 + 1;
13136 CB = C1 % C0 - C0;
13137 } else if ((C1 / C0 - 1) != 0 && isInt<12>(C1 / C0 - 1) &&
13138 isInt<12>(C1 % C0 + C0) && !isInt<12>(C0 * (C1 / C0 - 1))) {
13139 CA = C1 / C0 - 1;
13140 CB = C1 % C0 + C0;
13141 } else
13142 return SDValue();
13143 // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0).
13144 SDLoc DL(N);
13145 SDValue New0 = DAG.getNode(ISD::ADD, DL, VT, N0->getOperand(0),
13146 DAG.getConstant(CA, DL, VT));
13147 SDValue New1 =
13148 DAG.getNode(ISD::MUL, DL, VT, New0, DAG.getConstant(C0, DL, VT));
13149 return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getConstant(CB, DL, VT));
13150}
13151
13152// add (zext, zext) -> zext (add (zext, zext))
13153// sub (zext, zext) -> sext (sub (zext, zext))
13154// mul (zext, zext) -> zext (mul (zext, zext))
13155// sdiv (zext, zext) -> zext (sdiv (zext, zext))
13156// udiv (zext, zext) -> zext (udiv (zext, zext))
13157// srem (zext, zext) -> zext (srem (zext, zext))
13158// urem (zext, zext) -> zext (urem (zext, zext))
13159//
13160 // where the sum of the extend widths matches, and the range of the bin op
13161// fits inside the width of the narrower bin op. (For profitability on rvv, we
13162// use a power of two for both inner and outer extend.)
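// For example, adding two i8 vectors that were zero extended to i32 can instead
// be done as an i16 add of i16 zero extensions, then zero extended to i32; the
// narrower add cannot wrap because each input fits in 8 bits.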
13163 static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG) {
13164
13165 EVT VT = N->getValueType(0);
13166 if (!VT.isVector() || !DAG.getTargetLoweringInfo().isTypeLegal(VT))
13167 return SDValue();
13168
13169 SDValue N0 = N->getOperand(0);
13170 SDValue N1 = N->getOperand(1);
13171 if (N0.getOpcode() != ISD::ZERO_EXTEND || N1.getOpcode() != ISD::ZERO_EXTEND)
13172 return SDValue();
13173 if (!N0.hasOneUse() || !N1.hasOneUse())
13174 return SDValue();
13175
13176 SDValue Src0 = N0.getOperand(0);
13177 SDValue Src1 = N1.getOperand(0);
13178 EVT SrcVT = Src0.getValueType();
13179 if (!DAG.getTargetLoweringInfo().isTypeLegal(SrcVT) ||
13180 SrcVT != Src1.getValueType() || SrcVT.getScalarSizeInBits() < 8 ||
13181 SrcVT.getScalarSizeInBits() >= VT.getScalarSizeInBits() / 2)
13182 return SDValue();
13183
13184 LLVMContext &C = *DAG.getContext();
13185 EVT ElemVT = VT.getVectorElementType().getHalfSizedIntegerVT(C);
13186 EVT NarrowVT = EVT::getVectorVT(C, ElemVT, VT.getVectorElementCount());
13187
13188 Src0 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src0), NarrowVT, Src0);
13189 Src1 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src1), NarrowVT, Src1);
13190
13191 // Src0 and Src1 are zero extended, so they're always positive if signed.
13192 //
13193 // sub can produce a negative from two positive operands, so it needs sign
13194 // extended. Other nodes produce a positive from two positive operands, so
13195 // zero extend instead.
13196 unsigned OuterExtend =
13197 N->getOpcode() == ISD::SUB ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
13198
13199 return DAG.getNode(
13200 OuterExtend, SDLoc(N), VT,
13201 DAG.getNode(N->getOpcode(), SDLoc(N), NarrowVT, Src0, Src1));
13202}
13203
13204// Try to turn (add (xor bool, 1) -1) into (neg bool).
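// Quick check of the identity: with bool == 0, (0 ^ 1) + (-1) == 0; with
// bool == 1, (1 ^ 1) + (-1) == -1. Both match -bool.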
13205 static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG) {
13206 SDValue N0 = N->getOperand(0);
13207 SDValue N1 = N->getOperand(1);
13208 EVT VT = N->getValueType(0);
13209 SDLoc DL(N);
13210
13211 // RHS should be -1.
13212 if (!isAllOnesConstant(N1))
13213 return SDValue();
13214
13215 // Look for (xor X, 1).
13216 if (N0.getOpcode() != ISD::XOR || !isOneConstant(N0.getOperand(1)))
13217 return SDValue();
13218
13219 // First xor input should be 0 or 1.
13220 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
13221 if (!DAG.MaskedValueIsZero(N0.getOperand(0), Mask))
13222 return SDValue();
13223
13224 // Emit a negate of the setcc.
13225 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
13226 N0.getOperand(0));
13227}
13228
13229 static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG,
13230 const RISCVSubtarget &Subtarget) {
13231 if (SDValue V = combineAddOfBooleanXor(N, DAG))
13232 return V;
13233 if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget))
13234 return V;
13235 if (SDValue V = transformAddShlImm(N, DAG, Subtarget))
13236 return V;
13237 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
13238 return V;
13239 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
13240 return V;
13241 if (SDValue V = combineBinOpOfZExt(N, DAG))
13242 return V;
13243
13244 // fold (add (select lhs, rhs, cc, 0, y), x) ->
13245 // (select lhs, rhs, cc, x, (add x, y))
13246 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
13247}
13248
13249// Try to turn a sub boolean RHS and constant LHS into an addi.
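// e.g. (sub C, (setcc x, y, eq)) -> (add (setcc x, y, ne), C - 1), relying on
// setcc producing 0 or 1 so that C - cc == (1 - cc) + (C - 1).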
13250 static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG) {
13251 SDValue N0 = N->getOperand(0);
13252 SDValue N1 = N->getOperand(1);
13253 EVT VT = N->getValueType(0);
13254 SDLoc DL(N);
13255
13256 // Require a constant LHS.
13257 auto *N0C = dyn_cast<ConstantSDNode>(N0);
13258 if (!N0C)
13259 return SDValue();
13260
13261 // All our optimizations involve subtracting 1 from the immediate and forming
13262 // an ADDI. Make sure the new immediate is valid for an ADDI.
13263 APInt ImmValMinus1 = N0C->getAPIntValue() - 1;
13264 if (!ImmValMinus1.isSignedIntN(12))
13265 return SDValue();
13266
13267 SDValue NewLHS;
13268 if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse()) {
13269 // (sub constant, (setcc x, y, eq/neq)) ->
13270 // (add (setcc x, y, neq/eq), constant - 1)
13271 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
13272 EVT SetCCOpVT = N1.getOperand(0).getValueType();
13273 if (!isIntEqualitySetCC(CCVal) || !SetCCOpVT.isInteger())
13274 return SDValue();
13275 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
13276 NewLHS =
13277 DAG.getSetCC(SDLoc(N1), VT, N1.getOperand(0), N1.getOperand(1), CCVal);
13278 } else if (N1.getOpcode() == ISD::XOR && isOneConstant(N1.getOperand(1)) &&
13279 N1.getOperand(0).getOpcode() == ISD::SETCC) {
13280 // (sub C, (xor (setcc), 1)) -> (add (setcc), C-1).
13281 // Since setcc returns a bool the xor is equivalent to 1-setcc.
13282 NewLHS = N1.getOperand(0);
13283 } else
13284 return SDValue();
13285
13286 SDValue NewRHS = DAG.getConstant(ImmValMinus1, DL, VT);
13287 return DAG.getNode(ISD::ADD, DL, VT, NewLHS, NewRHS);
13288}
13289
13290 static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG,
13291 const RISCVSubtarget &Subtarget) {
13292 if (SDValue V = combineSubOfBoolean(N, DAG))
13293 return V;
13294
13295 EVT VT = N->getValueType(0);
13296 SDValue N0 = N->getOperand(0);
13297 SDValue N1 = N->getOperand(1);
13298 // fold (sub 0, (setcc x, 0, setlt)) -> (sra x, xlen - 1)
13299 if (isNullConstant(N0) && N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
13300 isNullConstant(N1.getOperand(1))) {
13301 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
13302 if (CCVal == ISD::SETLT) {
13303 SDLoc DL(N);
13304 unsigned ShAmt = N0.getValueSizeInBits() - 1;
13305 return DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0),
13306 DAG.getConstant(ShAmt, DL, VT));
13307 }
13308 }
13309
13310 if (SDValue V = combineBinOpOfZExt(N, DAG))
13311 return V;
13312
13313 // fold (sub x, (select lhs, rhs, cc, 0, y)) ->
13314 // (select lhs, rhs, cc, x, (sub x, y))
13315 return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false, Subtarget);
13316}
13317
13318// Apply DeMorgan's law to (and/or (xor X, 1), (xor Y, 1)) if X and Y are 0/1.
13319// Legalizing setcc can introduce xors like this. Doing this transform reduces
13320// the number of xors and may allow the xor to fold into a branch condition.
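// i.e. (and (xor X, 1), (xor Y, 1)) -> (xor (or X, Y), 1) when X and Y are
// known to be 0/1, and similarly an or of such xors becomes an and plus an xor.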
13321 static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG) {
13322 SDValue N0 = N->getOperand(0);
13323 SDValue N1 = N->getOperand(1);
13324 bool IsAnd = N->getOpcode() == ISD::AND;
13325
13326 if (N0.getOpcode() != ISD::XOR || N1.getOpcode() != ISD::XOR)
13327 return SDValue();
13328
13329 if (!N0.hasOneUse() || !N1.hasOneUse())
13330 return SDValue();
13331
13332 SDValue N01 = N0.getOperand(1);
13333 SDValue N11 = N1.getOperand(1);
13334
13335 // For AND, SimplifyDemandedBits may have turned one of the (xor X, 1) into
13336 // (xor X, -1) based on the upper bits of the other operand being 0. If the
13337 // operation is And, allow one of the Xors to use -1.
13338 if (isOneConstant(N01)) {
13339 if (!isOneConstant(N11) && !(IsAnd && isAllOnesConstant(N11)))
13340 return SDValue();
13341 } else if (isOneConstant(N11)) {
13342 // N01 and N11 being 1 was already handled. Handle N11==1 and N01==-1.
13343 if (!(IsAnd && isAllOnesConstant(N01)))
13344 return SDValue();
13345 } else
13346 return SDValue();
13347
13348 EVT VT = N->getValueType(0);
13349
13350 SDValue N00 = N0.getOperand(0);
13351 SDValue N10 = N1.getOperand(0);
13352
13353 // The LHS of the xors needs to be 0/1.
13354 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
13355 if (!DAG.MaskedValueIsZero(N00, Mask) || !DAG.MaskedValueIsZero(N10, Mask))
13356 return SDValue();
13357
13358 // Invert the opcode and insert a new xor.
13359 SDLoc DL(N);
13360 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
13361 SDValue Logic = DAG.getNode(Opc, DL, VT, N00, N10);
13362 return DAG.getNode(ISD::XOR, DL, VT, Logic, DAG.getConstant(1, DL, VT));
13363}
13364
13365 static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG,
13366 const RISCVSubtarget &Subtarget) {
13367 SDValue N0 = N->getOperand(0);
13368 EVT VT = N->getValueType(0);
13369
13370 // Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero
13371 // extending X. This is safe since we only need the LSB after the shift and
13372 // shift amounts larger than 31 would produce poison. If we wait until
13373 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
13374 // to use a BEXT instruction.
13375 if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && VT == MVT::i1 &&
13376 N0.getValueType() == MVT::i32 && N0.getOpcode() == ISD::SRL &&
13377 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
13378 SDLoc DL(N0);
13379 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
13380 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
13381 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
13382 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Srl);
13383 }
13384
13385 return SDValue();
13386}
13387
13388 // Combines two comparison operations and a logic operation into one selection
13389 // operation (min, max) and a logic operation. Returns the newly constructed
13390 // node if the conditions for the optimization are satisfied.
13391 static SDValue performANDCombine(SDNode *N,
13392 TargetLowering::DAGCombinerInfo &DCI,
13393 const RISCVSubtarget &Subtarget) {
13394 SelectionDAG &DAG = DCI.DAG;
13395
13396 SDValue N0 = N->getOperand(0);
13397 // Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero
13398 // extending X. This is safe since we only need the LSB after the shift and
13399 // shift amounts larger than 31 would produce poison. If we wait until
13400 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
13401 // to use a BEXT instruction.
13402 if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
13403 N->getValueType(0) == MVT::i32 && isOneConstant(N->getOperand(1)) &&
13404 N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) &&
13405 N0.hasOneUse()) {
13406 SDLoc DL(N);
13407 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
13408 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
13409 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
13410 SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Srl,
13411 DAG.getConstant(1, DL, MVT::i64));
13412 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
13413 }
13414
13415 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
13416 return V;
13417 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
13418 return V;
13419
13420 if (DCI.isAfterLegalizeDAG())
13421 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
13422 return V;
13423
13424 // fold (and (select lhs, rhs, cc, -1, y), x) ->
13425 // (select lhs, rhs, cc, x, (and x, y))
13426 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true, Subtarget);
13427}
13428
13429// Try to pull an xor with 1 through a select idiom that uses czero_eqz/nez.
13430// FIXME: Generalize to other binary operators with same operand.
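// i.e. (or (czero_eqz (xor a, 1), c), (czero_nez (xor b, 1), c))
//      -> (xor (or (czero_eqz a, c), (czero_nez b, c)), 1)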
13431 static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1,
13432 SelectionDAG &DAG) {
13433 assert(N->getOpcode() == ISD::OR && "Unexpected opcode");
13434
13435 if (N0.getOpcode() != RISCVISD::CZERO_EQZ ||
13436 N1.getOpcode() != RISCVISD::CZERO_NEZ ||
13437 !N0.hasOneUse() || !N1.hasOneUse())
13438 return SDValue();
13439
13440 // Should have the same condition.
13441 SDValue Cond = N0.getOperand(1);
13442 if (Cond != N1.getOperand(1))
13443 return SDValue();
13444
13445 SDValue TrueV = N0.getOperand(0);
13446 SDValue FalseV = N1.getOperand(0);
13447
13448 if (TrueV.getOpcode() != ISD::XOR || FalseV.getOpcode() != ISD::XOR ||
13449 TrueV.getOperand(1) != FalseV.getOperand(1) ||
13450 !isOneConstant(TrueV.getOperand(1)) ||
13451 !TrueV.hasOneUse() || !FalseV.hasOneUse())
13452 return SDValue();
13453
13454 EVT VT = N->getValueType(0);
13455 SDLoc DL(N);
13456
13457 SDValue NewN0 = DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV.getOperand(0),
13458 Cond);
13459 SDValue NewN1 = DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV.getOperand(0),
13460 Cond);
13461 SDValue NewOr = DAG.getNode(ISD::OR, DL, VT, NewN0, NewN1);
13462 return DAG.getNode(ISD::XOR, DL, VT, NewOr, TrueV.getOperand(1));
13463}
13464
13465 static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
13466 const RISCVSubtarget &Subtarget) {
13467 SelectionDAG &DAG = DCI.DAG;
13468
13469 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
13470 return V;
13471 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
13472 return V;
13473
13474 if (DCI.isAfterLegalizeDAG())
13475 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
13476 return V;
13477
13478 // Look for Or of CZERO_EQZ/NEZ with same condition which is the select idiom.
13479 // We may be able to pull a common operation out of the true and false value.
13480 SDValue N0 = N->getOperand(0);
13481 SDValue N1 = N->getOperand(1);
13482 if (SDValue V = combineOrOfCZERO(N, N0, N1, DAG))
13483 return V;
13484 if (SDValue V = combineOrOfCZERO(N, N1, N0, DAG))
13485 return V;
13486
13487 // fold (or (select cond, 0, y), x) ->
13488 // (select cond, x, (or x, y))
13489 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
13490}
13491
13492 static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
13493 const RISCVSubtarget &Subtarget) {
13494 SDValue N0 = N->getOperand(0);
13495 SDValue N1 = N->getOperand(1);
13496
13497 // Pre-promote (i32 (xor (shl -1, X), ~0)) on RV64 with Zbs so we can use
13498 // (ADDI (BSET X0, X), -1). If we wait until type legalization, we'll create
13499 // RISCVISD::SLLW and we can't recover it to use a BSET instruction.
13500 if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
13501 N->getValueType(0) == MVT::i32 && isAllOnesConstant(N1) &&
13502 N0.getOpcode() == ISD::SHL && isAllOnesConstant(N0.getOperand(0)) &&
13503 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
13504 SDLoc DL(N);
13505 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
13506 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
13507 SDValue Shl = DAG.getNode(ISD::SHL, DL, MVT::i64, Op0, Op1);
13508 SDValue And = DAG.getNOT(DL, Shl, MVT::i64);
13509 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
13510 }
13511
13512 // fold (xor (sllw 1, x), -1) -> (rolw ~1, x)
13513 // NOTE: Assumes ROL being legal means ROLW is legal.
13514 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13515 if (N0.getOpcode() == RISCVISD::SLLW &&
13516 isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0)) &&
13517 TLI.isOperationLegal(ISD::ROTL, MVT::i64)) {
13518 SDLoc DL(N);
13519 return DAG.getNode(RISCVISD::ROLW, DL, MVT::i64,
13520 DAG.getConstant(~1, DL, MVT::i64), N0.getOperand(1));
13521 }
13522
13523 // Fold (xor (setcc constant, y, setlt), 1) -> (setcc y, constant + 1, setlt)
13524 if (N0.getOpcode() == ISD::SETCC && isOneConstant(N1) && N0.hasOneUse()) {
13525 auto *ConstN00 = dyn_cast<ConstantSDNode>(N0.getOperand(0));
13526 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
13527 if (ConstN00 && CC == ISD::SETLT) {
13528 EVT VT = N0.getValueType();
13529 SDLoc DL(N0);
13530 const APInt &Imm = ConstN00->getAPIntValue();
13531 if ((Imm + 1).isSignedIntN(12))
13532 return DAG.getSetCC(DL, VT, N0.getOperand(1),
13533 DAG.getConstant(Imm + 1, DL, VT), CC);
13534 }
13535 }
13536
13537 // Combine (xor (trunc (X cc Y)) 1) -> (trunc (X !cc Y)). This is needed with
13538 // RV64LegalI32 when the setcc is created after type legalization. An i1 xor
13539 // would have been promoted to i32, but the setcc would have i64 result.
13540 if (N->getValueType(0) == MVT::i32 && N0.getOpcode() == ISD::TRUNCATE &&
13541 isOneConstant(N1) && N0.getOperand(0).getOpcode() == ISD::SETCC) {
13542 SDValue N00 = N0.getOperand(0);
13543 SDLoc DL(N);
13544 SDValue LHS = N00.getOperand(0);
13545 SDValue RHS = N00.getOperand(1);
13546 SDValue CC = N00.getOperand(2);
13547 ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
13548 LHS.getValueType());
13549 SDValue Setcc = DAG.getSetCC(SDLoc(N00), N0.getOperand(0).getValueType(),
13550 LHS, RHS, NotCC);
13551 return DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N->getValueType(0), Setcc);
13552 }
13553
13554 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
13555 return V;
13556 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
13557 return V;
13558
13559 // fold (xor (select cond, 0, y), x) ->
13560 // (select cond, x, (xor x, y))
13561 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
13562}
13563
13564// Try to expand a scalar multiply to a faster sequence.
13565 static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
13566 TargetLowering::DAGCombinerInfo &DCI,
13567 const RISCVSubtarget &Subtarget) {
13568
13569 EVT VT = N->getValueType(0);
13570
13571 // LI + MUL is usually smaller than the alternative sequence.
13572 if (DAG.getMachineFunction().getFunction().hasMinSize())
13573 return SDValue();
13574
13575 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
13576 return SDValue();
13577
13578 if (VT != Subtarget.getXLenVT())
13579 return SDValue();
13580
13581 if (!Subtarget.hasStdExtZba() && !Subtarget.hasVendorXTHeadBa())
13582 return SDValue();
13583
13584 ConstantSDNode *CNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
13585 if (!CNode)
13586 return SDValue();
13587 uint64_t MulAmt = CNode->getZExtValue();
13588
13589 // WARNING: The code below is knowingly incorrect with regards to undef semantics.
13590 // We're adding additional uses of X here, and in principle, we should be freezing
13591 // X before doing so. However, adding freeze here causes real regressions, and no
13592 // other target properly freezes X in these cases either.
13593 SDValue X = N->getOperand(0);
13594
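 // A few illustrative expansions of the patterns matched below (assuming Zba):
 //   x * 20 == (x * 5) * 4     -> shl (sh2add x, x), 2
 //   x * 45 == (x * 5) * 9     -> sh3add (sh2add x, x), (sh2add x, x)
 //   x * 11 == (x * 5) * 2 + x -> sh1add (sh2add x, x), x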
13595 for (uint64_t Divisor : {3, 5, 9}) {
13596 if (MulAmt % Divisor != 0)
13597 continue;
13598 uint64_t MulAmt2 = MulAmt / Divisor;
13599 // 3/5/9 * 2^N -> shl (shXadd X, X), N
13600 if (isPowerOf2_64(MulAmt2)) {
13601 SDLoc DL(N);
13602 SDValue X = N->getOperand(0);
13603 // Put the shift first if we can fold a zext into the
13604 // shift forming a slli.uw.
13605 if (X.getOpcode() == ISD::AND && isa<ConstantSDNode>(X.getOperand(1)) &&
13606 X.getConstantOperandVal(1) == UINT64_C(0xffffffff)) {
13607 SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, X,
13608 DAG.getConstant(Log2_64(MulAmt2), DL, VT));
13609 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Shl,
13610 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), Shl);
13611 }
13612 // Otherwise, put the shl second so that it can fold with following
13613 // instructions (e.g. sext or add).
13614 SDValue Mul359 =
13615 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13616 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
13617 return DAG.getNode(ISD::SHL, DL, VT, Mul359,
13618 DAG.getConstant(Log2_64(MulAmt2), DL, VT));
13619 }
13620
13621 // 3/5/9 * 3/5/9 -> shXadd (shYadd X, X), (shYadd X, X)
13622 if (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9) {
13623 SDLoc DL(N);
13624 SDValue Mul359 =
13625 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13626 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
13627 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
13628 DAG.getConstant(Log2_64(MulAmt2 - 1), DL, VT),
13629 Mul359);
13630 }
13631 }
13632
13633 // If this is a power 2 + 2/4/8, we can use a shift followed by a single
13634 // shXadd. First check if this is a sum of two powers of 2 because that's
13635 // easy. Then count how many trailing zeros there are up to the first set bit.
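 // e.g. x * 34 == x * 32 + x * 2 -> sh1add x, (shl x, 5)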
13636 if (isPowerOf2_64(MulAmt & (MulAmt - 1))) {
13637 unsigned ScaleShift = llvm::countr_zero(MulAmt);
13638 if (ScaleShift >= 1 && ScaleShift < 4) {
13639 unsigned ShiftAmt = Log2_64((MulAmt & (MulAmt - 1)));
13640 SDLoc DL(N);
13641 SDValue Shift1 =
13642 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
13643 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13644 DAG.getConstant(ScaleShift, DL, VT), Shift1);
13645 }
13646 }
13647
13648 // 2^(1,2,3) * 3,5,9 + 1 -> (shXadd (shYadd x, x), x)
13649 // This is the two instruction form, there are also three instruction
13650 // variants we could implement. e.g.
13651 // (2^(1,2,3) * 3,5,9 + 1) << C2
13652 // 2^(C1>3) * 3,5,9 +/- 1
13653 for (uint64_t Divisor : {3, 5, 9}) {
13654 uint64_t C = MulAmt - 1;
13655 if (C <= Divisor)
13656 continue;
13657 unsigned TZ = llvm::countr_zero(C);
13658 if ((C >> TZ) == Divisor && (TZ == 1 || TZ == 2 || TZ == 3)) {
13659 SDLoc DL(N);
13660 SDValue Mul359 =
13661 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13662 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
13663 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
13664 DAG.getConstant(TZ, DL, VT), X);
13665 }
13666 }
13667
13668 // 2^n + 2/4/8 + 1 -> (add (shl X, C1), (shXadd X, X))
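 // e.g. x * 67 == x * 64 + x * 2 + x -> add (shl x, 6), (sh1add x, x)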
13669 if (MulAmt > 2 && isPowerOf2_64((MulAmt - 1) & (MulAmt - 2))) {
13670 unsigned ScaleShift = llvm::countr_zero(MulAmt - 1);
13671 if (ScaleShift >= 1 && ScaleShift < 4) {
13672 unsigned ShiftAmt = Log2_64(((MulAmt - 1) & (MulAmt - 2)));
13673 SDLoc DL(N);
13674 SDValue Shift1 =
13675 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
13676 return DAG.getNode(ISD::ADD, DL, VT, Shift1,
13677 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13678 DAG.getConstant(ScaleShift, DL, VT), X));
13679 }
13680 }
13681
13682 // 2^N - 3/5/9 --> (sub (shl X, C1), (shXadd X, x))
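 // e.g. x * 59 == x * 64 - x * 5 -> sub (shl x, 6), (sh2add x, x)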
13683 for (uint64_t Offset : {3, 5, 9}) {
13684 if (isPowerOf2_64(MulAmt + Offset)) {
13685 SDLoc DL(N);
13686 SDValue Shift1 =
13687 DAG.getNode(ISD::SHL, DL, VT, X,
13688 DAG.getConstant(Log2_64(MulAmt + Offset), DL, VT));
13689 SDValue Mul359 = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
13690 DAG.getConstant(Log2_64(Offset - 1), DL, VT),
13691 X);
13692 return DAG.getNode(ISD::SUB, DL, VT, Shift1, Mul359);
13693 }
13694 }
13695
13696 return SDValue();
13697}
13698
13699
13700 static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
13701 TargetLowering::DAGCombinerInfo &DCI,
13702 const RISCVSubtarget &Subtarget) {
13703 EVT VT = N->getValueType(0);
13704 if (!VT.isVector())
13705 return expandMul(N, DAG, DCI, Subtarget);
13706
13707 SDLoc DL(N);
13708 SDValue N0 = N->getOperand(0);
13709 SDValue N1 = N->getOperand(1);
13710 SDValue MulOper;
13711 unsigned AddSubOpc;
13712
13713 // vmadd: (mul (add x, 1), y) -> (add (mul x, y), y)
13714 // (mul x, add (y, 1)) -> (add x, (mul x, y))
13715 // vnmsub: (mul (sub 1, x), y) -> (sub y, (mul x, y))
13716 // (mul x, (sub 1, y)) -> (sub x, (mul x, y))
13717 auto IsAddSubWith1 = [&](SDValue V) -> bool {
13718 AddSubOpc = V->getOpcode();
13719 if ((AddSubOpc == ISD::ADD || AddSubOpc == ISD::SUB) && V->hasOneUse()) {
13720 SDValue Opnd = V->getOperand(1);
13721 MulOper = V->getOperand(0);
13722 if (AddSubOpc == ISD::SUB)
13723 std::swap(Opnd, MulOper);
13724 if (isOneOrOneSplat(Opnd))
13725 return true;
13726 }
13727 return false;
13728 };
13729
13730 if (IsAddSubWith1(N0)) {
13731 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N1, MulOper);
13732 return DAG.getNode(AddSubOpc, DL, VT, N1, MulVal);
13733 }
13734
13735 if (IsAddSubWith1(N1)) {
13736 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N0, MulOper);
13737 return DAG.getNode(AddSubOpc, DL, VT, N0, MulVal);
13738 }
13739
13740 if (SDValue V = combineBinOpOfZExt(N, DAG))
13741 return V;
13742
13743 return SDValue();
13744}
13745
13746/// According to the property that indexed load/store instructions zero-extend
13747/// their indices, try to narrow the type of index operand.
13748static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG) {
13749 if (isIndexTypeSigned(IndexType))
13750 return false;
13751
13752 if (!N->hasOneUse())
13753 return false;
13754
13755 EVT VT = N.getValueType();
13756 SDLoc DL(N);
13757
13758 // In general, what we're doing here is seeing if we can sink a truncate to
13759 // a smaller element type into the expression tree building our index.
13760 // TODO: We can generalize this and handle a bunch more cases if useful.
13761
13762 // Narrow a buildvector to the narrowest element type. This requires less
13763 // work and less register pressure at high LMUL, and creates smaller constants
13764 // which may be cheaper to materialize.
13765 if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) {
13766 KnownBits Known = DAG.computeKnownBits(N);
13767 unsigned ActiveBits = std::max(8u, Known.countMaxActiveBits());
13768 LLVMContext &C = *DAG.getContext();
13769 EVT ResultVT = EVT::getIntegerVT(C, ActiveBits).getRoundIntegerType(C);
13770 if (ResultVT.bitsLT(VT.getVectorElementType())) {
13771 N = DAG.getNode(ISD::TRUNCATE, DL,
13772 VT.changeVectorElementType(ResultVT), N);
13773 return true;
13774 }
13775 }
13776
13777 // Handle the pattern (shl (zext x to ty), C) and bits(x) + C < bits(ty).
13778 if (N.getOpcode() != ISD::SHL)
13779 return false;
13780
13781 SDValue N0 = N.getOperand(0);
13782 if (N0.getOpcode() != ISD::ZERO_EXTEND &&
13783 N0.getOpcode() != RISCVISD::VZEXT_VL)
13784 return false;
13785 if (!N0->hasOneUse())
13786 return false;
13787
13788 APInt ShAmt;
13789 SDValue N1 = N.getOperand(1);
13790 if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt))
13791 return false;
13792
13793 SDValue Src = N0.getOperand(0);
13794 EVT SrcVT = Src.getValueType();
13795 unsigned SrcElen = SrcVT.getScalarSizeInBits();
13796 unsigned ShAmtV = ShAmt.getZExtValue();
13797 unsigned NewElen = PowerOf2Ceil(SrcElen + ShAmtV);
13798 NewElen = std::max(NewElen, 8U);
13799
13800 // Skip if NewElen is not narrower than the original extended type.
13801 if (NewElen >= N0.getValueType().getScalarSizeInBits())
13802 return false;
13803
13804 EVT NewEltVT = EVT::getIntegerVT(*DAG.getContext(), NewElen);
13805 EVT NewVT = SrcVT.changeVectorElementType(NewEltVT);
13806
13807 SDValue NewExt = DAG.getNode(N0->getOpcode(), DL, NewVT, N0->ops());
13808 SDValue NewShAmtVec = DAG.getConstant(ShAmtV, DL, NewVT);
13809 N = DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec);
13810 return true;
13811}
13812
13813// Replace (seteq (i64 (and X, 0xffffffff)), C1) with
13814// (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
13815// bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg
13816// can become a sext.w instead of a shift pair.
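// For instance, (seteq (and X, 0xffffffff), 0x80000000) becomes
// (seteq (sext_inreg X, i32), 0xffffffff80000000); the sign-extended constant
// can then be materialized with a single LUI.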
13817 static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
13818 const RISCVSubtarget &Subtarget) {
13819 SDValue N0 = N->getOperand(0);
13820 SDValue N1 = N->getOperand(1);
13821 EVT VT = N->getValueType(0);
13822 EVT OpVT = N0.getValueType();
13823
13824 if (OpVT != MVT::i64 || !Subtarget.is64Bit())
13825 return SDValue();
13826
13827 // RHS needs to be a constant.
13828 auto *N1C = dyn_cast<ConstantSDNode>(N1);
13829 if (!N1C)
13830 return SDValue();
13831
13832 // LHS needs to be (and X, 0xffffffff).
13833 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
13834 !isa<ConstantSDNode>(N0.getOperand(1)) ||
13835 N0.getConstantOperandVal(1) != UINT64_C(0xffffffff))
13836 return SDValue();
13837
13838 // Looking for an equality compare.
13839 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
13840 if (!isIntEqualitySetCC(Cond))
13841 return SDValue();
13842
13843 // Don't do this if the sign bit is provably zero, it will be turned back into
13844 // an AND.
13845 APInt SignMask = APInt::getOneBitSet(64, 31);
13846 if (DAG.MaskedValueIsZero(N0.getOperand(0), SignMask))
13847 return SDValue();
13848
13849 const APInt &C1 = N1C->getAPIntValue();
13850
13851 SDLoc dl(N);
13852 // If the constant is larger than 2^32 - 1 it is impossible for both sides
13853 // to be equal.
13854 if (C1.getActiveBits() > 32)
13855 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
13856
13857 SDValue SExtOp = DAG.getNode(ISD::SIGN_EXTEND_INREG, N, OpVT,
13858 N0.getOperand(0), DAG.getValueType(MVT::i32));
13859 return DAG.getSetCC(dl, VT, SExtOp, DAG.getConstant(C1.trunc(32).sext(64),
13860 dl, OpVT), Cond);
13861}
13862
13863static SDValue
13864 performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG,
13865 const RISCVSubtarget &Subtarget) {
13866 SDValue Src = N->getOperand(0);
13867 EVT VT = N->getValueType(0);
13868
13869 // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X)
13870 if (Src.getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
13871 cast<VTSDNode>(N->getOperand(1))->getVT().bitsGE(MVT::i16))
13872 return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, SDLoc(N), VT,
13873 Src.getOperand(0));
13874
13875 return SDValue();
13876}
13877
13878namespace {
13879// Forward declaration of the structure holding the necessary information to
13880// apply a combine.
13881struct CombineResult;
13882
13883enum ExtKind : uint8_t { ZExt = 1 << 0, SExt = 1 << 1, FPExt = 1 << 2 };
13884/// Helper class for folding sign/zero extensions.
13885/// In particular, this class is used for the following combines:
13886/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
13887/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
13888/// mul | mul_vl -> vwmul(u) | vwmul_su
13889/// shl | shl_vl -> vwsll
13890/// fadd -> vfwadd | vfwadd_w
13891/// fsub -> vfwsub | vfwsub_w
13892/// fmul -> vfwmul
13893/// An object of this class represents an operand of the operation we want to
13894/// combine.
13895/// E.g., when trying to combine `mul_vl a, b`, we will have one instance of
13896/// NodeExtensionHelper for `a` and one for `b`.
13897///
13898/// This class abstracts away how the extension is materialized and
13899/// how its number of users affect the combines.
13900///
13901/// In particular:
13902/// - VWADD_W is conceptually == add(op0, sext(op1))
13903/// - VWADDU_W == add(op0, zext(op1))
13904/// - VWSUB_W == sub(op0, sext(op1))
13905/// - VWSUBU_W == sub(op0, zext(op1))
13906/// - VFWADD_W == fadd(op0, fpext(op1))
13907/// - VFWSUB_W == fsub(op0, fpext(op1))
13908/// And VMV_V_X_VL, depending on the value, is conceptually equivalent to
13909/// zext|sext(smaller_value).
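/// For example, add_vl (vsext_vl a), (splat 3) can typically be rewritten as a
/// vwadd_vl of the narrow operands, because a splat of a small constant
/// supports both sign and zero extension; SupportsZExt/SupportsSExt below
/// capture exactly this kind of information.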
13910struct NodeExtensionHelper {
13911 /// Records if this operand is like being zero extended.
13912 bool SupportsZExt;
13913 /// Records if this operand is like being sign extended.
13914 /// Note: SupportsZExt and SupportsSExt are not mutually exclusive. For
13915 /// instance, a splat constant (e.g., 3), would support being both sign and
13916 /// zero extended.
13917 bool SupportsSExt;
13918 /// Records if this operand is like being floating-Point extended.
13919 bool SupportsFPExt;
13920 /// This boolean captures whether we care if this operand would still be
13921 /// around after the folding happens.
13922 bool EnforceOneUse;
13923 /// Original value that this NodeExtensionHelper represents.
13924 SDValue OrigOperand;
13925
13926 /// Get the value feeding the extension or the value itself.
13927 /// E.g., for zext(a), this would return a.
13928 SDValue getSource() const {
13929 switch (OrigOperand.getOpcode()) {
13930 case ISD::ZERO_EXTEND:
13931 case ISD::SIGN_EXTEND:
13932 case RISCVISD::VSEXT_VL:
13933 case RISCVISD::VZEXT_VL:
13934 case RISCVISD::FP_EXTEND_VL:
13935 return OrigOperand.getOperand(0);
13936 default:
13937 return OrigOperand;
13938 }
13939 }
13940
13941 /// Check if this instance represents a splat.
13942 bool isSplat() const {
13943 return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL ||
13944 OrigOperand.getOpcode() == ISD::SPLAT_VECTOR;
13945 }
13946
13947 /// Get the extended opcode.
13948 unsigned getExtOpc(ExtKind SupportsExt) const {
13949 switch (SupportsExt) {
13950 case ExtKind::SExt:
13951 return RISCVISD::VSEXT_VL;
13952 case ExtKind::ZExt:
13953 return RISCVISD::VZEXT_VL;
13954 case ExtKind::FPExt:
13955 return RISCVISD::FP_EXTEND_VL;
13956 }
13957 llvm_unreachable("Unknown ExtKind enum");
13958 }
13959
13960 /// Get or create a value that can feed \p Root with the given extension \p
13961 /// SupportsExt. If \p SupportsExt is std::nullopt, this returns the source of this
13962 /// operand. \see ::getSource().
13963 SDValue getOrCreateExtendedOp(SDNode *Root, SelectionDAG &DAG,
13964 const RISCVSubtarget &Subtarget,
13965 std::optional<ExtKind> SupportsExt) const {
13966 if (!SupportsExt.has_value())
13967 return OrigOperand;
13968
13969 MVT NarrowVT = getNarrowType(Root, *SupportsExt);
13970
13971 SDValue Source = getSource();
13972 assert(Subtarget.getTargetLowering()->isTypeLegal(Source.getValueType()));
13973 if (Source.getValueType() == NarrowVT)
13974 return Source;
13975
13976 unsigned ExtOpc = getExtOpc(*SupportsExt);
13977
13978 // If we need an extension, we should be changing the type.
13979 SDLoc DL(OrigOperand);
13980 auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget);
13981 switch (OrigOperand.getOpcode()) {
13982 case ISD::ZERO_EXTEND:
13983 case ISD::SIGN_EXTEND:
13984 case RISCVISD::VSEXT_VL:
13985 case RISCVISD::VZEXT_VL:
13986 case RISCVISD::FP_EXTEND_VL:
13987 return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL);
13988 case ISD::SPLAT_VECTOR:
13989 return DAG.getSplat(NarrowVT, DL, Source.getOperand(0));
13990 case RISCVISD::VMV_V_X_VL:
13991 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT,
13992 DAG.getUNDEF(NarrowVT), Source.getOperand(1), VL);
13993 default:
13994 // Other opcodes can only come from the original LHS of VW(ADD|SUB)_W_VL
13995 // and that operand should already have the right NarrowVT so no
13996 // extension should be required at this point.
13997 llvm_unreachable("Unsupported opcode");
13998 }
13999 }
14000
14001 /// Helper function to get the narrow type for \p Root.
14002 /// The narrow type is the type of \p Root where we divided the size of each
14003 /// element by 2. E.g., if Root's type <2xi16> -> narrow type <2xi8>.
14004 /// \pre Both the narrow type and the original type should be legal.
14005 static MVT getNarrowType(const SDNode *Root, ExtKind SupportsExt) {
14006 MVT VT = Root->getSimpleValueType(0);
14007
14008 // Determine the narrow size.
14009 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
14010
14011 MVT EltVT = SupportsExt == ExtKind::FPExt
14012 ? MVT::getFloatingPointVT(NarrowSize)
14013 : MVT::getIntegerVT(NarrowSize);
14014
14015 assert((int)NarrowSize >= (SupportsExt == ExtKind::FPExt ? 16 : 8) &&
14016 "Trying to extend something we can't represent");
14017 MVT NarrowVT = MVT::getVectorVT(EltVT, VT.getVectorElementCount());
14018 return NarrowVT;
14019 }
14020
14021 /// Get the opcode to materialize:
14022 /// Opcode(sext(a), sext(b)) -> newOpcode(a, b)
14023 static unsigned getSExtOpcode(unsigned Opcode) {
14024 switch (Opcode) {
14025 case ISD::ADD:
14026 case RISCVISD::ADD_VL:
14027 case RISCVISD::VWADD_W_VL:
14028 case RISCVISD::VWADDU_W_VL:
14029 case ISD::OR:
14030 return RISCVISD::VWADD_VL;
14031 case ISD::SUB:
14032 case RISCVISD::SUB_VL:
14033 case RISCVISD::VWSUB_W_VL:
14034 case RISCVISD::VWSUBU_W_VL:
14035 return RISCVISD::VWSUB_VL;
14036 case ISD::MUL:
14037 case RISCVISD::MUL_VL:
14038 return RISCVISD::VWMUL_VL;
14039 default:
14040 llvm_unreachable("Unexpected opcode");
14041 }
14042 }
14043
14044 /// Get the opcode to materialize:
14045 /// Opcode(zext(a), zext(b)) -> newOpcode(a, b)
14046 static unsigned getZExtOpcode(unsigned Opcode) {
14047 switch (Opcode) {
14048 case ISD::ADD:
14049 case RISCVISD::ADD_VL:
14050 case RISCVISD::VWADD_W_VL:
14051 case RISCVISD::VWADDU_W_VL:
14052 case ISD::OR:
14053 return RISCVISD::VWADDU_VL;
14054 case ISD::SUB:
14055 case RISCVISD::SUB_VL:
14056 case RISCVISD::VWSUB_W_VL:
14057 case RISCVISD::VWSUBU_W_VL:
14058 return RISCVISD::VWSUBU_VL;
14059 case ISD::MUL:
14060 case RISCVISD::MUL_VL:
14061 return RISCVISD::VWMULU_VL;
14062 case ISD::SHL:
14063 case RISCVISD::SHL_VL:
14064 return RISCVISD::VWSLL_VL;
14065 default:
14066 llvm_unreachable("Unexpected opcode");
14067 }
14068 }
14069
14070 /// Get the opcode to materialize:
14071 /// Opcode(fpext(a), fpext(b)) -> newOpcode(a, b)
14072 static unsigned getFPExtOpcode(unsigned Opcode) {
14073 switch (Opcode) {
14074 case RISCVISD::FADD_VL:
14075 case RISCVISD::VFWADD_W_VL:
14076 return RISCVISD::VFWADD_VL;
14077 case RISCVISD::FSUB_VL:
14078 case RISCVISD::VFWSUB_W_VL:
14079 return RISCVISD::VFWSUB_VL;
14080 case RISCVISD::FMUL_VL:
14081 return RISCVISD::VFWMUL_VL;
14082 default:
14083 llvm_unreachable("Unexpected opcode");
14084 }
14085 }
14086
14087 /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) ->
14088 /// newOpcode(a, b).
14089 static unsigned getSUOpcode(unsigned Opcode) {
14090 assert((Opcode == RISCVISD::MUL_VL || Opcode == ISD::MUL) &&
14091 "SU is only supported for MUL");
14092 return RISCVISD::VWMULSU_VL;
14093 }
14094
14095 /// Get the opcode to materialize
14096 /// \p Opcode(a, s|z|fpext(b)) -> newOpcode(a, b).
14097 static unsigned getWOpcode(unsigned Opcode, ExtKind SupportsExt) {
14098 switch (Opcode) {
14099 case ISD::ADD:
14100 case RISCVISD::ADD_VL:
14101 case ISD::OR:
14102 return SupportsExt == ExtKind::SExt ? RISCVISD::VWADD_W_VL
14103 : RISCVISD::VWADDU_W_VL;
14104 case ISD::SUB:
14105 case RISCVISD::SUB_VL:
14106 return SupportsExt == ExtKind::SExt ? RISCVISD::VWSUB_W_VL
14107 : RISCVISD::VWSUBU_W_VL;
14108 case RISCVISD::FADD_VL:
14109 return RISCVISD::VFWADD_W_VL;
14110 case RISCVISD::FSUB_VL:
14111 return RISCVISD::VFWSUB_W_VL;
14112 default:
14113 llvm_unreachable("Unexpected opcode");
14114 }
14115 }
14116
14117 using CombineToTry = std::function<std::optional<CombineResult>(
14118 SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/,
14119 const NodeExtensionHelper & /*RHS*/, SelectionDAG &,
14120 const RISCVSubtarget &)>;
14121
14122 /// Check if this node needs to be fully folded or extended for all users.
14123 bool needToPromoteOtherUsers() const { return EnforceOneUse; }
14124
14125 void fillUpExtensionSupportForSplat(SDNode *Root, SelectionDAG &DAG,
14126 const RISCVSubtarget &Subtarget) {
14127 unsigned Opc = OrigOperand.getOpcode();
14128 MVT VT = OrigOperand.getSimpleValueType();
14129
14130 assert((Opc == ISD::SPLAT_VECTOR || Opc == RISCVISD::VMV_V_X_VL) &&
14131 "Unexpected Opcode");
14132
14133 // The passthru must be undef for tail agnostic.
14134 if (Opc == RISCVISD::VMV_V_X_VL && !OrigOperand.getOperand(0).isUndef())
14135 return;
14136
14137 // Get the scalar value.
14138 SDValue Op = Opc == ISD::SPLAT_VECTOR ? OrigOperand.getOperand(0)
14139 : OrigOperand.getOperand(1);
14140
14141 // See if we have enough sign bits or zero bits in the scalar to use a
14142 // widening opcode by splatting to smaller element size.
14143 unsigned EltBits = VT.getScalarSizeInBits();
14144 unsigned ScalarBits = Op.getValueSizeInBits();
14145 // Make sure we're getting all element bits from the scalar register.
14146 // FIXME: Support implicit sign extension of vmv.v.x?
14147 if (ScalarBits < EltBits)
14148 return;
14149
14150 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
14151 // If the narrow type cannot be expressed with a legal VMV,
14152 // this is not a valid candidate.
14153 if (NarrowSize < 8)
14154 return;
14155
14156 if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize)
14157 SupportsSExt = true;
14158
14159 if (DAG.MaskedValueIsZero(Op,
14160 APInt::getBitsSetFrom(ScalarBits, NarrowSize)))
14161 SupportsZExt = true;
14162
14163 EnforceOneUse = false;
14164 }
14165
14166 /// Helper method to set the various fields of this struct based on the
14167 /// type of \p Root.
14168 void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG,
14169 const RISCVSubtarget &Subtarget) {
14170 SupportsZExt = false;
14171 SupportsSExt = false;
14172 SupportsFPExt = false;
14173 EnforceOneUse = true;
14174 unsigned Opc = OrigOperand.getOpcode();
14175 // For the nodes we handle below, we end up using their inputs directly: see
14176 // getSource(). However since they either don't have a passthru or we check
14177 // that their passthru is undef, we can safely ignore their mask and VL.
14178 switch (Opc) {
14179 case ISD::ZERO_EXTEND:
14180 case ISD::SIGN_EXTEND: {
14181 MVT VT = OrigOperand.getSimpleValueType();
14182 if (!VT.isVector())
14183 break;
14184
14185 SDValue NarrowElt = OrigOperand.getOperand(0);
14186 MVT NarrowVT = NarrowElt.getSimpleValueType();
14187 // i1 types are legal but we can't select V{S,Z}EXT_VLs with them.
14188 if (NarrowVT.getVectorElementType() == MVT::i1)
14189 break;
14190
14191 SupportsZExt = Opc == ISD::ZERO_EXTEND;
14192 SupportsSExt = Opc == ISD::SIGN_EXTEND;
14193 break;
14194 }
14195 case RISCVISD::VZEXT_VL:
14196 SupportsZExt = true;
14197 break;
14198 case RISCVISD::VSEXT_VL:
14199 SupportsSExt = true;
14200 break;
14201 case RISCVISD::FP_EXTEND_VL:
14202 SupportsFPExt = true;
14203 break;
14204 case ISD::SPLAT_VECTOR:
14205 case RISCVISD::VMV_V_X_VL:
14206 fillUpExtensionSupportForSplat(Root, DAG, Subtarget);
14207 break;
14208 default:
14209 break;
14210 }
14211 }
14212
14213 /// Check if \p Root supports any extension folding combines.
14214 static bool isSupportedRoot(const SDNode *Root,
14215 const RISCVSubtarget &Subtarget) {
14216 switch (Root->getOpcode()) {
14217 case ISD::ADD:
14218 case ISD::SUB:
14219 case ISD::MUL: {
14220 return Root->getValueType(0).isScalableVector();
14221 }
14222 case ISD::OR: {
14223 return Root->getValueType(0).isScalableVector() &&
14224 Root->getFlags().hasDisjoint();
14225 }
14226 // Vector Widening Integer Add/Sub/Mul Instructions
14227 case RISCVISD::ADD_VL:
14228 case RISCVISD::MUL_VL:
14229 case RISCVISD::VWADD_W_VL:
14230 case RISCVISD::VWADDU_W_VL:
14231 case RISCVISD::SUB_VL:
14232 case RISCVISD::VWSUB_W_VL:
14233 case RISCVISD::VWSUBU_W_VL:
14234 // Vector Widening Floating-Point Add/Sub/Mul Instructions
14235 case RISCVISD::FADD_VL:
14236 case RISCVISD::FSUB_VL:
14237 case RISCVISD::FMUL_VL:
14238 case RISCVISD::VFWADD_W_VL:
14239 case RISCVISD::VFWSUB_W_VL:
14240 return true;
14241 case ISD::SHL:
14242 return Root->getValueType(0).isScalableVector() &&
14243 Subtarget.hasStdExtZvbb();
14244 case RISCVISD::SHL_VL:
14245 return Subtarget.hasStdExtZvbb();
14246 default:
14247 return false;
14248 }
14249 }
14250
14251 /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx).
14252 NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG,
14253 const RISCVSubtarget &Subtarget) {
14254 assert(isSupportedRoot(Root, Subtarget) &&
14255 "Trying to build an helper with an "
14256 "unsupported root");
14257 assert(OperandIdx < 2 && "Requesting something else than LHS or RHS");
14259 OrigOperand = Root->getOperand(OperandIdx);
14260
14261 unsigned Opc = Root->getOpcode();
14262 switch (Opc) {
14263 // We consider
14264 // VW<ADD|SUB>_W(LHS, RHS) -> <ADD|SUB>(LHS, SEXT(RHS))
14265 // VW<ADD|SUB>U_W(LHS, RHS) -> <ADD|SUB>(LHS, ZEXT(RHS))
14266 // VFW<ADD|SUB>_W(LHS, RHS) -> F<ADD|SUB>(LHS, FPEXT(RHS))
14267 case RISCVISD::VWADD_W_VL:
14268 case RISCVISD::VWADDU_W_VL:
14269 case RISCVISD::VWSUB_W_VL:
14270 case RISCVISD::VWSUBU_W_VL:
14271 case RISCVISD::VFWADD_W_VL:
14272 case RISCVISD::VFWSUB_W_VL:
14273 if (OperandIdx == 1) {
14274 SupportsZExt =
14275 Opc == RISCVISD::VWADDU_W_VL || Opc == RISCVISD::VWSUBU_W_VL;
14276 SupportsSExt =
14277 Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWSUB_W_VL;
14278 SupportsFPExt =
14279 Opc == RISCVISD::VFWADD_W_VL || Opc == RISCVISD::VFWSUB_W_VL;
14280 // There's no existing extension here, so we don't have to worry about
14281 // making sure it gets removed.
14282 EnforceOneUse = false;
14283 break;
14284 }
14285 [[fallthrough]];
14286 default:
14287 fillUpExtensionSupport(Root, DAG, Subtarget);
14288 break;
14289 }
14290 }
14291
14292 /// Helper function to get the Mask and VL from \p Root.
14293 static std::pair<SDValue, SDValue>
14294 getMaskAndVL(const SDNode *Root, SelectionDAG &DAG,
14295 const RISCVSubtarget &Subtarget) {
14296 assert(isSupportedRoot(Root, Subtarget) && "Unexpected root");
14297 switch (Root->getOpcode()) {
14298 case ISD::ADD:
14299 case ISD::SUB:
14300 case ISD::MUL:
14301 case ISD::OR:
14302 case ISD::SHL: {
14303 SDLoc DL(Root);
14304 MVT VT = Root->getSimpleValueType(0);
14305 return getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
14306 }
14307 default:
14308 return std::make_pair(Root->getOperand(3), Root->getOperand(4));
14309 }
14310 }
14311
14312 /// Helper function to check if \p N is commutative with respect to the
14313 /// foldings that are supported by this class.
14314 static bool isCommutative(const SDNode *N) {
14315 switch (N->getOpcode()) {
14316 case ISD::ADD:
14317 case ISD::MUL:
14318 case ISD::OR:
14319 case RISCVISD::ADD_VL:
14320 case RISCVISD::MUL_VL:
14321 case RISCVISD::VWADD_W_VL:
14322 case RISCVISD::VWADDU_W_VL:
14323 case RISCVISD::FADD_VL:
14324 case RISCVISD::FMUL_VL:
14325 case RISCVISD::VFWADD_W_VL:
14326 return true;
14327 case ISD::SUB:
14328 case RISCVISD::SUB_VL:
14329 case RISCVISD::VWSUB_W_VL:
14330 case RISCVISD::VWSUBU_W_VL:
14331 case RISCVISD::FSUB_VL:
14332 case RISCVISD::VFWSUB_W_VL:
14333 case ISD::SHL:
14334 case RISCVISD::SHL_VL:
14335 return false;
14336 default:
14337 llvm_unreachable("Unexpected opcode");
14338 }
14339 }
14340
14341 /// Get a list of combine to try for folding extensions in \p Root.
14342 /// Note that each returned CombineToTry function doesn't actually modify
14343 /// anything. Instead they produce an optional CombineResult that, if not
14344 /// std::nullopt, needs to be materialized for the combine to be applied.
14345 /// \see CombineResult::materialize.
14346 /// If the related CombineToTry function returns std::nullopt, that means the
14347 /// combine didn't match.
14348 static SmallVector<CombineToTry> getSupportedFoldings(const SDNode *Root);
14349};
14350
14351/// Helper structure that holds all the necessary information to materialize a
14352/// combine that does some extension folding.
14353struct CombineResult {
14354 /// Opcode to be generated when materializing the combine.
14355 unsigned TargetOpcode;
14356 // No value means no extension is needed.
14357 std::optional<ExtKind> LHSExt;
14358 std::optional<ExtKind> RHSExt;
14359 /// Root of the combine.
14360 SDNode *Root;
14361 /// LHS of the TargetOpcode.
14362 NodeExtensionHelper LHS;
14363 /// RHS of the TargetOpcode.
14364 NodeExtensionHelper RHS;
14365
14366 CombineResult(unsigned TargetOpcode, SDNode *Root,
14367 const NodeExtensionHelper &LHS, std::optional<ExtKind> LHSExt,
14368 const NodeExtensionHelper &RHS, std::optional<ExtKind> RHSExt)
14369 : TargetOpcode(TargetOpcode), LHSExt(LHSExt), RHSExt(RHSExt), Root(Root),
14370 LHS(LHS), RHS(RHS) {}
14371
14372 /// Return a value that uses TargetOpcode and that can be used to replace
14373 /// Root.
14374 /// The actual replacement is *not* done in that method.
14375 SDValue materialize(SelectionDAG &DAG,
14376 const RISCVSubtarget &Subtarget) const {
14377 SDValue Mask, VL, Merge;
14378 std::tie(Mask, VL) =
14379 NodeExtensionHelper::getMaskAndVL(Root, DAG, Subtarget);
14380 switch (Root->getOpcode()) {
14381 default:
14382 Merge = Root->getOperand(2);
14383 break;
14384 case ISD::ADD:
14385 case ISD::SUB:
14386 case ISD::MUL:
14387 case ISD::OR:
14388 case ISD::SHL:
14389 Merge = DAG.getUNDEF(Root->getValueType(0));
14390 break;
14391 }
14392 return DAG.getNode(TargetOpcode, SDLoc(Root), Root->getValueType(0),
14393 LHS.getOrCreateExtendedOp(Root, DAG, Subtarget, LHSExt),
14394 RHS.getOrCreateExtendedOp(Root, DAG, Subtarget, RHSExt),
14395 Merge, Mask, VL);
14396 }
14397};
14398
14399/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
14400/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
14401/// are zext) and LHS and RHS can be folded into Root.
14402/// AllowExtMask defines which forms `ext` may take in this pattern.
14403///
14404/// \note If the pattern can match with both zext and sext, the returned
14405/// CombineResult will feature the zext result.
14406///
14407/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14408/// can be used to apply the pattern.
14409static std::optional<CombineResult>
14410canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS,
14411 const NodeExtensionHelper &RHS,
14412 uint8_t AllowExtMask, SelectionDAG &DAG,
14413 const RISCVSubtarget &Subtarget) {
14414 if ((AllowExtMask & ExtKind::ZExt) && LHS.SupportsZExt && RHS.SupportsZExt)
14415 return CombineResult(NodeExtensionHelper::getZExtOpcode(Root->getOpcode()),
14416 Root, LHS, /*LHSExt=*/{ExtKind::ZExt}, RHS,
14417 /*RHSExt=*/{ExtKind::ZExt});
14418 if ((AllowExtMask & ExtKind::SExt) && LHS.SupportsSExt && RHS.SupportsSExt)
14419 return CombineResult(NodeExtensionHelper::getSExtOpcode(Root->getOpcode()),
14420 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
14421 /*RHSExt=*/{ExtKind::SExt});
14422 if ((AllowExtMask & ExtKind::FPExt) && LHS.SupportsFPExt && RHS.SupportsFPExt)
14423 return CombineResult(NodeExtensionHelper::getFPExtOpcode(Root->getOpcode()),
14424 Root, LHS, /*LHSExt=*/{ExtKind::FPExt}, RHS,
14425 /*RHSExt=*/{ExtKind::FPExt});
14426 return std::nullopt;
14427}
14428
14429/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
14430/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
14431/// are zext) and LHS and RHS can be folded into Root.
14432///
14433/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14434/// can be used to apply the pattern.
14435static std::optional<CombineResult>
14436canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS,
14437 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14438 const RISCVSubtarget &Subtarget) {
14439 return canFoldToVWWithSameExtensionImpl(
14440 Root, LHS, RHS, ExtKind::ZExt | ExtKind::SExt | ExtKind::FPExt, DAG,
14441 Subtarget);
14442}
14443
14444/// Check if \p Root follows a pattern Root(LHS, ext(RHS))
14445///
14446/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14447/// can be used to apply the pattern.
14448static std::optional<CombineResult>
14449canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS,
14450 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14451 const RISCVSubtarget &Subtarget) {
14452 if (RHS.SupportsFPExt)
14453 return CombineResult(
14454 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::FPExt),
14455 Root, LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::FPExt});
14456
14457 // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar
14458 // sext/zext?
14459 // Control this behavior behind an option (AllowSplatInVW_W) for testing
14460 // purposes.
14461 if (RHS.SupportsZExt && (!RHS.isSplat() || AllowSplatInVW_W))
14462 return CombineResult(
14463 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::ZExt), Root,
14464 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::ZExt});
14465 if (RHS.SupportsSExt && (!RHS.isSplat() || AllowSplatInVW_W))
14466 return CombineResult(
14467 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::SExt), Root,
14468 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::SExt});
14469 return std::nullopt;
14470}
14471
14472/// Check if \p Root follows a pattern Root(sext(LHS), sext(RHS))
14473///
14474/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14475/// can be used to apply the pattern.
14476static std::optional<CombineResult>
14477canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS,
14478 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14479 const RISCVSubtarget &Subtarget) {
14480 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::SExt, DAG,
14481 Subtarget);
14482}
14483
14484/// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS))
14485///
14486/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14487/// can be used to apply the pattern.
14488static std::optional<CombineResult>
14489canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS,
14490 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14491 const RISCVSubtarget &Subtarget) {
14492 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::ZExt, DAG,
14493 Subtarget);
14494}
14495
14496/// Check if \p Root follows a pattern Root(fpext(LHS), fpext(RHS))
14497///
14498/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14499/// can be used to apply the pattern.
14500static std::optional<CombineResult>
14501canFoldToVWWithFPEXT(SDNode *Root, const NodeExtensionHelper &LHS,
14502 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14503 const RISCVSubtarget &Subtarget) {
14504 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::FPExt, DAG,
14505 Subtarget);
14506}
14507
14508/// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS))
14509///
14510/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14511/// can be used to apply the pattern.
14512static std::optional<CombineResult>
14513canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS,
14514 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14515 const RISCVSubtarget &Subtarget) {
14516
14517 if (!LHS.SupportsSExt || !RHS.SupportsZExt)
14518 return std::nullopt;
14519 return CombineResult(NodeExtensionHelper::getSUOpcode(Root->getOpcode()),
14520 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
14521 /*RHSExt=*/{ExtKind::ZExt});
14522}
14523
14524SmallVector<NodeExtensionHelper::CombineToTry>
14525NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) {
14526 SmallVector<CombineToTry> Strategies;
14527 switch (Root->getOpcode()) {
14528 case ISD::ADD:
14529 case ISD::SUB:
14530 case ISD::OR:
14531 case RISCVISD::ADD_VL:
14532 case RISCVISD::SUB_VL:
14533 case RISCVISD::FADD_VL:
14534 case RISCVISD::FSUB_VL:
14535 // add|sub|fadd|fsub-> vwadd(u)|vwsub(u)|vfwadd|vfwsub
14536 Strategies.push_back(canFoldToVWWithSameExtension);
14537 // add|sub|fadd|fsub -> vwadd(u)_w|vwsub(u)_w|vfwadd_w|vfwsub_w
14538 Strategies.push_back(canFoldToVW_W);
14539 break;
14540 case RISCVISD::FMUL_VL:
14541 Strategies.push_back(canFoldToVWWithSameExtension);
14542 break;
14543 case ISD::MUL:
14544 case RISCVISD::MUL_VL:
14545 // mul -> vwmul(u)
14546 Strategies.push_back(canFoldToVWWithSameExtension);
14547 // mul -> vwmulsu
14548 Strategies.push_back(canFoldToVW_SU);
14549 break;
14550 case ISD::SHL:
14551 case RISCVISD::SHL_VL:
14552 // shl -> vwsll
14553 Strategies.push_back(canFoldToVWWithZEXT);
14554 break;
14555 case RISCVISD::VWADD_W_VL:
14556 case RISCVISD::VWSUB_W_VL:
14557 // vwadd_w|vwsub_w -> vwadd|vwsub
14558 Strategies.push_back(canFoldToVWWithSEXT);
14559 break;
14560 case RISCVISD::VWADDU_W_VL:
14561 case RISCVISD::VWSUBU_W_VL:
14562 // vwaddu_w|vwsubu_w -> vwaddu|vwsubu
14563 Strategies.push_back(canFoldToVWWithZEXT);
14564 break;
14565 case RISCVISD::VFWADD_W_VL:
14566 case RISCVISD::VFWSUB_W_VL:
14567 // vfwadd_w|vfwsub_w -> vfwadd|vfwsub
14568 Strategies.push_back(canFoldToVWWithFPEXT);
14569 break;
14570 default:
14571 llvm_unreachable("Unexpected opcode");
14572 }
14573 return Strategies;
14574}
14575} // End anonymous namespace.
14576
14577/// Combine a binary operation to its equivalent VW or VW_W form.
14578/// The supported combines are:
14579/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
14580/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
14581/// mul | mul_vl -> vwmul(u) | vwmul_su
14582/// shl | shl_vl -> vwsll
14583/// fadd_vl -> vfwadd | vfwadd_w
14584/// fsub_vl -> vfwsub | vfwsub_w
14585/// fmul_vl -> vfwmul
14586/// vwadd_w(u) -> vwadd(u)
14587/// vwsub_w(u) -> vwsub(u)
14588/// vfwadd_w -> vfwadd
14589/// vfwsub_w -> vfwsub
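///
/// As an illustrative example (types and names are hypothetical, not taken
/// from this file): with RVV enabled, the scalable-vector IR
///   %a = sext <vscale x 2 x i32> %x to <vscale x 2 x i64>
///   %b = sext <vscale x 2 x i32> %y to <vscale x 2 x i64>
///   %r = add <vscale x 2 x i64> %a, %b
/// can be selected as a single vwadd.vv on the narrow operands instead of two
/// vsext.vf2 instructions followed by a full-width vadd.vv.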
14590static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N,
14591 TargetLowering::DAGCombinerInfo &DCI,
14592 const RISCVSubtarget &Subtarget) {
14593 SelectionDAG &DAG = DCI.DAG;
14594 if (DCI.isBeforeLegalize())
14595 return SDValue();
14596
14597 if (!NodeExtensionHelper::isSupportedRoot(N, Subtarget))
14598 return SDValue();
14599
14600 SmallVector<SDNode *> Worklist;
14601 SmallSet<SDNode *, 8> Inserted;
14602 Worklist.push_back(N);
14603 Inserted.insert(N);
14604 SmallVector<CombineResult> CombinesToApply;
14605
14606 while (!Worklist.empty()) {
14607 SDNode *Root = Worklist.pop_back_val();
14608 if (!NodeExtensionHelper::isSupportedRoot(Root, Subtarget))
14609 return SDValue();
14610
14611 NodeExtensionHelper LHS(N, 0, DAG, Subtarget);
14612 NodeExtensionHelper RHS(N, 1, DAG, Subtarget);
14613 auto AppendUsersIfNeeded = [&Worklist,
14614 &Inserted](const NodeExtensionHelper &Op) {
14615 if (Op.needToPromoteOtherUsers()) {
14616 for (SDNode *TheUse : Op.OrigOperand->uses()) {
14617 if (Inserted.insert(TheUse).second)
14618 Worklist.push_back(TheUse);
14619 }
14620 }
14621 };
14622
14623 // Control the compile time by limiting the number of node we look at in
14624 // total.
14625 if (Inserted.size() > ExtensionMaxWebSize)
14626 return SDValue();
14627
14628 SmallVector<NodeExtensionHelper::CombineToTry> FoldingStrategies =
14629 NodeExtensionHelper::getSupportedFoldings(N);
14630
14631 assert(!FoldingStrategies.empty() && "Nothing to be folded");
14632 bool Matched = false;
14633 for (int Attempt = 0;
14634 (Attempt != 1 + NodeExtensionHelper::isCommutative(N)) && !Matched;
14635 ++Attempt) {
14636
14637 for (NodeExtensionHelper::CombineToTry FoldingStrategy :
14638 FoldingStrategies) {
14639 std::optional<CombineResult> Res =
14640 FoldingStrategy(N, LHS, RHS, DAG, Subtarget);
14641 if (Res) {
14642 Matched = true;
14643 CombinesToApply.push_back(*Res);
14644 // All the inputs that are extended need to be folded, otherwise
14645 // we would be leaving the old input (since it may still be used),
14646 // and the new one.
14647 if (Res->LHSExt.has_value())
14648 AppendUsersIfNeeded(LHS);
14649 if (Res->RHSExt.has_value())
14650 AppendUsersIfNeeded(RHS);
14651 break;
14652 }
14653 }
14654 std::swap(LHS, RHS);
14655 }
14656 // Right now we do an all or nothing approach.
14657 if (!Matched)
14658 return SDValue();
14659 }
14660 // Store the value for the replacement of the input node separately.
14661 SDValue InputRootReplacement;
14662 // We do the RAUW after we materialize all the combines, because some replaced
14663 // nodes may be feeding some of the yet-to-be-replaced nodes. Put differently,
14664 // some of these nodes may appear in the NodeExtensionHelpers of some of the
14665 // yet-to-be-visited CombinesToApply roots.
14666 SmallVector<std::pair<SDValue, SDValue>> ValuesToReplace;
14667 ValuesToReplace.reserve(CombinesToApply.size());
14668 for (CombineResult Res : CombinesToApply) {
14669 SDValue NewValue = Res.materialize(DAG, Subtarget);
14670 if (!InputRootReplacement) {
14671 assert(Res.Root == N &&
14672 "First element is expected to be the current node");
14673 InputRootReplacement = NewValue;
14674 } else {
14675 ValuesToReplace.emplace_back(SDValue(Res.Root, 0), NewValue);
14676 }
14677 }
14678 for (std::pair<SDValue, SDValue> OldNewValues : ValuesToReplace) {
14679 DAG.ReplaceAllUsesOfValueWith(OldNewValues.first, OldNewValues.second);
14680 DCI.AddToWorklist(OldNewValues.second.getNode());
14681 }
14682 return InputRootReplacement;
14683}
14684
14685// Fold (vwadd(u).wv y, (vmerge cond, x, 0)) -> vwadd(u).wv y, x, y, cond
14686// (vwsub(u).wv y, (vmerge cond, x, 0)) -> vwsub(u).wv y, x, y, cond
14687// y will be the Passthru and cond will be the Mask.
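// Intuition, using the operand names above: in lanes where cond is false the
// original expression adds the vmerge's all-zeros false operand, so it simply
// reproduces y; making cond the mask and y the passthru of the widening op
// yields the same value for those lanes without the extra vmerge.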
14688static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG) {
14689 unsigned Opc = N->getOpcode();
14690 assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
14691 Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
14692
14693 SDValue Y = N->getOperand(0);
14694 SDValue MergeOp = N->getOperand(1);
14695 unsigned MergeOpc = MergeOp.getOpcode();
14696
14697 if (MergeOpc != RISCVISD::VMERGE_VL && MergeOpc != ISD::VSELECT)
14698 return SDValue();
14699
14700 SDValue X = MergeOp->getOperand(1);
14701
14702 if (!MergeOp.hasOneUse())
14703 return SDValue();
14704
14705 // Passthru should be undef
14706 SDValue Passthru = N->getOperand(2);
14707 if (!Passthru.isUndef())
14708 return SDValue();
14709
14710 // Mask should be all ones
14711 SDValue Mask = N->getOperand(3);
14712 if (Mask.getOpcode() != RISCVISD::VMSET_VL)
14713 return SDValue();
14714
14715 // False value of MergeOp should be all zeros
14716 SDValue Z = MergeOp->getOperand(2);
14717
14718 if (Z.getOpcode() == ISD::INSERT_SUBVECTOR &&
14719 (isNullOrNullSplat(Z.getOperand(0)) || Z.getOperand(0).isUndef()))
14720 Z = Z.getOperand(1);
14721
14722 if (!ISD::isConstantSplatVectorAllZeros(Z.getNode()))
14723 return SDValue();
14724
14725 return DAG.getNode(Opc, SDLoc(N), N->getValueType(0),
14726 {Y, X, Y, MergeOp->getOperand(0), N->getOperand(4)},
14727 N->getFlags());
14728}
14729
14730static SDValue performVWADDSUBW_VLCombine(SDNode *N,
14731 TargetLowering::DAGCombinerInfo &DCI,
14732 const RISCVSubtarget &Subtarget) {
14733 [[maybe_unused]] unsigned Opc = N->getOpcode();
14734 assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
14735 Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
14736
14737 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
14738 return V;
14739
14740 return combineVWADDSUBWSelect(N, DCI.DAG);
14741}
14742
14743// Helper function for performMemPairCombine.
14744// Try to combine the memory loads/stores LSNode1 and LSNode2
14745// into a single memory pair operation.
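// Sketch of the intent (offsets illustrative): two adjacent i64 loads at
// Base+16 and Base+24 can be merged into a single XTHeadMemPair th.ldd-style
// node that produces both loaded values plus one chain; the second load's
// users are then rewired to the new node's second result below.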
14746static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1,
14747 LSBaseSDNode *LSNode2, SDValue BasePtr,
14748 uint64_t Imm) {
14749 SmallPtrSet<const SDNode *, 32> Visited;
14750 SmallVector<const SDNode *, 8> Worklist = {LSNode1, LSNode2};
14751
14752 if (SDNode::hasPredecessorHelper(LSNode1, Visited, Worklist) ||
14753 SDNode::hasPredecessorHelper(LSNode2, Visited, Worklist))
14754 return SDValue();
14755
14756 MachineFunction &MF = DAG.getMachineFunction();
14757 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
14758
14759 // The new operation has twice the width.
14760 MVT XLenVT = Subtarget.getXLenVT();
14761 EVT MemVT = LSNode1->getMemoryVT();
14762 EVT NewMemVT = (MemVT == MVT::i32) ? MVT::i64 : MVT::i128;
14763 MachineMemOperand *MMO = LSNode1->getMemOperand();
14764 MachineMemOperand *NewMMO = MF.getMachineMemOperand(
14765 MMO, MMO->getPointerInfo(), MemVT == MVT::i32 ? 8 : 16);
14766
14767 if (LSNode1->getOpcode() == ISD::LOAD) {
14768 auto Ext = cast<LoadSDNode>(LSNode1)->getExtensionType();
14769 unsigned Opcode;
14770 if (MemVT == MVT::i32)
14771 Opcode = (Ext == ISD::ZEXTLOAD) ? RISCVISD::TH_LWUD : RISCVISD::TH_LWD;
14772 else
14773 Opcode = RISCVISD::TH_LDD;
14774
14775 SDValue Res = DAG.getMemIntrinsicNode(
14776 Opcode, SDLoc(LSNode1), DAG.getVTList({XLenVT, XLenVT, MVT::Other}),
14777 {LSNode1->getChain(), BasePtr,
14778 DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
14779 NewMemVT, NewMMO);
14780
14781 SDValue Node1 =
14782 DAG.getMergeValues({Res.getValue(0), Res.getValue(2)}, SDLoc(LSNode1));
14783 SDValue Node2 =
14784 DAG.getMergeValues({Res.getValue(1), Res.getValue(2)}, SDLoc(LSNode2));
14785
14786 DAG.ReplaceAllUsesWith(LSNode2, Node2.getNode());
14787 return Node1;
14788 } else {
14789 unsigned Opcode = (MemVT == MVT::i32) ? RISCVISD::TH_SWD : RISCVISD::TH_SDD;
14790
14791 SDValue Res = DAG.getMemIntrinsicNode(
14792 Opcode, SDLoc(LSNode1), DAG.getVTList(MVT::Other),
14793 {LSNode1->getChain(), LSNode1->getOperand(1), LSNode2->getOperand(1),
14794 BasePtr, DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
14795 NewMemVT, NewMMO);
14796
14797 DAG.ReplaceAllUsesWith(LSNode2, Res.getNode());
14798 return Res;
14799 }
14800}
14801
14802// Try to combine two adjacent loads/stores to a single pair instruction from
14803// the XTHeadMemPair vendor extension.
14804static SDValue performMemPairCombine(SDNode *N,
14805 TargetLowering::DAGCombinerInfo &DCI) {
14806 SelectionDAG &DAG = DCI.DAG;
14807 MachineFunction &MF = DAG.getMachineFunction();
14808 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
14809
14810 // Target does not support load/store pair.
14811 if (!Subtarget.hasVendorXTHeadMemPair())
14812 return SDValue();
14813
14814 LSBaseSDNode *LSNode1 = cast<LSBaseSDNode>(N);
14815 EVT MemVT = LSNode1->getMemoryVT();
14816 unsigned OpNum = LSNode1->getOpcode() == ISD::LOAD ? 1 : 2;
14817
14818 // No volatile, indexed or atomic loads/stores.
14819 if (!LSNode1->isSimple() || LSNode1->isIndexed())
14820 return SDValue();
14821
14822 // Function to get a base + constant representation from a memory value.
14823 auto ExtractBaseAndOffset = [](SDValue Ptr) -> std::pair<SDValue, uint64_t> {
14824 if (Ptr->getOpcode() == ISD::ADD)
14825 if (auto *C1 = dyn_cast<ConstantSDNode>(Ptr->getOperand(1)))
14826 return {Ptr->getOperand(0), C1->getZExtValue()};
14827 return {Ptr, 0};
14828 };
14829
14830 auto [Base1, Offset1] = ExtractBaseAndOffset(LSNode1->getOperand(OpNum));
14831
14832 SDValue Chain = N->getOperand(0);
14833 for (SDNode::use_iterator UI = Chain->use_begin(), UE = Chain->use_end();
14834 UI != UE; ++UI) {
14835 SDUse &Use = UI.getUse();
14836 if (Use.getUser() != N && Use.getResNo() == 0 &&
14837 Use.getUser()->getOpcode() == N->getOpcode()) {
14838 LSBaseSDNode *LSNode2 = cast<LSBaseSDNode>(Use.getUser());
14839
14840 // No volatile, indexed or atomic loads/stores.
14841 if (!LSNode2->isSimple() || LSNode2->isIndexed())
14842 continue;
14843
14844 // Check if LSNode1 and LSNode2 have the same type and extension.
14845 if (LSNode1->getOpcode() == ISD::LOAD)
14846 if (cast<LoadSDNode>(LSNode2)->getExtensionType() !=
14847 cast<LoadSDNode>(LSNode1)->getExtensionType())
14848 continue;
14849
14850 if (LSNode1->getMemoryVT() != LSNode2->getMemoryVT())
14851 continue;
14852
14853 auto [Base2, Offset2] = ExtractBaseAndOffset(LSNode2->getOperand(OpNum));
14854
14855 // Check if the base pointer is the same for both instructions.
14856 if (Base1 != Base2)
14857 continue;
14858
14859 // Check if the offsets match the XTHeadMemPair encoding constraints.
14860 bool Valid = false;
14861 if (MemVT == MVT::i32) {
14862 // Check for adjacent i32 values and a 2-bit index.
14863 if ((Offset1 + 4 == Offset2) && isShiftedUInt<2, 3>(Offset1))
14864 Valid = true;
14865 } else if (MemVT == MVT::i64) {
14866 // Check for adjacent i64 values and a 2-bit index.
14867 if ((Offset1 + 8 == Offset2) && isShiftedUInt<2, 4>(Offset1))
14868 Valid = true;
14869 }
14870
14871 if (!Valid)
14872 continue;
14873
14874 // Try to combine.
14875 if (SDValue Res =
14876 tryMemPairCombine(DAG, LSNode1, LSNode2, Base1, Offset1))
14877 return Res;
14878 }
14879 }
14880
14881 return SDValue();
14882}
14883
14884// Fold
14885// (fp_to_int (froundeven X)) -> fcvt X, rne
14886// (fp_to_int (ftrunc X)) -> fcvt X, rtz
14887// (fp_to_int (ffloor X)) -> fcvt X, rdn
14888// (fp_to_int (fceil X)) -> fcvt X, rup
14889// (fp_to_int (fround X)) -> fcvt X, rmm
14890// (fp_to_int (frint X)) -> fcvt X
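// For example (scalar f32 to i32, names illustrative), (fp_to_sint (ffloor
// %x)) can be emitted as a single fcvt.w.s with the static rdn rounding mode,
// so no separate floor is needed and no fsrmi/fsrm rounding-mode swap occurs.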
14891static SDValue performFP_TO_INTCombine(SDNode *N,
14892 TargetLowering::DAGCombinerInfo &DCI,
14893 const RISCVSubtarget &Subtarget) {
14894 SelectionDAG &DAG = DCI.DAG;
14895 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14896 MVT XLenVT = Subtarget.getXLenVT();
14897
14898 SDValue Src = N->getOperand(0);
14899
14900 // Don't do this for strict-fp Src.
14901 if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode())
14902 return SDValue();
14903
14904 // Ensure the FP type is legal.
14905 if (!TLI.isTypeLegal(Src.getValueType()))
14906 return SDValue();
14907
14908 // Don't do this for f16 with Zfhmin and not Zfh.
14909 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
14910 return SDValue();
14911
14912 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
14913 // If the result is invalid, we didn't find a foldable instruction.
14914 if (FRM == RISCVFPRndMode::Invalid)
14915 return SDValue();
14916
14917 SDLoc DL(N);
14918 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
14919 EVT VT = N->getValueType(0);
14920
14921 if (VT.isVector() && TLI.isTypeLegal(VT)) {
14922 MVT SrcVT = Src.getSimpleValueType();
14923 MVT SrcContainerVT = SrcVT;
14924 MVT ContainerVT = VT.getSimpleVT();
14925 SDValue XVal = Src.getOperand(0);
14926
14927 // For widening and narrowing conversions we just combine it into a
14928 // VFCVT_..._VL node, as there are no specific VFWCVT/VFNCVT VL nodes. They
14929 // end up getting lowered to their appropriate pseudo instructions based on
14930 // their operand types
14931 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits() * 2 ||
14932 VT.getScalarSizeInBits() * 2 < SrcVT.getScalarSizeInBits())
14933 return SDValue();
14934
14935 // Make fixed-length vectors scalable first
14936 if (SrcVT.isFixedLengthVector()) {
14937 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
14938 XVal = convertToScalableVector(SrcContainerVT, XVal, DAG, Subtarget);
14939 ContainerVT =
14940 getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
14941 }
14942
14943 auto [Mask, VL] =
14944 getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);
14945
14946 SDValue FpToInt;
14947 if (FRM == RISCVFPRndMode::RTZ) {
14948 // Use the dedicated trunc static rounding mode if we're truncating so we
14949 // don't need to generate calls to fsrmi/fsrm
14950 unsigned Opc =
14951 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
14952 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
14953 } else if (FRM == RISCVFPRndMode::DYN) {
14954 unsigned Opc =
14955 IsSigned ? RISCVISD::VFCVT_X_F_VL : RISCVISD::VFCVT_XU_F_VL;
14956 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
14957 } else {
14958 unsigned Opc =
14959 IsSigned ? RISCVISD::VFCVT_RM_X_F_VL : RISCVISD::VFCVT_RM_XU_F_VL;
14960 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask,
14961 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
14962 }
14963
14964 // If converted from fixed-length to scalable, convert back
14965 if (VT.isFixedLengthVector())
14966 FpToInt = convertFromScalableVector(VT, FpToInt, DAG, Subtarget);
14967
14968 return FpToInt;
14969 }
14970
14971 // Only handle XLen or i32 types. Other types narrower than XLen will
14972 // eventually be legalized to XLenVT.
14973 if (VT != MVT::i32 && VT != XLenVT)
14974 return SDValue();
14975
14976 unsigned Opc;
14977 if (VT == XLenVT)
14978 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
14979 else
14980 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
14981
14982 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src.getOperand(0),
14983 DAG.getTargetConstant(FRM, DL, XLenVT));
14984 return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt);
14985}
14986
14987// Fold
14988// (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne))
14989// (fp_to_int_sat (ftrunc X)) -> (select X == nan, 0, (fcvt X, rtz))
14990// (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn))
14991// (fp_to_int_sat (fceil X)) -> (select X == nan, 0, (fcvt X, rup))
14992// (fp_to_int_sat (fround X)) -> (select X == nan, 0, (fcvt X, rmm))
14993// (fp_to_int_sat (frint X)) -> (select X == nan, 0, (fcvt X, dyn))
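// The select is what preserves fp_to_int_sat semantics: fcvt already clamps
// out-of-range inputs to the destination's min/max, but it converts nan to
// the max value, while fp_to_int_sat requires 0 for nan. Comparing Src with
// itself using SETUO is true only for nan, so the select substitutes the 0.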
14994static SDValue performFP_TO_INT_SATCombine(SDNode *N,
14995 TargetLowering::DAGCombinerInfo &DCI,
14996 const RISCVSubtarget &Subtarget) {
14997 SelectionDAG &DAG = DCI.DAG;
14998 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14999 MVT XLenVT = Subtarget.getXLenVT();
15000
15001 // Only handle XLen types. Other types narrower than XLen will eventually be
15002 // legalized to XLenVT.
15003 EVT DstVT = N->getValueType(0);
15004 if (DstVT != XLenVT)
15005 return SDValue();
15006
15007 SDValue Src = N->getOperand(0);
15008
15009 // Don't do this for strict-fp Src.
15010 if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode())
15011 return SDValue();
15012
15013 // Ensure the FP type is also legal.
15014 if (!TLI.isTypeLegal(Src.getValueType()))
15015 return SDValue();
15016
15017 // Don't do this for f16 with Zfhmin and not Zfh.
15018 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
15019 return SDValue();
15020
15021 EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
15022
15023 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
15024 if (FRM == RISCVFPRndMode::Invalid)
15025 return SDValue();
15026
15027 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;
15028
15029 unsigned Opc;
15030 if (SatVT == DstVT)
15031 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
15032 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
15033 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
15034 else
15035 return SDValue();
15036 // FIXME: Support other SatVTs by clamping before or after the conversion.
15037
15038 Src = Src.getOperand(0);
15039
15040 SDLoc DL(N);
15041 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src,
15042 DAG.getTargetConstant(FRM, DL, XLenVT));
15043
15044 // fcvt.wu.* sign extends bit 31 on RV64. FP_TO_UINT_SAT expects to zero
15045 // extend.
15046 if (Opc == RISCVISD::FCVT_WU_RV64)
15047 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
15048
15049 // RISC-V FP-to-int conversions saturate to the destination register size, but
15050 // don't produce 0 for nan.
15051 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
15052 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
15053}
15054
15055// Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is
15056// smaller than XLenVT.
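// For example with i16 (chosen only for illustration): bswap swaps the two
// bytes and bitreverse then reverses all 16 bits, which composes to reversing
// the bits within each byte while leaving the bytes in place, and that is
// exactly what the Zbkb brev8 instruction computes.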
15057static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG,
15058 const RISCVSubtarget &Subtarget) {
15059 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
15060
15061 SDValue Src = N->getOperand(0);
15062 if (Src.getOpcode() != ISD::BSWAP)
15063 return SDValue();
15064
15065 EVT VT = N->getValueType(0);
15066 if (!VT.isScalarInteger() || VT.getSizeInBits() >= Subtarget.getXLen() ||
15067 !llvm::has_single_bit<uint32_t>(VT.getSizeInBits()))
15068 return SDValue();
15069
15070 SDLoc DL(N);
15071 return DAG.getNode(RISCVISD::BREV8, DL, VT, Src.getOperand(0));
15072}
15073
15074// Convert from one FMA opcode to another based on whether we are negating the
15075// multiply result and/or the accumulator.
15076// NOTE: Only supports RVV operations with VL.
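// For reference, the RVV FMA semantics relied on here: vfmadd computes
// (a * b) + c, vfmsub computes (a * b) - c, vfnmsub computes -(a * b) + c and
// vfnmadd computes -(a * b) - c. So negating the product of a VFMADD_VL turns
// it into a VFNMSUB_VL, while negating only its accumulator gives VFMSUB_VL.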
15077static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) {
15078 // Negating the multiply result changes ADD<->SUB and toggles 'N'.
15079 if (NegMul) {
15080 // clang-format off
15081 switch (Opcode) {
15082 default: llvm_unreachable("Unexpected opcode");
15083 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
15084 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
15085 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
15086 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
15087 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
15088 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
15089 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
15090 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
15091 }
15092 // clang-format on
15093 }
15094
15095 // Negating the accumulator changes ADD<->SUB.
15096 if (NegAcc) {
15097 // clang-format off
15098 switch (Opcode) {
15099 default: llvm_unreachable("Unexpected opcode");
15100 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
15101 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
15102 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
15103 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
15104 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
15105 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
15106 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
15107 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
15108 }
15109 // clang-format on
15110 }
15111
15112 return Opcode;
15113}
15114
15115static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG) {
15116 // Fold FNEG_VL into FMA opcodes.
15117 // The first operand of strict-fp is chain.
15118 unsigned Offset = N->isTargetStrictFPOpcode();
15119 SDValue A = N->getOperand(0 + Offset);
15120 SDValue B = N->getOperand(1 + Offset);
15121 SDValue C = N->getOperand(2 + Offset);
15122 SDValue Mask = N->getOperand(3 + Offset);
15123 SDValue VL = N->getOperand(4 + Offset);
15124
15125 auto invertIfNegative = [&Mask, &VL](SDValue &V) {
15126 if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(1) == Mask &&
15127 V.getOperand(2) == VL) {
15128 // Return the negated input.
15129 V = V.getOperand(0);
15130 return true;
15131 }
15132
15133 return false;
15134 };
15135
15136 bool NegA = invertIfNegative(A);
15137 bool NegB = invertIfNegative(B);
15138 bool NegC = invertIfNegative(C);
15139
15140 // If no operands are negated, we're done.
15141 if (!NegA && !NegB && !NegC)
15142 return SDValue();
15143
15144 unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC);
15145 if (N->isTargetStrictFPOpcode())
15146 return DAG.getNode(NewOpcode, SDLoc(N), N->getVTList(),
15147 {N->getOperand(0), A, B, C, Mask, VL});
15148 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask,
15149 VL);
15150}
15151
15152static SDValue performVFMADD_VLCombine(SDNode *N, SelectionDAG &DAG,
15153 const RISCVSubtarget &Subtarget) {
15154 if (SDValue V = combineVFMADD_VLWithVFNEG_VL(N, DAG))
15155 return V;
15156
15157 if (N->getValueType(0).isScalableVector() &&
15158 N->getValueType(0).getVectorElementType() == MVT::f32 &&
15159 (Subtarget.hasVInstructionsF16Minimal() &&
15160 !Subtarget.hasVInstructionsF16())) {
15161 return SDValue();
15162 }
15163
15164 // FIXME: Ignore strict opcodes for now.
15165 if (N->isTargetStrictFPOpcode())
15166 return SDValue();
15167
15168 // Try to form widening FMA.
15169 SDValue Op0 = N->getOperand(0);
15170 SDValue Op1 = N->getOperand(1);
15171 SDValue Mask = N->getOperand(3);
15172 SDValue VL = N->getOperand(4);
15173
15174 if (Op0.getOpcode() != RISCVISD::FP_EXTEND_VL ||
15175 Op1.getOpcode() != RISCVISD::FP_EXTEND_VL)
15176 return SDValue();
15177
15178 // TODO: Refactor to handle more complex cases similar to
15179 // combineBinOp_VLToVWBinOp_VL.
15180 if ((!Op0.hasOneUse() || !Op1.hasOneUse()) &&
15181 (Op0 != Op1 || !Op0->hasNUsesOfValue(2, 0)))
15182 return SDValue();
15183
15184 // Check the mask and VL are the same.
15185 if (Op0.getOperand(1) != Mask || Op0.getOperand(2) != VL ||
15186 Op1.getOperand(1) != Mask || Op1.getOperand(2) != VL)
15187 return SDValue();
15188
15189 unsigned NewOpc;
15190 switch (N->getOpcode()) {
15191 default:
15192 llvm_unreachable("Unexpected opcode");
15193 case RISCVISD::VFMADD_VL:
15194 NewOpc = RISCVISD::VFWMADD_VL;
15195 break;
15196 case RISCVISD::VFNMSUB_VL:
15197 NewOpc = RISCVISD::VFWNMSUB_VL;
15198 break;
15199 case RISCVISD::VFNMADD_VL:
15200 NewOpc = RISCVISD::VFWNMADD_VL;
15201 break;
15202 case RISCVISD::VFMSUB_VL:
15203 NewOpc = RISCVISD::VFWMSUB_VL;
15204 break;
15205 }
15206
15207 Op0 = Op0.getOperand(0);
15208 Op1 = Op1.getOperand(0);
15209
15210 return DAG.getNode(NewOpc, SDLoc(N), N->getValueType(0), Op0, Op1,
15211 N->getOperand(2), Mask, VL);
15212}
15213
15214static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
15215 const RISCVSubtarget &Subtarget) {
15216 assert(N->getOpcode() == ISD::SRA && "Unexpected opcode");
15217
15218 if (N->getValueType(0) != MVT::i64 || !Subtarget.is64Bit())
15219 return SDValue();
15220
15221 if (!isa<ConstantSDNode>(N->getOperand(1)))
15222 return SDValue();
15223 uint64_t ShAmt = N->getConstantOperandVal(1);
15224 if (ShAmt > 32)
15225 return SDValue();
15226
15227 SDValue N0 = N->getOperand(0);
15228
15229 // Combine (sra (sext_inreg (shl X, C1), i32), C2) ->
15230 // (sra (shl X, C1+32), C2+32) so it gets selected as SLLI+SRAI instead of
15231 // SLLIW+SRAIW. SLLI+SRAI have compressed forms.
15232 if (ShAmt < 32 &&
15233 N0.getOpcode() == ISD::SIGN_EXTEND_INREG && N0.hasOneUse() &&
15234 cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32 &&
15235 N0.getOperand(0).getOpcode() == ISD::SHL && N0.getOperand(0).hasOneUse() &&
15236 isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
15237 uint64_t LShAmt = N0.getOperand(0).getConstantOperandVal(1);
15238 if (LShAmt < 32) {
15239 SDLoc ShlDL(N0.getOperand(0));
15240 SDValue Shl = DAG.getNode(ISD::SHL, ShlDL, MVT::i64,
15241 N0.getOperand(0).getOperand(0),
15242 DAG.getConstant(LShAmt + 32, ShlDL, MVT::i64));
15243 SDLoc DL(N);
15244 return DAG.getNode(ISD::SRA, DL, MVT::i64, Shl,
15245 DAG.getConstant(ShAmt + 32, DL, MVT::i64));
15246 }
15247 }
15248
15249 // Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C)
15250 // FIXME: Should this be a generic combine? There's a similar combine on X86.
15251 //
15252 // Also try these folds where an add or sub is in the middle.
15253 // (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1), C)
15254 // (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X), C)
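  // Worked instance with an illustrative constant: (sra (shl X, 32), 20)
  // produces sext_inreg(X, i32) shifted left by 32 - 20 = 12, so it is
  // rewritten below as (shl (sext_inreg X, i32), 12).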
15255 SDValue Shl;
15256 ConstantSDNode *AddC = nullptr;
15257
15258 // We might have an ADD or SUB between the SRA and SHL.
15259 bool IsAdd = N0.getOpcode() == ISD::ADD;
15260 if ((IsAdd || N0.getOpcode() == ISD::SUB)) {
15261 // Other operand needs to be a constant we can modify.
15262 AddC = dyn_cast<ConstantSDNode>(N0.getOperand(IsAdd ? 1 : 0));
15263 if (!AddC)
15264 return SDValue();
15265
15266 // AddC needs to have at least 32 trailing zeros.
15267 if (AddC->getAPIntValue().countr_zero() < 32)
15268 return SDValue();
15269
15270 // All users should be a shift by constant less than or equal to 32. This
15271 // ensures we'll do this optimization for each of them to produce an
15272 // add/sub+sext_inreg they can all share.
15273 for (SDNode *U : N0->uses()) {
15274 if (U->getOpcode() != ISD::SRA ||
15275 !isa<ConstantSDNode>(U->getOperand(1)) ||
15276 U->getConstantOperandVal(1) > 32)
15277 return SDValue();
15278 }
15279
15280 Shl = N0.getOperand(IsAdd ? 0 : 1);
15281 } else {
15282 // Not an ADD or SUB.
15283 Shl = N0;
15284 }
15285
15286 // Look for a shift left by 32.
15287 if (Shl.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(Shl.getOperand(1)) ||
15288 Shl.getConstantOperandVal(1) != 32)
15289 return SDValue();
15290
15291 // If we didn't look through an add/sub, then the shl should have one use.
15292 // If we did look through an add/sub, the sext_inreg we create is free so
15293 // we're only creating 2 new instructions. It's enough to only remove the
15294 // original sra+add/sub.
15295 if (!AddC && !Shl.hasOneUse())
15296 return SDValue();
15297
15298 SDLoc DL(N);
15299 SDValue In = Shl.getOperand(0);
15300
15301 // If we looked through an ADD or SUB, we need to rebuild it with the shifted
15302 // constant.
15303 if (AddC) {
15304 SDValue ShiftedAddC =
15305 DAG.getConstant(AddC->getAPIntValue().lshr(32), DL, MVT::i64);
15306 if (IsAdd)
15307 In = DAG.getNode(ISD::ADD, DL, MVT::i64, In, ShiftedAddC);
15308 else
15309 In = DAG.getNode(ISD::SUB, DL, MVT::i64, ShiftedAddC, In);
15310 }
15311
15312 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, In,
15313 DAG.getValueType(MVT::i32));
15314 if (ShAmt == 32)
15315 return SExt;
15316
15317 return DAG.getNode(
15318 ISD::SHL, DL, MVT::i64, SExt,
15319 DAG.getConstant(32 - ShAmt, DL, MVT::i64));
15320}
15321
15322// Invert (and/or (set cc X, Y), (xor Z, 1)) to (or/and (set !cc X, Y), Z) if
15323// the result is used as the condition of a br_cc or select_cc we can invert,
15324// inverting the setcc is free, and Z is 0/1. Caller will invert the
15325// br_cc/select_cc.
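// For example, with Z known to be 0/1: and (seteq A, B), (xor Z, 1) is
// rewritten to or (setne A, B), Z, i.e. De Morgan's law with the inversion
// pushed into the setcc; the caller then inverts the branch or select
// condition so the overall logic is unchanged and the xor disappears.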
15326static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG) {
15327 bool IsAnd = Cond.getOpcode() == ISD::AND;
15328 if (!IsAnd && Cond.getOpcode() != ISD::OR)
15329 return SDValue();
15330
15331 if (!Cond.hasOneUse())
15332 return SDValue();
15333
15334 SDValue Setcc = Cond.getOperand(0);
15335 SDValue Xor = Cond.getOperand(1);
15336 // Canonicalize setcc to LHS.
15337 if (Setcc.getOpcode() != ISD::SETCC)
15338 std::swap(Setcc, Xor);
15339 // LHS should be a setcc and RHS should be an xor.
15340 if (Setcc.getOpcode() != ISD::SETCC || !Setcc.hasOneUse() ||
15341 Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
15342 return SDValue();
15343
15344 // If the condition is an And, SimplifyDemandedBits may have changed
15345 // (xor Z, 1) to (not Z).
15346 SDValue Xor1 = Xor.getOperand(1);
15347 if (!isOneConstant(Xor1) && !(IsAnd && isAllOnesConstant(Xor1)))
15348 return SDValue();
15349
15350 EVT VT = Cond.getValueType();
15351 SDValue Xor0 = Xor.getOperand(0);
15352
15353 // The LHS of the xor needs to be 0/1.
15354 APInt Mask = APInt::getBitsSetFrom(VT.getScalarSizeInBits(), 1);
15355 if (!DAG.MaskedValueIsZero(Xor0, Mask))
15356 return SDValue();
15357
15358 // We can only invert integer setccs.
15359 EVT SetCCOpVT = Setcc.getOperand(0).getValueType();
15360 if (!SetCCOpVT.isScalarInteger())
15361 return SDValue();
15362
15363 ISD::CondCode CCVal = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
15364 if (ISD::isIntEqualitySetCC(CCVal)) {
15365 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
15366 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(0),
15367 Setcc.getOperand(1), CCVal);
15368 } else if (CCVal == ISD::SETLT && isNullConstant(Setcc.getOperand(0))) {
15369 // Invert (setlt 0, X) by converting to (setlt X, 1).
15370 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(1),
15371 DAG.getConstant(1, SDLoc(Setcc), VT), CCVal);
15372 } else if (CCVal == ISD::SETLT && isOneConstant(Setcc.getOperand(1))) {
15373 // (setlt X, 1) by converting to (setlt 0, X).
15374 Setcc = DAG.getSetCC(SDLoc(Setcc), VT,
15375 DAG.getConstant(0, SDLoc(Setcc), VT),
15376 Setcc.getOperand(0), CCVal);
15377 } else
15378 return SDValue();
15379
15380 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
15381 return DAG.getNode(Opc, SDLoc(Cond), VT, Setcc, Xor.getOperand(0));
15382}
15383
15384// Perform common combines for BR_CC and SELECT_CC conditions.
15385static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
15386 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
15387 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
15388
15389 // Since an arithmetic right shift always preserves the sign,
15390 // the shift can be omitted here.
15391 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
15392 // setge (sra X, N), 0 -> setge X, 0
15393 if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
15394 LHS.getOpcode() == ISD::SRA) {
15395 LHS = LHS.getOperand(0);
15396 return true;
15397 }
15398
15399 if (!ISD::isIntEqualitySetCC(CCVal))
15400 return false;
15401
15402 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
15403 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
15404 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
15405 LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
15406 // If we're looking for eq 0 instead of ne 0, we need to invert the
15407 // condition.
15408 bool Invert = CCVal == ISD::SETEQ;
15409 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
15410 if (Invert)
15411 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
15412
15413 RHS = LHS.getOperand(1);
15414 LHS = LHS.getOperand(0);
15415 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
15416
15417 CC = DAG.getCondCode(CCVal);
15418 return true;
15419 }
15420
15421 // Fold ((xor X, Y), 0, eq/ne) -> (X, Y, eq/ne)
15422 if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS)) {
15423 RHS = LHS.getOperand(1);
15424 LHS = LHS.getOperand(0);
15425 return true;
15426 }
15427
15428 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, XLen-1-C), 0, ge/lt)
15429 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
15430 LHS.getOperand(1).getOpcode() == ISD::Constant) {
15431 SDValue LHS0 = LHS.getOperand(0);
15432 if (LHS0.getOpcode() == ISD::AND &&
15433 LHS0.getOperand(1).getOpcode() == ISD::Constant) {
15434 uint64_t Mask = LHS0.getConstantOperandVal(1);
15435 uint64_t ShAmt = LHS.getConstantOperandVal(1);
15436 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
15437 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
15438 CC = DAG.getCondCode(CCVal);
15439
15440 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
15441 LHS = LHS0.getOperand(0);
15442 if (ShAmt != 0)
15443 LHS =
15444 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
15445 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
15446 return true;
15447 }
15448 }
15449 }
15450
15451 // (X, 1, setne) -> // (X, 0, seteq) if we can prove X is 0/1.
15452 // This can occur when legalizing some floating point comparisons.
15453 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
15454 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
15455 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
15456 CC = DAG.getCondCode(CCVal);
15457 RHS = DAG.getConstant(0, DL, LHS.getValueType());
15458 return true;
15459 }
15460
15461 if (isNullConstant(RHS)) {
15462 if (SDValue NewCond = tryDemorganOfBooleanCondition(LHS, DAG)) {
15463 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
15464 CC = DAG.getCondCode(CCVal);
15465 LHS = NewCond;
15466 return true;
15467 }
15468 }
15469
15470 return false;
15471}
15472
15473// Fold
15474// (select C, (add Y, X), Y) -> (add Y, (select C, X, 0)).
15475// (select C, (sub Y, X), Y) -> (sub Y, (select C, X, 0)).
15476// (select C, (or Y, X), Y) -> (or Y, (select C, X, 0)).
15477// (select C, (xor Y, X), Y) -> (xor Y, (select C, X, 0)).
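// For example, (select C, (add Y, X), Y) computes Y + (C ? X : 0); a select
// between a value and the operation's identity is cheap to materialize (for
// instance with Zicond's czero.eqz when C is 0/1), and the binary operation
// itself no longer needs to be conditional.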
15478static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG,
15479 SDValue TrueVal, SDValue FalseVal,
15480 bool Swapped) {
15481 bool Commutative = true;
15482 unsigned Opc = TrueVal.getOpcode();
15483 switch (Opc) {
15484 default:
15485 return SDValue();
15486 case ISD::SHL:
15487 case ISD::SRA:
15488 case ISD::SRL:
15489 case ISD::SUB:
15490 Commutative = false;
15491 break;
15492 case ISD::ADD:
15493 case ISD::OR:
15494 case ISD::XOR:
15495 break;
15496 }
15497
15498 if (!TrueVal.hasOneUse() || isa<ConstantSDNode>(FalseVal))
15499 return SDValue();
15500
15501 unsigned OpToFold;
15502 if (FalseVal == TrueVal.getOperand(0))
15503 OpToFold = 0;
15504 else if (Commutative && FalseVal == TrueVal.getOperand(1))
15505 OpToFold = 1;
15506 else
15507 return SDValue();
15508
15509 EVT VT = N->getValueType(0);
15510 SDLoc DL(N);
15511 SDValue OtherOp = TrueVal.getOperand(1 - OpToFold);
15512 EVT OtherOpVT = OtherOp.getValueType();
15513 SDValue IdentityOperand =
15514 DAG.getNeutralElement(Opc, DL, OtherOpVT, N->getFlags());
15515 if (!Commutative)
15516 IdentityOperand = DAG.getConstant(0, DL, OtherOpVT);
15517 assert(IdentityOperand && "No identity operand!");
15518
15519 if (Swapped)
15520 std::swap(OtherOp, IdentityOperand);
15521 SDValue NewSel =
15522 DAG.getSelect(DL, OtherOpVT, N->getOperand(0), OtherOp, IdentityOperand);
15523 return DAG.getNode(TrueVal.getOpcode(), DL, VT, FalseVal, NewSel);
15524}
15525
15526// This tries to get rid of the `select` and `icmp` that are used to handle
15527// targets that do not support `cttz(0)`/`ctlz(0)`.
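// For example with i32 (illustrative): select (icmp eq %x, 0), 0, (cttz %x)
// can be replaced by (and (cttz %x), 31), because cttz returns the bit width
// (32) for a zero input and 32 & 31 == 0, so the guarded zero case falls out
// of the masking with no compare or select.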
15528static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG) {
15529 SDValue Cond = N->getOperand(0);
15530
15531 // This represents either CTTZ or CTLZ instruction.
15532 SDValue CountZeroes;
15533
15534 SDValue ValOnZero;
15535
15536 if (Cond.getOpcode() != ISD::SETCC)
15537 return SDValue();
15538
15539 if (!isNullConstant(Cond->getOperand(1)))
15540 return SDValue();
15541
15542 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
15543 if (CCVal == ISD::CondCode::SETEQ) {
15544 CountZeroes = N->getOperand(2);
15545 ValOnZero = N->getOperand(1);
15546 } else if (CCVal == ISD::CondCode::SETNE) {
15547 CountZeroes = N->getOperand(1);
15548 ValOnZero = N->getOperand(2);
15549 } else {
15550 return SDValue();
15551 }
15552
15553 if (CountZeroes.getOpcode() == ISD::TRUNCATE ||
15554 CountZeroes.getOpcode() == ISD::ZERO_EXTEND)
15555 CountZeroes = CountZeroes.getOperand(0);
15556
15557 if (CountZeroes.getOpcode() != ISD::CTTZ &&
15558 CountZeroes.getOpcode() != ISD::CTTZ_ZERO_UNDEF &&
15559 CountZeroes.getOpcode() != ISD::CTLZ &&
15560 CountZeroes.getOpcode() != ISD::CTLZ_ZERO_UNDEF)
15561 return SDValue();
15562
15563 if (!isNullConstant(ValOnZero))
15564 return SDValue();
15565
15566 SDValue CountZeroesArgument = CountZeroes->getOperand(0);
15567 if (Cond->getOperand(0) != CountZeroesArgument)
15568 return SDValue();
15569
15570 if (CountZeroes.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
15571 CountZeroes = DAG.getNode(ISD::CTTZ, SDLoc(CountZeroes),
15572 CountZeroes.getValueType(), CountZeroesArgument);
15573 } else if (CountZeroes.getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
15574 CountZeroes = DAG.getNode(ISD::CTLZ, SDLoc(CountZeroes),
15575 CountZeroes.getValueType(), CountZeroesArgument);
15576 }
15577
15578 unsigned BitWidth = CountZeroes.getValueSizeInBits();
15579 SDValue BitWidthMinusOne =
15580 DAG.getConstant(BitWidth - 1, SDLoc(N), CountZeroes.getValueType());
15581
15582 auto AndNode = DAG.getNode(ISD::AND, SDLoc(N), CountZeroes.getValueType(),
15583 CountZeroes, BitWidthMinusOne);
15584 return DAG.getZExtOrTrunc(AndNode, SDLoc(N), N->getValueType(0));
15585}
15586
15587static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG,
15588 const RISCVSubtarget &Subtarget) {
15589 SDValue Cond = N->getOperand(0);
15590 SDValue True = N->getOperand(1);
15591 SDValue False = N->getOperand(2);
15592 SDLoc DL(N);
15593 EVT VT = N->getValueType(0);
15594 EVT CondVT = Cond.getValueType();
15595
15596 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
15597 return SDValue();
15598
15599 // Replace (setcc eq (and x, C)) with (setcc ne (and x, C))) to generate
15600 // BEXTI, where C is power of 2.
15601 if (Subtarget.hasStdExtZbs() && VT.isScalarInteger() &&
15602 (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())) {
15603 SDValue LHS = Cond.getOperand(0);
15604 SDValue RHS = Cond.getOperand(1);
15605 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
15606 if (CC == ISD::SETEQ && LHS.getOpcode() == ISD::AND &&
15607 isa<ConstantSDNode>(LHS.getOperand(1)) && isNullConstant(RHS)) {
15608 const APInt &MaskVal = LHS.getConstantOperandAPInt(1);
15609 if (MaskVal.isPowerOf2() && !MaskVal.isSignedIntN(12))
15610 return DAG.getSelect(DL, VT,
15611 DAG.getSetCC(DL, CondVT, LHS, RHS, ISD::SETNE),
15612 False, True);
15613 }
15614 }
15615 return SDValue();
15616}
15617
15618static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
15619 const RISCVSubtarget &Subtarget) {
15620 if (SDValue Folded = foldSelectOfCTTZOrCTLZ(N, DAG))
15621 return Folded;
15622
15623 if (SDValue V = useInversedSetcc(N, DAG, Subtarget))
15624 return V;
15625
15626 if (Subtarget.hasConditionalMoveFusion())
15627 return SDValue();
15628
15629 SDValue TrueVal = N->getOperand(1);
15630 SDValue FalseVal = N->getOperand(2);
15631 if (SDValue V = tryFoldSelectIntoOp(N, DAG, TrueVal, FalseVal, /*Swapped*/false))
15632 return V;
15633 return tryFoldSelectIntoOp(N, DAG, FalseVal, TrueVal, /*Swapped*/true);
15634}
15635
15636/// If we have a build_vector where each lane is binop X, C, where C
15637/// is a constant (but not necessarily the same constant on all lanes),
15638/// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ..).
15639/// We assume that materializing a constant build vector will be no more
15640/// expensive than performing O(n) binops.
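/// For example (illustrative): (build_vector (add x, 1), (add y, 2)) can be
/// rewritten as (add (build_vector x, y), (build_vector 1, 2)).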
15641static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG,
15642 const RISCVSubtarget &Subtarget,
15643 const RISCVTargetLowering &TLI) {
15644 SDLoc DL(N);
15645 EVT VT = N->getValueType(0);
15646
15647 assert(!VT.isScalableVector() && "unexpected build vector");
15648
15649 if (VT.getVectorNumElements() == 1)
15650 return SDValue();
15651
15652 const unsigned Opcode = N->op_begin()->getNode()->getOpcode();
15653 if (!TLI.isBinOp(Opcode))
15654 return SDValue();
15655
15656 if (!TLI.isOperationLegalOrCustom(Opcode, VT) || !TLI.isTypeLegal(VT))
15657 return SDValue();
15658
15659 // This BUILD_VECTOR involves an implicit truncation, and sinking
15660 // truncates through binops is non-trivial.
15661 if (N->op_begin()->getValueType() != VT.getVectorElementType())
15662 return SDValue();
15663
15664 SmallVector<SDValue> LHSOps;
15665 SmallVector<SDValue> RHSOps;
15666 for (SDValue Op : N->ops()) {
15667 if (Op.isUndef()) {
15668 // We can't form a divide or remainder from undef.
15669 if (!DAG.isSafeToSpeculativelyExecute(Opcode))
15670 return SDValue();
15671
15672 LHSOps.push_back(Op);
15673 RHSOps.push_back(Op);
15674 continue;
15675 }
15676
15677 // TODO: We can handle operations which have a neutral rhs value
15678 // (e.g. x + 0, a * 1 or a << 0), but we then have to keep track
15679 // of profit in a more explicit manner.
15680 if (Op.getOpcode() != Opcode || !Op.hasOneUse())
15681 return SDValue();
15682
15683 LHSOps.push_back(Op.getOperand(0));
15684 if (!isa<ConstantSDNode>(Op.getOperand(1)) &&
15685 !isa<ConstantFPSDNode>(Op.getOperand(1)))
15686 return SDValue();
15687 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
15688 // have different LHS and RHS types.
15689 if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType())
15690 return SDValue();
15691
15692 RHSOps.push_back(Op.getOperand(1));
15693 }
15694
15695 return DAG.getNode(Opcode, DL, VT, DAG.getBuildVector(VT, DL, LHSOps),
15696 DAG.getBuildVector(VT, DL, RHSOps));
15697}
15698
15699static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
15700 const RISCVSubtarget &Subtarget,
15701 const RISCVTargetLowering &TLI) {
15702 SDValue InVec = N->getOperand(0);
15703 SDValue InVal = N->getOperand(1);
15704 SDValue EltNo = N->getOperand(2);
15705 SDLoc DL(N);
15706
15707 EVT VT = InVec.getValueType();
15708 if (VT.isScalableVector())
15709 return SDValue();
15710
15711 if (!InVec.hasOneUse())
15712 return SDValue();
15713
15714 // Given insert_vector_elt (binop a, VecC), (same_binop b, C2), Elt
15715 // move the insert_vector_elts into the arms of the binop. Note that
15716 // the new RHS must be a constant.
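// For example (illustrative): insert_vector_elt (add v, vc), (add s, 7), i
// becomes (add (insert_vector_elt v, s, i), (insert_vector_elt vc, 7, i)).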
15717 const unsigned InVecOpcode = InVec->getOpcode();
15718 if (InVecOpcode == InVal->getOpcode() && TLI.isBinOp(InVecOpcode) &&
15719 InVal.hasOneUse()) {
15720 SDValue InVecLHS = InVec->getOperand(0);
15721 SDValue InVecRHS = InVec->getOperand(1);
15722 SDValue InValLHS = InVal->getOperand(0);
15723 SDValue InValRHS = InVal->getOperand(1);
15724
15725 if (!isa<ConstantSDNode>(InVecRHS) && !isa<ConstantFPSDNode>(InVecRHS))
15726 return SDValue();
15727 if (!isa<ConstantSDNode>(InValRHS) && !isa<ConstantFPSDNode>(InValRHS))
15728 return SDValue();
15729 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
15730 // have different LHS and RHS types.
15731 if (InVec.getOperand(0).getValueType() != InVec.getOperand(1).getValueType())
15732 return SDValue();
15733 SDValue LHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
15734 InVecLHS, InValLHS, EltNo);
15735 SDValue RHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
15736 InVecRHS, InValRHS, EltNo);
15737 return DAG.getNode(InVecOpcode, DL, VT, LHS, RHS);
15738 }
15739
15740 // Given insert_vector_elt (concat_vectors ...), InVal, Elt
15741 // move the insert_vector_elt to the source operand of the concat_vector.
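// For example (illustrative): an insert into lane 5 of a concat of two v4i32
// sources becomes an insert into lane 1 of the second source vector.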
15742 if (InVec.getOpcode() != ISD::CONCAT_VECTORS)
15743 return SDValue();
15744
15745 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
15746 if (!IndexC)
15747 return SDValue();
15748 unsigned Elt = IndexC->getZExtValue();
15749
15750 EVT ConcatVT = InVec.getOperand(0).getValueType();
15751 if (ConcatVT.getVectorElementType() != InVal.getValueType())
15752 return SDValue();
15753 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
15754 SDValue NewIdx = DAG.getVectorIdxConstant(Elt % ConcatNumElts, DL);
15755
15756 unsigned ConcatOpIdx = Elt / ConcatNumElts;
15757 SDValue ConcatOp = InVec.getOperand(ConcatOpIdx);
15758 ConcatOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ConcatVT,
15759 ConcatOp, InVal, NewIdx);
15760
15761 SmallVector<SDValue> ConcatOps;
15762 ConcatOps.append(InVec->op_begin(), InVec->op_end());
15763 ConcatOps[ConcatOpIdx] = ConcatOp;
15764 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
15765}
15766
15767// If we're concatenating a series of vector loads like
15768// concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ...
15769// Then we can turn this into a strided load by widening the vector elements
15770// vlse32 p, stride=n
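// For example (illustrative), with the stride in a1 this can lower to:
//   vsetivli zero, 4, e32, m1, ta, ma
//   vlse32.v v8, (a0), a1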
15771static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
15772 const RISCVSubtarget &Subtarget,
15773 const RISCVTargetLowering &TLI) {
15774 SDLoc DL(N);
15775 EVT VT = N->getValueType(0);
15776
15777 // Only perform this combine on legal MVTs.
15778 if (!TLI.isTypeLegal(VT))
15779 return SDValue();
15780
15781 // TODO: Potentially extend this to scalable vectors
15782 if (VT.isScalableVector())
15783 return SDValue();
15784
15785 auto *BaseLd = dyn_cast<LoadSDNode>(N->getOperand(0));
15786 if (!BaseLd || !BaseLd->isSimple() || !ISD::isNormalLoad(BaseLd) ||
15787 !SDValue(BaseLd, 0).hasOneUse())
15788 return SDValue();
15789
15790 EVT BaseLdVT = BaseLd->getValueType(0);
15791
15792 // Go through the loads and check that they're strided
15793 SmallVector<LoadSDNode *> Lds;
15794 Lds.push_back(BaseLd);
15795 Align Align = BaseLd->getAlign();
15796 for (SDValue Op : N->ops().drop_front()) {
15797 auto *Ld = dyn_cast<LoadSDNode>(Op);
15798 if (!Ld || !Ld->isSimple() || !Op.hasOneUse() ||
15799 Ld->getChain() != BaseLd->getChain() || !ISD::isNormalLoad(Ld) ||
15800 Ld->getValueType(0) != BaseLdVT)
15801 return SDValue();
15802
15803 Lds.push_back(Ld);
15804
15805 // The common alignment is the most restrictive (smallest) of all the loads
15806 Align = std::min(Align, Ld->getAlign());
15807 }
15808
15809 using PtrDiff = std::pair<std::variant<int64_t, SDValue>, bool>;
15810 auto GetPtrDiff = [&DAG](LoadSDNode *Ld1,
15811 LoadSDNode *Ld2) -> std::optional<PtrDiff> {
15812 // If the load ptrs can be decomposed into a common (Base + Index) with a
15813 // common constant stride, then return the constant stride.
15814 BaseIndexOffset BIO1 = BaseIndexOffset::match(Ld1, DAG);
15815 BaseIndexOffset BIO2 = BaseIndexOffset::match(Ld2, DAG);
15816 if (BIO1.equalBaseIndex(BIO2, DAG))
15817 return {{BIO2.getOffset() - BIO1.getOffset(), false}};
15818
15819 // Otherwise try to match (add LastPtr, Stride) or (add NextPtr, Stride)
15820 SDValue P1 = Ld1->getBasePtr();
15821 SDValue P2 = Ld2->getBasePtr();
15822 if (P2.getOpcode() == ISD::ADD && P2.getOperand(0) == P1)
15823 return {{P2.getOperand(1), false}};
15824 if (P1.getOpcode() == ISD::ADD && P1.getOperand(0) == P2)
15825 return {{P1.getOperand(1), true}};
15826
15827 return std::nullopt;
15828 };
15829
15830 // Get the distance between the first and second loads
15831 auto BaseDiff = GetPtrDiff(Lds[0], Lds[1]);
15832 if (!BaseDiff)
15833 return SDValue();
15834
15835 // Check all the loads are the same distance apart
15836 for (auto *It = Lds.begin() + 1; It != Lds.end() - 1; It++)
15837 if (GetPtrDiff(*It, *std::next(It)) != BaseDiff)
15838 return SDValue();
15839
15840 // TODO: At this point, we've successfully matched a generalized gather
15841 // load. Maybe we should emit that, and then move the specialized
15842 // matchers above and below into a DAG combine?
15843
15844 // Get the widened scalar type, e.g. v4i8 -> i32
15845 unsigned WideScalarBitWidth =
15846 BaseLdVT.getScalarSizeInBits() * BaseLdVT.getVectorNumElements();
15847 MVT WideScalarVT = MVT::getIntegerVT(WideScalarBitWidth);
15848
15849 // Get the vector type for the strided load, e.g. 4 x v4i8 -> v4i32
15850 MVT WideVecVT = MVT::getVectorVT(WideScalarVT, N->getNumOperands());
15851 if (!TLI.isTypeLegal(WideVecVT))
15852 return SDValue();
15853
15854 // Check that the operation is legal
15855 if (!TLI.isLegalStridedLoadStore(WideVecVT, Align))
15856 return SDValue();
15857
15858 auto [StrideVariant, MustNegateStride] = *BaseDiff;
15859 SDValue Stride = std::holds_alternative<SDValue>(StrideVariant)
15860 ? std::get<SDValue>(StrideVariant)
15861 : DAG.getConstant(std::get<int64_t>(StrideVariant), DL,
15862 Lds[0]->getOffset().getValueType());
15863 if (MustNegateStride)
15864 Stride = DAG.getNegative(Stride, DL, Stride.getValueType());
15865
15866 SDVTList VTs = DAG.getVTList({WideVecVT, MVT::Other});
15867 SDValue IntID =
15868 DAG.getTargetConstant(Intrinsic::riscv_masked_strided_load, DL,
15869 Subtarget.getXLenVT());
15870
15871 SDValue AllOneMask =
15872 DAG.getSplat(WideVecVT.changeVectorElementType(MVT::i1), DL,
15873 DAG.getConstant(1, DL, MVT::i1));
15874
15875 SDValue Ops[] = {BaseLd->getChain(), IntID, DAG.getUNDEF(WideVecVT),
15876 BaseLd->getBasePtr(), Stride, AllOneMask};
15877
15878 uint64_t MemSize;
15879 if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride);
15880 ConstStride && ConstStride->getSExtValue() >= 0)
15881 // total size = (elsize * n) + (stride - elsize) * (n-1)
15882 // = elsize + stride * (n-1)
15883 MemSize = WideScalarVT.getSizeInBits() +
15884 ConstStride->getSExtValue() * (N->getNumOperands() - 1);
15885 else
15886 // If Stride isn't constant, then we can't know how much it will load
15887 MemSize = MemoryLocation::UnknownSize;
15888
15889 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
15890 BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(), MemSize,
15891 Align);
15892
15893 SDValue StridedLoad = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs,
15894 Ops, WideVecVT, MMO);
15895 for (SDValue Ld : N->ops())
15896 DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), StridedLoad);
15897
15898 return DAG.getBitcast(VT.getSimpleVT(), StridedLoad);
15899}
15900
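// Fold (add (vwmul x, y), z) or (add_vl (vwmul_vl x, y), z) into the matching
// widening multiply-accumulate node (vwmacc/vwmaccu/vwmaccsu), reusing the
// multiply's mask and VL. The merge operands must be undef and the add's mask
// and VL must match the multiply's.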
15901static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG,
15902 const RISCVSubtarget &Subtarget) {
15903
15904 assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD);
15905
15906 if (N->getValueType(0).isFixedLengthVector())
15907 return SDValue();
15908
15909 SDValue Addend = N->getOperand(0);
15910 SDValue MulOp = N->getOperand(1);
15911
15912 if (N->getOpcode() == RISCVISD::ADD_VL) {
15913 SDValue AddMergeOp = N->getOperand(2);
15914 if (!AddMergeOp.isUndef())
15915 return SDValue();
15916 }
15917
15918 auto IsVWMulOpc = [](unsigned Opc) {
15919 switch (Opc) {
15920 case RISCVISD::VWMUL_VL:
15921 case RISCVISD::VWMULU_VL:
15922 case RISCVISD::VWMULSU_VL:
15923 return true;
15924 default:
15925 return false;
15926 }
15927 };
15928
15929 if (!IsVWMulOpc(MulOp.getOpcode()))
15930 std::swap(Addend, MulOp);
15931
15932 if (!IsVWMulOpc(MulOp.getOpcode()))
15933 return SDValue();
15934
15935 SDValue MulMergeOp = MulOp.getOperand(2);
15936
15937 if (!MulMergeOp.isUndef())
15938 return SDValue();
15939
15940 auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG,
15941 const RISCVSubtarget &Subtarget) {
15942 if (N->getOpcode() == ISD::ADD) {
15943 SDLoc DL(N);
15944 return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG,
15945 Subtarget);
15946 }
15947 return std::make_pair(N->getOperand(3), N->getOperand(4));
15948 }(N, DAG, Subtarget);
15949
15950 SDValue MulMask = MulOp.getOperand(3);
15951 SDValue MulVL = MulOp.getOperand(4);
15952
15953 if (AddMask != MulMask || AddVL != MulVL)
15954 return SDValue();
15955
15956 unsigned Opc = RISCVISD::VWMACC_VL + MulOp.getOpcode() - RISCVISD::VWMUL_VL;
15957 static_assert(RISCVISD::VWMACC_VL + 1 == RISCVISD::VWMACCU_VL,
15958 "Unexpected opcode after VWMACC_VL");
15959 static_assert(RISCVISD::VWMACC_VL + 2 == RISCVISD::VWMACCSU_VL,
15960 "Unexpected opcode after VWMACC_VL!");
15961 static_assert(RISCVISD::VWMUL_VL + 1 == RISCVISD::VWMULU_VL,
15962 "Unexpected opcode after VWMUL_VL!");
15963 static_assert(RISCVISD::VWMUL_VL + 2 == RISCVISD::VWMULSU_VL,
15964 "Unexpected opcode after VWMUL_VL!");
15965
15966 SDLoc DL(N);
15967 EVT VT = N->getValueType(0);
15968 SDValue Ops[] = {MulOp.getOperand(0), MulOp.getOperand(1), Addend, AddMask,
15969 AddVL};
15970 return DAG.getNode(Opc, DL, VT, Ops);
15971}
15972
15973static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index,
15974 ISD::MemIndexType &IndexType,
15975 RISCVTargetLowering::DAGCombinerInfo &DCI) {
15976 if (!DCI.isBeforeLegalize())
15977 return false;
15978
15979 SelectionDAG &DAG = DCI.DAG;
15980 const MVT XLenVT =
15981 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>().getXLenVT();
15982
15983 const EVT IndexVT = Index.getValueType();
15984
15985 // RISC-V indexed loads only support the "unsigned unscaled" addressing
15986 // mode, so anything else must be manually legalized.
15987 if (!isIndexTypeSigned(IndexType))
15988 return false;
15989
15990 if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
15991 // Any index legalization should first promote to XLenVT, so we don't lose
15992 // bits when scaling. This may create an illegal index type so we let
15993 // LLVM's legalization take care of the splitting.
15994 // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
15995 Index = DAG.getNode(ISD::SIGN_EXTEND, DL,
15996 IndexVT.changeVectorElementType(XLenVT), Index);
15997 }
15998 IndexType = ISD::UNSIGNED_SCALED;
15999 return true;
16000}
16001
16002/// Match the index vector of a scatter or gather node as the shuffle mask
16003/// which performs the rearrangement if possible. Will only match if
16004/// all lanes are touched, and thus replacing the scatter or gather with
16005/// a unit strided access and shuffle is legal.
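/// For example (illustrative): for i32 elements, a constant byte-offset index
/// of <4, 0, 12, 8> touches every lane and yields the shuffle mask <1, 0, 3, 2>.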
16006static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask,
16007 SmallVector<int> &ShuffleMask) {
16008 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
16009 return false;
16010 if (Index.getOpcode() != ISD::BUILD_VECTOR)
16011 return false;
16012
16013 const unsigned ElementSize = VT.getScalarStoreSize();
16014 const unsigned NumElems = VT.getVectorNumElements();
16015
16016 // Create the shuffle mask and check all bits active
16017 assert(ShuffleMask.empty());
16018 BitVector ActiveLanes(NumElems);
16019 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
16020 // TODO: We've found an active bit of UB, and could be
16021 // more aggressive here if desired.
16022 if (Index->getOperand(i)->isUndef())
16023 return false;
16024 uint64_t C = Index->getConstantOperandVal(i);
16025 if (C % ElementSize != 0)
16026 return false;
16027 C = C / ElementSize;
16028 if (C >= NumElems)
16029 return false;
16030 ShuffleMask.push_back(C);
16031 ActiveLanes.set(C);
16032 }
16033 return ActiveLanes.all();
16034}
16035
16036/// Match the index of a gather or scatter operation as an operation
16037/// with twice the element width and half the number of elements. This is
16038/// generally profitable (if legal) because these operations are linear
16039/// in VL, so even if we cause some extra VTYPE/VL toggles, we still
16040/// come out ahead.
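/// For example (illustrative): an i32 gather with constant byte offsets
/// <0, 4, 16, 20> can instead be done as an i64 gather with offsets <0, 16>.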
16041static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask,
16042 Align BaseAlign, const RISCVSubtarget &ST) {
16043 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
16044 return false;
16045 if (Index.getOpcode() != ISD::BUILD_VECTOR)
16046 return false;
16047
16048 // Attempt a doubling. If we can use an element type 4x or 8x in
16049 // size, this will happen via multiple iterations of the transform.
16050 const unsigned NumElems = VT.getVectorNumElements();
16051 if (NumElems % 2 != 0)
16052 return false;
16053
16054 const unsigned ElementSize = VT.getScalarStoreSize();
16055 const unsigned WiderElementSize = ElementSize * 2;
16056 if (WiderElementSize > ST.getELen()/8)
16057 return false;
16058
16059 if (!ST.enableUnalignedVectorMem() && BaseAlign < WiderElementSize)
16060 return false;
16061
16062 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
16063 // TODO: We've found an active bit of UB, and could be
16064 // more aggressive here if desired.
16065 if (Index->getOperand(i)->isUndef())
16066 return false;
16067 // TODO: This offset check is too strict if we support fully
16068 // misaligned memory operations.
16069 uint64_t C = Index->getConstantOperandVal(i);
16070 if (i % 2 == 0) {
16071 if (C % WiderElementSize != 0)
16072 return false;
16073 continue;
16074 }
16075 uint64_t Last = Index->getConstantOperandVal(i-1);
16076 if (C != Last + ElementSize)
16077 return false;
16078 }
16079 return true;
16080}
16081
16082
16083SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
16084 DAGCombinerInfo &DCI) const {
16085 SelectionDAG &DAG = DCI.DAG;
16086 const MVT XLenVT = Subtarget.getXLenVT();
16087 SDLoc DL(N);
16088
16089 // Helper to call SimplifyDemandedBits on an operand of N where only some low
16090 // bits are demanded. N will be added to the Worklist if it was not deleted.
16091 // Caller should return SDValue(N, 0) if this returns true.
16092 auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) {
16093 SDValue Op = N->getOperand(OpNo);
16094 APInt Mask = APInt::getLowBitsSet(Op.getValueSizeInBits(), LowBits);
16095 if (!SimplifyDemandedBits(Op, Mask, DCI))
16096 return false;
16097
16098 if (N->getOpcode() != ISD::DELETED_NODE)
16099 DCI.AddToWorklist(N);
16100 return true;
16101 };
16102
16103 switch (N->getOpcode()) {
16104 default:
16105 break;
16106 case RISCVISD::SplitF64: {
16107 SDValue Op0 = N->getOperand(0);
16108 // If the input to SplitF64 is just BuildPairF64 then the operation is
16109 // redundant. Instead, use BuildPairF64's operands directly.
16110 if (Op0->getOpcode() == RISCVISD::BuildPairF64)
16111 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
16112
16113 if (Op0->isUndef()) {
16114 SDValue Lo = DAG.getUNDEF(MVT::i32);
16115 SDValue Hi = DAG.getUNDEF(MVT::i32);
16116 return DCI.CombineTo(N, Lo, Hi);
16117 }
16118
16119 // It's cheaper to materialise two 32-bit integers than to load a double
16120 // from the constant pool and transfer it to integer registers through the
16121 // stack.
16122 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
16123 APInt V = C->getValueAPF().bitcastToAPInt();
16124 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
16125 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
16126 return DCI.CombineTo(N, Lo, Hi);
16127 }
16128
16129 // This is a target-specific version of a DAGCombine performed in
16130 // DAGCombiner::visitBITCAST. It performs the equivalent of:
16131 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
16132 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
16133 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
16134 !Op0.getNode()->hasOneUse())
16135 break;
16136 SDValue NewSplitF64 =
16137 DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
16138 Op0.getOperand(0));
16139 SDValue Lo = NewSplitF64.getValue(0);
16140 SDValue Hi = NewSplitF64.getValue(1);
16141 APInt SignBit = APInt::getSignMask(32);
16142 if (Op0.getOpcode() == ISD::FNEG) {
16143 SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
16144 DAG.getConstant(SignBit, DL, MVT::i32));
16145 return DCI.CombineTo(N, Lo, NewHi);
16146 }
16147 assert(Op0.getOpcode() == ISD::FABS);
16148 SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
16149 DAG.getConstant(~SignBit, DL, MVT::i32));
16150 return DCI.CombineTo(N, Lo, NewHi);
16151 }
16152 case RISCVISD::SLLW:
16153 case RISCVISD::SRAW:
16154 case RISCVISD::SRLW:
16155 case RISCVISD::RORW:
16156 case RISCVISD::ROLW: {
16157 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
16158 if (SimplifyDemandedLowBitsHelper(0, 32) ||
16159 SimplifyDemandedLowBitsHelper(1, 5))
16160 return SDValue(N, 0);
16161
16162 break;
16163 }
16164 case RISCVISD::CLZW:
16165 case RISCVISD::CTZW: {
16166 // Only the lower 32 bits of the first operand are read
16167 if (SimplifyDemandedLowBitsHelper(0, 32))
16168 return SDValue(N, 0);
16169 break;
16170 }
16171 case RISCVISD::FMV_W_X_RV64: {
16172 // If the input to FMV_W_X_RV64 is just FMV_X_ANYEXTW_RV64 then the
16173 // conversion is unnecessary and can be replaced with the
16174 // FMV_X_ANYEXTW_RV64 operand.
16175 SDValue Op0 = N->getOperand(0);
16176 if (Op0->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64)
16177 return Op0.getOperand(0);
16178 break;
16179 }
16180 case RISCVISD::FMV_X_ANYEXTH:
16181 case RISCVISD::FMV_X_ANYEXTW_RV64: {
16182 SDLoc DL(N);
16183 SDValue Op0 = N->getOperand(0);
16184 MVT VT = N->getSimpleValueType(0);
16185 // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
16186 // conversion is unnecessary and can be replaced with the FMV_W_X_RV64
16187 // operand. Similar for FMV_X_ANYEXTH and FMV_H_X.
16188 if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 &&
16189 Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) ||
16190 (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
16191 Op0->getOpcode() == RISCVISD::FMV_H_X)) {
16192 assert(Op0.getOperand(0).getValueType() == VT &&
16193 "Unexpected value type!");
16194 return Op0.getOperand(0);
16195 }
16196
16197 // This is a target-specific version of a DAGCombine performed in
16198 // DAGCombiner::visitBITCAST. It performs the equivalent of:
16199 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
16200 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
16201 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
16202 !Op0.getNode()->hasOneUse())
16203 break;
16204 SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0));
16205 unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16;
16206 APInt SignBit = APInt::getSignMask(FPBits).sext(VT.getSizeInBits());
16207 if (Op0.getOpcode() == ISD::FNEG)
16208 return DAG.getNode(ISD::XOR, DL, VT, NewFMV,
16209 DAG.getConstant(SignBit, DL, VT));
16210
16211 assert(Op0.getOpcode() == ISD::FABS);
16212 return DAG.getNode(ISD::AND, DL, VT, NewFMV,
16213 DAG.getConstant(~SignBit, DL, VT));
16214 }
16215 case ISD::ABS: {
16216 EVT VT = N->getValueType(0);
16217 SDValue N0 = N->getOperand(0);
16218 // abs (sext) -> zext (abs)
16219 // abs (zext) -> zext (handled elsewhere)
16220 if (VT.isVector() && N0.hasOneUse() && N0.getOpcode() == ISD::SIGN_EXTEND) {
16221 SDValue Src = N0.getOperand(0);
16222 SDLoc DL(N);
16223 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
16224 DAG.getNode(ISD::ABS, DL, Src.getValueType(), Src));
16225 }
16226 break;
16227 }
16228 case ISD::ADD: {
16229 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16230 return V;
16231 if (SDValue V = combineToVWMACC(N, DAG, Subtarget))
16232 return V;
16233 return performADDCombine(N, DAG, Subtarget);
16234 }
16235 case ISD::SUB: {
16236 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16237 return V;
16238 return performSUBCombine(N, DAG, Subtarget);
16239 }
16240 case ISD::AND:
16241 return performANDCombine(N, DCI, Subtarget);
16242 case ISD::OR: {
16243 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16244 return V;
16245 return performORCombine(N, DCI, Subtarget);
16246 }
16247 case ISD::XOR:
16248 return performXORCombine(N, DAG, Subtarget);
16249 case ISD::MUL:
16250 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16251 return V;
16252 return performMULCombine(N, DAG, DCI, Subtarget);
16253 case ISD::SDIV:
16254 case ISD::UDIV:
16255 case ISD::SREM:
16256 case ISD::UREM:
16257 if (SDValue V = combineBinOpOfZExt(N, DAG))
16258 return V;
16259 break;
16260 case ISD::FADD:
16261 case ISD::UMAX:
16262 case ISD::UMIN:
16263 case ISD::SMAX:
16264 case ISD::SMIN:
16265 case ISD::FMAXNUM:
16266 case ISD::FMINNUM: {
16267 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
16268 return V;
16269 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
16270 return V;
16271 return SDValue();
16272 }
16273 case ISD::SETCC:
16274 return performSETCCCombine(N, DAG, Subtarget);
16275 case ISD::SIGN_EXTEND_INREG:
16276 return performSIGN_EXTEND_INREGCombine(N, DAG, Subtarget);
16277 case ISD::ZERO_EXTEND:
16278 // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during
16279 // type legalization. This is safe because fp_to_uint produces poison if
16280 // it overflows.
16281 if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) {
16282 SDValue Src = N->getOperand(0);
16283 if (Src.getOpcode() == ISD::FP_TO_UINT &&
16284 isTypeLegal(Src.getOperand(0).getValueType()))
16285 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64,
16286 Src.getOperand(0));
16287 if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() &&
16288 isTypeLegal(Src.getOperand(1).getValueType())) {
16289 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
16290 SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, SDLoc(N), VTs,
16291 Src.getOperand(0), Src.getOperand(1));
16292 DCI.CombineTo(N, Res);
16293 DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), Res.getValue(1));
16294 DCI.recursivelyDeleteUnusedNodes(Src.getNode());
16295 return SDValue(N, 0); // Return N so it doesn't get rechecked.
16296 }
16297 }
16298 return SDValue();
16299 case RISCVISD::TRUNCATE_VECTOR_VL: {
16300 // trunc (sra sext (X), zext (Y)) -> sra (X, smin (Y, scalarsize(Y) - 1))
16301 // This benefits the cases where X and Y are both the same value
16302 // type of low precision vectors. Since the truncate would be lowered into
16303 // n-levels TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW truncate
16304 // restriction, such pattern would be expanded into a series of "vsetvli"
16305 // and "vnsrl" instructions later to reach this point.
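// For example (illustrative): for X and Y of type v4i8 sign/zero extended to
// v4i32, the combine produces sra(X, smin(Y, 7)) directly on v4i8.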
16306 auto IsTruncNode = [](SDValue V) {
16307 if (V.getOpcode() != RISCVISD::TRUNCATE_VECTOR_VL)
16308 return false;
16309 SDValue VL = V.getOperand(2);
16310 auto *C = dyn_cast<ConstantSDNode>(VL);
16311 // Assume all TRUNCATE_VECTOR_VL nodes use VLMAX for VMSET_VL operand
16312 bool IsVLMAXForVMSET = (C && C->isAllOnes()) ||
16313 (isa<RegisterSDNode>(VL) &&
16314 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0);
16315 return V.getOperand(1).getOpcode() == RISCVISD::VMSET_VL &&
16316 IsVLMAXForVMSET;
16317 };
16318
16319 SDValue Op = N->getOperand(0);
16320
16321 // We need to first find the inner level of TRUNCATE_VECTOR_VL node
16322 // to distinguish such pattern.
16323 while (IsTruncNode(Op)) {
16324 if (!Op.hasOneUse())
16325 return SDValue();
16326 Op = Op.getOperand(0);
16327 }
16328
16329 if (Op.getOpcode() == ISD::SRA && Op.hasOneUse()) {
16330 SDValue N0 = Op.getOperand(0);
16331 SDValue N1 = Op.getOperand(1);
16332 if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
16333 N1.getOpcode() == ISD::ZERO_EXTEND && N1.hasOneUse()) {
16334 SDValue N00 = N0.getOperand(0);
16335 SDValue N10 = N1.getOperand(0);
16336 if (N00.getValueType().isVector() &&
16337 N00.getValueType() == N10.getValueType() &&
16338 N->getValueType(0) == N10.getValueType()) {
16339 unsigned MaxShAmt = N10.getValueType().getScalarSizeInBits() - 1;
16340 SDValue SMin = DAG.getNode(
16341 ISD::SMIN, SDLoc(N1), N->getValueType(0), N10,
16342 DAG.getConstant(MaxShAmt, SDLoc(N1), N->getValueType(0)));
16343 return DAG.getNode(ISD::SRA, SDLoc(N), N->getValueType(0), N00, SMin);
16344 }
16345 }
16346 }
16347 break;
16348 }
16349 case ISD::TRUNCATE:
16350 return performTRUNCATECombine(N, DAG, Subtarget);
16351 case ISD::SELECT:
16352 return performSELECTCombine(N, DAG, Subtarget);
16353 case RISCVISD::CZERO_EQZ:
16354 case RISCVISD::CZERO_NEZ: {
16355 SDValue Val = N->getOperand(0);
16356 SDValue Cond = N->getOperand(1);
16357
16358 unsigned Opc = N->getOpcode();
16359
16360 // czero_eqz x, x -> x
16361 if (Opc == RISCVISD::CZERO_EQZ && Val == Cond)
16362 return Val;
16363
16364 unsigned InvOpc =
16365 Opc == RISCVISD::CZERO_EQZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ;
16366
16367 // czero_eqz X, (xor Y, 1) -> czero_nez X, Y if Y is 0 or 1.
16368 // czero_nez X, (xor Y, 1) -> czero_eqz X, Y if Y is 0 or 1.
16369 if (Cond.getOpcode() == ISD::XOR && isOneConstant(Cond.getOperand(1))) {
16370 SDValue NewCond = Cond.getOperand(0);
16371 APInt Mask = APInt::getBitsSetFrom(NewCond.getValueSizeInBits(), 1);
16372 if (DAG.MaskedValueIsZero(NewCond, Mask))
16373 return DAG.getNode(InvOpc, SDLoc(N), N->getValueType(0), Val, NewCond);
16374 }
16375 // czero_eqz x, (setcc y, 0, ne) -> czero_eqz x, y
16376 // czero_nez x, (setcc y, 0, ne) -> czero_nez x, y
16377 // czero_eqz x, (setcc y, 0, eq) -> czero_nez x, y
16378 // czero_nez x, (setcc y, 0, eq) -> czero_eqz x, y
16379 if (Cond.getOpcode() == ISD::SETCC && isNullConstant(Cond.getOperand(1))) {
16380 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
16381 if (ISD::isIntEqualitySetCC(CCVal))
16382 return DAG.getNode(CCVal == ISD::SETNE ? Opc : InvOpc, SDLoc(N),
16383 N->getValueType(0), Val, Cond.getOperand(0));
16384 }
16385 return SDValue();
16386 }
16387 case RISCVISD::SELECT_CC: {
16388 // Transform
16389 SDValue LHS = N->getOperand(0);
16390 SDValue RHS = N->getOperand(1);
16391 SDValue CC = N->getOperand(2);
16392 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
16393 SDValue TrueV = N->getOperand(3);
16394 SDValue FalseV = N->getOperand(4);
16395 SDLoc DL(N);
16396 EVT VT = N->getValueType(0);
16397
16398 // If the True and False values are the same, we don't need a select_cc.
16399 if (TrueV == FalseV)
16400 return TrueV;
16401
16402 // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z
16403 // (select (x >= 0), y, z) -> x >> (XLEN - 1) & (z - y) + y
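// For example (illustrative): (select (x < 0), 3, 1) becomes
// ((x >> (XLEN - 1)) & 2) + 1.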
16404 if (!Subtarget.hasShortForwardBranchOpt() && isa<ConstantSDNode>(TrueV) &&
16405 isa<ConstantSDNode>(FalseV) && isNullConstant(RHS) &&
16406 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
16407 if (CCVal == ISD::CondCode::SETGE)
16408 std::swap(TrueV, FalseV);
16409
16410 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
16411 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
16412 // Only handle simm12; if it is not in this range, it can be considered a
16413 // register.
16414 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
16415 isInt<12>(TrueSImm - FalseSImm)) {
16416 SDValue SRA =
16417 DAG.getNode(ISD::SRA, DL, VT, LHS,
16418 DAG.getConstant(Subtarget.getXLen() - 1, DL, VT));
16419 SDValue AND =
16420 DAG.getNode(ISD::AND, DL, VT, SRA,
16421 DAG.getConstant(TrueSImm - FalseSImm, DL, VT));
16422 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
16423 }
16424
16425 if (CCVal == ISD::CondCode::SETGE)
16426 std::swap(TrueV, FalseV);
16427 }
16428
16429 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
16430 return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
16431 {LHS, RHS, CC, TrueV, FalseV});
16432
16433 if (!Subtarget.hasConditionalMoveFusion()) {
16434 // (select c, -1, y) -> -c | y
16435 if (isAllOnesConstant(TrueV)) {
16436 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
16437 SDValue Neg = DAG.getNegative(C, DL, VT);
16438 return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
16439 }
16440 // (select c, y, -1) -> -!c | y
16441 if (isAllOnesConstant(FalseV)) {
16442 SDValue C =
16443 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
16444 SDValue Neg = DAG.getNegative(C, DL, VT);
16445 return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
16446 }
16447
16448 // (select c, 0, y) -> -!c & y
16449 if (isNullConstant(TrueV)) {
16450 SDValue C =
16451 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
16452 SDValue Neg = DAG.getNegative(C, DL, VT);
16453 return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
16454 }
16455 // (select c, y, 0) -> -c & y
16456 if (isNullConstant(FalseV)) {
16457 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
16458 SDValue Neg = DAG.getNegative(C, DL, VT);
16459 return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
16460 }
16461 // (riscvisd::select_cc x, 0, ne, x, 1) -> (add x, (setcc x, 0, eq))
16462 // (riscvisd::select_cc x, 0, eq, 1, x) -> (add x, (setcc x, 0, eq))
16463 if (((isOneConstant(FalseV) && LHS == TrueV &&
16464 CCVal == ISD::CondCode::SETNE) ||
16465 (isOneConstant(TrueV) && LHS == FalseV &&
16466 CCVal == ISD::CondCode::SETEQ)) &&
16467 isNullConstant(RHS)) {
16468 // freeze it to be safe.
16469 LHS = DAG.getFreeze(LHS);
16470 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, ISD::CondCode::SETEQ);
16471 return DAG.getNode(ISD::ADD, DL, VT, LHS, C);
16472 }
16473 }
16474
16475 // If both true/false are an xor with 1, pull through the select.
16476 // This can occur after op legalization if both operands are setccs that
16477 // require an xor to invert.
16478 // FIXME: Generalize to other binary ops with identical operand?
16479 if (TrueV.getOpcode() == ISD::XOR && FalseV.getOpcode() == ISD::XOR &&
16480 TrueV.getOperand(1) == FalseV.getOperand(1) &&
16481 isOneConstant(TrueV.getOperand(1)) &&
16482 TrueV.hasOneUse() && FalseV.hasOneUse()) {
16483 SDValue NewSel = DAG.getNode(RISCVISD::SELECT_CC, DL, VT, LHS, RHS, CC,
16484 TrueV.getOperand(0), FalseV.getOperand(0));
16485 return DAG.getNode(ISD::XOR, DL, VT, NewSel, TrueV.getOperand(1));
16486 }
16487
16488 return SDValue();
16489 }
16490 case RISCVISD::BR_CC: {
16491 SDValue LHS = N->getOperand(1);
16492 SDValue RHS = N->getOperand(2);
16493 SDValue CC = N->getOperand(3);
16494 SDLoc DL(N);
16495
16496 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
16497 return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
16498 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
16499
16500 return SDValue();
16501 }
16502 case ISD::BITREVERSE:
16503 return performBITREVERSECombine(N, DAG, Subtarget);
16504 case ISD::FP_TO_SINT:
16505 case ISD::FP_TO_UINT:
16506 return performFP_TO_INTCombine(N, DCI, Subtarget);
16507 case ISD::FP_TO_SINT_SAT:
16508 case ISD::FP_TO_UINT_SAT:
16509 return performFP_TO_INT_SATCombine(N, DCI, Subtarget);
16510 case ISD::FCOPYSIGN: {
16511 EVT VT = N->getValueType(0);
16512 if (!VT.isVector())
16513 break;
16514 // There is a form of VFSGNJ which injects the negated sign of its second
16515 // operand. Try and bubble any FNEG up after the extend/round to produce
16516 // this optimized pattern. Avoid modifying cases where the FP_ROUND has
16517 // TRUNC=1.
16518 SDValue In2 = N->getOperand(1);
16519 // Avoid cases where the extend/round has multiple uses, as duplicating
16520 // those is typically more expensive than removing a fneg.
16521 if (!In2.hasOneUse())
16522 break;
16523 if (In2.getOpcode() != ISD::FP_EXTEND &&
16524 (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0))
16525 break;
16526 In2 = In2.getOperand(0);
16527 if (In2.getOpcode() != ISD::FNEG)
16528 break;
16529 SDLoc DL(N);
16530 SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT);
16531 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
16532 DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
16533 }
16534 case ISD::MGATHER: {
16535 const auto *MGN = dyn_cast<MaskedGatherSDNode>(N);
16536 const EVT VT = N->getValueType(0);
16537 SDValue Index = MGN->getIndex();
16538 SDValue ScaleOp = MGN->getScale();
16539 ISD::MemIndexType IndexType = MGN->getIndexType();
16540 assert(!MGN->isIndexScaled() &&
16541 "Scaled gather/scatter should not be formed");
16542
16543 SDLoc DL(N);
16544 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
16545 return DAG.getMaskedGather(
16546 N->getVTList(), MGN->getMemoryVT(), DL,
16547 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
16548 MGN->getBasePtr(), Index, ScaleOp},
16549 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
16550
16551 if (narrowIndex(Index, IndexType, DAG))
16552 return DAG.getMaskedGather(
16553 N->getVTList(), MGN->getMemoryVT(), DL,
16554 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
16555 MGN->getBasePtr(), Index, ScaleOp},
16556 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
16557
16558 if (Index.getOpcode() == ISD::BUILD_VECTOR &&
16559 MGN->getExtensionType() == ISD::NON_EXTLOAD && isTypeLegal(VT)) {
16560 // The sequence will be XLenVT, not the type of Index. Tell
16561 // isSimpleVIDSequence this so we avoid overflow.
16562 if (std::optional<VIDSequence> SimpleVID =
16563 isSimpleVIDSequence(Index, Subtarget.getXLen());
16564 SimpleVID && SimpleVID->StepDenominator == 1) {
16565 const int64_t StepNumerator = SimpleVID->StepNumerator;
16566 const int64_t Addend = SimpleVID->Addend;
16567
16568 // Note: We don't need to check alignment here since (by assumption
16569 // from the existence of the gather), our offsets must be sufficiently
16570 // aligned.
16571
16572 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
16573 assert(MGN->getBasePtr()->getValueType(0) == PtrVT);
16574 assert(IndexType == ISD::UNSIGNED_SCALED);
16575 SDValue BasePtr = DAG.getNode(ISD::ADD, DL, PtrVT, MGN->getBasePtr(),
16576 DAG.getConstant(Addend, DL, PtrVT));
16577
16578 SDVTList VTs = DAG.getVTList({VT, MVT::Other});
16579 SDValue IntID =
16580 DAG.getTargetConstant(Intrinsic::riscv_masked_strided_load, DL,
16581 XLenVT);
16582 SDValue Ops[] =
16583 {MGN->getChain(), IntID, MGN->getPassThru(), BasePtr,
16584 DAG.getConstant(StepNumerator, DL, XLenVT), MGN->getMask()};
16585 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs,
16586 Ops, VT, MGN->getMemOperand());
16587 }
16588 }
16589
16590 SmallVector<int> ShuffleMask;
16591 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
16592 matchIndexAsShuffle(VT, Index, MGN->getMask(), ShuffleMask)) {
16593 SDValue Load = DAG.getMaskedLoad(VT, DL, MGN->getChain(),
16594 MGN->getBasePtr(), DAG.getUNDEF(XLenVT),
16595 MGN->getMask(), DAG.getUNDEF(VT),
16596 MGN->getMemoryVT(), MGN->getMemOperand(),
16597 ISD::UNINDEXED, ISD::NON_EXTLOAD);
16598 SDValue Shuffle =
16599 DAG.getVectorShuffle(VT, DL, Load, DAG.getUNDEF(VT), ShuffleMask);
16600 return DAG.getMergeValues({Shuffle, Load.getValue(1)}, DL);
16601 }
16602
16603 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
16604 matchIndexAsWiderOp(VT, Index, MGN->getMask(),
16605 MGN->getMemOperand()->getBaseAlign(), Subtarget)) {
16606 SmallVector<SDValue> NewIndices;
16607 for (unsigned i = 0; i < Index->getNumOperands(); i += 2)
16608 NewIndices.push_back(Index.getOperand(i));
16609 EVT IndexVT = Index.getValueType()
16610 .getHalfNumVectorElementsVT(*DAG.getContext());
16611 Index = DAG.getBuildVector(IndexVT, DL, NewIndices);
16612
16613 unsigned ElementSize = VT.getScalarStoreSize();
16614 EVT WideScalarVT = MVT::getIntegerVT(ElementSize * 8 * 2);
16615 auto EltCnt = VT.getVectorElementCount();
16616 assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!");
16617 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), WideScalarVT,
16618 EltCnt.divideCoefficientBy(2));
16619 SDValue Passthru = DAG.getBitcast(WideVT, MGN->getPassThru());
16620 EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
16621 EltCnt.divideCoefficientBy(2));
16622 SDValue Mask = DAG.getSplat(MaskVT, DL, DAG.getConstant(1, DL, MVT::i1));
16623
16624 SDValue Gather =
16625 DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), WideVT, DL,
16626 {MGN->getChain(), Passthru, Mask, MGN->getBasePtr(),
16627 Index, ScaleOp},
16628 MGN->getMemOperand(), IndexType, ISD::NON_EXTLOAD);
16629 SDValue Result = DAG.getBitcast(VT, Gather.getValue(0));
16630 return DAG.getMergeValues({Result, Gather.getValue(1)}, DL);
16631 }
16632 break;
16633 }
16634 case ISD::MSCATTER: {
16635 const auto *MSN = dyn_cast<MaskedScatterSDNode>(N);
16636 SDValue Index = MSN->getIndex();
16637 SDValue ScaleOp = MSN->getScale();
16638 ISD::MemIndexType IndexType = MSN->getIndexType();
16639 assert(!MSN->isIndexScaled() &&
16640 "Scaled gather/scatter should not be formed");
16641
16642 SDLoc DL(N);
16643 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
16644 return DAG.getMaskedScatter(
16645 N->getVTList(), MSN->getMemoryVT(), DL,
16646 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
16647 Index, ScaleOp},
16648 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
16649
16650 if (narrowIndex(Index, IndexType, DAG))
16651 return DAG.getMaskedScatter(
16652 N->getVTList(), MSN->getMemoryVT(), DL,
16653 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
16654 Index, ScaleOp},
16655 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
16656
16657 EVT VT = MSN->getValue()->getValueType(0);
16658 SmallVector<int> ShuffleMask;
16659 if (!MSN->isTruncatingStore() &&
16660 matchIndexAsShuffle(VT, Index, MSN->getMask(), ShuffleMask)) {
16661 SDValue Shuffle = DAG.getVectorShuffle(VT, DL, MSN->getValue(),
16662 DAG.getUNDEF(VT), ShuffleMask);
16663 return DAG.getMaskedStore(MSN->getChain(), DL, Shuffle, MSN->getBasePtr(),
16664 DAG.getUNDEF(XLenVT), MSN->getMask(),
16665 MSN->getMemoryVT(), MSN->getMemOperand(),
16666 ISD::UNINDEXED, false);
16667 }
16668 break;
16669 }
16670 case ISD::VP_GATHER: {
16671 const auto *VPGN = dyn_cast<VPGatherSDNode>(N);
16672 SDValue Index = VPGN->getIndex();
16673 SDValue ScaleOp = VPGN->getScale();
16674 ISD::MemIndexType IndexType = VPGN->getIndexType();
16675 assert(!VPGN->isIndexScaled() &&
16676 "Scaled gather/scatter should not be formed");
16677
16678 SDLoc DL(N);
16679 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
16680 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
16681 {VPGN->getChain(), VPGN->getBasePtr(), Index,
16682 ScaleOp, VPGN->getMask(),
16683 VPGN->getVectorLength()},
16684 VPGN->getMemOperand(), IndexType);
16685
16686 if (narrowIndex(Index, IndexType, DAG))
16687 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
16688 {VPGN->getChain(), VPGN->getBasePtr(), Index,
16689 ScaleOp, VPGN->getMask(),
16690 VPGN->getVectorLength()},
16691 VPGN->getMemOperand(), IndexType);
16692
16693 break;
16694 }
16695 case ISD::VP_SCATTER: {
16696 const auto *VPSN = dyn_cast<VPScatterSDNode>(N);
16697 SDValue Index = VPSN->getIndex();
16698 SDValue ScaleOp = VPSN->getScale();
16699 ISD::MemIndexType IndexType = VPSN->getIndexType();
16700 assert(!VPSN->isIndexScaled() &&
16701 "Scaled gather/scatter should not be formed");
16702
16703 SDLoc DL(N);
16704 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
16705 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
16706 {VPSN->getChain(), VPSN->getValue(),
16707 VPSN->getBasePtr(), Index, ScaleOp,
16708 VPSN->getMask(), VPSN->getVectorLength()},
16709 VPSN->getMemOperand(), IndexType);
16710
16711 if (narrowIndex(Index, IndexType, DAG))
16712 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
16713 {VPSN->getChain(), VPSN->getValue(),
16714 VPSN->getBasePtr(), Index, ScaleOp,
16715 VPSN->getMask(), VPSN->getVectorLength()},
16716 VPSN->getMemOperand(), IndexType);
16717 break;
16718 }
16719 case RISCVISD::SHL_VL:
16720 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16721 return V;
16722 [[fallthrough]];
16723 case RISCVISD::SRA_VL:
16724 case RISCVISD::SRL_VL: {
16725 SDValue ShAmt = N->getOperand(1);
16726 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
16727 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
16728 SDLoc DL(N);
16729 SDValue VL = N->getOperand(4);
16730 EVT VT = N->getValueType(0);
16731 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
16732 ShAmt.getOperand(1), VL);
16733 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt,
16734 N->getOperand(2), N->getOperand(3), N->getOperand(4));
16735 }
16736 break;
16737 }
16738 case ISD::SRA:
16739 if (SDValue V = performSRACombine(N, DAG, Subtarget))
16740 return V;
16741 [[fallthrough]];
16742 case ISD::SRL:
16743 case ISD::SHL: {
16744 if (N->getOpcode() == ISD::SHL) {
16745 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16746 return V;
16747 }
16748 SDValue ShAmt = N->getOperand(1);
16749 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
16750 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
16751 SDLoc DL(N);
16752 EVT VT = N->getValueType(0);
16753 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
16754 ShAmt.getOperand(1),
16755 DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()));
16756 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt);
16757 }
16758 break;
16759 }
16760 case RISCVISD::ADD_VL:
16761 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16762 return V;
16763 return combineToVWMACC(N, DAG, Subtarget);
16764 case RISCVISD::VWADD_W_VL:
16765 case RISCVISD::VWADDU_W_VL:
16766 case RISCVISD::VWSUB_W_VL:
16767 case RISCVISD::VWSUBU_W_VL:
16768 return performVWADDSUBW_VLCombine(N, DCI, Subtarget);
16769 case RISCVISD::SUB_VL:
16770 case RISCVISD::MUL_VL:
16771 return combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget);
16772 case RISCVISD::VFMADD_VL:
16773 case RISCVISD::VFNMADD_VL:
16774 case RISCVISD::VFMSUB_VL:
16775 case RISCVISD::VFNMSUB_VL:
16776 case RISCVISD::STRICT_VFMADD_VL:
16777 case RISCVISD::STRICT_VFNMADD_VL:
16778 case RISCVISD::STRICT_VFMSUB_VL:
16779 case RISCVISD::STRICT_VFNMSUB_VL:
16780 return performVFMADD_VLCombine(N, DAG, Subtarget);
16781 case RISCVISD::FADD_VL:
16782 case RISCVISD::FSUB_VL:
16783 case RISCVISD::FMUL_VL:
16784 case RISCVISD::VFWADD_W_VL:
16785 case RISCVISD::VFWSUB_W_VL: {
16786 if (N->getValueType(0).isScalableVector() &&
16787 N->getValueType(0).getVectorElementType() == MVT::f32 &&
16788 (Subtarget.hasVInstructionsF16Minimal() &&
16789 !Subtarget.hasVInstructionsF16()))
16790 return SDValue();
16791 return combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget);
16792 }
16793 case ISD::LOAD:
16794 case ISD::STORE: {
16795 if (DCI.isAfterLegalizeDAG())
16796 if (SDValue V = performMemPairCombine(N, DCI))
16797 return V;
16798
16799 if (N->getOpcode() != ISD::STORE)
16800 break;
16801
16802 auto *Store = cast<StoreSDNode>(N);
16803 SDValue Chain = Store->getChain();
16804 EVT MemVT = Store->getMemoryVT();
16805 SDValue Val = Store->getValue();
16806 SDLoc DL(N);
16807
16808 bool IsScalarizable =
16809 MemVT.isFixedLengthVector() && ISD::isNormalStore(Store) &&
16810 Store->isSimple() &&
16811 MemVT.getVectorElementType().bitsLE(Subtarget.getXLenVT()) &&
16812 isPowerOf2_64(MemVT.getSizeInBits()) &&
16813 MemVT.getSizeInBits() <= Subtarget.getXLen();
16814
16815 // If sufficiently aligned we can scalarize stores of constant vectors of
16816 // any power-of-two size up to XLen bits, provided that they aren't too
16817 // expensive to materialize.
16818 // vsetivli zero, 2, e8, m1, ta, ma
16819 // vmv.v.i v8, 4
16820 // vse64.v v8, (a0)
16821 // ->
16822 // li a1, 1028
16823 // sh a1, 0(a0)
16824 if (DCI.isBeforeLegalize() && IsScalarizable &&
16825 ISD::isBuildVectorOfConstantSDNodes(Val.getNode())) {
16826 // Get the constant vector bits
16827 APInt NewC(Val.getValueSizeInBits(), 0);
16828 uint64_t EltSize = Val.getScalarValueSizeInBits();
16829 for (unsigned i = 0; i < Val.getNumOperands(); i++) {
16830 if (Val.getOperand(i).isUndef())
16831 continue;
16832 NewC.insertBits(Val.getConstantOperandAPInt(i).trunc(EltSize),
16833 i * EltSize);
16834 }
16835 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
16836
16837 if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(), Subtarget,
16838 true) <= 2 &&
16839 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
16840 NewVT, *Store->getMemOperand())) {
16841 SDValue NewV = DAG.getConstant(NewC, DL, NewVT);
16842 return DAG.getStore(Chain, DL, NewV, Store->getBasePtr(),
16843 Store->getPointerInfo(), Store->getOriginalAlign(),
16844 Store->getMemOperand()->getFlags());
16845 }
16846 }
16847
16848 // Similarly, if sufficiently aligned we can scalarize vector copies, e.g.
16849 // vsetivli zero, 2, e16, m1, ta, ma
16850 // vle16.v v8, (a0)
16851 // vse16.v v8, (a1)
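// -> (illustrative)
// lw a2, 0(a0)
// sw a2, 0(a1)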
16852 if (auto *L = dyn_cast<LoadSDNode>(Val);
16853 L && DCI.isBeforeLegalize() && IsScalarizable && L->isSimple() &&
16854 L->hasNUsesOfValue(1, 0) && L->hasNUsesOfValue(1, 1) &&
16855 Store->getChain() == SDValue(L, 1) && ISD::isNormalLoad(L) &&
16856 L->getMemoryVT() == MemVT) {
16857 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
16858 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
16859 NewVT, *Store->getMemOperand()) &&
16860 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
16861 NewVT, *L->getMemOperand())) {
16862 SDValue NewL = DAG.getLoad(NewVT, DL, L->getChain(), L->getBasePtr(),
16863 L->getPointerInfo(), L->getOriginalAlign(),
16864 L->getMemOperand()->getFlags());
16865 return DAG.getStore(Chain, DL, NewL, Store->getBasePtr(),
16866 Store->getPointerInfo(), Store->getOriginalAlign(),
16867 Store->getMemOperand()->getFlags());
16868 }
16869 }
16870
16871 // Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1.
16872 // vfmv.f.s is represented as extract element from 0. Match it late to avoid
16873 // any illegal types.
16874 if (Val.getOpcode() == RISCVISD::VMV_X_S ||
16875 (DCI.isAfterLegalizeDAG() &&
16876 Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
16877 isNullConstant(Val.getOperand(1)))) {
16878 SDValue Src = Val.getOperand(0);
16879 MVT VecVT = Src.getSimpleValueType();
16880 // VecVT should be scalable and memory VT should match the element type.
16881 if (!Store->isIndexed() && VecVT.isScalableVector() &&
16882 MemVT == VecVT.getVectorElementType()) {
16883 SDLoc DL(N);
16884 MVT MaskVT = getMaskTypeFor(VecVT);
16885 return DAG.getStoreVP(
16886 Store->getChain(), DL, Src, Store->getBasePtr(), Store->getOffset(),
16887 DAG.getConstant(1, DL, MaskVT),
16888 DAG.getConstant(1, DL, Subtarget.getXLenVT()), MemVT,
16889 Store->getMemOperand(), Store->getAddressingMode(),
16890 Store->isTruncatingStore(), /*IsCompress*/ false);
16891 }
16892 }
16893
16894 break;
16895 }
16896 case ISD::SPLAT_VECTOR: {
16897 EVT VT = N->getValueType(0);
16898 // Only perform this combine on legal MVT types.
16899 if (!isTypeLegal(VT))
16900 break;
16901 if (auto Gather = matchSplatAsGather(N->getOperand(0), VT.getSimpleVT(), N,
16902 DAG, Subtarget))
16903 return Gather;
16904 break;
16905 }
16906 case ISD::BUILD_VECTOR:
16907 if (SDValue V = performBUILD_VECTORCombine(N, DAG, Subtarget, *this))
16908 return V;
16909 break;
16910 case ISD::CONCAT_VECTORS:
16911 if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, *this))
16912 return V;
16913 break;
16914 case ISD::INSERT_VECTOR_ELT:
16915 if (SDValue V = performINSERT_VECTOR_ELTCombine(N, DAG, Subtarget, *this))
16916 return V;
16917 break;
16918 case RISCVISD::VFMV_V_F_VL: {
16919 const MVT VT = N->getSimpleValueType(0);
16920 SDValue Passthru = N->getOperand(0);
16921 SDValue Scalar = N->getOperand(1);
16922 SDValue VL = N->getOperand(2);
16923
16924 // If VL is 1, we can use vfmv.s.f.
16925 if (isOneConstant(VL))
16926 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL);
16927 break;
16928 }
16929 case RISCVISD::VMV_V_X_VL: {
16930 const MVT VT = N->getSimpleValueType(0);
16931 SDValue Passthru = N->getOperand(0);
16932 SDValue Scalar = N->getOperand(1);
16933 SDValue VL = N->getOperand(2);
16934
16935 // Tail agnostic VMV.V.X only demands the vector element bitwidth from the
16936 // scalar input.
16937 unsigned ScalarSize = Scalar.getValueSizeInBits();
16938 unsigned EltWidth = VT.getScalarSizeInBits();
16939 if (ScalarSize > EltWidth && Passthru.isUndef())
16940 if (SimplifyDemandedLowBitsHelper(1, EltWidth))
16941 return SDValue(N, 0);
16942
16943 // If VL is 1 and the scalar value won't benefit from immediate, we can
16944 // use vmv.s.x.
16945 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
16946 if (isOneConstant(VL) &&
16947 (!Const || Const->isZero() ||
16948 !Const->getAPIntValue().sextOrTrunc(EltWidth).isSignedIntN(5)))
16949 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL);
16950
16951 break;
16952 }
16953 case RISCVISD::VFMV_S_F_VL: {
16954 SDValue Src = N->getOperand(1);
16955 // Try to remove vector->scalar->vector if the scalar->vector is inserting
16956 // into an undef vector.
16957 // TODO: Could use a vslide or vmv.v.v for non-undef.
16958 if (N->getOperand(0).isUndef() &&
16959 Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
16960 isNullConstant(Src.getOperand(1)) &&
16961 Src.getOperand(0).getValueType().isScalableVector()) {
16962 EVT VT = N->getValueType(0);
16963 EVT SrcVT = Src.getOperand(0).getValueType();
16964 assert(SrcVT.getVectorElementType() == VT.getVectorElementType());
16965 // Widths match, just return the original vector.
16966 if (SrcVT == VT)
16967 return Src.getOperand(0);
16968 // TODO: Use insert_subvector/extract_subvector to change widen/narrow?
16969 }
16970 [[fallthrough]];
16971 }
16972 case RISCVISD::VMV_S_X_VL: {
16973 const MVT VT = N->getSimpleValueType(0);
16974 SDValue Passthru = N->getOperand(0);
16975 SDValue Scalar = N->getOperand(1);
16976 SDValue VL = N->getOperand(2);
16977
16978 if (Scalar.getOpcode() == RISCVISD::VMV_X_S && Passthru.isUndef() &&
16979 Scalar.getOperand(0).getValueType() == N->getValueType(0))
16980 return Scalar.getOperand(0);
16981
16982 // Use M1 or smaller to avoid over constraining register allocation
16983 const MVT M1VT = getLMUL1VT(VT);
16984 if (M1VT.bitsLT(VT)) {
16985 SDValue M1Passthru =
16986 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Passthru,
16987 DAG.getVectorIdxConstant(0, DL));
16988 SDValue Result =
16989 DAG.getNode(N->getOpcode(), DL, M1VT, M1Passthru, Scalar, VL);
16990 Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru, Result,
16991 DAG.getVectorIdxConstant(0, DL));
16992 return Result;
16993 }
16994
16995 // We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or
16996 // higher would involve overly constraining the register allocator for
16997 // no purpose.
16998 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
16999 Const && !Const->isZero() && isInt<5>(Const->getSExtValue()) &&
17000 VT.bitsLE(getLMUL1VT(VT)) && Passthru.isUndef())
17001 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
17002
17003 break;
17004 }
17005 case RISCVISD::VMV_X_S: {
17006 SDValue Vec = N->getOperand(0);
17007 MVT VecVT = N->getOperand(0).getSimpleValueType();
17008 const MVT M1VT = getLMUL1VT(VecVT);
17009 if (M1VT.bitsLT(VecVT)) {
17010 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec,
17011 DAG.getVectorIdxConstant(0, DL));
17012 return DAG.getNode(RISCVISD::VMV_X_S, DL, N->getSimpleValueType(0), Vec);
17013 }
17014 break;
17015 }
17016 case ISD::INTRINSIC_VOID:
17017 case ISD::INTRINSIC_W_CHAIN:
17018 case ISD::INTRINSIC_WO_CHAIN: {
17019 unsigned IntOpNo = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
17020 unsigned IntNo = N->getConstantOperandVal(IntOpNo);
17021 switch (IntNo) {
17022 // By default we do not combine any intrinsic.
17023 default:
17024 return SDValue();
17025 case Intrinsic::riscv_masked_strided_load: {
17026 MVT VT = N->getSimpleValueType(0);
17027 auto *Load = cast<MemIntrinsicSDNode>(N);
17028 SDValue PassThru = N->getOperand(2);
17029 SDValue Base = N->getOperand(3);
17030 SDValue Stride = N->getOperand(4);
17031 SDValue Mask = N->getOperand(5);
17032
17033 // If the stride is equal to the element size in bytes, we can use
17034 // a masked.load.
17035 const unsigned ElementSize = VT.getScalarStoreSize();
17036 if (auto *StrideC = dyn_cast<ConstantSDNode>(Stride);
17037 StrideC && StrideC->getZExtValue() == ElementSize)
17038 return DAG.getMaskedLoad(VT, DL, Load->getChain(), Base,
17039 DAG.getUNDEF(XLenVT), Mask, PassThru,
17040 Load->getMemoryVT(), Load->getMemOperand(),
17041 ISD::UNINDEXED, ISD::NON_EXTLOAD);
17042 return SDValue();
17043 }
17044 case Intrinsic::riscv_masked_strided_store: {
17045 auto *Store = cast<MemIntrinsicSDNode>(N);
17046 SDValue Value = N->getOperand(2);
17047 SDValue Base = N->getOperand(3);
17048 SDValue Stride = N->getOperand(4);
17049 SDValue Mask = N->getOperand(5);
17050
17051 // If the stride is equal to the element size in bytes, we can use
17052 // a masked.store.
17053 const unsigned ElementSize = Value.getValueType().getScalarStoreSize();
17054 if (auto *StrideC = dyn_cast<ConstantSDNode>(Stride);
17055 StrideC && StrideC->getZExtValue() == ElementSize)
17056 return DAG.getMaskedStore(Store->getChain(), DL, Value, Base,
17057 DAG.getUNDEF(XLenVT), Mask,
17058 Value.getValueType(), Store->getMemOperand(),
17059 ISD::UNINDEXED, false);
17060 return SDValue();
17061 }
17062 case Intrinsic::riscv_vcpop:
17063 case Intrinsic::riscv_vcpop_mask:
17064 case Intrinsic::riscv_vfirst:
17065 case Intrinsic::riscv_vfirst_mask: {
17066 SDValue VL = N->getOperand(2);
17067 if (IntNo == Intrinsic::riscv_vcpop_mask ||
17068 IntNo == Intrinsic::riscv_vfirst_mask)
17069 VL = N->getOperand(3);
17070 if (!isNullConstant(VL))
17071 return SDValue();
17072 // If VL is 0, vcpop -> li 0, vfirst -> li -1.
17073 SDLoc DL(N);
17074 EVT VT = N->getValueType(0);
17075 if (IntNo == Intrinsic::riscv_vfirst ||
17076 IntNo == Intrinsic::riscv_vfirst_mask)
17077 return DAG.getConstant(-1, DL, VT);
17078 return DAG.getConstant(0, DL, VT);
17079 }
17080 }
17081 }
17082 case ISD::BITCAST: {
17083 assert(Subtarget.useRVVForFixedLengthVectors());
17084 SDValue N0 = N->getOperand(0);
17085 EVT VT = N->getValueType(0);
17086 EVT SrcVT = N0.getValueType();
17087 // If this is a bitcast between a MVT::v4i1/v2i1/v1i1 and an illegal integer
17088 // type, widen both sides to avoid a trip through memory.
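// For example (illustrative): a v2i1-to-i2 bitcast is widened by concatenating
// X with three undef v2i1s, bitcasting the v8i1 to i8, and truncating to i2.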
17089 if ((SrcVT == MVT::v1i1 || SrcVT == MVT::v2i1 || SrcVT == MVT::v4i1) &&
17090 VT.isScalarInteger()) {
17091 unsigned NumConcats = 8 / SrcVT.getVectorNumElements();
17092 SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT));
17093 Ops[0] = N0;
17094 SDLoc DL(N);
17095 N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i1, Ops);
17096 N0 = DAG.getBitcast(MVT::i8, N0);
17097 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0);
17098 }
17099
17100 return SDValue();
17101 }
17102 }
17103
17104 return SDValue();
17105}
17106
17107bool RISCVTargetLowering::shouldTransformSignedTruncationCheck(
17108 EVT XVT, unsigned KeptBits) const {
17109 // For vectors, we don't have a preference.
17110 if (XVT.isVector())
17111 return false;
17112
17113 if (XVT != MVT::i32 && XVT != MVT::i64)
17114 return false;
17115
17116 // We can use sext.w for RV64 or an srai 31 on RV32.
17117 if (KeptBits == 32 || KeptBits == 64)
17118 return true;
17119
17120 // With Zbb we can use sext.h/sext.b.
17121 return Subtarget.hasStdExtZbb() &&
17122 ((KeptBits == 8 && XVT == MVT::i64 && !Subtarget.is64Bit()) ||
17123 KeptBits == 16);
17124}
17125
17126bool RISCVTargetLowering::isDesirableToCommuteWithShift(
17127 const SDNode *N, CombineLevel Level) const {
17128 assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
17129 N->getOpcode() == ISD::SRL) &&
17130 "Expected shift op");
17131
17132 // The following folds are only desirable if `(OP _, c1 << c2)` can be
17133 // materialised in fewer instructions than `(OP _, c1)`:
17134 //
17135 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
17136 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
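// For example (illustrative): with c1 = 0x7ff and c2 = 4, c1 fits an ADDI but
// c1 << c2 = 0x7ff0 does not, so the transform is not desirable here.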
17137 SDValue N0 = N->getOperand(0);
17138 EVT Ty = N0.getValueType();
17139 if (Ty.isScalarInteger() &&
17140 (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
17141 auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
17142 auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
17143 if (C1 && C2) {
17144 const APInt &C1Int = C1->getAPIntValue();
17145 APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
17146
17147 // We can materialise `c1 << c2` into an add immediate, so it's "free",
17148 // and the combine should happen, to potentially allow further combines
17149 // later.
17150 if (ShiftedC1Int.getSignificantBits() <= 64 &&
17151 isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
17152 return true;
17153
17154 // We can materialise `c1` in an add immediate, so it's "free", and the
17155 // combine should be prevented.
17156 if (C1Int.getSignificantBits() <= 64 &&
17157 isLegalAddImmediate(C1Int.getSExtValue()))
17158 return false;
17159
17160 // Neither constant will fit into an immediate, so find materialisation
17161 // costs.
17162 int C1Cost =
17163 RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), Subtarget,
17164 /*CompressionCost*/ true);
17165 int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
17166 ShiftedC1Int, Ty.getSizeInBits(), Subtarget,
17167 /*CompressionCost*/ true);
17168
17169 // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
17170 // combine should be prevented.
17171 if (C1Cost < ShiftedC1Cost)
17172 return false;
17173 }
17174 }
17175 return true;
17176}
17177
17178 bool RISCVTargetLowering::targetShrinkDemandedConstant(
17179 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
17180 TargetLoweringOpt &TLO) const {
17181 // Delay this optimization as late as possible.
17182 if (!TLO.LegalOps)
17183 return false;
17184
17185 EVT VT = Op.getValueType();
17186 if (VT.isVector())
17187 return false;
17188
17189 unsigned Opcode = Op.getOpcode();
17190 if (Opcode != ISD::AND && Opcode != ISD::OR && Opcode != ISD::XOR)
17191 return false;
17192
17193 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
17194 if (!C)
17195 return false;
17196
17197 const APInt &Mask = C->getAPIntValue();
17198
17199 // Clear all non-demanded bits initially.
17200 APInt ShrunkMask = Mask & DemandedBits;
17201
17202 // Try to make a smaller immediate by setting undemanded bits.
17203
17204 APInt ExpandedMask = Mask | ~DemandedBits;
17205
17206 auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool {
17207 return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask);
17208 };
17209 auto UseMask = [Mask, Op, &TLO](const APInt &NewMask) -> bool {
17210 if (NewMask == Mask)
17211 return true;
17212 SDLoc DL(Op);
17213 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, Op.getValueType());
17214 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
17215 Op.getOperand(0), NewC);
17216 return TLO.CombineTo(Op, NewOp);
17217 };
17218
17219 // If the shrunk mask fits in sign extended 12 bits, let the target
17220 // independent code apply it.
17221 if (ShrunkMask.isSignedIntN(12))
17222 return false;
17223
17224 // AND has a few special cases for zext.
17225 if (Opcode == ISD::AND) {
17226 // Preserve (and X, 0xffff), if zext.h exists use zext.h,
17227 // otherwise use SLLI + SRLI.
17228 APInt NewMask = APInt(Mask.getBitWidth(), 0xffff);
17229 if (IsLegalMask(NewMask))
17230 return UseMask(NewMask);
17231
17232 // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.
17233 if (VT == MVT::i64) {
17234 APInt NewMask = APInt(64, 0xffffffff);
17235 if (IsLegalMask(NewMask))
17236 return UseMask(NewMask);
17237 }
17238 }
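// Illustrative example (not from the original source): with Mask = 0xff00 and
// only bits 8..15 demanded, ShrunkMask = 0xff00 is not a simm12, but widening
// the constant to 0xffff is legal here, so (and X, 0xff00) can be rewritten as
// (and X, 0xffff) and selected as zext.h if available, or SLLI+SRLI otherwise.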
17239
17240 // For the remaining optimizations, we need to be able to make a negative
17241 // number through a combination of mask and undemanded bits.
17242 if (!ExpandedMask.isNegative())
17243 return false;
17244
17245 // What is the fewest number of bits we need to represent the negative number?
17246 unsigned MinSignedBits = ExpandedMask.getSignificantBits();
17247
17248 // Try to make a 12 bit negative immediate. If that fails try to make a 32
17249 // bit negative immediate unless the shrunk immediate already fits in 32 bits.
17250 // If we can't create a simm12, we shouldn't change opaque constants.
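// Illustrative example (not from the original source): with DemandedBits =
// 0xfff and Mask = 0xf0f, ShrunkMask = 0xf0f is not a simm12, but ExpandedMask
// is negative and needs only 9 signed bits, so setting bits 11 and up yields
// the simm12 mask -241 and the AND can be selected as a single andi.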
17251 APInt NewMask = ShrunkMask;
17252 if (MinSignedBits <= 12)
17253 NewMask.setBitsFrom(11);
17254 else if (!C->isOpaque() && MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
17255 NewMask.setBitsFrom(31);
17256 else
17257 return false;
17258
17259 // Check that our new mask is a subset of the demanded mask.
17260 assert(IsLegalMask(NewMask));
17261 return UseMask(NewMask);
17262}
17263
17264static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC) {
17265 static const uint64_t GREVMasks[] = {
17266 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
17267 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
17268
17269 for (unsigned Stage = 0; Stage != 6; ++Stage) {
17270 unsigned Shift = 1 << Stage;
17271 if (ShAmt & Shift) {
17272 uint64_t Mask = GREVMasks[Stage];
17273 uint64_t Res = ((x & Mask) << Shift) | ((x >> Shift) & Mask);
17274 if (IsGORC)
17275 Res |= x;
17276 x = Res;
17277 }
17278 }
17279
17280 return x;
17281}
17282
17283 void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
17284 KnownBits &Known,
17285 const APInt &DemandedElts,
17286 const SelectionDAG &DAG,
17287 unsigned Depth) const {
17288 unsigned BitWidth = Known.getBitWidth();
17289 unsigned Opc = Op.getOpcode();
17290 assert((Opc >= ISD::BUILTIN_OP_END ||
17291 Opc == ISD::INTRINSIC_WO_CHAIN ||
17292 Opc == ISD::INTRINSIC_W_CHAIN ||
17293 Opc == ISD::INTRINSIC_VOID) &&
17294 "Should use MaskedValueIsZero if you don't know whether Op"
17295 " is a target node!");
17296
17297 Known.resetAll();
17298 switch (Opc) {
17299 default: break;
17300 case RISCVISD::SELECT_CC: {
17301 Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1);
17302 // If we don't know any bits, early out.
17303 if (Known.isUnknown())
17304 break;
17305 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1);
17306
17307 // Only known if known in both the LHS and RHS.
17308 Known = Known.intersectWith(Known2);
17309 break;
17310 }
17311 case RISCVISD::CZERO_EQZ:
17312 case RISCVISD::CZERO_NEZ:
17313 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
17314 // Result is either all zero or operand 0. We can propagate zeros, but not
17315 // ones.
17316 Known.One.clearAllBits();
17317 break;
17318 case RISCVISD::REMUW: {
17319 KnownBits Known2;
17320 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
17321 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
17322 // We only care about the lower 32 bits.
17323 Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32));
17324 // Restore the original width by sign extending.
17325 Known = Known.sext(BitWidth);
17326 break;
17327 }
17328 case RISCVISD::DIVUW: {
17329 KnownBits Known2;
17330 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
17331 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
17332 // We only care about the lower 32 bits.
17333 Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32));
17334 // Restore the original width by sign extending.
17335 Known = Known.sext(BitWidth);
17336 break;
17337 }
17338 case RISCVISD::SLLW: {
17339 KnownBits Known2;
17340 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
17341 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
17342 Known = KnownBits::shl(Known.trunc(32), Known2.trunc(5).zext(32));
17343 // Restore the original width by sign extending.
17344 Known = Known.sext(BitWidth);
17345 break;
17346 }
17347 case RISCVISD::CTZW: {
17348 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
17349 unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros();
17350 unsigned LowBits = llvm::bit_width(PossibleTZ);
17351 Known.Zero.setBitsFrom(LowBits);
17352 break;
17353 }
17354 case RISCVISD::CLZW: {
17355 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
17356 unsigned PossibleLZ = Known2.trunc(32).countMaxLeadingZeros();
17357 unsigned LowBits = llvm::bit_width(PossibleLZ);
17358 Known.Zero.setBitsFrom(LowBits);
17359 break;
17360 }
17361 case RISCVISD::BREV8:
17362 case RISCVISD::ORC_B: {
17363 // FIXME: This is based on the non-ratified Zbp GREV and GORC where a
17364 // control value of 7 is equivalent to brev8 and orc.b.
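// Illustrative example (not from the original source): with a control value of
// 7, GORC maps 0x00010200 to 0x00ffff00 (every non-zero byte becomes 0xff) and
// GREV maps 0x01 to 0x80 (bits reversed within each byte).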
17365 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
17366 bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B;
17367 // To compute zeros, we need to invert the value and invert it back after.
17368 Known.Zero =
17369 ~computeGREVOrGORC(~Known.Zero.getZExtValue(), 7, IsGORC);
17370 Known.One = computeGREVOrGORC(Known.One.getZExtValue(), 7, IsGORC);
17371 break;
17372 }
17373 case RISCVISD::READ_VLENB: {
17374 // We can use the minimum and maximum VLEN values to bound VLENB. We
17375 // know VLEN must be a power of two.
17376 const unsigned MinVLenB = Subtarget.getRealMinVLen() / 8;
17377 const unsigned MaxVLenB = Subtarget.getRealMaxVLen() / 8;
17378 assert(MinVLenB > 0 && "READ_VLENB without vector extension enabled?");
17379 Known.Zero.setLowBits(Log2_32(MinVLenB));
17380 Known.Zero.setBitsFrom(Log2_32(MaxVLenB)+1);
17381 if (MaxVLenB == MinVLenB)
17382 Known.One.setBit(Log2_32(MinVLenB));
17383 break;
17384 }
17385 case RISCVISD::FCLASS: {
17386 // fclass will only set one of the low 10 bits.
17387 Known.Zero.setBitsFrom(10);
17388 break;
17389 }
17390 case ISD::INTRINSIC_W_CHAIN:
17391 case ISD::INTRINSIC_WO_CHAIN: {
17392 unsigned IntNo =
17393 Op.getConstantOperandVal(Opc == ISD::INTRINSIC_WO_CHAIN ? 0 : 1);
17394 switch (IntNo) {
17395 default:
17396 // We can't do anything for most intrinsics.
17397 break;
17398 case Intrinsic::riscv_vsetvli:
17399 case Intrinsic::riscv_vsetvlimax: {
17400 bool HasAVL = IntNo == Intrinsic::riscv_vsetvli;
17401 unsigned VSEW = Op.getConstantOperandVal(HasAVL + 1);
17402 RISCVII::VLMUL VLMUL =
17403 static_cast<RISCVII::VLMUL>(Op.getConstantOperandVal(HasAVL + 2));
17404 unsigned SEW = RISCVVType::decodeVSEW(VSEW);
17405 auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMUL);
17406 uint64_t MaxVL = Subtarget.getRealMaxVLen() / SEW;
17407 MaxVL = (Fractional) ? MaxVL / LMul : MaxVL * LMul;
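// Illustrative example (not from the original source): with SEW = 8, LMUL = 1
// and a maximum VLEN of 512, MaxVL is 64, so bits 7 and above of the result
// are known to be zero.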
17408
17409 // The result of vsetvli must not be larger than AVL.
17410 if (HasAVL && isa<ConstantSDNode>(Op.getOperand(1)))
17411 MaxVL = std::min(MaxVL, Op.getConstantOperandVal(1));
17412
17413 unsigned KnownZeroFirstBit = Log2_32(MaxVL) + 1;
17414 if (BitWidth > KnownZeroFirstBit)
17415 Known.Zero.setBitsFrom(KnownZeroFirstBit);
17416 break;
17417 }
17418 }
17419 break;
17420 }
17421 }
17422}
17423
17424 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
17425 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
17426 unsigned Depth) const {
17427 switch (Op.getOpcode()) {
17428 default:
17429 break;
17430 case RISCVISD::SELECT_CC: {
17431 unsigned Tmp =
17432 DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1);
17433 if (Tmp == 1) return 1; // Early out.
17434 unsigned Tmp2 =
17435 DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1);
17436 return std::min(Tmp, Tmp2);
17437 }
17438 case RISCVISD::CZERO_EQZ:
17439 case RISCVISD::CZERO_NEZ:
17440 // Output is either all zero or operand 0. We can propagate sign bit count
17441 // from operand 0.
17442 return DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
17443 case RISCVISD::ABSW: {
17444 // We expand this at isel to negw+max. The result will have 33 sign bits
17445 // if the input has at least 33 sign bits.
17446 unsigned Tmp =
17447 DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
17448 if (Tmp < 33) return 1;
17449 return 33;
17450 }
17451 case RISCVISD::SLLW:
17452 case RISCVISD::SRAW:
17453 case RISCVISD::SRLW:
17454 case RISCVISD::DIVW:
17455 case RISCVISD::DIVUW:
17456 case RISCVISD::REMUW:
17457 case RISCVISD::ROLW:
17458 case RISCVISD::RORW:
17463 // TODO: As the result is sign-extended, this is conservatively correct. A
17464 // more precise answer could be calculated for SRAW depending on known
17465 // bits in the shift amount.
17466 return 33;
17467 case RISCVISD::VMV_X_S: {
17468 // The number of sign bits of the scalar result is computed by obtaining the
17469 // element type of the input vector operand, subtracting its width from the
17470 // XLEN, and then adding one (sign bit within the element type). If the
17471 // element type is wider than XLen, the least-significant XLEN bits are
17472 // taken.
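// For example, reading an i8 element on RV64 gives 64 - 8 + 1 = 57 sign bits.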
17473 unsigned XLen = Subtarget.getXLen();
17474 unsigned EltBits = Op.getOperand(0).getScalarValueSizeInBits();
17475 if (EltBits <= XLen)
17476 return XLen - EltBits + 1;
17477 break;
17478 }
17479 case ISD::INTRINSIC_W_CHAIN: {
17480 unsigned IntNo = Op.getConstantOperandVal(1);
17481 switch (IntNo) {
17482 default:
17483 break;
17484 case Intrinsic::riscv_masked_atomicrmw_xchg_i64:
17485 case Intrinsic::riscv_masked_atomicrmw_add_i64:
17486 case Intrinsic::riscv_masked_atomicrmw_sub_i64:
17487 case Intrinsic::riscv_masked_atomicrmw_nand_i64:
17488 case Intrinsic::riscv_masked_atomicrmw_max_i64:
17489 case Intrinsic::riscv_masked_atomicrmw_min_i64:
17490 case Intrinsic::riscv_masked_atomicrmw_umax_i64:
17491 case Intrinsic::riscv_masked_atomicrmw_umin_i64:
17492 case Intrinsic::riscv_masked_cmpxchg_i64:
17493 // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
17494 // narrow atomic operation. These are implemented using atomic
17495 // operations at the minimum supported atomicrmw/cmpxchg width whose
17496 // result is then sign extended to XLEN. With +A, the minimum width is
17497 // 32 for both RV32 and RV64.
17498 assert(Subtarget.getXLen() == 64);
17500 assert(Subtarget.hasStdExtA());
17501 return 33;
17502 }
17503 break;
17504 }
17505 }
17506
17507 return 1;
17508}
17509
17510 bool RISCVTargetLowering::canCreateUndefOrPoisonForTargetNode(
17511 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
17512 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
17513
17514 // TODO: Add more target nodes.
17515 switch (Op.getOpcode()) {
17516 case RISCVISD::SELECT_CC:
17517 // Integer select_cc cannot create poison.
17518 // TODO: What are the FP poison semantics?
17519 // TODO: This instruction blocks poison from the unselected operand, can
17520 // we do anything with that?
17521 return !Op.getValueType().isInteger();
17522 }
17523 return TargetLowering::canCreateUndefOrPoisonForTargetNode(
17524 Op, DemandedElts, DAG, PoisonOnly, ConsiderFlags, Depth);
17525}
17526
17527const Constant *
17528 RISCVTargetLowering::getTargetConstantFromLoad(LoadSDNode *Ld) const {
17529 assert(Ld && "Unexpected null LoadSDNode");
17530 if (!ISD::isNormalLoad(Ld))
17531 return nullptr;
17532
17533 SDValue Ptr = Ld->getBasePtr();
17534
17535 // Only constant pools with no offset are supported.
17536 auto GetSupportedConstantPool = [](SDValue Ptr) -> ConstantPoolSDNode * {
17537 auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
17538 if (!CNode || CNode->isMachineConstantPoolEntry() ||
17539 CNode->getOffset() != 0)
17540 return nullptr;
17541
17542 return CNode;
17543 };
17544
17545 // Simple case, LLA.
17546 if (Ptr.getOpcode() == RISCVISD::LLA) {
17547 auto *CNode = GetSupportedConstantPool(Ptr);
17548 if (!CNode || CNode->getTargetFlags() != 0)
17549 return nullptr;
17550
17551 return CNode->getConstVal();
17552 }
17553
17554 // Look for a HI and ADD_LO pair.
17555 if (Ptr.getOpcode() != RISCVISD::ADD_LO ||
17556 Ptr.getOperand(0).getOpcode() != RISCVISD::HI)
17557 return nullptr;
17558
17559 auto *CNodeLo = GetSupportedConstantPool(Ptr.getOperand(1));
17560 auto *CNodeHi = GetSupportedConstantPool(Ptr.getOperand(0).getOperand(0));
17561
17562 if (!CNodeLo || CNodeLo->getTargetFlags() != RISCVII::MO_LO ||
17563 !CNodeHi || CNodeHi->getTargetFlags() != RISCVII::MO_HI)
17564 return nullptr;
17565
17566 if (CNodeLo->getConstVal() != CNodeHi->getConstVal())
17567 return nullptr;
17568
17569 return CNodeLo->getConstVal();
17570}
17571
17572 static MachineBasicBlock *emitReadCounterWidePseudo(MachineInstr &MI,
17573 MachineBasicBlock *BB) {
17574 assert(MI.getOpcode() == RISCV::ReadCounterWide && "Unexpected instruction");
17575
17576 // To read a 64-bit counter CSR on a 32-bit target, we read the two halves.
17577 // Should the count have wrapped while it was being read, we need to try
17578 // again.
17579 // For example:
17580 // ```
17581 // read:
17582 // csrrs x3, counterh # load high word of counter
17583 // csrrs x2, counter # load low word of counter
17584 // csrrs x4, counterh # load high word of counter
17585 // bne x3, x4, read # check if high word reads match, otherwise try again
17586 // ```
17587
17588 MachineFunction &MF = *BB->getParent();
17589 const BasicBlock *LLVMBB = BB->getBasicBlock();
17590 MachineFunction::iterator It = ++BB->getIterator();
17591
17592 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVMBB);
17593 MF.insert(It, LoopMBB);
17594
17595 MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVMBB);
17596 MF.insert(It, DoneMBB);
17597
17598 // Transfer the remainder of BB and its successor edges to DoneMBB.
17599 DoneMBB->splice(DoneMBB->begin(), BB,
17600 std::next(MachineBasicBlock::iterator(MI)), BB->end());
17601 DoneMBB->transferSuccessorsAndUpdatePHIs(BB);
17602
17603 BB->addSuccessor(LoopMBB);
17604
17605 MachineRegisterInfo &RegInfo = MF.getRegInfo();
17606 Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
17607 Register LoReg = MI.getOperand(0).getReg();
17608 Register HiReg = MI.getOperand(1).getReg();
17609 int64_t LoCounter = MI.getOperand(2).getImm();
17610 int64_t HiCounter = MI.getOperand(3).getImm();
17611 DebugLoc DL = MI.getDebugLoc();
17612
17613 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
17614 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
17615 .addImm(HiCounter)
17616 .addReg(RISCV::X0);
17617 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
17618 .addImm(LoCounter)
17619 .addReg(RISCV::X0);
17620 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
17621 .addImm(HiCounter)
17622 .addReg(RISCV::X0);
17623
17624 BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
17625 .addReg(HiReg)
17626 .addReg(ReadAgainReg)
17627 .addMBB(LoopMBB);
17628
17629 LoopMBB->addSuccessor(LoopMBB);
17630 LoopMBB->addSuccessor(DoneMBB);
17631
17632 MI.eraseFromParent();
17633
17634 return DoneMBB;
17635}
17636
17637 static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
17638 MachineBasicBlock *BB,
17639 const RISCVSubtarget &Subtarget) {
17640 assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
17641
17642 MachineFunction &MF = *BB->getParent();
17643 DebugLoc DL = MI.getDebugLoc();
17646 Register LoReg = MI.getOperand(0).getReg();
17647 Register HiReg = MI.getOperand(1).getReg();
17648 Register SrcReg = MI.getOperand(2).getReg();
17649
17650 const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
17651 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
17652
17653 TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
17654 RI, Register());
17656 MachineMemOperand *MMOLo =
17660 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
17661 .addFrameIndex(FI)
17662 .addImm(0)
17663 .addMemOperand(MMOLo);
17664 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
17665 .addFrameIndex(FI)
17666 .addImm(4)
17667 .addMemOperand(MMOHi);
17668 MI.eraseFromParent(); // The pseudo instruction is gone now.
17669 return BB;
17670}
17671
17672 static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
17673 MachineBasicBlock *BB,
17674 const RISCVSubtarget &Subtarget) {
17675 assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
17676 "Unexpected instruction");
17677
17678 MachineFunction &MF = *BB->getParent();
17679 DebugLoc DL = MI.getDebugLoc();
17682 Register DstReg = MI.getOperand(0).getReg();
17683 Register LoReg = MI.getOperand(1).getReg();
17684 Register HiReg = MI.getOperand(2).getReg();
17685
17686 const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
17687 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
17688
17690 MachineMemOperand *MMOLo =
17694 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
17695 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
17696 .addFrameIndex(FI)
17697 .addImm(0)
17698 .addMemOperand(MMOLo);
17699 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
17700 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
17701 .addFrameIndex(FI)
17702 .addImm(4)
17703 .addMemOperand(MMOHi);
17704 TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI, Register());
17705 MI.eraseFromParent(); // The pseudo instruction is gone now.
17706 return BB;
17707}
17708
17709 static bool isSelectPseudo(MachineInstr &MI) {
17710 switch (MI.getOpcode()) {
17711 default:
17712 return false;
17713 case RISCV::Select_GPR_Using_CC_GPR:
17714 case RISCV::Select_GPR_Using_CC_Imm:
17715 case RISCV::Select_FPR16_Using_CC_GPR:
17716 case RISCV::Select_FPR16INX_Using_CC_GPR:
17717 case RISCV::Select_FPR32_Using_CC_GPR:
17718 case RISCV::Select_FPR32INX_Using_CC_GPR:
17719 case RISCV::Select_FPR64_Using_CC_GPR:
17720 case RISCV::Select_FPR64INX_Using_CC_GPR:
17721 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
17722 return true;
17723 }
17724}
17725
17726 static MachineBasicBlock *emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB,
17727 unsigned RelOpcode, unsigned EqOpcode,
17728 const RISCVSubtarget &Subtarget) {
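// Rough sketch (illustrative only, with made-up register names) of what this
// expands to for PseudoQuietFLT_S:
// ```
// csrrs t0, fflags, zero   # save FFLAGS
// flt.s a0, fa0, fa1       # signaling compare; may spuriously raise NV
// csrw  fflags, t0         # restore FFLAGS, dropping any spurious flags
// feq.s zero, fa0, fa1     # quiet compare; raises NV only for signaling NaNs
// ```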
17729 DebugLoc DL = MI.getDebugLoc();
17730 Register DstReg = MI.getOperand(0).getReg();
17731 Register Src1Reg = MI.getOperand(1).getReg();
17732 Register Src2Reg = MI.getOperand(2).getReg();
17734 Register SavedFFlags = MRI.createVirtualRegister(&RISCV::GPRRegClass);
17736
17737 // Save the current FFLAGS.
17738 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFlags);
17739
17740 auto MIB = BuildMI(*BB, MI, DL, TII.get(RelOpcode), DstReg)
17741 .addReg(Src1Reg)
17742 .addReg(Src2Reg);
17745
17746 // Restore the FFLAGS.
17747 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
17748 .addReg(SavedFFlags, RegState::Kill);
17749
17750 // Issue a dummy FEQ opcode to raise exception for signaling NaNs.
17751 auto MIB2 = BuildMI(*BB, MI, DL, TII.get(EqOpcode), RISCV::X0)
17752 .addReg(Src1Reg, getKillRegState(MI.getOperand(1).isKill()))
17753 .addReg(Src2Reg, getKillRegState(MI.getOperand(2).isKill()));
17756
17757 // Erase the pseudoinstruction.
17758 MI.eraseFromParent();
17759 return BB;
17760}
17761
17762static MachineBasicBlock *
17763 EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second,
17764 MachineBasicBlock *ThisMBB,
17765 const RISCVSubtarget &Subtarget) {
17766 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
17767 // Without this, the custom inserter would have generated:
17768 //
17769 // A
17770 // | \
17771 // | B
17772 // | /
17773 // C
17774 // | \
17775 // | D
17776 // | /
17777 // E
17778 //
17779 // A: X = ...; Y = ...
17780 // B: empty
17781 // C: Z = PHI [X, A], [Y, B]
17782 // D: empty
17783 // E: PHI [X, C], [Z, D]
17784 //
17785 // If we lower both Select_FPRX_ in a single step, we can instead generate:
17786 //
17787 // A
17788 // | \
17789 // | C
17790 // | /|
17791 // |/ |
17792 // | |
17793 // | D
17794 // | /
17795 // E
17796 //
17797 // A: X = ...; Y = ...
17798 // D: empty
17799 // E: PHI [X, A], [X, C], [Y, D]
17800
17801 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
17802 const DebugLoc &DL = First.getDebugLoc();
17803 const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
17804 MachineFunction *F = ThisMBB->getParent();
17805 MachineBasicBlock *FirstMBB = F->CreateMachineBasicBlock(LLVM_BB);
17806 MachineBasicBlock *SecondMBB = F->CreateMachineBasicBlock(LLVM_BB);
17807 MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
17808 MachineFunction::iterator It = ++ThisMBB->getIterator();
17809 F->insert(It, FirstMBB);
17810 F->insert(It, SecondMBB);
17811 F->insert(It, SinkMBB);
17812
17813 // Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
17814 SinkMBB->splice(SinkMBB->begin(), ThisMBB,
17816 ThisMBB->end());
17817 SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB);
17818
17819 // Fallthrough block for ThisMBB.
17820 ThisMBB->addSuccessor(FirstMBB);
17821 // Fallthrough block for FirstMBB.
17822 FirstMBB->addSuccessor(SecondMBB);
17823 ThisMBB->addSuccessor(SinkMBB);
17824 FirstMBB->addSuccessor(SinkMBB);
17825 // This is fallthrough.
17826 SecondMBB->addSuccessor(SinkMBB);
17827
17828 auto FirstCC = static_cast<RISCVCC::CondCode>(First.getOperand(3).getImm());
17829 Register FLHS = First.getOperand(1).getReg();
17830 Register FRHS = First.getOperand(2).getReg();
17831 // Insert appropriate branch.
17832 BuildMI(FirstMBB, DL, TII.getBrCond(FirstCC))
17833 .addReg(FLHS)
17834 .addReg(FRHS)
17835 .addMBB(SinkMBB);
17836
17837 Register SLHS = Second.getOperand(1).getReg();
17838 Register SRHS = Second.getOperand(2).getReg();
17839 Register Op1Reg4 = First.getOperand(4).getReg();
17840 Register Op1Reg5 = First.getOperand(5).getReg();
17841
17842 auto SecondCC = static_cast<RISCVCC::CondCode>(Second.getOperand(3).getImm());
17843 // Insert appropriate branch.
17844 BuildMI(ThisMBB, DL, TII.getBrCond(SecondCC))
17845 .addReg(SLHS)
17846 .addReg(SRHS)
17847 .addMBB(SinkMBB);
17848
17849 Register DestReg = Second.getOperand(0).getReg();
17850 Register Op2Reg4 = Second.getOperand(4).getReg();
17851 BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII.get(RISCV::PHI), DestReg)
17852 .addReg(Op2Reg4)
17853 .addMBB(ThisMBB)
17854 .addReg(Op1Reg4)
17855 .addMBB(FirstMBB)
17856 .addReg(Op1Reg5)
17857 .addMBB(SecondMBB);
17858
17859 // Now remove the Select_FPRX_s.
17860 First.eraseFromParent();
17861 Second.eraseFromParent();
17862 return SinkMBB;
17863}
17864
17865 static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
17866 MachineBasicBlock *BB,
17867 const RISCVSubtarget &Subtarget) {
17868 // To "insert" Select_* instructions, we actually have to insert the triangle
17869 // control-flow pattern. The incoming instructions know the destination vreg
17870 // to set, the condition code register to branch on, the true/false values to
17871 // select between, and the condcode to use to select the appropriate branch.
17872 //
17873 // We produce the following control flow:
17874 // HeadMBB
17875 // | \
17876 // | IfFalseMBB
17877 // | /
17878 // TailMBB
17879 //
17880 // When we find a sequence of selects we attempt to optimize their emission
17881 // by sharing the control flow. Currently we only handle cases where we have
17882 // multiple selects with the exact same condition (same LHS, RHS and CC).
17883 // The selects may be interleaved with other instructions if the other
17884 // instructions meet some requirements we deem safe:
17885 // - They are debug instructions. Otherwise,
17886 // - They are not pseudo instructions, do not have side-effects, do not
17887 // access memory, and their inputs do not depend on the results of the
17888 // select pseudo-instructions.
17889 // The TrueV/FalseV operands of the selects cannot depend on the result of
17890 // previous selects in the sequence.
17891 // These conditions could be further relaxed. See the X86 target for a
17892 // related approach and more information.
17893 //
17894 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
17895 // is checked here and handled by a separate function -
17896 // EmitLoweredCascadedSelect.
17897 Register LHS = MI.getOperand(1).getReg();
17898 Register RHS;
17899 if (MI.getOperand(2).isReg())
17900 RHS = MI.getOperand(2).getReg();
17901 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
17902
17903 SmallVector<MachineInstr *, 4> SelectDebugValues;
17904 SmallSet<Register, 4> SelectDests;
17905 SelectDests.insert(MI.getOperand(0).getReg());
17906
17907 MachineInstr *LastSelectPseudo = &MI;
17908 auto Next = next_nodbg(MI.getIterator(), BB->instr_end());
17909 if ((MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR &&
17910 MI.getOpcode() != RISCV::Select_GPR_Using_CC_Imm) &&
17911 Next != BB->end() && Next->getOpcode() == MI.getOpcode() &&
17912 Next->getOperand(5).getReg() == MI.getOperand(0).getReg() &&
17913 Next->getOperand(5).isKill()) {
17914 return EmitLoweredCascadedSelect(MI, *Next, BB, Subtarget);
17915 }
17916
17917 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
17918 SequenceMBBI != E; ++SequenceMBBI) {
17919 if (SequenceMBBI->isDebugInstr())
17920 continue;
17921 if (isSelectPseudo(*SequenceMBBI)) {
17922 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
17923 !SequenceMBBI->getOperand(2).isReg() ||
17924 SequenceMBBI->getOperand(2).getReg() != RHS ||
17925 SequenceMBBI->getOperand(3).getImm() != CC ||
17926 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
17927 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
17928 break;
17929 LastSelectPseudo = &*SequenceMBBI;
17930 SequenceMBBI->collectDebugValues(SelectDebugValues);
17931 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
17932 continue;
17933 }
17934 if (SequenceMBBI->hasUnmodeledSideEffects() ||
17935 SequenceMBBI->mayLoadOrStore() ||
17936 SequenceMBBI->usesCustomInsertionHook())
17937 break;
17938 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
17939 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
17940 }))
17941 break;
17942 }
17943
17944 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
17945 const BasicBlock *LLVM_BB = BB->getBasicBlock();
17946 DebugLoc DL = MI.getDebugLoc();
17947 MachineFunction::iterator I = ++BB->getIterator();
17948
17949 MachineBasicBlock *HeadMBB = BB;
17950 MachineFunction *F = BB->getParent();
17951 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
17952 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
17953
17954 F->insert(I, IfFalseMBB);
17955 F->insert(I, TailMBB);
17956
17957 // Transfer debug instructions associated with the selects to TailMBB.
17958 for (MachineInstr *DebugInstr : SelectDebugValues) {
17959 TailMBB->push_back(DebugInstr->removeFromParent());
17960 }
17961
17962 // Move all instructions after the sequence to TailMBB.
17963 TailMBB->splice(TailMBB->end(), HeadMBB,
17964 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
17965 // Update machine-CFG edges by transferring all successors of the current
17966 // block to the new block which will contain the Phi nodes for the selects.
17967 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
17968 // Set the successors for HeadMBB.
17969 HeadMBB->addSuccessor(IfFalseMBB);
17970 HeadMBB->addSuccessor(TailMBB);
17971
17972 // Insert appropriate branch.
17973 if (MI.getOperand(2).isImm())
17974 BuildMI(HeadMBB, DL, TII.getBrCond(CC, MI.getOperand(2).isImm()))
17975 .addReg(LHS)
17976 .addImm(MI.getOperand(2).getImm())
17977 .addMBB(TailMBB);
17978 else
17979 BuildMI(HeadMBB, DL, TII.getBrCond(CC))
17980 .addReg(LHS)
17981 .addReg(RHS)
17982 .addMBB(TailMBB);
17983
17984 // IfFalseMBB just falls through to TailMBB.
17985 IfFalseMBB->addSuccessor(TailMBB);
17986
17987 // Create PHIs for all of the select pseudo-instructions.
17988 auto SelectMBBI = MI.getIterator();
17989 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
17990 auto InsertionPoint = TailMBB->begin();
17991 while (SelectMBBI != SelectEnd) {
17992 auto Next = std::next(SelectMBBI);
17993 if (isSelectPseudo(*SelectMBBI)) {
17994 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
17995 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
17996 TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
17997 .addReg(SelectMBBI->getOperand(4).getReg())
17998 .addMBB(HeadMBB)
17999 .addReg(SelectMBBI->getOperand(5).getReg())
18000 .addMBB(IfFalseMBB);
18001 SelectMBBI->eraseFromParent();
18002 }
18003 SelectMBBI = Next;
18004 }
18005
18006 F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
18007 return TailMBB;
18008}
18009
18010// Helper to find Masked Pseudo instruction from MC instruction, LMUL and SEW.
18011static const RISCV::RISCVMaskedPseudoInfo *
18012lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVII::VLMUL LMul, unsigned SEW) {
18014 RISCVVInversePseudosTable::getBaseInfo(MCOpcode, LMul, SEW);
18015 assert(Inverse && "Unexpected LMUL and SEW pair for instruction");
18016 const RISCV::RISCVMaskedPseudoInfo *Masked =
18017 RISCV::lookupMaskedIntrinsicByUnmasked(Inverse->Pseudo);
18018 assert(Masked && "Could not find masked instruction for LMUL and SEW pair");
18019 return Masked;
18020}
18021
18022 static MachineBasicBlock *emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI,
18023 MachineBasicBlock *BB,
18024 unsigned CVTXOpc) {
18025 DebugLoc DL = MI.getDebugLoc();
18026
18028
18030 Register SavedFFLAGS = MRI.createVirtualRegister(&RISCV::GPRRegClass);
18031
18032 // Save the old value of FFLAGS.
18033 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFLAGS);
18034
18035 assert(MI.getNumOperands() == 7);
18036
18037 // Emit a VFCVT_X_F
18038 const TargetRegisterInfo *TRI =
18040 const TargetRegisterClass *RC = MI.getRegClassConstraint(0, &TII, TRI);
18041 Register Tmp = MRI.createVirtualRegister(RC);
18042 BuildMI(*BB, MI, DL, TII.get(CVTXOpc), Tmp)
18043 .add(MI.getOperand(1))
18044 .add(MI.getOperand(2))
18045 .add(MI.getOperand(3))
18046 .add(MachineOperand::CreateImm(7)) // frm = DYN
18047 .add(MI.getOperand(4))
18048 .add(MI.getOperand(5))
18049 .add(MI.getOperand(6))
18050 .add(MachineOperand::CreateReg(RISCV::FRM,
18051 /*IsDef*/ false,
18052 /*IsImp*/ true));
18053
18054 // Emit a VFCVT_F_X
18055 RISCVII::VLMUL LMul = RISCVII::getLMul(MI.getDesc().TSFlags);
18056 unsigned Log2SEW = MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
18057 // There is no E8 variant for VFCVT_F_X.
18058 assert(Log2SEW >= 4);
18059 unsigned CVTFOpc =
18060 lookupMaskedIntrinsic(RISCV::VFCVT_F_X_V, LMul, 1 << Log2SEW)
18061 ->MaskedPseudo;
18062
18063 BuildMI(*BB, MI, DL, TII.get(CVTFOpc))
18064 .add(MI.getOperand(0))
18065 .add(MI.getOperand(1))
18066 .addReg(Tmp)
18067 .add(MI.getOperand(3))
18068 .add(MachineOperand::CreateImm(7)) // frm = DYN
18069 .add(MI.getOperand(4))
18070 .add(MI.getOperand(5))
18071 .add(MI.getOperand(6))
18072 .add(MachineOperand::CreateReg(RISCV::FRM,
18073 /*IsDef*/ false,
18074 /*IsImp*/ true));
18075
18076 // Restore FFLAGS.
18077 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
18078 .addReg(SavedFFLAGS, RegState::Kill);
18079
18080 // Erase the pseudoinstruction.
18081 MI.eraseFromParent();
18082 return BB;
18083}
18084
18085 static MachineBasicBlock *emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB,
18086 const RISCVSubtarget &Subtarget) {
18087 unsigned CmpOpc, F2IOpc, I2FOpc, FSGNJOpc, FSGNJXOpc;
18088 const TargetRegisterClass *RC;
18089 switch (MI.getOpcode()) {
18090 default:
18091 llvm_unreachable("Unexpected opcode");
18092 case RISCV::PseudoFROUND_H:
18093 CmpOpc = RISCV::FLT_H;
18094 F2IOpc = RISCV::FCVT_W_H;
18095 I2FOpc = RISCV::FCVT_H_W;
18096 FSGNJOpc = RISCV::FSGNJ_H;
18097 FSGNJXOpc = RISCV::FSGNJX_H;
18098 RC = &RISCV::FPR16RegClass;
18099 break;
18100 case RISCV::PseudoFROUND_H_INX:
18101 CmpOpc = RISCV::FLT_H_INX;
18102 F2IOpc = RISCV::FCVT_W_H_INX;
18103 I2FOpc = RISCV::FCVT_H_W_INX;
18104 FSGNJOpc = RISCV::FSGNJ_H_INX;
18105 FSGNJXOpc = RISCV::FSGNJX_H_INX;
18106 RC = &RISCV::GPRF16RegClass;
18107 break;
18108 case RISCV::PseudoFROUND_S:
18109 CmpOpc = RISCV::FLT_S;
18110 F2IOpc = RISCV::FCVT_W_S;
18111 I2FOpc = RISCV::FCVT_S_W;
18112 FSGNJOpc = RISCV::FSGNJ_S;
18113 FSGNJXOpc = RISCV::FSGNJX_S;
18114 RC = &RISCV::FPR32RegClass;
18115 break;
18116 case RISCV::PseudoFROUND_S_INX:
18117 CmpOpc = RISCV::FLT_S_INX;
18118 F2IOpc = RISCV::FCVT_W_S_INX;
18119 I2FOpc = RISCV::FCVT_S_W_INX;
18120 FSGNJOpc = RISCV::FSGNJ_S_INX;
18121 FSGNJXOpc = RISCV::FSGNJX_S_INX;
18122 RC = &RISCV::GPRF32RegClass;
18123 break;
18124 case RISCV::PseudoFROUND_D:
18125 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
18126 CmpOpc = RISCV::FLT_D;
18127 F2IOpc = RISCV::FCVT_L_D;
18128 I2FOpc = RISCV::FCVT_D_L;
18129 FSGNJOpc = RISCV::FSGNJ_D;
18130 FSGNJXOpc = RISCV::FSGNJX_D;
18131 RC = &RISCV::FPR64RegClass;
18132 break;
18133 case RISCV::PseudoFROUND_D_INX:
18134 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
18135 CmpOpc = RISCV::FLT_D_INX;
18136 F2IOpc = RISCV::FCVT_L_D_INX;
18137 I2FOpc = RISCV::FCVT_D_L_INX;
18138 FSGNJOpc = RISCV::FSGNJ_D_INX;
18139 FSGNJXOpc = RISCV::FSGNJX_D_INX;
18140 RC = &RISCV::GPRRegClass;
18141 break;
18142 }
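// Rough sketch (illustrative only, with made-up register names) of the scalar
// sequence this expands to for PseudoFROUND_S, assuming the max-magnitude
// constant is already in fa1 and "rm" stands for the rounding-mode operand:
// ```
// fsgnjx.s ft0, fa0, fa0    # ft0 = |x|
// flt.s    a0, ft0, fa1     # small enough to still have a fractional part?
// beq      a0, zero, done   # if not (or x is NaN), keep the input
// fcvt.w.s a1, fa0, rm      # convert to integer...
// fcvt.s.w ft1, a1, rm      # ...and back to FP
// fsgnj.s  fa0, ft1, fa0    # restore the original sign (handles -0.0)
// done:
// ```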
18143
18144 const BasicBlock *BB = MBB->getBasicBlock();
18145 DebugLoc DL = MI.getDebugLoc();
18146 MachineFunction *F = MBB->getParent();
18147
18148 MachineFunction::iterator I = ++MBB->getIterator();
18149 MachineBasicBlock *CvtMBB = F->CreateMachineBasicBlock(BB);
18150 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB);
18151
18152 F->insert(I, CvtMBB);
18153 F->insert(I, DoneMBB);
18154 // Move all instructions after the sequence to DoneMBB.
18155 DoneMBB->splice(DoneMBB->end(), MBB, MachineBasicBlock::iterator(MI),
18156 MBB->end());
18157 // Update machine-CFG edges by transferring all successors of the current
18158 // block to the new block which will contain the Phi nodes for the selects.
18159 DoneMBB->transferSuccessorsAndUpdatePHIs(MBB);
18160 // Set the successors for MBB.
18161 MBB->addSuccessor(CvtMBB);
18162 MBB->addSuccessor(DoneMBB);
18163
18164 Register DstReg = MI.getOperand(0).getReg();
18165 Register SrcReg = MI.getOperand(1).getReg();
18166 Register MaxReg = MI.getOperand(2).getReg();
18167 int64_t FRM = MI.getOperand(3).getImm();
18168
18169 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
18171
18172 Register FabsReg = MRI.createVirtualRegister(RC);
18173 BuildMI(MBB, DL, TII.get(FSGNJXOpc), FabsReg).addReg(SrcReg).addReg(SrcReg);
18174
18175 // Compare the FP value to the max value.
18176 Register CmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
18177 auto MIB =
18178 BuildMI(MBB, DL, TII.get(CmpOpc), CmpReg).addReg(FabsReg).addReg(MaxReg);
18181
18182 // Insert branch.
18183 BuildMI(MBB, DL, TII.get(RISCV::BEQ))
18184 .addReg(CmpReg)
18185 .addReg(RISCV::X0)
18186 .addMBB(DoneMBB);
18187
18188 CvtMBB->addSuccessor(DoneMBB);
18189
18190 // Convert to integer.
18191 Register F2IReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
18192 MIB = BuildMI(CvtMBB, DL, TII.get(F2IOpc), F2IReg).addReg(SrcReg).addImm(FRM);
18195
18196 // Convert back to FP.
18197 Register I2FReg = MRI.createVirtualRegister(RC);
18198 MIB = BuildMI(CvtMBB, DL, TII.get(I2FOpc), I2FReg).addReg(F2IReg).addImm(FRM);
18201
18202 // Restore the sign bit.
18203 Register CvtReg = MRI.createVirtualRegister(RC);
18204 BuildMI(CvtMBB, DL, TII.get(FSGNJOpc), CvtReg).addReg(I2FReg).addReg(SrcReg);
18205
18206 // Merge the results.
18207 BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(RISCV::PHI), DstReg)
18208 .addReg(SrcReg)
18209 .addMBB(MBB)
18210 .addReg(CvtReg)
18211 .addMBB(CvtMBB);
18212
18213 MI.eraseFromParent();
18214 return DoneMBB;
18215}
18216
18217 MachineBasicBlock *
18218 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
18219 MachineBasicBlock *BB) const {
18220 switch (MI.getOpcode()) {
18221 default:
18222 llvm_unreachable("Unexpected instr type to insert");
18223 case RISCV::ReadCounterWide:
18224 assert(!Subtarget.is64Bit() &&
18225 "ReadCounterWide is only to be used on riscv32");
18226 return emitReadCounterWidePseudo(MI, BB);
18227 case RISCV::Select_GPR_Using_CC_GPR:
18228 case RISCV::Select_GPR_Using_CC_Imm:
18229 case RISCV::Select_FPR16_Using_CC_GPR:
18230 case RISCV::Select_FPR16INX_Using_CC_GPR:
18231 case RISCV::Select_FPR32_Using_CC_GPR:
18232 case RISCV::Select_FPR32INX_Using_CC_GPR:
18233 case RISCV::Select_FPR64_Using_CC_GPR:
18234 case RISCV::Select_FPR64INX_Using_CC_GPR:
18235 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
18236 return emitSelectPseudo(MI, BB, Subtarget);
18237 case RISCV::BuildPairF64Pseudo:
18238 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
18239 case RISCV::SplitF64Pseudo:
18240 return emitSplitF64Pseudo(MI, BB, Subtarget);
18241 case RISCV::PseudoQuietFLE_H:
18242 return emitQuietFCMP(MI, BB, RISCV::FLE_H, RISCV::FEQ_H, Subtarget);
18243 case RISCV::PseudoQuietFLE_H_INX:
18244 return emitQuietFCMP(MI, BB, RISCV::FLE_H_INX, RISCV::FEQ_H_INX, Subtarget);
18245 case RISCV::PseudoQuietFLT_H:
18246 return emitQuietFCMP(MI, BB, RISCV::FLT_H, RISCV::FEQ_H, Subtarget);
18247 case RISCV::PseudoQuietFLT_H_INX:
18248 return emitQuietFCMP(MI, BB, RISCV::FLT_H_INX, RISCV::FEQ_H_INX, Subtarget);
18249 case RISCV::PseudoQuietFLE_S:
18250 return emitQuietFCMP(MI, BB, RISCV::FLE_S, RISCV::FEQ_S, Subtarget);
18251 case RISCV::PseudoQuietFLE_S_INX:
18252 return emitQuietFCMP(MI, BB, RISCV::FLE_S_INX, RISCV::FEQ_S_INX, Subtarget);
18253 case RISCV::PseudoQuietFLT_S:
18254 return emitQuietFCMP(MI, BB, RISCV::FLT_S, RISCV::FEQ_S, Subtarget);
18255 case RISCV::PseudoQuietFLT_S_INX:
18256 return emitQuietFCMP(MI, BB, RISCV::FLT_S_INX, RISCV::FEQ_S_INX, Subtarget);
18257 case RISCV::PseudoQuietFLE_D:
18258 return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget);
18259 case RISCV::PseudoQuietFLE_D_INX:
18260 return emitQuietFCMP(MI, BB, RISCV::FLE_D_INX, RISCV::FEQ_D_INX, Subtarget);
18261 case RISCV::PseudoQuietFLE_D_IN32X:
18262 return emitQuietFCMP(MI, BB, RISCV::FLE_D_IN32X, RISCV::FEQ_D_IN32X,
18263 Subtarget);
18264 case RISCV::PseudoQuietFLT_D:
18265 return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget);
18266 case RISCV::PseudoQuietFLT_D_INX:
18267 return emitQuietFCMP(MI, BB, RISCV::FLT_D_INX, RISCV::FEQ_D_INX, Subtarget);
18268 case RISCV::PseudoQuietFLT_D_IN32X:
18269 return emitQuietFCMP(MI, BB, RISCV::FLT_D_IN32X, RISCV::FEQ_D_IN32X,
18270 Subtarget);
18271
18272 case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK:
18273 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK);
18274 case RISCV::PseudoVFROUND_NOEXCEPT_V_M2_MASK:
18275 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK);
18276 case RISCV::PseudoVFROUND_NOEXCEPT_V_M4_MASK:
18277 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M4_MASK);
18278 case RISCV::PseudoVFROUND_NOEXCEPT_V_M8_MASK:
18279 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK);
18280 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF2_MASK:
18281 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK);
18282 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF4_MASK:
18283 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK);
18284 case RISCV::PseudoFROUND_H:
18285 case RISCV::PseudoFROUND_H_INX:
18286 case RISCV::PseudoFROUND_S:
18287 case RISCV::PseudoFROUND_S_INX:
18288 case RISCV::PseudoFROUND_D:
18289 case RISCV::PseudoFROUND_D_INX:
18290 case RISCV::PseudoFROUND_D_IN32X:
18291 return emitFROUND(MI, BB, Subtarget);
18292 case TargetOpcode::STATEPOINT:
18293 // STATEPOINT is a pseudo instruction which has no implicit defs/uses
18294 // while the JAL call instruction (to which the statepoint is lowered at the
18295 // end) has an implicit def. This def is early-clobber as it will be set at
18296 // the moment of the call, earlier than any use is read.
18297 // Add this implicit dead def here as a workaround.
18298 MI.addOperand(*MI.getMF(),
18300 RISCV::X1, /*isDef*/ true,
18301 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
18302 /*isUndef*/ false, /*isEarlyClobber*/ true));
18303 [[fallthrough]];
18304 case TargetOpcode::STACKMAP:
18305 case TargetOpcode::PATCHPOINT:
18306 if (!Subtarget.is64Bit())
18307 report_fatal_error("STACKMAP, PATCHPOINT and STATEPOINT are only "
18308 "supported on 64-bit targets");
18309 return emitPatchPoint(MI, BB);
18310 }
18311}
18312
18313 void RISCVTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
18314 SDNode *Node) const {
18315 // Add FRM dependency to any instructions with dynamic rounding mode.
18316 int Idx = RISCV::getNamedOperandIdx(MI.getOpcode(), RISCV::OpName::frm);
18317 if (Idx < 0) {
18318 // Vector pseudos have FRM index indicated by TSFlags.
18319 Idx = RISCVII::getFRMOpNum(MI.getDesc());
18320 if (Idx < 0)
18321 return;
18322 }
18323 if (MI.getOperand(Idx).getImm() != RISCVFPRndMode::DYN)
18324 return;
18325 // If the instruction already reads FRM, don't add another read.
18326 if (MI.readsRegister(RISCV::FRM, /*TRI=*/nullptr))
18327 return;
18328 MI.addOperand(
18329 MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false, /*isImp*/ true));
18330}
18331
18332// Calling Convention Implementation.
18333// The expectations for frontend ABI lowering vary from target to target.
18334// Ideally, an LLVM frontend would be able to avoid worrying about many ABI
18335// details, but this is a longer term goal. For now, we simply try to keep the
18336// role of the frontend as simple and well-defined as possible. The rules can
18337// be summarised as:
18338// * Never split up large scalar arguments. We handle them here.
18339// * If a hardfloat calling convention is being used, and the struct may be
18340// passed in a pair of registers (fp+fp, int+fp), and both registers are
18341// available, then pass as two separate arguments. If either the GPRs or FPRs
18342// are exhausted, then pass according to the rule below.
18343// * If a struct could never be passed in registers or directly in a stack
18344// slot (as it is larger than 2*XLEN and the floating point rules don't
18345// apply), then pass it using a pointer with the byval attribute.
18346// * If a struct is less than 2*XLEN, then coerce to either a two-element
18347// word-sized array or a 2*XLEN scalar (depending on alignment).
18348// * The frontend can determine whether a struct is returned by reference or
18349// not based on its size and fields. If it will be returned by reference, the
18350// frontend must modify the prototype so a pointer with the sret annotation is
18351// passed as the first argument. This is not necessary for large scalar
18352// returns.
18353// * Struct return values and varargs should be coerced to structs containing
18354// register-size fields in the same situations they would be for fixed
18355// arguments.
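// Illustrative example (not normative, not from the original source): under
// the lp64d ABI, struct { double x; double y; } is passed as two f64 values in
// FPRs while two are still free, struct { int64_t a; double b; } as one GPR
// plus one FPR, and a struct larger than 2*XLEN that is not eligible for the
// floating-point rules is passed byval via a pointer.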
18356
18357static const MCPhysReg ArgFPR16s[] = {
18358 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H,
18359 RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H
18360};
18361static const MCPhysReg ArgFPR32s[] = {
18362 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F,
18363 RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F
18364};
18365static const MCPhysReg ArgFPR64s[] = {
18366 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D,
18367 RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D
18368};
18369// This is an interim calling convention and it may be changed in the future.
18370static const MCPhysReg ArgVRs[] = {
18371 RISCV::V8, RISCV::V9, RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13,
18372 RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19,
18373 RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23};
18374static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2, RISCV::V10M2, RISCV::V12M2,
18375 RISCV::V14M2, RISCV::V16M2, RISCV::V18M2,
18376 RISCV::V20M2, RISCV::V22M2};
18377static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4,
18378 RISCV::V20M4};
18379static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8};
18380
18382 // The GPRs used for passing arguments in the ILP32* and LP64* ABIs, except
18383 // the ILP32E ABI.
18384 static const MCPhysReg ArgIGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
18385 RISCV::X13, RISCV::X14, RISCV::X15,
18386 RISCV::X16, RISCV::X17};
18387 // The GPRs used for passing arguments in the ILP32E/LP64E ABIs.
18388 static const MCPhysReg ArgEGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
18389 RISCV::X13, RISCV::X14, RISCV::X15};
18390
18391 if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E)
18392 return ArrayRef(ArgEGPRs);
18393
18394 return ArrayRef(ArgIGPRs);
18395}
18396
18398 // The GPRs used for passing arguments in the FastCC. X5 and X6 might be used
18399 // for the save-restore libcall, so we don't use them.
18400 static const MCPhysReg FastCCIGPRs[] = {
18401 RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14,
18402 RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7, RISCV::X28,
18403 RISCV::X29, RISCV::X30, RISCV::X31};
18404
18405 // The GPRs used for passing arguments in the FastCC when using ILP32E/LP64E.
18406 static const MCPhysReg FastCCEGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
18407 RISCV::X13, RISCV::X14, RISCV::X15,
18408 RISCV::X7};
18409
18410 if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E)
18411 return ArrayRef(FastCCEGPRs);
18412
18413 return ArrayRef(FastCCIGPRs);
18414}
18415
18416// Pass a 2*XLEN argument that has been split into two XLEN values through
18417// registers or the stack as necessary.
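// For example (illustrative, not from the original source): an i64 argument on
// RV32 that arrives when only a7 is still free is split so that the low half
// goes in a7 and the high half goes on the stack; if no GPRs remain, both
// halves go on the stack with the alignment handled in the code below.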
18418static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
18419 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2,
18420 MVT ValVT2, MVT LocVT2,
18421 ISD::ArgFlagsTy ArgFlags2, bool EABI) {
18422 unsigned XLenInBytes = XLen / 8;
18423 const RISCVSubtarget &STI =
18426
18427 if (Register Reg = State.AllocateReg(ArgGPRs)) {
18428 // At least one half can be passed via register.
18429 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
18430 VA1.getLocVT(), CCValAssign::Full));
18431 } else {
18432 // Both halves must be passed on the stack, with proper alignment.
18433 // TODO: To be compatible with GCC's behaviors, we force them to have 4-byte
18434 // alignment. This behavior may be changed when RV32E/ILP32E is ratified.
18435 Align StackAlign(XLenInBytes);
18436 if (!EABI || XLen != 32)
18437 StackAlign = std::max(StackAlign, ArgFlags1.getNonZeroOrigAlign());
18438 State.addLoc(
18440 State.AllocateStack(XLenInBytes, StackAlign),
18441 VA1.getLocVT(), CCValAssign::Full));
18443 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
18444 LocVT2, CCValAssign::Full));
18445 return false;
18446 }
18447
18448 if (Register Reg = State.AllocateReg(ArgGPRs)) {
18449 // The second half can also be passed via register.
18450 State.addLoc(
18451 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
18452 } else {
18453 // The second half is passed via the stack, without additional alignment.
18455 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
18456 LocVT2, CCValAssign::Full));
18457 }
18458
18459 return false;
18460}
18461
18462// Implements the RISC-V calling convention. Returns true upon failure.
18463bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
18464 MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
18465 ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
18466 bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
18467 RVVArgDispatcher &RVVDispatcher) {
18468 unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
18469 assert(XLen == 32 || XLen == 64);
18470 MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;
18471
18472 // Static chain parameter must not be passed in normal argument registers,
18473 // so we assign t2 for it as done in GCC's __builtin_call_with_static_chain
18474 if (ArgFlags.isNest()) {
18475 if (unsigned Reg = State.AllocateReg(RISCV::X7)) {
18476 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18477 return false;
18478 }
18479 }
18480
18481 // Any return value split into more than two values can't be returned
18482 // directly. Vectors are returned via the available vector registers.
18483 if (!LocVT.isVector() && IsRet && ValNo > 1)
18484 return true;
18485
18486 // UseGPRForF16_F32 is true if targeting a soft-float ABI, if passing a
18487 // variadic argument, or if no F16/F32 argument registers are available.
18488 bool UseGPRForF16_F32 = true;
18489 // UseGPRForF64 is true if targeting a soft-float ABI or an FLEN=32 ABI, if
18490 // passing a variadic argument, or if no F64 argument registers are available.
18491 bool UseGPRForF64 = true;
18492
18493 switch (ABI) {
18494 default:
18495 llvm_unreachable("Unexpected ABI");
18498 case RISCVABI::ABI_LP64:
18500 break;
18503 UseGPRForF16_F32 = !IsFixed;
18504 break;
18507 UseGPRForF16_F32 = !IsFixed;
18508 UseGPRForF64 = !IsFixed;
18509 break;
18510 }
18511
18512 // FPR16, FPR32, and FPR64 alias each other.
18513 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s)) {
18514 UseGPRForF16_F32 = true;
18515 UseGPRForF64 = true;
18516 }
18517
18518 // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and
18519 // similar local variables rather than directly checking against the target
18520 // ABI.
18521
18522 if (UseGPRForF16_F32 &&
18523 (ValVT == MVT::f16 || ValVT == MVT::bf16 || ValVT == MVT::f32)) {
18524 LocVT = XLenVT;
18525 LocInfo = CCValAssign::BCvt;
18526 } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) {
18527 LocVT = MVT::i64;
18528 LocInfo = CCValAssign::BCvt;
18529 }
18530
18532
18533 // If this is a variadic argument, the RISC-V calling convention requires
18534 // that it is assigned an 'even' or 'aligned' register if it has 8-byte
18535 // alignment (RV32) or 16-byte alignment (RV64). An aligned register should
18536 // be used regardless of whether the original argument was split during
18537 // legalisation or not. The argument will not be passed by registers if the
18538 // original type is larger than 2*XLEN, so the register alignment rule does
18539 // not apply.
18540 // TODO: To be compatible with GCC's behaviors, we don't align registers
18541 // currently if we are using ILP32E calling convention. This behavior may be
18542 // changed when RV32E/ILP32E is ratified.
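// For example (illustrative, not from the original source): for a variadic
// call like printf("%f", d) on RV32 with an ILP32 ABI, the format string takes
// a0, a1 is skipped so the f64 vararg starts in an even-aligned register, and
// d is passed in the a2/a3 pair.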
18543 unsigned TwoXLenInBytes = (2 * XLen) / 8;
18544 if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes &&
18545 DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes &&
18546 ABI != RISCVABI::ABI_ILP32E) {
18547 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
18548 // Skip 'odd' register if necessary.
18549 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
18550 State.AllocateReg(ArgGPRs);
18551 }
18552
18553 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
18554 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
18555 State.getPendingArgFlags();
18556
18557 assert(PendingLocs.size() == PendingArgFlags.size() &&
18558 "PendingLocs and PendingArgFlags out of sync");
18559
18560 // Handle passing f64 on RV32D with a soft float ABI or when floating point
18561 // registers are exhausted.
18562 if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) {
18563 assert(PendingLocs.empty() && "Can't lower f64 if it is split");
18564 // Depending on available argument GPRS, f64 may be passed in a pair of
18565 // GPRs, split between a GPR and the stack, or passed completely on the
18566 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
18567 // cases.
18568 Register Reg = State.AllocateReg(ArgGPRs);
18569 if (!Reg) {
18570 unsigned StackOffset = State.AllocateStack(8, Align(8));
18571 State.addLoc(
18572 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
18573 return false;
18574 }
18575 LocVT = MVT::i32;
18576 State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18577 Register HiReg = State.AllocateReg(ArgGPRs);
18578 if (HiReg) {
18579 State.addLoc(
18580 CCValAssign::getCustomReg(ValNo, ValVT, HiReg, LocVT, LocInfo));
18581 } else {
18582 unsigned StackOffset = State.AllocateStack(4, Align(4));
18583 State.addLoc(
18584 CCValAssign::getCustomMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
18585 }
18586 return false;
18587 }
18588
18589 // Fixed-length vectors are located in the corresponding scalable-vector
18590 // container types.
18591 if (ValVT.isFixedLengthVector())
18592 LocVT = TLI.getContainerForFixedLengthVector(LocVT);
18593
18594 // Split arguments might be passed indirectly, so keep track of the pending
18595 // values. Split vectors are passed via a mix of registers and indirectly, so
18596 // treat them as we would any other argument.
18597 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
18598 LocVT = XLenVT;
18599 LocInfo = CCValAssign::Indirect;
18600 PendingLocs.push_back(
18601 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
18602 PendingArgFlags.push_back(ArgFlags);
18603 if (!ArgFlags.isSplitEnd()) {
18604 return false;
18605 }
18606 }
18607
18608 // If the split argument only had two elements, it should be passed directly
18609 // in registers or on the stack.
18610 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
18611 PendingLocs.size() <= 2) {
18612 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
18613 // Apply the normal calling convention rules to the first half of the
18614 // split argument.
18615 CCValAssign VA = PendingLocs[0];
18616 ISD::ArgFlagsTy AF = PendingArgFlags[0];
18617 PendingLocs.clear();
18618 PendingArgFlags.clear();
18619 return CC_RISCVAssign2XLen(
18620 XLen, State, VA, AF, ValNo, ValVT, LocVT, ArgFlags,
18621 ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E);
18622 }
18623
18624 // Allocate to a register if possible, or else a stack slot.
18625 Register Reg;
18626 unsigned StoreSizeBytes = XLen / 8;
18627 Align StackAlign = Align(XLen / 8);
18628
18629 if ((ValVT == MVT::f16 || ValVT == MVT::bf16) && !UseGPRForF16_F32)
18630 Reg = State.AllocateReg(ArgFPR16s);
18631 else if (ValVT == MVT::f32 && !UseGPRForF16_F32)
18632 Reg = State.AllocateReg(ArgFPR32s);
18633 else if (ValVT == MVT::f64 && !UseGPRForF64)
18634 Reg = State.AllocateReg(ArgFPR64s);
18635 else if (ValVT.isVector()) {
18636 Reg = RVVDispatcher.getNextPhysReg();
18637 if (!Reg) {
18638 // For return values, the vector must be passed fully via registers or
18639 // via the stack.
18640 // FIXME: The proposed vector ABI only mandates v8-v15 for return values,
18641 // but we're using all of them.
18642 if (IsRet)
18643 return true;
18644 // Try using a GPR to pass the address
18645 if ((Reg = State.AllocateReg(ArgGPRs))) {
18646 LocVT = XLenVT;
18647 LocInfo = CCValAssign::Indirect;
18648 } else if (ValVT.isScalableVector()) {
18649 LocVT = XLenVT;
18650 LocInfo = CCValAssign::Indirect;
18651 } else {
18652 // Pass fixed-length vectors on the stack.
18653 LocVT = ValVT;
18654 StoreSizeBytes = ValVT.getStoreSize();
18655 // Align vectors to their element sizes, being careful for vXi1
18656 // vectors.
18657 StackAlign = MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne();
18658 }
18659 }
18660 } else {
18661 Reg = State.AllocateReg(ArgGPRs);
18662 }
18663
18664 unsigned StackOffset =
18665 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
18666
18667 // If we reach this point and PendingLocs is non-empty, we must be at the
18668 // end of a split argument that must be passed indirectly.
18669 if (!PendingLocs.empty()) {
18670 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
18671 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
18672
18673 for (auto &It : PendingLocs) {
18674 if (Reg)
18675 It.convertToReg(Reg);
18676 else
18677 It.convertToMem(StackOffset);
18678 State.addLoc(It);
18679 }
18680 PendingLocs.clear();
18681 PendingArgFlags.clear();
18682 return false;
18683 }
18684
18685 assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT ||
18686 (TLI.getSubtarget().hasVInstructions() && ValVT.isVector())) &&
18687 "Expected an XLenVT or vector types at this stage");
18688
18689 if (Reg) {
18690 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18691 return false;
18692 }
18693
18694 // When a scalar floating-point value is passed on the stack, no
18695 // bit-conversion is needed.
18696 if (ValVT.isFloatingPoint() && LocInfo != CCValAssign::Indirect) {
18697 assert(!ValVT.isVector());
18698 LocVT = ValVT;
18699 LocInfo = CCValAssign::Full;
18700 }
18701 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
18702 return false;
18703}
18704
18705template <typename ArgTy>
18706static std::optional<unsigned> preAssignMask(const ArgTy &Args) {
18707 for (const auto &ArgIdx : enumerate(Args)) {
18708 MVT ArgVT = ArgIdx.value().VT;
18709 if (ArgVT.isVector() && ArgVT.getVectorElementType() == MVT::i1)
18710 return ArgIdx.index();
18711 }
18712 return std::nullopt;
18713}
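// Note (descriptive, based on how the RVV argument dispatcher uses this
// helper): the index returned above identifies the first vector argument
// whose element type is i1, i.e. a mask operand, so that it can be assigned
// ahead of the other vector arguments (typically to v0).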
18714
18715void RISCVTargetLowering::analyzeInputArgs(
18716 MachineFunction &MF, CCState &CCInfo,
18717 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
18718 RISCVCCAssignFn Fn) const {
18719 unsigned NumArgs = Ins.size();
18720 FunctionType *FType = MF.getFunction().getFunctionType();
18721
18722 RVVArgDispatcher Dispatcher;
18723 if (IsRet) {
18724 Dispatcher = RVVArgDispatcher{&MF, this, ArrayRef(Ins)};
18725 } else {
18726 SmallVector<Type *, 4> TypeList;
18727 for (const Argument &Arg : MF.getFunction().args())
18728 TypeList.push_back(Arg.getType());
18729 Dispatcher = RVVArgDispatcher{&MF, this, ArrayRef(TypeList)};
18730 }
18731
18732 for (unsigned i = 0; i != NumArgs; ++i) {
18733 MVT ArgVT = Ins[i].VT;
18734 ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
18735
18736 Type *ArgTy = nullptr;
18737 if (IsRet)
18738 ArgTy = FType->getReturnType();
18739 else if (Ins[i].isOrigArg())
18740 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
18741
18742 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
18743 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
18744 ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this,
18745 Dispatcher)) {
18746 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
18747 << ArgVT << '\n');
18748 llvm_unreachable(nullptr);
18749 }
18750 }
18751}
18752
18753void RISCVTargetLowering::analyzeOutputArgs(
18754 MachineFunction &MF, CCState &CCInfo,
18755 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
18756 CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const {
18757 unsigned NumArgs = Outs.size();
18758
18759 SmallVector<Type *, 4> TypeList;
18760 if (IsRet)
18761 TypeList.push_back(MF.getFunction().getReturnType());
18762 else if (CLI)
18763 for (const TargetLowering::ArgListEntry &Arg : CLI->getArgs())
18764 TypeList.push_back(Arg.Ty);
18765 RVVArgDispatcher Dispatcher{&MF, this, ArrayRef(TypeList)};
18766
18767 for (unsigned i = 0; i != NumArgs; i++) {
18768 MVT ArgVT = Outs[i].VT;
18769 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
18770 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
18771
18772 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
18773 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
18774 ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this,
18775 Dispatcher)) {
18776 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
18777 << ArgVT << "\n");
18778 llvm_unreachable(nullptr);
18779 }
18780 }
18781}
18782
18783// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
18784// values.
18785 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
18786 const CCValAssign &VA, const SDLoc &DL,
18787 const RISCVSubtarget &Subtarget) {
18788 switch (VA.getLocInfo()) {
18789 default:
18790 llvm_unreachable("Unexpected CCValAssign::LocInfo");
18791 case CCValAssign::Full:
18792 if (VA.getValVT().isFixedLengthVector() && VA.getLocVT().isScalableVector())
18793 Val = convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget);
18794 break;
18795 case CCValAssign::BCvt:
18796 if (VA.getLocVT().isInteger() &&
18797 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
18798 Val = DAG.getNode(RISCVISD::FMV_H_X, DL, VA.getValVT(), Val);
18799 } else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) {
18800 if (RV64LegalI32) {
18801 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Val);
18802 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
18803 } else {
18804 Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
18805 }
18806 } else {
18807 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
18808 }
18809 break;
18810 }
18811 return Val;
18812}
18813
18814// The caller is responsible for loading the full value if the argument is
18815// passed with CCValAssign::Indirect.
18816 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
18817 const CCValAssign &VA, const SDLoc &DL,
18818 const ISD::InputArg &In,
18819 const RISCVTargetLowering &TLI) {
18820 MachineFunction &MF = DAG.getMachineFunction();
18821 MachineRegisterInfo &RegInfo = MF.getRegInfo();
18822 EVT LocVT = VA.getLocVT();
18823 SDValue Val;
18824 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
18825 Register VReg = RegInfo.createVirtualRegister(RC);
18826 RegInfo.addLiveIn(VA.getLocReg(), VReg);
18827 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
18828
18829 // If input is sign extended from 32 bits, note it for the SExtWRemoval pass.
18830 if (In.isOrigArg()) {
18831 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
18832 if (OrigArg->getType()->isIntegerTy()) {
18833 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
18834 // An input zero extended from i31 can also be considered sign extended.
18835 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
18836 (BitWidth < 32 && In.Flags.isZExt())) {
18837 RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
18838 RVFI->addSExt32Register(VReg);
18839 }
18840 }
18841 }
18842
18843 if (VA.getLocInfo() == CCValAssign::Indirect)
18844 return Val;
18845
18846 return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget());
18847}
18848
18849 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
18850 const CCValAssign &VA, const SDLoc &DL,
18851 const RISCVSubtarget &Subtarget) {
18852 EVT LocVT = VA.getLocVT();
18853
18854 switch (VA.getLocInfo()) {
18855 default:
18856 llvm_unreachable("Unexpected CCValAssign::LocInfo");
18857 case CCValAssign::Full:
18858 if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector())
18859 Val = convertToScalableVector(LocVT, Val, DAG, Subtarget);
18860 break;
18861 case CCValAssign::BCvt:
18862 if (LocVT.isInteger() &&
18863 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
18864 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, LocVT, Val);
18865 } else if (LocVT == MVT::i64 && VA.getValVT() == MVT::f32) {
18866 if (RV64LegalI32) {
18867 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
18868 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Val);
18869 } else {
18870 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
18871 }
18872 } else {
18873 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
18874 }
18875 break;
18876 }
18877 return Val;
18878}
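// Illustrative round trip (a sketch, assuming an ABI where f16 travels in a
// GPR and is marked CCValAssign::BCvt): convertValVTToLocVT moves the f16
// into an integer register with FMV_X_ANYEXTH, and the matching FMV_H_X in
// convertLocVTToValVT recovers the f16 value on the receiving side.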
18879
18880// The caller is responsible for loading the full value if the argument is
18881// passed with CCValAssign::Indirect.
18882 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
18883 const CCValAssign &VA, const SDLoc &DL) {
18884 MachineFunction &MF = DAG.getMachineFunction();
18885 MachineFrameInfo &MFI = MF.getFrameInfo();
18886 EVT LocVT = VA.getLocVT();
18887 EVT ValVT = VA.getValVT();
18888 EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
18889 if (ValVT.isScalableVector()) {
18890 // When the value is a scalable vector, we save the pointer which points to
18891 // the scalable vector value in the stack. The ValVT will be the pointer
18892 // type, instead of the scalable vector type.
18893 ValVT = LocVT;
18894 }
18895 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
18896 /*IsImmutable=*/true);
18897 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
18898 SDValue Val;
18899
18900 ISD::LoadExtType ExtType;
18901 switch (VA.getLocInfo()) {
18902 default:
18903 llvm_unreachable("Unexpected CCValAssign::LocInfo");
18904 case CCValAssign::Full:
18905 case CCValAssign::Indirect:
18906 case CCValAssign::BCvt:
18907 ExtType = ISD::NON_EXTLOAD;
18908 break;
18909 }
18910 Val = DAG.getExtLoad(
18911 ExtType, DL, LocVT, Chain, FIN,
18912 MachinePointerInfo::getFixedStack(MF, FI), ValVT);
18913 return Val;
18914}
18915
18916 static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
18917 const CCValAssign &VA,
18918 const CCValAssign &HiVA,
18919 const SDLoc &DL) {
18920 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
18921 "Unexpected VA");
18922 MachineFunction &MF = DAG.getMachineFunction();
18923 MachineFrameInfo &MFI = MF.getFrameInfo();
18924 MachineRegisterInfo &RegInfo = MF.getRegInfo();
18925
18926 assert(VA.isRegLoc() && "Expected register VA assignment");
18927
18928 Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
18929 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
18930 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
18931 SDValue Hi;
18932 if (HiVA.isMemLoc()) {
18933 // Second half of f64 is passed on the stack.
18934 int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
18935 /*IsImmutable=*/true);
18936 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
18937 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
18938 MachinePointerInfo::getFixedStack(MF, FI));
18939 } else {
18940 // Second half of f64 is passed in another GPR.
18941 Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
18942 RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
18943 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
18944 }
18945 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
18946}
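// Illustrative example (a sketch, assuming the ilp32 soft-float ABI): if the
// low half of an f64 argument landed in the last free GPR (e.g. a7) and the
// high half spilled to the stack, the code above copies the register, loads
// the spilled word, and rebuilds the double with BuildPairF64.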
18947
18948 // FastCC yields less than 1% performance improvement on some particular
18949 // benchmarks, but it may theoretically benefit some cases.
18950 bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI,
18951 unsigned ValNo, MVT ValVT, MVT LocVT,
18952 CCValAssign::LocInfo LocInfo,
18953 ISD::ArgFlagsTy ArgFlags, CCState &State,
18954 bool IsFixed, bool IsRet, Type *OrigTy,
18955 const RISCVTargetLowering &TLI,
18956 RVVArgDispatcher &RVVDispatcher) {
18957 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
18958 if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
18959 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18960 return false;
18961 }
18962 }
18963
18964 const RISCVSubtarget &Subtarget = TLI.getSubtarget();
18965
18966 if (LocVT == MVT::f16 &&
18967 (Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZfhmin())) {
18968 static const MCPhysReg FPR16List[] = {
18969 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H,
18970 RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H, RISCV::F1_H,
18971 RISCV::F2_H, RISCV::F3_H, RISCV::F4_H, RISCV::F5_H, RISCV::F6_H,
18972 RISCV::F7_H, RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H};
18973 if (unsigned Reg = State.AllocateReg(FPR16List)) {
18974 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18975 return false;
18976 }
18977 }
18978
18979 if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) {
18980 static const MCPhysReg FPR32List[] = {
18981 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F,
18982 RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F, RISCV::F1_F,
18983 RISCV::F2_F, RISCV::F3_F, RISCV::F4_F, RISCV::F5_F, RISCV::F6_F,
18984 RISCV::F7_F, RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F};
18985 if (unsigned Reg = State.AllocateReg(FPR32List)) {
18986 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18987 return false;
18988 }
18989 }
18990
18991 if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) {
18992 static const MCPhysReg FPR64List[] = {
18993 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D,
18994 RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D, RISCV::F1_D,
18995 RISCV::F2_D, RISCV::F3_D, RISCV::F4_D, RISCV::F5_D, RISCV::F6_D,
18996 RISCV::F7_D, RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D};
18997 if (unsigned Reg = State.AllocateReg(FPR64List)) {
18998 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18999 return false;
19000 }
19001 }
19002
19003 // Check if there is an available GPR before hitting the stack.
19004 if ((LocVT == MVT::f16 &&
19005 (Subtarget.hasStdExtZhinx() || Subtarget.hasStdExtZhinxmin())) ||
19006 (LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
19007 (LocVT == MVT::f64 && Subtarget.is64Bit() &&
19008 Subtarget.hasStdExtZdinx())) {
19009 if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
19010 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19011 return false;
19012 }
19013 }
19014
19015 if (LocVT == MVT::f16) {
19016 unsigned Offset2 = State.AllocateStack(2, Align(2));
19017 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset2, LocVT, LocInfo));
19018 return false;
19019 }
19020
19021 if (LocVT == MVT::i32 || LocVT == MVT::f32) {
19022 unsigned Offset4 = State.AllocateStack(4, Align(4));
19023 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo));
19024 return false;
19025 }
19026
19027 if (LocVT == MVT::i64 || LocVT == MVT::f64) {
19028 unsigned Offset5 = State.AllocateStack(8, Align(8));
19029 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo));
19030 return false;
19031 }
19032
19033 if (LocVT.isVector()) {
19034 MCPhysReg AllocatedVReg = RVVDispatcher.getNextPhysReg();
19035 if (AllocatedVReg) {
19036 // Fixed-length vectors are located in the corresponding scalable-vector
19037 // container types.
19038 if (ValVT.isFixedLengthVector())
19039 LocVT = TLI.getContainerForFixedLengthVector(LocVT);
19040 State.addLoc(
19041 CCValAssign::getReg(ValNo, ValVT, AllocatedVReg, LocVT, LocInfo));
19042 } else {
19043 // Try and pass the address via a "fast" GPR.
19044 if (unsigned GPRReg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
19045 LocInfo = CCValAssign::Indirect;
19046 LocVT = TLI.getSubtarget().getXLenVT();
19047 State.addLoc(CCValAssign::getReg(ValNo, ValVT, GPRReg, LocVT, LocInfo));
19048 } else if (ValVT.isFixedLengthVector()) {
19049 auto StackAlign =
19050 MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne();
19051 unsigned StackOffset =
19052 State.AllocateStack(ValVT.getStoreSize(), StackAlign);
19053 State.addLoc(
19054 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
19055 } else {
19056 // Can't pass scalable vectors on the stack.
19057 return true;
19058 }
19059 }
19060
19061 return false;
19062 }
19063
19064 return true; // CC didn't match.
19065}
19066
19067bool RISCV::CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
19068 CCValAssign::LocInfo LocInfo,
19069 ISD::ArgFlagsTy ArgFlags, CCState &State) {
19070 if (ArgFlags.isNest()) {
19071 report_fatal_error(
19072 "Attribute 'nest' is not supported in GHC calling convention");
19073 }
19074
19075 static const MCPhysReg GPRList[] = {
19076 RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22,
19077 RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27};
19078
19079 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
19080 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim
19081 // s1 s2 s3 s4 s5 s6 s7 s8 s9 s10 s11
19082 if (unsigned Reg = State.AllocateReg(GPRList)) {
19083 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19084 return false;
19085 }
19086 }
19087
19088 const RISCVSubtarget &Subtarget =
19089 State.getMachineFunction().getSubtarget<RISCVSubtarget>();
19090
19091 if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) {
19092 // Pass in STG registers: F1, ..., F6
19093 // fs0 ... fs5
19094 static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F,
19095 RISCV::F18_F, RISCV::F19_F,
19096 RISCV::F20_F, RISCV::F21_F};
19097 if (unsigned Reg = State.AllocateReg(FPR32List)) {
19098 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19099 return false;
19100 }
19101 }
19102
19103 if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) {
19104 // Pass in STG registers: D1, ..., D6
19105 // fs6 ... fs11
19106 static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D,
19107 RISCV::F24_D, RISCV::F25_D,
19108 RISCV::F26_D, RISCV::F27_D};
19109 if (unsigned Reg = State.AllocateReg(FPR64List)) {
19110 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19111 return false;
19112 }
19113 }
19114
19115 if ((LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
19116 (LocVT == MVT::f64 && Subtarget.hasStdExtZdinx() &&
19117 Subtarget.is64Bit())) {
19118 if (unsigned Reg = State.AllocateReg(GPRList)) {
19119 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
19120 return false;
19121 }
19122 }
19123
19124 report_fatal_error("No registers left in GHC calling convention");
19125 return true;
19126}
19127
19128// Transform physical registers into virtual registers.
19129 SDValue RISCVTargetLowering::LowerFormalArguments(
19130 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
19131 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
19132 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
19133
19134 MachineFunction &MF = DAG.getMachineFunction();
19135
19136 switch (CallConv) {
19137 default:
19138 report_fatal_error("Unsupported calling convention");
19139 case CallingConv::C:
19140 case CallingConv::Fast:
19141 case CallingConv::SPIR_KERNEL:
19142 case CallingConv::GRAAL:
19143 case CallingConv::RISCV_VectorCall:
19144 break;
19145 case CallingConv::GHC:
19146 if (Subtarget.hasStdExtE())
19147 report_fatal_error("GHC calling convention is not supported on RVE!");
19148 if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx())
19149 report_fatal_error("GHC calling convention requires the (Zfinx/F) and "
19150 "(Zdinx/D) instruction set extensions");
19151 }
19152
19153 const Function &Func = MF.getFunction();
19154 if (Func.hasFnAttribute("interrupt")) {
19155 if (!Func.arg_empty())
19156 report_fatal_error(
19157 "Functions with the interrupt attribute cannot have arguments!");
19158
19159 StringRef Kind =
19160 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
19161
19162 if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
19163 report_fatal_error(
19164 "Function interrupt attribute argument not supported!");
19165 }
19166
19167 EVT PtrVT = getPointerTy(DAG.getDataLayout());
19168 MVT XLenVT = Subtarget.getXLenVT();
19169 unsigned XLenInBytes = Subtarget.getXLen() / 8;
19170 // Used with varargs to accumulate store chains.
19171 std::vector<SDValue> OutChains;
19172
19173 // Assign locations to all of the incoming arguments.
19174 SmallVector<CCValAssign, 16> ArgLocs;
19175 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
19176
19177 if (CallConv == CallingConv::GHC)
19178 CCInfo.AnalyzeFormalArguments(Ins, RISCV::CC_RISCV_GHC);
19179 else
19180 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false,
19181 CallConv == CallingConv::Fast ? RISCV::CC_RISCV_FastCC
19182 : RISCV::CC_RISCV);
19183
19184 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
19185 CCValAssign &VA = ArgLocs[i];
19186 SDValue ArgValue;
19187 // Passing f64 on RV32D with a soft float ABI must be handled as a special
19188 // case.
19189 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
19190 assert(VA.needsCustom());
19191 ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
19192 } else if (VA.isRegLoc())
19193 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
19194 else
19195 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
19196
19197 if (VA.getLocInfo() == CCValAssign::Indirect) {
19198 // If the original argument was split and passed by reference (e.g. i128
19199 // on RV32), we need to load all parts of it here (using the same
19200 // address). Vectors may be partly split to registers and partly to the
19201 // stack, in which case the base address is partly offset and subsequent
19202 // stores are relative to that.
19203 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
19204 MachinePointerInfo()));
19205 unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
19206 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
19207 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
19208 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
19209 CCValAssign &PartVA = ArgLocs[i + 1];
19210 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
19211 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
19212 if (PartVA.getValVT().isScalableVector())
19213 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
19214 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
19215 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
19216 MachinePointerInfo()));
19217 ++i;
19218 ++InsIdx;
19219 }
19220 continue;
19221 }
19222 InVals.push_back(ArgValue);
19223 }
19224
19225 if (any_of(ArgLocs,
19226 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
19227 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
19228
19229 if (IsVarArg) {
19230 ArrayRef<MCPhysReg> ArgRegs = RISCV::getArgGPRs(Subtarget.getTargetABI());
19231 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
19232 const TargetRegisterClass *RC = &RISCV::GPRRegClass;
19233 MachineFrameInfo &MFI = MF.getFrameInfo();
19234 MachineRegisterInfo &RegInfo = MF.getRegInfo();
19235 RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
19236
19237 // Size of the vararg save area. For now, the varargs save area is either
19238 // zero or large enough to hold a0-a7.
19239 int VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
19240 int FI;
19241
19242 // If all registers are allocated, then all varargs must be passed on the
19243 // stack and we don't need to save any argregs.
19244 if (VarArgsSaveSize == 0) {
19245 int VaArgOffset = CCInfo.getStackSize();
19246 FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
19247 } else {
19248 int VaArgOffset = -VarArgsSaveSize;
19249 FI = MFI.CreateFixedObject(VarArgsSaveSize, VaArgOffset, true);
19250
19251 // If saving an odd number of registers then create an extra stack slot to
19252 // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
19253 // offsets to even-numbered registers remain 2*XLEN-aligned.
19254 if (Idx % 2) {
19255 MFI.CreateFixedObject(
19256 XLenInBytes, VaArgOffset - static_cast<int>(XLenInBytes), true);
19257 VarArgsSaveSize += XLenInBytes;
19258 }
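// Illustrative sizing (a sketch, assuming RV32 with three named arguments):
// a0-a2 are consumed by the named arguments, so Idx == 3 and a3-a7 must be
// saved, giving VarArgsSaveSize = 5 * 4 = 20 bytes; because Idx is odd, the
// extra slot above pads the save area to 24 bytes so it stays 2*XLEN-aligned.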
19259
19260 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
19261
19262 // Copy the integer registers that may have been used for passing varargs
19263 // to the vararg save area.
19264 for (unsigned I = Idx; I < ArgRegs.size(); ++I) {
19265 const Register Reg = RegInfo.createVirtualRegister(RC);
19266 RegInfo.addLiveIn(ArgRegs[I], Reg);
19267 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
19268 SDValue Store = DAG.getStore(
19269 Chain, DL, ArgValue, FIN,
19270 MachinePointerInfo::getFixedStack(MF, FI, (I - Idx) * XLenInBytes));
19271 OutChains.push_back(Store);
19272 FIN =
19273 DAG.getMemBasePlusOffset(FIN, TypeSize::getFixed(XLenInBytes), DL);
19274 }
19275 }
19276
19277 // Record the frame index of the first variable argument,
19278 // which is needed for lowering VASTART.
19279 RVFI->setVarArgsFrameIndex(FI);
19280 RVFI->setVarArgsSaveSize(VarArgsSaveSize);
19281 }
19282
19283 // All stores are grouped in one node to allow the matching between
19284 // the size of Ins and InVals. This only happens for vararg functions.
19285 if (!OutChains.empty()) {
19286 OutChains.push_back(Chain);
19287 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
19288 }
19289
19290 return Chain;
19291}
19292
19293/// isEligibleForTailCallOptimization - Check whether the call is eligible
19294/// for tail call optimization.
19295/// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
19296bool RISCVTargetLowering::isEligibleForTailCallOptimization(
19297 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
19298 const SmallVector<CCValAssign, 16> &ArgLocs) const {
19299
19300 auto CalleeCC = CLI.CallConv;
19301 auto &Outs = CLI.Outs;
19302 auto &Caller = MF.getFunction();
19303 auto CallerCC = Caller.getCallingConv();
19304
19305 // Exception-handling functions need a special set of instructions to
19306 // indicate a return to the hardware. Tail-calling another function would
19307 // probably break this.
19308 // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
19309 // should be expanded as new function attributes are introduced.
19310 if (Caller.hasFnAttribute("interrupt"))
19311 return false;
19312
19313 // Do not tail call opt if the stack is used to pass parameters.
19314 if (CCInfo.getStackSize() != 0)
19315 return false;
19316
19317 // Do not tail call opt if any parameters need to be passed indirectly.
19318 // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
19319 // passed indirectly. So the address of the value will be passed in a
19320 // register, or if not available, then the address is put on the stack. In
19321 // order to pass indirectly, space on the stack often needs to be allocated
19322 // in order to store the value. In this case the CCInfo.getStackSize()
19323 // != 0 check is not enough and we need to check whether any CCValAssign in
19324 // ArgLocs is passed CCValAssign::Indirect.
19325 for (auto &VA : ArgLocs)
19326 if (VA.getLocInfo() == CCValAssign::Indirect)
19327 return false;
19328
19329 // Do not tail call opt if either caller or callee uses struct return
19330 // semantics.
19331 auto IsCallerStructRet = Caller.hasStructRetAttr();
19332 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
19333 if (IsCallerStructRet || IsCalleeStructRet)
19334 return false;
19335
19336 // The callee has to preserve all registers the caller needs to preserve.
19337 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
19338 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
19339 if (CalleeCC != CallerCC) {
19340 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
19341 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
19342 return false;
19343 }
19344
19345 // Byval parameters hand the function a pointer directly into the stack area
19346 // we want to reuse during a tail call. Working around this *is* possible
19347 // but less efficient and uglier in LowerCall.
19348 for (auto &Arg : Outs)
19349 if (Arg.Flags.isByVal())
19350 return false;
19351
19352 return true;
19353}
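// In short (a summary of the checks above, not an exhaustive rule): a call is
// tail-call eligible only if the caller is not an interrupt handler, every
// argument fits in registers (nothing on the stack or passed indirectly),
// neither side uses struct-return semantics, no argument is byval, and the
// callee's convention preserves at least the registers the caller requires.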
19354
19355 static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
19356 return DAG.getDataLayout().getPrefTypeAlign(
19357 VT.getTypeForEVT(*DAG.getContext()));
19358}
19359
19360// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
19361// and output parameter nodes.
19362 SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
19363 SmallVectorImpl<SDValue> &InVals) const {
19364 SelectionDAG &DAG = CLI.DAG;
19365 SDLoc &DL = CLI.DL;
19366 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
19367 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
19368 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
19369 SDValue Chain = CLI.Chain;
19370 SDValue Callee = CLI.Callee;
19371 bool &IsTailCall = CLI.IsTailCall;
19372 CallingConv::ID CallConv = CLI.CallConv;
19373 bool IsVarArg = CLI.IsVarArg;
19374 EVT PtrVT = getPointerTy(DAG.getDataLayout());
19375 MVT XLenVT = Subtarget.getXLenVT();
19376
19377 MachineFunction &MF = DAG.getMachineFunction();
19378
19379 // Analyze the operands of the call, assigning locations to each operand.
19380 SmallVector<CCValAssign, 16> ArgLocs;
19381 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
19382
19383 if (CallConv == CallingConv::GHC) {
19384 if (Subtarget.hasStdExtE())
19385 report_fatal_error("GHC calling convention is not supported on RVE!");
19386 ArgCCInfo.AnalyzeCallOperands(Outs, RISCV::CC_RISCV_GHC);
19387 } else
19388 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI,
19389 CallConv == CallingConv::Fast ? RISCV::CC_RISCV_FastCC
19390 : RISCV::CC_RISCV);
19391
19392 // Check if it's really possible to do a tail call.
19393 if (IsTailCall)
19394 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
19395
19396 if (IsTailCall)
19397 ++NumTailCalls;
19398 else if (CLI.CB && CLI.CB->isMustTailCall())
19399 report_fatal_error("failed to perform tail call elimination on a call "
19400 "site marked musttail");
19401
19402 // Get a count of how many bytes are to be pushed on the stack.
19403 unsigned NumBytes = ArgCCInfo.getStackSize();
19404
19405 // Create local copies for byval args
19406 SmallVector<SDValue, 8> ByValArgs;
19407 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
19408 ISD::ArgFlagsTy Flags = Outs[i].Flags;
19409 if (!Flags.isByVal())
19410 continue;
19411
19412 SDValue Arg = OutVals[i];
19413 unsigned Size = Flags.getByValSize();
19414 Align Alignment = Flags.getNonZeroByValAlign();
19415
19416 int FI =
19417 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
19418 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
19419 SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
19420
19421 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
19422 /*IsVolatile=*/false,
19423 /*AlwaysInline=*/false, IsTailCall,
19424 MachinePointerInfo(), MachinePointerInfo());
19425 ByValArgs.push_back(FIPtr);
19426 }
19427
19428 if (!IsTailCall)
19429 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
19430
19431 // Copy argument values to their designated locations.
19432 SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
19433 SmallVector<SDValue, 8> MemOpChains;
19434 SDValue StackPtr;
19435 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
19436 ++i, ++OutIdx) {
19437 CCValAssign &VA = ArgLocs[i];
19438 SDValue ArgValue = OutVals[OutIdx];
19439 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
19440
19441 // Handle passing f64 on RV32D with a soft float ABI as a special case.
19442 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
19443 assert(VA.isRegLoc() && "Expected register VA assignment");
19444 assert(VA.needsCustom());
19445 SDValue SplitF64 = DAG.getNode(
19446 RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
19447 SDValue Lo = SplitF64.getValue(0);
19448 SDValue Hi = SplitF64.getValue(1);
19449
19450 Register RegLo = VA.getLocReg();
19451 RegsToPass.push_back(std::make_pair(RegLo, Lo));
19452
19453 // Get the CCValAssign for the Hi part.
19454 CCValAssign &HiVA = ArgLocs[++i];
19455
19456 if (HiVA.isMemLoc()) {
19457 // Second half of f64 is passed on the stack.
19458 if (!StackPtr.getNode())
19459 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
19460 SDValue Address =
19461 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
19462 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
19463 // Emit the store.
19464 MemOpChains.push_back(
19465 DAG.getStore(Chain, DL, Hi, Address, MachinePointerInfo()));
19466 } else {
19467 // Second half of f64 is passed in another GPR.
19468 Register RegHigh = HiVA.getLocReg();
19469 RegsToPass.push_back(std::make_pair(RegHigh, Hi));
19470 }
19471 continue;
19472 }
19473
19474 // Promote the value if needed.
19475 // For now, only handle fully promoted and indirect arguments.
19476 if (VA.getLocInfo() == CCValAssign::Indirect) {
19477 // Store the argument in a stack slot and pass its address.
19478 Align StackAlign =
19479 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
19480 getPrefTypeAlign(ArgValue.getValueType(), DAG));
19481 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
19482 // If the original argument was split (e.g. i128), we need
19483 // to store the required parts of it here (and pass just one address).
19484 // Vectors may be partly split to registers and partly to the stack, in
19485 // which case the base address is partly offset and subsequent stores are
19486 // relative to that.
19487 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
19488 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
19489 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
19490 // Calculate the total size to store. We don't have access to what we're
19491 // actually storing other than performing the loop and collecting the
19492 // info.
19493 SmallVector<std::pair<SDValue, SDValue>> Parts;
19494 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
19495 SDValue PartValue = OutVals[OutIdx + 1];
19496 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
19497 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
19498 EVT PartVT = PartValue.getValueType();
19499 if (PartVT.isScalableVector())
19500 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
19501 StoredSize += PartVT.getStoreSize();
19502 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
19503 Parts.push_back(std::make_pair(PartValue, Offset));
19504 ++i;
19505 ++OutIdx;
19506 }
19507 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
19508 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
19509 MemOpChains.push_back(
19510 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
19511 MachinePointerInfo::getFixedStack(MF, FI)));
19512 for (const auto &Part : Parts) {
19513 SDValue PartValue = Part.first;
19514 SDValue PartOffset = Part.second;
19515 SDValue Address =
19516 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
19517 MemOpChains.push_back(
19518 DAG.getStore(Chain, DL, PartValue, Address,
19519 MachinePointerInfo::getFixedStack(MF, FI)));
19520 }
19521 ArgValue = SpillSlot;
19522 } else {
19523 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget);
19524 }
19525
19526 // Use local copy if it is a byval arg.
19527 if (Flags.isByVal())
19528 ArgValue = ByValArgs[j++];
19529
19530 if (VA.isRegLoc()) {
19531 // Queue up the argument copies and emit them at the end.
19532 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
19533 } else {
19534 assert(VA.isMemLoc() && "Argument not register or memory");
19535 assert(!IsTailCall && "Tail call not allowed if stack is used "
19536 "for passing parameters");
19537
19538 // Work out the address of the stack slot.
19539 if (!StackPtr.getNode())
19540 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
19541 SDValue Address =
19542 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
19543 DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
19544
19545 // Emit the store.
19546 MemOpChains.push_back(
19547 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
19548 }
19549 }
19550
19551 // Join the stores, which are independent of one another.
19552 if (!MemOpChains.empty())
19553 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
19554
19555 SDValue Glue;
19556
19557 // Build a sequence of copy-to-reg nodes, chained and glued together.
19558 for (auto &Reg : RegsToPass) {
19559 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
19560 Glue = Chain.getValue(1);
19561 }
19562
19563 // Validate that none of the argument registers have been marked as
19564 // reserved; if so, report an error. Do the same for the return address if
19565 // this is not a tail call.
19566 validateCCReservedRegs(RegsToPass, MF);
19567 if (!IsTailCall &&
19568 MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(RISCV::X1))
19569 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
19570 MF.getFunction(),
19571 "Return address register required, but has been reserved."});
19572
19573 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
19574 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
19575 // split it and then direct call can be matched by PseudoCALL.
19576 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
19577 const GlobalValue *GV = S->getGlobal();
19578 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, RISCVII::MO_CALL);
19579 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
19580 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, RISCVII::MO_CALL);
19581 }
19582
19583 // The first call operand is the chain and the second is the target address.
19584 SmallVector<SDValue, 8> Ops;
19585 Ops.push_back(Chain);
19586 Ops.push_back(Callee);
19587
19588 // Add argument registers to the end of the list so that they are
19589 // known live into the call.
19590 for (auto &Reg : RegsToPass)
19591 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
19592
19593 if (!IsTailCall) {
19594 // Add a register mask operand representing the call-preserved registers.
19595 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
19596 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
19597 assert(Mask && "Missing call preserved mask for calling convention");
19598 Ops.push_back(DAG.getRegisterMask(Mask));
19599 }
19600
19601 // Glue the call to the argument copies, if any.
19602 if (Glue.getNode())
19603 Ops.push_back(Glue);
19604
19605 assert((!CLI.CFIType || CLI.CB->isIndirectCall()) &&
19606 "Unexpected CFI type for a direct call");
19607
19608 // Emit the call.
19609 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
19610
19611 if (IsTailCall) {
19612 MF.getFrameInfo().setHasTailCall();
19613 SDValue Ret = DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
19614 if (CLI.CFIType)
19615 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
19616 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
19617 return Ret;
19618 }
19619
19620 Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
19621 if (CLI.CFIType)
19622 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
19623 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
19624 Glue = Chain.getValue(1);
19625
19626 // Mark the end of the call, which is glued to the call itself.
19627 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
19628 Glue = Chain.getValue(1);
19629
19630 // Assign locations to each value returned by this call.
19631 SmallVector<CCValAssign, 16> RVLocs;
19632 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
19633 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, RISCV::CC_RISCV);
19634
19635 // Copy all of the result registers out of their specified physreg.
19636 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
19637 auto &VA = RVLocs[i];
19638 // Copy the value out
19639 SDValue RetValue =
19640 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
19641 // Glue the RetValue to the end of the call sequence
19642 Chain = RetValue.getValue(1);
19643 Glue = RetValue.getValue(2);
19644
19645 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
19646 assert(VA.needsCustom());
19647 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
19648 MVT::i32, Glue);
19649 Chain = RetValue2.getValue(1);
19650 Glue = RetValue2.getValue(2);
19651 RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
19652 RetValue2);
19653 }
19654
19655 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget);
19656
19657 InVals.push_back(RetValue);
19658 }
19659
19660 return Chain;
19661}
19662
19663 bool RISCVTargetLowering::CanLowerReturn(
19664 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
19665 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
19666 SmallVector<CCValAssign, 16> RVLocs;
19667 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
19668
19669 RVVArgDispatcher Dispatcher{&MF, this, ArrayRef(Outs)};
19670
19671 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
19672 MVT VT = Outs[i].VT;
19673 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
19674 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
19675 if (RISCV::CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full,
19676 ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true,
19677 nullptr, *this, Dispatcher))
19678 return false;
19679 }
19680 return true;
19681}
19682
19683SDValue
19684 RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
19685 bool IsVarArg,
19686 const SmallVectorImpl<ISD::OutputArg> &Outs,
19687 const SmallVectorImpl<SDValue> &OutVals,
19688 const SDLoc &DL, SelectionDAG &DAG) const {
19689 MachineFunction &MF = DAG.getMachineFunction();
19690 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
19691
19692 // Stores the assignment of the return value to a location.
19693 SmallVector<CCValAssign, 16> RVLocs;
19694
19695 // Info about the registers and stack slot.
19696 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
19697 *DAG.getContext());
19698
19699 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
19700 nullptr, RISCV::CC_RISCV);
19701
19702 if (CallConv == CallingConv::GHC && !RVLocs.empty())
19703 report_fatal_error("GHC functions return void only");
19704
19705 SDValue Glue;
19706 SmallVector<SDValue, 4> RetOps(1, Chain);
19707
19708 // Copy the result values into the output registers.
19709 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
19710 SDValue Val = OutVals[OutIdx];
19711 CCValAssign &VA = RVLocs[i];
19712 assert(VA.isRegLoc() && "Can only return in registers!");
19713
19714 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
19715 // Handle returning f64 on RV32D with a soft float ABI.
19716 assert(VA.isRegLoc() && "Expected return via registers");
19717 assert(VA.needsCustom());
19718 SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
19719 DAG.getVTList(MVT::i32, MVT::i32), Val);
19720 SDValue Lo = SplitF64.getValue(0);
19721 SDValue Hi = SplitF64.getValue(1);
19722 Register RegLo = VA.getLocReg();
19723 Register RegHi = RVLocs[++i].getLocReg();
19724
19725 if (STI.isRegisterReservedByUser(RegLo) ||
19726 STI.isRegisterReservedByUser(RegHi))
19727 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
19728 MF.getFunction(),
19729 "Return value register required, but has been reserved."});
19730
19731 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
19732 Glue = Chain.getValue(1);
19733 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
19734 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
19735 Glue = Chain.getValue(1);
19736 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
19737 } else {
19738 // Handle a 'normal' return.
19739 Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget);
19740 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
19741
19742 if (STI.isRegisterReservedByUser(VA.getLocReg()))
19743 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
19744 MF.getFunction(),
19745 "Return value register required, but has been reserved."});
19746
19747 // Guarantee that all emitted copies are stuck together.
19748 Glue = Chain.getValue(1);
19749 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
19750 }
19751 }
19752
19753 RetOps[0] = Chain; // Update chain.
19754
19755 // Add the glue node if we have it.
19756 if (Glue.getNode()) {
19757 RetOps.push_back(Glue);
19758 }
19759
19760 if (any_of(RVLocs,
19761 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
19762 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
19763
19764 unsigned RetOpc = RISCVISD::RET_GLUE;
19765 // Interrupt service routines use different return instructions.
19766 const Function &Func = DAG.getMachineFunction().getFunction();
19767 if (Func.hasFnAttribute("interrupt")) {
19768 if (!Func.getReturnType()->isVoidTy())
19769 report_fatal_error(
19770 "Functions with the interrupt attribute must have void return type!");
19771
19773 StringRef Kind =
19774 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
19775
19776 if (Kind == "supervisor")
19777 RetOpc = RISCVISD::SRET_GLUE;
19778 else
19779 RetOpc = RISCVISD::MRET_GLUE;
19780 }
19781
19782 return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
19783}
19784
19785void RISCVTargetLowering::validateCCReservedRegs(
19786 const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
19787 MachineFunction &MF) const {
19788 const Function &F = MF.getFunction();
19789 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
19790
19791 if (llvm::any_of(Regs, [&STI](auto Reg) {
19792 return STI.isRegisterReservedByUser(Reg.first);
19793 }))
19794 F.getContext().diagnose(DiagnosticInfoUnsupported{
19795 F, "Argument register required, but has been reserved."});
19796}
19797
19798// Check if the result of the node is only used as a return value, as
19799// otherwise we can't perform a tail-call.
19800 bool RISCVTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
19801 if (N->getNumValues() != 1)
19802 return false;
19803 if (!N->hasNUsesOfValue(1, 0))
19804 return false;
19805
19806 SDNode *Copy = *N->use_begin();
19807
19808 if (Copy->getOpcode() == ISD::BITCAST) {
19809 return isUsedByReturnOnly(Copy, Chain);
19810 }
19811
19812 // TODO: Handle additional opcodes in order to support tail-calling libcalls
19813 // with soft float ABIs.
19814 if (Copy->getOpcode() != ISD::CopyToReg) {
19815 return false;
19816 }
19817
19818 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
19819 // isn't safe to perform a tail call.
19820 if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == MVT::Glue)
19821 return false;
19822
19823 // The copy must be used by a RISCVISD::RET_GLUE, and nothing else.
19824 bool HasRet = false;
19825 for (SDNode *Node : Copy->uses()) {
19826 if (Node->getOpcode() != RISCVISD::RET_GLUE)
19827 return false;
19828 HasRet = true;
19829 }
19830 if (!HasRet)
19831 return false;
19832
19833 Chain = Copy->getOperand(0);
19834 return true;
19835}
19836
19837 bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
19838 return CI->isTailCall();
19839}
19840
19841const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
19842#define NODE_NAME_CASE(NODE) \
19843 case RISCVISD::NODE: \
19844 return "RISCVISD::" #NODE;
19845 // clang-format off
19846 switch ((RISCVISD::NodeType)Opcode) {
19847 case RISCVISD::FIRST_NUMBER:
19848 break;
19849 NODE_NAME_CASE(RET_GLUE)
19850 NODE_NAME_CASE(SRET_GLUE)
19851 NODE_NAME_CASE(MRET_GLUE)
19852 NODE_NAME_CASE(CALL)
19853 NODE_NAME_CASE(SELECT_CC)
19854 NODE_NAME_CASE(BR_CC)
19855 NODE_NAME_CASE(BuildPairF64)
19856 NODE_NAME_CASE(SplitF64)
19857 NODE_NAME_CASE(TAIL)
19858 NODE_NAME_CASE(ADD_LO)
19859 NODE_NAME_CASE(HI)
19860 NODE_NAME_CASE(LLA)
19861 NODE_NAME_CASE(ADD_TPREL)
19862 NODE_NAME_CASE(MULHSU)
19863 NODE_NAME_CASE(SHL_ADD)
19864 NODE_NAME_CASE(SLLW)
19865 NODE_NAME_CASE(SRAW)
19866 NODE_NAME_CASE(SRLW)
19867 NODE_NAME_CASE(DIVW)
19868 NODE_NAME_CASE(DIVUW)
19869 NODE_NAME_CASE(REMUW)
19870 NODE_NAME_CASE(ROLW)
19871 NODE_NAME_CASE(RORW)
19872 NODE_NAME_CASE(CLZW)
19873 NODE_NAME_CASE(CTZW)
19874 NODE_NAME_CASE(ABSW)
19875 NODE_NAME_CASE(FMV_H_X)
19876 NODE_NAME_CASE(FMV_X_ANYEXTH)
19877 NODE_NAME_CASE(FMV_X_SIGNEXTH)
19878 NODE_NAME_CASE(FMV_W_X_RV64)
19879 NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
19880 NODE_NAME_CASE(FCVT_X)
19881 NODE_NAME_CASE(FCVT_XU)
19882 NODE_NAME_CASE(FCVT_W_RV64)
19883 NODE_NAME_CASE(FCVT_WU_RV64)
19884 NODE_NAME_CASE(STRICT_FCVT_W_RV64)
19885 NODE_NAME_CASE(STRICT_FCVT_WU_RV64)
19886 NODE_NAME_CASE(FP_ROUND_BF16)
19887 NODE_NAME_CASE(FP_EXTEND_BF16)
19888 NODE_NAME_CASE(FROUND)
19889 NODE_NAME_CASE(FCLASS)
19890 NODE_NAME_CASE(FMAX)
19891 NODE_NAME_CASE(FMIN)
19892 NODE_NAME_CASE(READ_COUNTER_WIDE)
19893 NODE_NAME_CASE(BREV8)
19894 NODE_NAME_CASE(ORC_B)
19895 NODE_NAME_CASE(ZIP)
19896 NODE_NAME_CASE(UNZIP)
19897 NODE_NAME_CASE(CLMUL)
19898 NODE_NAME_CASE(CLMULH)
19899 NODE_NAME_CASE(CLMULR)
19900 NODE_NAME_CASE(MOPR)
19901 NODE_NAME_CASE(MOPRR)
19902 NODE_NAME_CASE(SHA256SIG0)
19903 NODE_NAME_CASE(SHA256SIG1)
19904 NODE_NAME_CASE(SHA256SUM0)
19905 NODE_NAME_CASE(SHA256SUM1)
19906 NODE_NAME_CASE(SM4KS)
19907 NODE_NAME_CASE(SM4ED)
19908 NODE_NAME_CASE(SM3P0)
19909 NODE_NAME_CASE(SM3P1)
19910 NODE_NAME_CASE(TH_LWD)
19911 NODE_NAME_CASE(TH_LWUD)
19912 NODE_NAME_CASE(TH_LDD)
19913 NODE_NAME_CASE(TH_SWD)
19914 NODE_NAME_CASE(TH_SDD)
19915 NODE_NAME_CASE(VMV_V_V_VL)
19916 NODE_NAME_CASE(VMV_V_X_VL)
19917 NODE_NAME_CASE(VFMV_V_F_VL)
19918 NODE_NAME_CASE(VMV_X_S)
19919 NODE_NAME_CASE(VMV_S_X_VL)
19920 NODE_NAME_CASE(VFMV_S_F_VL)
19921 NODE_NAME_CASE(SPLAT_VECTOR_SPLIT_I64_VL)
19922 NODE_NAME_CASE(READ_VLENB)
19923 NODE_NAME_CASE(TRUNCATE_VECTOR_VL)
19924 NODE_NAME_CASE(VSLIDEUP_VL)
19925 NODE_NAME_CASE(VSLIDE1UP_VL)
19926 NODE_NAME_CASE(VSLIDEDOWN_VL)
19927 NODE_NAME_CASE(VSLIDE1DOWN_VL)
19928 NODE_NAME_CASE(VFSLIDE1UP_VL)
19929 NODE_NAME_CASE(VFSLIDE1DOWN_VL)
19930 NODE_NAME_CASE(VID_VL)
19931 NODE_NAME_CASE(VFNCVT_ROD_VL)
19932 NODE_NAME_CASE(VECREDUCE_ADD_VL)
19933 NODE_NAME_CASE(VECREDUCE_UMAX_VL)
19934 NODE_NAME_CASE(VECREDUCE_SMAX_VL)
19935 NODE_NAME_CASE(VECREDUCE_UMIN_VL)
19936 NODE_NAME_CASE(VECREDUCE_SMIN_VL)
19937 NODE_NAME_CASE(VECREDUCE_AND_VL)
19938 NODE_NAME_CASE(VECREDUCE_OR_VL)
19939 NODE_NAME_CASE(VECREDUCE_XOR_VL)
19940 NODE_NAME_CASE(VECREDUCE_FADD_VL)
19941 NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL)
19942 NODE_NAME_CASE(VECREDUCE_FMIN_VL)
19943 NODE_NAME_CASE(VECREDUCE_FMAX_VL)
19944 NODE_NAME_CASE(ADD_VL)
19945 NODE_NAME_CASE(AND_VL)
19946 NODE_NAME_CASE(MUL_VL)
19947 NODE_NAME_CASE(OR_VL)
19948 NODE_NAME_CASE(SDIV_VL)
19949 NODE_NAME_CASE(SHL_VL)
19950 NODE_NAME_CASE(SREM_VL)
19951 NODE_NAME_CASE(SRA_VL)
19952 NODE_NAME_CASE(SRL_VL)
19953 NODE_NAME_CASE(ROTL_VL)
19954 NODE_NAME_CASE(ROTR_VL)
19955 NODE_NAME_CASE(SUB_VL)
19956 NODE_NAME_CASE(UDIV_VL)
19957 NODE_NAME_CASE(UREM_VL)
19958 NODE_NAME_CASE(XOR_VL)
19959 NODE_NAME_CASE(AVGFLOORU_VL)
19960 NODE_NAME_CASE(AVGCEILU_VL)
19961 NODE_NAME_CASE(SADDSAT_VL)
19962 NODE_NAME_CASE(UADDSAT_VL)
19963 NODE_NAME_CASE(SSUBSAT_VL)
19964 NODE_NAME_CASE(USUBSAT_VL)
19965 NODE_NAME_CASE(FADD_VL)
19966 NODE_NAME_CASE(FSUB_VL)
19967 NODE_NAME_CASE(FMUL_VL)
19968 NODE_NAME_CASE(FDIV_VL)
19969 NODE_NAME_CASE(FNEG_VL)
19970 NODE_NAME_CASE(FABS_VL)
19971 NODE_NAME_CASE(FSQRT_VL)
19972 NODE_NAME_CASE(FCLASS_VL)
19973 NODE_NAME_CASE(VFMADD_VL)
19974 NODE_NAME_CASE(VFNMADD_VL)
19975 NODE_NAME_CASE(VFMSUB_VL)
19976 NODE_NAME_CASE(VFNMSUB_VL)
19977 NODE_NAME_CASE(VFWMADD_VL)
19978 NODE_NAME_CASE(VFWNMADD_VL)
19979 NODE_NAME_CASE(VFWMSUB_VL)
19980 NODE_NAME_CASE(VFWNMSUB_VL)
19981 NODE_NAME_CASE(FCOPYSIGN_VL)
19982 NODE_NAME_CASE(SMIN_VL)
19983 NODE_NAME_CASE(SMAX_VL)
19984 NODE_NAME_CASE(UMIN_VL)
19985 NODE_NAME_CASE(UMAX_VL)
19986 NODE_NAME_CASE(BITREVERSE_VL)
19987 NODE_NAME_CASE(BSWAP_VL)
19988 NODE_NAME_CASE(CTLZ_VL)
19989 NODE_NAME_CASE(CTTZ_VL)
19990 NODE_NAME_CASE(CTPOP_VL)
19991 NODE_NAME_CASE(VFMIN_VL)
19992 NODE_NAME_CASE(VFMAX_VL)
19993 NODE_NAME_CASE(MULHS_VL)
19994 NODE_NAME_CASE(MULHU_VL)
19995 NODE_NAME_CASE(VFCVT_RTZ_X_F_VL)
19996 NODE_NAME_CASE(VFCVT_RTZ_XU_F_VL)
19997 NODE_NAME_CASE(VFCVT_RM_X_F_VL)
19998 NODE_NAME_CASE(VFCVT_RM_XU_F_VL)
19999 NODE_NAME_CASE(VFCVT_X_F_VL)
20000 NODE_NAME_CASE(VFCVT_XU_F_VL)
20001 NODE_NAME_CASE(VFROUND_NOEXCEPT_VL)
20002 NODE_NAME_CASE(SINT_TO_FP_VL)
20003 NODE_NAME_CASE(UINT_TO_FP_VL)
20004 NODE_NAME_CASE(VFCVT_RM_F_XU_VL)
20005 NODE_NAME_CASE(VFCVT_RM_F_X_VL)
20006 NODE_NAME_CASE(FP_EXTEND_VL)
20007 NODE_NAME_CASE(FP_ROUND_VL)
20008 NODE_NAME_CASE(STRICT_FADD_VL)
20009 NODE_NAME_CASE(STRICT_FSUB_VL)
20010 NODE_NAME_CASE(STRICT_FMUL_VL)
20011 NODE_NAME_CASE(STRICT_FDIV_VL)
20012 NODE_NAME_CASE(STRICT_FSQRT_VL)
20013 NODE_NAME_CASE(STRICT_VFMADD_VL)
20014 NODE_NAME_CASE(STRICT_VFNMADD_VL)
20015 NODE_NAME_CASE(STRICT_VFMSUB_VL)
20016 NODE_NAME_CASE(STRICT_VFNMSUB_VL)
20017 NODE_NAME_CASE(STRICT_FP_ROUND_VL)
20018 NODE_NAME_CASE(STRICT_FP_EXTEND_VL)
20019 NODE_NAME_CASE(STRICT_VFNCVT_ROD_VL)
20020 NODE_NAME_CASE(STRICT_SINT_TO_FP_VL)
20021 NODE_NAME_CASE(STRICT_UINT_TO_FP_VL)
20022 NODE_NAME_CASE(STRICT_VFCVT_RM_X_F_VL)
20023 NODE_NAME_CASE(STRICT_VFCVT_RTZ_X_F_VL)
20024 NODE_NAME_CASE(STRICT_VFCVT_RTZ_XU_F_VL)
20025 NODE_NAME_CASE(STRICT_FSETCC_VL)
20026 NODE_NAME_CASE(STRICT_FSETCCS_VL)
20027 NODE_NAME_CASE(STRICT_VFROUND_NOEXCEPT_VL)
20028 NODE_NAME_CASE(VWMUL_VL)
20029 NODE_NAME_CASE(VWMULU_VL)
20030 NODE_NAME_CASE(VWMULSU_VL)
20031 NODE_NAME_CASE(VWADD_VL)
20032 NODE_NAME_CASE(VWADDU_VL)
20033 NODE_NAME_CASE(VWSUB_VL)
20034 NODE_NAME_CASE(VWSUBU_VL)
20035 NODE_NAME_CASE(VWADD_W_VL)
20036 NODE_NAME_CASE(VWADDU_W_VL)
20037 NODE_NAME_CASE(VWSUB_W_VL)
20038 NODE_NAME_CASE(VWSUBU_W_VL)
20039 NODE_NAME_CASE(VWSLL_VL)
20040 NODE_NAME_CASE(VFWMUL_VL)
20041 NODE_NAME_CASE(VFWADD_VL)
20042 NODE_NAME_CASE(VFWSUB_VL)
20043 NODE_NAME_CASE(VFWADD_W_VL)
20044 NODE_NAME_CASE(VFWSUB_W_VL)
20045 NODE_NAME_CASE(VWMACC_VL)
20046 NODE_NAME_CASE(VWMACCU_VL)
20047 NODE_NAME_CASE(VWMACCSU_VL)
20048 NODE_NAME_CASE(VNSRL_VL)
20049 NODE_NAME_CASE(SETCC_VL)
20050 NODE_NAME_CASE(VMERGE_VL)
20051 NODE_NAME_CASE(VMAND_VL)
20052 NODE_NAME_CASE(VMOR_VL)
20053 NODE_NAME_CASE(VMXOR_VL)
20054 NODE_NAME_CASE(VMCLR_VL)
20055 NODE_NAME_CASE(VMSET_VL)
20056 NODE_NAME_CASE(VRGATHER_VX_VL)
20057 NODE_NAME_CASE(VRGATHER_VV_VL)
20058 NODE_NAME_CASE(VRGATHEREI16_VV_VL)
20059 NODE_NAME_CASE(VSEXT_VL)
20060 NODE_NAME_CASE(VZEXT_VL)
20061 NODE_NAME_CASE(VCPOP_VL)
20062 NODE_NAME_CASE(VFIRST_VL)
20063 NODE_NAME_CASE(READ_CSR)
20064 NODE_NAME_CASE(WRITE_CSR)
20065 NODE_NAME_CASE(SWAP_CSR)
20066 NODE_NAME_CASE(CZERO_EQZ)
20067 NODE_NAME_CASE(CZERO_NEZ)
20068 NODE_NAME_CASE(SF_VC_XV_SE)
20069 NODE_NAME_CASE(SF_VC_IV_SE)
20070 NODE_NAME_CASE(SF_VC_VV_SE)
20071 NODE_NAME_CASE(SF_VC_FV_SE)
20072 NODE_NAME_CASE(SF_VC_XVV_SE)
20073 NODE_NAME_CASE(SF_VC_IVV_SE)
20074 NODE_NAME_CASE(SF_VC_VVV_SE)
20075 NODE_NAME_CASE(SF_VC_FVV_SE)
20076 NODE_NAME_CASE(SF_VC_XVW_SE)
20077 NODE_NAME_CASE(SF_VC_IVW_SE)
20078 NODE_NAME_CASE(SF_VC_VVW_SE)
20079 NODE_NAME_CASE(SF_VC_FVW_SE)
20080 NODE_NAME_CASE(SF_VC_V_X_SE)
20081 NODE_NAME_CASE(SF_VC_V_I_SE)
20082 NODE_NAME_CASE(SF_VC_V_XV_SE)
20083 NODE_NAME_CASE(SF_VC_V_IV_SE)
20084 NODE_NAME_CASE(SF_VC_V_VV_SE)
20085 NODE_NAME_CASE(SF_VC_V_FV_SE)
20086 NODE_NAME_CASE(SF_VC_V_XVV_SE)
20087 NODE_NAME_CASE(SF_VC_V_IVV_SE)
20088 NODE_NAME_CASE(SF_VC_V_VVV_SE)
20089 NODE_NAME_CASE(SF_VC_V_FVV_SE)
20090 NODE_NAME_CASE(SF_VC_V_XVW_SE)
20091 NODE_NAME_CASE(SF_VC_V_IVW_SE)
20092 NODE_NAME_CASE(SF_VC_V_VVW_SE)
20093 NODE_NAME_CASE(SF_VC_V_FVW_SE)
20094 }
20095 // clang-format on
20096 return nullptr;
20097#undef NODE_NAME_CASE
20098}
20099
20100/// getConstraintType - Given a constraint letter, return the type of
20101/// constraint it is for this target.
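/// For RISC-V this boils down to (a brief, non-exhaustive summary): 'f'
/// selects a floating-point register; 'I', 'J' and 'K' are immediates
/// (12-bit signed, integer zero, and 5-bit unsigned, respectively); 'A' is an
/// address held in a register; 's'/'S' name a symbolic address; and the
/// multi-letter "vr"/"vm" constraints select vector and vector mask registers.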
20102 RISCVTargetLowering::ConstraintType
20103 RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
20104 if (Constraint.size() == 1) {
20105 switch (Constraint[0]) {
20106 default:
20107 break;
20108 case 'f':
20109 return C_RegisterClass;
20110 case 'I':
20111 case 'J':
20112 case 'K':
20113 return C_Immediate;
20114 case 'A':
20115 return C_Memory;
20116 case 's':
20117 case 'S': // A symbolic address
20118 return C_Other;
20119 }
20120 } else {
20121 if (Constraint == "vr" || Constraint == "vm")
20122 return C_RegisterClass;
20123 }
20124 return TargetLowering::getConstraintType(Constraint);
20125}
20126
20127std::pair<unsigned, const TargetRegisterClass *>
20128 RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
20129 StringRef Constraint,
20130 MVT VT) const {
20131 // First, see if this is a constraint that directly corresponds to a RISC-V
20132 // register class.
20133 if (Constraint.size() == 1) {
20134 switch (Constraint[0]) {
20135 case 'r':
20136 // TODO: Support fixed vectors up to XLen for P extension?
20137 if (VT.isVector())
20138 break;
20139 if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
20140 return std::make_pair(0U, &RISCV::GPRF16RegClass);
20141 if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
20142 return std::make_pair(0U, &RISCV::GPRF32RegClass);
20143 if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
20144 return std::make_pair(0U, &RISCV::GPRPairRegClass);
20145 return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
20146 case 'f':
20147 if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16)
20148 return std::make_pair(0U, &RISCV::FPR16RegClass);
20149 if (Subtarget.hasStdExtF() && VT == MVT::f32)
20150 return std::make_pair(0U, &RISCV::FPR32RegClass);
20151 if (Subtarget.hasStdExtD() && VT == MVT::f64)
20152 return std::make_pair(0U, &RISCV::FPR64RegClass);
20153 break;
20154 default:
20155 break;
20156 }
20157 } else if (Constraint == "vr") {
20158 for (const auto *RC : {&RISCV::VRRegClass, &RISCV::VRM2RegClass,
20159 &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
20160 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
20161 return std::make_pair(0U, RC);
20162 }
20163 } else if (Constraint == "vm") {
20164 if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy))
20165 return std::make_pair(0U, &RISCV::VMV0RegClass);
20166 }
20167
20168 // Clang will correctly decode the usage of register name aliases into their
20169 // official names. However, other frontends like `rustc` do not. This allows
20170 // users of these frontends to use the ABI names for registers in LLVM-style
20171 // register constraints.
20172 unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
20173 .Case("{zero}", RISCV::X0)
20174 .Case("{ra}", RISCV::X1)
20175 .Case("{sp}", RISCV::X2)
20176 .Case("{gp}", RISCV::X3)
20177 .Case("{tp}", RISCV::X4)
20178 .Case("{t0}", RISCV::X5)
20179 .Case("{t1}", RISCV::X6)
20180 .Case("{t2}", RISCV::X7)
20181 .Cases("{s0}", "{fp}", RISCV::X8)
20182 .Case("{s1}", RISCV::X9)
20183 .Case("{a0}", RISCV::X10)
20184 .Case("{a1}", RISCV::X11)
20185 .Case("{a2}", RISCV::X12)
20186 .Case("{a3}", RISCV::X13)
20187 .Case("{a4}", RISCV::X14)
20188 .Case("{a5}", RISCV::X15)
20189 .Case("{a6}", RISCV::X16)
20190 .Case("{a7}", RISCV::X17)
20191 .Case("{s2}", RISCV::X18)
20192 .Case("{s3}", RISCV::X19)
20193 .Case("{s4}", RISCV::X20)
20194 .Case("{s5}", RISCV::X21)
20195 .Case("{s6}", RISCV::X22)
20196 .Case("{s7}", RISCV::X23)
20197 .Case("{s8}", RISCV::X24)
20198 .Case("{s9}", RISCV::X25)
20199 .Case("{s10}", RISCV::X26)
20200 .Case("{s11}", RISCV::X27)
20201 .Case("{t3}", RISCV::X28)
20202 .Case("{t4}", RISCV::X29)
20203 .Case("{t5}", RISCV::X30)
20204 .Case("{t6}", RISCV::X31)
20205 .Default(RISCV::NoRegister);
20206 if (XRegFromAlias != RISCV::NoRegister)
20207 return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
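// Illustrative usage (a hypothetical snippet, not taken from this file):
//   asm volatile("mv %0, %1" : "={a0}"(dst) : "{t1}"(src));
// With a frontend that does not canonicalise register names, "{a0}" and
// "{t1}" are resolved by the table above to X10 and X6 respectively.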
20208
20209 // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
20210 // TableGen record rather than the AsmName to choose registers for InlineAsm
20211 // constraints, plus we want to match those names to the widest floating point
20212 // register type available, manually select floating point registers here.
20213 //
20214 // The second case is the ABI name of the register, so that frontends can also
20215 // use the ABI names in register constraint lists.
20216 if (Subtarget.hasStdExtF()) {
20217 unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
20218 .Cases("{f0}", "{ft0}", RISCV::F0_F)
20219 .Cases("{f1}", "{ft1}", RISCV::F1_F)
20220 .Cases("{f2}", "{ft2}", RISCV::F2_F)
20221 .Cases("{f3}", "{ft3}", RISCV::F3_F)
20222 .Cases("{f4}", "{ft4}", RISCV::F4_F)
20223 .Cases("{f5}", "{ft5}", RISCV::F5_F)
20224 .Cases("{f6}", "{ft6}", RISCV::F6_F)
20225 .Cases("{f7}", "{ft7}", RISCV::F7_F)
20226 .Cases("{f8}", "{fs0}", RISCV::F8_F)
20227 .Cases("{f9}", "{fs1}", RISCV::F9_F)
20228 .Cases("{f10}", "{fa0}", RISCV::F10_F)
20229 .Cases("{f11}", "{fa1}", RISCV::F11_F)
20230 .Cases("{f12}", "{fa2}", RISCV::F12_F)
20231 .Cases("{f13}", "{fa3}", RISCV::F13_F)
20232 .Cases("{f14}", "{fa4}", RISCV::F14_F)
20233 .Cases("{f15}", "{fa5}", RISCV::F15_F)
20234 .Cases("{f16}", "{fa6}", RISCV::F16_F)
20235 .Cases("{f17}", "{fa7}", RISCV::F17_F)
20236 .Cases("{f18}", "{fs2}", RISCV::F18_F)
20237 .Cases("{f19}", "{fs3}", RISCV::F19_F)
20238 .Cases("{f20}", "{fs4}", RISCV::F20_F)
20239 .Cases("{f21}", "{fs5}", RISCV::F21_F)
20240 .Cases("{f22}", "{fs6}", RISCV::F22_F)
20241 .Cases("{f23}", "{fs7}", RISCV::F23_F)
20242 .Cases("{f24}", "{fs8}", RISCV::F24_F)
20243 .Cases("{f25}", "{fs9}", RISCV::F25_F)
20244 .Cases("{f26}", "{fs10}", RISCV::F26_F)
20245 .Cases("{f27}", "{fs11}", RISCV::F27_F)
20246 .Cases("{f28}", "{ft8}", RISCV::F28_F)
20247 .Cases("{f29}", "{ft9}", RISCV::F29_F)
20248 .Cases("{f30}", "{ft10}", RISCV::F30_F)
20249 .Cases("{f31}", "{ft11}", RISCV::F31_F)
20250 .Default(RISCV::NoRegister);
20251 if (FReg != RISCV::NoRegister) {
20252 assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
20253 if (Subtarget.hasStdExtD() && (VT == MVT::f64 || VT == MVT::Other)) {
20254 unsigned RegNo = FReg - RISCV::F0_F;
20255 unsigned DReg = RISCV::F0_D + RegNo;
20256 return std::make_pair(DReg, &RISCV::FPR64RegClass);
20257 }
20258 if (VT == MVT::f32 || VT == MVT::Other)
20259 return std::make_pair(FReg, &RISCV::FPR32RegClass);
20260 if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16) {
20261 unsigned RegNo = FReg - RISCV::F0_F;
20262 unsigned HReg = RISCV::F0_H + RegNo;
20263 return std::make_pair(HReg, &RISCV::FPR16RegClass);
20264 }
20265 }
20266 }
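// For example (illustrative only): with the D extension, the constraint
// "{fa0}" on an f64 operand resolves above to F10_D in FPR64, while the same
// constraint on an f16 operand with Zfhmin resolves to F10_H in FPR16.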
20267
20268 if (Subtarget.hasVInstructions()) {
20269 Register VReg = StringSwitch<Register>(Constraint.lower())
20270 .Case("{v0}", RISCV::V0)
20271 .Case("{v1}", RISCV::V1)
20272 .Case("{v2}", RISCV::V2)
20273 .Case("{v3}", RISCV::V3)
20274 .Case("{v4}", RISCV::V4)
20275 .Case("{v5}", RISCV::V5)
20276 .Case("{v6}", RISCV::V6)
20277 .Case("{v7}", RISCV::V7)
20278 .Case("{v8}", RISCV::V8)
20279 .Case("{v9}", RISCV::V9)
20280 .Case("{v10}", RISCV::V10)
20281 .Case("{v11}", RISCV::V11)
20282 .Case("{v12}", RISCV::V12)
20283 .Case("{v13}", RISCV::V13)
20284 .Case("{v14}", RISCV::V14)
20285 .Case("{v15}", RISCV::V15)
20286 .Case("{v16}", RISCV::V16)
20287 .Case("{v17}", RISCV::V17)
20288 .Case("{v18}", RISCV::V18)
20289 .Case("{v19}", RISCV::V19)
20290 .Case("{v20}", RISCV::V20)
20291 .Case("{v21}", RISCV::V21)
20292 .Case("{v22}", RISCV::V22)
20293 .Case("{v23}", RISCV::V23)
20294 .Case("{v24}", RISCV::V24)
20295 .Case("{v25}", RISCV::V25)
20296 .Case("{v26}", RISCV::V26)
20297 .Case("{v27}", RISCV::V27)
20298 .Case("{v28}", RISCV::V28)
20299 .Case("{v29}", RISCV::V29)
20300 .Case("{v30}", RISCV::V30)
20301 .Case("{v31}", RISCV::V31)
20302 .Default(RISCV::NoRegister);
20303 if (VReg != RISCV::NoRegister) {
20304 if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy))
20305 return std::make_pair(VReg, &RISCV::VMRegClass);
20306 if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy))
20307 return std::make_pair(VReg, &RISCV::VRRegClass);
20308 for (const auto *RC :
20309 {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
20310 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) {
20311 VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC);
20312 return std::make_pair(VReg, RC);
20313 }
20314 }
20315 }
20316 }
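// For example (illustrative only): "{v8}" with an LMUL=2 type such as
// nxv4i32 is promoted above to the matching V8M2 super-register in VRM2 via
// getMatchingSuperReg, while an i1 mask type keeps plain V8 in the mask
// register class.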
20317
20318 std::pair<Register, const TargetRegisterClass *> Res =
20319 TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
20320
20321 // If we picked one of the Zfinx register classes, remap it to the GPR class.
20322 // FIXME: When Zfinx is supported in CodeGen this will need to take the
20323 // Subtarget into account.
20324 if (Res.second == &RISCV::GPRF16RegClass ||
20325 Res.second == &RISCV::GPRF32RegClass ||
20326 Res.second == &RISCV::GPRPairRegClass)
20327 return std::make_pair(Res.first, &RISCV::GPRRegClass);
20328
20329 return Res;
20330}
20331
20332InlineAsm::ConstraintCode
20333RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
20334 // Currently only support length 1 constraints.
20335 if (ConstraintCode.size() == 1) {
20336 switch (ConstraintCode[0]) {
20337 case 'A':
20338 return InlineAsm::ConstraintCode::A;
20339 default:
20340 break;
20341 }
20342 }
20343
20344 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
20345}
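// For example (illustrative only), the 'A' constraint marks an address held
// in a general-purpose register, as used by C code such as:
//   asm volatile("lr.w %0, %1" : "=r"(old) : "A"(*ptr));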
20346
20348 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
20349 SelectionDAG &DAG) const {
20350 // Currently only support length 1 constraints.
20351 if (Constraint.size() == 1) {
20352 switch (Constraint[0]) {
20353 case 'I':
20354 // Validate & create a 12-bit signed immediate operand.
20355 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
20356 uint64_t CVal = C->getSExtValue();
20357 if (isInt<12>(CVal))
20358 Ops.push_back(
20359 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
20360 }
20361 return;
20362 case 'J':
20363 // Validate & create an integer zero operand.
20364 if (isNullConstant(Op))
20365 Ops.push_back(
20366 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
20367 return;
20368 case 'K':
20369 // Validate & create a 5-bit unsigned immediate operand.
20370 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
20371 uint64_t CVal = C->getZExtValue();
20372 if (isUInt<5>(CVal))
20373 Ops.push_back(
20374 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
20375 }
20376 return;
20377 case 'S':
20378 TargetLowering::LowerAsmOperandForConstraint(Op, "s", Ops, DAG);
20379 return;
20380 default:
20381 break;
20382 }
20383 }
20384 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
20385}
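// For example (illustrative only): with the validation above, C code such as
//   asm volatile("addi %0, %1, %2" : "=r"(y) : "r"(x), "I"(42));
// materializes 42 as a 12-bit signed immediate operand, while "K" would
// restrict the operand to a 5-bit unsigned value and "J" to the constant zero.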
20386
20387Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
20388 Instruction *Inst,
20389 AtomicOrdering Ord) const {
20390 if (Subtarget.hasStdExtZtso()) {
20391 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
20392 return Builder.CreateFence(Ord);
20393 return nullptr;
20394 }
20395
20396 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
20397 return Builder.CreateFence(Ord);
20398 if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
20399 return Builder.CreateFence(AtomicOrdering::Release);
20400 return nullptr;
20401}
20402
20403Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
20404 Instruction *Inst,
20405 AtomicOrdering Ord) const {
20406 if (Subtarget.hasStdExtZtso()) {
20407 if (isa<StoreInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
20408 return Builder.CreateFence(Ord);
20409 return nullptr;
20410 }
20411
20412 if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
20413 return Builder.CreateFence(AtomicOrdering::Acquire);
20414 if (Subtarget.enableSeqCstTrailingFence() && isa<StoreInst>(Inst) &&
20415 Ord == AtomicOrdering::SequentiallyConsistent)
20416 return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
20417 return nullptr;
20418}
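// Taken together, the two hooks above implement the usual WMO mapping (a
// sketch, assuming no Ztso): a seq_cst load gets "fence rw,rw; load; fence
// r,rw", a release-or-stronger store gets a leading "fence rw,w", and a
// seq_cst store additionally gets a trailing "fence rw,rw" when
// enableSeqCstTrailingFence() is set.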
20419
20420TargetLowering::AtomicExpansionKind
20421RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
20422 // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
20423 // point operations can't be used in an lr/sc sequence without breaking the
20424 // forward-progress guarantee.
20425 if (AI->isFloatingPointOperation() ||
20426 AI->getOperation() == AtomicRMWInst::UIncWrap ||
20427 AI->getOperation() == AtomicRMWInst::UDecWrap)
20428 return AtomicExpansionKind::CmpXChg;
20429
20430 // Don't expand forced atomics, we want to have __sync libcalls instead.
20431 if (Subtarget.hasForcedAtomics())
20432 return AtomicExpansionKind::None;
20433
20434 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
20435 if (AI->getOperation() == AtomicRMWInst::Nand) {
20436 if (Subtarget.hasStdExtZacas() &&
20437 (Size >= 32 || Subtarget.hasStdExtZabha()))
20438 return AtomicExpansionKind::CmpXChg;
20439 if (Size < 32)
20440 return AtomicExpansionKind::MaskedIntrinsic;
20441 }
20442
20443 if (Size < 32 && !Subtarget.hasStdExtZabha())
20444 return AtomicExpansionKind::MaskedIntrinsic;
20445
20446 return AtomicExpansionKind::None;
20447}
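// For example (illustrative only): without Zabha,
//   %old = atomicrmw add ptr %p, i8 1 monotonic
// takes the Size < 32 path above and is expanded into a word-sized LR/SC loop
// through the llvm.riscv.masked.atomicrmw.add.i32/i64 intrinsic, whereas an
// i32/i64 atomicrmw add needs no expansion and is normally selected to an AMO
// instruction when the A extension is available.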
20448
20449static Intrinsic::ID
20450getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
20451 if (XLen == 32) {
20452 switch (BinOp) {
20453 default:
20454 llvm_unreachable("Unexpected AtomicRMW BinOp");
20455 case AtomicRMWInst::Xchg:
20456 return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
20457 case AtomicRMWInst::Add:
20458 return Intrinsic::riscv_masked_atomicrmw_add_i32;
20459 case AtomicRMWInst::Sub:
20460 return Intrinsic::riscv_masked_atomicrmw_sub_i32;
20461 case AtomicRMWInst::Nand:
20462 return Intrinsic::riscv_masked_atomicrmw_nand_i32;
20463 case AtomicRMWInst::Max:
20464 return Intrinsic::riscv_masked_atomicrmw_max_i32;
20465 case AtomicRMWInst::Min:
20466 return Intrinsic::riscv_masked_atomicrmw_min_i32;
20467 case AtomicRMWInst::UMax:
20468 return Intrinsic::riscv_masked_atomicrmw_umax_i32;
20469 case AtomicRMWInst::UMin:
20470 return Intrinsic::riscv_masked_atomicrmw_umin_i32;
20471 }
20472 }
20473
20474 if (XLen == 64) {
20475 switch (BinOp) {
20476 default:
20477 llvm_unreachable("Unexpected AtomicRMW BinOp");
20478 case AtomicRMWInst::Xchg:
20479 return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
20480 case AtomicRMWInst::Add:
20481 return Intrinsic::riscv_masked_atomicrmw_add_i64;
20482 case AtomicRMWInst::Sub:
20483 return Intrinsic::riscv_masked_atomicrmw_sub_i64;
20484 case AtomicRMWInst::Nand:
20485 return Intrinsic::riscv_masked_atomicrmw_nand_i64;
20486 case AtomicRMWInst::Max:
20487 return Intrinsic::riscv_masked_atomicrmw_max_i64;
20488 case AtomicRMWInst::Min:
20489 return Intrinsic::riscv_masked_atomicrmw_min_i64;
20490 case AtomicRMWInst::UMax:
20491 return Intrinsic::riscv_masked_atomicrmw_umax_i64;
20492 case AtomicRMWInst::UMin:
20493 return Intrinsic::riscv_masked_atomicrmw_umin_i64;
20494 }
20495 }
20496
20497 llvm_unreachable("Unexpected XLen\n");
20498}
20499
20500Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
20501 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
20502 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
20503 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
20504 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
20505 // mask, as this produces better code than the LR/SC loop emitted by
20506 // int_riscv_masked_atomicrmw_xchg.
20507 if (AI->getOperation() == AtomicRMWInst::Xchg &&
20508 isa<ConstantInt>(AI->getValOperand())) {
20509 ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
20510 if (CVal->isZero())
20511 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
20512 Builder.CreateNot(Mask, "Inv_Mask"),
20513 AI->getAlign(), Ord);
20514 if (CVal->isMinusOne())
20515 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
20516 AI->getAlign(), Ord);
20517 }
20518
20519 unsigned XLen = Subtarget.getXLen();
20520 Value *Ordering =
20521 Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
20522 Type *Tys[] = {AlignedAddr->getType()};
20523 Function *LrwOpScwLoop = Intrinsic::getDeclaration(
20524 AI->getModule(),
20525 getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);
20526
20527 if (XLen == 64) {
20528 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
20529 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
20530 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
20531 }
20532
20533 Value *Result;
20534
20535 // Must pass the shift amount needed to sign extend the loaded value prior
20536 // to performing a signed comparison for min/max. ShiftAmt is the number of
20537 // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
20538 // is the number of bits to left+right shift the value in order to
20539 // sign-extend.
20540 if (AI->getOperation() == AtomicRMWInst::Min ||
20541 AI->getOperation() == AtomicRMWInst::Max) {
20542 const DataLayout &DL = AI->getModule()->getDataLayout();
20543 unsigned ValWidth =
20544 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
20545 Value *SextShamt =
20546 Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
20547 Result = Builder.CreateCall(LrwOpScwLoop,
20548 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
20549 } else {
20550 Result =
20551 Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
20552 }
20553
20554 if (XLen == 64)
20555 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
20556 return Result;
20557}
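// Worked example of the shift computation above (illustrative only): on RV32,
// an i8 field placed at ShiftAmt = 8 has ValWidth = 8, so
// SextShamt = (XLen - ValWidth) - ShiftAmt = (32 - 8) - 8 = 16; the LR/SC loop
// then shifts the loaded field left and arithmetically right by 16 bits to
// sign-extend it before the signed min/max comparison.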
20558
20559TargetLowering::AtomicExpansionKind
20560RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
20561 AtomicCmpXchgInst *CI) const {
20562 // Don't expand forced atomics, we want to have __sync libcalls instead.
20563 if (Subtarget.hasForcedAtomics())
20564 return AtomicExpansionKind::None;
20565
20566 unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
20567 if (!(Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas()) &&
20568 (Size == 8 || Size == 16))
20569 return AtomicExpansionKind::MaskedIntrinsic;
20570 return AtomicExpansionKind::None;
20571}
20572
20573Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
20574 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
20575 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
20576 unsigned XLen = Subtarget.getXLen();
20577 Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
20578 Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
20579 if (XLen == 64) {
20580 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
20581 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
20582 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
20583 CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
20584 }
20585 Type *Tys[] = {AlignedAddr->getType()};
20586 Function *MaskedCmpXchg =
20587 Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
20588 Value *Result = Builder.CreateCall(
20589 MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
20590 if (XLen == 64)
20591 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
20592 return Result;
20593}
20594
20595bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(SDValue Extend,
20596 EVT DataVT) const {
20597 // We have indexed loads for all supported EEW types. Indices are always
20598 // zero extended.
20599 return Extend.getOpcode() == ISD::ZERO_EXTEND &&
20600 isTypeLegal(Extend.getValueType()) &&
20601 isTypeLegal(Extend.getOperand(0).getValueType()) &&
20602 Extend.getOperand(0).getValueType().getVectorElementType() != MVT::i1;
20603}
20604
20605bool RISCVTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
20606 EVT VT) const {
20607 if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
20608 return false;
20609
20610 switch (FPVT.getSimpleVT().SimpleTy) {
20611 case MVT::f16:
20612 return Subtarget.hasStdExtZfhmin();
20613 case MVT::f32:
20614 return Subtarget.hasStdExtF();
20615 case MVT::f64:
20616 return Subtarget.hasStdExtD();
20617 default:
20618 return false;
20619 }
20620}
20621
20622unsigned RISCVTargetLowering::getJumpTableEncoding() const {
20623 // If we are using the small code model, we can reduce size of jump table
20624 // entry to 4 bytes.
20625 if (Subtarget.is64Bit() && !isPositionIndependent() &&
20626 getTargetMachine().getCodeModel() == CodeModel::Small) {
20627 return MachineJumpTableInfo::EK_Custom32;
20628 }
20629 return TargetLowering::getJumpTableEncoding();
20630}
20631
20632const MCExpr *RISCVTargetLowering::LowerCustomJumpTableEntry(
20633 const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
20634 unsigned uid, MCContext &Ctx) const {
20635 assert(Subtarget.is64Bit() && !isPositionIndependent() &&
20636 getTargetMachine().getCodeModel() == CodeModel::Small);
20637 return MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
20638}
20639
20640bool RISCVTargetLowering::isVScaleKnownToBeAPowerOfTwo() const {
20641 // We define vscale to be VLEN/RVVBitsPerBlock. VLEN is always a power
20642 // of two >= 64, and RVVBitsPerBlock is 64. Thus, vscale must be
20643 // a power of two as well.
20644 // FIXME: This doesn't work for zve32, but that's already broken
20645 // elsewhere for the same reason.
20646 assert(Subtarget.getRealMinVLen() >= 64 && "zve32* unsupported");
20647 static_assert(RISCV::RVVBitsPerBlock == 64,
20648 "RVVBitsPerBlock changed, audit needed");
20649 return true;
20650}
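// For example (illustrative only): a machine with VLEN = 128 has
// vscale = VLEN / RVVBitsPerBlock = 128 / 64 = 2, and any power-of-two
// VLEN >= 64 keeps vscale a power of two, which is what this hook promises.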
20651
20653 SDValue &Offset,
20655 SelectionDAG &DAG) const {
20656 // Target does not support indexed loads.
20657 if (!Subtarget.hasVendorXTHeadMemIdx())
20658 return false;
20659
20660 if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
20661 return false;
20662
20663 Base = Op->getOperand(0);
20664 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
20665 int64_t RHSC = RHS->getSExtValue();
20666 if (Op->getOpcode() == ISD::SUB)
20667 RHSC = -(uint64_t)RHSC;
20668
20669 // The constants that can be encoded in the THeadMemIdx instructions
20670 // are of the form (sign_extend(imm5) << imm2).
20671 bool isLegalIndexedOffset = false;
20672 for (unsigned i = 0; i < 4; i++)
20673 if (isInt<5>(RHSC >> i) && ((RHSC % (1LL << i)) == 0)) {
20674 isLegalIndexedOffset = true;
20675 break;
20676 }
20677
20678 if (!isLegalIndexedOffset)
20679 return false;
20680
20681 Offset = Op->getOperand(1);
20682 return true;
20683 }
20684
20685 return false;
20686}
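// Worked example for the encoding above (illustrative only): an offset of 48
// is legal because 48 = 6 << 3 and 6 fits in a signed 5-bit immediate, while
// 33 is rejected: it is odd, so only i = 0 applies, and 33 does not fit in a
// signed 5-bit immediate.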
20687
20689 SDValue &Offset,
20691 SelectionDAG &DAG) const {
20692 EVT VT;
20693 SDValue Ptr;
20694 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
20695 VT = LD->getMemoryVT();
20696 Ptr = LD->getBasePtr();
20697 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
20698 VT = ST->getMemoryVT();
20699 Ptr = ST->getBasePtr();
20700 } else
20701 return false;
20702
20703 if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, DAG))
20704 return false;
20705
20706 AM = ISD::PRE_INC;
20707 return true;
20708}
20709
20711 SDValue &Base,
20712 SDValue &Offset,
20714 SelectionDAG &DAG) const {
20715 EVT VT;
20716 SDValue Ptr;
20717 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
20718 VT = LD->getMemoryVT();
20719 Ptr = LD->getBasePtr();
20720 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
20721 VT = ST->getMemoryVT();
20722 Ptr = ST->getBasePtr();
20723 } else
20724 return false;
20725
20726 if (!getIndexedAddressParts(Op, Base, Offset, AM, DAG))
20727 return false;
20728 // Post-indexing updates the base, so it's not a valid transform
20729 // if that's not the same as the load's pointer.
20730 if (Ptr != Base)
20731 return false;
20732
20733 AM = ISD::POST_INC;
20734 return true;
20735}
20736
20738 EVT VT) const {
20739 EVT SVT = VT.getScalarType();
20740
20741 if (!SVT.isSimple())
20742 return false;
20743
20744 switch (SVT.getSimpleVT().SimpleTy) {
20745 case MVT::f16:
20746 return VT.isVector() ? Subtarget.hasVInstructionsF16()
20747 : Subtarget.hasStdExtZfhOrZhinx();
20748 case MVT::f32:
20749 return Subtarget.hasStdExtFOrZfinx();
20750 case MVT::f64:
20751 return Subtarget.hasStdExtDOrZdinx();
20752 default:
20753 break;
20754 }
20755
20756 return false;
20757}
20758
20760 // Zacas will use amocas.w which does not require extension.
20761 return Subtarget.hasStdExtZacas() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
20762}
20763
20765 const Constant *PersonalityFn) const {
20766 return RISCV::X10;
20767}
20768
20770 const Constant *PersonalityFn) const {
20771 return RISCV::X11;
20772}
20773
20775 // Return false to suppress the unnecessary extensions if the LibCall
20776 // arguments or return value is a float narrower than XLEN on a soft FP ABI.
20777 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
20778 Type.getSizeInBits() < Subtarget.getXLen()))
20779 return false;
20780
20781 return true;
20782}
20783
20785 if (Subtarget.is64Bit() && Type == MVT::i32)
20786 return true;
20787
20788 return IsSigned;
20789}
20790
20792 SDValue C) const {
20793 // Check integral scalar types.
20794 const bool HasExtMOrZmmul =
20795 Subtarget.hasStdExtM() || Subtarget.hasStdExtZmmul();
20796 if (!VT.isScalarInteger())
20797 return false;
20798
20799 // Omit the optimization if the sub target has the M extension and the data
20800 // size exceeds XLen.
20801 if (HasExtMOrZmmul && VT.getSizeInBits() > Subtarget.getXLen())
20802 return false;
20803
20804 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
20805 // Break the MUL to a SLLI and an ADD/SUB.
20806 const APInt &Imm = ConstNode->getAPIntValue();
20807 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
20808 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
20809 return true;
20810
20811 // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12.
20812 if (Subtarget.hasStdExtZba() && !Imm.isSignedIntN(12) &&
20813 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
20814 (Imm - 8).isPowerOf2()))
20815 return true;
20816
20817 // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
20818 // a pair of LUI/ADDI.
20819 if (!Imm.isSignedIntN(12) && Imm.countr_zero() < 12 &&
20820 ConstNode->hasOneUse()) {
20821 APInt ImmS = Imm.ashr(Imm.countr_zero());
20822 if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
20823 (1 - ImmS).isPowerOf2())
20824 return true;
20825 }
20826 }
20827
20828 return false;
20829}
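// For example (illustrative only): x * 5 and x * 7 are decomposed here into
// (x << 2) + x and (x << 3) - x, and with Zba a non-simm12 constant such as
// 4100 = 4 + 4096 can use shNadd: (SH2ADD x, (SLLI x, 12)).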
20830
20832 SDValue ConstNode) const {
20833 // Let the DAGCombiner decide for vectors.
20834 EVT VT = AddNode.getValueType();
20835 if (VT.isVector())
20836 return true;
20837
20838 // Let the DAGCombiner decide for larger types.
20839 if (VT.getScalarSizeInBits() > Subtarget.getXLen())
20840 return true;
20841
20842 // It is worse if c1 is simm12 while c1*c2 is not.
20843 ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1));
20844 ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode);
20845 const APInt &C1 = C1Node->getAPIntValue();
20846 const APInt &C2 = C2Node->getAPIntValue();
20847 if (C1.isSignedIntN(12) && !(C1 * C2).isSignedIntN(12))
20848 return false;
20849
20850 // Default to true and let the DAGCombiner decide.
20851 return true;
20852}
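// For example (illustrative only): for (x + 2) * 3, C1 = 2 and C1 * C2 = 6
// both fit in simm12, so the fold into x * 3 + 6 is left to the DAGCombiner;
// for (x + 100) * 100, C1 fits but C1 * C2 = 10000 does not, so this returns
// false and the addition with the small immediate is kept.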
20853
20855 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
20856 unsigned *Fast) const {
20857 if (!VT.isVector()) {
20858 if (Fast)
20859 *Fast = Subtarget.enableUnalignedScalarMem();
20860 return Subtarget.enableUnalignedScalarMem();
20861 }
20862
20863 // All vector implementations must support element alignment
20864 EVT ElemVT = VT.getVectorElementType();
20865 if (Alignment >= ElemVT.getStoreSize()) {
20866 if (Fast)
20867 *Fast = 1;
20868 return true;
20869 }
20870
20871 // Note: We lower an unmasked unaligned vector access to an equally sized
20872 // e8 element type access. Given this, we effectively support all unmasked
20873 // misaligned accesses. TODO: Work through the codegen implications of
20874 // allowing such accesses to be formed, and considered fast.
20875 if (Fast)
20876 *Fast = Subtarget.enableUnalignedVectorMem();
20877 return Subtarget.enableUnalignedVectorMem();
20878}
20879
20880
20882 const AttributeList &FuncAttributes) const {
20883 if (!Subtarget.hasVInstructions())
20884 return MVT::Other;
20885
20886 if (FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat))
20887 return MVT::Other;
20888
20889 // We use LMUL1 memory operations here for a non-obvious reason. Our caller
20890 // has an expansion threshold, and we want the number of hardware memory
20891 // operations to correspond roughly to that threshold. LMUL>1 operations
20892 // are typically expanded linearly internally, and thus correspond to more
20893 // than one actual memory operation. Note that store merging and load
20894 // combining will typically form larger LMUL operations from the LMUL1
20895 // operations emitted here, and that's okay because combining isn't
20896 // introducing new memory operations; it's just merging existing ones.
20897 const unsigned MinVLenInBytes = Subtarget.getRealMinVLen()/8;
20898 if (Op.size() < MinVLenInBytes)
20899 // TODO: Figure out short memops. For the moment, do the default thing
20900 // which ends up using scalar sequences.
20901 return MVT::Other;
20902
20903 // Prefer i8 for non-zero memset as it allows us to avoid materializing
20904 // a large scalar constant and instead use vmv.v.x/i to do the
20905 // broadcast. For everything else, prefer ELenVT to minimize VL and thus
20906 // maximize the chance we can encode the size in the vsetvli.
20907 MVT ELenVT = MVT::getIntegerVT(Subtarget.getELen());
20908 MVT PreferredVT = (Op.isMemset() && !Op.isZeroMemset()) ? MVT::i8 : ELenVT;
20909
20910 // Do we have sufficient alignment for our preferred VT? If not, revert
20911 // to largest size allowed by our alignment criteria.
20912 if (PreferredVT != MVT::i8 && !Subtarget.enableUnalignedVectorMem()) {
20913 Align RequiredAlign(PreferredVT.getStoreSize());
20914 if (Op.isFixedDstAlign())
20915 RequiredAlign = std::min(RequiredAlign, Op.getDstAlign());
20916 if (Op.isMemcpy())
20917 RequiredAlign = std::min(RequiredAlign, Op.getSrcAlign());
20918 PreferredVT = MVT::getIntegerVT(RequiredAlign.value() * 8);
20919 }
20920 return MVT::getVectorVT(PreferredVT, MinVLenInBytes/PreferredVT.getStoreSize());
20921}
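// For example (a sketch, assuming rv64gcv with VLEN >= 128 and sufficient
// alignment): a 64-byte memcpy prefers ELEN-sized elements and returns v2i64
// (one LMUL1 register's worth at VLEN = 128), while a memset of a non-zero
// byte pattern returns v16i8 so the value can be broadcast with vmv.v.x.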
20922
20924 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
20925 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
20926 bool IsABIRegCopy = CC.has_value();
20927 EVT ValueVT = Val.getValueType();
20928 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
20929 PartVT == MVT::f32) {
20930 // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
20931 // nan, and cast to f32.
20932 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
20933 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
20934 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
20935 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
20936 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
20937 Parts[0] = Val;
20938 return true;
20939 }
20940
20941 if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
20942 LLVMContext &Context = *DAG.getContext();
20943 EVT ValueEltVT = ValueVT.getVectorElementType();
20944 EVT PartEltVT = PartVT.getVectorElementType();
20945 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
20946 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
20947 if (PartVTBitSize % ValueVTBitSize == 0) {
20948 assert(PartVTBitSize >= ValueVTBitSize);
20949 // If the element types are different, bitcast to the same element type of
20950 // PartVT first.
20951 // For example, to copy a <vscale x 1 x i8> value into
20952 // <vscale x 4 x i16>:
20953 // first widen <vscale x 1 x i8> to <vscale x 8 x i8> with an insert
20954 // subvector, then bitcast the result to <vscale x 4 x i16>.
20955 if (ValueEltVT != PartEltVT) {
20956 if (PartVTBitSize > ValueVTBitSize) {
20957 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
20958 assert(Count != 0 && "The number of element should not be zero.");
20959 EVT SameEltTypeVT =
20960 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
20961 Val = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SameEltTypeVT,
20962 DAG.getUNDEF(SameEltTypeVT), Val,
20963 DAG.getVectorIdxConstant(0, DL));
20964 }
20965 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
20966 } else {
20967 Val =
20968 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
20969 Val, DAG.getVectorIdxConstant(0, DL));
20970 }
20971 Parts[0] = Val;
20972 return true;
20973 }
20974 }
20975 return false;
20976}
20977
20979 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
20980 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
20981 bool IsABIRegCopy = CC.has_value();
20982 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
20983 PartVT == MVT::f32) {
20984 SDValue Val = Parts[0];
20985
20986 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
20987 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
20988 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
20989 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
20990 return Val;
20991 }
20992
20993 if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
20994 LLVMContext &Context = *DAG.getContext();
20995 SDValue Val = Parts[0];
20996 EVT ValueEltVT = ValueVT.getVectorElementType();
20997 EVT PartEltVT = PartVT.getVectorElementType();
20998 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
20999 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
21000 if (PartVTBitSize % ValueVTBitSize == 0) {
21001 assert(PartVTBitSize >= ValueVTBitSize);
21002 EVT SameEltTypeVT = ValueVT;
21003 // If the element types are different, convert it to the same element type
21004 // of PartVT.
21005 // For example, to copy a <vscale x 1 x i8> value out of
21006 // <vscale x 4 x i16>:
21007 // first bitcast <vscale x 4 x i16> to <vscale x 8 x i8>,
21008 // then extract the <vscale x 1 x i8> subvector.
21009 if (ValueEltVT != PartEltVT) {
21010 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
21011 assert(Count != 0 && "The number of element should not be zero.");
21012 SameEltTypeVT =
21013 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
21014 Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val);
21015 }
21016 Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
21017 DAG.getVectorIdxConstant(0, DL));
21018 return Val;
21019 }
21020 }
21021 return SDValue();
21022}
21023
21025 // When aggressively optimizing for code size, we prefer to use a div
21026 // instruction, as it is usually smaller than the alternative sequence.
21027 // TODO: Add vector division?
21028 bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
21029 return OptSize && !VT.isVector();
21030}
21031
21033 // Scalarizing a zero_extend or sign_extend may prevent it from later matching
21034 // a widening instruction in some situations.
21035 unsigned Opc = N->getOpcode();
21036 if (Opc == ISD::ZERO_EXTEND || Opc == ISD::SIGN_EXTEND)
21037 return false;
21038 return true;
21039}
21040
21041static Value *useTpOffset(IRBuilderBase &IRB, unsigned Offset) {
21042 Module *M = IRB.GetInsertBlock()->getParent()->getParent();
21043 Function *ThreadPointerFunc =
21044 Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
21045 return IRB.CreateConstGEP1_32(IRB.getInt8Ty(),
21046 IRB.CreateCall(ThreadPointerFunc), Offset);
21047}
21048
21050 // Fuchsia provides a fixed TLS slot for the stack cookie.
21051 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
21052 if (Subtarget.isTargetFuchsia())
21053 return useTpOffset(IRB, -0x10);
21054
21055 return TargetLowering::getIRStackGuard(IRB);
21056}
21057
21059 VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace,
21060 const DataLayout &DL) const {
21061 EVT VT = getValueType(DL, VTy);
21062 // Don't lower vlseg/vsseg for vector types that can't be split.
21063 if (!isTypeLegal(VT))
21064 return false;
21065
21066 if (!isLegalElementTypeForRVV(VT.getScalarType()) ||
21067 !allowsMemoryAccessForAlignment(VTy->getContext(), DL, VT, AddrSpace,
21068 Alignment))
21069 return false;
21070
21071 MVT ContainerVT = VT.getSimpleVT();
21072
21073 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
21074 if (!Subtarget.useRVVForFixedLengthVectors())
21075 return false;
21076 // Sometimes the interleaved access pass picks up splats as interleaves of
21077 // one element. Don't lower these.
21078 if (FVTy->getNumElements() < 2)
21079 return false;
21080
21081 ContainerVT = getContainerForFixedLengthVector(VT.getSimpleVT());
21082 } else {
21083 // The intrinsics for scalable vectors are not overloaded on pointer type
21084 // and can only handle the default address space.
21085 if (AddrSpace)
21086 return false;
21087 }
21088
21089 // Need to make sure that EMUL * NFIELDS ≤ 8
21090 auto [LMUL, Fractional] = RISCVVType::decodeVLMUL(getLMUL(ContainerVT));
21091 if (Fractional)
21092 return true;
21093 return Factor * LMUL <= 8;
21094}
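// For example (illustrative only): a factor-4 interleave of nxv4i32 (LMUL=2)
// gives EMUL * NFIELDS = 2 * 4 = 8 and is accepted, while factor 8 with the
// same element group (2 * 8 = 16) is rejected; fractional LMUL groups always
// pass this check.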
21095
21097 Align Alignment) const {
21098 if (!Subtarget.hasVInstructions())
21099 return false;
21100
21101 // Only support fixed vectors if we know the minimum vector size.
21102 if (DataType.isFixedLengthVector() && !Subtarget.useRVVForFixedLengthVectors())
21103 return false;
21104
21105 EVT ScalarType = DataType.getScalarType();
21106 if (!isLegalElementTypeForRVV(ScalarType))
21107 return false;
21108
21109 if (!Subtarget.enableUnalignedVectorMem() &&
21110 Alignment < ScalarType.getStoreSize())
21111 return false;
21112
21113 return true;
21114}
21115
21116static const Intrinsic::ID FixedVlsegIntrIds[] = {
21117 Intrinsic::riscv_seg2_load, Intrinsic::riscv_seg3_load,
21118 Intrinsic::riscv_seg4_load, Intrinsic::riscv_seg5_load,
21119 Intrinsic::riscv_seg6_load, Intrinsic::riscv_seg7_load,
21120 Intrinsic::riscv_seg8_load};
21121
21122/// Lower an interleaved load into a vlsegN intrinsic.
21123///
21124/// E.g. Lower an interleaved load (Factor = 2):
21125/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr
21126/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
21127/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
21128///
21129/// Into:
21130/// %ld2 = { <4 x i32>, <4 x i32> } call llvm.riscv.seg2.load.v4i32.p0.i64(
21131/// %ptr, i64 4)
21132/// %vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0
21133/// %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1
21136 ArrayRef<unsigned> Indices, unsigned Factor) const {
21137 IRBuilder<> Builder(LI);
21138
21139 auto *VTy = cast<FixedVectorType>(Shuffles[0]->getType());
21140 if (!isLegalInterleavedAccessType(VTy, Factor, LI->getAlign(),
21141 LI->getPointerAddressSpace(),
21142 LI->getModule()->getDataLayout())
21143 return false;
21144
21145 auto *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
21146
21147 Function *VlsegNFunc =
21148 Intrinsic::getDeclaration(LI->getModule(), FixedVlsegIntrIds[Factor - 2],
21149 {VTy, LI->getPointerOperandType(), XLenTy});
21150
21151 Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
21152
21153 CallInst *VlsegN =
21154 Builder.CreateCall(VlsegNFunc, {LI->getPointerOperand(), VL});
21155
21156 for (unsigned i = 0; i < Shuffles.size(); i++) {
21157 Value *SubVec = Builder.CreateExtractValue(VlsegN, Indices[i]);
21158 Shuffles[i]->replaceAllUsesWith(SubVec);
21159 }
21160
21161 return true;
21162}
21163
21164static const Intrinsic::ID FixedVssegIntrIds[] = {
21165 Intrinsic::riscv_seg2_store, Intrinsic::riscv_seg3_store,
21166 Intrinsic::riscv_seg4_store, Intrinsic::riscv_seg5_store,
21167 Intrinsic::riscv_seg6_store, Intrinsic::riscv_seg7_store,
21168 Intrinsic::riscv_seg8_store};
21169
21170/// Lower an interleaved store into a vssegN intrinsic.
21171///
21172/// E.g. Lower an interleaved store (Factor = 3):
21173/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
21174/// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
21175/// store <12 x i32> %i.vec, <12 x i32>* %ptr
21176///
21177/// Into:
21178/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
21179/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
21180/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
21181/// call void llvm.riscv.seg3.store.v4i32.p0.i64(%sub.v0, %sub.v1, %sub.v2,
21182/// %ptr, i32 4)
21183///
21184/// Note that the new shufflevectors will be removed and we'll only generate one
21185/// vsseg3 instruction in CodeGen.
21187 ShuffleVectorInst *SVI,
21188 unsigned Factor) const {
21189 IRBuilder<> Builder(SI);
21190 auto *ShuffleVTy = cast<FixedVectorType>(SVI->getType());
21191 // Given SVI : <n*factor x ty>, then VTy : <n x ty>
21192 auto *VTy = FixedVectorType::get(ShuffleVTy->getElementType(),
21193 ShuffleVTy->getNumElements() / Factor);
21194 if (!isLegalInterleavedAccessType(VTy, Factor, SI->getAlign(),
21195 SI->getPointerAddressSpace(),
21196 SI->getModule()->getDataLayout()))
21197 return false;
21198
21199 auto *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
21200
21201 Function *VssegNFunc =
21202 Intrinsic::getDeclaration(SI->getModule(), FixedVssegIntrIds[Factor - 2],
21203 {VTy, SI->getPointerOperandType(), XLenTy});
21204
21205 auto Mask = SVI->getShuffleMask();
21207
21208 for (unsigned i = 0; i < Factor; i++) {
21209 Value *Shuffle = Builder.CreateShuffleVector(
21210 SVI->getOperand(0), SVI->getOperand(1),
21211 createSequentialMask(Mask[i], VTy->getNumElements(), 0));
21212 Ops.push_back(Shuffle);
21213 }
21214 // This VL should be OK (should be executable in one vsseg instruction,
21215 // potentially under larger LMULs) because we checked that the fixed vector
21216 // type fits in isLegalInterleavedAccessType
21217 Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
21218 Ops.append({SI->getPointerOperand(), VL});
21219
21220 Builder.CreateCall(VssegNFunc, Ops);
21221
21222 return true;
21223}
21224
21226 LoadInst *LI) const {
21227 assert(LI->isSimple());
21228 IRBuilder<> Builder(LI);
21229
21230 // Only deinterleave2 supported at present.
21231 if (DI->getIntrinsicID() != Intrinsic::vector_deinterleave2)
21232 return false;
21233
21234 unsigned Factor = 2;
21235
21236 VectorType *VTy = cast<VectorType>(DI->getOperand(0)->getType());
21237 VectorType *ResVTy = cast<VectorType>(DI->getType()->getContainedType(0));
21238
21239 if (!isLegalInterleavedAccessType(ResVTy, Factor, LI->getAlign(),
21240 LI->getPointerAddressSpace(),
21241 LI->getModule()->getDataLayout())
21242 return false;
21243
21244 Function *VlsegNFunc;
21245 Value *VL;
21246 Type *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
21248
21249 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
21250 VlsegNFunc = Intrinsic::getDeclaration(
21251 LI->getModule(), FixedVlsegIntrIds[Factor - 2],
21252 {ResVTy, LI->getPointerOperandType(), XLenTy});
21253 VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
21254 } else {
21255 static const Intrinsic::ID IntrIds[] = {
21256 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
21257 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
21258 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
21259 Intrinsic::riscv_vlseg8};
21260
21261 VlsegNFunc = Intrinsic::getDeclaration(LI->getModule(), IntrIds[Factor - 2],
21262 {ResVTy, XLenTy});
21263 VL = Constant::getAllOnesValue(XLenTy);
21264 Ops.append(Factor, PoisonValue::get(ResVTy));
21265 }
21266
21267 Ops.append({LI->getPointerOperand(), VL});
21268
21269 Value *Vlseg = Builder.CreateCall(VlsegNFunc, Ops);
21270 DI->replaceAllUsesWith(Vlseg);
21271
21272 return true;
21273}
21274
21276 StoreInst *SI) const {
21277 assert(SI->isSimple());
21278 IRBuilder<> Builder(SI);
21279
21280 // Only interleave2 supported at present.
21281 if (II->getIntrinsicID() != Intrinsic::vector_interleave2)
21282 return false;
21283
21284 unsigned Factor = 2;
21285
21286 VectorType *VTy = cast<VectorType>(II->getType());
21287 VectorType *InVTy = cast<VectorType>(II->getOperand(0)->getType());
21288
21289 if (!isLegalInterleavedAccessType(InVTy, Factor, SI->getAlign(),
21290 SI->getPointerAddressSpace(),
21291 SI->getModule()->getDataLayout()))
21292 return false;
21293
21294 Function *VssegNFunc;
21295 Value *VL;
21296 Type *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
21297
21298 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
21299 VssegNFunc = Intrinsic::getDeclaration(
21300 SI->getModule(), FixedVssegIntrIds[Factor - 2],
21301 {InVTy, SI->getPointerOperandType(), XLenTy});
21302 VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
21303 } else {
21304 static const Intrinsic::ID IntrIds[] = {
21305 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
21306 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
21307 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
21308 Intrinsic::riscv_vsseg8};
21309
21310 VssegNFunc = Intrinsic::getDeclaration(SI->getModule(), IntrIds[Factor - 2],
21311 {InVTy, XLenTy});
21312 VL = Constant::getAllOnesValue(XLenTy);
21313 }
21314
21315 Builder.CreateCall(VssegNFunc, {II->getOperand(0), II->getOperand(1),
21316 SI->getPointerOperand(), VL});
21317
21318 return true;
21319}
21320
21324 const TargetInstrInfo *TII) const {
21325 assert(MBBI->isCall() && MBBI->getCFIType() &&
21326 "Invalid call instruction for a KCFI check");
21327 assert(is_contained({RISCV::PseudoCALLIndirect, RISCV::PseudoTAILIndirect},
21328 MBBI->getOpcode()));
21329
21330 MachineOperand &Target = MBBI->getOperand(0);
21331 Target.setIsRenamable(false);
21332
21333 return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(RISCV::KCFI_CHECK))
21334 .addReg(Target.getReg())
21335 .addImm(MBBI->getCFIType())
21336 .getInstr();
21337}
21338
21339#define GET_REGISTER_MATCHER
21340#include "RISCVGenAsmMatcher.inc"
21341
21342Register
21343RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
21344 const MachineFunction &MF) const {
21345 Register Reg = MatchRegisterAltName(RegName);
21346 if (Reg == RISCV::NoRegister)
21347 Reg = MatchRegisterName(RegName);
21348 if (Reg == RISCV::NoRegister)
21349 report_fatal_error(
21350 Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
21351 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
21352 if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
21353 report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
21354 StringRef(RegName) + "\"."));
21355 return Reg;
21356}
21357
21360 const MDNode *NontemporalInfo = I.getMetadata(LLVMContext::MD_nontemporal);
21361
21362 if (NontemporalInfo == nullptr)
21363 return MachineMemOperand::MONone;
21364
21365 // 1 is the default value and behaves as __RISCV_NTLH_ALL
21366 // 2 -> __RISCV_NTLH_INNERMOST_PRIVATE
21367 // 3 -> __RISCV_NTLH_ALL_PRIVATE
21368 // 4 -> __RISCV_NTLH_INNERMOST_SHARED
21369 // 5 -> __RISCV_NTLH_ALL
21370 int NontemporalLevel = 5;
21371 const MDNode *RISCVNontemporalInfo =
21372 I.getMetadata("riscv-nontemporal-domain");
21373 if (RISCVNontemporalInfo != nullptr)
21374 NontemporalLevel =
21375 cast<ConstantInt>(
21376 cast<ConstantAsMetadata>(RISCVNontemporalInfo->getOperand(0))
21377 ->getValue())
21378 ->getZExtValue();
21379
21380 assert((1 <= NontemporalLevel && NontemporalLevel <= 5) &&
21381 "RISC-V target doesn't support this non-temporal domain.");
21382
21383 NontemporalLevel -= 2;
21384 MachineMemOperand::Flags Flags = MachineMemOperand::MONone;
21385 if (NontemporalLevel & 0b1)
21386 Flags |= MONontemporalBit0;
21387 if (NontemporalLevel & 0b10)
21388 Flags |= MONontemporalBit1;
21389
21390 return Flags;
21391}
21392
21395
21396 MachineMemOperand::Flags NodeFlags = Node.getMemOperand()->getFlags();
21397 MachineMemOperand::Flags TargetFlags = MachineMemOperand::MONone;
21398 TargetFlags |= (NodeFlags & MONontemporalBit0);
21399 TargetFlags |= (NodeFlags & MONontemporalBit1);
21400 return TargetFlags;
21401}
21402
21404 const MemSDNode &NodeX, const MemSDNode &NodeY) const {
21405 return getTargetMMOFlags(NodeX) == getTargetMMOFlags(NodeY);
21406}
21407
21409 if (VT.isScalableVector())
21410 return isTypeLegal(VT) && Subtarget.hasStdExtZvbb();
21411 if (VT.isFixedLengthVector() && Subtarget.hasStdExtZvbb())
21412 return true;
21413 return Subtarget.hasStdExtZbb() &&
21414 (VT == MVT::i32 || VT == MVT::i64 || VT.isFixedLengthVector());
21415}
21416
21418 ISD::CondCode Cond) const {
21419 return isCtpopFast(VT) ? 0 : 1;
21420}
21421
21423
21424 // GISel support is in progress or complete for these opcodes.
21425 unsigned Op = Inst.getOpcode();
21426 if (Op == Instruction::Add || Op == Instruction::Sub ||
21427 Op == Instruction::And || Op == Instruction::Or ||
21428 Op == Instruction::Xor || Op == Instruction::InsertElement ||
21429 Op == Instruction::ShuffleVector || Op == Instruction::Load)
21430 return false;
21431
21432 if (Inst.getType()->isScalableTy())
21433 return true;
21434
21435 for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
21436 if (Inst.getOperand(i)->getType()->isScalableTy() &&
21437 !isa<ReturnInst>(&Inst))
21438 return true;
21439
21440 if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
21441 if (AI->getAllocatedType()->isScalableTy())
21442 return true;
21443 }
21444
21445 return false;
21446}
21447
21448SDValue
21449RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
21450 SelectionDAG &DAG,
21451 SmallVectorImpl<SDNode *> &Created) const {
21452 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
21453 if (isIntDivCheap(N->getValueType(0), Attr))
21454 return SDValue(N, 0); // Lower SDIV as SDIV
21455
21456 // Only perform this transform if short forward branch opt is supported.
21457 if (!Subtarget.hasShortForwardBranchOpt())
21458 return SDValue();
21459 EVT VT = N->getValueType(0);
21460 if (!(VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit())))
21461 return SDValue();
21462
21463 // Ensure 2**k-1 < 2048 so that we can just emit a single addi/addiw.
21464 if (Divisor.sgt(2048) || Divisor.slt(-2048))
21465 return SDValue();
21466 return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created);
21467}
21468
21469bool RISCVTargetLowering::shouldFoldSelectWithSingleBitTest(
21470 EVT VT, const APInt &AndMask) const {
21471 if (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())
21472 return !Subtarget.hasStdExtZbs() && AndMask.ugt(1024);
21473 return TargetLowering::shouldFoldSelectWithSingleBitTest(VT, AndMask);
21474}
21475
21476unsigned RISCVTargetLowering::getMinimumJumpTableEntries() const {
21477 return Subtarget.getMinimumJumpTableEntries();
21478}
21479
21480// Handle single arg such as return value.
21481template <typename Arg>
21482void RVVArgDispatcher::constructArgInfos(ArrayRef<Arg> ArgList) {
21483 // This lambda determines whether the given array of argument types consists
21484 // of homogeneous scalable vector types.
21485 auto isHomogeneousScalableVectorType = [](ArrayRef<Arg> ArgList) {
21486 // First, extract the first element in the argument type.
21487 auto It = ArgList.begin();
21488 MVT FirstArgRegType = It->VT;
21489
21490 // Return if there is no return or the type needs split.
21491 if (It == ArgList.end() || It->Flags.isSplit())
21492 return false;
21493
21494 ++It;
21495
21496 // Return if this argument type contains only 1 element, or it's not a
21497 // vector type.
21498 if (It == ArgList.end() || !FirstArgRegType.isScalableVector())
21499 return false;
21500
21501 // Second, check if the following elements in this argument type are all the
21502 // same.
21503 for (; It != ArgList.end(); ++It)
21504 if (It->Flags.isSplit() || It->VT != FirstArgRegType)
21505 return false;
21506
21507 return true;
21508 };
21509
21510 if (isHomogeneousScalableVectorType(ArgList)) {
21511 // Handle as tuple type
21512 RVVArgInfos.push_back({(unsigned)ArgList.size(), ArgList[0].VT, false});
21513 } else {
21514 // Handle as normal vector type
21515 bool FirstVMaskAssigned = false;
21516 for (const auto &OutArg : ArgList) {
21517 MVT RegisterVT = OutArg.VT;
21518
21519 // Skip non-RVV register type
21520 if (!RegisterVT.isVector())
21521 continue;
21522
21523 if (RegisterVT.isFixedLengthVector())
21524 RegisterVT = TLI->getContainerForFixedLengthVector(RegisterVT);
21525
21526 if (!FirstVMaskAssigned && RegisterVT.getVectorElementType() == MVT::i1) {
21527 RVVArgInfos.push_back({1, RegisterVT, true});
21528 FirstVMaskAssigned = true;
21529 continue;
21530 }
21531
21532 RVVArgInfos.push_back({1, RegisterVT, false});
21533 }
21534 }
21535}
21536
21537// Handle multiple args.
21538template <>
21539void RVVArgDispatcher::constructArgInfos<Type *>(ArrayRef<Type *> TypeList) {
21540 const DataLayout &DL = MF->getDataLayout();
21541 const Function &F = MF->getFunction();
21542 LLVMContext &Context = F.getContext();
21543
21544 bool FirstVMaskAssigned = false;
21545 for (Type *Ty : TypeList) {
21546 StructType *STy = dyn_cast<StructType>(Ty);
21547 if (STy && STy->containsHomogeneousScalableVectorTypes()) {
21548 Type *ElemTy = STy->getTypeAtIndex(0U);
21549 EVT VT = TLI->getValueType(DL, ElemTy);
21550 MVT RegisterVT =
21551 TLI->getRegisterTypeForCallingConv(Context, F.getCallingConv(), VT);
21552 unsigned NumRegs =
21553 TLI->getNumRegistersForCallingConv(Context, F.getCallingConv(), VT);
21554
21555 RVVArgInfos.push_back(
21556 {NumRegs * STy->getNumElements(), RegisterVT, false});
21557 } else {
21558 SmallVector<EVT, 4> ValueVTs;
21559 ComputeValueVTs(*TLI, DL, Ty, ValueVTs);
21560
21561 for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues;
21562 ++Value) {
21563 EVT VT = ValueVTs[Value];
21564 MVT RegisterVT =
21565 TLI->getRegisterTypeForCallingConv(Context, F.getCallingConv(), VT);
21566 unsigned NumRegs =
21567 TLI->getNumRegistersForCallingConv(Context, F.getCallingConv(), VT);
21568
21569 // Skip non-RVV register type
21570 if (!RegisterVT.isVector())
21571 continue;
21572
21573 if (RegisterVT.isFixedLengthVector())
21574 RegisterVT = TLI->getContainerForFixedLengthVector(RegisterVT);
21575
21576 if (!FirstVMaskAssigned &&
21577 RegisterVT.getVectorElementType() == MVT::i1) {
21578 RVVArgInfos.push_back({1, RegisterVT, true});
21579 FirstVMaskAssigned = true;
21580 --NumRegs;
21581 }
21582
21583 RVVArgInfos.insert(RVVArgInfos.end(), NumRegs, {1, RegisterVT, false});
21584 }
21585 }
21586 }
21587}
21588
21589void RVVArgDispatcher::allocatePhysReg(unsigned NF, unsigned LMul,
21590 unsigned StartReg) {
21591 assert((StartReg % LMul) == 0 &&
21592 "Start register number should be multiple of lmul");
21593 const MCPhysReg *VRArrays;
21594 switch (LMul) {
21595 default:
21596 report_fatal_error("Invalid lmul");
21597 case 1:
21598 VRArrays = ArgVRs;
21599 break;
21600 case 2:
21601 VRArrays = ArgVRM2s;
21602 break;
21603 case 4:
21604 VRArrays = ArgVRM4s;
21605 break;
21606 case 8:
21607 VRArrays = ArgVRM8s;
21608 break;
21609 }
21610
21611 for (unsigned i = 0; i < NF; ++i)
21612 if (StartReg)
21613 AllocatedPhysRegs.push_back(VRArrays[(StartReg - 8) / LMul + i]);
21614 else
21615 AllocatedPhysRegs.push_back(MCPhysReg());
21616}
21617
21618/// This function determines whether each RVV argument is passed by register.
21619/// If the argument can be assigned to a VR group, give it a specific register;
21620/// otherwise, assign it 0, which is an invalid MCPhysReg.
21621void RVVArgDispatcher::compute() {
21622 uint32_t AssignedMap = 0;
21623 auto allocate = [&](const RVVArgInfo &ArgInfo) {
21624 // Allocate first vector mask argument to V0.
21625 if (ArgInfo.FirstVMask) {
21626 AllocatedPhysRegs.push_back(RISCV::V0);
21627 return;
21628 }
21629
21630 unsigned RegsNeeded = divideCeil(
21631 ArgInfo.VT.getSizeInBits().getKnownMinValue(), RISCV::RVVBitsPerBlock);
21632 unsigned TotalRegsNeeded = ArgInfo.NF * RegsNeeded;
21633 for (unsigned StartReg = 0; StartReg + TotalRegsNeeded <= NumArgVRs;
21634 StartReg += RegsNeeded) {
21635 uint32_t Map = ((1 << TotalRegsNeeded) - 1) << StartReg;
21636 if ((AssignedMap & Map) == 0) {
21637 allocatePhysReg(ArgInfo.NF, RegsNeeded, StartReg + 8);
21638 AssignedMap |= Map;
21639 return;
21640 }
21641 }
21642
21643 allocatePhysReg(ArgInfo.NF, RegsNeeded, 0);
21644 };
21645
21646 for (unsigned i = 0; i < RVVArgInfos.size(); ++i)
21647 allocate(RVVArgInfos[i]);
21648}
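// Worked example (illustrative only): a tuple argument with NF = 2 of type
// nxv4i32 needs divideCeil(128, 64) = 2 registers per field, so
// TotalRegsNeeded = 4; the first free slot is StartReg = 0, and
// allocatePhysReg(2, 2, 8) hands out the LMUL=2 pair V8M2 and V10M2.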
21649
21650MCPhysReg RVVArgDispatcher::getNextPhysReg() {
21651 assert(CurIdx < AllocatedPhysRegs.size() && "Index out of range");
21652 return AllocatedPhysRegs[CurIdx++];
21653}
21654
21655namespace llvm::RISCVVIntrinsicsTable {
21656
21657#define GET_RISCVVIntrinsicsTable_IMPL
21658#include "RISCVGenSearchableTables.inc"
21659
21660} // namespace llvm::RISCVVIntrinsicsTable
unsigned const MachineRegisterInfo * MRI
static MCRegister MatchRegisterName(StringRef Name)
static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT)
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget, const AArch64TargetLowering &TLI)
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
SmallVector< AArch64_IMM::ImmInsnModel, 4 > Insn
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
#define NODE_NAME_CASE(node)
static bool isConstant(const MachineInstr &MI)
amdgpu AMDGPU Register Bank Select
static bool isZeroOrAllOnes(SDValue N, bool AllOnes)
static SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, TargetLowering::DAGCombinerInfo &DCI, bool AllOnes=false)
static MCRegister MatchRegisterAltName(StringRef Name)
Maps from the set of all alternative registernames to a register number.
Function Alias Analysis Results
static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG, unsigned Flags)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static MachineBasicBlock * emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode)
static SDValue unpackFromRegLoc(const CSKYSubtarget &Subtarget, SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
#define NL
uint64_t Addr
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
#define Check(C,...)
#define im(i)
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
#define RegName(no)
const MCPhysReg ArgFPR32s[]
const MCPhysReg ArgVRs[]
static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG)
const MCPhysReg ArgFPR64s[]
const MCPhysReg ArgGPRs[]
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc=ISD::ANY_EXTEND)
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
loop Loop Strength Reduction
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
unsigned const TargetRegisterInfo * TRI
This file provides utility analysis objects describing memory locations.
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG, const TargetLowering::DAGCombinerInfo &DCI, const MipsSETargetLowering *TL, const MipsSubtarget &Subtarget)
static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, const MipsSubtarget &Subtarget)
static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
LLVMContext & Context
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
static bool IsSelect(MachineInstr &MI)
const char LLVMTargetMachineRef TM
R600 Clause Merge
static StringRef getExtensionType(StringRef Ext)
static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG)
static SDValue lowerSADDO_SSUBO(SDValue Op, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB, unsigned RelOpcode, unsigned EqOpcode, const RISCVSubtarget &Subtarget)
static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef< int > Mask)
Match shuffles that concatenate two vectors, rotate the concatenation, and then extract the original ...
static const Intrinsic::ID FixedVlsegIntrIds[]
static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MVT getLMUL1VT(MVT VT)
static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, MVT ValVT2, MVT LocVT2, ISD::ArgFlagsTy ArgFlags2, bool EABI)
static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
Match v(f)slide1up/down idioms.
static const MCPhysReg ArgVRM2s[]
static bool isInterleaveShuffle(ArrayRef< int > Mask, MVT VT, int &EvenSrc, int &OddSrc, const RISCVSubtarget &Subtarget)
Is this shuffle interleaving contiguous elements from one vector into the even elements and contiguou...
static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG)
According to the property that indexed load/store instructions zero-extend their indices,...
static void promoteVCIXScalar(const SDValue &Op, SmallVectorImpl< SDValue > &Operands, SelectionDAG &DAG)
static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, SDValue Scalar, SDValue VL, SelectionDAG &DAG)
static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode)
static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, SDValue Lo, SDValue Hi, SDValue VL, SelectionDAG &DAG)
static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL, SelectionDAG &DAG)
Creates an all ones mask suitable for masking a vector of type VecTy with vector length VL.
static cl::opt< int > FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden, cl::desc("Give the maximum number of instructions that we will " "use for creating a floating-point immediate value"), cl::init(2))
static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static const RISCV::RISCVMaskedPseudoInfo * lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVII::VLMUL LMul, unsigned SEW)
static SDValue expandMul(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue performVWADDSUBW_VLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask, Align BaseAlign, const RISCVSubtarget &ST)
Match the index of a gather or scatter operation as an operation with twice the element width and hal...
static bool isLegalBitRotate(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, MVT &RotateVT, unsigned &RotateAmt)
static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG)
static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1, SelectionDAG &DAG)
static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG)
static MachineBasicBlock * EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second, MachineBasicBlock *ThisMBB, const RISCVSubtarget &Subtarget)
static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG)
static std::optional< uint64_t > getExactInteger(const APFloat &APF, uint32_t BitWidth)
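A hedged sketch of how such an exactness test can be written with the APFloat/APSInt conversion APIs listed further down on this page; the helper name and the choice of rounding mode here are illustrative (for an exact integer the rounding mode does not affect the result):

  #include "llvm/ADT/APFloat.h"
  #include "llvm/ADT/APSInt.h"
  #include <cstdint>
  #include <optional>
  using namespace llvm;

  static std::optional<uint64_t> getExactIntegerSketch(const APFloat &APF,
                                                       uint32_t BitWidth) {
    APSInt ValInt(BitWidth, /*IsUnsigned=*/false);
    bool IsExact;
    // opInvalidOp (value does not fit) or an inexact conversion both mean
    // the float is not representable as a BitWidth-bit integer.
    if (APF.convertToInteger(ValInt, APFloat::rmTowardZero, &IsExact) !=
            APFloat::opOK ||
        !IsExact)
      return std::nullopt;
    return ValInt.extractBitsAsZExtValue(BitWidth, 0);
  }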
static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG)
static SDValue performMemPairCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG)
static bool isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static unsigned getRVVReductionOp(unsigned ISDOpcode)
static std::optional< bool > matchSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue Val)
static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG, unsigned Type)
static cl::opt< unsigned > NumRepeatedDivisors(DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden, cl::desc("Set the minimum number of repetitions of a divisor to allow " "transformation to multiplications by the reciprocal"), cl::init(2))
static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG)
static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool hasMaskOp(unsigned Opcode)
Return true if a RISC-V target specified op has a mask operand.
static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index, ISD::MemIndexType &IndexType, RISCVTargetLowering::DAGCombinerInfo &DCI)
static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG)
static unsigned getRISCVVLOp(SDValue Op)
Get a RISC-V target specified VL op for a given SDNode.
static unsigned getVecReduceOpcode(unsigned Opc)
Given a binary operator, return the associative generic ISD::VECREDUCE_OP which corresponds to it.
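As an illustration, the mapping described above amounts to a switch of the following shape (a sketch; the exact set of opcodes the real helper accepts may differ):

  static unsigned getVecReduceOpcodeSketch(unsigned Opc) {
    switch (Opc) {
    default:
      llvm_unreachable("Unhandled binary opcode");
    case ISD::ADD:  return ISD::VECREDUCE_ADD;
    case ISD::AND:  return ISD::VECREDUCE_AND;
    case ISD::OR:   return ISD::VECREDUCE_OR;
    case ISD::XOR:  return ISD::VECREDUCE_XOR;
    case ISD::SMAX: return ISD::VECREDUCE_SMAX;
    case ISD::SMIN: return ISD::VECREDUCE_SMIN;
    case ISD::UMAX: return ISD::VECREDUCE_UMAX;
    case ISD::UMIN: return ISD::VECREDUCE_UMIN;
    }
  }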
static std::pair< SDValue, SDValue > getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performFP_TO_INT_SATCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT, SDValue StartValue, SDValue Vec, SDValue Mask, SDValue VL, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Helper to lower a reduction sequence of the form: scalar = reduce_op vec, scalar_start.
static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::pair< SDValue, SDValue > getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< unsigned > preAssignMask(const ArgTy &Args)
static SDValue getVLOperand(SDValue Op)
static MachineBasicBlock * emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB, const RISCVSubtarget &Subtarget)
static cl::opt< bool > RV64LegalI32("riscv-experimental-rv64-legal-i32", cl::ReallyHidden, cl::desc("Make i32 a legal type for SelectionDAG on RV64."))
static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVectorXRINT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static cl::opt< unsigned > ExtensionMaxWebSize(DEBUG_TYPE "-ext-max-web-size", cl::Hidden, cl::desc("Give the maximum size (in number of nodes) of the web of " "instructions that we will consider for VW expansion"), cl::init(18))
static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG)
static SDValue getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Merge, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED)
static bool isSelectPseudo(MachineInstr &MI)
static std::optional< MVT > getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool useRVVForFixedLengthVectorVT(MVT VT, const RISCVSubtarget &Subtarget)
static Value * useTpOffset(IRBuilderBase &IRB, unsigned Offset)
static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG)
static MachineBasicBlock * emitSplitF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI, MachineBasicBlock *BB, unsigned CVTXOpc)
static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG)
static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc)
static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerSMULO(SDValue Op, SelectionDAG &DAG)
static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG, SDValue TrueVal, SDValue FalseVal, bool Swapped)
#define VP_CASE(NODE)
static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask, SmallVector< int > &ShuffleMask)
Match the index vector of a scatter or gather node as the shuffle mask which performs the rearrangeme...
static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG)
static bool hasMergeOp(unsigned Opcode)
Return true if a RISC-V target specified op has a merge operand.
static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static void processVCIXOperands(SDValue &OrigOp, SmallVectorImpl< SDValue > &Operands, SelectionDAG &DAG)
static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL, SelectionDAG &DAG)
static SDValue lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< VIDSequence > isSimpleVIDSequence(SDValue Op, unsigned EltSizeInBits)
static SDValue getDeinterleaveViaVNSRL(const SDLoc &DL, MVT VT, SDValue Src, bool EvenElts, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue lowerUADDSAT_USUBSAT(SDValue Op, SelectionDAG &DAG)
static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC)
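GREV (generalized reverse) and GORC (generalized OR-combine) constant folding is a staged bit permutation over the six power-of-two stages of a 64-bit value. A sketch of the standard computation, presented as an assumption rather than a verbatim copy of this file's code:

  static uint64_t computeGREVOrGORCSketch(uint64_t X, unsigned ShAmt,
                                          bool IsGORC) {
    static const uint64_t GREVMasks[] = {
        0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
        0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};

    for (unsigned Stage = 0; Stage != 6; ++Stage) {
      unsigned Shift = 1 << Stage;
      if (ShAmt & Shift) {
        uint64_t Mask = GREVMasks[Stage];
        // Swap the two groups of bits selected by this stage's mask...
        uint64_t Res = ((X & Mask) << Shift) | ((X >> Shift) & Mask);
        // ...and for GORC, OR the original value back in instead of
        // discarding it.
        if (IsGORC)
          Res |= X;
        X = Res;
      }
    }
    return X;
  }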
static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc)
static SDValue lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG)
static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isValidEGW(int EGS, EVT VT, const RISCVSubtarget &Subtarget)
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isNonZeroAVL(SDValue AVL)
#define DEBUG_TYPE
static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
Combine a binary operation to its equivalent VW or VW_W form.
static SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG, unsigned Type)
static ArrayRef< MCPhysReg > getFastCCArgGPRs(const RISCVABI::ABI ABI)
static const MCPhysReg ArgVRM8s[]
static MachineBasicBlock * emitReadCounterWidePseudo(MachineInstr &MI, MachineBasicBlock *BB)
static const MCPhysReg ArgVRM4s[]
static cl::opt< bool > AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden, cl::desc("Allow the formation of VW_W operations (e.g., " "VWADD_W) with splat constants"), cl::init(false))
static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const CCValAssign &HiVA, const SDLoc &DL)
static SDValue lowerSADDSAT_SSUBSAT(SDValue Op, SelectionDAG &DAG)
static SDValue getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Merge, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED)
static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1, LSBaseSDNode *LSNode2, SDValue BasePtr, uint64_t Imm)
static std::tuple< unsigned, SDValue, SDValue > getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT, const RISCVSubtarget &Subtarget)
static SDValue performFP_TO_INTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static const MCPhysReg ArgFPR16s[]
static SDValue combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Perform two related transforms whose purpose is to incrementally recognize an explode_vector followed...
static SDValue performVFMADD_VLCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Try and optimize BUILD_VECTORs with "dominant values" - these are values which constitute a large pro...
static SDValue getVLOp(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, ISD::CondCode &CC, SelectionDAG &DAG)
static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
If we have a build_vector where each lane is binop X, C, where C is a constant (but not necessarily t...
#define OP_CASE(NODE)
static const Intrinsic::ID FixedVssegIntrIds[]
static LLT getMaskTypeFor(LLT VecTy)
Return the mask type suitable for masking the provided vector type.
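The mapping is simply "same element count, i1 elements". For example, the SelectionDAG (MVT) analogue used throughout RVV lowering looks roughly like the following sketch:

  static MVT getMaskTypeForSketch(MVT VecVT) {
    assert(VecVT.isVector() && "expected a vector type");
    // A mask register holds one bit per data element.
    return MVT::getVectorVT(MVT::i1, VecVT.getVectorElementCount());
  }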
const SmallVectorImpl< MachineOperand > & Cond
#define ROTR(x, n)
Definition: SHA256.cpp:32
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isCommutative(Instruction *I)
This file defines the SmallSet class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
static SymbolRef::Type getType(const Symbol *Sym)
Definition: TapiFile.cpp:40
static constexpr int Concat[]
Value * RHS
Value * LHS
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition: APFloat.h:1193
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition: APFloat.h:1185
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Definition: APFloat.h:977
Class for arbitrary precision integers.
Definition: APInt.h:76
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition: APInt.h:207
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1491
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition: APInt.h:1364
uint64_t extractBitsAsZExtValue(unsigned numBits, unsigned bitPosition) const
Definition: APInt.cpp:489
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1463
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:906
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1308
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition: APInt.h:1179
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:349
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1160
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:358
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:187
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:307
void clearAllBits()
Set every bit to 0.
Definition: APInt.h:1375
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1589
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition: APInt.h:413
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:197
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1482
void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition: APInt.cpp:368
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:954
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1235
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:418
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:284
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition: APInt.h:1108
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:274
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition: APInt.h:1367
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition: APInt.h:264
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:217
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1513
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:829
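For readers unfamiliar with these APInt helpers, a small standalone example (values chosen arbitrarily; assumes linking against LLVMSupport):

  #include "llvm/ADT/APInt.h"
  #include "llvm/Support/raw_ostream.h"
  using namespace llvm;

  int main() {
    APInt X(64, 0x00FF0000ULL);
    outs() << "trailing zeros: " << X.countr_zero() << "\n";   // 16
    outs() << "active bits:    " << X.getActiveBits() << "\n"; // 24
    outs() << "power of two:   " << (X.isPowerOf2() ? "yes" : "no") << "\n"; // no

    // Truncate to 32 bits and sign-extend back; the value is unchanged
    // because it is representable as a signed 32-bit integer.
    APInt RoundTrip = X.trunc(32).sext(64);
    outs() << "round trip ok:  " << (RoundTrip == X ? "yes" : "no") << "\n";

    // One-bit values such as the sign mask are powers of two.
    outs() << "sign mask pow2: "
           << (APInt::getSignMask(64).isPowerOf2() ? "yes" : "no") << "\n";
    return 0;
  }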
An arbitrary precision integer that knows its signedness.
Definition: APSInt.h:23
an instruction to allocate memory on the stack
Definition: Instructions.h:59
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
iterator end() const
Definition: ArrayRef.h:154
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
iterator begin() const
Definition: ArrayRef.h:153
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:195
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:539
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:748
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
Definition: Instructions.h:867
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:760
@ Add
*p = old + v
Definition: Instructions.h:764
@ Min
*p = old <signed v ? old : v
Definition: Instructions.h:778
@ Or
*p = old | v
Definition: Instructions.h:772
@ Sub
*p = old - v
Definition: Instructions.h:766
@ And
*p = old & v
Definition: Instructions.h:768
@ UIncWrap
Increment one up to a maximum value.
Definition: Instructions.h:800
@ Max
*p = old >signed v ? old : v
Definition: Instructions.h:776
@ UMin
*p = old <unsigned v ? old : v
Definition: Instructions.h:782
@ UMax
*p = old >unsigned v ? old : v
Definition: Instructions.h:780
@ UDecWrap
Decrement one until a minimum value or zero.
Definition: Instructions.h:804
@ Nand
*p = ~(old & v)
Definition: Instructions.h:770
bool isFloatingPointOperation() const
Definition: Instructions.h:922
BinOp getOperation() const
Definition: Instructions.h:845
Value * getValOperand()
Definition: Instructions.h:914
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
Definition: Instructions.h:887
bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:349
static BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG)
Parses tree in N for base, index, offset addresses.
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:206
bool test(unsigned Idx) const
Definition: BitVector.h:461
BitVector & set()
Definition: BitVector.h:351
bool all() const
all - Returns true if all bits are set.
Definition: BitVector.h:175
CCState - This class holds information needed while lowering arguments and return values.
MachineFunction & getMachineFunction() const
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
SmallVectorImpl< ISD::ArgFlagsTy > & getPendingArgFlags()
MCRegister AllocateReg(MCPhysReg Reg)
AllocateReg - Attempt to allocate one register.
int64_t AllocateStack(unsigned Size, Align Alignment)
AllocateStack - Allocate a chunk of stack space with the specified size and alignment.
void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
SmallVectorImpl< CCValAssign > & getPendingLocs()
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
void addLoc(const CCValAssign &V)
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
static CCValAssign getPending(unsigned ValNo, MVT ValVT, MVT LocVT, LocInfo HTP, unsigned ExtraInfo=0)
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getReg(unsigned ValNo, MVT ValVT, unsigned RegNo, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
bool isMemLoc() const
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, unsigned RegNo, MVT LocVT, LocInfo HTP)
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
bool isIndirectCall() const
Return true if the callsite is an indirect call.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
This is the shared class of boolean and integer constants.
Definition: Constants.h:80
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition: Constants.h:217
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:205
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:154
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
This is an important base class in LLVM.
Definition: Constant.h:41
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:417
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
unsigned getPointerSizeInBits(unsigned AS=0) const
Layout pointer size, in bits FIXME: The defaults need to be removed once all of the backends/clients ...
Definition: DataLayout.h:410
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:874
A debug info location.
Definition: DebugLoc.h:33
unsigned size() const
Definition: DenseMap.h:99
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
Implements a dense probed hash-table based set.
Definition: DenseSet.h:271
Diagnostic information for unsupported feature in backend.
static constexpr ElementCount getScalable(ScalarTy MinVal)
Definition: TypeSize.h:311
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:308
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:692
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:202
iterator_range< arg_iterator > args()
Definition: Function.h:842
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:703
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:682
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this function...
Definition: Function.h:264
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:340
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:358
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:207
Argument * getArg(unsigned i) const
Definition: Function.h:836
Helper struct to store a base, index and offset that forms an address.
Definition: LoadStoreOpt.h:38
bool isDSOLocal() const
Definition: GlobalValue.h:305
bool hasExternalWeakLinkage() const
Definition: GlobalValue.h:529
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
Store the specified register of the given register class to the specified stack frame index.
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
Load the specified register of the given register class from the specified stack frame index.
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:94
Value * CreateConstGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0, const Twine &Name="")
Definition: IRBuilder.h:1881
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2516
FenceInst * CreateFence(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System, const Twine &Name="")
Definition: IRBuilder.h:1834
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2033
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition: IRBuilder.h:526
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:174
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Definition: IRBuilder.h:531
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1749
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1344
ConstantInt * getIntN(unsigned N, uint64_t C)
Get a constant N-bit value, zero extended or truncated from a 64-bit value.
Definition: IRBuilder.h:497
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2494
AtomicRMWInst * CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val, MaybeAlign Align, AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Definition: IRBuilder.h:1854
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2007
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2412
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition: IRBuilder.h:516
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2666
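A short, hedged example of a few of the IRBuilder entry points listed above; the surrounding helper is illustrative and only the builder calls are the point:

  #include "llvm/IR/IRBuilder.h"
  #include "llvm/IR/Instructions.h"
  #include "llvm/Support/Alignment.h"
  using namespace llvm;

  // Emit "atomicrmw add i32" on Ptr followed by a release fence.
  static void emitAtomicIncrement(IRBuilderBase &B, Value *Ptr) {
    Value *One = B.getIntN(32, 1);
    B.CreateAtomicRMW(AtomicRMWInst::Add, Ptr, One, MaybeAlign(4),
                      AtomicOrdering::SequentiallyConsistent);
    B.CreateFence(AtomicOrdering::Release);
  }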
static InstructionCost getInvalid(CostType Val=0)
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
Definition: Instruction.cpp:83
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:252
Class to represent integer types.
Definition: DerivedTypes.h:40
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:47
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:54
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Base class for LoadSDNode and StoreSDNode.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
An instruction for reading from memory.
Definition: Instructions.h:184
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:286
Value * getPointerOperand()
Definition: Instructions.h:280
bool isSimple() const
Definition: Instructions.h:272
Align getAlign() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:236
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
Context object for machine code objects.
Definition: MCContext.h:81
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:35
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:397
Metadata node.
Definition: Metadata.h:1067
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1428
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
static auto integer_fixedlen_vector_valuetypes()
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
bool bitsLE(MVT VT) const
Return true if this has no more bits than VT.
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
static MVT getScalableVectorVT(MVT VT, unsigned NumElements)
MVT changeTypeToInteger()
Return the type converted to an equivalently sized integer or vector with integer element type.
bool bitsLT(MVT VT) const
Return true if this has less bits than VT.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
uint64_t getScalarStoreSize() const
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
bool bitsGT(MVT VT) const
Return true if this has more bits than VT.
bool isFixedLengthVector() const
ElementCount getVectorElementCount() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool bitsGE(MVT VT) const
Return true if this has no less bits than VT.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
bool isValid() const
Return true if this is a valid simple valuetype.
static MVT getIntegerVT(unsigned BitWidth)
MVT getDoubleNumVectorElementsVT() const
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
static auto integer_scalable_vector_valuetypes()
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
static auto fp_fixedlen_vector_valuetypes()
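A tiny standalone illustration of the MVT queries above (the header path is assumed for LLVM of this vintage):

  #include "llvm/CodeGen/MachineValueType.h"
  using namespace llvm;

  static void mvtExamples() {
    MVT V = MVT::getVectorVT(MVT::i32, 8);                // v8i32
    unsigned NumElts = V.getVectorNumElements();          // 8
    uint64_t EltBits = V.getScalarSizeInBits();           // 32
    MVT Half = V.getHalfNumVectorElementsVT();            // v4i32
    MVT AsFP = V.changeVectorElementType(MVT::f32);       // v8f32
    MVT Scalable = MVT::getScalableVectorVT(MVT::i64, 2); // nxv2i64
    (void)NumElts; (void)EltBits; (void)Half; (void)AsFP;
    (void)Scalable.isScalableVector();                    // true
  }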
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
void push_back(MachineInstr *MI)
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
Instructions::iterator instr_iterator
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
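These builder methods are typically chained off BuildMI. A hedged, target-agnostic sketch; the opcode and registers are parameters supplied by the caller, not values taken from this file:

  #include "llvm/CodeGen/MachineBasicBlock.h"
  #include "llvm/CodeGen/MachineInstrBuilder.h"
  #include "llvm/CodeGen/TargetInstrInfo.h"
  using namespace llvm;

  static void emitRegImmOp(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator InsertPt,
                           const DebugLoc &DL, const TargetInstrInfo &TII,
                           unsigned Opcode, Register Dst, Register Src,
                           int64_t Imm) {
    // Dst = Opcode Src, Imm
    BuildMI(MBB, InsertPt, DL, TII.get(Opcode), Dst)
        .addReg(Src)
        .addImm(Imm);
  }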
Representation of each machine instruction.
Definition: MachineInstr.h:69
void collectDebugValues(SmallVectorImpl< MachineInstr * > &DbgValues)
Scan instructions immediately following MI and collect any matching DBG_VALUEs.
void setFlag(MIFlag Flag)
Set a MI flag.
Definition: MachineInstr.h:398
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:568
@ EK_Custom32
EK_Custom32 - Each entry is a 32-bit value that is custom lowered by the TargetLowering::LowerCustomJ...
A description of a memory reference used in the backend.
const MDNode * getRanges() const
Return the range tag for the memory reference.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
Flags getFlags() const
Return the raw flags of the source value,.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
Align getBaseAlign() const
Return the minimum known alignment in bytes of the base address, without the offset.
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
void addLiveIn(MCRegister Reg, Register vreg=Register())
addLiveIn - Add the specified register as a live-in.
This is an abstract virtual class for memory operations.
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.h:293
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1827
RISCVMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private RISCV-...
RISCVABI::ABI getTargetABI() const
unsigned getMinimumJumpTableEntries() const
bool hasStdExtCOrZca() const
unsigned getMaxLMULForFixedLengthVectors() const
bool hasVInstructionsI64() const
bool hasVInstructionsF64() const
bool hasStdExtDOrZdinx() const
bool hasStdExtZfhOrZhinx() const
unsigned getRealMinVLen() const
Quantity expandVScale(Quantity X) const
If the ElementCount or TypeSize X is scalable and VScale (VLEN) is exactly known, returns X converted...
bool useRVVForFixedLengthVectors() const
bool isTargetFuchsia() const
unsigned getDLenFactor() const
bool hasVInstructionsF16Minimal() const
unsigned getXLen() const
bool hasConditionalMoveFusion() const
bool isRegisterReservedByUser(Register i) const
bool hasVInstructionsF16() const
bool hasVInstructionsBF16() const
unsigned getMaxBuildIntsCost() const
Align getPrefLoopAlignment() const
bool hasVInstructions() const
std::optional< unsigned > getRealVLen() const
bool useConstantPoolForLargeInts() const
Align getPrefFunctionAlignment() const
bool hasStdExtZfhminOrZhinxmin() const
unsigned getRealMaxVLen() const
const RISCVRegisterInfo * getRegisterInfo() const override
const RISCVInstrInfo * getInstrInfo() const override
const RISCVTargetLowering * getTargetLowering() const override
bool hasVInstructionsF32() const
unsigned getELen() const
bool hasStdExtFOrZfinx() const
bool isSoftFPABI() const
unsigned getFLen() const
static std::pair< unsigned, unsigned > computeVLMAXBounds(MVT ContainerVT, const RISCVSubtarget &Subtarget)
static std::pair< unsigned, unsigned > decomposeSubvectorInsertExtractToSubRegs(MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx, const RISCVRegisterInfo *TRI)
InstructionCost getVRGatherVVCost(MVT VT) const
Return the cost of a vrgather.vv instruction for the type VT.
bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const
static unsigned getSubregIndexByMVT(MVT VT, unsigned Index)
Value * getIRStackGuard(IRBuilderBase &IRB) const override
If the target has a standard location for the stack protector cookie, returns the address of that loc...
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
bool shouldSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...
InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const override
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT) const override
Return true if pulling a binary operation into a select with an identity constant is profitable.
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able to emit the call instruction as a tail call.
std::pair< int, bool > getLegalZfaFPImm(const APFloat &Imm, EVT VT) const
RISCVTargetLowering(const TargetMachine &TM, const RISCVSubtarget &STI)
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Returns true if the target allows unaligned memory accesses of the specified type.
const Constant * getTargetConstantFromLoad(LoadSDNode *LD) const override
This method returns the constant pool value that will be loaded by LD.
const RISCVSubtarget & getSubtarget() const
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II, StoreInst *SI) const override
Lower an interleave intrinsic to a target specific store intrinsic.
bool preferScalarizeSplat(SDNode *N) const override
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
bool canSplatOperand(Instruction *I, int Operand) const
Return true if the (vector) instruction I will be lowered to an instruction with a scalar splat opera...
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
const MCExpr * LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned uid, MCContext &Ctx) const override
InstructionCost getVRGatherVICost(MVT VT) const
Return the cost of a vrgather.vi (or vx) instruction for the type VT.
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Return true if it is beneficial to convert a load of a constant to just the constant itself.
bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const override
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the register type for a given MVT, ensuring vectors are treated as a series of gpr sized integ...
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y -> (~X & Y) == 0 (X & Y) !...
bool shouldScalarizeBinop(SDValue VecOp) const override
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to move this shift by a constant amount through its operand,...
bool areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX, const MemSDNode &NodeY) const override
Return true if it is valid to merge the TargetMMOFlags in two SDNodes.
bool hasBitTest(SDValue X, SDValue Y) const override
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0 This knowledge can be us...
static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize, unsigned MinSize)
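The value returned here follows the RVV formula VLMAX = (VLEN / SEW) * LMUL, with LMUL derived from the container type's minimum size; a worked example under that assumption:

  // Assuming RISCV::RVVBitsPerBlock == 64 (one LMUL=1 register block):
  //   VectorBits (VLEN) = 256, EltSize (SEW) = 32, MinSize = 128 bits
  //   LMUL  = MinSize / RVVBitsPerBlock = 128 / 64 = 2
  //   VLMAX = (VLEN / SEW) * LMUL = (256 / 32) * 2 = 16 elements
  unsigned VLMAX = (VectorBits / EltSize) * MinSize / RISCV::RVVBitsPerBlock;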
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
InstructionCost getLMULCost(MVT VT) const
Return the cost of LMUL for linear operations.
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const override
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
InstructionCost getVSlideVICost(MVT VT) const
Return the cost of a vslidedown.vi or vslideup.vi instruction for the type VT.
bool fallBackToDAGISel(const Instruction &Inst) const override
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
bool lowerInterleavedLoad(LoadInst *LI, ArrayRef< ShuffleVectorInst * > Shuffles, ArrayRef< unsigned > Indices, unsigned Factor) const override
Lower an interleaved load into a vlsegN intrinsic.
bool isCtpopFast(EVT VT) const override
Return true if ctpop instruction is fast.
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
This method can be implemented by targets that want to expose additional information about sign bits ...
MVT getContainerForFixedLengthVector(MVT VT) const
static unsigned getRegClassIDForVecVT(MVT VT)
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const override
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand fl...
SDValue computeVLMax(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG) const
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const override
Should we transform the IR-optimal check for whether the given truncation down into KeptBits would be trun...
bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const override
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Returns the register with the specified architectural or ABI name.
InstructionCost getVSlideVXCost(MVT VT) const
Return the cost of a vslidedown.vx or vslideup.vx instruction for the type VT.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
static unsigned getRegClassIDForLMUL(RISCVII::VLMUL LMul)
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const override
Return the maximum number of "x & (x - 1)" operations that can be done instead of deferring to a cust...
void AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const override
This method should be implemented by targets that mark instructions with the 'hasPostISelHook' flag.
bool isShuffleMaskLegal(ArrayRef< int > M, EVT VT) const override
Return true if the given shuffle mask can be codegen'd directly, or if it should be stack expanded.
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, unsigned Factor) const override
Lower an interleaved store into a vssegN intrinsic.
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const override
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
bool isLegalElementTypeForRVV(EVT ScalarTy) const
bool isVScaleKnownToBeAPowerOfTwo() const override
Return true only if vscale must be a power of two.
bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *II, LoadInst *LI) const override
Lower a deinterleave intrinsic to a target specific load intrinsic.
static RISCVII::VLMUL getLMUL(MVT VT)
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the number of registers for a given MVT, ensuring vectors are treated as a series of gpr sized...
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint letter, return the type of constraint it is for this target.
MachineInstr * EmitKCFICheck(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator &MBBI, const TargetInstrInfo *TII) const override
bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace, const DataLayout &) const
Returns whether or not generating an interleaved load/store intrinsic for this type will be legal.
bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const override
Return true if Op can create undef or poison from non-undef & non-poison operands.
bool isIntDivCheap(EVT VT, AttributeList Attr) const override
Return true if integer divide is usually cheaper than a sequence of several shifts,...
bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if this node ...
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if the node's...
SDValue joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, std::optional< CallingConv::ID > CC) const override
Target-specific combining of register parts into its original value.
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override
Return if the target supports combining a chain like:
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
bool isLegalStridedLoadStore(EVT DataType, Align Alignment) const
Return true if a stride load store of the given result type and alignment is legal.
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
As per the spec, the rules for passing vector arguments are as follows:
static constexpr unsigned NumArgVRs
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
const SDValue & getOperand(unsigned Num) const
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
void setCFIType(uint32_t Type)
bool isUndef() const
Return true if the type of the node is undefined.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
op_iterator op_end() const
op_iterator op_begin() const
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
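A minimal sketch (not code from this file) of how the SDValue/SDNode accessors above are typically combined in a DAG-combine style check; the helper name and the ADD-with-constant pattern are purely illustrative.
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Returns true if V is an ADD whose second operand is a constant with a
// single use, using only the accessors listed above.
static bool isAddOfSingleUseConstant(SDValue V) {
  if (V.getOpcode() != ISD::ADD)
    return false;
  SDValue RHS = V.getOperand(1);
  return isa<ConstantSDNode>(RHS) && RHS.hasOneUse();
}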
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:225
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:722
unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
SDValue getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS)
Helper function to make it easier to build Select's if you just have operands and don't want to check...
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getNeutralElement(unsigned Opcode, const SDLoc &DL, EVT VT, SDNodeFlags Flags)
Get the (commutative) neutral element for the given opcode, if it exists.
SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getElementCount(const SDLoc &DL, EVT VT, ElementCount EC, bool ConstantFold=true)
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
bool shouldOptForSize() const
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
SDValue getVPZExtOrTrunc(const SDLoc &DL, EVT VT, SDValue Op, SDValue Mask, SDValue EVL)
Convert a vector-predicated Op, which must be an integer vector, to the vector-type VT,...
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:478
bool NewNodesMustHaveLegalTypes
When true, additional steps are taken to ensure that getConstant() and similar functions return DAG n...
Definition: SelectionDAG.h:387
std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:732
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:828
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), AAResults *AA=nullptr)
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:472
SDValue getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
Definition: SelectionDAG.h:659
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
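Illustrative sketch of splitting a vector value with GetSplitDestVTs and SplitVector and then rejoining the halves; the function itself is hypothetical, and a real lowering would transform the halves before concatenating them.
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

static SDValue splitAndRejoin(SelectionDAG &DAG, const SDLoc &DL, SDValue Vec) {
  EVT VT = Vec.getValueType();
  // Compute the value types of the low/high halves (they can differ for
  // types that do not split evenly).
  auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VT);
  // EXTRACT_SUBVECTOR-based split into the two parts.
  auto [Lo, Hi] = DAG.SplitVector(Vec, DL, LoVT, HiVT);
  // Rebuild the original value from the parts.
  return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
}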
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
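A small sketch of chaining getLoad and getStore correctly; the helper name, pointer operands, and the 4-byte alignment are assumptions for illustration only.
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

static SDValue copyWord(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain,
                        SDValue SrcPtr, SDValue DstPtr) {
  // Load an i32 from SrcPtr; the load's value 1 is its output chain.
  SDValue Load = DAG.getLoad(MVT::i32, DL, Chain, SrcPtr,
                             MachinePointerInfo());
  // Store the loaded value to DstPtr, chained after the load so the memory
  // ordering between the two operations is preserved.
  return DAG.getStore(Load.getValue(1), DL, Load, DstPtr,
                      MachinePointerInfo(), Align(4));
}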
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
Definition: SelectionDAG.h:862
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, which starts a new call frame in which InSize bytes are set up inside ...
SDValue getRegister(unsigned Reg, EVT VT)
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
static const fltSemantics & EVTToAPFloatSemantics(EVT VT)
Returns an APFloat semantics tag appropriate for the given type.
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:473
std::pair< SDValue, SDValue > getStrictFPExtendOrRound(SDValue Op, SDValue Chain, const SDLoc &DL, EVT VT)
Convert Op, which must be a STRICT operation of float type, to the float type VT, by either extending...
std::pair< SDValue, SDValue > SplitEVL(SDValue N, EVT VecVT, const SDLoc &DL)
Split the explicit vector length parameter of a VP operation.
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:773
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
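Minimal sketch of the basic node-construction flow with getConstant and getSetCC; the "is negative" computation is an invented example, not a transform performed by this file.
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

static SDValue emitIsNegative(SelectionDAG &DAG, const SDLoc &DL, SDValue X) {
  EVT VT = X.getValueType();
  // Ask the target what type a setcc of VT produces.
  EVT CCVT = DAG.getTargetLoweringInfo().getSetCCResultType(
      DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  // Build (setcc X, 0, setlt), i.e. X < 0.
  return DAG.getSetCC(DL, CCVT, X, Zero, ISD::SETLT);
}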
SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
bool isKnownNeverNaN(SDValue Op, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:676
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:768
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:469
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:799
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:845
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops)
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
SDValue getCondCode(ISD::CondCode Cond)
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
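Illustrative use of MaskedValueIsZero to prove an AND is redundant; the surrounding combine is hypothetical and shown only to demonstrate the known-bits query.
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

static SDValue simplifyAndWithKnownBits(SelectionDAG &DAG, SDNode *N) {
  if (N->getOpcode() != ISD::AND)
    return SDValue();
  auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
  if (!C)
    return SDValue();
  // If every bit cleared by the mask is already known to be zero in the
  // other operand, the AND changes nothing and can be folded away.
  APInt NotMask = ~C->getAPIntValue();
  if (DAG.MaskedValueIsZero(N->getOperand(0), NotMask))
    return N->getOperand(0);
  return SDValue();
}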
LLVMContext * getContext() const
Definition: SelectionDAG.h:485
SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL, bool LegalTypes=true)
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:739
SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:554
SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
Definition: SelectionDAG.h:878
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
SDValue getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating=false)
This instruction constructs a fixed permutation of two input vectors.
static bool isBitRotateMask(ArrayRef< int > Mask, unsigned EltSizeInBits, unsigned MinSubElts, unsigned MaxSubElts, unsigned &NumSubElts, unsigned &RotateAmt)
Checks if the shuffle is a bit rotation of the first operand across multiple subelements,...
VectorType * getType() const
Overload to return most specific vector type.
static void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static bool isReverseMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask swaps the order of elements from exactly one source vector.
static bool isInsertSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &NumSubElts, int &Index)
Return true if this shuffle mask is an insert subvector mask.
static bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
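A sketch, under the assumption that SVI is an IR shufflevector being inspected, of classifying its mask with the static helpers above; the helper name and the factor-2 choice are illustrative.
#include "llvm/IR/Instructions.h"
#include "llvm/ADT/SmallVector.h"
using namespace llvm;

static bool isTwoWayInterleave(ShuffleVectorInst *SVI, unsigned NumInputElts) {
  ArrayRef<int> Mask = SVI->getShuffleMask();
  // Reject pure reversals early.
  if (ShuffleVectorInst::isReverseMask(Mask, NumInputElts))
    return false;
  // Check whether the mask interleaves two inputs (factor 2).
  SmallVector<unsigned, 4> StartIndexes;
  return ShuffleVectorInst::isInterleaveMask(Mask, /*Factor=*/2, NumInputElts,
                                             StartIndexes);
}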
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
static bool isSplatMask(const int *Mask, EVT VT)
ArrayRef< int > getMask() const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:427
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:166
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void reserve(size_type N)
Definition: SmallVector.h:676
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:818
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
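Illustrative only: the common pattern of collecting operands in a SmallVector (which converts implicitly to ArrayRef) before creating a variadic node; the splat-building helper is made up.
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/ADT/SmallVector.h"
using namespace llvm;

static SDValue buildVectorOfCopies(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
                                   SDValue Elt, unsigned NumElts) {
  SmallVector<SDValue, 8> Ops;
  Ops.reserve(NumElts);
  for (unsigned I = 0; I < NumElts; ++I)
    Ops.push_back(Elt);
  // Ops is passed as ArrayRef<SDValue>.
  return DAG.getBuildVector(VT, DL, Ops);
}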
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
An instruction for storing to memory.
Definition: Instructions.h:317
This class is used to represent ISD::STORE nodes.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
std::string lower() const
Definition: StringRef.cpp:111
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
StringSwitch & Cases(StringLiteral S0, StringLiteral S1, T Value)
Definition: StringSwitch.h:90
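A minimal sketch of using StringSwitch to map an inline-asm constraint string to a category, as constraint-handling hooks commonly do; the enum and the constraint strings shown are illustrative, not the backend's actual constraint table.
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/StringRef.h"
using namespace llvm;

enum class AsmOperandKind { GPR, FPR, Vector, Unknown };

static AsmOperandKind classifyConstraint(StringRef Constraint) {
  return StringSwitch<AsmOperandKind>(Constraint)
      .Case("r", AsmOperandKind::GPR)
      .Case("f", AsmOperandKind::FPR)
      .Case("vr", AsmOperandKind::Vector)
      .Default(AsmOperandKind::Unknown);
}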
Class to represent struct types.
Definition: DerivedTypes.h:216
bool containsHomogeneousScalableVectorTypes() const
Returns true if this struct contains homogeneous scalable vector types.
Definition: Type.cpp:435
unsigned getNumElements() const
Random access to the elements.
Definition: DerivedTypes.h:341
Type * getTypeAtIndex(const Value *V) const
Given an index value into the type, return the type of the element.
Definition: Type.cpp:612
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
Convenience method to set an operation to Promote and specify the type in a single call.
unsigned getMinCmpXchgSizeInBits() const
Returns the size of the smallest cmpxchg or ll/sc instruction the backend supports.
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual bool shouldFoldSelectWithSingleBitTest(EVT VT, const APInt &AndMask) const
virtual Value * getIRStackGuard(IRBuilderBase &IRB) const
If the target has a standard location for the stack protector guard, returns the address of that loca...
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
bool allowsMemoryAccessForAlignment(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
This function returns true if the memory access is aligned or if the target allows this specific unal...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
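A rough sketch of where the configuration hooks above are normally called: inside a target's TargetLowering constructor. The class below is hypothetical and never instantiated; the register-class line is left as a comment because the register class would come from the target's generated tables.
#include "llvm/CodeGen/TargetLowering.h"
using namespace llvm;

namespace {
class ExampleTargetLowering : public TargetLowering {
public:
  explicit ExampleTargetLowering(const TargetMachine &TM) : TargetLowering(TM) {
    // addRegisterClass(MVT::i32, &Example::GPRRegClass);  // generated class

    // Expand wide shifts into SHL_PARTS/SRA_PARTS/SRL_PARTS sequences.
    setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);

    // Pretend sign-extending i8 loads are not native; legalize them away.
    setLoadExtAction(ISD::SEXTLOAD, MVT::i32, MVT::i8, Expand);

    // Booleans are materialized as 0/1 in scalar registers.
    setBooleanContents(ZeroOrOneBooleanContent);
  }
};
} // end anonymous namespace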
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT.
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions Ref: "Hacker's Delight" by Henry Warren 1...
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:76
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
Target - Wrapper for Target specific information.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:342
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition: TypeSize.h:345
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
Type * getStructElementType(unsigned N) const
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isStructTy() const
True if this is an instance of StructType.
Definition: Type.h:249
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:129
bool isScalableTy() const
Return true if this is a type whose size is a known multiple of vscale.
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getContainedType(unsigned i) const
This method is used to implement the type iterator (defined at the end of the file).
Definition: Type.h:377
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:348
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:72
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1074
Base class of all SIMD vector types.
Definition: DerivedTypes.h:403
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:199
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
Definition: TypeSize.h:255
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
constexpr bool isZero() const
Definition: TypeSize.h:156
self_iterator getIterator()
Definition: ilist_node.h:109
#define INT64_MIN
Definition: DataTypes.h:74
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ RISCV_VectorCall
Calling convention used for RISC-V V-extension.
Definition: CallingConv.h:268
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
@ SPIR_KERNEL
Used for SPIR kernel functions.
Definition: CallingConv.h:144
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition: CallingConv.h:76
@ GRAAL
Used by GraalVM. Two additional registers are reserved.
Definition: CallingConv.h:255
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:751
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
Definition: ISDOpcodes.h:1133
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1129
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:724
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:477
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition: ISDOpcodes.h:44
@ VECREDUCE_SEQ_FADD
Generic reduction nodes.
Definition: ISDOpcodes.h:1346
@ VECREDUCE_SMIN
Definition: ISDOpcodes.h:1377
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:251
@ ATOMIC_LOAD_NAND
Definition: ISDOpcodes.h:1276
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:560
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:715
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1162
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ATOMIC_LOAD_MAX
Definition: ISDOpcodes.h:1278
@ STRICT_FCEIL
Definition: ISDOpcodes.h:427
@ ATOMIC_LOAD_UMIN
Definition: ISDOpcodes.h:1279
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:240
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1038
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:784
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:484
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:199
@ RETURNADDR
Definition: ISDOpcodes.h:95
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:791
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:544
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
Definition: ISDOpcodes.h:1362
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:391
@ VECREDUCE_FMAXIMUM
FMINIMUM/FMAXIMUM nodes propagate NaNs and signed zeroes using the llvm.minimum and llvm....
Definition: ISDOpcodes.h:1366
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:689
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
Definition: ISDOpcodes.h:1235
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
Definition: ISDOpcodes.h:1240
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:256
@ VECREDUCE_SMAX
Definition: ISDOpcodes.h:1376
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:478
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:914
@ ATOMIC_LOAD_OR
Definition: ISDOpcodes.h:1274
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:904
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:230
@ ATOMIC_LOAD_XOR
Definition: ISDOpcodes.h:1275
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition: ISDOpcodes.h:412
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1407
@ GlobalTLSAddress
Definition: ISDOpcodes.h:79
@ SET_ROUNDING
Set rounding mode.
Definition: ISDOpcodes.h:886
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:775
@ STRICT_UINT_TO_FP
Definition: ISDOpcodes.h:451
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:621
@ READSTEADYCOUNTER
READSTEADYCOUNTER - This corresponds to the readfixedcounter intrinsic.
Definition: ISDOpcodes.h:1195
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
Definition: ISDOpcodes.h:1359
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:723
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1228
@ VECREDUCE_FMIN
Definition: ISDOpcodes.h:1363
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:995
@ STRICT_LROUND
Definition: ISDOpcodes.h:432
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:931
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1084
@ SSUBO
Same for subtraction.
Definition: ISDOpcodes.h:328
@ ATOMIC_LOAD_MIN
Definition: ISDOpcodes.h:1277
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1063
@ VECTOR_INTERLEAVE
VECTOR_INTERLEAVE(VEC1, VEC2) - Returns two vectors with all input and output vectors having the same...
Definition: ISDOpcodes.h:587
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
Definition: ISDOpcodes.h:647
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition: ISDOpcodes.h:508
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:350
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:728
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:212
@ VECREDUCE_UMAX
Definition: ISDOpcodes.h:1378
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:628
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1158
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:324
@ STRICT_FTRUNC
Definition: ISDOpcodes.h:431
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1371
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined 0 Round to 0 1 Round to nearest, ties to even 2 Round to ...
Definition: ISDOpcodes.h:881
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:652
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:706
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:601
@ ATOMIC_LOAD_AND
Definition: ISDOpcodes.h:1272
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:574
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:536
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:203
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:781
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
Definition: ISDOpcodes.h:1218
@ FP_TO_UINT_SAT
Definition: ISDOpcodes.h:857
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:743
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
Definition: ISDOpcodes.h:1336
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1255
@ ATOMIC_LOAD_UMAX
Definition: ISDOpcodes.h:1280
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:972
@ SMULO
Same for multiplication.
Definition: ISDOpcodes.h:332
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:1048
@ STRICT_LRINT
Definition: ISDOpcodes.h:434
@ ConstantPool
Definition: ISDOpcodes.h:82
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:799
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:675
@ VECTOR_REVERSE
VECTOR_REVERSE(VECTOR) - Returns a vector, of the same type as VECTOR, whose elements are shuffled us...
Definition: ISDOpcodes.h:592
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:889
@ STRICT_FROUND
Definition: ISDOpcodes.h:429
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:737
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition: ISDOpcodes.h:450
@ VECREDUCE_UMIN
Definition: ISDOpcodes.h:1379
@ STRICT_FFLOOR
Definition: ISDOpcodes.h:428
@ STRICT_FROUNDEVEN
Definition: ISDOpcodes.h:430
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition: ISDOpcodes.h:129
@ BF16_TO_FP
BF16_TO_FP, FP_TO_BF16 - These operators are used to perform promotions and truncation for bfloat16.
Definition: ISDOpcodes.h:923
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:94
@ ATOMIC_LOAD_ADD
Definition: ISDOpcodes.h:1270
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:444
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition: ISDOpcodes.h:466
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:443
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:991
@ ATOMIC_LOAD_SUB
Definition: ISDOpcodes.h:1271
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:837
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:1189
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:471
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:681
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1215
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:184
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:401
@ SPLAT_VECTOR_PARTS
SPLAT_VECTOR_PARTS(SCALAR1, SCALAR2, ...) - Returns a vector with the scalar values joined together a...
Definition: ISDOpcodes.h:637
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:525
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ STRICT_LLRINT
Definition: ISDOpcodes.h:435
@ VECTOR_SPLICE
VECTOR_SPLICE(VEC1, VEC2, IMM) - Returns a subvector of the same type as VEC1/VEC2 from CONCAT_VECTOR...
Definition: ISDOpcodes.h:613
@ ATOMIC_SWAP
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN,...
Definition: ISDOpcodes.h:1269
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:870
@ STRICT_LLROUND
Definition: ISDOpcodes.h:433
@ STRICT_FNEARBYINT
Definition: ISDOpcodes.h:424
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition: ISDOpcodes.h:856
@ VECREDUCE_FMINIMUM
Definition: ISDOpcodes.h:1367
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:787
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:1153
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1077
@ BlockAddress
Definition: ISDOpcodes.h:84
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:764
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:494
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:341
@ STRICT_FRINT
Definition: ISDOpcodes.h:423
@ VECTOR_DEINTERLEAVE
VECTOR_DEINTERLEAVE(VEC1, VEC2) - Returns two vectors with all input and output vectors having the sa...
Definition: ISDOpcodes.h:581
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:192
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:516
bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
std::optional< unsigned > getVPMaskIdx(unsigned Opcode)
The operand position of the vector mask.
std::optional< unsigned > getVPExplicitVectorLengthIdx(unsigned Opcode)
The operand position of the explicit vector length parameter.
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
MemIndexType
MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's index parameter when calcula...
Definition: ISDOpcodes.h:1492
@ UNSIGNED_SCALED
Definition: ISDOpcodes.h:1492
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1479
bool isBuildVectorOfConstantFPSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantFPSDNode or undef.
static const int FIRST_TARGET_STRICTFP_OPCODE
FIRST_TARGET_STRICTFP_OPCODE - Target-specific pre-isel operations which cannot raise FP exceptions s...
Definition: ISDOpcodes.h:1413
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1530
bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1510
bool isVPOpcode(unsigned Opcode)
Whether this is a vector-predicated Opcode.
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
Definition: ISDOpcodes.h:1575
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1471
@ Bitcast
Perform the operation on a different, but equivalently sized type.
ABI getTargetABI(StringRef ABIName)
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
Definition: PatternMatch.h:599
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
auto m_Undef()
Match an arbitrary undef constant.
Definition: PatternMatch.h:152
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
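Sketch only: recognising the insertelement-plus-shufflevector splat idiom with the PatternMatch helpers above. The helper name is invented, and note that the two-operand m_Shuffle does not inspect the mask; a real matcher would also verify the mask is all zeros.
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"
using namespace llvm;
using namespace llvm::PatternMatch;

static Value *matchSplatSource(Value *V) {
  Value *Src = nullptr;
  // splat(x) is usually shuffle(insertelement(undef, x, 0), undef, <0,...>).
  if (match(V, m_Shuffle(m_InsertElt(m_Undef(), m_Value(Src), m_ZeroInt()),
                         m_Undef())))
    return Src;
  return nullptr;
}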
@ TAIL_UNDISTURBED_MASK_UNDISTURBED
static VLMUL getLMul(uint64_t TSFlags)
static int getFRMOpNum(const MCInstrDesc &Desc)
static unsigned getSEWOpNum(const MCInstrDesc &Desc)
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #3 and #4) ...
int getLoadFPImm(APFloat FPImm)
getLoadFPImm - Return a 5-bit binary encoding of the floating-point immediate value.
InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI)
int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, bool CompressionCost)
InstSeq generateTwoRegInstSeq(int64_t Val, const MCSubtargetInfo &STI, unsigned &ShiftAmt, unsigned &AddOpc)
static unsigned decodeVSEW(unsigned VSEW)
std::pair< unsigned, bool > decodeVLMUL(RISCVII::VLMUL VLMUL)
static RISCVII::VLMUL encodeLMUL(unsigned LMUL, bool Fractional)
static unsigned encodeSEW(unsigned SEW)
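A small illustration (with assumed example values of LMUL=2 and SEW=32) of how the vtype encode/decode helpers above round-trip; the include path is assumed to be the backend-local header that declares them, as used by this file.
#include "MCTargetDesc/RISCVBaseInfo.h"
#include <cassert>
using namespace llvm;

static void vtypeRoundTrip() {
  // Encode a non-fractional LMUL of 2, then decode it back.
  RISCVII::VLMUL LMul = RISCVVType::encodeLMUL(2, /*Fractional=*/false);
  auto [Scale, Fractional] = RISCVVType::decodeVLMUL(LMul);
  assert(Scale == 2 && !Fractional && "round-trip should preserve LMUL");

  // SEW is stored as log2(SEW/8), so 32 encodes and decodes consistently.
  unsigned VSEW = RISCVVType::encodeSEW(32);
  assert(RISCVVType::decodeVSEW(VSEW) == 32 && "round-trip should preserve SEW");
}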
static constexpr unsigned FPMASK_Negative_Zero
static constexpr unsigned FPMASK_Positive_Subnormal
static constexpr unsigned FPMASK_Positive_Normal
static constexpr unsigned FPMASK_Negative_Subnormal
static constexpr unsigned FPMASK_Negative_Normal
bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI, RVVArgDispatcher &RVVDispatcher)
bool CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI, RVVArgDispatcher &RVVDispatcher)
static constexpr unsigned FPMASK_Positive_Infinity
int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIndex)
static constexpr unsigned FPMASK_Negative_Infinity
static constexpr unsigned FPMASK_Quiet_NaN
ArrayRef< MCPhysReg > getArgGPRs(const RISCVABI::ABI ABI)
bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
static constexpr unsigned FPMASK_Signaling_NaN
static constexpr unsigned FPMASK_Positive_Zero
static constexpr unsigned RVVBitsPerBlock
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ Kill
The last use of a register.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition: LLVMContext.h:54
@ System
Synchronized with respect to all concurrently executing threads.
Definition: LLVMContext.h:57
@ GeneralDynamic
Definition: CodeGen.h:46
Reg
All possible values of the reg field in the ModR/M byte.
@ ReallyHidden
Definition: CommandLine.h:139
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
@ FalseVal
Definition: TGLexer.h:59
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
@ Offset
Definition: DWP.cpp:456
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
static const MachineMemOperand::Flags MONontemporalBit1
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition: MathExtras.h:428
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A,...
Definition: STLExtras.h:2406
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition: MCRegister.h:21
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition: bit.h:317
static const MachineMemOperand::Flags MONontemporalBit0
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:280
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition: Utils.cpp:1509
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:330
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition: MathExtras.h:372
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: bit.h:215
bool isReleaseOrStronger(AtomicOrdering AO)
static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition: STLExtras.h:1928
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:324
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:275
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:257
bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
CombineLevel
Definition: DAGCombine.h:15
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
unsigned getKillRegState(bool B)
DWARFExpression::Operation Op
RoundingMode
Rounding mode.
@ TowardZero
roundTowardZero.
@ NearestTiesToEven
roundTiesToEven.
@ TowardPositive
roundTowardPositive.
@ NearestTiesToAway
roundTiesToAway.
@ TowardNegative
roundTowardNegative.
void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty, SmallVectorImpl< EVT > &ValueVTs, SmallVectorImpl< EVT > *MemVTs, SmallVectorImpl< TypeSize > *Offsets=nullptr, TypeSize StartingOffset=TypeSize::getZero())
ComputeValueVTs - Given an LLVM IR type, compute a sequence of EVTs that represent all the individual...
Definition: Analysis.cpp:79
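A hedged sketch of how ComputeValueVTs is typically driven from lowering code; the function name is invented, and TLI, DL and Ty are assumed to come from the caller:
  #include "llvm/ADT/SmallVector.h"
  #include "llvm/CodeGen/Analysis.h"
  #include "llvm/CodeGen/TargetLowering.h"
  #include "llvm/IR/DataLayout.h"
  void splitIntoValueVTs(const llvm::TargetLowering &TLI,
                         const llvm::DataLayout &DL, llvm::Type *Ty) {
    llvm::SmallVector<llvm::EVT, 4> ValueVTs;
    llvm::ComputeValueVTs(TLI, DL, Ty, ValueVTs);
    for (llvm::EVT VT : ValueVTs) {
      // Each VT is one scalar or vector piece of Ty, in layout order.
      (void)VT;
    }
  }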
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1921
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879
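A combined sketch of the range-based wrappers listed above (all_of, any_of, count_if, is_contained, transform); the data is invented for illustration:
  #include "llvm/ADT/STLExtras.h"
  #include "llvm/ADT/SmallVector.h"
  void rangeHelperExamples() {
    llvm::SmallVector<int, 8> Vals = {2, 4, 6, 7};
    bool AllEven = llvm::all_of(Vals, [](int V) { return V % 2 == 0; });   // false
    bool AnyOdd = llvm::any_of(Vals, [](int V) { return V % 2 != 0; });    // true
    auto NumEven = llvm::count_if(Vals, [](int V) { return V % 2 == 0; }); // 3
    bool HasSix = llvm::is_contained(Vals, 6);                             // true
    llvm::SmallVector<int, 8> Doubled(Vals.size());
    llvm::transform(Vals, Doubled.begin(), [](int V) { return V * 2; });
    (void)AllEven; (void)AnyOdd; (void)NumEven; (void)HasSix;
  }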
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition: MathExtras.h:465
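Worked constants for isMask_64 and SignExtend64 (all checkable at compile time):
  #include "llvm/Support/MathExtras.h"
  static_assert(llvm::isMask_64(0xFF));   // contiguous ones starting at bit 0
  static_assert(!llvm::isMask_64(0xF0));  // zeros below the ones, not a mask
  static_assert(llvm::SignExtend64<12>(0x800) == -2048); // bit 11 is the sign bit
  static_assert(llvm::SignExtend64<12>(0x7FF) == 2047);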
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
InstructionCost Cost
llvm::SmallVector< int, 16 > createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
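A short sketch of createSequentialMask; the header name (llvm/Analysis/VectorUtils.h) is an assumption based on current trees rather than something stated on this page:
  #include "llvm/Analysis/VectorUtils.h" // assumed location of createSequentialMask
  llvm::SmallVector<int, 16> sequentialMaskExample() {
    // {2, 3, 4, 5, -1, -1}: four sequential lanes starting at 2, then two undef lanes.
    return llvm::createSequentialMask(2, 4, 2);
  }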
bool isNeutralConstant(unsigned Opc, SDNodeFlags Flags, SDValue V, unsigned OperandNo)
Returns true if V is a neutral element of Opc with Flags.
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
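A hedged sketch (not code from this file) of how the SDValue constant predicates above are commonly used in a DAG combine; the helper name and the opcode set are invented for illustration:
  #include "llvm/CodeGen/SelectionDAGNodes.h"
  using namespace llvm;
  // Return the LHS when the RHS is the identity constant for the opcode.
  static SDValue foldBinOpIdentity(SDNode *N) {
    SDValue LHS = N->getOperand(0), RHS = N->getOperand(1);
    switch (N->getOpcode()) {
    case ISD::ADD: case ISD::OR: case ISD::XOR:
      return isNullConstant(RHS) ? LHS : SDValue();    // x op 0 -> x
    case ISD::MUL:
      return isOneConstant(RHS) ? LHS : SDValue();     // x * 1 -> x
    case ISD::AND:
      return isAllOnesConstant(RHS) ? LHS : SDValue(); // x & -1 -> x
    default:
      return SDValue();
    }
  }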
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
#define NC
Definition: regutils.h:42
unsigned StepDenominator
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:230
static unsigned int semanticsPrecision(const fltSemantics &)
Definition: APFloat.cpp:292
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Helper struct shared between Function Specialization and SCCP Solver.
Definition: SCCPSolver.h:41
Extended Value Type.
Definition: ValueTypes.h:34
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:93
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:380
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:136
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:73
uint64_t getScalarStoreSize() const
Definition: ValueTypes.h:387
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:274
bool bitsLT(EVT VT) const
Return true if this has fewer bits than VT.
Definition: ValueTypes.h:290
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:340
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:358
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:370
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
Definition: ValueTypes.h:415
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:306
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:64
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:366
bool isFixedLengthVector() const
Definition: ValueTypes.h:177
EVT getRoundIntegerType(LLVMContext &Context) const
Rounds the bit-width of the given integer EVT up to the nearest power of two (and at least to eight),...
Definition: ValueTypes.h:404
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:167
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:313
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:202
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition: ValueTypes.h:173
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:318
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:156
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition: ValueTypes.h:101
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:326
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:298
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:151
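A minimal sketch exercising several of the EVT queries above; the specific types are chosen only for illustration:
  #include "llvm/CodeGen/ValueTypes.h"
  #include "llvm/IR/LLVMContext.h"
  using namespace llvm;
  void evtQueryExamples(LLVMContext &Ctx) {
    EVT VecVT = EVT::getVectorVT(Ctx, MVT::f32, 4);        // v4f32
    bool Fixed = VecVT.isFixedLengthVector();              // true
    unsigned EltBits = VecVT.getScalarSizeInBits();        // 32
    EVT IntVT = VecVT.changeVectorElementTypeToInteger();  // v4i32
    EVT EltVT = IntVT.getVectorElementType();              // i32
    EVT HalfVT = EVT::getIntegerVT(Ctx, 64).getHalfSizedIntegerVT(Ctx); // i32
    (void)Fixed; (void)EltBits; (void)EltVT; (void)HalfVT;
  }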
Align getNonZeroOrigAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
static KnownBits urem(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for urem(LHS, RHS).
Definition: KnownBits.cpp:1030
bool isUnknown() const
Returns true if we don't know any bits.
Definition: KnownBits.h:63
unsigned countMaxTrailingZeros() const
Returns the maximum number of trailing zero bits possible.
Definition: KnownBits.h:270
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
Definition: KnownBits.h:157
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:40
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition: KnownBits.h:168
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:71
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition: KnownBits.h:292
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition: KnownBits.h:307
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition: KnownBits.h:176
static KnownBits udiv(const KnownBits &LHS, const KnownBits &RHS, bool Exact=false)
Compute known bits for udiv(LHS, RHS).
Definition: KnownBits.cpp:988
unsigned countMaxLeadingZeros() const
Returns the maximum number of leading zero bits possible.
Definition: KnownBits.h:276
static KnownBits shl(const KnownBits &LHS, const KnownBits &RHS, bool NUW=false, bool NSW=false, bool ShAmtNonZero=false)
Compute known bits for shl(LHS, RHS).
Definition: KnownBits.cpp:291
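A small sketch of the KnownBits interface above, tracking an 8-bit value whose high nibble is known to be zero; the masks are illustrative only:
  #include "llvm/ADT/APInt.h"
  #include "llvm/Support/KnownBits.h"
  using namespace llvm;
  void knownBitsExamples() {
    KnownBits X(8);                     // nothing known yet
    X.Zero = APInt(8, 0xF0);            // high nibble known zero: X <= 15
    KnownBits Four(8);
    Four.One = APInt(8, 0x04);          // the divisor is known to be exactly 4
    Four.Zero = ~Four.One;
    KnownBits Quot = KnownBits::udiv(X, Four);
    unsigned MaxBits = Quot.countMaxActiveBits(); // quotient is at most 15/4 == 3
    KnownBits Wide = X.zext(16);        // the new high bits become known zero
    unsigned Width = Wide.getBitWidth();          // 16
    (void)MaxBits; (void)Width;
  }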
This class contains a discriminated union of information about pointers in memory operands,...
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
Definition: Alignment.h:141
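Small worked examples for the Align and MaybeAlign helpers above:
  #include "llvm/Support/Alignment.h"
  using namespace llvm;
  void alignmentExamples() {
    Align A(16);                      // must be a non-zero power of two
    unsigned Lg = Log2(A);            // 4
    uint64_t Bytes = A.value();       // 16
    MaybeAlign MA;                    // undefined ("unknown") alignment
    Align Fallback = MA.valueOrOne(); // Align(1) when undefined
    (void)Lg; (void)Bytes; (void)Fallback;
  }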
BitVector getReservedRegs(const MachineFunction &MF) const override
Register getFrameRegister(const MachineFunction &MF) const override
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasDisjoint() const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT, bool Value=true)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
bool CombineTo(SDValue O, SDValue N)