1//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the PPCISelLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "PPCISelLowering.h"
16#include "PPC.h"
17#include "PPCCCState.h"
18#include "PPCCallingConv.h"
19#include "PPCFrameLowering.h"
20#include "PPCInstrInfo.h"
22#include "PPCPerfectShuffle.h"
23#include "PPCRegisterInfo.h"
24#include "PPCSubtarget.h"
25#include "PPCTargetMachine.h"
26#include "llvm/ADT/APFloat.h"
27#include "llvm/ADT/APInt.h"
28#include "llvm/ADT/APSInt.h"
29#include "llvm/ADT/ArrayRef.h"
30#include "llvm/ADT/DenseMap.h"
31#include "llvm/ADT/STLExtras.h"
33#include "llvm/ADT/SmallSet.h"
35#include "llvm/ADT/Statistic.h"
36#include "llvm/ADT/StringRef.h"
60#include "llvm/IR/CallingConv.h"
61#include "llvm/IR/Constant.h"
62#include "llvm/IR/Constants.h"
63#include "llvm/IR/DataLayout.h"
64#include "llvm/IR/DebugLoc.h"
66#include "llvm/IR/Function.h"
67#include "llvm/IR/GlobalValue.h"
68#include "llvm/IR/IRBuilder.h"
70#include "llvm/IR/Intrinsics.h"
71#include "llvm/IR/IntrinsicsPowerPC.h"
72#include "llvm/IR/Module.h"
73#include "llvm/IR/Type.h"
74#include "llvm/IR/Use.h"
75#include "llvm/IR/Value.h"
76#include "llvm/MC/MCContext.h"
77#include "llvm/MC/MCExpr.h"
87#include "llvm/Support/Debug.h"
89#include "llvm/Support/Format.h"
95#include <algorithm>
96#include <cassert>
97#include <cstdint>
98#include <iterator>
99#include <list>
100#include <optional>
101#include <utility>
102#include <vector>
103
104using namespace llvm;
105
106#define DEBUG_TYPE "ppc-lowering"
107
108static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
109cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
110
111static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
112cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
113
114static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
115cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
116
117static cl::opt<bool> DisableSCO("disable-ppc-sco",
118cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
119
120static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
121cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
122
123static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
124cl::desc("use absolute jump tables on ppc"), cl::Hidden);
125
126static cl::opt<bool>
127 DisablePerfectShuffle("ppc-disable-perfect-shuffle",
128 cl::desc("disable vector permute decomposition"),
129 cl::init(true), cl::Hidden);
130
132 "disable-auto-paired-vec-st",
133 cl::desc("disable automatically generated 32byte paired vector stores"),
134 cl::init(true), cl::Hidden);
135
137 "ppc-min-jump-table-entries", cl::init(64), cl::Hidden,
138 cl::desc("Set minimum number of entries to use a jump table on PPC"));
139
141 "ppc-gather-alias-max-depth", cl::init(18), cl::Hidden,
142 cl::desc("max depth when checking alias info in GatherAllAliases()"));
143
145 "ppc-aix-shared-lib-tls-model-opt-limit", cl::init(1), cl::Hidden,
146 cl::desc("Set inclusive limit count of TLS local-dynamic access(es) in a "
147 "function to use initial-exec"));
148
149STATISTIC(NumTailCalls, "Number of tail calls");
150STATISTIC(NumSiblingCalls, "Number of sibling calls");
151STATISTIC(ShufflesHandledWithVPERM,
152 "Number of shuffles lowered to a VPERM or XXPERM");
153STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocation probed");
154
155static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
156
157static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);
158
159static const char AIXSSPCanaryWordName[] = "__ssp_canary_word";
160
161// A faster local-[exec|dynamic] TLS access sequence (enabled with the
162// -maix-small-local-[exec|dynamic]-tls option) can be produced for TLS
163// variables; consistent with the IBM XL compiler, we apply a max size of
164// slightly under 32KB.
166
167// FIXME: Remove this once the bug has been fixed!
169
171 const PPCSubtarget &STI)
172 : TargetLowering(TM), Subtarget(STI) {
173 // Initialize the map that relates the PPC addressing modes to the computed
174 // flags of a load/store instruction. The map is used to determine the optimal
175 // addressing mode when selecting loads and stores.
176 initializeAddrModeMap();
177 // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
178 // arguments are at least 4/8 bytes aligned.
179 bool isPPC64 = Subtarget.isPPC64();
180 setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));
181
182 // Set up the register classes.
183 addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
184 if (!useSoftFloat()) {
185 if (hasSPE()) {
186 addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
187 // EFPU2 APU only supports f32
188 if (!Subtarget.hasEFPU2())
189 addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
190 } else {
191 addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
192 addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
193 }
194 }
195
196 // Match BITREVERSE to customized fast code sequence in the td file.
199
200 // Sub-word ATOMIC_CMP_SWAP needs to ensure that the input is zero-extended.
202
203 // Custom lower inline assembly to check for special registers.
206
207 // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
208 for (MVT VT : MVT::integer_valuetypes()) {
211 }
212
213 if (Subtarget.isISA3_0()) {
214 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);
215 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal);
216 setTruncStoreAction(MVT::f64, MVT::f16, Legal);
217 setTruncStoreAction(MVT::f32, MVT::f16, Legal);
218 } else {
219 // No extending loads from f16 or HW conversions back and forth.
220 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
223 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
226 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
227 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
228 }
229
230 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
231
232 // PowerPC has pre-inc loads and stores.
243 if (!Subtarget.hasSPE()) {
248 }
249
250 // PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry.
251 const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
252 for (MVT VT : ScalarIntVTs) {
257 }
258
259 if (Subtarget.useCRBits()) {
261
262 if (isPPC64 || Subtarget.hasFPCVT()) {
265 isPPC64 ? MVT::i64 : MVT::i32);
268 isPPC64 ? MVT::i64 : MVT::i32);
269
272 isPPC64 ? MVT::i64 : MVT::i32);
275 isPPC64 ? MVT::i64 : MVT::i32);
276
279 isPPC64 ? MVT::i64 : MVT::i32);
282 isPPC64 ? MVT::i64 : MVT::i32);
283
286 isPPC64 ? MVT::i64 : MVT::i32);
289 isPPC64 ? MVT::i64 : MVT::i32);
290 } else {
295 }
296
297 // PowerPC does not support direct load/store of condition registers.
300
301 // FIXME: Remove this once the ANDI glue bug is fixed:
302 if (ANDIGlueBug)
304
305 for (MVT VT : MVT::integer_valuetypes()) {
308 setTruncStoreAction(VT, MVT::i1, Expand);
309 }
310
311 addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
312 }
313
314 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
315 // PPC (the libcall is not available).
320
321 // We do not currently implement these libm ops for PowerPC.
322 setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
323 setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
324 setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
325 setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
327 setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
328
329 // PowerPC has no SREM/UREM instructions unless we are on P9.
330 // On P9 we may use a hardware instruction to compute the remainder.
331 // When the result of both the remainder and the division is required, it is
332 // more efficient to compute the remainder from the result of the division
333 // rather than use the remainder instruction. The instructions are legalized
334 // directly because the DivRemPairsPass performs the transformation at the IR
335 // level.
336 if (Subtarget.isISA3_0()) {
341 } else {
346 }
347
348 // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
357
358 // Handle constrained floating-point operations for scalar types.
359 // TODO: Handle SPE-specific operations.
365
370
371 if (!Subtarget.hasSPE()) {
374 }
375
376 if (Subtarget.hasVSX()) {
379 }
380
381 if (Subtarget.hasFSQRT()) {
384 }
385
386 if (Subtarget.hasFPRND()) {
391
396 }
397
398 // We don't support sin/cos/sqrt/fmod/pow
409
410 // MASS transformation for LLVM intrinsics with a replicating fast-math flag,
411 // to be consistent with the PPCGenScalarMASSEntries pass.
412 if (TM.getOptLevel() == CodeGenOptLevel::Aggressive) {
425 }
426
427 if (Subtarget.hasSPE()) {
430 } else {
431 setOperationAction(ISD::FMA , MVT::f64, Legal);
432 setOperationAction(ISD::FMA , MVT::f32, Legal);
433 }
434
435 if (Subtarget.hasSPE())
436 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
437
439
440 // If we're enabling GP optimizations, use hardware square root
441 if (!Subtarget.hasFSQRT() &&
442 !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
443 Subtarget.hasFRE()))
445
446 if (!Subtarget.hasFSQRT() &&
447 !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
448 Subtarget.hasFRES()))
450
451 if (Subtarget.hasFCPSGN()) {
454 } else {
457 }
458
459 if (Subtarget.hasFPRND()) {
464
469 }
470
471 // Prior to P10, PowerPC does not have BSWAP, but we can use vector BSWAP
472 // instruction xxbrd to speed up scalar BSWAP64.
473 if (Subtarget.isISA3_1()) {
476 } else {
479 ISD::BSWAP, MVT::i64,
480 (Subtarget.hasP9Vector() && Subtarget.isPPC64()) ? Custom : Expand);
481 }
482
483 // CTPOP and CTTZ were introduced in P8 and P9, respectively.
484 if (Subtarget.isISA3_0()) {
485 setOperationAction(ISD::CTTZ , MVT::i32 , Legal);
486 setOperationAction(ISD::CTTZ , MVT::i64 , Legal);
487 } else {
488 setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
489 setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
490 }
491
492 if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
495 } else {
498 }
499
500 // PowerPC does not have ROTR
503
504 if (!Subtarget.useCRBits()) {
505 // PowerPC does not have Select
510 }
511
512 // PowerPC wants to turn select_cc of FP into fsel when possible.
515
516 // PowerPC wants to optimize integer setcc a bit
517 if (!Subtarget.useCRBits())
519
520 if (Subtarget.hasFPU()) {
524
528 }
529
530 // PowerPC does not have BRCOND, which requires SetCC.
531 if (!Subtarget.useCRBits())
533
535
536 if (Subtarget.hasSPE()) {
537 // SPE has built-in conversions
544
545 // SPE supports signaling compare of f32/f64.
548 } else {
549 // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
552
553 // PowerPC does not have [U|S]INT_TO_FP
558 }
559
560 if (Subtarget.hasDirectMove() && isPPC64) {
565 if (TM.Options.UnsafeFPMath) {
574 }
575 } else {
580 }
581
582 // We cannot sextinreg(i1). Expand to shifts.
584
585 // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
586 // SjLj exception handling but rather a light-weight setjmp/longjmp replacement
587 // to support continuation, user-level threading, etc. As a result, no other
588 // SjLj exception interfaces are implemented, so please don't build your own
589 // exception handling based on them.
590 // LLVM/Clang supports zero-cost DWARF exception handling.
593
594 // We want to legalize GlobalAddress and ConstantPool nodes into the
595 // appropriate instructions to materialize the address.
606
607 // TRAP is legal.
608 setOperationAction(ISD::TRAP, MVT::Other, Legal);
609
610 // TRAMPOLINE is custom lowered.
613
614 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
616
617 if (Subtarget.is64BitELFABI()) {
618 // VAARG always uses double-word chunks, so promote anything smaller.
620 AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64);
622 AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64);
624 AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64);
626 AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64);
628 } else if (Subtarget.is32BitELFABI()) {
629 // VAARG is custom lowered with the 32-bit SVR4 ABI.
632 } else
634
635 // VACOPY is custom lowered with the 32-bit SVR4 ABI.
636 if (Subtarget.is32BitELFABI())
638 else
640
641 // Use the default implementation.
642 setOperationAction(ISD::VAEND , MVT::Other, Expand);
651
652 // We want to custom lower some of our intrinsics.
658
659 // To handle counter-based loop conditions.
661
666
667 // Comparisons that require checking two conditions.
668 if (Subtarget.hasSPE()) {
673 }
686
689
690 if (Subtarget.has64BitSupport()) {
691 // They also have instructions for converting between i64 and fp.
700 // This is just the low 32 bits of a (signed) fp->i64 conversion.
701 // We cannot do this with Promote because i64 is not a legal type.
704
705 if (Subtarget.hasLFIWAX() || Subtarget.isPPC64()) {
708 }
709 } else {
710 // PowerPC does not have FP_TO_UINT on 32-bit implementations.
711 if (Subtarget.hasSPE()) {
714 } else {
717 }
718 }
719
720 // With the instructions enabled under FPCVT, we can do everything.
721 if (Subtarget.hasFPCVT()) {
722 if (Subtarget.has64BitSupport()) {
731 }
732
741 }
742
743 if (Subtarget.use64BitRegs()) {
744 // 64-bit PowerPC implementations can support i64 types directly
745 addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
746 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
748 // 64-bit PowerPC wants to expand i128 shifts itself.
752 } else {
753 // 32-bit PowerPC wants to expand i64 shifts itself.
757 }
758
759 // PowerPC has better expansions for funnel shifts than the generic
760 // TargetLowering::expandFunnelShift.
761 if (Subtarget.has64BitSupport()) {
764 }
767
768 if (Subtarget.hasVSX()) {
773 }
774
775 if (Subtarget.hasAltivec()) {
776 for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
781 }
782 // First set operation action for all vector types to expand. Then we
783 // will selectively turn on ones that can be effectively codegen'd.
785 // add/sub are legal for all supported vector VT's.
788
789 // For v2i64, these are only valid with P8Vector. This is corrected after
790 // the loop.
791 if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
796 }
797 else {
802 }
803
804 if (Subtarget.hasVSX()) {
807 }
808
809 // Vector instructions introduced in P8
810 if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
813 }
814 else {
817 }
818
819 // Vector instructions introduced in P9
820 if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
822 else
824
825 // We promote all shuffles to v16i8.
827 AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
828
829 // We promote all non-typed operations to v4i32.
831 AddPromotedToType (ISD::AND , VT, MVT::v4i32);
833 AddPromotedToType (ISD::OR , VT, MVT::v4i32);
835 AddPromotedToType (ISD::XOR , VT, MVT::v4i32);
837 AddPromotedToType (ISD::LOAD , VT, MVT::v4i32);
839 AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
842 AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
844 AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
845
846 // No other operations are legal.
885
886 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
887 setTruncStoreAction(VT, InnerVT, Expand);
890 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
891 }
892 }
894 if (!Subtarget.hasP8Vector()) {
895 setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
896 setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
897 setOperationAction(ISD::UMAX, MVT::v2i64, Expand);
898 setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
899 }
900
901 // We can custom expand all VECTOR_SHUFFLEs to VPERM; others we can handle
902 // with merges, splats, etc.
904
905 // Vector truncates to sub-word integers that fit in an Altivec/VSX register
906 // are cheap, so handle them before they get expanded to scalar.
912
913 setOperationAction(ISD::AND , MVT::v4i32, Legal);
914 setOperationAction(ISD::OR , MVT::v4i32, Legal);
915 setOperationAction(ISD::XOR , MVT::v4i32, Legal);
916 setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
918 Subtarget.useCRBits() ? Legal : Expand);
919 setOperationAction(ISD::STORE , MVT::v4i32, Legal);
929 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
932
933 // Custom lower ROTL v1i128 to VECTOR_SHUFFLE v16i8.
934 setOperationAction(ISD::ROTL, MVT::v1i128, Custom);
935 // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
936 if (Subtarget.hasAltivec())
937 for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
939 // With hasP8Altivec set, we can lower ISD::ROTL to vrld.
940 if (Subtarget.hasP8Altivec())
941 setOperationAction(ISD::ROTL, MVT::v2i64, Legal);
942
943 addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
944 addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
945 addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
946 addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
947
948 setOperationAction(ISD::MUL, MVT::v4f32, Legal);
949 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
950
951 if (Subtarget.hasVSX()) {
952 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
953 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
955 }
956
957 if (Subtarget.hasP8Altivec())
958 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
959 else
960 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
961
962 if (Subtarget.isISA3_1()) {
963 setOperationAction(ISD::MUL, MVT::v2i64, Legal);
964 setOperationAction(ISD::MULHS, MVT::v2i64, Legal);
965 setOperationAction(ISD::MULHU, MVT::v2i64, Legal);
966 setOperationAction(ISD::MULHS, MVT::v4i32, Legal);
967 setOperationAction(ISD::MULHU, MVT::v4i32, Legal);
968 setOperationAction(ISD::UDIV, MVT::v2i64, Legal);
969 setOperationAction(ISD::SDIV, MVT::v2i64, Legal);
970 setOperationAction(ISD::UDIV, MVT::v4i32, Legal);
971 setOperationAction(ISD::SDIV, MVT::v4i32, Legal);
972 setOperationAction(ISD::UREM, MVT::v2i64, Legal);
973 setOperationAction(ISD::SREM, MVT::v2i64, Legal);
974 setOperationAction(ISD::UREM, MVT::v4i32, Legal);
975 setOperationAction(ISD::SREM, MVT::v4i32, Legal);
976 setOperationAction(ISD::UREM, MVT::v1i128, Legal);
977 setOperationAction(ISD::SREM, MVT::v1i128, Legal);
978 setOperationAction(ISD::UDIV, MVT::v1i128, Legal);
979 setOperationAction(ISD::SDIV, MVT::v1i128, Legal);
980 setOperationAction(ISD::ROTL, MVT::v1i128, Legal);
981 }
982
983 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
984 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
985
988
993
994 // Altivec does not contain unordered floating-point compare instructions
995 setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
997 setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
999
1000 if (Subtarget.hasVSX()) {
1003 if (Subtarget.hasP8Vector()) {
1006 }
1007 if (Subtarget.hasDirectMove() && isPPC64) {
1016 }
1018
1019 // The nearbyint variants are not allowed to raise the inexact exception
1020 // so we can only code-gen them with unsafe math.
1021 if (TM.Options.UnsafeFPMath) {
1024 }
1025
1026 setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
1027 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
1028 setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
1030 setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
1031 setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
1034
1036 setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
1037 setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
1040
1041 setOperationAction(ISD::MUL, MVT::v2f64, Legal);
1042 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
1043
1044 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
1045 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
1046
1047 // Share the Altivec comparison restrictions.
1048 setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
1049 setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
1050 setCondCodeAction(ISD::SETO, MVT::v2f64, Expand);
1051 setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
1052
1053 setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
1054 setOperationAction(ISD::STORE, MVT::v2f64, Legal);
1055
1057
1058 if (Subtarget.hasP8Vector())
1059 addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
1060
1061 addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
1062
1063 addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
1064 addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
1065 addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
1066
1067 if (Subtarget.hasP8Altivec()) {
1068 setOperationAction(ISD::SHL, MVT::v2i64, Legal);
1069 setOperationAction(ISD::SRA, MVT::v2i64, Legal);
1070 setOperationAction(ISD::SRL, MVT::v2i64, Legal);
1071
1072 // 128 bit shifts can be accomplished via 3 instructions for SHL and
1073 // SRL, but not for SRA because of the instructions available:
1074 // VS{RL} and VS{RL}O. However, due to direct move costs, it's not worth
1075 // doing.
1076 setOperationAction(ISD::SHL, MVT::v1i128, Expand);
1077 setOperationAction(ISD::SRL, MVT::v1i128, Expand);
1078 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1079
1080 setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
1081 }
1082 else {
1083 setOperationAction(ISD::SHL, MVT::v2i64, Expand);
1084 setOperationAction(ISD::SRA, MVT::v2i64, Expand);
1085 setOperationAction(ISD::SRL, MVT::v2i64, Expand);
1086
1087 setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
1088
1089 // VSX v2i64 only supports non-arithmetic operations.
1090 setOperationAction(ISD::ADD, MVT::v2i64, Expand);
1091 setOperationAction(ISD::SUB, MVT::v2i64, Expand);
1092 }
1093
1094 if (Subtarget.isISA3_1())
1095 setOperationAction(ISD::SETCC, MVT::v1i128, Legal);
1096 else
1097 setOperationAction(ISD::SETCC, MVT::v1i128, Expand);
1098
1099 setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
1100 AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
1102 AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
1103
1105
1114
1115 // Custom handling for partial vectors of integers converted to
1116 // floating point. We already have optimal handling for v2i32 through
1117 // the DAG combine, so those aren't necessary.
1134
1135 setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
1136 setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
1137 setOperationAction(ISD::FABS, MVT::v4f32, Legal);
1138 setOperationAction(ISD::FABS, MVT::v2f64, Legal);
1141
1144
1145 // Handle constrained floating-point operations for vector types.
1146 // The predicate is `hasVSX` because Altivec instructions do not raise
1147 // floating-point exceptions but VSX vector instructions do.
1161
1175
1176 addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
1177 addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
1178
1179 for (MVT FPT : MVT::fp_valuetypes())
1180 setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
1181
1182 // Expand the SELECT to SELECT_CC
1184
1185 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
1186 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
1187
1188 // No implementation for these ops for PowerPC.
1190 setOperationAction(ISD::FSIN, MVT::f128, Expand);
1191 setOperationAction(ISD::FCOS, MVT::f128, Expand);
1192 setOperationAction(ISD::FPOW, MVT::f128, Expand);
1194 setOperationAction(ISD::FREM, MVT::f128, Expand);
1195 }
1196
1197 if (Subtarget.hasP8Altivec()) {
1198 addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
1199 addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
1200 }
1201
1202 if (Subtarget.hasP9Vector()) {
1205
1206 // Test data class instructions store results in CR bits.
1207 if (Subtarget.useCRBits()) {
1211 }
1212
1213 // 128 bit shifts can be accomplished via 3 instructions for SHL and
1214 // SRL, but not for SRA because of the instructions available:
1215 // VS{RL} and VS{RL}O.
1216 setOperationAction(ISD::SHL, MVT::v1i128, Legal);
1217 setOperationAction(ISD::SRL, MVT::v1i128, Legal);
1218 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1219
1220 setOperationAction(ISD::FADD, MVT::f128, Legal);
1221 setOperationAction(ISD::FSUB, MVT::f128, Legal);
1222 setOperationAction(ISD::FDIV, MVT::f128, Legal);
1223 setOperationAction(ISD::FMUL, MVT::f128, Legal);
1225
1226 setOperationAction(ISD::FMA, MVT::f128, Legal);
1233
1235 setOperationAction(ISD::FRINT, MVT::f128, Legal);
1237 setOperationAction(ISD::FCEIL, MVT::f128, Legal);
1240
1244
1245 // Handle constrained floating-point operations for fp128
1262 setOperationAction(ISD::BSWAP, MVT::v8i16, Legal);
1263 setOperationAction(ISD::BSWAP, MVT::v4i32, Legal);
1264 setOperationAction(ISD::BSWAP, MVT::v2i64, Legal);
1265 setOperationAction(ISD::BSWAP, MVT::v1i128, Legal);
1266 } else if (Subtarget.hasVSX()) {
1269
1270 AddPromotedToType(ISD::LOAD, MVT::f128, MVT::v4i32);
1271 AddPromotedToType(ISD::STORE, MVT::f128, MVT::v4i32);
1272
1273 // Set FADD/FSUB as libcalls to avoid having the legalizer expand
1274 // fp_to_uint and int_to_fp.
1277
1278 setOperationAction(ISD::FMUL, MVT::f128, Expand);
1279 setOperationAction(ISD::FDIV, MVT::f128, Expand);
1280 setOperationAction(ISD::FNEG, MVT::f128, Expand);
1281 setOperationAction(ISD::FABS, MVT::f128, Expand);
1283 setOperationAction(ISD::FMA, MVT::f128, Expand);
1285
1286 // Expand the fp_extend if the target type is fp128.
1289
1290 // Expand the fp_round if the source type is fp128.
1291 for (MVT VT : {MVT::f32, MVT::f64}) {
1294 }
1295
1300
1301 // Lower the following f128 select_cc pattern:
1302 // select_cc x, y, tv, fv, cc -> select_cc (setcc x, y, cc), 0, tv, fv, NE
1304
1305 // We need to handle f128 SELECT_CC with integer result type.
1307 setOperationAction(ISD::SELECT_CC, MVT::i64, isPPC64 ? Custom : Expand);
1308 }
1309
1310 if (Subtarget.hasP9Altivec()) {
1311 if (Subtarget.isISA3_1()) {
1316 } else {
1319 }
1327
1328 setOperationAction(ISD::ABDU, MVT::v16i8, Legal);
1329 setOperationAction(ISD::ABDU, MVT::v8i16, Legal);
1330 setOperationAction(ISD::ABDU, MVT::v4i32, Legal);
1331 setOperationAction(ISD::ABDS, MVT::v4i32, Legal);
1332 }
1333
1334 if (Subtarget.hasP10Vector()) {
1336 }
1337 }
1338
1339 if (Subtarget.pairedVectorMemops()) {
1340 addRegisterClass(MVT::v256i1, &PPC::VSRpRCRegClass);
1341 setOperationAction(ISD::LOAD, MVT::v256i1, Custom);
1342 setOperationAction(ISD::STORE, MVT::v256i1, Custom);
1343 }
1344 if (Subtarget.hasMMA()) {
1345 if (Subtarget.isISAFuture())
1346 addRegisterClass(MVT::v512i1, &PPC::WACCRCRegClass);
1347 else
1348 addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass);
1349 setOperationAction(ISD::LOAD, MVT::v512i1, Custom);
1350 setOperationAction(ISD::STORE, MVT::v512i1, Custom);
1352 }
1353
1354 if (Subtarget.has64BitSupport())
1356
1357 if (Subtarget.isISA3_1())
1358 setOperationAction(ISD::SRA, MVT::v1i128, Legal);
1359
1360 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);
1361
1362 if (!isPPC64) {
1365 }
1366
1371 }
1372
1374
1375 if (Subtarget.hasAltivec()) {
1376 // Altivec instructions set fields to all zeros or all ones.
1378 }
1379
1380 setLibcallName(RTLIB::MULO_I128, nullptr);
1381 if (!isPPC64) {
1382 // These libcalls are not available in 32-bit.
1383 setLibcallName(RTLIB::SHL_I128, nullptr);
1384 setLibcallName(RTLIB::SRL_I128, nullptr);
1385 setLibcallName(RTLIB::SRA_I128, nullptr);
1386 setLibcallName(RTLIB::MUL_I128, nullptr);
1387 setLibcallName(RTLIB::MULO_I64, nullptr);
1388 }
1389
1392 else if (isPPC64)
1394 else
1396
1397 setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
1398
1399 // We have target-specific dag combine patterns for the following nodes:
1402 if (Subtarget.hasFPCVT())
1405 if (Subtarget.useCRBits())
1409
1411
1413
1414 if (Subtarget.useCRBits()) {
1416 }
1417
1418 setLibcallName(RTLIB::LOG_F128, "logf128");
1419 setLibcallName(RTLIB::LOG2_F128, "log2f128");
1420 setLibcallName(RTLIB::LOG10_F128, "log10f128");
1421 setLibcallName(RTLIB::EXP_F128, "expf128");
1422 setLibcallName(RTLIB::EXP2_F128, "exp2f128");
1423 setLibcallName(RTLIB::SIN_F128, "sinf128");
1424 setLibcallName(RTLIB::COS_F128, "cosf128");
1425 setLibcallName(RTLIB::SINCOS_F128, "sincosf128");
1426 setLibcallName(RTLIB::POW_F128, "powf128");
1427 setLibcallName(RTLIB::FMIN_F128, "fminf128");
1428 setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
1429 setLibcallName(RTLIB::REM_F128, "fmodf128");
1430 setLibcallName(RTLIB::SQRT_F128, "sqrtf128");
1431 setLibcallName(RTLIB::CEIL_F128, "ceilf128");
1432 setLibcallName(RTLIB::FLOOR_F128, "floorf128");
1433 setLibcallName(RTLIB::TRUNC_F128, "truncf128");
1434 setLibcallName(RTLIB::ROUND_F128, "roundf128");
1435 setLibcallName(RTLIB::LROUND_F128, "lroundf128");
1436 setLibcallName(RTLIB::LLROUND_F128, "llroundf128");
1437 setLibcallName(RTLIB::RINT_F128, "rintf128");
1438 setLibcallName(RTLIB::LRINT_F128, "lrintf128");
1439 setLibcallName(RTLIB::LLRINT_F128, "llrintf128");
1440 setLibcallName(RTLIB::NEARBYINT_F128, "nearbyintf128");
1441 setLibcallName(RTLIB::FMA_F128, "fmaf128");
1442 setLibcallName(RTLIB::FREXP_F128, "frexpf128");
1443
1444 if (Subtarget.isAIXABI()) {
1445 setLibcallName(RTLIB::MEMCPY, isPPC64 ? "___memmove64" : "___memmove");
1446 setLibcallName(RTLIB::MEMMOVE, isPPC64 ? "___memmove64" : "___memmove");
1447 setLibcallName(RTLIB::MEMSET, isPPC64 ? "___memset64" : "___memset");
1448 setLibcallName(RTLIB::BZERO, isPPC64 ? "___bzero64" : "___bzero");
1449 }
1450
1451 // With 32 condition bits, we don't need to sink (and duplicate) compares
1452 // aggressively in CodeGenPrep.
1453 if (Subtarget.useCRBits()) {
1456 }
1457
1458 // TODO: The default entry number is set to 64. This stops most jump table
1459 // generation on PPC. But it is good for current PPC HWs because the indirect
1460 // branch instruction mtctr to the jump table may lead to bad branch prediction.
1461 // Re-evaluate this value on future HWs that can do better with mtctr.
1463
1465
1466 switch (Subtarget.getCPUDirective()) {
1467 default: break;
1468 case PPC::DIR_970:
1469 case PPC::DIR_A2:
1470 case PPC::DIR_E500:
1471 case PPC::DIR_E500mc:
1472 case PPC::DIR_E5500:
1473 case PPC::DIR_PWR4:
1474 case PPC::DIR_PWR5:
1475 case PPC::DIR_PWR5X:
1476 case PPC::DIR_PWR6:
1477 case PPC::DIR_PWR6X:
1478 case PPC::DIR_PWR7:
1479 case PPC::DIR_PWR8:
1480 case PPC::DIR_PWR9:
1481 case PPC::DIR_PWR10:
1485 break;
1486 }
1487
1488 if (Subtarget.enableMachineScheduler())
1490 else
1492
1494
1495 // The Freescale cores do better with aggressive inlining of memcpy and
1496 // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
1497 if (Subtarget.getCPUDirective() == PPC::DIR_E500mc ||
1498 Subtarget.getCPUDirective() == PPC::DIR_E5500) {
1499 MaxStoresPerMemset = 32;
1501 MaxStoresPerMemcpy = 32;
1505 } else if (Subtarget.getCPUDirective() == PPC::DIR_A2) {
1506 // The A2 also benefits from (very) aggressive inlining of memcpy and
1507 // friends. The overhead of the function call, even when warm, can be
1508 // over one hundred cycles.
1509 MaxStoresPerMemset = 128;
1510 MaxStoresPerMemcpy = 128;
1511 MaxStoresPerMemmove = 128;
1512 MaxLoadsPerMemcmp = 128;
1513 } else {
1516 }
1517
1518 IsStrictFPEnabled = true;
1519
1520 // Let the subtarget (CPU) decide if a predictable select is more expensive
1521 // than the corresponding branch. This information is used in CGP to decide
1522 // when to convert selects into branches.
1524
1526}
1527
1528// *********************************** NOTE ************************************
1529// For selecting load and store instructions, the addressing modes are defined
1530// as ComplexPatterns in PPCInstrInfo.td, which are then utilized in the TD
1531 // patterns to match the load and store instructions.
1532//
1533// The TD definitions for the addressing modes correspond to their respective
1534// Select<AddrMode>Form() function in PPCISelDAGToDAG.cpp. These functions rely
1535// on SelectOptimalAddrMode(), which calls computeMOFlags() to compute the
1536// address mode flags of a particular node. Afterwards, the computed address
1537// flags are passed into getAddrModeForFlags() in order to retrieve the optimal
1538// addressing mode. SelectOptimalAddrMode() then sets the Base and Displacement
1539// accordingly, based on the preferred addressing mode.
1540//
1541// Within PPCISelLowering.h, there are two enums: MemOpFlags and AddrMode.
1542// MemOpFlags contains all the possible flags that can be used to compute the
1543// optimal addressing mode for load and store instructions.
1544// AddrMode contains all the possible load and store addressing modes available
1545// on Power (such as DForm, DSForm, DQForm, XForm, etc.)
1546//
1547// When adding new load and store instructions, it is possible that new address
1548// flags may need to be added into MemOpFlags, and a new addressing mode will
1549 // need to be added to AddrMode. An entry for the new addressing mode (consisting
1550 // of the minimal and main distinguishing address flags for the new load/store
1551 // instructions) will need to be added into initializeAddrModeMap() below.
1552 // Finally, when adding new addressing modes, getAddrModeForFlags() will
1553// need to be updated to account for selecting the optimal addressing mode.
1554// *****************************************************************************
1555/// Initialize the map that relates the different addressing modes of the load
1556/// and store instructions to a set of flags. This ensures the load/store
1557/// instruction is correctly matched during instruction selection.
1558void PPCTargetLowering::initializeAddrModeMap() {
1559 AddrModesMap[PPC::AM_DForm] = {
1560 // LWZ, STW
1565 // LBZ, LHZ, STB, STH
1570 // LHA
1575 // LFS, LFD, STFS, STFD
1580 };
1581 AddrModesMap[PPC::AM_DSForm] = {
1582 // LWA
1586 // LD, STD
1590 // DFLOADf32, DFLOADf64, DSTOREf32, DSTOREf64
1594 };
1595 AddrModesMap[PPC::AM_DQForm] = {
1596 // LXV, STXV
1600 };
1601 AddrModesMap[PPC::AM_PrefixDForm] = {PPC::MOF_RPlusSImm34 |
1603 // TODO: Add mapping for quadword load/store.
1604}
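// Illustrative sketch of the pattern described in the NOTE above: each entry
// in AddrModesMap ORs together the minimal distinguishing MemOpFlags for the
// loads/stores of one addressing mode. A hypothetical new mode would follow
// the same shape; PPC::AM_NewForm and PPC::MOF_NewTypeFlag below are
// placeholders (only PPC::MOF_RPlusSImm34 is a real flag taken from the code
// above):
//
//   AddrModesMap[PPC::AM_NewForm] = {
//       // NEWLOAD, NEWSTORE
//       PPC::MOF_RPlusSImm34 | PPC::MOF_NewTypeFlag,
//   };
//
// getAddrModeForFlags() would then need a case that returns PPC::AM_NewForm
// when these computed flags are seen.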
1605
1606/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1607/// the desired ByVal argument alignment.
1608static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {
1609 if (MaxAlign == MaxMaxAlign)
1610 return;
1611 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1612 if (MaxMaxAlign >= 32 &&
1613 VTy->getPrimitiveSizeInBits().getFixedValue() >= 256)
1614 MaxAlign = Align(32);
1615 else if (VTy->getPrimitiveSizeInBits().getFixedValue() >= 128 &&
1616 MaxAlign < 16)
1617 MaxAlign = Align(16);
1618 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1619 Align EltAlign;
1620 getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
1621 if (EltAlign > MaxAlign)
1622 MaxAlign = EltAlign;
1623 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1624 for (auto *EltTy : STy->elements()) {
1625 Align EltAlign;
1626 getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
1627 if (EltAlign > MaxAlign)
1628 MaxAlign = EltAlign;
1629 if (MaxAlign == MaxMaxAlign)
1630 break;
1631 }
1632 }
1633}
1634
1635/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1636/// function arguments in the caller parameter area.
1638 const DataLayout &DL) const {
1639 // 16-byte and wider vectors are passed on a 16-byte boundary.
1640 // The rest are on an 8-byte boundary on PPC64 and a 4-byte boundary on PPC32.
1641 Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
1642 if (Subtarget.hasAltivec())
1643 getMaxByValAlign(Ty, Alignment, Align(16));
1644 return Alignment.value();
1645}
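// Worked example (hypothetical caller; Ctx, DL and TLI are assumed to be an
// LLVMContext, DataLayout and PPCTargetLowering already in scope): with
// Altivec enabled, a by-value struct containing a 128-bit vector member hits
// the VectorType case in getMaxByValAlign() and is reported as 16-byte
// aligned, while a struct of plain scalars keeps the default 8 (PPC64) or
// 4 (PPC32) bytes.
//
//   StructType *STy = StructType::get(
//       Ctx, {FixedVectorType::get(Type::getInt32Ty(Ctx), 4)});
//   uint64_t ByValAlign = TLI.getByValTypeAlignment(STy, DL); // 16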
1646
1648 return Subtarget.useSoftFloat();
1649}
1650
1652 return Subtarget.hasSPE();
1653}
1654
1656 return VT.isScalarInteger();
1657}
1658
1660 Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const {
1661 if (!Subtarget.isPPC64() || !Subtarget.hasVSX())
1662 return false;
1663
1664 if (auto *VTy = dyn_cast<VectorType>(VectorTy)) {
1665 if (VTy->getScalarType()->isIntegerTy()) {
1666 // An ElemSizeInBits of 8/16 fits in the immediate field and is not needed here.
1667 if (ElemSizeInBits == 32) {
1668 Index = Subtarget.isLittleEndian() ? 2 : 1;
1669 return true;
1670 }
1671 if (ElemSizeInBits == 64) {
1672 Index = Subtarget.isLittleEndian() ? 1 : 0;
1673 return true;
1674 }
1675 }
1676 }
1677 return false;
1678}
1679
1680const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
1681 switch ((PPCISD::NodeType)Opcode) {
1682 case PPCISD::FIRST_NUMBER: break;
1683 case PPCISD::FSEL: return "PPCISD::FSEL";
1684 case PPCISD::XSMAXC: return "PPCISD::XSMAXC";
1685 case PPCISD::XSMINC: return "PPCISD::XSMINC";
1686 case PPCISD::FCFID: return "PPCISD::FCFID";
1687 case PPCISD::FCFIDU: return "PPCISD::FCFIDU";
1688 case PPCISD::FCFIDS: return "PPCISD::FCFIDS";
1689 case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS";
1690 case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
1691 case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
1692 case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ";
1693 case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ";
1694 case PPCISD::FRE: return "PPCISD::FRE";
1695 case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
1696 case PPCISD::FTSQRT:
1697 return "PPCISD::FTSQRT";
1698 case PPCISD::FSQRT:
1699 return "PPCISD::FSQRT";
1700 case PPCISD::STFIWX: return "PPCISD::STFIWX";
1701 case PPCISD::VPERM: return "PPCISD::VPERM";
1702 case PPCISD::XXSPLT: return "PPCISD::XXSPLT";
1704 return "PPCISD::XXSPLTI_SP_TO_DP";
1706 return "PPCISD::XXSPLTI32DX";
1707 case PPCISD::VECINSERT: return "PPCISD::VECINSERT";
1708 case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI";
1709 case PPCISD::XXPERM:
1710 return "PPCISD::XXPERM";
1711 case PPCISD::VECSHL: return "PPCISD::VECSHL";
1712 case PPCISD::CMPB: return "PPCISD::CMPB";
1713 case PPCISD::Hi: return "PPCISD::Hi";
1714 case PPCISD::Lo: return "PPCISD::Lo";
1715 case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
1716 case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
1717 case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
1718 case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
1719 case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET";
1720 case PPCISD::PROBED_ALLOCA: return "PPCISD::PROBED_ALLOCA";
1721 case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
1722 case PPCISD::SRL: return "PPCISD::SRL";
1723 case PPCISD::SRA: return "PPCISD::SRA";
1724 case PPCISD::SHL: return "PPCISD::SHL";
1725 case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE";
1726 case PPCISD::CALL: return "PPCISD::CALL";
1727 case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
1728 case PPCISD::CALL_NOTOC: return "PPCISD::CALL_NOTOC";
1729 case PPCISD::CALL_RM:
1730 return "PPCISD::CALL_RM";
1732 return "PPCISD::CALL_NOP_RM";
1734 return "PPCISD::CALL_NOTOC_RM";
1735 case PPCISD::MTCTR: return "PPCISD::MTCTR";
1736 case PPCISD::BCTRL: return "PPCISD::BCTRL";
1737 case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
1738 case PPCISD::BCTRL_RM:
1739 return "PPCISD::BCTRL_RM";
1741 return "PPCISD::BCTRL_LOAD_TOC_RM";
1742 case PPCISD::RET_GLUE: return "PPCISD::RET_GLUE";
1743 case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE";
1744 case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
1745 case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
1746 case PPCISD::MFOCRF: return "PPCISD::MFOCRF";
1747 case PPCISD::MFVSR: return "PPCISD::MFVSR";
1748 case PPCISD::MTVSRA: return "PPCISD::MTVSRA";
1749 case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ";
1750 case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP";
1751 case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP";
1753 return "PPCISD::SCALAR_TO_VECTOR_PERMUTED";
1755 return "PPCISD::ANDI_rec_1_EQ_BIT";
1757 return "PPCISD::ANDI_rec_1_GT_BIT";
1758 case PPCISD::VCMP: return "PPCISD::VCMP";
1759 case PPCISD::VCMP_rec: return "PPCISD::VCMP_rec";
1760 case PPCISD::LBRX: return "PPCISD::LBRX";
1761 case PPCISD::STBRX: return "PPCISD::STBRX";
1762 case PPCISD::LFIWAX: return "PPCISD::LFIWAX";
1763 case PPCISD::LFIWZX: return "PPCISD::LFIWZX";
1764 case PPCISD::LXSIZX: return "PPCISD::LXSIZX";
1765 case PPCISD::STXSIX: return "PPCISD::STXSIX";
1766 case PPCISD::VEXTS: return "PPCISD::VEXTS";
1767 case PPCISD::LXVD2X: return "PPCISD::LXVD2X";
1768 case PPCISD::STXVD2X: return "PPCISD::STXVD2X";
1769 case PPCISD::LOAD_VEC_BE: return "PPCISD::LOAD_VEC_BE";
1770 case PPCISD::STORE_VEC_BE: return "PPCISD::STORE_VEC_BE";
1772 return "PPCISD::ST_VSR_SCAL_INT";
1773 case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
1774 case PPCISD::BDNZ: return "PPCISD::BDNZ";
1775 case PPCISD::BDZ: return "PPCISD::BDZ";
1776 case PPCISD::MFFS: return "PPCISD::MFFS";
1777 case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
1778 case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
1779 case PPCISD::CR6SET: return "PPCISD::CR6SET";
1780 case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
1781 case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT";
1782 case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT";
1783 case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
1784 case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";
1785 case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
1786 case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
1787 case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
1788 case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
1789 case PPCISD::GET_TLS_MOD_AIX: return "PPCISD::GET_TLS_MOD_AIX";
1790 case PPCISD::GET_TPOINTER: return "PPCISD::GET_TPOINTER";
1791 case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
1792 case PPCISD::TLSGD_AIX: return "PPCISD::TLSGD_AIX";
1793 case PPCISD::TLSLD_AIX: return "PPCISD::TLSLD_AIX";
1794 case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
1795 case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
1796 case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
1797 case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
1798 case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
1799 case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
1801 return "PPCISD::PADDI_DTPREL";
1802 case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
1803 case PPCISD::SC: return "PPCISD::SC";
1804 case PPCISD::CLRBHRB: return "PPCISD::CLRBHRB";
1805 case PPCISD::MFBHRBE: return "PPCISD::MFBHRBE";
1806 case PPCISD::RFEBB: return "PPCISD::RFEBB";
1807 case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD";
1808 case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN";
1809 case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128";
1810 case PPCISD::BUILD_SPE64: return "PPCISD::BUILD_SPE64";
1811 case PPCISD::EXTRACT_SPE: return "PPCISD::EXTRACT_SPE";
1812 case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI";
1813 case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH";
1814 case PPCISD::FP_EXTEND_HALF: return "PPCISD::FP_EXTEND_HALF";
1815 case PPCISD::MAT_PCREL_ADDR: return "PPCISD::MAT_PCREL_ADDR";
1817 return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR";
1819 return "PPCISD::TLS_LOCAL_EXEC_MAT_ADDR";
1820 case PPCISD::ACC_BUILD: return "PPCISD::ACC_BUILD";
1821 case PPCISD::PAIR_BUILD: return "PPCISD::PAIR_BUILD";
1822 case PPCISD::EXTRACT_VSX_REG: return "PPCISD::EXTRACT_VSX_REG";
1823 case PPCISD::XXMFACC: return "PPCISD::XXMFACC";
1824 case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT";
1825 case PPCISD::ZEXT_LD_SPLAT: return "PPCISD::ZEXT_LD_SPLAT";
1826 case PPCISD::SEXT_LD_SPLAT: return "PPCISD::SEXT_LD_SPLAT";
1827 case PPCISD::FNMSUB: return "PPCISD::FNMSUB";
1829 return "PPCISD::STRICT_FADDRTZ";
1831 return "PPCISD::STRICT_FCTIDZ";
1833 return "PPCISD::STRICT_FCTIWZ";
1835 return "PPCISD::STRICT_FCTIDUZ";
1837 return "PPCISD::STRICT_FCTIWUZ";
1839 return "PPCISD::STRICT_FCFID";
1841 return "PPCISD::STRICT_FCFIDU";
1843 return "PPCISD::STRICT_FCFIDS";
1845 return "PPCISD::STRICT_FCFIDUS";
1846 case PPCISD::LXVRZX: return "PPCISD::LXVRZX";
1847 case PPCISD::STORE_COND:
1848 return "PPCISD::STORE_COND";
1849 }
1850 return nullptr;
1851}
1852
1854 EVT VT) const {
1855 if (!VT.isVector())
1856 return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
1857
1859}
1860
1862 assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
1863 return true;
1864}
1865
1866//===----------------------------------------------------------------------===//
1867// Node matching predicates, for use by the tblgen matching code.
1868//===----------------------------------------------------------------------===//
1869
1870/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
1872 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
1873 return CFP->getValueAPF().isZero();
1874 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
1875 // Maybe this has already been legalized into the constant pool?
1876 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
1877 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
1878 return CFP->getValueAPF().isZero();
1879 }
1880 return false;
1881}
1882
1883/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
1884/// true if Op is undef or if it matches the specified value.
1885static bool isConstantOrUndef(int Op, int Val) {
1886 return Op < 0 || Op == Val;
1887}
1888
1889/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
1890/// VPKUHUM instruction.
1891/// The ShuffleKind distinguishes between big-endian operations with
1892/// two different inputs (0), either-endian operations with two identical
1893/// inputs (1), and little-endian operations with two different inputs (2).
1894/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1896 SelectionDAG &DAG) {
1897 bool IsLE = DAG.getDataLayout().isLittleEndian();
1898 if (ShuffleKind == 0) {
1899 if (IsLE)
1900 return false;
1901 for (unsigned i = 0; i != 16; ++i)
1902 if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
1903 return false;
1904 } else if (ShuffleKind == 2) {
1905 if (!IsLE)
1906 return false;
1907 for (unsigned i = 0; i != 16; ++i)
1908 if (!isConstantOrUndef(N->getMaskElt(i), i*2))
1909 return false;
1910 } else if (ShuffleKind == 1) {
1911 unsigned j = IsLE ? 0 : 1;
1912 for (unsigned i = 0; i != 8; ++i)
1913 if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) ||
1914 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
1915 return false;
1916 }
1917 return true;
1918}
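// Worked example, derived from the ShuffleKind == 0 loop above: for a
// big-endian shuffle with two distinct inputs, the accepted mask is
//   <1,3,5, ..., 29,31>
// i.e. the low-order byte of each halfword across both inputs, which is the
// result layout produced by vpkuhum.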
1919
1920/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
1921/// VPKUWUM instruction.
1922/// The ShuffleKind distinguishes between big-endian operations with
1923/// two different inputs (0), either-endian operations with two identical
1924/// inputs (1), and little-endian operations with two different inputs (2).
1925/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1927 SelectionDAG &DAG) {
1928 bool IsLE = DAG.getDataLayout().isLittleEndian();
1929 if (ShuffleKind == 0) {
1930 if (IsLE)
1931 return false;
1932 for (unsigned i = 0; i != 16; i += 2)
1933 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
1934 !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
1935 return false;
1936 } else if (ShuffleKind == 2) {
1937 if (!IsLE)
1938 return false;
1939 for (unsigned i = 0; i != 16; i += 2)
1940 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1941 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
1942 return false;
1943 } else if (ShuffleKind == 1) {
1944 unsigned j = IsLE ? 0 : 2;
1945 for (unsigned i = 0; i != 8; i += 2)
1946 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1947 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1948 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1949 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
1950 return false;
1951 }
1952 return true;
1953}
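// Worked example, derived from the ShuffleKind == 0 loop above: for a
// big-endian shuffle with two distinct inputs, the accepted mask is
//   <2,3, 6,7, 10,11, 14,15, 18,19, 22,23, 26,27, 30,31>
// i.e. the low-order halfword of each word across both inputs, matching the
// result layout of vpkuwum.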
1954
1955/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
1956/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
1957/// current subtarget.
1958///
1959/// The ShuffleKind distinguishes between big-endian operations with
1960/// two different inputs (0), either-endian operations with two identical
1961/// inputs (1), and little-endian operations with two different inputs (2).
1962/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1964 SelectionDAG &DAG) {
1965 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
1966 if (!Subtarget.hasP8Vector())
1967 return false;
1968
1969 bool IsLE = DAG.getDataLayout().isLittleEndian();
1970 if (ShuffleKind == 0) {
1971 if (IsLE)
1972 return false;
1973 for (unsigned i = 0; i != 16; i += 4)
1974 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+4) ||
1975 !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) ||
1976 !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) ||
1977 !isConstantOrUndef(N->getMaskElt(i+3), i*2+7))
1978 return false;
1979 } else if (ShuffleKind == 2) {
1980 if (!IsLE)
1981 return false;
1982 for (unsigned i = 0; i != 16; i += 4)
1983 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1984 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) ||
1985 !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||
1986 !isConstantOrUndef(N->getMaskElt(i+3), i*2+3))
1987 return false;
1988 } else if (ShuffleKind == 1) {
1989 unsigned j = IsLE ? 0 : 4;
1990 for (unsigned i = 0; i != 8; i += 4)
1991 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1992 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1993 !isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) ||
1994 !isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) ||
1995 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1996 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) ||
1997 !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
1998 !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
1999 return false;
2000 }
2001 return true;
2002}
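// Worked example, derived from the ShuffleKind == 0 loop above: for a
// big-endian shuffle with two distinct inputs, the accepted mask is
//   <4,5,6,7, 12,13,14,15, 20,21,22,23, 28,29,30,31>
// i.e. the low-order word of each doubleword across both inputs, matching the
// result layout of vpkudum.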
2003
2004/// isVMerge - Common function, used to match vmrg* shuffles.
2005///
2006static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
2007 unsigned LHSStart, unsigned RHSStart) {
2008 if (N->getValueType(0) != MVT::v16i8)
2009 return false;
2010 assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
2011 "Unsupported merge size!");
2012
2013 for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units
2014 for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit
2015 if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
2016 LHSStart+j+i*UnitSize) ||
2017 !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
2018 RHSStart+j+i*UnitSize))
2019 return false;
2020 }
2021 return true;
2022}
2023
2024/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
2025/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
2026/// The ShuffleKind distinguishes between big-endian merges with two
2027/// different inputs (0), either-endian merges with two identical inputs (1),
2028/// and little-endian merges with two different inputs (2). For the latter,
2029/// the input operands are swapped (see PPCInstrAltivec.td).
2031 unsigned ShuffleKind, SelectionDAG &DAG) {
2032 if (DAG.getDataLayout().isLittleEndian()) {
2033 if (ShuffleKind == 1) // unary
2034 return isVMerge(N, UnitSize, 0, 0);
2035 else if (ShuffleKind == 2) // swapped
2036 return isVMerge(N, UnitSize, 0, 16);
2037 else
2038 return false;
2039 } else {
2040 if (ShuffleKind == 1) // unary
2041 return isVMerge(N, UnitSize, 8, 8);
2042 else if (ShuffleKind == 0) // normal
2043 return isVMerge(N, UnitSize, 8, 24);
2044 else
2045 return false;
2046 }
2047}
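// Worked example, derived from isVMerge() above: for a big-endian merge with
// two distinct inputs (ShuffleKind 0) and UnitSize == 4, the accepted mask is
//   <8,9,10,11, 24,25,26,27, 12,13,14,15, 28,29,30,31>
// i.e. the low words of both inputs interleaved, which is what vmrglw
// produces.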
2048
2049/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
2050/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
2051/// The ShuffleKind distinguishes between big-endian merges with two
2052/// different inputs (0), either-endian merges with two identical inputs (1),
2053/// and little-endian merges with two different inputs (2). For the latter,
2054/// the input operands are swapped (see PPCInstrAltivec.td).
2056 unsigned ShuffleKind, SelectionDAG &DAG) {
2057 if (DAG.getDataLayout().isLittleEndian()) {
2058 if (ShuffleKind == 1) // unary
2059 return isVMerge(N, UnitSize, 8, 8);
2060 else if (ShuffleKind == 2) // swapped
2061 return isVMerge(N, UnitSize, 8, 24);
2062 else
2063 return false;
2064 } else {
2065 if (ShuffleKind == 1) // unary
2066 return isVMerge(N, UnitSize, 0, 0);
2067 else if (ShuffleKind == 0) // normal
2068 return isVMerge(N, UnitSize, 0, 16);
2069 else
2070 return false;
2071 }
2072}
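// Worked example, derived from isVMerge() above: for a big-endian merge with
// two distinct inputs (ShuffleKind 0) and UnitSize == 4, the accepted mask is
//   <0,1,2,3, 16,17,18,19, 4,5,6,7, 20,21,22,23>
// i.e. the high words of both inputs interleaved, which is what vmrghw
// produces.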
2073
2074/**
2075 * Common function used to match vmrgew and vmrgow shuffles
2076 *
2077 * The indexOffset determines whether to look for even or odd words in
2078 * the shuffle mask. This is based on the endianness of the target
2079 * machine.
2080 * - Little Endian:
2081 * - Use offset of 0 to check for odd elements
2082 * - Use offset of 4 to check for even elements
2083 * - Big Endian:
2084 * - Use offset of 0 to check for even elements
2085 * - Use offset of 4 to check for odd elements
2086 * A detailed description of the vector element ordering for little endian and
2087 * big endian can be found at
2088 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
2089 * Targeting your applications - what little endian and big endian IBM XL C/C++
2090 * compiler differences mean to you
2091 *
2092 * The mask to the shuffle vector instruction specifies the indices of the
2093 * elements from the two input vectors to place in the result. The elements are
2094 * numbered in array-access order, starting with the first vector. These vectors
2095 * are always of type v16i8, thus each vector will contain 16 elements of
2096 * 8 bits each. More info on the shuffle vector can be found in the
2097 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
2098 * Language Reference.
2099 *
2100 * The RHSStartValue indicates whether the same input vectors are used (unary)
2101 * or two different input vectors are used, based on the following:
2102 * - If the instruction uses the same vector for both inputs, the range of the
2103 * indices will be 0 to 15. In this case, the RHSStart value passed should
2104 * be 0.
2105 * - If the instruction has two different vectors then the range of the
2106 * indices will be 0 to 31. In this case, the RHSStart value passed should
2107 * be 16 (indices 0-15 specify elements in the first vector while indices 16
2108 * to 31 specify elements in the second vector).
2109 *
2110 * \param[in] N The shuffle vector SD Node to analyze
2111 * \param[in] IndexOffset Specifies whether to look for even or odd elements
2112 * \param[in] RHSStartValue Specifies the starting index for the righthand input
2113 * vector to the shuffle_vector instruction
2114 * \return true iff this shuffle vector represents an even or odd word merge
2115 */
2116static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
2117 unsigned RHSStartValue) {
2118 if (N->getValueType(0) != MVT::v16i8)
2119 return false;
2120
2121 for (unsigned i = 0; i < 2; ++i)
2122 for (unsigned j = 0; j < 4; ++j)
2123 if (!isConstantOrUndef(N->getMaskElt(i*4+j),
2124 i*RHSStartValue+j+IndexOffset) ||
2125 !isConstantOrUndef(N->getMaskElt(i*4+j+8),
2126 i*RHSStartValue+j+IndexOffset+8))
2127 return false;
2128 return true;
2129}
2130
2131/**
2132 * Determine if the specified shuffle mask is suitable for the vmrgew or
2133 * vmrgow instructions.
2134 *
2135 * \param[in] N The shuffle vector SD Node to analyze
2136 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
2137 * \param[in] ShuffleKind Identify the type of merge:
2138 * - 0 = big-endian merge with two different inputs;
2139 * - 1 = either-endian merge with two identical inputs;
2140 * - 2 = little-endian merge with two different inputs (inputs are swapped for
2141 * little-endian merges).
2142 * \param[in] DAG The current SelectionDAG
2143 * \return true iff this shuffle mask is suitable for a vmrgew or vmrgow merge
2144 */
2146 unsigned ShuffleKind, SelectionDAG &DAG) {
2147 if (DAG.getDataLayout().isLittleEndian()) {
2148 unsigned indexOffset = CheckEven ? 4 : 0;
2149 if (ShuffleKind == 1) // Unary
2150 return isVMerge(N, indexOffset, 0);
2151 else if (ShuffleKind == 2) // swapped
2152 return isVMerge(N, indexOffset, 16);
2153 else
2154 return false;
2155 }
2156 else {
2157 unsigned indexOffset = CheckEven ? 0 : 4;
2158 if (ShuffleKind == 1) // Unary
2159 return isVMerge(N, indexOffset, 0);
2160 else if (ShuffleKind == 0) // Normal
2161 return isVMerge(N, indexOffset, 16);
2162 else
2163 return false;
2164 }
2165 return false;
2166}
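// Worked example, derived from the word-merge isVMerge() above: for a
// big-endian merge with two distinct inputs (ShuffleKind 0) and
// CheckEven == true, the accepted mask is
//   <0,1,2,3, 16,17,18,19, 8,9,10,11, 24,25,26,27>
// i.e. the even words of both inputs interleaved, which is what vmrgew
// produces.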
2167
2168/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
2169/// amount, otherwise return -1.
2170/// The ShuffleKind distinguishes between big-endian operations with two
2171/// different inputs (0), either-endian operations with two identical inputs
2172/// (1), and little-endian operations with two different inputs (2). For the
2173/// latter, the input operands are swapped (see PPCInstrAltivec.td).
2174int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
2175 SelectionDAG &DAG) {
2176 if (N->getValueType(0) != MVT::v16i8)
2177 return -1;
2178
2179 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2180
2181 // Find the first non-undef value in the shuffle mask.
2182 unsigned i;
2183 for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
2184 /*search*/;
2185
2186 if (i == 16) return -1; // all undef.
2187
2188 // Otherwise, check to see if the rest of the elements are consecutively
2189 // numbered from this value.
2190 unsigned ShiftAmt = SVOp->getMaskElt(i);
2191 if (ShiftAmt < i) return -1;
2192
2193 ShiftAmt -= i;
2194 bool isLE = DAG.getDataLayout().isLittleEndian();
2195
2196 if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
2197 // Check the rest of the elements to see if they are consecutive.
2198 for (++i; i != 16; ++i)
2199 if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
2200 return -1;
2201 } else if (ShuffleKind == 1) {
2202 // Check the rest of the elements to see if they are consecutive.
2203 for (++i; i != 16; ++i)
2204 if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
2205 return -1;
2206 } else
2207 return -1;
2208
2209 if (isLE)
2210 ShiftAmt = 16 - ShiftAmt;
2211
2212 return ShiftAmt;
2213}
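// Worked example (illustrative): a mask holding the consecutive values
// 3,4,...,18 yields a shift amount of 3 for a big-endian two-input shuffle
// (ShuffleKind 0); under the little-endian rules (ShuffleKind 2) the same
// mask yields 16 - 3 = 13.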
2214
2215/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
2216/// specifies a splat of a single element that is suitable for input to
2217/// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
2218bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
2219 EVT VT = N->getValueType(0);
2220 if (VT == MVT::v2i64 || VT == MVT::v2f64)
2221 return EltSize == 8 && N->getMaskElt(0) == N->getMaskElt(1);
2222
2223 assert(VT == MVT::v16i8 && isPowerOf2_32(EltSize) &&
2224 EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");
2225
2226 // The consecutive indices need to specify an element, not part of two
2227 // different elements. So abandon ship early if this isn't the case.
2228 if (N->getMaskElt(0) % EltSize != 0)
2229 return false;
2230
2231 // This is a splat operation if each element of the permute is the same, and
2232 // if the value doesn't reference the second vector.
2233 unsigned ElementBase = N->getMaskElt(0);
2234
2235 // FIXME: Handle UNDEF elements too!
2236 if (ElementBase >= 16)
2237 return false;
2238
2239 // Check that the indices are consecutive, in the case of a multi-byte element
2240 // splatted with a v16i8 mask.
2241 for (unsigned i = 1; i != EltSize; ++i)
2242 if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
2243 return false;
2244
2245 for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
2246 if (N->getMaskElt(i) < 0) continue;
2247 for (unsigned j = 0; j != EltSize; ++j)
2248 if (N->getMaskElt(i+j) != N->getMaskElt(j))
2249 return false;
2250 }
2251 return true;
2252}
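// Worked example (illustrative): with EltSize == 4 the mask
// {4,5,6,7, 4,5,6,7, 4,5,6,7, 4,5,6,7} is accepted as a splat of word
// element 1, while a mask starting at byte 5 is rejected because the
// element base is not a multiple of the element size.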
2253
2254/// Check that the mask is shuffling N byte elements. Within each N byte
2255/// element of the mask, the indices could be either in increasing or
2256/// decreasing order as long as they are consecutive.
2257/// \param[in] N the shuffle vector SD Node to analyze
2258/// \param[in] Width the element width in bytes; may be 2/4/8/16 (HalfWord/
2259/// Word/DoubleWord/QuadWord).
2260/// \param[in] StepLen the step between consecutive indices within each N byte
2261/// element: 1 if the mask is in increasing order, -1 if it is decreasing.
2262/// \return true iff the mask is shuffling N byte elements.
2263static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
2264 int StepLen) {
2265 assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
2266 "Unexpected element width.");
2267 assert((StepLen == 1 || StepLen == -1) && "Unexpected step length.");
2268
2269 unsigned NumOfElem = 16 / Width;
2270 unsigned MaskVal[16]; // Width is never greater than 16
2271 for (unsigned i = 0; i < NumOfElem; ++i) {
2272 MaskVal[0] = N->getMaskElt(i * Width);
2273 if ((StepLen == 1) && (MaskVal[0] % Width)) {
2274 return false;
2275 } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
2276 return false;
2277 }
2278
2279 for (unsigned int j = 1; j < Width; ++j) {
2280 MaskVal[j] = N->getMaskElt(i * Width + j);
2281 if (MaskVal[j] != MaskVal[j-1] + StepLen) {
2282 return false;
2283 }
2284 }
2285 }
2286
2287 return true;
2288}
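// Worked example (illustrative): with Width == 4 and StepLen == 1 every group
// of four mask entries must be an ascending run starting at a multiple of 4
// (a whole word); with StepLen == -1 each group must descend and end on a
// multiple of 4, as in a per-word byte reversal.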
2289
2290bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2291 unsigned &InsertAtByte, bool &Swap, bool IsLE) {
2292 if (!isNByteElemShuffleMask(N, 4, 1))
2293 return false;
2294
2295 // Now we look at mask elements 0,4,8,12
2296 unsigned M0 = N->getMaskElt(0) / 4;
2297 unsigned M1 = N->getMaskElt(4) / 4;
2298 unsigned M2 = N->getMaskElt(8) / 4;
2299 unsigned M3 = N->getMaskElt(12) / 4;
2300 unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
2301 unsigned BigEndianShifts[] = { 3, 0, 1, 2 };
2302
2303 // Below, let H and L be arbitrary elements of the shuffle mask
2304 // where H is in the range [4,7] and L is in the range [0,3].
2305 // H, 1, 2, 3 or L, 5, 6, 7
2306 if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
2307 (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
2308 ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
2309 InsertAtByte = IsLE ? 12 : 0;
2310 Swap = M0 < 4;
2311 return true;
2312 }
2313 // 0, H, 2, 3 or 4, L, 6, 7
2314 if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
2315 (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
2316 ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
2317 InsertAtByte = IsLE ? 8 : 4;
2318 Swap = M1 < 4;
2319 return true;
2320 }
2321 // 0, 1, H, 3 or 4, 5, L, 7
2322 if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
2323 (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
2324 ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
2325 InsertAtByte = IsLE ? 4 : 8;
2326 Swap = M2 < 4;
2327 return true;
2328 }
2329 // 0, 1, 2, H or 4, 5, 6, L
2330 if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
2331 (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
2332 ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
2333 InsertAtByte = IsLE ? 0 : 12;
2334 Swap = M3 < 4;
2335 return true;
2336 }
2337
2338 // If both vector operands for the shuffle are the same vector, the mask will
2339 // contain only elements from the first one and the second one will be undef.
2340 if (N->getOperand(1).isUndef()) {
2341 ShiftElts = 0;
2342 Swap = true;
2343 unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
2344 if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
2345 InsertAtByte = IsLE ? 12 : 0;
2346 return true;
2347 }
2348 if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
2349 InsertAtByte = IsLE ? 8 : 4;
2350 return true;
2351 }
2352 if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
2353 InsertAtByte = IsLE ? 4 : 8;
2354 return true;
2355 }
2356 if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
2357 InsertAtByte = IsLE ? 0 : 12;
2358 return true;
2359 }
2360 }
2361
2362 return false;
2363}
2364
2365bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2366 bool &Swap, bool IsLE) {
2367 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2368 // Ensure each byte index of the word is consecutive.
2369 if (!isNByteElemShuffleMask(N, 4, 1))
2370 return false;
2371
2372 // Now we look at mask elements 0,4,8,12, which are the beginning of words.
2373 unsigned M0 = N->getMaskElt(0) / 4;
2374 unsigned M1 = N->getMaskElt(4) / 4;
2375 unsigned M2 = N->getMaskElt(8) / 4;
2376 unsigned M3 = N->getMaskElt(12) / 4;
2377
2378 // If both vector operands for the shuffle are the same vector, the mask will
2379 // contain only elements from the first one and the second one will be undef.
2380 if (N->getOperand(1).isUndef()) {
2381 assert(M0 < 4 && "Indexing into an undef vector?");
2382 if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
2383 return false;
2384
2385 ShiftElts = IsLE ? (4 - M0) % 4 : M0;
2386 Swap = false;
2387 return true;
2388 }
2389
2390 // Ensure each word index of the ShuffleVector Mask is consecutive.
2391 if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
2392 return false;
2393
2394 if (IsLE) {
2395 if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
2396 // Input vectors don't need to be swapped if the leading element
2397 // of the result is one of the 3 left elements of the second vector
2398 // (or if there is no shift to be done at all).
2399 Swap = false;
2400 ShiftElts = (8 - M0) % 8;
2401 } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
2402 // Input vectors need to be swapped if the leading element
2403 // of the result is one of the 3 left elements of the first vector
2404 // (or if we're shifting by 4 - thereby simply swapping the vectors).
2405 Swap = true;
2406 ShiftElts = (4 - M0) % 4;
2407 }
2408
2409 return true;
2410 } else { // BE
2411 if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
2412 // Input vectors don't need to be swapped if the leading element
2413 // of the result is one of the 4 elements of the first vector.
2414 Swap = false;
2415 ShiftElts = M0;
2416 } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
2417 // Input vectors need to be swapped if the leading element
2418 // of the result is one of the 4 elements of the right vector.
2419 Swap = true;
2420 ShiftElts = M0 - 4;
2421 }
2422
2423 return true;
2424 }
2425}
2426
2427bool static isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) {
2428 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2429
2430 if (!isNByteElemShuffleMask(N, Width, -1))
2431 return false;
2432
2433 for (int i = 0; i < 16; i += Width)
2434 if (N->getMaskElt(i) != i + Width - 1)
2435 return false;
2436
2437 return true;
2438}
2439
2440bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
2441 return isXXBRShuffleMaskHelper(N, 2);
2442}
2443
2444bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
2445 return isXXBRShuffleMaskHelper(N, 4);
2446}
2447
2448bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
2449 return isXXBRShuffleMaskHelper(N, 8);
2450}
2451
2452bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
2453 return isXXBRShuffleMaskHelper(N, 16);
2454}
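// Worked example (illustrative): isXXBRDShuffleMask accepts
// {7,6,5,4,3,2,1,0, 15,14,13,12,11,10,9,8}, a byte reversal within each
// doubleword, which presumably maps onto the xxbrd instruction; the other
// helpers cover the halfword, word and quadword variants.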
2455
2456/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
2457/// if the inputs to the instruction should be swapped and set \p DM to the
2458/// value for the immediate.
2459/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
2460/// AND element 0 of the result comes from the first input (LE) or second input
2461/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
2462/// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
2463/// mask.
2464bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
2465 bool &Swap, bool IsLE) {
2466 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2467
2468 // Ensure each byte index of the double word is consecutive.
2469 if (!isNByteElemShuffleMask(N, 8, 1))
2470 return false;
2471
2472 unsigned M0 = N->getMaskElt(0) / 8;
2473 unsigned M1 = N->getMaskElt(8) / 8;
2474 assert(((M0 | M1) < 4) && "A mask element out of bounds?");
2475
2476 // If both vector operands for the shuffle are the same vector, the mask will
2477 // contain only elements from the first one and the second one will be undef.
2478 if (N->getOperand(1).isUndef()) {
2479 if ((M0 | M1) < 2) {
2480 DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
2481 Swap = false;
2482 return true;
2483 } else
2484 return false;
2485 }
2486
2487 if (IsLE) {
2488 if (M0 > 1 && M1 < 2) {
2489 Swap = false;
2490 } else if (M0 < 2 && M1 > 1) {
2491 M0 = (M0 + 2) % 4;
2492 M1 = (M1 + 2) % 4;
2493 Swap = true;
2494 } else
2495 return false;
2496
2497 // Note: if control flow comes here that means Swap is already set above
2498 DM = (((~M1) & 1) << 1) + ((~M0) & 1);
2499 return true;
2500 } else { // BE
2501 if (M0 < 2 && M1 > 1) {
2502 Swap = false;
2503 } else if (M0 > 1 && M1 < 2) {
2504 M0 = (M0 + 2) % 4;
2505 M1 = (M1 + 2) % 4;
2506 Swap = true;
2507 } else
2508 return false;
2509
2510 // Note: if control flow comes here that means Swap is already set above
2511 DM = (M0 << 1) + (M1 & 1);
2512 return true;
2513 }
2514}
2515
2516
2517/// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
2518/// appropriate for PPC mnemonics (which have a big endian bias - namely
2519/// elements are counted from the left of the vector register).
2520unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
2521 SelectionDAG &DAG) {
2522 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2523 assert(isSplatShuffleMask(SVOp, EltSize));
2524 EVT VT = SVOp->getValueType(0);
2525
2526 if (VT == MVT::v2i64 || VT == MVT::v2f64)
2527 return DAG.getDataLayout().isLittleEndian() ? 1 - SVOp->getMaskElt(0)
2528 : SVOp->getMaskElt(0);
2529
2530 if (DAG.getDataLayout().isLittleEndian())
2531 return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
2532 else
2533 return SVOp->getMaskElt(0) / EltSize;
2534}
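// Worked example (illustrative): for a v4i32 splat whose mask starts at
// byte 4 (EltSize == 4, i.e. vector element 1), this returns 1 on a
// big-endian target and (16 / 4) - 1 - 1 == 2 on a little-endian target,
// matching the left-to-right element numbering used by the mnemonics.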
2535
2536/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
2537/// by using a vspltis[bhw] instruction of the specified element size, return
2538/// the constant being splatted. The ByteSize field indicates the number of
2539/// bytes of each element [124] -> [bhw].
2540SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
2541 SDValue OpVal;
2542
2543 // If ByteSize of the splat is bigger than the element size of the
2544 // build_vector, then we have a case where we are checking for a splat where
2545 // multiple elements of the buildvector are folded together into a single
2546 // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
2547 unsigned EltSize = 16/N->getNumOperands();
2548 if (EltSize < ByteSize) {
2549 unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
2550 SDValue UniquedVals[4];
2551 assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
2552
2553 // See if all of the elements in the buildvector agree across.
2554 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2555 if (N->getOperand(i).isUndef()) continue;
2556 // If the element isn't a constant, bail fully out.
2557 if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
2558
2559 if (!UniquedVals[i&(Multiple-1)].getNode())
2560 UniquedVals[i&(Multiple-1)] = N->getOperand(i);
2561 else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
2562 return SDValue(); // no match.
2563 }
2564
2565 // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
2566 // either constant or undef values that are identical for each chunk. See
2567 // if these chunks can form into a larger vspltis*.
2568
2569 // Check to see if all of the leading entries are either 0 or -1. If
2570 // neither, then this won't fit into the immediate field.
2571 bool LeadingZero = true;
2572 bool LeadingOnes = true;
2573 for (unsigned i = 0; i != Multiple-1; ++i) {
2574 if (!UniquedVals[i].getNode()) continue; // Must have been undefs.
2575
2576 LeadingZero &= isNullConstant(UniquedVals[i]);
2577 LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
2578 }
2579 // Finally, check the least significant entry.
2580 if (LeadingZero) {
2581 if (!UniquedVals[Multiple-1].getNode())
2582 return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef
2583 int Val = UniquedVals[Multiple - 1]->getAsZExtVal();
2584 if (Val < 16) // 0,0,0,4 -> vspltisw(4)
2585 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2586 }
2587 if (LeadingOnes) {
2588 if (!UniquedVals[Multiple-1].getNode())
2589 return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
2590 int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
2591 if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
2592 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2593 }
2594
2595 return SDValue();
2596 }
2597
2598 // Check to see if this buildvec has a single non-undef value in its elements.
2599 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2600 if (N->getOperand(i).isUndef()) continue;
2601 if (!OpVal.getNode())
2602 OpVal = N->getOperand(i);
2603 else if (OpVal != N->getOperand(i))
2604 return SDValue();
2605 }
2606
2607 if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def.
2608
2609 unsigned ValSizeInBytes = EltSize;
2610 uint64_t Value = 0;
2611 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
2612 Value = CN->getZExtValue();
2613 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
2614 assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
2615 Value = llvm::bit_cast<uint32_t>(CN->getValueAPF().convertToFloat());
2616 }
2617
2618 // If the splat value is larger than the element value, then we can never do
2619 // this splat. The only case that we could fit the replicated bits into our
2620 // immediate field for would be zero, and we prefer to use vxor for it.
2621 if (ValSizeInBytes < ByteSize) return SDValue();
2622
2623 // If the element value is larger than the splat value, check if it consists
2624 // of a repeated bit pattern of size ByteSize.
2625 if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
2626 return SDValue();
2627
2628 // Properly sign extend the value.
2629 int MaskVal = SignExtend32(Value, ByteSize * 8);
2630
2631 // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
2632 if (MaskVal == 0) return SDValue();
2633
2634 // Finally, if this value fits in a 5 bit sext field, return it
2635 if (SignExtend32<5>(MaskVal) == MaskVal)
2636 return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
2637 return SDValue();
2638}
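// Worked example (illustrative): a v16i8 build_vector whose sixteen operands
// are all the constant 5, queried with ByteSize == 1, returns the target
// constant 5 (a vspltisb candidate); the same vector queried with
// ByteSize == 2 does not match, since the leading byte of each halfword
// chunk is neither 0 nor -1.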
2639
2640//===----------------------------------------------------------------------===//
2641// Addressing Mode Selection
2642//===----------------------------------------------------------------------===//
2643
2644/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2645/// or 64-bit immediate, and if the value can be accurately represented as a
2646/// sign extension from a 16-bit value. If so, this returns true and the
2647/// immediate.
2648bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2649 if (!isa<ConstantSDNode>(N))
2650 return false;
2651
2652 Imm = (int16_t)N->getAsZExtVal();
2653 if (N->getValueType(0) == MVT::i32)
2654 return Imm == (int32_t)N->getAsZExtVal();
2655 else
2656 return Imm == (int64_t)N->getAsZExtVal();
2657}
2658bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
2659 return isIntS16Immediate(Op.getNode(), Imm);
2660}
2661
2662/// Used when computing address flags for selecting loads and stores.
2663/// If we have an OR, check if the LHS and RHS are provably disjoint.
2664/// An OR of two provably disjoint values is equivalent to an ADD.
2665/// Most PPC load/store instructions compute the effective address as a sum,
2666/// so doing this conversion is useful.
2667static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N) {
2668 if (N.getOpcode() != ISD::OR)
2669 return false;
2670 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2671 if (!LHSKnown.Zero.getBoolValue())
2672 return false;
2673 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2674 return (~(LHSKnown.Zero | RHSKnown.Zero) == 0);
2675}
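// Illustrative example: for (or (and X, 0xFFFFFF00), 12) the known-zero bits
// of the two operands cover every bit position, so the OR is equivalent to an
// ADD and can safely participate in address folding.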
2676
2677/// SelectAddressEVXRegReg - Given the specified address, check to see if it can
2678/// be represented as an indexed [r+r] operation.
2679bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
2680 SDValue &Index,
2681 SelectionDAG &DAG) const {
2682 for (SDNode *U : N->uses()) {
2683 if (MemSDNode *Memop = dyn_cast<MemSDNode>(U)) {
2684 if (Memop->getMemoryVT() == MVT::f64) {
2685 Base = N.getOperand(0);
2686 Index = N.getOperand(1);
2687 return true;
2688 }
2689 }
2690 }
2691 return false;
2692}
2693
2694/// isIntS34Immediate - This method tests whether the value of the given node
2695/// can be accurately represented as a sign extension from a 34-bit value. If so,
2696/// this returns true and the immediate.
2697bool llvm::isIntS34Immediate(SDNode *N, int64_t &Imm) {
2698 if (!isa<ConstantSDNode>(N))
2699 return false;
2700
2701 Imm = (int64_t)N->getAsZExtVal();
2702 return isInt<34>(Imm);
2703}
2704bool llvm::isIntS34Immediate(SDValue Op, int64_t &Imm) {
2705 return isIntS34Immediate(Op.getNode(), Imm);
2706}
2707
2708/// SelectAddressRegReg - Given the specified address, check to see if it
2709/// can be represented as an indexed [r+r] operation. Returns false if it
2710/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
2711/// non-zero and N can be represented by a base register plus a signed 16-bit
2712/// displacement, make a more precise judgement by checking (displacement % \p
2713/// EncodingAlignment).
2714bool PPCTargetLowering::SelectAddressRegReg(
2715 SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG,
2716 MaybeAlign EncodingAlignment) const {
2717 // If we have a PC Relative target flag don't select as [reg+reg]. It will be
2718 // a [pc+imm].
2719 if (SelectAddressPCRel(N, Base))
2720 return false;
2721
2722 int16_t Imm = 0;
2723 if (N.getOpcode() == ISD::ADD) {
2724 // Is this an SPE f64 load/store, which can't handle a 16-bit offset?
2725 // SPE load/store can only handle 8-bit offsets.
2726 if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
2727 return true;
2728 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2729 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2730 return false; // r+i
2731 if (N.getOperand(1).getOpcode() == PPCISD::Lo)
2732 return false; // r+i
2733
2734 Base = N.getOperand(0);
2735 Index = N.getOperand(1);
2736 return true;
2737 } else if (N.getOpcode() == ISD::OR) {
2738 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2739 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2740 return false; // r+i can fold it if we can.
2741
2742 // If this is an or of disjoint bitfields, we can codegen this as an add
2743 // (for better address arithmetic) if the LHS and RHS of the OR are provably
2744 // disjoint.
2745 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2746
2747 if (LHSKnown.Zero.getBoolValue()) {
2748 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2749 // If all of the bits are known zero on the LHS or RHS, the add won't
2750 // carry.
2751 if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
2752 Base = N.getOperand(0);
2753 Index = N.getOperand(1);
2754 return true;
2755 }
2756 }
2757 }
2758
2759 return false;
2760}
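// Illustrative behaviour (not from the original source): (add X, 16) is
// normally rejected here so that the constant can instead be folded into a
// [r+imm] displacement form, whereas (add X, Y) with two non-constant
// operands selects Base = X and Index = Y for an indexed [r+r] access.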
2761
2762// If we happen to be doing an i64 load or store into a stack slot that has
2763// less than a 4-byte alignment, then the frame-index elimination may need to
2764// use an indexed load or store instruction (because the offset may not be a
2765// multiple of 4). The extra register needed to hold the offset comes from the
2766// register scavenger, and it is possible that the scavenger will need to use
2767// an emergency spill slot. As a result, we need to make sure that a spill slot
2768// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2769// stack slot.
2770static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2771 // FIXME: This does not handle the LWA case.
2772 if (VT != MVT::i64)
2773 return;
2774
2775 // NOTE: We'll exclude negative FIs here, which come from argument
2776 // lowering, because there are no known test cases triggering this problem
2777 // using packed structures (or similar). We can remove this exclusion if
2778 // we find such a test case. The reason why this is so test-case driven is
2779 // because this entire 'fixup' is only to prevent crashes (from the
2780 // register scavenger) on not-really-valid inputs. For example, if we have:
2781 // %a = alloca i1
2782 // %b = bitcast i1* %a to i64*
2783 // store i64 0, i64* %b
2784 // then the store should really be marked as 'align 1', but is not. If it
2785 // were marked as 'align 1' then the indexed form would have been
2786 // instruction-selected initially, and the problem this 'fixup' is preventing
2787 // won't happen regardless.
2788 if (FrameIdx < 0)
2789 return;
2790
2791 MachineFunction &MF = DAG.getMachineFunction();
2792 MachineFrameInfo &MFI = MF.getFrameInfo();
2793
2794 if (MFI.getObjectAlign(FrameIdx) >= Align(4))
2795 return;
2796
2797 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2798 FuncInfo->setHasNonRISpills();
2799}
2800
2801/// Returns true if the address N can be represented by a base register plus
2802/// a signed 16-bit displacement [r+imm], and if it is not better
2803/// represented as reg+reg. If \p EncodingAlignment is non-zero, only accept
2804/// displacements that are multiples of that value.
2805bool PPCTargetLowering::SelectAddressRegImm(
2806 SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG,
2807 MaybeAlign EncodingAlignment) const {
2808 // FIXME dl should come from parent load or store, not from address
2809 SDLoc dl(N);
2810
2811 // If we have a PC Relative target flag don't select as [reg+imm]. It will be
2812 // a [pc+imm].
2813 if (SelectAddressPCRel(N, Base))
2814 return false;
2815
2816 // If this can be more profitably realized as r+r, fail.
2817 if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
2818 return false;
2819
2820 if (N.getOpcode() == ISD::ADD) {
2821 int16_t imm = 0;
2822 if (isIntS16Immediate(N.getOperand(1), imm) &&
2823 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2824 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2825 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2826 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2827 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2828 } else {
2829 Base = N.getOperand(0);
2830 }
2831 return true; // [r+i]
2832 } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
2833 // Match LOAD (ADD (X, Lo(G))).
2834 assert(!N.getOperand(1).getConstantOperandVal(1) &&
2835 "Cannot handle constant offsets yet!");
2836 Disp = N.getOperand(1).getOperand(0); // The global address.
2837 assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
2838 Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
2839 Disp.getOpcode() == ISD::TargetConstantPool ||
2840 Disp.getOpcode() == ISD::TargetJumpTable);
2841 Base = N.getOperand(0);
2842 return true; // [&g+r]
2843 }
2844 } else if (N.getOpcode() == ISD::OR) {
2845 int16_t imm = 0;
2846 if (isIntS16Immediate(N.getOperand(1), imm) &&
2847 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2848 // If this is an or of disjoint bitfields, we can codegen this as an add
2849 // (for better address arithmetic) if the LHS and RHS of the OR are
2850 // provably disjoint.
2851 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2852
2853 if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
2854 // If all of the bits are known zero on the LHS or RHS, the add won't
2855 // carry.
2856 if (FrameIndexSDNode *FI =
2857 dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2858 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2859 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2860 } else {
2861 Base = N.getOperand(0);
2862 }
2863 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2864 return true;
2865 }
2866 }
2867 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2868 // Loading from a constant address.
2869
2870 // If this address fits entirely in a 16-bit sext immediate field, codegen
2871 // this as "d, 0"
2872 int16_t Imm;
2873 if (isIntS16Immediate(CN, Imm) &&
2874 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm))) {
2875 Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
2876 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2877 CN->getValueType(0));
2878 return true;
2879 }
2880
2881 // Handle 32-bit sext immediates with LIS + addr mode.
2882 if ((CN->getValueType(0) == MVT::i32 ||
2883 (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
2884 (!EncodingAlignment ||
2885 isAligned(*EncodingAlignment, CN->getZExtValue()))) {
2886 int Addr = (int)CN->getZExtValue();
2887
2888 // Otherwise, break this down into an LIS + disp.
2889 Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
2890
2891 Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
2892 MVT::i32);
2893 unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
2894 Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
2895 return true;
2896 }
2897 }
2898
2899 Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
2900 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
2901 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2902 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2903 } else
2904 Base = N;
2905 return true; // [r+0]
2906}
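// Illustrative behaviour: for (add FrameIndex, 24) this yields Disp = 24 and
// Base = the target frame index, the operand pair for a D-form access; a
// plain address constant that fits in a signed 16-bit field is selected as a
// displacement over the zero register, and larger 32-bit constants are
// rebuilt as LIS plus a 16-bit displacement as shown above.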
2907
2908/// Similar to the 16-bit case but for instructions that take a 34-bit
2909/// displacement field (prefixed loads/stores).
2910bool PPCTargetLowering::SelectAddressRegImm34(SDValue N, SDValue &Disp,
2911 SDValue &Base,
2912 SelectionDAG &DAG) const {
2913 // Only on 64-bit targets.
2914 if (N.getValueType() != MVT::i64)
2915 return false;
2916
2917 SDLoc dl(N);
2918 int64_t Imm = 0;
2919
2920 if (N.getOpcode() == ISD::ADD) {
2921 if (!isIntS34Immediate(N.getOperand(1), Imm))
2922 return false;
2923 Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2924 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2925 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2926 else
2927 Base = N.getOperand(0);
2928 return true;
2929 }
2930
2931 if (N.getOpcode() == ISD::OR) {
2932 if (!isIntS34Immediate(N.getOperand(1), Imm))
2933 return false;
2934 // If this is an or of disjoint bitfields, we can codegen this as an add
2935 // (for better address arithmetic) if the LHS and RHS of the OR are
2936 // provably disjoint.
2937 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2938 if ((LHSKnown.Zero.getZExtValue() | ~(uint64_t)Imm) != ~0ULL)
2939 return false;
2940 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2941 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2942 else
2943 Base = N.getOperand(0);
2944 Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2945 return true;
2946 }
2947
2948 if (isIntS34Immediate(N, Imm)) { // If the address is a 34-bit const.
2949 Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2950 Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
2951 return true;
2952 }
2953
2954 return false;
2955}
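// Illustrative behaviour: this is the 34-bit counterpart used by prefixed
// instructions (pld/pstd and friends), so (add X, 0x12345) is accepted with
// Base = X and Disp = 0x12345 even though the offset does not fit in a
// 16-bit displacement.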
2956
2957/// SelectAddressRegRegOnly - Given the specified address, force it to be
2958/// represented as an indexed [r+r] operation.
2959bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
2960 SDValue &Index,
2961 SelectionDAG &DAG) const {
2962 // Check to see if we can easily represent this as an [r+r] address. This
2963 // will fail if it thinks that the address is more profitably represented as
2964 // reg+imm, e.g. where imm = 0.
2965 if (SelectAddressRegReg(N, Base, Index, DAG))
2966 return true;
2967
2968 // If the address is the result of an add, we will utilize the fact that the
2969 // address calculation includes an implicit add. However, we can reduce
2970 // register pressure if we do not materialize a constant just for use as the
2971 // index register. We only get rid of the add if it is not an add of a
2972 // value and a 16-bit signed constant and both have a single use.
2973 int16_t imm = 0;
2974 if (N.getOpcode() == ISD::ADD &&
2975 (!isIntS16Immediate(N.getOperand(1), imm) ||
2976 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
2977 Base = N.getOperand(0);
2978 Index = N.getOperand(1);
2979 return true;
2980 }
2981
2982 // Otherwise, do it the hard way, using R0 as the base register.
2983 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2984 N.getValueType());
2985 Index = N;
2986 return true;
2987}
2988
2989template <typename Ty> static bool isValidPCRelNode(SDValue N) {
2990 Ty *PCRelCand = dyn_cast<Ty>(N);
2991 return PCRelCand && (PPCInstrInfo::hasPCRelFlag(PCRelCand->getTargetFlags()));
2992}
2993
2994/// Returns true if this address is a PC Relative address.
2995/// PC Relative addresses are marked with the flag PPCII::MO_PCREL_FLAG
2996/// or if the node opcode is PPCISD::MAT_PCREL_ADDR.
2997bool PPCTargetLowering::SelectAddressPCRel(SDValue N, SDValue &Base) const {
2998 // This is a materialize PC Relative node. Always select this as PC Relative.
2999 Base = N;
3000 if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)
3001 return true;
3002 if (isValidPCRelNode<ConstantPoolSDNode>(N) ||
3003 isValidPCRelNode<GlobalAddressSDNode>(N) ||
3004 isValidPCRelNode<JumpTableSDNode>(N) ||
3005 isValidPCRelNode<BlockAddressSDNode>(N))
3006 return true;
3007 return false;
3008}
3009
3010/// Returns true if we should use a direct load into vector instruction
3011/// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
3012static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {
3013
3014 // If there are any other uses other than scalar to vector, then we should
3015 // keep it as a scalar load -> direct move pattern to prevent multiple
3016 // loads.
3017 LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
3018 if (!LD)
3019 return false;
3020
3021 EVT MemVT = LD->getMemoryVT();
3022 if (!MemVT.isSimple())
3023 return false;
3024 switch(MemVT.getSimpleVT().SimpleTy) {
3025 case MVT::i64:
3026 break;
3027 case MVT::i32:
3028 if (!ST.hasP8Vector())
3029 return false;
3030 break;
3031 case MVT::i16:
3032 case MVT::i8:
3033 if (!ST.hasP9Vector())
3034 return false;
3035 break;
3036 default:
3037 return false;
3038 }
3039
3040 SDValue LoadedVal(N, 0);
3041 if (!LoadedVal.hasOneUse())
3042 return false;
3043
3044 for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end();
3045 UI != UE; ++UI)
3046 if (UI.getUse().get().getResNo() == 0 &&
3047 UI->getOpcode() != ISD::SCALAR_TO_VECTOR &&
3048 UI->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED)
3049 return false;
3050
3051 return true;
3052}
3053
3054/// getPreIndexedAddressParts - Returns true if the node's address can be
3055/// legally represented as a pre-indexed load / store address, and sets the
3056/// base pointer, offset pointer and addressing mode by reference.
3057bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
3058 SDValue &Offset,
3059 ISD::MemIndexedMode &AM,
3060 SelectionDAG &DAG) const {
3061 if (DisablePPCPreinc) return false;
3062
3063 bool isLoad = true;
3064 SDValue Ptr;
3065 EVT VT;
3066 Align Alignment;
3067 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
3068 Ptr = LD->getBasePtr();
3069 VT = LD->getMemoryVT();
3070 Alignment = LD->getAlign();
3071 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
3072 Ptr = ST->getBasePtr();
3073 VT = ST->getMemoryVT();
3074 Alignment = ST->getAlign();
3075 isLoad = false;
3076 } else
3077 return false;
3078
3079 // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
3080 // instructions because we can fold these into a more efficient instruction
3081 // instead (such as LXSD).
3082 if (isLoad && usePartialVectorLoads(N, Subtarget)) {
3083 return false;
3084 }
3085
3086 // PowerPC doesn't have preinc load/store instructions for vectors
3087 if (VT.isVector())
3088 return false;
3089
3090 if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
3091 // Common code will reject creating a pre-inc form if the base pointer
3092 // is a frame index, or if N is a store and the base pointer is either
3093 // the same as or a predecessor of the value being stored. Check for
3094 // those situations here, and try with swapped Base/Offset instead.
3095 bool Swap = false;
3096
3097 if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
3098 Swap = true;
3099 else if (!isLoad) {
3100 SDValue Val = cast<StoreSDNode>(N)->getValue();
3101 if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
3102 Swap = true;
3103 }
3104
3105 if (Swap)
3106 std::swap(Base, Offset);
3107
3108 AM = ISD::PRE_INC;
3109 return true;
3110 }
3111
3112 // LDU/STU can only handle immediates that are a multiple of 4.
3113 if (VT != MVT::i64) {
3114 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, std::nullopt))
3115 return false;
3116 } else {
3117 // LDU/STU need an address with at least 4-byte alignment.
3118 if (Alignment < Align(4))
3119 return false;
3120
3121 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, Align(4)))
3122 return false;
3123 }
3124
3125 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
3126 // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
3127 // sext i32 to i64 when addr mode is r+i.
3128 if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
3129 LD->getExtensionType() == ISD::SEXTLOAD &&
3130 isa<ConstantSDNode>(Offset))
3131 return false;
3132 }
3133
3134 AM = ISD::PRE_INC;
3135 return true;
3136}
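// Illustrative behaviour: an aligned i64 store to (add X, 16) can be turned
// into a pre-incremented (stdu-style) access with AM == ISD::PRE_INC, since
// DS-form update instructions require a displacement that is a multiple of 4
// and an access alignment of at least 4 bytes.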
3137
3138//===----------------------------------------------------------------------===//
3139// LowerOperation implementation
3140//===----------------------------------------------------------------------===//
3141
3142/// Set HiOpFlags and LoOpFlags to the target MO flags for a label access,
3143/// using the PIC variants when we reference labels through a PICBase.
3144static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
3145 unsigned &HiOpFlags, unsigned &LoOpFlags,
3146 const GlobalValue *GV = nullptr) {
3147 HiOpFlags = PPCII::MO_HA;
3148 LoOpFlags = PPCII::MO_LO;
3149
3150 // Don't use the pic base if not in PIC relocation model.
3151 if (IsPIC) {
3152 HiOpFlags = PPCII::MO_PIC_HA_FLAG;
3153 LoOpFlags = PPCII::MO_PIC_LO_FLAG;
3154 }
3155}
3156
3157static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
3158 SelectionDAG &DAG) {
3159 SDLoc DL(HiPart);
3160 EVT PtrVT = HiPart.getValueType();
3161 SDValue Zero = DAG.getConstant(0, DL, PtrVT);
3162
3163 SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
3164 SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
3165
3166 // With PIC, the first instruction is actually "GR+hi(&G)".
3167 if (isPIC)
3168 Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
3169 DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
3170
3171 // Generate non-pic code that has direct accesses to the constant pool.
3172 // The address of the global is just (hi(&g)+lo(&g)).
3173 return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
3174}
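// Illustrative lowering (assumed mnemonics): for a non-PIC 32-bit access to
// a global g, the Hi/Lo pair above typically materializes as
//   lis r3, g@ha
//   addi r3, r3, g@l
// while in PIC mode the high part is added to the PIC base register first.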
3175
3176static void setUsesTOCBasePtr(MachineFunction &MF) {
3177 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3178 FuncInfo->setUsesTOCBasePtr();
3179}
3180
3181static void setUsesTOCBasePtr(SelectionDAG &DAG) {
3182 setUsesTOCBasePtr(DAG.getMachineFunction());
3183}
3184
3185SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
3186 SDValue GA) const {
3187 const bool Is64Bit = Subtarget.isPPC64();
3188 EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
3189 SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT)
3190 : Subtarget.isAIXABI()
3191 ? DAG.getRegister(PPC::R2, VT)
3192 : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
3193 SDValue Ops[] = { GA, Reg };
3194 return DAG.getMemIntrinsicNode(
3195 PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
3196 MachinePointerInfo::getGOT(DAG.getMachineFunction()), std::nullopt,
3197 MachineMemOperand::MOLoad);
3198}
3199
3200SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
3201 SelectionDAG &DAG) const {
3202 EVT PtrVT = Op.getValueType();
3203 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3204 const Constant *C = CP->getConstVal();
3205
3206 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3207 // The actual address of the GlobalValue is stored in the TOC.
3208 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3209 if (Subtarget.isUsingPCRelativeCalls()) {
3210 SDLoc DL(CP);
3211 EVT Ty = getPointerTy(DAG.getDataLayout());
3212 SDValue ConstPool = DAG.getTargetConstantPool(
3213 C, Ty, CP->getAlign(), CP->getOffset(), PPCII::MO_PCREL_FLAG);
3214 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, ConstPool);
3215 }
3216 setUsesTOCBasePtr(DAG);
3217 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0);
3218 return getTOCEntry(DAG, SDLoc(CP), GA);
3219 }
3220
3221 unsigned MOHiFlag, MOLoFlag;
3222 bool IsPIC = isPositionIndependent();
3223 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3224
3225 if (IsPIC && Subtarget.isSVR4ABI()) {
3226 SDValue GA =
3227 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), PPCII::MO_PIC_FLAG);
3228 return getTOCEntry(DAG, SDLoc(CP), GA);
3229 }
3230
3231 SDValue CPIHi =
3232 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOHiFlag);
3233 SDValue CPILo =
3234 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOLoFlag);
3235 return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
3236}
3237
3238// For 64-bit PowerPC, prefer the more compact relative encodings.
3239// This trades 32 bits per jump table entry for one or two instructions
3240// on the jump site.
3241unsigned PPCTargetLowering::getJumpTableEncoding() const {
3242 if (isJumpTableRelative())
3243 return MachineJumpTableInfo::EK_LabelDifference32;
3244
3245 return TargetLowering::getJumpTableEncoding();
3246}
3247
3248bool PPCTargetLowering::isJumpTableRelative() const {
3249 if (UseAbsoluteJumpTables)
3250 return false;
3251 if (Subtarget.isPPC64() || Subtarget.isAIXABI())
3252 return true;
3253 return TargetLowering::isJumpTableRelative();
3254}
3255
3256SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
3257 SelectionDAG &DAG) const {
3258 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3259 return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3260
3261 switch (getTargetMachine().getCodeModel()) {
3262 case CodeModel::Small:
3263 case CodeModel::Medium:
3264 return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3265 default:
3266 return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
3267 getPointerTy(DAG.getDataLayout()));
3268 }
3269}
3270
3271const MCExpr *
3272PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
3273 unsigned JTI,
3274 MCContext &Ctx) const {
3275 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3276 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3277
3278 switch (getTargetMachine().getCodeModel()) {
3279 case CodeModel::Small:
3280 case CodeModel::Medium:
3281 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3282 default:
3283 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
3284 }
3285}
3286
3287SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
3288 EVT PtrVT = Op.getValueType();
3289 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
3290
3291 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3292 if (Subtarget.isUsingPCRelativeCalls()) {
3293 SDLoc DL(JT);
3294 EVT Ty = getPointerTy(DAG.getDataLayout());
3295 SDValue GA =
3296 DAG.getTargetJumpTable(JT->getIndex(), Ty, PPCII::MO_PCREL_FLAG);
3297 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3298 return MatAddr;
3299 }
3300
3301 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3302 // The actual address of the GlobalValue is stored in the TOC.
3303 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3304 setUsesTOCBasePtr(DAG);
3305 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
3306 return getTOCEntry(DAG, SDLoc(JT), GA);
3307 }
3308
3309 unsigned MOHiFlag, MOLoFlag;
3310 bool IsPIC = isPositionIndependent();
3311 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3312
3313 if (IsPIC && Subtarget.isSVR4ABI()) {
3314 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
3315 PPCII::MO_PIC_FLAG);
3316 return getTOCEntry(DAG, SDLoc(GA), GA);
3317 }
3318
3319 SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
3320 SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
3321 return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
3322}
3323
3324SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
3325 SelectionDAG &DAG) const {
3326 EVT PtrVT = Op.getValueType();
3327 BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
3328 const BlockAddress *BA = BASDN->getBlockAddress();
3329
3330 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3331 if (Subtarget.isUsingPCRelativeCalls()) {
3332 SDLoc DL(BASDN);
3333 EVT Ty = getPointerTy(DAG.getDataLayout());
3334 SDValue GA = DAG.getTargetBlockAddress(BA, Ty, BASDN->getOffset(),
3335 PPCII::MO_PCREL_FLAG);
3336 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3337 return MatAddr;
3338 }
3339
3340 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3341 // The actual BlockAddress is stored in the TOC.
3342 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3343 setUsesTOCBasePtr(DAG);
3344 SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
3345 return getTOCEntry(DAG, SDLoc(BASDN), GA);
3346 }
3347
3348 // 32-bit position-independent ELF stores the BlockAddress in the .got.
3349 if (Subtarget.is32BitELFABI() && isPositionIndependent())
3350 return getTOCEntry(
3351 DAG, SDLoc(BASDN),
3352 DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()));
3353
3354 unsigned MOHiFlag, MOLoFlag;
3355 bool IsPIC = isPositionIndependent();
3356 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3357 SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
3358 SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
3359 return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
3360}
3361
3362SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
3363 SelectionDAG &DAG) const {
3364 if (Subtarget.isAIXABI())
3365 return LowerGlobalTLSAddressAIX(Op, DAG);
3366
3367 return LowerGlobalTLSAddressLinux(Op, DAG);
3368}
3369
3370/// updateForAIXShLibTLSModelOpt - Helper to initialize TLS model opt settings,
3371/// and then apply the update.
3372static void updateForAIXShLibTLSModelOpt(TLSModel::Model &Model,
3373 SelectionDAG &DAG,
3374 const TargetMachine &TM) {
3375 // Initialize TLS model opt setting lazily:
3376 // (1) Use initial-exec for single TLS var references within current function.
3377 // (2) Use local-dynamic for multiple TLS var references within current
3378 // function.
3379 PPCFunctionInfo *FuncInfo =
3380 DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
3381 if (!FuncInfo->isAIXFuncTLSModelOptInitDone()) {
3382 SmallPtrSet<const GlobalValue *, 8> TLSGV;
3383 // Iterate over all instructions within current function, collect all TLS
3384 // global variables (global variables taken as the first parameter to
3385 // Intrinsic::threadlocal_address).
3386 const Function &Func = DAG.getMachineFunction().getFunction();
3387 for (Function::const_iterator BI = Func.begin(), BE = Func.end(); BI != BE;
3388 ++BI)
3389 for (BasicBlock::const_iterator II = BI->begin(), IE = BI->end();
3390 II != IE; ++II)
3391 if (II->getOpcode() == Instruction::Call)
3392 if (const CallInst *CI = dyn_cast<const CallInst>(&*II))
3393 if (Function *CF = CI->getCalledFunction())
3394 if (CF->isDeclaration() &&
3395 CF->getIntrinsicID() == Intrinsic::threadlocal_address)
3396 if (const GlobalValue *GV =
3397 dyn_cast<GlobalValue>(II->getOperand(0))) {
3398 TLSModel::Model GVModel = TM.getTLSModel(GV);
3399 if (GVModel == TLSModel::LocalDynamic)
3400 TLSGV.insert(GV);
3401 }
3402
3403 unsigned TLSGVCnt = TLSGV.size();
3404 LLVM_DEBUG(dbgs() << format("LocalDynamic TLSGV count:%d\n", TLSGVCnt));
3405 if (TLSGVCnt <= PPCAIXTLSModelOptUseIEForLDLimit)
3406 FuncInfo->setAIXFuncUseTLSIEForLD();
3407 FuncInfo->setAIXFuncTLSModelOptInitDone();
3408 }
3409
3410 if (FuncInfo->isAIXFuncUseTLSIEForLD()) {
3411 LLVM_DEBUG(
3412 dbgs() << DAG.getMachineFunction().getName()
3413 << " function is using the TLS-IE model for TLS-LD access.\n");
3414 Model = TLSModel::InitialExec;
3415 }
3416}
3417
3418SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
3419 SelectionDAG &DAG) const {
3420 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3421
3422 if (DAG.getTarget().useEmulatedTLS())
3423 report_fatal_error("Emulated TLS is not yet supported on AIX");
3424
3425 SDLoc dl(GA);
3426 const GlobalValue *GV = GA->getGlobal();
3427 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3428 bool Is64Bit = Subtarget.isPPC64();
3429 TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
3430
3431 // Apply update to the TLS model.
3432 if (Subtarget.hasAIXShLibTLSModelOpt())
3433 updateForAIXShLibTLSModelOpt(Model, DAG, getTargetMachine());
3434
3435 bool IsTLSLocalExecModel = Model == TLSModel::LocalExec;
3436
3437 if (IsTLSLocalExecModel || Model == TLSModel::InitialExec) {
3438 bool HasAIXSmallLocalExecTLS = Subtarget.hasAIXSmallLocalExecTLS();
3439 bool HasAIXSmallTLSGlobalAttr = false;
3440 SDValue VariableOffsetTGA =
3441 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TPREL_FLAG);
3442 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3443 SDValue TLSReg;
3444
3445 if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
3446 if (GVar->hasAttribute("aix-small-tls"))
3447 HasAIXSmallTLSGlobalAttr = true;
3448
3449 if (Is64Bit) {
3450 // For local-exec and initial-exec on AIX (64-bit), the sequence generated
3451 // involves a load of the variable offset (from the TOC), followed by an
3452 // add of the loaded variable offset to R13 (the thread pointer).
3453 // This code sequence looks like:
3454 // ld reg1,var[TC](2)
3455 // add reg2, reg1, r13 // r13 contains the thread pointer
3456 TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3457
3458 // With the -maix-small-local-exec-tls option, or with the "aix-small-tls"
3459 // global variable attribute, produce a faster access sequence for
3460 // local-exec TLS variables where the offset from the TLS base is encoded
3461 // as an immediate operand.
3462 //
3463 // We only utilize the faster local-exec access sequence when the TLS
3464 // variable has a size within the policy limit. We treat types that are
3465 // not sized or are empty as being over the policy size limit.
3466 if ((HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr) &&
3467 IsTLSLocalExecModel) {
3468 Type *GVType = GV->getValueType();
3469 if (GVType->isSized() && !GVType->isEmptyTy() &&
3470 GV->getParent()->getDataLayout().getTypeAllocSize(GVType) <=
3471 AIXSmallTlsPolicySizeLimit)
3472 return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA, TLSReg);
3473 }
3474 } else {
3475 // For local-exec and initial-exec on AIX (32-bit), the sequence generated
3476 // involves loading the variable offset from the TOC, generating a call to
3477 // .__get_tpointer to get the thread pointer (which will be in R3), and
3478 // adding the two together:
3479 // lwz reg1,var[TC](2)
3480 // bla .__get_tpointer
3481 // add reg2, reg1, r3
3482 TLSReg = DAG.getNode(PPCISD::GET_TPOINTER, dl, PtrVT);
3483
3484 // We do not implement the 32-bit version of the faster access sequence
3485 // for local-exec that is controlled by the -maix-small-local-exec-tls
3486 // option, or the "aix-small-tls" global variable attribute.
3487 if (HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr)
3488 report_fatal_error("The small-local-exec TLS access sequence is "
3489 "currently only supported on AIX (64-bit mode).");
3490 }
3491 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, VariableOffset);
3492 }
3493
3494 if (Model == TLSModel::LocalDynamic) {
3495 bool HasAIXSmallLocalDynamicTLS = Subtarget.hasAIXSmallLocalDynamicTLS();
3496
3497 // We do not implement the 32-bit version of the faster access sequence
3498 // for local-dynamic that is controlled by -maix-small-local-dynamic-tls.
3499 if (!Is64Bit && HasAIXSmallLocalDynamicTLS)
3500 report_fatal_error("The small-local-dynamic TLS access sequence is "
3501 "currently only supported on AIX (64-bit mode).");
3502
3503 // For local-dynamic on AIX, we need to generate one TOC entry for each
3504 // variable offset, and a single module-handle TOC entry for the entire
3505 // file.
3506
3507 SDValue VariableOffsetTGA =
3508 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSLD_FLAG);
3509 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3510
3512 GlobalVariable *TLSGV =
3513 dyn_cast_or_null<GlobalVariable>(M->getOrInsertGlobal(
3514 StringRef("_$TLSML"), PointerType::getUnqual(*DAG.getContext())));
3516 assert(TLSGV && "Not able to create GV for _$TLSML.");
3517 SDValue ModuleHandleTGA =
3518 DAG.getTargetGlobalAddress(TLSGV, dl, PtrVT, 0, PPCII::MO_TLSLDM_FLAG);
3519 SDValue ModuleHandleTOC = getTOCEntry(DAG, dl, ModuleHandleTGA);
3520 SDValue ModuleHandle =
3521 DAG.getNode(PPCISD::TLSLD_AIX, dl, PtrVT, ModuleHandleTOC);
3522
3523 // With the -maix-small-local-dynamic-tls option, produce a faster access
3524 // sequence for local-dynamic TLS variables where the offset from the
3525 // module-handle is encoded as an immediate operand.
3526 //
3527 // We only utilize the faster local-dynamic access sequence when the TLS
3528 // variable has a size within the policy limit. We treat types that are
3529 // not sized or are empty as being over the policy size limit.
3530 if (HasAIXSmallLocalDynamicTLS) {
3531 Type *GVType = GV->getValueType();
3532 if (GVType->isSized() && !GVType->isEmptyTy() &&
3533 GV->getParent()->getDataLayout().getTypeAllocSize(GVType) <=
3534 AIXSmallTlsPolicySizeLimit)
3535 return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA,
3536 ModuleHandle);
3537 }
3538
3539 return DAG.getNode(ISD::ADD, dl, PtrVT, ModuleHandle, VariableOffset);
3540 }
3541
3542 // If Local- or Initial-exec or Local-dynamic is not possible or specified,
3543 // all GlobalTLSAddress nodes are lowered using the general-dynamic model. We
3544 // need to generate two TOC entries, one for the variable offset, one for the
3545 // region handle. The global address for the TOC entry of the region handle is
3546 // created with the MO_TLSGDM_FLAG flag and the global address for the TOC
3547 // entry of the variable offset is created with MO_TLSGD_FLAG.
3548 SDValue VariableOffsetTGA =
3549 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGD_FLAG);
3550 SDValue RegionHandleTGA =
3551 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGDM_FLAG);
3552 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3553 SDValue RegionHandle = getTOCEntry(DAG, dl, RegionHandleTGA);
3554 return DAG.getNode(PPCISD::TLSGD_AIX, dl, PtrVT, VariableOffset,
3555 RegionHandle);
3556}
3557
3558SDValue PPCTargetLowering::LowerGlobalTLSAddressLinux(SDValue Op,
3559 SelectionDAG &DAG) const {
3560 // FIXME: TLS addresses currently use medium model code sequences,
3561 // which is the most useful form. Eventually support for small and
3562 // large models could be added if users need it, at the cost of
3563 // additional complexity.
3564 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3565 if (DAG.getTarget().useEmulatedTLS())
3566 return LowerToTLSEmulatedModel(GA, DAG);
3567
3568 SDLoc dl(GA);
3569 const GlobalValue *GV = GA->getGlobal();
3570 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3571 bool is64bit = Subtarget.isPPC64();
3572 const Module *M = DAG.getMachineFunction().getFunction().getParent();
3573 PICLevel::Level picLevel = M->getPICLevel();
3574
3575 const TargetMachine &TM = getTargetMachine();
3576 TLSModel::Model Model = TM.getTLSModel(GV);
3577
3578 if (Model == TLSModel::LocalExec) {
3579 if (Subtarget.isUsingPCRelativeCalls()) {
3580 SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3581 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3582 PPCII::MO_TPREL_PCREL_FLAG);
3583 SDValue MatAddr =
3584 DAG.getNode(PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, dl, PtrVT, TGA);
3585 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, MatAddr);
3586 }
3587
3588 SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3589 PPCII::MO_TPREL_HA);
3590 SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3591 PPCII::MO_TPREL_LO);
3592 SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
3593 : DAG.getRegister(PPC::R2, MVT::i32);
3594
3595 SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
3596 return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
3597 }
3598
3599 if (Model == TLSModel::InitialExec) {
3600 bool IsPCRel = Subtarget.isUsingPCRelativeCalls();
3601 SDValue TGA = DAG.getTargetGlobalAddress(
3602 GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0);
3603 SDValue TGATLS = DAG.getTargetGlobalAddress(
3604 GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_TLS_PCREL_FLAG : PPCII::MO_TLS);
3605 SDValue TPOffset;
3606 if (IsPCRel) {
3607 SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, dl, PtrVT, TGA);
3608 TPOffset = DAG.getLoad(MVT::i64, dl, DAG.getEntryNode(), MatPCRel,
3609 MachinePointerInfo());
3610 } else {
3611 SDValue GOTPtr;
3612 if (is64bit) {
3613 setUsesTOCBasePtr(DAG);
3614 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3615 GOTPtr =
3616 DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, PtrVT, GOTReg, TGA);
3617 } else {
3618 if (!TM.isPositionIndependent())
3619 GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
3620 else if (picLevel == PICLevel::SmallPIC)
3621 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3622 else
3623 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3624 }
3625 TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, GOTPtr);
3626 }
3627 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
3628 }
3629
3630 if (Model == TLSModel::GeneralDynamic) {
3631 if (Subtarget.isUsingPCRelativeCalls()) {
3632 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3633 PPCII::MO_GOT_TLSGD_PCREL_FLAG);
3634 return DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3635 }
3636
3637 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3638 SDValue GOTPtr;
3639 if (is64bit) {
3640 setUsesTOCBasePtr(DAG);
3641 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3642 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
3643 GOTReg, TGA);
3644 } else {
3645 if (picLevel == PICLevel::SmallPIC)
3646 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3647 else
3648 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3649 }
3650 return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
3651 GOTPtr, TGA, TGA);
3652 }
3653
3654 if (Model == TLSModel::LocalDynamic) {
3655 if (Subtarget.isUsingPCRelativeCalls()) {
3656 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3657 PPCII::MO_GOT_TLSLD_PCREL_FLAG);
3658 SDValue MatPCRel =
3659 DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3660 return DAG.getNode(PPCISD::PADDI_DTPREL, dl, PtrVT, MatPCRel, TGA);
3661 }
3662
3663 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3664 SDValue GOTPtr;
3665 if (is64bit) {
3666 setUsesTOCBasePtr(DAG);
3667 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3668 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
3669 GOTReg, TGA);
3670 } else {
3671 if (picLevel == PICLevel::SmallPIC)
3672 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3673 else
3674 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3675 }
3676 SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
3677 PtrVT, GOTPtr, TGA, TGA);
3678 SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
3679 PtrVT, TLSAddr, TGA);
3680 return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
3681 }
3682
3683 llvm_unreachable("Unknown TLS model!");
3684}
3685
3686SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
3687 SelectionDAG &DAG) const {
3688 EVT PtrVT = Op.getValueType();
3689 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
3690 SDLoc DL(GSDN);
3691 const GlobalValue *GV = GSDN->getGlobal();
3692
3693 // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
3694 // The actual address of the GlobalValue is stored in the TOC.
3695 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3696 if (Subtarget.isUsingPCRelativeCalls()) {
3697 EVT Ty = getPointerTy(DAG.getDataLayout());
3698      if (isAccessedAsGotIndirect(Op)) {
3699        SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3700                                                PPCII::MO_GOT_PCREL_FLAG);
3701        SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3702        SDValue Load = DAG.getLoad(MVT::i64, DL, DAG.getEntryNode(), MatPCRel,
3703                                   MachinePointerInfo());
3704        return Load;
3705      } else {
3706        SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3707                                                PPCII::MO_PCREL_FLAG);
3708        return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3709      }
3710 }
3711 setUsesTOCBasePtr(DAG);
3712 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
3713 return getTOCEntry(DAG, DL, GA);
3714 }
3715
3716 unsigned MOHiFlag, MOLoFlag;
3717 bool IsPIC = isPositionIndependent();
3718 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
3719
3720 if (IsPIC && Subtarget.isSVR4ABI()) {
3721 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
3722 GSDN->getOffset(),
3723                                            PPCII::MO_PIC_FLAG);
3724    return getTOCEntry(DAG, DL, GA);
3725 }
3726
3727 SDValue GAHi =
3728 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
3729 SDValue GALo =
3730 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
3731
3732 return LowerLabelRef(GAHi, GALo, IsPIC, DAG);
3733}
3734
3735SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
3736 bool IsStrict = Op->isStrictFPOpcode();
3737  ISD::CondCode CC =
3738      cast<CondCodeSDNode>(Op.getOperand(IsStrict ? 3 : 2))->get();
3739 SDValue LHS = Op.getOperand(IsStrict ? 1 : 0);
3740 SDValue RHS = Op.getOperand(IsStrict ? 2 : 1);
3741 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
3742 EVT LHSVT = LHS.getValueType();
3743 SDLoc dl(Op);
3744
3745 // Soften the setcc with libcall if it is fp128.
3746 if (LHSVT == MVT::f128) {
3747 assert(!Subtarget.hasP9Vector() &&
3748 "SETCC for f128 is already legal under Power9!");
3749 softenSetCCOperands(DAG, LHSVT, LHS, RHS, CC, dl, LHS, RHS, Chain,
3750 Op->getOpcode() == ISD::STRICT_FSETCCS);
3751 if (RHS.getNode())
3752 LHS = DAG.getNode(ISD::SETCC, dl, Op.getValueType(), LHS, RHS,
3753 DAG.getCondCode(CC));
3754 if (IsStrict)
3755 return DAG.getMergeValues({LHS, Chain}, dl);
3756 return LHS;
3757 }
3758
3759 assert(!IsStrict && "Don't know how to handle STRICT_FSETCC!");
3760
3761 if (Op.getValueType() == MVT::v2i64) {
3762 // When the operands themselves are v2i64 values, we need to do something
3763 // special because VSX has no underlying comparison operations for these.
3764 if (LHS.getValueType() == MVT::v2i64) {
3765 // Equality can be handled by casting to the legal type for Altivec
3766 // comparisons, everything else needs to be expanded.
3767 if (CC != ISD::SETEQ && CC != ISD::SETNE)
3768 return SDValue();
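      // Compare word-by-word as v4i32, then combine each lane with its
      // word-swapped neighbour: a doubleword is equal only if both of its
      // words are equal (AND), and unequal if either word differs (OR).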
3769 SDValue SetCC32 = DAG.getSetCC(
3770 dl, MVT::v4i32, DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, LHS),
3771 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, RHS), CC);
3772 int ShuffV[] = {1, 0, 3, 2};
3773 SDValue Shuff =
3774 DAG.getVectorShuffle(MVT::v4i32, dl, SetCC32, SetCC32, ShuffV);
3775 return DAG.getBitcast(MVT::v2i64,
3776                          DAG.getNode(CC == ISD::SETEQ ? ISD::AND : ISD::OR,
3777                                      dl, MVT::v4i32, Shuff, SetCC32));
3778 }
3779
3780 // We handle most of these in the usual way.
3781 return Op;
3782 }
3783
3784 // If we're comparing for equality to zero, expose the fact that this is
3785 // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
3786 // fold the new nodes.
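  // (For example, an i32 "x == 0" becomes cntlzw(x) >> 5, which is 1 exactly
  // when x is zero.)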
3787 if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
3788 return V;
3789
3790 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
3791 // Leave comparisons against 0 and -1 alone for now, since they're usually
3792 // optimized. FIXME: revisit this when we can custom lower all setcc
3793 // optimizations.
3794 if (C->isAllOnes() || C->isZero())
3795 return SDValue();
3796 }
3797
3798 // If we have an integer seteq/setne, turn it into a compare against zero
3799 // by xor'ing the rhs with the lhs, which is faster than setting a
3800 // condition register, reading it back out, and masking the correct bit. The
3801 // normal approach here uses sub to do this instead of xor. Using xor exposes
3802 // the result to other bit-twiddling opportunities.
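  // (E.g. "a == b" becomes "(a ^ b) == 0", which can then feed the cntlz/srl
  // lowering above.)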
3803 if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
3804 EVT VT = Op.getValueType();
3805 SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, LHS, RHS);
3806 return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
3807 }
3808 return SDValue();
3809}
3810
3811SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
3812 SDNode *Node = Op.getNode();
3813 EVT VT = Node->getValueType(0);
3814 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3815 SDValue InChain = Node->getOperand(0);
3816 SDValue VAListPtr = Node->getOperand(1);
3817 const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
3818 SDLoc dl(Node);
3819
3820 assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
3821
3822 // gpr_index
3823 SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3824 VAListPtr, MachinePointerInfo(SV), MVT::i8);
3825 InChain = GprIndex.getValue(1);
3826
3827 if (VT == MVT::i64) {
3828 // Check if GprIndex is even
3829 SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
3830 DAG.getConstant(1, dl, MVT::i32));
3831 SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
3832 DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
3833 SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
3834 DAG.getConstant(1, dl, MVT::i32));
3835 // Align GprIndex to be even if it isn't
3836 GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
3837 GprIndex);
3838 }
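    // (Under the 32-bit SVR4 ABI an i64 occupies an aligned GPR pair, so e.g.
    // gpr_index 1 (r4) is bumped to 2 and the value lands in r5/r6.)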
3839
3840 // fpr index is 1 byte after gpr
3841 SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3842 DAG.getConstant(1, dl, MVT::i32));
3843
3844 // fpr
3845 SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3846 FprPtr, MachinePointerInfo(SV), MVT::i8);
3847 InChain = FprIndex.getValue(1);
3848
3849 SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3850 DAG.getConstant(8, dl, MVT::i32));
3851
3852 SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3853 DAG.getConstant(4, dl, MVT::i32));
3854
3855 // areas
3856 SDValue OverflowArea =
3857 DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
3858 InChain = OverflowArea.getValue(1);
3859
3860 SDValue RegSaveArea =
3861 DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
3862 InChain = RegSaveArea.getValue(1);
3863
3864  // select overflow_area if index >= 8
3865 SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
3866 DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
3867
3868 // adjustment constant gpr_index * 4/8
3869 SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
3870 VT.isInteger() ? GprIndex : FprIndex,
3871 DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
3872 MVT::i32));
3873
3874 // OurReg = RegSaveArea + RegConstant
3875 SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
3876 RegConstant);
3877
3878 // Floating types are 32 bytes into RegSaveArea
3879 if (VT.isFloatingPoint())
3880 OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
3881 DAG.getConstant(32, dl, MVT::i32));
3882
3883 // increase {f,g}pr_index by 1 (or 2 if VT is i64)
3884 SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
3885 VT.isInteger() ? GprIndex : FprIndex,
3886 DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
3887 MVT::i32));
3888
3889 InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
3890 VT.isInteger() ? VAListPtr : FprPtr,
3891 MachinePointerInfo(SV), MVT::i8);
3892
3893 // determine if we should load from reg_save_area or overflow_area
3894 SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
3895
3896  // increase overflow_area by 4/8 if gpr/fpr >= 8
3897 SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
3898 DAG.getConstant(VT.isInteger() ? 4 : 8,
3899 dl, MVT::i32));
3900
3901 OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
3902 OverflowAreaPlusN);
3903
3904 InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
3905 MachinePointerInfo(), MVT::i32);
3906
3907 return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
3908}
3909
3910SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
3911 assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
3912
3913 // We have to copy the entire va_list struct:
3914 // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte
3915 return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2),
3916 DAG.getConstant(12, SDLoc(Op), MVT::i32), Align(8),
3917 false, true, false, MachinePointerInfo(),
3918                       MachinePointerInfo());
3919}
3920
3921SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
3922 SelectionDAG &DAG) const {
3923 if (Subtarget.isAIXABI())
3924 report_fatal_error("ADJUST_TRAMPOLINE operation is not supported on AIX.");
3925
3926 return Op.getOperand(0);
3927}
3928
3929SDValue PPCTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
3930  MachineFunction &MF = DAG.getMachineFunction();
3931  PPCFunctionInfo &MFI = *MF.getInfo<PPCFunctionInfo>();
3932
3933 assert((Op.getOpcode() == ISD::INLINEASM ||
3934 Op.getOpcode() == ISD::INLINEASM_BR) &&
3935 "Expecting Inline ASM node.");
3936
3937  // If an LR store is already known to be required then there is no point in
3938 // checking this ASM as well.
3939 if (MFI.isLRStoreRequired())
3940 return Op;
3941
3942 // Inline ASM nodes have an optional last operand that is an incoming Flag of
3943 // type MVT::Glue. We want to ignore this last operand if that is the case.
3944 unsigned NumOps = Op.getNumOperands();
3945 if (Op.getOperand(NumOps - 1).getValueType() == MVT::Glue)
3946 --NumOps;
3947
3948 // Check all operands that may contain the LR.
3949 for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
3950 const InlineAsm::Flag Flags(Op.getConstantOperandVal(i));
3951 unsigned NumVals = Flags.getNumOperandRegisters();
3952 ++i; // Skip the ID value.
3953
3954 switch (Flags.getKind()) {
3955 default:
3956      llvm_unreachable("Bad flags!");
3957    case InlineAsm::Kind::RegUse:
3958    case InlineAsm::Kind::Imm:
3959    case InlineAsm::Kind::Mem:
3960      i += NumVals;
3961      break;
3962    case InlineAsm::Kind::Clobber:
3963    case InlineAsm::Kind::RegDef:
3964    case InlineAsm::Kind::RegDefEarlyClobber: {
3965      for (; NumVals; --NumVals, ++i) {
3966 Register Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
3967 if (Reg != PPC::LR && Reg != PPC::LR8)
3968 continue;
3969 MFI.setLRStoreRequired();
3970 return Op;
3971 }
3972 break;
3973 }
3974 }
3975 }
3976
3977 return Op;
3978}
3979
3980SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
3981 SelectionDAG &DAG) const {
3982 if (Subtarget.isAIXABI())
3983 report_fatal_error("INIT_TRAMPOLINE operation is not supported on AIX.");
3984
3985 SDValue Chain = Op.getOperand(0);
3986 SDValue Trmp = Op.getOperand(1); // trampoline
3987 SDValue FPtr = Op.getOperand(2); // nested function
3988 SDValue Nest = Op.getOperand(3); // 'nest' parameter value
3989 SDLoc dl(Op);
3990
3991 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3992 bool isPPC64 = (PtrVT == MVT::i64);
3993 Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
3994
3995  TargetLowering::ArgListTy Args;
3996  TargetLowering::ArgListEntry Entry;
3997
3998 Entry.Ty = IntPtrTy;
3999 Entry.Node = Trmp; Args.push_back(Entry);
4000
4001 // TrampSize == (isPPC64 ? 48 : 40);
4002 Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl,
4003 isPPC64 ? MVT::i64 : MVT::i32);
4004 Args.push_back(Entry);
4005
4006 Entry.Node = FPtr; Args.push_back(Entry);
4007 Entry.Node = Nest; Args.push_back(Entry);
4008
4009 // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
4010  TargetLowering::CallLoweringInfo CLI(DAG);
4011  CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
4012      CallingConv::C, Type::getVoidTy(*DAG.getContext()),
4013      DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
4014
4015 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
4016 return CallResult.second;
4017}
4018
4019SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
4020  MachineFunction &MF = DAG.getMachineFunction();
4021  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4022 EVT PtrVT = getPointerTy(MF.getDataLayout());
4023
4024 SDLoc dl(Op);
4025
4026 if (Subtarget.isPPC64() || Subtarget.isAIXABI()) {
4027 // vastart just stores the address of the VarArgsFrameIndex slot into the
4028 // memory location argument.
4029 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4030 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4031 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
4032 MachinePointerInfo(SV));
4033 }
4034
4035 // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
4036 // We suppose the given va_list is already allocated.
4037 //
4038 // typedef struct {
4039 // char gpr; /* index into the array of 8 GPRs
4040 // * stored in the register save area
4041 // * gpr=0 corresponds to r3,
4042 // * gpr=1 to r4, etc.
4043 // */
4044 // char fpr; /* index into the array of 8 FPRs
4045 // * stored in the register save area
4046 // * fpr=0 corresponds to f1,
4047 // * fpr=1 to f2, etc.
4048 // */
4049 // char *overflow_arg_area;
4050 // /* location on stack that holds
4051 // * the next overflow argument
4052 // */
4053 // char *reg_save_area;
4054 // /* where r3:r10 and f1:f8 (if saved)
4055 // * are stored
4056 // */
4057 // } va_list[1];
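  //
  // With the two 1-byte counters and 2 bytes of padding, the byte offsets used
  // below are: gpr at 0, fpr at 1, overflow_arg_area at 4 and reg_save_area at
  // 8, for 12 bytes total (matching the memcpy size in LowerVACOPY).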
4058
4059 SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
4060 SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
4061 SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
4062 PtrVT);
4063 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
4064 PtrVT);
4065
4066 uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
4067 SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
4068
4069 uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
4070 SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
4071
4072 uint64_t FPROffset = 1;
4073 SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
4074
4075 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4076
4077 // Store first byte : number of int regs
4078 SDValue firstStore =
4079 DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
4080 MachinePointerInfo(SV), MVT::i8);
4081 uint64_t nextOffset = FPROffset;
4082 SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
4083 ConstFPROffset);
4084
4085 // Store second byte : number of float regs
4086 SDValue secondStore =
4087 DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
4088 MachinePointerInfo(SV, nextOffset), MVT::i8);
4089 nextOffset += StackOffset;
4090 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
4091
4092 // Store second word : arguments given on stack
4093 SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
4094 MachinePointerInfo(SV, nextOffset));
4095 nextOffset += FrameOffset;
4096 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
4097
4098 // Store third word : arguments given in registers
4099 return DAG.getStore(thirdStore, dl, FR, nextPtr,
4100 MachinePointerInfo(SV, nextOffset));
4101}
4102
4103/// FPR - The set of FP registers that should be allocated for arguments
4104/// on Darwin and AIX.
4105static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
4106 PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
4107 PPC::F11, PPC::F12, PPC::F13};
4108
4109/// CalculateStackSlotSize - Calculates the size reserved for this argument on
4110/// the stack.
4111static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
4112 unsigned PtrByteSize) {
4113 unsigned ArgSize = ArgVT.getStoreSize();
4114 if (Flags.isByVal())
4115 ArgSize = Flags.getByValSize();
4116
4117 // Round up to multiples of the pointer size, except for array members,
4118 // which are always packed.
4119 if (!Flags.isInConsecutiveRegs())
4120 ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4121
4122 return ArgSize;
4123}
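// For example, with 8-byte pointers an i32 argument still reserves a full
// 8-byte slot, and a 20-byte byval aggregate reserves 24 bytes, unless the
// value is an array member packed into consecutive registers.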
4124
4125/// CalculateStackSlotAlignment - Calculates the alignment of this argument
4126/// on the stack.
4127static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
4128                                         ISD::ArgFlagsTy Flags,
4129 unsigned PtrByteSize) {
4130 Align Alignment(PtrByteSize);
4131
4132 // Altivec parameters are padded to a 16 byte boundary.
4133 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4134 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4135 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
4136 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
4137 Alignment = Align(16);
4138
4139 // ByVal parameters are aligned as requested.
4140 if (Flags.isByVal()) {
4141 auto BVAlign = Flags.getNonZeroByValAlign();
4142 if (BVAlign > PtrByteSize) {
4143 if (BVAlign.value() % PtrByteSize != 0)
4145 "ByVal alignment is not a multiple of the pointer size");
4146
4147 Alignment = BVAlign;
4148 }
4149 }
4150
4151 // Array members are always packed to their original alignment.
4152 if (Flags.isInConsecutiveRegs()) {
4153 // If the array member was split into multiple registers, the first
4154 // needs to be aligned to the size of the full type. (Except for
4155 // ppcf128, which is only aligned as its f64 components.)
4156 if (Flags.isSplit() && OrigVT != MVT::ppcf128)
4157 Alignment = Align(OrigVT.getStoreSize());
4158 else
4159 Alignment = Align(ArgVT.getStoreSize());
4160 }
4161
4162 return Alignment;
4163}
4164
4165/// CalculateStackSlotUsed - Return whether this argument will use its
4166/// stack slot (instead of being passed in registers). ArgOffset,
4167/// AvailableFPRs, and AvailableVRs must hold the current argument
4168/// position, and will be updated to account for this argument.
4169static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags,
4170 unsigned PtrByteSize, unsigned LinkageSize,
4171 unsigned ParamAreaSize, unsigned &ArgOffset,
4172 unsigned &AvailableFPRs,
4173 unsigned &AvailableVRs) {
4174 bool UseMemory = false;
4175
4176 // Respect alignment of argument on the stack.
4177 Align Alignment =
4178 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
4179 ArgOffset = alignTo(ArgOffset, Alignment);
4180 // If there's no space left in the argument save area, we must
4181 // use memory (this check also catches zero-sized arguments).
4182 if (ArgOffset >= LinkageSize + ParamAreaSize)
4183 UseMemory = true;
4184
4185 // Allocate argument on the stack.
4186 ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
4187 if (Flags.isInConsecutiveRegsLast())
4188 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4189 // If we overran the argument save area, we must use memory
4190 // (this check catches arguments passed partially in memory)
4191 if (ArgOffset > LinkageSize + ParamAreaSize)
4192 UseMemory = true;
4193
4194 // However, if the argument is actually passed in an FPR or a VR,
4195 // we don't use memory after all.
4196 if (!Flags.isByVal()) {
4197 if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
4198 if (AvailableFPRs > 0) {
4199 --AvailableFPRs;
4200 return false;
4201 }
4202 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4203 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4204 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
4205 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
4206 if (AvailableVRs > 0) {
4207 --AvailableVRs;
4208 return false;
4209 }
4210 }
4211
4212 return UseMemory;
4213}
4214
4215/// EnsureStackAlignment - Round stack frame size up from NumBytes to
4216/// ensure minimum alignment required for target.
4217static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
4218                                     unsigned NumBytes) {
4219 return alignTo(NumBytes, Lowering->getStackAlign());
4220}
4221
4222SDValue PPCTargetLowering::LowerFormalArguments(
4223 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4224 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4225 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4226 if (Subtarget.isAIXABI())
4227 return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
4228 InVals);
4229 if (Subtarget.is64BitELFABI())
4230 return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4231 InVals);
4232 assert(Subtarget.is32BitELFABI());
4233 return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4234 InVals);
4235}
4236
4237SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
4238 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4239 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4240 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4241
4242 // 32-bit SVR4 ABI Stack Frame Layout:
4243 // +-----------------------------------+
4244 // +--> | Back chain |
4245 // | +-----------------------------------+
4246 // | | Floating-point register save area |
4247 // | +-----------------------------------+
4248 // | | General register save area |
4249 // | +-----------------------------------+
4250 // | | CR save word |
4251 // | +-----------------------------------+
4252 // | | VRSAVE save word |
4253 // | +-----------------------------------+
4254 // | | Alignment padding |
4255 // | +-----------------------------------+
4256 // | | Vector register save area |
4257 // | +-----------------------------------+
4258 // | | Local variable space |
4259 // | +-----------------------------------+
4260 // | | Parameter list area |
4261 // | +-----------------------------------+
4262 // | | LR save word |
4263 // | +-----------------------------------+
4264 // SP--> +--- | Back chain |
4265 // +-----------------------------------+
4266 //
4267 // Specifications:
4268 // System V Application Binary Interface PowerPC Processor Supplement
4269 // AltiVec Technology Programming Interface Manual
4270
4271  MachineFunction &MF = DAG.getMachineFunction();
4272  MachineFrameInfo &MFI = MF.getFrameInfo();
4273 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4274
4275 EVT PtrVT = getPointerTy(MF.getDataLayout());
4276 // Potential tail calls could cause overwriting of argument stack slots.
4277 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4278 (CallConv == CallingConv::Fast));
4279 const Align PtrAlign(4);
4280
4281 // Assign locations to all of the incoming arguments.
4282  SmallVector<CCValAssign, 16> ArgLocs;
4283  PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
4284 *DAG.getContext());
4285
4286 // Reserve space for the linkage area on the stack.
4287 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4288 CCInfo.AllocateStack(LinkageSize, PtrAlign);
4289 if (useSoftFloat())
4290 CCInfo.PreAnalyzeFormalArguments(Ins);
4291
4292 CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
4293 CCInfo.clearWasPPCF128();
4294
4295 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4296 CCValAssign &VA = ArgLocs[i];
4297
4298 // Arguments stored in registers.
4299 if (VA.isRegLoc()) {
4300 const TargetRegisterClass *RC;
4301 EVT ValVT = VA.getValVT();
4302
4303 switch (ValVT.getSimpleVT().SimpleTy) {
4304 default:
4305 llvm_unreachable("ValVT not supported by formal arguments Lowering");
4306 case MVT::i1:
4307 case MVT::i32:
4308 RC = &PPC::GPRCRegClass;
4309 break;
4310 case MVT::f32:
4311 if (Subtarget.hasP8Vector())
4312 RC = &PPC::VSSRCRegClass;
4313 else if (Subtarget.hasSPE())
4314 RC = &PPC::GPRCRegClass;
4315 else
4316 RC = &PPC::F4RCRegClass;
4317 break;
4318 case MVT::f64:
4319 if (Subtarget.hasVSX())
4320 RC = &PPC::VSFRCRegClass;
4321 else if (Subtarget.hasSPE())
4322 // SPE passes doubles in GPR pairs.
4323 RC = &PPC::GPRCRegClass;
4324 else
4325 RC = &PPC::F8RCRegClass;
4326 break;
4327 case MVT::v16i8:
4328 case MVT::v8i16:
4329 case MVT::v4i32:
4330 RC = &PPC::VRRCRegClass;
4331 break;
4332 case MVT::v4f32:
4333 RC = &PPC::VRRCRegClass;
4334 break;
4335 case MVT::v2f64:
4336 case MVT::v2i64:
4337 RC = &PPC::VRRCRegClass;
4338 break;
4339 }
4340
4341 SDValue ArgValue;
4342 // Transform the arguments stored in physical registers into
4343 // virtual ones.
4344 if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
4345 assert(i + 1 < e && "No second half of double precision argument");
4346 Register RegLo = MF.addLiveIn(VA.getLocReg(), RC);
4347 Register RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
4348 SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
4349 SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
4350 if (!Subtarget.isLittleEndian())
4351 std::swap (ArgValueLo, ArgValueHi);
4352 ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
4353 ArgValueHi);
4354 } else {
4355 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
4356 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
4357 ValVT == MVT::i1 ? MVT::i32 : ValVT);
4358 if (ValVT == MVT::i1)
4359 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
4360 }
4361
4362 InVals.push_back(ArgValue);
4363 } else {
4364 // Argument stored in memory.
4365 assert(VA.isMemLoc());
4366
4367      // Get the extended size of the argument type on the stack
4368 unsigned ArgSize = VA.getLocVT().getStoreSize();
4369 // Get the actual size of the argument type
4370 unsigned ObjSize = VA.getValVT().getStoreSize();
4371 unsigned ArgOffset = VA.getLocMemOffset();
4372 // Stack objects in PPC32 are right justified.
4373 ArgOffset += ArgSize - ObjSize;
4374 int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);
4375
4376 // Create load nodes to retrieve arguments from the stack.
4377 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4378 InVals.push_back(
4379 DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
4380 }
4381 }
4382
4383 // Assign locations to all of the incoming aggregate by value arguments.
4384 // Aggregates passed by value are stored in the local variable space of the
4385 // caller's stack frame, right above the parameter list area.
4386 SmallVector<CCValAssign, 16> ByValArgLocs;
4387 CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
4388 ByValArgLocs, *DAG.getContext());
4389
4390 // Reserve stack space for the allocations in CCInfo.
4391 CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);
4392
4393 CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
4394
4395 // Area that is at least reserved in the caller of this function.
4396 unsigned MinReservedArea = CCByValInfo.getStackSize();
4397 MinReservedArea = std::max(MinReservedArea, LinkageSize);
4398
4399 // Set the size that is at least reserved in caller of this function. Tail
4400 // call optimized function's reserved stack space needs to be aligned so that
4401 // taking the difference between two stack areas will result in an aligned
4402 // stack.
4403 MinReservedArea =
4404 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4405 FuncInfo->setMinReservedArea(MinReservedArea);
4406
4407  SmallVector<SDValue, 8> MemOps;
4408
4409 // If the function takes variable number of arguments, make a frame index for
4410 // the start of the first vararg value... for expansion of llvm.va_start.
4411 if (isVarArg) {
4412 static const MCPhysReg GPArgRegs[] = {
4413 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
4414 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
4415 };
4416 const unsigned NumGPArgRegs = std::size(GPArgRegs);
4417
4418 static const MCPhysReg FPArgRegs[] = {
4419 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
4420 PPC::F8
4421 };
4422 unsigned NumFPArgRegs = std::size(FPArgRegs);
4423
4424 if (useSoftFloat() || hasSPE())
4425 NumFPArgRegs = 0;
4426
4427 FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
4428 FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
4429
4430 // Make room for NumGPArgRegs and NumFPArgRegs.
4431 int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
4432 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
4433
4434    FuncInfo->setVarArgsStackOffset(MFI.CreateFixedObject(
4435        PtrVT.getSizeInBits() / 8, CCInfo.getStackSize(), true));
4436
4437 FuncInfo->setVarArgsFrameIndex(
4438 MFI.CreateStackObject(Depth, Align(8), false));
4439 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4440
4441 // The fixed integer arguments of a variadic function are stored to the
4442 // VarArgsFrameIndex on the stack so that they may be loaded by
4443 // dereferencing the result of va_next.
4444 for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
4445 // Get an existing live-in vreg, or add a new one.
4446 Register VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
4447 if (!VReg)
4448 VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
4449
4450 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4451 SDValue Store =
4452 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4453 MemOps.push_back(Store);
4454 // Increment the address by four for the next argument to store
4455 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
4456 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4457 }
4458
4459 // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
4460 // is set.
4461 // The double arguments are stored to the VarArgsFrameIndex
4462 // on the stack.
4463 for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
4464 // Get an existing live-in vreg, or add a new one.
4465 Register VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
4466 if (!VReg)
4467 VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
4468
4469 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
4470 SDValue Store =
4471 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4472 MemOps.push_back(Store);
4473 // Increment the address by eight for the next argument to store
4474 SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
4475 PtrVT);
4476 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4477 }
4478 }
4479
4480 if (!MemOps.empty())
4481 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4482
4483 return Chain;
4484}
4485
4486// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4487// value to MVT::i64 and then truncate to the correct register size.
4488SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
4489 EVT ObjectVT, SelectionDAG &DAG,
4490 SDValue ArgVal,
4491 const SDLoc &dl) const {
4492 if (Flags.isSExt())
4493 ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
4494 DAG.getValueType(ObjectVT));
4495 else if (Flags.isZExt())
4496 ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
4497 DAG.getValueType(ObjectVT));
4498
4499 return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
4500}
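// For example, a signext i32 argument arrives in the low half of an i64 GPR;
// wrapping it in AssertSext before the truncate lets later combines rely on
// the upper 32 bits being a sign extension.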
4501
4502SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
4503 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4504 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4505 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4506 // TODO: add description of PPC stack frame format, or at least some docs.
4507 //
4508 bool isELFv2ABI = Subtarget.isELFv2ABI();
4509 bool isLittleEndian = Subtarget.isLittleEndian();
4510  MachineFunction &MF = DAG.getMachineFunction();
4511  MachineFrameInfo &MFI = MF.getFrameInfo();
4512 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4513
4514 assert(!(CallConv == CallingConv::Fast && isVarArg) &&
4515 "fastcc not supported on varargs functions");
4516
4517 EVT PtrVT = getPointerTy(MF.getDataLayout());
4518 // Potential tail calls could cause overwriting of argument stack slots.
4519 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4520 (CallConv == CallingConv::Fast));
4521 unsigned PtrByteSize = 8;
4522 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4523
4524 static const MCPhysReg GPR[] = {
4525 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4526 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4527 };
4528 static const MCPhysReg VR[] = {
4529 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4530 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4531 };
4532
4533 const unsigned Num_GPR_Regs = std::size(GPR);
4534 const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
4535 const unsigned Num_VR_Regs = std::size(VR);
4536
4537 // Do a first pass over the arguments to determine whether the ABI
4538 // guarantees that our caller has allocated the parameter save area
4539 // on its stack frame. In the ELFv1 ABI, this is always the case;
4540 // in the ELFv2 ABI, it is true if this is a vararg function or if
4541 // any parameter is located in a stack slot.
4542
4543 bool HasParameterArea = !isELFv2ABI || isVarArg;
4544 unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
4545 unsigned NumBytes = LinkageSize;
4546 unsigned AvailableFPRs = Num_FPR_Regs;
4547 unsigned AvailableVRs = Num_VR_Regs;
4548 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
4549 if (Ins[i].Flags.isNest())
4550 continue;
4551
4552 if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
4553 PtrByteSize, LinkageSize, ParamAreaSize,
4554 NumBytes, AvailableFPRs, AvailableVRs))
4555 HasParameterArea = true;
4556 }
4557
4558 // Add DAG nodes to load the arguments or copy them out of registers. On
4559 // entry to a function on PPC, the arguments start after the linkage area,
4560 // although the first ones are often in registers.
4561
4562 unsigned ArgOffset = LinkageSize;
4563 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4564  SmallVector<SDValue, 8> MemOps;
4565  Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
4566  unsigned CurArgIdx = 0;
4567 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
4568 SDValue ArgVal;
4569 bool needsLoad = false;
4570 EVT ObjectVT = Ins[ArgNo].VT;
4571 EVT OrigVT = Ins[ArgNo].ArgVT;
4572 unsigned ObjSize = ObjectVT.getStoreSize();
4573 unsigned ArgSize = ObjSize;
4574 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4575 if (Ins[ArgNo].isOrigArg()) {
4576 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
4577 CurArgIdx = Ins[ArgNo].getOrigArgIndex();
4578 }
4579 // We re-align the argument offset for each argument, except when using the
4580 // fast calling convention, when we need to make sure we do that only when
4581 // we'll actually use a stack slot.
4582 unsigned CurArgOffset;
4583 Align Alignment;
4584 auto ComputeArgOffset = [&]() {
4585 /* Respect alignment of argument on the stack. */
4586 Alignment =
4587 CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
4588 ArgOffset = alignTo(ArgOffset, Alignment);
4589 CurArgOffset = ArgOffset;
4590 };
4591
4592 if (CallConv != CallingConv::Fast) {
4593 ComputeArgOffset();
4594
4595 /* Compute GPR index associated with argument offset. */
4596 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4597 GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
4598 }
4599
4600 // FIXME the codegen can be much improved in some cases.
4601 // We do not have to keep everything in memory.
4602 if (Flags.isByVal()) {
4603 assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
4604
4605 if (CallConv == CallingConv::Fast)
4606 ComputeArgOffset();
4607
4608      // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple of the register size.
4609 ObjSize = Flags.getByValSize();
4610 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4611 // Empty aggregate parameters do not take up registers. Examples:
4612 // struct { } a;
4613 // union { } b;
4614 // int c[0];
4615 // etc. However, we have to provide a place-holder in InVals, so
4616 // pretend we have an 8-byte item at the current address for that
4617 // purpose.
4618 if (!ObjSize) {
4619 int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4620 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4621 InVals.push_back(FIN);
4622 continue;
4623 }
4624
4625 // Create a stack object covering all stack doublewords occupied
4626 // by the argument. If the argument is (fully or partially) on
4627 // the stack, or if the argument is fully in registers but the
4628 // caller has allocated the parameter save anyway, we can refer
4629 // directly to the caller's stack frame. Otherwise, create a
4630 // local copy in our own frame.
4631 int FI;
4632 if (HasParameterArea ||
4633 ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
4634 FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
4635 else
4636 FI = MFI.CreateStackObject(ArgSize, Alignment, false);
4637 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4638
4639 // Handle aggregates smaller than 8 bytes.
4640 if (ObjSize < PtrByteSize) {
4641 // The value of the object is its address, which differs from the
4642 // address of the enclosing doubleword on big-endian systems.
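        // (E.g. a 3-byte aggregate is right-justified within its doubleword on
        // big-endian targets, so its address is FIN + 5.)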
4643 SDValue Arg = FIN;
4644 if (!isLittleEndian) {
4645 SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
4646 Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
4647 }
4648 InVals.push_back(Arg);
4649
4650 if (GPR_idx != Num_GPR_Regs) {
4651 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4652 FuncInfo->addLiveInAttr(VReg, Flags);
4653 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4654 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), ObjSize * 8);
4655 SDValue Store =
4656 DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
4657 MachinePointerInfo(&*FuncArg), ObjType);
4658 MemOps.push_back(Store);
4659 }
4660 // Whether we copied from a register or not, advance the offset
4661 // into the parameter save area by a full doubleword.
4662 ArgOffset += PtrByteSize;
4663 continue;
4664 }
4665
4666 // The value of the object is its address, which is the address of
4667 // its first stack doubleword.
4668 InVals.push_back(FIN);
4669
4670 // Store whatever pieces of the object are in registers to memory.
4671 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4672 if (GPR_idx == Num_GPR_Regs)
4673 break;
4674
4675 Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4676 FuncInfo->addLiveInAttr(VReg, Flags);
4677 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4678 SDValue Addr = FIN;
4679 if (j) {
4680 SDValue Off = DAG.getConstant(j, dl, PtrVT);
4681 Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
4682 }
4683 unsigned StoreSizeInBits = std::min(PtrByteSize, (ObjSize - j)) * 8;
4684 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), StoreSizeInBits);
4685 SDValue Store =
4686 DAG.getTruncStore(Val.getValue(1), dl, Val, Addr,
4687 MachinePointerInfo(&*FuncArg, j), ObjType);
4688 MemOps.push_back(Store);
4689 ++GPR_idx;
4690 }
4691 ArgOffset += ArgSize;
4692 continue;
4693 }
4694
4695 switch (ObjectVT.getSimpleVT().SimpleTy) {
4696 default: llvm_unreachable("Unhandled argument type!");
4697 case MVT::i1:
4698 case MVT::i32:
4699 case MVT::i64:
4700 if (Flags.isNest()) {
4701 // The 'nest' parameter, if any, is passed in R11.
4702 Register VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
4703 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4704
4705 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4706 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4707
4708 break;
4709 }
4710
4711 // These can be scalar arguments or elements of an integer array type
4712 // passed directly. Clang may use those instead of "byval" aggregate
4713 // types to avoid forcing arguments to memory unnecessarily.
4714 if (GPR_idx != Num_GPR_Regs) {
4715 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4716 FuncInfo->addLiveInAttr(VReg, Flags);
4717 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4718
4719 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4720 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4721 // value to MVT::i64 and then truncate to the correct register size.
4722 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4723 } else {
4724 if (CallConv == CallingConv::Fast)
4725 ComputeArgOffset();
4726
4727 needsLoad = true;
4728 ArgSize = PtrByteSize;
4729 }
4730 if (CallConv != CallingConv::Fast || needsLoad)
4731 ArgOffset += 8;
4732 break;
4733
4734 case MVT::f32:
4735 case MVT::f64:
4736 // These can be scalar arguments or elements of a float array type
4737 // passed directly. The latter are used to implement ELFv2 homogenous
4738 // float aggregates.
4739 if (FPR_idx != Num_FPR_Regs) {
4740 unsigned VReg;
4741
4742 if (ObjectVT == MVT::f32)
4743 VReg = MF.addLiveIn(FPR[FPR_idx],
4744 Subtarget.hasP8Vector()
4745 ? &PPC::VSSRCRegClass
4746 : &PPC::F4RCRegClass);
4747 else
4748 VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
4749 ? &PPC::VSFRCRegClass
4750 : &PPC::F8RCRegClass);
4751
4752 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4753 ++FPR_idx;
4754 } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
4755 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
4756 // once we support fp <-> gpr moves.
4757
4758 // This can only ever happen in the presence of f32 array types,
4759 // since otherwise we never run out of FPRs before running out
4760 // of GPRs.
4761 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4762 FuncInfo->addLiveInAttr(VReg, Flags);
4763 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4764
4765 if (ObjectVT == MVT::f32) {
4766 if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
4767 ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
4768 DAG.getConstant(32, dl, MVT::i32));
4769 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
4770 }
4771
4772 ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
4773 } else {
4774 if (CallConv == CallingConv::Fast)
4775 ComputeArgOffset();
4776
4777 needsLoad = true;
4778 }
4779
4780 // When passing an array of floats, the array occupies consecutive
4781 // space in the argument area; only round up to the next doubleword
4782 // at the end of the array. Otherwise, each float takes 8 bytes.
4783 if (CallConv != CallingConv::Fast || needsLoad) {
4784 ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
4785 ArgOffset += ArgSize;
4786 if (Flags.isInConsecutiveRegsLast())
4787 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4788 }
4789 break;
4790 case MVT::v4f32:
4791 case MVT::v4i32:
4792 case MVT::v8i16:
4793 case MVT::v16i8:
4794 case MVT::v2f64:
4795 case MVT::v2i64:
4796 case MVT::v1i128:
4797 case MVT::f128:
4798 // These can be scalar arguments or elements of a vector array type
4799 // passed directly. The latter are used to implement ELFv2 homogenous
4800 // vector aggregates.
4801 if (VR_idx != Num_VR_Regs) {
4802 Register VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4803 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4804 ++VR_idx;
4805 } else {
4806 if (CallConv == CallingConv::Fast)
4807 ComputeArgOffset();
4808 needsLoad = true;
4809 }
4810 if (CallConv != CallingConv::Fast || needsLoad)
4811 ArgOffset += 16;
4812 break;
4813 }
4814
4815 // We need to load the argument to a virtual register if we determined
4816 // above that we ran out of physical registers of the appropriate type.
4817 if (needsLoad) {
4818 if (ObjSize < ArgSize && !isLittleEndian)
4819 CurArgOffset += ArgSize - ObjSize;
4820 int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
4821 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4822 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4823 }
4824
4825 InVals.push_back(ArgVal);
4826 }
4827
4828 // Area that is at least reserved in the caller of this function.
4829 unsigned MinReservedArea;
4830 if (HasParameterArea)
4831 MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
4832 else
4833 MinReservedArea = LinkageSize;
4834
4835 // Set the size that is at least reserved in caller of this function. Tail
4836 // call optimized functions' reserved stack space needs to be aligned so that
4837 // taking the difference between two stack areas will result in an aligned
4838 // stack.
4839 MinReservedArea =
4840 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4841 FuncInfo->setMinReservedArea(MinReservedArea);
4842
4843 // If the function takes variable number of arguments, make a frame index for
4844 // the start of the first vararg value... for expansion of llvm.va_start.
4845  // The ELFv2 ABI spec notes:
4846 // C programs that are intended to be *portable* across different compilers
4847 // and architectures must use the header file <stdarg.h> to deal with variable
4848 // argument lists.
4849 if (isVarArg && MFI.hasVAStart()) {
4850 int Depth = ArgOffset;
4851
4852 FuncInfo->setVarArgsFrameIndex(
4853 MFI.CreateFixedObject(PtrByteSize, Depth, true));
4854 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4855
4856 // If this function is vararg, store any remaining integer argument regs
4857 // to their spots on the stack so that they may be loaded by dereferencing
4858 // the result of va_next.
4859 for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4860 GPR_idx < Num_GPR_Regs; ++GPR_idx) {
4861 Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4862 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4863 SDValue Store =
4864 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4865 MemOps.push_back(Store);
4866 // Increment the address by four for the next argument to store
4867 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
4868 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4869 }
4870 }
4871
4872 if (!MemOps.empty())
4873 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4874
4875 return Chain;
4876}
4877
4878/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4879/// adjusted to accommodate the arguments for the tailcall.
4880static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4881 unsigned ParamSize) {
4882
4883 if (!isTailCall) return 0;
4884
4885  PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
4886  unsigned CallerMinReservedArea = FI->getMinReservedArea();
4887 int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
4888 // Remember only if the new adjustment is bigger.
4889 if (SPDiff < FI->getTailCallSPDelta())
4890 FI->setTailCallSPDelta(SPDiff);
4891
4892 return SPDiff;
4893}
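// For example, if the caller reserved 64 bytes of parameter area but the tail
// callee needs 96, SPDiff is -32 and the frame must be grown by 32 bytes; only
// the most negative delta seen so far is remembered.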
4894
4895static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV);
4896
4897static bool callsShareTOCBase(const Function *Caller,
4898 const GlobalValue *CalleeGV,
4899 const TargetMachine &TM) {
4900 // It does not make sense to call callsShareTOCBase() with a caller that
4901 // is PC Relative since PC Relative callers do not have a TOC.
4902#ifndef NDEBUG
4903 const PPCSubtarget *STICaller = &TM.getSubtarget<PPCSubtarget>(*Caller);
4904 assert(!STICaller->isUsingPCRelativeCalls() &&
4905 "PC Relative callers do not have a TOC and cannot share a TOC Base");
4906#endif
4907
4908 // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
4909 // don't have enough information to determine if the caller and callee share
4910 // the same TOC base, so we have to pessimistically assume they don't for
4911 // correctness.
4912 if (!CalleeGV)
4913 return false;
4914
4915 // If the callee is preemptable, then the static linker will use a plt-stub
4916 // which saves the toc to the stack, and needs a nop after the call
4917 // instruction to convert to a toc-restore.
4918 if (!TM.shouldAssumeDSOLocal(CalleeGV))
4919 return false;
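  // (On 64-bit ELF this is why calls are emitted as "bl callee; nop": the
  // linker can rewrite the nop into a TOC restore, typically an ld of r2 from
  // the TOC save slot, when a plt-stub is interposed.)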
4920
4921 // Functions with PC Relative enabled may clobber the TOC in the same DSO.
4922 // We may need a TOC restore in the situation where the caller requires a
4923 // valid TOC but the callee is PC Relative and does not.
4924 const Function *F = dyn_cast<Function>(CalleeGV);
4925 const GlobalAlias *Alias = dyn_cast<GlobalAlias>(CalleeGV);
4926
4927 // If we have an Alias we can try to get the function from there.
4928 if (Alias) {
4929 const GlobalObject *GlobalObj = Alias->getAliaseeObject();
4930 F = dyn_cast<Function>(GlobalObj);
4931 }
4932
4933 // If we still have no valid function pointer we do not have enough
4934 // information to determine if the callee uses PC Relative calls so we must
4935 // assume that it does.
4936 if (!F)
4937 return false;
4938
4939 // If the callee uses PC Relative we cannot guarantee that the callee won't
4940 // clobber the TOC of the caller and so we must assume that the two
4941 // functions do not share a TOC base.
4942 const PPCSubtarget *STICallee = &TM.getSubtarget<PPCSubtarget>(*F);
4943 if (STICallee->isUsingPCRelativeCalls())
4944 return false;
4945
4946 // If the GV is not a strong definition then we need to assume it can be
4947 // replaced by another function at link time. The function that replaces
4948 // it may not share the same TOC as the caller since the callee may be
4949 // replaced by a PC Relative version of the same function.
4950 if (!CalleeGV->isStrongDefinitionForLinker())
4951 return false;
4952
4953 // The medium and large code models are expected to provide a sufficiently
4954  // large TOC to satisfy all data addressing needs of a module with a
4955 // single TOC.
4956 if (CodeModel::Medium == TM.getCodeModel() ||
4957 CodeModel::Large == TM.getCodeModel())
4958 return true;
4959
4960 // Any explicitly-specified sections and section prefixes must also match.
4961 // Also, if we're using -ffunction-sections, then each function is always in
4962 // a different section (the same is true for COMDAT functions).
4963 if (TM.getFunctionSections() || CalleeGV->hasComdat() ||
4964 Caller->hasComdat() || CalleeGV->getSection() != Caller->getSection())
4965 return false;
4966 if (const auto *F = dyn_cast<Function>(CalleeGV)) {
4967 if (F->getSectionPrefix() != Caller->getSectionPrefix())
4968 return false;
4969 }
4970
4971 return true;
4972}
4973
4974static bool
4975needStackSlotPassParameters(const PPCSubtarget &Subtarget,
4976                            const SmallVectorImpl<ISD::OutputArg> &Outs) {
4977 assert(Subtarget.is64BitELFABI());
4978
4979 const unsigned PtrByteSize = 8;
4980 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4981
4982 static const MCPhysReg GPR[] = {
4983 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4984 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4985 };
4986 static const MCPhysReg VR[] = {
4987 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4988 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4989 };
4990
4991 const unsigned NumGPRs = std::size(GPR);
4992 const unsigned NumFPRs = 13;
4993 const unsigned NumVRs = std::size(VR);
4994 const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
4995
4996 unsigned NumBytes = LinkageSize;
4997 unsigned AvailableFPRs = NumFPRs;
4998 unsigned AvailableVRs = NumVRs;
4999
5000 for (const ISD::OutputArg& Param : Outs) {
5001 if (Param.Flags.isNest()) continue;
5002
5003 if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, PtrByteSize,
5004 LinkageSize, ParamAreaSize, NumBytes,
5005 AvailableFPRs, AvailableVRs))
5006 return true;
5007 }
5008 return false;
5009}
5010
5011static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) {
5012 if (CB.arg_size() != CallerFn->arg_size())
5013 return false;
5014
5015 auto CalleeArgIter = CB.arg_begin();
5016 auto CalleeArgEnd = CB.arg_end();
5017 Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
5018
5019 for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
5020 const Value* CalleeArg = *CalleeArgIter;
5021 const Value* CallerArg = &(*CallerArgIter);
5022 if (CalleeArg == CallerArg)
5023 continue;
5024
5025 // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
5026 // tail call @callee([4 x i64] undef, [4 x i64] %b)
5027 // }
5028 // 1st argument of callee is undef and has the same type as caller.
5029 if (CalleeArg->getType() == CallerArg->getType() &&
5030 isa<UndefValue>(CalleeArg))
5031 continue;
5032
5033 return false;
5034 }
5035
5036 return true;
5037}
5038
5039// Returns true if TCO is possible between the callers and callees
5040// calling conventions.
5041static bool
5042areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
5043                                    CallingConv::ID CalleeCC) {
5044 // Tail calls are possible with fastcc and ccc.
5045 auto isTailCallableCC = [] (CallingConv::ID CC){
5046 return CC == CallingConv::C || CC == CallingConv::Fast;
5047 };
5048 if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
5049 return false;
5050
5051 // We can safely tail call both fastcc and ccc callees from a c calling
5052 // convention caller. If the caller is fastcc, we may have less stack space
5053 // than a non-fastcc caller with the same signature so disable tail-calls in
5054 // that case.
5055 return CallerCC == CallingConv::C || CallerCC == CalleeCC;
5056}
5057
5058bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
5059 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5060 CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
5061    const SmallVectorImpl<ISD::OutputArg> &Outs,
5062    const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
5063 bool isCalleeExternalSymbol) const {
5064 bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
5065
5066 if (DisableSCO && !TailCallOpt) return false;
5067
5068 // Variadic argument functions are not supported.
5069 if (isVarArg) return false;
5070
5071 // Check that the calling conventions are compatible for tco.
5072 if (!areCallingConvEligibleForTCO_64SVR4(CallerCC, CalleeCC))
5073 return false;
5074
5075  // A caller that contains any byval parameter is not supported.
5076 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
5077 return false;
5078
5079  // A callee that contains any byval parameter is not supported either.
5080  // Note: This is a quick workaround, because in some cases, e.g.
5081 // caller's stack size > callee's stack size, we are still able to apply
5082 // sibling call optimization. For example, gcc is able to do SCO for caller1
5083 // in the following example, but not for caller2.
5084 // struct test {
5085 // long int a;
5086 // char ary[56];
5087 // } gTest;
5088 // __attribute__((noinline)) int callee(struct test v, struct test *b) {
5089 // b->a = v.a;
5090 // return 0;
5091 // }
5092 // void caller1(struct test a, struct test c, struct test *b) {
5093 // callee(gTest, b); }
5094 // void caller2(struct test *b) { callee(gTest, b); }
5095 if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
5096 return false;
5097
5098 // If callee and caller use different calling conventions, we cannot pass
5099 // parameters on stack since offsets for the parameter area may be different.
5100 if (CallerCC != CalleeCC && needStackSlotPassParameters(Subtarget, Outs))
5101 return false;
5102
5103 // All variants of 64-bit ELF ABIs without PC-Relative addressing require that
5104 // the caller and callee share the same TOC for TCO/SCO. If the caller and
5105 // callee potentially have different TOC bases then we cannot tail call since
5106 // we need to restore the TOC pointer after the call.
5107 // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
5108 // We cannot guarantee this for indirect calls or calls to external functions.
5109 // When PC-Relative addressing is used, the concept of the TOC is no longer
5110 // applicable so this check is not required.
5111 // Check first for indirect calls.
5112 if (!Subtarget.isUsingPCRelativeCalls() &&
5113 !isFunctionGlobalAddress(CalleeGV) && !isCalleeExternalSymbol)
5114 return false;
5115
5116 // Check if we share the TOC base.
5117 if (!Subtarget.isUsingPCRelativeCalls() &&
5118 !callsShareTOCBase(CallerFunc, CalleeGV, getTargetMachine()))
5119 return false;
5120
5121 // TCO allows altering callee ABI, so we don't have to check further.
5122 if (CalleeCC == CallingConv::Fast && TailCallOpt)
5123 return true;
5124
5125 if (DisableSCO) return false;
5126
5127  // If the callee uses the same argument list as the caller, we can apply SCO
5128  // in this case. Otherwise, we need to check whether the callee needs stack
5129  // space for passing arguments.
5130 // PC Relative tail calls may not have a CallBase.
5131 // If there is no CallBase we cannot verify if we have the same argument
5132 // list so assume that we don't have the same argument list.
5133 if (CB && !hasSameArgumentList(CallerFunc, *CB) &&
5134 needStackSlotPassParameters(Subtarget, Outs))
5135 return false;
5136 else if (!CB && needStackSlotPassParameters(Subtarget, Outs))
5137 return false;
5138
5139 return true;
5140}
5141
5142/// IsEligibleForTailCallOptimization - Check whether the call is eligible
5143/// for tail call optimization. Targets which want to do tail call
5144/// optimization should implement this function.
5145bool PPCTargetLowering::IsEligibleForTailCallOptimization(
5146 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5147 CallingConv::ID CallerCC, bool isVarArg,
5148 const SmallVectorImpl<ISD::InputArg> &Ins) const {
5149 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5150 return false;
5151
5152 // Variable argument functions are not supported.
5153 if (isVarArg)
5154 return false;
5155
5156 if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
5157 // Functions containing by val parameters are not supported.
5158 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
5159 return false;
5160
5161 // Non-PIC/GOT tail calls are supported.
5162 if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
5163 return true;
5164
5165 // At the moment we can only do local tail calls (in same module, hidden
5166 // or protected) if we are generating PIC.
5167 if (CalleeGV)
5168 return CalleeGV->hasHiddenVisibility() ||
5169 CalleeGV->hasProtectedVisibility();
5170 }
5171
5172 return false;
5173}
5174
5175 /// isBLACompatibleAddress - Return the immediate to use if the specified
5176 /// 32-bit value is representable in the immediate field of a BxA instruction.
5177 static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
5178 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
5179 if (!C) return nullptr;
5180
5181 int Addr = C->getZExtValue();
5182 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
5183 SignExtend32<26>(Addr) != Addr)
5184 return nullptr; // Top 6 bits have to be sext of immediate.
5185
5186 return DAG
5187 .getConstant(
5188 (int)C->getZExtValue() >> 2, SDLoc(Op),
5189 DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))
5190 .getNode();
5191}
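// Worked example (illustrative): an absolute callee address of 0x2000 has its
// low two bits clear and SignExtend32<26>(0x2000) == 0x2000, so it is
// BLA-compatible and the returned constant node holds 0x2000 >> 2 == 0x800.
// A misaligned address such as 0x2002, or one outside the signed 26-bit range,
// yields nullptr and no BLA is formed.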
5192
5193namespace {
5194
5195struct TailCallArgumentInfo {
5196 SDValue Arg;
5197 SDValue FrameIdxOp;
5198 int FrameIdx = 0;
5199
5200 TailCallArgumentInfo() = default;
5201};
5202
5203} // end anonymous namespace
5204
5205 /// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
5206 static void StoreTailCallArgumentsToStackSlot(
5207 SelectionDAG &DAG, SDValue Chain,
5208 const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
5209 SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
5210 for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
5211 SDValue Arg = TailCallArgs[i].Arg;
5212 SDValue FIN = TailCallArgs[i].FrameIdxOp;
5213 int FI = TailCallArgs[i].FrameIdx;
5214 // Store relative to the frame pointer.
5215 MemOpChains.push_back(DAG.getStore(
5216 Chain, dl, Arg, FIN,
5217 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
5218 }
5219}
5220
5221/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
5222 /// the appropriate stack slot for the tail call optimized function call.
5223 static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
5224 SDValue OldRetAddr, SDValue OldFP,
5225 int SPDiff, const SDLoc &dl) {
5226 if (SPDiff) {
5227 // Calculate the new stack slot for the return address.
5228 MachineFunction &MF = DAG.getMachineFunction();
5229 const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
5230 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
5231 bool isPPC64 = Subtarget.isPPC64();
5232 int SlotSize = isPPC64 ? 8 : 4;
5233 int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
5234 int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
5235 NewRetAddrLoc, true);
5236 EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
5237 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
5238 Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
5239 MachinePointerInfo::getFixedStack(MF, NewRetAddr));
5240 }
5241 return Chain;
5242}
5243
5244/// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
5245/// the position of the argument.
5246 static void
5247 CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
5248 SDValue Arg, int SPDiff, unsigned ArgOffset,
5249 SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
5250 int Offset = ArgOffset + SPDiff;
5251 uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
5252 int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
5253 EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
5254 SDValue FIN = DAG.getFrameIndex(FI, VT);
5255 TailCallArgumentInfo Info;
5256 Info.Arg = Arg;
5257 Info.FrameIdxOp = FIN;
5258 Info.FrameIdx = FI;
5259 TailCallArguments.push_back(Info);
5260}
5261
5262 /// EmitTailCallLoadFPAndRetAddr - Emit load from frame pointer and return
5263 /// address stack slot. Returns the chain as result and the loaded values in
5264 /// LROpOut/FPOpOut. Used when tail calling.
5265SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
5266 SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
5267 SDValue &FPOpOut, const SDLoc &dl) const {
5268 if (SPDiff) {
5269 // Load the LR and FP stack slot for later adjusting.
5270 EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
5271 LROpOut = getReturnAddrFrameIndex(DAG);
5272 LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo());
5273 Chain = SDValue(LROpOut.getNode(), 1);
5274 }
5275 return Chain;
5276}
5277
5278/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
5279/// by "Src" to address "Dst" of size "Size". Alignment information is
5280/// specified by the specific parameter attribute. The copy will be passed as
5281/// a byval function parameter.
5282/// Sometimes what we are copying is the end of a larger object, the part that
5283 /// does not fit in registers.
5284 static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
5285 SDValue Chain, ISD::ArgFlagsTy Flags,
5286 SelectionDAG &DAG, const SDLoc &dl) {
5287 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
5288 return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode,
5289 Flags.getNonZeroByValAlign(), false, false, false,
5290 MachinePointerInfo(), MachinePointerInfo());
5291}
5292
5293/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
5294 /// tail calls.
5295 static void LowerMemOpCallTo(
5296 SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
5297 SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
5298 bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
5299 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
5300 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5301 if (!isTailCall) {
5302 if (isVector) {
5303 SDValue StackPtr;
5304 if (isPPC64)
5305 StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5306 else
5307 StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5308 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
5309 DAG.getConstant(ArgOffset, dl, PtrVT));
5310 }
5311 MemOpChains.push_back(
5312 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5313 // Calculate and remember argument location.
5314 } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
5315 TailCallArguments);
5316}
5317
5318 static void
5319 PrepareTailCall(SelectionDAG &DAG, SDValue &InGlue, SDValue &Chain,
5320 const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
5321 SDValue FPOp,
5322 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5323 // Emit a sequence of copyto/copyfrom virtual registers for arguments that
5324 // might overwrite each other in case of tail call optimization.
5325 SmallVector<SDValue, 8> MemOpChains2;
5326 // Do not flag preceding copytoreg stuff together with the following stuff.
5327 InGlue = SDValue();
5328 StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
5329 MemOpChains2, dl);
5330 if (!MemOpChains2.empty())
5331 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
5332
5333 // Store the return address to the appropriate stack slot.
5334 Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);
5335
5336 // Emit callseq_end just before tailcall node.
5337 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InGlue, dl);
5338 InGlue = Chain.getValue(1);
5339}
5340
5341// Is this global address that of a function that can be called by name? (as
5342// opposed to something that must hold a descriptor for an indirect call).
5343static bool isFunctionGlobalAddress(const GlobalValue *GV) {
5344 if (GV) {
5345 if (GV->isThreadLocal())
5346 return false;
5347
5348 return GV->getValueType()->isFunctionTy();
5349 }
5350
5351 return false;
5352}
5353
5354SDValue PPCTargetLowering::LowerCallResult(
5355 SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
5356 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5357 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5358 SmallVector<CCValAssign, 16> RVLocs;
5359 CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
5360 *DAG.getContext());
5361
5362 CCRetInfo.AnalyzeCallResult(
5363 Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
5364 ? RetCC_PPC_Cold
5365 : RetCC_PPC);
5366
5367 // Copy all of the result registers out of their specified physreg.
5368 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
5369 CCValAssign &VA = RVLocs[i];
5370 assert(VA.isRegLoc() && "Can only return in registers!");
5371
5372 SDValue Val;
5373
5374 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
5375 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5376 InGlue);
5377 Chain = Lo.getValue(1);
5378 InGlue = Lo.getValue(2);
5379 VA = RVLocs[++i]; // skip ahead to next loc
5380 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5381 InGlue);
5382 Chain = Hi.getValue(1);
5383 InGlue = Hi.getValue(2);
5384 if (!Subtarget.isLittleEndian())
5385 std::swap (Lo, Hi);
5386 Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
5387 } else {
5388 Val = DAG.getCopyFromReg(Chain, dl,
5389 VA.getLocReg(), VA.getLocVT(), InGlue);
5390 Chain = Val.getValue(1);
5391 InGlue = Val.getValue(2);
5392 }
5393
5394 switch (VA.getLocInfo()) {
5395 default: llvm_unreachable("Unknown loc info!");
5396 case CCValAssign::Full: break;
5397 case CCValAssign::AExt:
5398 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5399 break;
5400 case CCValAssign::ZExt:
5401 Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
5402 DAG.getValueType(VA.getValVT()));
5403 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5404 break;
5405 case CCValAssign::SExt:
5406 Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
5407 DAG.getValueType(VA.getValVT()));
5408 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5409 break;
5410 }
5411
5412 InVals.push_back(Val);
5413 }
5414
5415 return Chain;
5416}
5417
5418static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
5419 const PPCSubtarget &Subtarget, bool isPatchPoint) {
5420 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5421 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5422
5423 // PatchPoint calls are not indirect.
5424 if (isPatchPoint)
5425 return false;
5426
5427 if (isFunctionGlobalAddress(GV) || isa<ExternalSymbolSDNode>(Callee))
5428 return false;
5429
5430 // Darwin and 32-bit ELF can use a BLA. The descriptor-based ABIs cannot
5431 // because the immediate function pointer points to a descriptor instead of
5432 // a function entry point. The ELFv2 ABI cannot use a BLA because the function
5433 // pointer immediate points to the global entry point, while the BLA would
5434 // need to jump to the local entry point (see rL211174).
5435 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&
5436 isBLACompatibleAddress(Callee, DAG))
5437 return false;
5438
5439 return true;
5440}
5441
5442// AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.
5443static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
5444 return Subtarget.isAIXABI() ||
5445 (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());
5446}
5447
5448 static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
5449 const Function &Caller, const SDValue &Callee,
5450 const PPCSubtarget &Subtarget,
5451 const TargetMachine &TM,
5452 bool IsStrictFPCall = false) {
5453 if (CFlags.IsTailCall)
5454 return PPCISD::TC_RETURN;
5455
5456 unsigned RetOpc = 0;
5457 // This is a call through a function pointer.
5458 if (CFlags.IsIndirect) {
5459 // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer across
5460 // indirect calls. The save of the caller's TOC pointer to the stack will be
5461 // inserted into the DAG as part of call lowering. The restore of the TOC
5462 // pointer is modeled by using a pseudo instruction for the call opcode that
5463 // represents the 2 instruction sequence of an indirect branch and link,
5464 // immediately followed by a load of the TOC pointer from the stack save
5465 // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
5466 // as it is not saved or used.
5467 RetOpc = isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
5468 : PPCISD::BCTRL;
5469 } else if (Subtarget.isUsingPCRelativeCalls()) {
5470 assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
5471 RetOpc = PPCISD::CALL_NOTOC;
5472 } else if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI()) {
5473 // The ABIs that maintain a TOC pointer across calls need to have a nop
5474 // immediately following the call instruction if the caller and callee may
5475 // have different TOC bases. At link time if the linker determines the calls
5476 // may not share a TOC base, the call is redirected to a trampoline inserted
5477 // by the linker. The trampoline will (among other things) save the caller's
5478 // TOC pointer at an ABI designated offset in the linkage area and the
5479 // linker will rewrite the nop to be a load of the TOC pointer from the
5480 // linkage area into gpr2.
5481 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5482 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5483 RetOpc =
5484 callsShareTOCBase(&Caller, GV, TM) ? PPCISD::CALL : PPCISD::CALL_NOP;
5485 } else
5486 RetOpc = PPCISD::CALL;
5487 if (IsStrictFPCall) {
5488 switch (RetOpc) {
5489 default:
5490 llvm_unreachable("Unknown call opcode");
5491 case PPCISD::BCTRL_LOAD_TOC:
5492 RetOpc = PPCISD::BCTRL_LOAD_TOC_RM;
5493 break;
5494 case PPCISD::BCTRL:
5495 RetOpc = PPCISD::BCTRL_RM;
5496 break;
5497 case PPCISD::CALL_NOTOC:
5498 RetOpc = PPCISD::CALL_NOTOC_RM;
5499 break;
5500 case PPCISD::CALL:
5501 RetOpc = PPCISD::CALL_RM;
5502 break;
5503 case PPCISD::CALL_NOP:
5504 RetOpc = PPCISD::CALL_NOP_RM;
5505 break;
5506 }
5507 }
5508 return RetOpc;
5509}
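// Illustrative sketch of the code this opcode choice leads to (offsets per the
// 64-bit ELF ABIs; this function itself only picks the node kind): a CALL_NOP
// on ELFv2 becomes
//
//   bl callee
//   nop        # rewritten by the linker to "ld r2, 24(r1)" if the call is
//              # routed through a stub and the TOC must be restored
//
// while a shared-TOC local call uses plain CALL (bl) with no reload. 24(r1) is
// the ELFv2 TOC save slot; ELFv1 uses 40(r1).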
5510
5511static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
5512 const SDLoc &dl, const PPCSubtarget &Subtarget) {
5513 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
5514 if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
5515 return SDValue(Dest, 0);
5516
5517 // Returns true if the callee is local, and false otherwise.
5518 auto isLocalCallee = [&]() {
5519 const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
5520 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5521
5522 return DAG.getTarget().shouldAssumeDSOLocal(GV) &&
5523 !isa_and_nonnull<GlobalIFunc>(GV);
5524 };
5525
5526 // The PLT is only used in 32-bit ELF PIC mode. Attempting to use the PLT in
5527 // a static relocation model causes some versions of GNU LD (2.17.50, at
5528 // least) to force BSS-PLT, instead of secure-PLT, even if all objects are
5529 // built with secure-PLT.
5530 bool UsePlt =
5531 Subtarget.is32BitELFABI() && !isLocalCallee() &&
5532 Subtarget.getTargetMachine().getRelocationModel() == Reloc::PIC_;
5533
5534 const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) {
5535 const TargetMachine &TM = Subtarget.getTargetMachine();
5536 const TargetLoweringObjectFile *TLOF = TM.getObjFileLowering();
5537 MCSymbolXCOFF *S =
5538 cast<MCSymbolXCOFF>(TLOF->getFunctionEntryPointSymbol(GV, TM));
5539
5540 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5541 return DAG.getMCSymbol(S, PtrVT);
5542 };
5543
5544 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5545 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5546 if (isFunctionGlobalAddress(GV)) {
5547 const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
5548
5549 if (Subtarget.isAIXABI()) {
5550 assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX.");
5551 return getAIXFuncEntryPointSymbolSDNode(GV);
5552 }
5553 return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0,
5554 UsePlt ? PPCII::MO_PLT : 0);
5555 }
5556
5557 if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
5558 const char *SymName = S->getSymbol();
5559 if (Subtarget.isAIXABI()) {
5560 // If there exists a user-declared function whose name is the same as the
5561 // ExternalSymbol's, then we pick up the user-declared version.
5562 const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
5563 if (const Function *F =
5564 dyn_cast_or_null<Function>(Mod->getNamedValue(SymName)))
5565 return getAIXFuncEntryPointSymbolSDNode(F);
5566
5567 // On AIX, direct function calls reference the symbol for the function's
5568 // entry point, which is named by prepending a "." before the function's
5569 // C-linkage name. A Qualname is returned here because an external
5570 // function entry point is a csect with XTY_ER property.
5571 const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) {
5572 auto &Context = DAG.getMachineFunction().getMMI().getContext();
5573 MCSectionXCOFF *Sec = Context.getXCOFFSection(
5574 (Twine(".") + Twine(SymName)).str(), SectionKind::getMetadata(),
5575 XCOFF::CsectProperties(XCOFF::XMC_PR, XCOFF::XTY_ER));
5576 return Sec->getQualNameSymbol();
5577 };
5578
5579 SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data();
5580 }
5581 return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(),
5582 UsePlt ? PPCII::MO_PLT : 0);
5583 }
5584
5585 // No transformation needed.
5586 assert(Callee.getNode() && "What no callee?");
5587 return Callee;
5588}
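// AIX example (illustrative): a direct call to an external function "memcpy"
// ends up referencing the entry-point csect symbol ".memcpy" (XTY_ER), while
// the undotted symbol "memcpy" names the function descriptor used for
// function pointers.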
5589
5590 static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart) {
5591 assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
5592 "Expected a CALLSEQ_STARTSDNode.");
5593
5594 // The last operand is the chain, except when the node has glue. If the node
5595 // has glue, then the last operand is the glue, and the chain is the second
5596 // last operand.
5597 SDValue LastValue = CallSeqStart.getValue(CallSeqStart->getNumValues() - 1);
5598 if (LastValue.getValueType() != MVT::Glue)
5599 return LastValue;
5600
5601 return CallSeqStart.getValue(CallSeqStart->getNumValues() - 2);
5602}
5603
5604 // Creates the node that moves a function's address into the count register
5605// to prepare for an indirect call instruction.
5606static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5607 SDValue &Glue, SDValue &Chain,
5608 const SDLoc &dl) {
5609 SDValue MTCTROps[] = {Chain, Callee, Glue};
5610 EVT ReturnTypes[] = {MVT::Other, MVT::Glue};
5611 Chain = DAG.getNode(PPCISD::MTCTR, dl, ArrayRef(ReturnTypes, 2),
5612 ArrayRef(MTCTROps, Glue.getNode() ? 3 : 2));
5613 // The glue is the second value produced.
5614 Glue = Chain.getValue(1);
5615}
5616
5617 static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5618 SDValue &Glue, SDValue &Chain,
5619 SDValue CallSeqStart,
5620 const CallBase *CB, const SDLoc &dl,
5621 bool hasNest,
5622 const PPCSubtarget &Subtarget) {
5623 // Function pointers in the 64-bit SVR4 ABI do not point to the function
5624 // entry point, but to the function descriptor (the function entry point
5625 // address is part of the function descriptor though).
5626 // The function descriptor is a three doubleword structure with the
5627 // following fields: function entry point, TOC base address and
5628 // environment pointer.
5629 // Thus for a call through a function pointer, the following actions need
5630 // to be performed:
5631 // 1. Save the TOC of the caller in the TOC save area of its stack
5632 // frame (this is done in LowerCall_64SVR4()).
5633 // 2. Load the address of the function entry point from the function
5634 // descriptor.
5635 // 3. Load the TOC of the callee from the function descriptor into r2.
5636 // 4. Load the environment pointer from the function descriptor into
5637 // r11.
5638 // 5. Branch to the function entry point address.
5639 // 6. On return of the callee, the TOC of the caller needs to be
5640 // restored (this is done in FinishCall()).
5641 //
5642 // The loads are scheduled at the beginning of the call sequence, and the
5643 // register copies are flagged together to ensure that no other
5644 // operations can be scheduled in between. E.g. without flagging the
5645 // copies together, a TOC access in the caller could be scheduled between
5646 // the assignment of the callee TOC and the branch to the callee, which leads
5647 // to incorrect code.
5648
5649 // Start by loading the function address from the descriptor.
5650 SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);
5651 auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
5652 ? (MachineMemOperand::MODereferenceable |
5653 MachineMemOperand::MOInvariant)
5654 : MachineMemOperand::MONone;
5655
5656 MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr);
5657
5658 // Registers used in building the DAG.
5659 const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
5660 const MCRegister TOCReg = Subtarget.getTOCPointerRegister();
5661
5662 // Offsets of descriptor members.
5663 const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();
5664 const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();
5665
5666 const MVT RegVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
5667 const Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
5668
5669 // One load for the function's entry point address.
5670 SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI,
5671 Alignment, MMOFlags);
5672
5673 // One for loading the TOC anchor for the module that contains the called
5674 // function.
5675 SDValue TOCOff = DAG.getIntPtrConstant(TOCAnchorOffset, dl);
5676 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff);
5677 SDValue TOCPtr =
5678 DAG.getLoad(RegVT, dl, LDChain, AddTOC,
5679 MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags);
5680
5681 // One for loading the environment pointer.
5682 SDValue PtrOff = DAG.getIntPtrConstant(EnvPtrOffset, dl);
5683 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff);
5684 SDValue LoadEnvPtr =
5685 DAG.getLoad(RegVT, dl, LDChain, AddPtr,
5686 MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags);
5687
5688
5689 // Then copy the newly loaded TOC anchor to the TOC pointer.
5690 SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue);
5691 Chain = TOCVal.getValue(0);
5692 Glue = TOCVal.getValue(1);
5693
5694 // If the function call has an explicit 'nest' parameter, it takes the
5695 // place of the environment pointer.
5696 assert((!hasNest || !Subtarget.isAIXABI()) &&
5697 "Nest parameter is not supported on AIX.");
5698 if (!hasNest) {
5699 SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue);
5700 Chain = EnvVal.getValue(0);
5701 Glue = EnvVal.getValue(1);
5702 }
5703
5704 // The rest of the indirect call sequence is the same as the non-descriptor
5705 // DAG.
5706 prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl);
5707}
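// Illustrative shape of the resulting machine code for a descriptor-based
// indirect call on 64-bit ELFv1 (register choices and scheduling are a sketch;
// the pseudo expansion and register allocator decide the real sequence, and
// rP stands for whatever register holds the descriptor address):
//
//   ld    r12, 0(rP)    # function entry point from the descriptor
//   ld    r2,  8(rP)    # callee TOC anchor
//   ld    r11, 16(rP)   # environment pointer
//   mtctr r12
//   bctrl
//   ld    r2, 40(r1)    # reload the caller's TOC from its save slot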
5708
5709 static void
5710 buildCallOperands(SmallVectorImpl<SDValue> &Ops,
5711 PPCTargetLowering::CallFlags CFlags, const SDLoc &dl,
5712 SelectionDAG &DAG,
5713 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
5714 SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
5715 const PPCSubtarget &Subtarget) {
5716 const bool IsPPC64 = Subtarget.isPPC64();
5717 // MVT for a general purpose register.
5718 const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
5719
5720 // First operand is always the chain.
5721 Ops.push_back(Chain);
5722
5723 // If it's a direct call pass the callee as the second operand.
5724 if (!CFlags.IsIndirect)
5725 Ops.push_back(Callee);
5726 else {
5727 assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect.");
5728
5729 // For the TOC based ABIs, we have saved the TOC pointer to the linkage area
5730 // on the stack (this would have been done in `LowerCall_64SVR4` or
5731 // `LowerCall_AIX`). The call instruction is a pseudo instruction that
5732 // represents both the indirect branch and a load that restores the TOC
5733 // pointer from the linkage area. The operand for the TOC restore is an add
5734 // of the TOC save offset to the stack pointer. This must be the second
5735 // operand: after the chain input but before any other variadic arguments.
5736 // For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not
5737 // saved or used.
5738 if (isTOCSaveRestoreRequired(Subtarget)) {
5739 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
5740
5741 SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);
5742 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5743 SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5744 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff);
5745 Ops.push_back(AddTOC);
5746 }
5747
5748 // Add the register used for the environment pointer.
5749 if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest)
5750 Ops.push_back(DAG.getRegister(Subtarget.getEnvironmentPointerRegister(),
5751 RegVT));
5752
5753
5754 // Add CTR register as callee so a bctr can be emitted later.
5755 if (CFlags.IsTailCall)
5756 Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));
5757 }
5758
5759 // If this is a tail call add stack pointer delta.
5760 if (CFlags.IsTailCall)
5761 Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
5762
5763 // Add argument registers to the end of the list so that they are known live
5764 // into the call.
5765 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
5766 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
5767 RegsToPass[i].second.getValueType()));
5768
5769 // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
5770 // no way to mark dependencies as implicit here.
5771 // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
5772 if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&
5773 !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())
5774 Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));
5775
5776 // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
5777 if (CFlags.IsVarArg && Subtarget.is32BitELFABI())
5778 Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
5779
5780 // Add a register mask operand representing the call-preserved registers.
5781 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5782 const uint32_t *Mask =
5783 TRI->getCallPreservedMask(DAG.getMachineFunction(), CFlags.CallConv);
5784 assert(Mask && "Missing call preserved mask for calling convention");
5785 Ops.push_back(DAG.getRegisterMask(Mask));
5786
5787 // If the glue is valid, it is the last operand.
5788 if (Glue.getNode())
5789 Ops.push_back(Glue);
5790}
5791
5792SDValue PPCTargetLowering::FinishCall(
5793 CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
5794 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
5795 SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
5796 unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
5797 SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const {
5798
5799 if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
5800 Subtarget.isAIXABI())
5801 setUsesTOCBasePtr(DAG);
5802
5803 unsigned CallOpc =
5804 getCallOpcode(CFlags, DAG.getMachineFunction().getFunction(), Callee,
5805 Subtarget, DAG.getTarget(), CB ? CB->isStrictFP() : false);
5806
5807 if (!CFlags.IsIndirect)
5808 Callee = transformCallee(Callee, DAG, dl, Subtarget);
5809 else if (Subtarget.usesFunctionDescriptors())
5810 prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
5811 dl, CFlags.HasNest, Subtarget);
5812 else
5813 prepareIndirectCall(DAG, Callee, Glue, Chain, dl);
5814
5815 // Build the operand list for the call instruction.
5816 SmallVector<SDValue, 8> Ops;
5817 buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass, Glue, Chain, Callee,
5818 SPDiff, Subtarget);
5819
5820 // Emit tail call.
5821 if (CFlags.IsTailCall) {
5822 // Indirect tail call when using PC Relative calls do not have the same
5823 // constraints.
5824 assert(((Callee.getOpcode() == ISD::Register &&
5825 cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
5826 Callee.getOpcode() == ISD::TargetExternalSymbol ||
5827 Callee.getOpcode() == ISD::TargetGlobalAddress ||
5828 isa<ConstantSDNode>(Callee) ||
5829 (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
5830 "Expecting a global address, external symbol, absolute value, "
5831 "register or an indirect tail call when PC Relative calls are "
5832 "used.");
5833 // PC Relative calls also use TC_RETURN as the way to mark tail calls.
5834 assert(CallOpc == PPCISD::TC_RETURN &&
5835 "Unexpected call opcode for a tail call.");
5836 DAG.getMachineFunction().getFrameInfo().setHasTailCall();
5837 SDValue Ret = DAG.getNode(CallOpc, dl, MVT::Other, Ops);
5838 DAG.addNoMergeSiteInfo(Ret.getNode(), CFlags.NoMerge);
5839 return Ret;
5840 }
5841
5842 std::array<EVT, 2> ReturnTypes = {{MVT::Other, MVT::Glue}};
5843 Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops);
5844 DAG.addNoMergeSiteInfo(Chain.getNode(), CFlags.NoMerge);
5845 Glue = Chain.getValue(1);
5846
5847 // When performing tail call optimization the callee pops its arguments off
5848 // the stack. Account for this here so these bytes can be pushed back on in
5849 // PPCFrameLowering::eliminateCallFramePseudoInstr.
5850 int BytesCalleePops = (CFlags.CallConv == CallingConv::Fast &&
5851 getTargetMachine().Options.GuaranteedTailCallOpt)
5852 ? NumBytes
5853 : 0;
5854
5855 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, BytesCalleePops, Glue, dl);
5856 Glue = Chain.getValue(1);
5857
5858 return LowerCallResult(Chain, Glue, CFlags.CallConv, CFlags.IsVarArg, Ins, dl,
5859 DAG, InVals);
5860}
5861
5862 bool PPCTargetLowering::supportsTailCallFor(const CallBase *CB) const {
5863 CallingConv::ID CalleeCC = CB->getCallingConv();
5864 const Function *CallerFunc = CB->getCaller();
5865 CallingConv::ID CallerCC = CallerFunc->getCallingConv();
5866 const Function *CalleeFunc = CB->getCalledFunction();
5867 if (!CalleeFunc)
5868 return false;
5869 const GlobalValue *CalleeGV = dyn_cast<GlobalValue>(CalleeFunc);
5870
5871 SmallVector<ISD::OutputArg, 2> Outs;
5872 SmallVector<ISD::InputArg, 2> Ins;
5873
5874 GetReturnInfo(CalleeCC, CalleeFunc->getReturnType(),
5875 CalleeFunc->getAttributes(), Outs, *this,
5876 CalleeFunc->getParent()->getDataLayout());
5877
5878 return isEligibleForTCO(CalleeGV, CalleeCC, CallerCC, CB,
5879 CalleeFunc->isVarArg(), Outs, Ins, CallerFunc,
5880 false /*isCalleeExternalSymbol*/);
5881}
5882
5883bool PPCTargetLowering::isEligibleForTCO(
5884 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5885 CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
5886 const SmallVectorImpl<ISD::OutputArg> &Outs,
5887 const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
5888 bool isCalleeExternalSymbol) const {
5889 if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall()))
5890 return false;
5891
5892 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5893 return IsEligibleForTailCallOptimization_64SVR4(
5894 CalleeGV, CalleeCC, CallerCC, CB, isVarArg, Outs, Ins, CallerFunc,
5895 isCalleeExternalSymbol);
5896 else
5897 return IsEligibleForTailCallOptimization(CalleeGV, CalleeCC, CallerCC,
5898 isVarArg, Ins);
5899}
5900
5901SDValue
5902PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
5903 SmallVectorImpl<SDValue> &InVals) const {
5904 SelectionDAG &DAG = CLI.DAG;
5905 SDLoc &dl = CLI.DL;
5906 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
5907 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
5908 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
5909 SDValue Chain = CLI.Chain;
5910 SDValue Callee = CLI.Callee;
5911 bool &isTailCall = CLI.IsTailCall;
5912 CallingConv::ID CallConv = CLI.CallConv;
5913 bool isVarArg = CLI.IsVarArg;
5914 bool isPatchPoint = CLI.IsPatchPoint;
5915 const CallBase *CB = CLI.CB;
5916
5917 if (isTailCall) {
5918 MachineFunction &MF = DAG.getMachineFunction();
5919 CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
5920 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5921 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5922 bool IsCalleeExternalSymbol = isa<ExternalSymbolSDNode>(Callee);
5923
5924 isTailCall =
5925 isEligibleForTCO(GV, CallConv, CallerCC, CB, isVarArg, Outs, Ins,
5926 &(MF.getFunction()), IsCalleeExternalSymbol);
5927 if (isTailCall) {
5928 ++NumTailCalls;
5929 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5930 ++NumSiblingCalls;
5931
5932 // PC Relative calls no longer guarantee that the callee is a Global
5933 // Address Node. The callee could be an indirect tail call in which
5934 // case the SDValue for the callee could be a load (to load the address
5935 // of a function pointer) or it may be a register copy (to move the
5936 // address of the callee from a function parameter into a virtual
5937 // register). It may also be an ExternalSymbolSDNode (ex memcopy).
5938 assert((Subtarget.isUsingPCRelativeCalls() ||
5939 isa<GlobalAddressSDNode>(Callee)) &&
5940 "Callee should be an llvm::Function object.");
5941
5942 LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
5943 << "\nTCO callee: ");
5944 LLVM_DEBUG(Callee.dump());
5945 }
5946 }
5947
5948 if (!isTailCall && CB && CB->isMustTailCall())
5949 report_fatal_error("failed to perform tail call elimination on a call "
5950 "site marked musttail");
5951
5952 // When long calls (i.e. indirect calls) are always used, calls are always
5953 // made via function pointer. If we have a function name, first translate it
5954 // into a pointer.
5955 if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
5956 !isTailCall)
5957 Callee = LowerGlobalAddress(Callee, DAG);
5958
5959 CallFlags CFlags(
5960 CallConv, isTailCall, isVarArg, isPatchPoint,
5961 isIndirectCall(Callee, DAG, Subtarget, isPatchPoint),
5962 // hasNest
5963 Subtarget.is64BitELFABI() &&
5964 any_of(Outs, [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }),
5965 CLI.NoMerge);
5966
5967 if (Subtarget.isAIXABI())
5968 return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5969 InVals, CB);
5970
5971 assert(Subtarget.isSVR4ABI());
5972 if (Subtarget.isPPC64())
5973 return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5974 InVals, CB);
5975 return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5976 InVals, CB);
5977}
5978
5979SDValue PPCTargetLowering::LowerCall_32SVR4(
5980 SDValue Chain, SDValue Callee, CallFlags CFlags,
5981 const SmallVectorImpl<ISD::OutputArg> &Outs,
5982 const SmallVectorImpl<SDValue> &OutVals,
5983 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5984 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5985 const CallBase *CB) const {
5986 // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
5987 // of the 32-bit SVR4 ABI stack frame layout.
5988
5989 const CallingConv::ID CallConv = CFlags.CallConv;
5990 const bool IsVarArg = CFlags.IsVarArg;
5991 const bool IsTailCall = CFlags.IsTailCall;
5992
5993 assert((CallConv == CallingConv::C ||
5994 CallConv == CallingConv::Cold ||
5995 CallConv == CallingConv::Fast) && "Unknown calling convention!");
5996
5997 const Align PtrAlign(4);
5998
5999 MachineFunction &MF = DAG.getMachineFunction();
6000
6001 // Mark this function as potentially containing a call that is subject to tail
6002 // call optimization. As a consequence, the frame pointer will be used for
6003 // dynamic allocas and for restoring the caller's stack pointer in this
6004 // function's epilog. This is done because the tail-called function might
6005 // overwrite the value in this function's (MF) stack pointer stack slot 0(SP).
6006 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
6007 CallConv == CallingConv::Fast)
6008 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6009
6010 // Count how many bytes are to be pushed on the stack, including the linkage
6011 // area, parameter list area and the part of the local variable space which
6012 // contains copies of aggregates which are passed by value.
6013
6014 // Assign locations to all of the outgoing arguments.
6015 SmallVector<CCValAssign, 16> ArgLocs;
6016 PPCCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
6017
6018 // Reserve space for the linkage area on the stack.
6019 CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
6020 PtrAlign);
6021 if (useSoftFloat())
6022 CCInfo.PreAnalyzeCallOperands(Outs);
6023
6024 if (IsVarArg) {
6025 // Handle fixed and variable vector arguments differently.
6026 // Fixed vector arguments go into registers as long as registers are
6027 // available. Variable vector arguments always go into memory.
6028 unsigned NumArgs = Outs.size();
6029
6030 for (unsigned i = 0; i != NumArgs; ++i) {
6031 MVT ArgVT = Outs[i].VT;
6032 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
6033 bool Result;
6034
6035 if (Outs[i].IsFixed) {
6036 Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
6037 CCInfo);
6038 } else {
6039 Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
6040 ArgFlags, CCInfo);
6041 }
6042
6043 if (Result) {
6044#ifndef NDEBUG
6045 errs() << "Call operand #" << i << " has unhandled type "
6046 << ArgVT << "\n";
6047#endif
6048 llvm_unreachable(nullptr);
6049 }
6050 }
6051 } else {
6052 // All arguments are treated the same.
6053 CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
6054 }
6055 CCInfo.clearWasPPCF128();
6056
6057 // Assign locations to all of the outgoing aggregate by value arguments.
6058 SmallVector<CCValAssign, 16> ByValArgLocs;
6059 CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext());
6060
6061 // Reserve stack space for the allocations in CCInfo.
6062 CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);
6063
6064 CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
6065
6066 // Size of the linkage area, parameter list area and the part of the local
6067 // space variable where copies of aggregates which are passed by value are
6068 // stored.
6069 unsigned NumBytes = CCByValInfo.getStackSize();
6070
6071 // Calculate by how many bytes the stack has to be adjusted in case of tail
6072 // call optimization.
6073 int SPDiff = CalculateTailCallSPDiff(DAG, IsTailCall, NumBytes);
6074
6075 // Adjust the stack pointer for the new arguments...
6076 // These operations are automatically eliminated by the prolog/epilog pass
6077 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6078 SDValue CallSeqStart = Chain;
6079
6080 // Load the return address and frame pointer so it can be moved somewhere else
6081 // later.
6082 SDValue LROp, FPOp;
6083 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6084
6085 // Set up a copy of the stack pointer for use loading and storing any
6086 // arguments that may not fit in the registers available for argument
6087 // passing.
6088 SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
6089
6091 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6092 SmallVector<SDValue, 8> MemOpChains;
6093
6094 bool seenFloatArg = false;
6095 // Walk the register/memloc assignments, inserting copies/loads.
6096 // i - Tracks the index into the list of registers allocated for the call
6097 // RealArgIdx - Tracks the index into the list of actual function arguments
6098 // j - Tracks the index into the list of byval arguments
6099 for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
6100 i != e;
6101 ++i, ++RealArgIdx) {
6102 CCValAssign &VA = ArgLocs[i];
6103 SDValue Arg = OutVals[RealArgIdx];
6104 ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;
6105
6106 if (Flags.isByVal()) {
6107 // Argument is an aggregate which is passed by value, thus we need to
6108 // create a copy of it in the local variable space of the current stack
6109 // frame (which is the stack frame of the caller) and pass the address of
6110 // this copy to the callee.
6111 assert((j < ByValArgLocs.size()) && "Index out of bounds!");
6112 CCValAssign &ByValVA = ByValArgLocs[j++];
6113 assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
6114
6115 // Memory reserved in the local variable space of the caller's stack frame.
6116 unsigned LocMemOffset = ByValVA.getLocMemOffset();
6117
6118 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
6119 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
6120 StackPtr, PtrOff);
6121
6122 // Create a copy of the argument in the local area of the current
6123 // stack frame.
6124 SDValue MemcpyCall =
6125 CreateCopyOfByValArgument(Arg, PtrOff,
6126 CallSeqStart.getNode()->getOperand(0),
6127 Flags, DAG, dl);
6128
6129 // This must go outside the CALLSEQ_START..END.
6130 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
6131 SDLoc(MemcpyCall));
6132 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
6133 NewCallSeqStart.getNode());
6134 Chain = CallSeqStart = NewCallSeqStart;
6135
6136 // Pass the address of the aggregate copy on the stack either in a
6137 // physical register or in the parameter list area of the current stack
6138 // frame to the callee.
6139 Arg = PtrOff;
6140 }
6141
6142 // When useCRBits() is true, there can be i1 arguments.
6143 // It is because getRegisterType(MVT::i1) => MVT::i1,
6144 // and for other integer types getRegisterType() => MVT::i32.
6145 // Extend i1 and ensure callee will get i32.
6146 if (Arg.getValueType() == MVT::i1)
6147 Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
6148 dl, MVT::i32, Arg);
6149
6150 if (VA.isRegLoc()) {
6151 seenFloatArg |= VA.getLocVT().isFloatingPoint();
6152 // Put argument in a physical register.
6153 if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
6154 bool IsLE = Subtarget.isLittleEndian();
6155 SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
6156 DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
6157 RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
6158 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
6159 DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
6160 RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
6161 SVal.getValue(0)));
6162 } else
6163 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
6164 } else {
6165 // Put argument in the parameter list area of the current stack frame.
6166 assert(VA.isMemLoc());
6167 unsigned LocMemOffset = VA.getLocMemOffset();
6168
6169 if (!IsTailCall) {
6170 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
6171 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
6172 StackPtr, PtrOff);
6173
6174 MemOpChains.push_back(
6175 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
6176 } else {
6177 // Calculate and remember argument location.
6178 CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
6179 TailCallArguments);
6180 }
6181 }
6182 }
6183
6184 if (!MemOpChains.empty())
6185 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6186
6187 // Build a sequence of copy-to-reg nodes chained together with token chain
6188 // and flag operands which copy the outgoing args into the appropriate regs.
6189 SDValue InGlue;
6190 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6191 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6192 RegsToPass[i].second, InGlue);
6193 InGlue = Chain.getValue(1);
6194 }
6195
6196 // Set CR bit 6 to true if this is a vararg call with floating args passed in
6197 // registers.
6198 if (IsVarArg) {
6199 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
6200 SDValue Ops[] = { Chain, InGlue };
6201
6202 Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, dl,
6203 VTs, ArrayRef(Ops, InGlue.getNode() ? 2 : 1));
6204
6205 InGlue = Chain.getValue(1);
6206 }
6207
6208 if (IsTailCall)
6209 PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6210 TailCallArguments);
6211
6212 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
6213 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6214}
6215
6216// Copy an argument into memory, being careful to do this outside the
6217// call sequence for the call to which the argument belongs.
6218SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
6219 SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
6220 SelectionDAG &DAG, const SDLoc &dl) const {
6221 SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
6222 CallSeqStart.getNode()->getOperand(0),
6223 Flags, DAG, dl);
6224 // The MEMCPY must go outside the CALLSEQ_START..END.
6225 int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
6226 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
6227 SDLoc(MemcpyCall));
6228 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
6229 NewCallSeqStart.getNode());
6230 return NewCallSeqStart;
6231}
6232
6233SDValue PPCTargetLowering::LowerCall_64SVR4(
6234 SDValue Chain, SDValue Callee, CallFlags CFlags,
6235 const SmallVectorImpl<ISD::OutputArg> &Outs,
6236 const SmallVectorImpl<SDValue> &OutVals,
6237 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6238 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
6239 const CallBase *CB) const {
6240 bool isELFv2ABI = Subtarget.isELFv2ABI();
6241 bool isLittleEndian = Subtarget.isLittleEndian();
6242 unsigned NumOps = Outs.size();
6243 bool IsSibCall = false;
6244 bool IsFastCall = CFlags.CallConv == CallingConv::Fast;
6245
6246 EVT PtrVT = getPointerTy(DAG.getDataLayout());
6247 unsigned PtrByteSize = 8;
6248
6249 MachineFunction &MF = DAG.getMachineFunction();
6248
6250
6251 if (CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
6252 IsSibCall = true;
6253
6254 // Mark this function as potentially containing a call that is subject to tail
6255 // call optimization. As a consequence, the frame pointer will be used for
6256 // dynamic allocas and for restoring the caller's stack pointer in this
6257 // function's epilog. This is done because the tail-called function might
6258 // overwrite the value in this function's (MF) stack pointer stack slot 0(SP).
6259 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6260 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6261
6262 assert(!(IsFastCall && CFlags.IsVarArg) &&
6263 "fastcc not supported on varargs functions");
6264
6265 // Count how many bytes are to be pushed on the stack, including the linkage
6266 // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
6267 // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
6268 // area is 32 bytes reserved space for [SP][CR][LR][TOC].
6269 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6270 unsigned NumBytes = LinkageSize;
6271 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
6272
6273 static const MCPhysReg GPR[] = {
6274 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6275 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
6276 };
6277 static const MCPhysReg VR[] = {
6278 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
6279 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
6280 };
6281
6282 const unsigned NumGPRs = std::size(GPR);
6283 const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
6284 const unsigned NumVRs = std::size(VR);
6285
6286 // On ELFv2, we can avoid allocating the parameter area if all the arguments
6287 // can be passed to the callee in registers.
6288 // For the fast calling convention, there is another check below.
6289 // Note: We should keep consistent with LowerFormalArguments_64SVR4()
6290 bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall;
6291 if (!HasParameterArea) {
6292 unsigned ParamAreaSize = NumGPRs * PtrByteSize;
6293 unsigned AvailableFPRs = NumFPRs;
6294 unsigned AvailableVRs = NumVRs;
6295 unsigned NumBytesTmp = NumBytes;
6296 for (unsigned i = 0; i != NumOps; ++i) {
6297 if (Outs[i].Flags.isNest()) continue;
6298 if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
6299 PtrByteSize, LinkageSize, ParamAreaSize,
6300 NumBytesTmp, AvailableFPRs, AvailableVRs))
6301 HasParameterArea = true;
6302 }
6303 }
6304
6305 // When using the fast calling convention, we don't provide backing for
6306 // arguments that will be in registers.
6307 unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
6308
6309 // Avoid allocating parameter area for fastcc functions if all the arguments
6310 // can be passed in the registers.
6311 if (IsFastCall)
6312 HasParameterArea = false;
6313
6314 // Add up all the space actually used.
6315 for (unsigned i = 0; i != NumOps; ++i) {
6316 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6317 EVT ArgVT = Outs[i].VT;
6318 EVT OrigVT = Outs[i].ArgVT;
6319
6320 if (Flags.isNest())
6321 continue;
6322
6323 if (IsFastCall) {
6324 if (Flags.isByVal()) {
6325 NumGPRsUsed += (Flags.getByValSize()+7)/8;
6326 if (NumGPRsUsed > NumGPRs)
6327 HasParameterArea = true;
6328 } else {
6329 switch (ArgVT.getSimpleVT().SimpleTy) {
6330 default: llvm_unreachable("Unexpected ValueType for argument!");
6331 case MVT::i1:
6332 case MVT::i32:
6333 case MVT::i64:
6334 if (++NumGPRsUsed <= NumGPRs)
6335 continue;
6336 break;
6337 case MVT::v4i32:
6338 case MVT::v8i16:
6339 case MVT::v16i8:
6340 case MVT::v2f64:
6341 case MVT::v2i64:
6342 case MVT::v1i128:
6343 case MVT::f128:
6344 if (++NumVRsUsed <= NumVRs)
6345 continue;
6346 break;
6347 case MVT::v4f32:
6348 if (++NumVRsUsed <= NumVRs)
6349 continue;
6350 break;
6351 case MVT::f32:
6352 case MVT::f64:
6353 if (++NumFPRsUsed <= NumFPRs)
6354 continue;
6355 break;
6356 }
6357 HasParameterArea = true;
6358 }
6359 }
6360
6361 /* Respect alignment of argument on the stack. */
6362 auto Alignment =
6363 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6364 NumBytes = alignTo(NumBytes, Alignment);
6365
6366 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
6367 if (Flags.isInConsecutiveRegsLast())
6368 NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6369 }
6370
6371 unsigned NumBytesActuallyUsed = NumBytes;
6372
6373 // In the old ELFv1 ABI,
6374 // the prolog code of the callee may store up to 8 GPR argument registers to
6375 // the stack, allowing va_start to index over them in memory if it is varargs.
6376 // Because we cannot tell if this is needed on the caller side, we have to
6377 // conservatively assume that it is needed. As such, make sure we have at
6378 // least enough stack space for the caller to store the 8 GPRs.
6379 // In the ELFv2 ABI, we allocate the parameter area iff a callee
6380 // really requires memory operands, e.g. a vararg function.
6381 if (HasParameterArea)
6382 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
6383 else
6384 NumBytes = LinkageSize;
6385
6386 // Tail call needs the stack to be aligned.
6387 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6388 NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
6389
6390 int SPDiff = 0;
6391
6392 // Calculate by how many bytes the stack has to be adjusted in case of tail
6393 // call optimization.
6394 if (!IsSibCall)
6395 SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
6396
6397 // To protect arguments on the stack from being clobbered in a tail call,
6398 // force all the loads to happen before doing any other lowering.
6399 if (CFlags.IsTailCall)
6400 Chain = DAG.getStackArgumentTokenFactor(Chain);
6401
6402 // Adjust the stack pointer for the new arguments...
6403 // These operations are automatically eliminated by the prolog/epilog pass
6404 if (!IsSibCall)
6405 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6406 SDValue CallSeqStart = Chain;
6407
6408 // Load the return address and frame pointer so it can be moved somewhere else
6409 // later.
6410 SDValue LROp, FPOp;
6411 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6412
6413 // Set up a copy of the stack pointer for use loading and storing any
6414 // arguments that may not fit in the registers available for argument
6415 // passing.
6416 SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
6417
6418 // Figure out which arguments are going to go in registers, and which in
6419 // memory. Also, if this is a vararg function, floating point operations
6420 // must be stored to our stack, and loaded into integer regs as well, if
6421 // any integer regs are available for argument passing.
6422 unsigned ArgOffset = LinkageSize;
6423
6424 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
6425 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6426
6427 SmallVector<SDValue, 8> MemOpChains;
6428 for (unsigned i = 0; i != NumOps; ++i) {
6429 SDValue Arg = OutVals[i];
6430 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6431 EVT ArgVT = Outs[i].VT;
6432 EVT OrigVT = Outs[i].ArgVT;
6433
6434 // PtrOff will be used to store the current argument to the stack if a
6435 // register cannot be found for it.
6436 SDValue PtrOff;
6437
6438 // We re-align the argument offset for each argument, except when using the
6439 // fast calling convention, when we need to make sure we do that only when
6440 // we'll actually use a stack slot.
6441 auto ComputePtrOff = [&]() {
6442 /* Respect alignment of argument on the stack. */
6443 auto Alignment =
6444 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6445 ArgOffset = alignTo(ArgOffset, Alignment);
6446
6447 PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
6448
6449 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6450 };
6451
6452 if (!IsFastCall) {
6453 ComputePtrOff();
6454
6455 /* Compute GPR index associated with argument offset. */
6456 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
6457 GPR_idx = std::min(GPR_idx, NumGPRs);
6458 }
6459
6460 // Promote integers to 64-bit values.
6461 if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
6462 // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6463 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6464 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
6465 }
6466
6467 // FIXME memcpy is used way more than necessary. Correctness first.
6468 // Note: "by value" is code for passing a structure by value, not
6469 // basic types.
6470 if (Flags.isByVal()) {
6471 // Note: Size includes alignment padding, so
6472 // struct x { short a; char b; }
6473 // will have Size = 4. With #pragma pack(1), it will have Size = 3.
6474 // These are the proper values we need for right-justifying the
6475 // aggregate in a parameter register.
6476 unsigned Size = Flags.getByValSize();
6477
6478 // An empty aggregate parameter takes up no storage and no
6479 // registers.
6480 if (Size == 0)
6481 continue;
6482
6483 if (IsFastCall)
6484 ComputePtrOff();
6485
6486 // All aggregates smaller than 8 bytes must be passed right-justified.
6487 if (Size==1 || Size==2 || Size==4) {
6488 EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
6489 if (GPR_idx != NumGPRs) {
6490 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
6491 MachinePointerInfo(), VT);
6492 MemOpChains.push_back(Load.getValue(1));
6493 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6494
6495 ArgOffset += PtrByteSize;
6496 continue;
6497 }
6498 }
6499
6500 if (GPR_idx == NumGPRs && Size < 8) {
6501 SDValue AddPtr = PtrOff;
6502 if (!isLittleEndian) {
6503 SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
6504 PtrOff.getValueType());
6505 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6506 }
6507 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6508 CallSeqStart,
6509 Flags, DAG, dl);
6510 ArgOffset += PtrByteSize;
6511 continue;
6512 }
6513 // Copy the object to the parameter save area if it cannot be entirely passed
6514 // in registers.
6515 // FIXME: we only need to copy the parts which need to be passed in
6516 // parameter save area. For the parts passed by registers, we don't need
6517 // to copy them to the stack although we need to allocate space for them
6518 // in parameter save area.
6519 if ((NumGPRs - GPR_idx) * PtrByteSize < Size)
6520 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6521 CallSeqStart,
6522 Flags, DAG, dl);
6523
6524 // When a register is available, pass a small aggregate right-justified.
6525 if (Size < 8 && GPR_idx != NumGPRs) {
6526 // The easiest way to get this right-justified in a register
6527 // is to copy the structure into the rightmost portion of a
6528 // local variable slot, then load the whole slot into the
6529 // register.
6530 // FIXME: The memcpy seems to produce pretty awful code for
6531 // small aggregates, particularly for packed ones.
6532 // FIXME: It would be preferable to use the slot in the
6533 // parameter save area instead of a new local variable.
6534 SDValue AddPtr = PtrOff;
6535 if (!isLittleEndian) {
6536 SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
6537 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6538 }
6539 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6540 CallSeqStart,
6541 Flags, DAG, dl);
6542
6543 // Load the slot into the register.
6544 SDValue Load =
6545 DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
6546 MemOpChains.push_back(Load.getValue(1));
6547 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6548
6549 // Done with this argument.
6550 ArgOffset += PtrByteSize;
6551 continue;
6552 }
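// Illustrative note for the right-justification above: on big-endian with a
// packed 3-byte aggregate, the bytes are copied to offset 8 - 3 = 5 of the
// doubleword slot, so the full 8-byte load leaves the aggregate in the
// low-order bytes of the GPR, i.e. right-justified as the 64-bit ELF ABIs
// expect for small aggregates. On little-endian no adjustment is needed.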
6553
6554 // For aggregates larger than PtrByteSize, copy the pieces of the
6555 // object that fit into registers from the parameter save area.
6556 for (unsigned j=0; j<Size; j+=PtrByteSize) {
6557 SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6558 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6559 if (GPR_idx != NumGPRs) {
6560 unsigned LoadSizeInBits = std::min(PtrByteSize, (Size - j)) * 8;
6561 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), LoadSizeInBits);
6562 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, AddArg,
6563 MachinePointerInfo(), ObjType);
6564
6565 MemOpChains.push_back(Load.getValue(1));
6566 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6567 ArgOffset += PtrByteSize;
6568 } else {
6569 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6570 break;
6571 }
6572 }
6573 continue;
6574 }
6575
6576 switch (Arg.getSimpleValueType().SimpleTy) {
6577 default: llvm_unreachable("Unexpected ValueType for argument!");
6578 case MVT::i1:
6579 case MVT::i32:
6580 case MVT::i64:
6581 if (Flags.isNest()) {
6582 // The 'nest' parameter, if any, is passed in R11.
6583 RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
6584 break;
6585 }
6586
6587 // These can be scalar arguments or elements of an integer array type
6588 // passed directly. Clang may use those instead of "byval" aggregate
6589 // types to avoid forcing arguments to memory unnecessarily.
6590 if (GPR_idx != NumGPRs) {
6591 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6592 } else {
6593 if (IsFastCall)
6594 ComputePtrOff();
6595
6596 assert(HasParameterArea &&
6597 "Parameter area must exist to pass an argument in memory.");
6598 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6599 true, CFlags.IsTailCall, false, MemOpChains,
6600 TailCallArguments, dl);
6601 if (IsFastCall)
6602 ArgOffset += PtrByteSize;
6603 }
6604 if (!IsFastCall)
6605 ArgOffset += PtrByteSize;
6606 break;
6607 case MVT::f32:
6608 case MVT::f64: {
6609 // These can be scalar arguments or elements of a float array type
6610 // passed directly. The latter are used to implement ELFv2 homogeneous
6611 // float aggregates.
6612
6613 // Named arguments go into FPRs first, and once they overflow, the
6614 // remaining arguments go into GPRs and then the parameter save area.
6615 // Unnamed arguments for vararg functions always go to GPRs and
6616 // then the parameter save area. For now, put all arguments to vararg
6617 // routines always in both locations (FPR *and* GPR or stack slot).
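      // As a rough example: for a vararg call such as printf("%f", 1.0), the
      // double is copied both into the next available FPR and into the next
      // GPR (or its stack slot), since the callee may fetch it through
      // va_arg from the GPR image rather than from the FPR.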
6618 bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs;
6619 bool NeededLoad = false;
6620
6621 // First load the argument into the next available FPR.
6622 if (FPR_idx != NumFPRs)
6623 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6624
6625 // Next, load the argument into GPR or stack slot if needed.
6626 if (!NeedGPROrStack)
6627 ;
6628 else if (GPR_idx != NumGPRs && !IsFastCall) {
6629 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
6630 // once we support fp <-> gpr moves.
6631
6632 // In the non-vararg case, this can only ever happen in the
6633 // presence of f32 array types, since otherwise we never run
6634 // out of FPRs before running out of GPRs.
6635 SDValue ArgVal;
6636
6637 // Double values are always passed in a single GPR.
6638 if (Arg.getValueType() != MVT::f32) {
6639 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
6640
6641 // Non-array float values are extended and passed in a GPR.
6642 } else if (!Flags.isInConsecutiveRegs()) {
6643 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6644 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6645
6646 // If we have an array of floats, we collect every odd element
6647 // together with its predecessor into one GPR.
6648 } else if (ArgOffset % PtrByteSize != 0) {
6649 SDValue Lo, Hi;
6650 Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
6651 Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6652 if (!isLittleEndian)
6653 std::swap(Lo, Hi);
6654 ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6655
6656 // The final element, if even, goes into the first half of a GPR.
6657 } else if (Flags.isInConsecutiveRegsLast()) {
6658 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6659 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6660 if (!isLittleEndian)
6661 ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
6662 DAG.getConstant(32, dl, MVT::i32));
6663
6664 // Non-final even elements are skipped; they will be handled
6665 // together with the subsequent argument on the next go-around.
6666 } else
6667 ArgVal = SDValue();
6668
6669 if (ArgVal.getNode())
6670 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
6671 } else {
6672 if (IsFastCall)
6673 ComputePtrOff();
6674
6675 // Single-precision floating-point values are mapped to the
6676 // second (rightmost) word of the stack doubleword.
6677 if (Arg.getValueType() == MVT::f32 &&
6678 !isLittleEndian && !Flags.isInConsecutiveRegs()) {
6679 SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6680 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6681 }
6682
6683 assert(HasParameterArea &&
6684 "Parameter area must exist to pass an argument in memory.");
6685 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6686 true, CFlags.IsTailCall, false, MemOpChains,
6687 TailCallArguments, dl);
6688
6689 NeededLoad = true;
6690 }
6691 // When passing an array of floats, the array occupies consecutive
6692 // space in the argument area; only round up to the next doubleword
6693 // at the end of the array. Otherwise, each float takes 8 bytes.
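      // For example, an ELFv2 homogeneous aggregate of three floats advances
      // ArgOffset by 4 bytes per element, and only the final element rounds
      // the offset up to the next doubleword (12 bytes becomes 16).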
6694 if (!IsFastCall || NeededLoad) {
6695 ArgOffset += (Arg.getValueType() == MVT::f32 &&
6696 Flags.isInConsecutiveRegs()) ? 4 : 8;
6697 if (Flags.isInConsecutiveRegsLast())
6698 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6699 }
6700 break;
6701 }
6702 case MVT::v4f32:
6703 case MVT::v4i32:
6704 case MVT::v8i16:
6705 case MVT::v16i8:
6706 case MVT::v2f64:
6707 case MVT::v2i64:
6708 case MVT::v1i128:
6709 case MVT::f128:
6710 // These can be scalar arguments or elements of a vector array type
6711 // passed directly. The latter are used to implement ELFv2 homogeneous
6712 // vector aggregates.
6713
6714 // For a varargs call, named arguments go into VRs or on the stack as
6715 // usual; unnamed arguments always go to the stack or the corresponding
6716 // GPRs when within range. For now, we always put the value in both
6717 // locations (or even all three).
6718 if (CFlags.IsVarArg) {
6719 assert(HasParameterArea &&
6720 "Parameter area must exist if we have a varargs call.");
6721 // We could elide this store in the case where the object fits
6722 // entirely in R registers. Maybe later.
6723 SDValue Store =
6724 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6725 MemOpChains.push_back(Store);
6726 if (VR_idx != NumVRs) {
6727 SDValue Load =
6728 DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6729 MemOpChains.push_back(Load.getValue(1));
6730 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6731 }
6732 ArgOffset += 16;
6733 for (unsigned i=0; i<16; i+=PtrByteSize) {
6734 if (GPR_idx == NumGPRs)
6735 break;
6736 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6737 DAG.getConstant(i, dl, PtrVT));
6738 SDValue Load =
6739 DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6740 MemOpChains.push_back(Load.getValue(1));
6741 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6742 }
6743 break;
6744 }
6745
6746 // Non-varargs Altivec params go into VRs or on the stack.
6747 if (VR_idx != NumVRs) {
6748 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6749 } else {
6750 if (IsFastCall)
6751 ComputePtrOff();
6752
6753 assert(HasParameterArea &&
6754 "Parameter area must exist to pass an argument in memory.");
6755 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6756 true, CFlags.IsTailCall, true, MemOpChains,
6757 TailCallArguments, dl);
6758 if (IsFastCall)
6759 ArgOffset += 16;
6760 }
6761
6762 if (!IsFastCall)
6763 ArgOffset += 16;
6764 break;
6765 }
6766 }
6767
6768 assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
6769 "mismatch in size of parameter area");
6770 (void)NumBytesActuallyUsed;
6771
6772 if (!MemOpChains.empty())
6773 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6774
6775 // Check if this is an indirect call (MTCTR/BCTRL).
6776 // See prepareDescriptorIndirectCall and buildCallOperands for more
6777 // information about calls through function pointers in the 64-bit SVR4 ABI.
6778 if (CFlags.IsIndirect) {
6779 // For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the
6780 // caller in the TOC save area.
6781 if (isTOCSaveRestoreRequired(Subtarget)) {
6782 assert(!CFlags.IsTailCall && "Indirect tail calls not supported");
6783 // Load r2 into a virtual register and store it to the TOC save area.
6784 setUsesTOCBasePtr(DAG);
6785 SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
6786 // TOC save area offset.
6787 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
6788 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
6789 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6790 Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
6791 MachinePointerInfo::getStack(
6792 DAG.getMachineFunction(), TOCSaveOffset));
6793 }
6794 // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
6795 // This does not mean the MTCTR instruction must use R12; it's easier
6796 // to model this as an extra parameter, so do that.
6797 if (isELFv2ABI && !CFlags.IsPatchPoint)
6798 RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
6799 }
6800
6801 // Build a sequence of copy-to-reg nodes chained together with token chain
6802 // and flag operands which copy the outgoing args into the appropriate regs.
6803 SDValue InGlue;
6804 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6805 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6806 RegsToPass[i].second, InGlue);
6807 InGlue = Chain.getValue(1);
6808 }
6809
6810 if (CFlags.IsTailCall && !IsSibCall)
6811 PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6812 TailCallArguments);
6813
6814 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
6815 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6816}
6817
6818// Returns true when the shadow of a general purpose argument register
6819// in the parameter save area is aligned to at least 'RequiredAlign'.
6820static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign) {
6821 assert(RequiredAlign.value() <= 16 &&
6822 "Required alignment greater than stack alignment.");
6823 switch (Reg) {
6824 default:
6825 report_fatal_error("called on invalid register.");
6826 case PPC::R5:
6827 case PPC::R9:
6828 case PPC::X3:
6829 case PPC::X5:
6830 case PPC::X7:
6831 case PPC::X9:
6832 // The PSA shadows of these registers are 16-byte aligned, which is the
6833 // strictest alignment we can support.
6834 return true;
6835 case PPC::R3:
6836 case PPC::R7:
6837 case PPC::X4:
6838 case PPC::X6:
6839 case PPC::X8:
6840 case PPC::X10:
6841 // The shadow of these registers in the PSA is 8 byte aligned.
6842 return RequiredAlign <= 8;
6843 case PPC::R4:
6844 case PPC::R6:
6845 case PPC::R8:
6846 case PPC::R10:
6847 return RequiredAlign <= 4;
6848 }
6849}
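// A rough illustration of the mapping above, assuming the standard 24-byte
// (32-bit) and 48-byte (64-bit) linkage areas: R3 shadows PSA offset 24,
// which is only 8-byte aligned relative to the 16-byte aligned stack, R4
// shadows offset 28 (4-byte aligned), and R5 shadows offset 32 (16-byte
// aligned); likewise X4 shadows offset 56 and is only 8-byte aligned.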
6850
6851static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
6852 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
6853 CCState &S) {
6854 AIXCCState &State = static_cast<AIXCCState &>(S);
6855 const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
6856 State.getMachineFunction().getSubtarget());
6857 const bool IsPPC64 = Subtarget.isPPC64();
6858 const Align PtrAlign = IsPPC64 ? Align(8) : Align(4);
6859 const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
6860
6861 if (ValVT == MVT::f128)
6862 report_fatal_error("f128 is unimplemented on AIX.");
6863
6864 if (ArgFlags.isNest())
6865 report_fatal_error("Nest arguments are unimplemented.");
6866
6867 static const MCPhysReg GPR_32[] = {// 32-bit registers.
6868 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6869 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
6870 static const MCPhysReg GPR_64[] = {// 64-bit registers.
6871 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6872 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
6873
6874 static const MCPhysReg VR[] = {// Vector registers.
6875 PPC::V2, PPC::V3, PPC::V4, PPC::V5,
6876 PPC::V6, PPC::V7, PPC::V8, PPC::V9,
6877 PPC::V10, PPC::V11, PPC::V12, PPC::V13};
6878
6879 if (ArgFlags.isByVal()) {
6880 if (ArgFlags.getNonZeroByValAlign() > PtrAlign)
6881 report_fatal_error("Pass-by-value arguments with alignment greater than "
6882 "register width are not supported.");
6883
6884 const unsigned ByValSize = ArgFlags.getByValSize();
6885
6886 // An empty aggregate parameter takes up no storage and no registers,
6887 // but needs a MemLoc for a stack slot for the formal arguments side.
6888 if (ByValSize == 0) {
6889 State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
6890 State.getStackSize(), RegVT, LocInfo));
6891 return false;
6892 }
6893
6894 const unsigned StackSize = alignTo(ByValSize, PtrAlign);
6895 unsigned Offset = State.AllocateStack(StackSize, PtrAlign);
6896 for (const unsigned E = Offset + StackSize; Offset < E;
6897 Offset += PtrAlign.value()) {
6898 if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
6899 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6900 else {
6901 State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
6902 Offset, MVT::INVALID_SIMPLE_VALUE_TYPE,
6903 LocInfo));
6904 break;
6905 }
6906 }
6907 return false;
6908 }
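// For instance (a sketch, assuming 64-bit mode): a 12-byte pass-by-value
// argument rounds up to StackSize == 16, so the loop above reserves two
// doublewords of parameter save area and, while GPRs remain, assigns one
// GPR RegLoc per register-width slice of the aggregate.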
6909
6910 // Arguments always reserve space in the parameter save area.
6911 switch (ValVT.SimpleTy) {
6912 default:
6913 report_fatal_error("Unhandled value type for argument.");
6914 case MVT::i64:
6915 // i64 arguments should have been split to i32 for PPC32.
6916 assert(IsPPC64 && "PPC32 should have split i64 values.");
6917 [[fallthrough]];
6918 case MVT::i1:
6919 case MVT::i32: {
6920 const unsigned Offset = State.AllocateStack(PtrAlign.value(), PtrAlign);
6921 // AIX integer arguments are always passed in register width.
6922 if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits())
6923 LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
6924 : CCValAssign::LocInfo::ZExt;
6925 if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
6926 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6927 else
6928 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo));
6929
6930 return false;
6931 }
6932 case MVT::f32:
6933 case MVT::f64: {
6934 // Parameter save area (PSA) is reserved even if the float passes in fpr.
6935 const unsigned StoreSize = LocVT.getStoreSize();
6936 // Floats are always 4-byte aligned in the PSA on AIX.
6937 // This includes f64 in 64-bit mode for ABI compatibility.
6938 const unsigned Offset =
6939 State.AllocateStack(IsPPC64 ? 8 : StoreSize, Align(4));
6940 unsigned FReg = State.AllocateReg(FPR);
6941 if (FReg)
6942 State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo));
6943
6944 // Reserve and initialize GPRs or initialize the PSA as required.
6945 for (unsigned I = 0; I < StoreSize; I += PtrAlign.value()) {
6946 if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) {
6947 assert(FReg && "An FPR should be available when a GPR is reserved.");
6948 if (State.isVarArg()) {
6949 // Successfully reserved GPRs are only initialized for vararg calls.
6950 // Custom handling is required for:
6951 // f64 in PPC32 needs to be split into 2 GPRs.
6952 // f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR.
6953 State.addLoc(
6954 CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6955 }
6956 } else {
6957 // If there are insufficient GPRs, the PSA needs to be initialized.
6958 // Initialization occurs even if an FPR was initialized for
6959 // compatibility with the AIX XL compiler. The full memory for the
6960 // argument will be initialized even if a prior word is saved in GPR.
6961 // A custom memLoc is used when the argument also passes in FPR so
6962 // that the callee handling can skip over it easily.
6963 State.addLoc(
6964 FReg ? CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT,
6965 LocInfo)
6966 : CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6967 break;
6968 }
6969 }
6970
6971 return false;
6972 }
6973 case MVT::v4f32:
6974 case MVT::v4i32:
6975 case MVT::v8i16:
6976 case MVT::v16i8:
6977 case MVT::v2i64:
6978 case MVT::v2f64:
6979 case MVT::v1i128: {
6980 const unsigned VecSize = 16;
6981 const Align VecAlign(VecSize);
6982
6983 if (!State.isVarArg()) {
6984 // If there are vector registers remaining we don't consume any stack
6985 // space.
6986 if (unsigned VReg = State.AllocateReg(VR)) {
6987 State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
6988 return false;
6989 }
6990 // Vectors passed on the stack do not shadow GPRs or FPRs even though they
6991 // might be allocated in the portion of the PSA that is shadowed by the
6992 // GPRs.
6993 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6994 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6995 return false;
6996 }
6997
6998 const unsigned PtrSize = IsPPC64 ? 8 : 4;
6999 ArrayRef<MCPhysReg> GPRs = IsPPC64 ? GPR_64 : GPR_32;
7000
7001 unsigned NextRegIndex = State.getFirstUnallocated(GPRs);
7002 // Burn any underaligned registers and their shadowed stack space until
7003 // we reach the required alignment.
7004 while (NextRegIndex != GPRs.size() &&
7005 !isGPRShadowAligned(GPRs[NextRegIndex], VecAlign)) {
7006 // Shadow allocate register and its stack shadow.
7007 unsigned Reg = State.AllocateReg(GPRs);
7008 State.AllocateStack(PtrSize, PtrAlign);
7009 assert(Reg && "Allocating register unexpectedly failed.");
7010 (void)Reg;
7011 NextRegIndex = State.getFirstUnallocated(GPRs);
7012 }
7013
7014 // Vectors that are passed as fixed arguments are handled differently.
7015 // They are passed in VRs if any are available (unlike arguments passed
7016 // through ellipses) and shadow GPRs (unlike arguments to non-vararg
7017 // functions).
7018 if (State.isFixed(ValNo)) {
7019 if (unsigned VReg = State.AllocateReg(VR)) {
7020 State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
7021 // Shadow allocate GPRs and stack space even though we pass in a VR.
7022 for (unsigned I = 0; I != VecSize; I += PtrSize)
7023 State.AllocateReg(GPRs);
7024 State.AllocateStack(VecSize, VecAlign);
7025 return false;
7026 }
7027 // No vector registers remain so pass on the stack.
7028 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7029 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7030 return false;
7031 }
7032
7033 // If all GPRs are consumed, then we pass the argument fully on the stack.
7034 if (NextRegIndex == GPRs.size()) {
7035 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7036 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7037 return false;
7038 }
7039
7040 // Corner case for 32-bit codegen. We have 2 registers to pass the first
7041 // half of the argument, and then need to pass the remaining half on the
7042 // stack.
7043 if (GPRs[NextRegIndex] == PPC::R9) {
7044 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7045 State.addLoc(
7046 CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7047
7048 const unsigned FirstReg = State.AllocateReg(PPC::R9);
7049 const unsigned SecondReg = State.AllocateReg(PPC::R10);
7050 assert(FirstReg && SecondReg &&
7051 "Allocating R9 or R10 unexpectedly failed.");
7052 State.addLoc(
7053 CCValAssign::getCustomReg(ValNo, ValVT, FirstReg, RegVT, LocInfo));
7054 State.addLoc(
7055 CCValAssign::getCustomReg(ValNo, ValVT, SecondReg, RegVT, LocInfo));
7056 return false;
7057 }
7058
7059 // We have enough GPRs to fully pass the vector argument, and we have
7060 // already consumed any underaligned registers. Start with the custom
7061 // MemLoc and then the custom RegLocs.
7062 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7063 State.addLoc(
7064 CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7065 for (unsigned I = 0; I != VecSize; I += PtrSize) {
7066 const unsigned Reg = State.AllocateReg(GPRs);
7067 assert(Reg && "Failed to allocate register for vararg vector argument");
7068 State.addLoc(
7069 CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
7070 }
7071 return false;
7072 }
7073 }
7074 return true;
7075}
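// A rough worked example of the vararg-vector path above: for an unnamed
// v4i32 argument reaching CC_AIX with X3 still free in 64-bit mode, the
// result is one custom MemLoc covering the 16-byte PSA slot followed by two
// custom RegLocs (X3 and X4); in 32-bit mode with R3 still free, R3 and R4
// are first burned as underaligned shadows, and the MemLoc is followed by
// four custom RegLocs (R5 through R8).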
7076
7077// So far, this function is only used by LowerFormalArguments_AIX()
7078 static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT,
7079 bool IsPPC64,
7080 bool HasP8Vector,
7081 bool HasVSX) {
7082 assert((IsPPC64 || SVT != MVT::i64) &&
7083 "i64 should have been split for 32-bit codegen.");
7084
7085 switch (SVT) {
7086 default:
7087 report_fatal_error("Unexpected value type for formal argument");
7088 case MVT::i1:
7089 case MVT::i32:
7090 case MVT::i64:
7091 return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7092 case MVT::f32:
7093 return HasP8Vector ? &PPC::VSSRCRegClass : &PPC::F4RCRegClass;
7094 case MVT::f64:
7095 return HasVSX ? &PPC::VSFRCRegClass : &PPC::F8RCRegClass;
7096 case MVT::v4f32:
7097 case MVT::v4i32:
7098 case MVT::v8i16:
7099 case MVT::v16i8:
7100 case MVT::v2i64:
7101 case MVT::v2f64:
7102 case MVT::v1i128:
7103 return &PPC::VRRCRegClass;
7104 }
7105}
7106
7107 static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT,
7108 SelectionDAG &DAG, SDValue ArgValue,
7109 MVT LocVT, const SDLoc &dl) {
7110 assert(ValVT.isScalarInteger() && LocVT.isScalarInteger());
7111 assert(ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits());
7112
7113 if (Flags.isSExt())
7114 ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue,
7115 DAG.getValueType(ValVT));
7116 else if (Flags.isZExt())
7117 ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue,
7118 DAG.getValueType(ValVT));
7119
7120 return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
7121}
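// A minimal sketch of the node sequence this helper produces for a
// sign-extended i8 value arriving in a 64-bit GPR (names are illustrative):
//   t1: i64 = CopyFromReg ...
//   t2: i64 = AssertSext t1, ValueType:i8
//   t3: i8  = truncate t2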
7122
7123static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL) {
7124 const unsigned LASize = FL->getLinkageSize();
7125
7126 if (PPC::GPRCRegClass.contains(Reg)) {
7127 assert(Reg >= PPC::R3 && Reg <= PPC::R10 &&
7128 "Reg must be a valid argument register!");
7129 return LASize + 4 * (Reg - PPC::R3);
7130 }
7131
7132 if (PPC::G8RCRegClass.contains(Reg)) {
7133 assert(Reg >= PPC::X3 && Reg <= PPC::X10 &&
7134 "Reg must be a valid argument register!");
7135 return LASize + 8 * (Reg - PPC::X3);
7136 }
7137
7138 llvm_unreachable("Only general purpose registers expected.");
7139}
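// For example, with the 64-bit AIX linkage area of 48 bytes, X3 maps to
// offset 48, X5 to 48 + 8 * 2 == 64, and X10 to 48 + 8 * 7 == 104; the
// 32-bit case works the same way with the 24-byte linkage area and
// 4-byte slots.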
7140
7141// AIX ABI Stack Frame Layout:
7142//
7143// Low Memory +--------------------------------------------+
7144// SP +---> | Back chain | ---+
7145// | +--------------------------------------------+ |
7146// | | Saved Condition Register | |
7147// | +--------------------------------------------+ |
7148// | | Saved Linkage Register | |
7149// | +--------------------------------------------+ | Linkage Area
7150// | | Reserved for compilers | |
7151// | +--------------------------------------------+ |
7152// | | Reserved for binders | |
7153// | +--------------------------------------------+ |
7154// | | Saved TOC pointer | ---+
7155// | +--------------------------------------------+
7156// | | Parameter save area |
7157// | +--------------------------------------------+
7158// | | Alloca space |
7159// | +--------------------------------------------+
7160// | | Local variable space |
7161// | +--------------------------------------------+
7162// | | Float/int conversion temporary |
7163// | +--------------------------------------------+
7164// | | Save area for AltiVec registers |
7165// | +--------------------------------------------+
7166// | | AltiVec alignment padding |
7167// | +--------------------------------------------+
7168// | | Save area for VRSAVE register |
7169// | +--------------------------------------------+
7170// | | Save area for General Purpose registers |
7171// | +--------------------------------------------+
7172// | | Save area for Floating Point registers |
7173// | +--------------------------------------------+
7174// +---- | Back chain |
7175// High Memory +--------------------------------------------+
7176//
7177// Specifications:
7178// AIX 7.2 Assembler Language Reference
7179// Subroutine linkage convention
7180
7181SDValue PPCTargetLowering::LowerFormalArguments_AIX(
7182 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
7183 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7184 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
7185
7186 assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold ||
7187 CallConv == CallingConv::Fast) &&
7188 "Unexpected calling convention!");
7189
7190 if (getTargetMachine().Options.GuaranteedTailCallOpt)
7191 report_fatal_error("Tail call support is unimplemented on AIX.");
7192
7193 if (useSoftFloat())
7194 report_fatal_error("Soft float support is unimplemented on AIX.");
7195
7196 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7197
7198 const bool IsPPC64 = Subtarget.isPPC64();
7199 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7200
7201 // Assign locations to all of the incoming arguments.
7202 SmallVector<CCValAssign, 16> ArgLocs;
7203 MachineFunction &MF = DAG.getMachineFunction();
7204 MachineFrameInfo &MFI = MF.getFrameInfo();
7205 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
7206 AIXCCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
7207
7208 const EVT PtrVT = getPointerTy(MF.getDataLayout());
7209 // Reserve space for the linkage area on the stack.
7210 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7211 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7212 CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);
7213
7214 SmallVector<SDValue, 8> MemOps;
7215
7216 for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {
7217 CCValAssign &VA = ArgLocs[I++];
7218 MVT LocVT = VA.getLocVT();
7219 MVT ValVT = VA.getValVT();
7220 ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
7221 // For compatibility with the AIX XL compiler, the float args in the
7222 // parameter save area are initialized even if the argument is available
7223 // in a register. The caller is required to initialize both the register
7224 // and memory; the callee, however, can choose to expect it in either.
7225 // The memloc is dismissed here because the argument is retrieved from
7226 // the register.
7227 if (VA.isMemLoc() && VA.needsCustom() && ValVT.isFloatingPoint())
7228 continue;
7229
7230 auto HandleMemLoc = [&]() {
7231 const unsigned LocSize = LocVT.getStoreSize();
7232 const unsigned ValSize = ValVT.getStoreSize();
7233 assert((ValSize <= LocSize) &&
7234 "Object size is larger than size of MemLoc");
7235 int CurArgOffset = VA.getLocMemOffset();
7236 // Objects are right-justified because AIX is big-endian.
7237 if (LocSize > ValSize)
7238 CurArgOffset += LocSize - ValSize;
7239 // Potential tail calls could cause overwriting of argument stack slots.
7240 const bool IsImmutable =
7241 !(getTargetMachine().Options.GuaranteedTailCallOpt &&
7242 (CallConv == CallingConv::Fast));
7243 int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
7244 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7245 SDValue ArgValue =
7246 DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
7247 InVals.push_back(ArgValue);
7248 };
7249
7250 // Vector arguments to VaArg functions are passed both on the stack, and
7251 // in any available GPRs. Load the value from the stack and add the GPRs
7252 // as live ins.
7253 if (VA.isMemLoc() && VA.needsCustom()) {
7254 assert(ValVT.isVector() && "Unexpected Custom MemLoc type.");
7255 assert(isVarArg && "Only use custom memloc for vararg.");
7256 // Remember the ValNo of the custom MemLoc so we can compare it to the
7257 // ValNo of the matching custom RegLocs.
7258 const unsigned OriginalValNo = VA.getValNo();
7259 (void)OriginalValNo;
7260
7261 auto HandleCustomVecRegLoc = [&]() {
7262 assert(I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7263 "Missing custom RegLoc.");
7264 VA = ArgLocs[I++];
7265 assert(VA.getValVT().isVector() &&
7266 "Unexpected Val type for custom RegLoc.");
7267 assert(VA.getValNo() == OriginalValNo &&
7268 "ValNo mismatch between custom MemLoc and RegLoc.");
7269 MVT::SimpleValueType SVT = VA.getLocVT().SimpleTy;
7270 MF.addLiveIn(VA.getLocReg(),
7271 getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7272 Subtarget.hasVSX()));
7273 };
7274
7275 HandleMemLoc();
7276 // In 64-bit mode there will be exactly 2 custom RegLocs that follow, and
7277 // in 32-bit mode there will be 2 custom RegLocs if we are passing in R9
7278 // and R10.
7279 HandleCustomVecRegLoc();
7280 HandleCustomVecRegLoc();
7281
7282 // If we are targeting 32-bit, there might be 2 extra custom RegLocs if
7283 // we passed the vector in R5, R6, R7 and R8.
7284 if (I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom()) {
7285 assert(!IsPPC64 &&
7286 "Only 2 custom RegLocs expected for 64-bit codegen.");
7287 HandleCustomVecRegLoc();
7288 HandleCustomVecRegLoc();
7289 }
7290
7291 continue;
7292 }
7293
7294 if (VA.isRegLoc()) {
7295 if (VA.getValVT().isScalarInteger())
7296 FuncInfo->appendParameterType(PPCFunctionInfo::FixedType);
7297 else if (VA.getValVT().isFloatingPoint() && !VA.getValVT().isVector()) {
7298 switch (VA.getValVT().SimpleTy) {
7299 default:
7300 report_fatal_error("Unhandled value type for argument.");
7301 case MVT::f32:
7302 FuncInfo->appendParameterType(PPCFunctionInfo::ShortFloatingPoint);
7303 break;
7304 case MVT::f64:
7305 FuncInfo->appendParameterType(PPCFunctionInfo::LongFloatingPoint);
7306 break;
7307 }
7308 } else if (VA.getValVT().isVector()) {
7309 switch (VA.getValVT().SimpleTy) {
7310 default:
7311 report_fatal_error("Unhandled value type for argument.");
7312 case MVT::v16i8:
7313 FuncInfo->appendParameterType(PPCFunctionInfo::VectorChar);
7314 break;
7315 case MVT::v8i16:
7316 FuncInfo->appendParameterType(PPCFunctionInfo::VectorShort);
7317 break;
7318 case MVT::v4i32:
7319 case MVT::v2i64:
7320 case MVT::v1i128:
7321 FuncInfo->appendParameterType(PPCFunctionInfo::VectorInt);
7322 break;
7323 case MVT::v4f32:
7324 case MVT::v2f64:
7325 FuncInfo->appendParameterType(PPCFunctionInfo::VectorFloat);
7326 break;
7327 }
7328 }
7329 }
7330
7331 if (Flags.isByVal() && VA.isMemLoc()) {
7332 const unsigned Size =
7333 alignTo(Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize,
7334 PtrByteSize);
7335 const int FI = MF.getFrameInfo().CreateFixedObject(
7336 Size, VA.getLocMemOffset(), /* IsImmutable */ false,
7337 /* IsAliased */ true);
7338 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7339 InVals.push_back(FIN);
7340
7341 continue;
7342 }
7343
7344 if (Flags.isByVal()) {
7345 assert(VA.isRegLoc() && "MemLocs should already be handled.");
7346
7347 const MCPhysReg ArgReg = VA.getLocReg();
7348 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
7349
7350 if (Flags.getNonZeroByValAlign() > PtrByteSize)
7351 report_fatal_error("Over aligned byvals not supported yet.");
7352
7353 const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);
7354 const int FI = MF.getFrameInfo().CreateFixedObject(
7355 StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false,
7356 /* IsAliased */ true);
7357 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7358 InVals.push_back(FIN);
7359
7360 // Add live ins for all the RegLocs for the same ByVal.
7361 const TargetRegisterClass *RegClass =
7362 IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7363
7364 auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,
7365 unsigned Offset) {
7366 const Register VReg = MF.addLiveIn(PhysReg, RegClass);
7367 // Since the caller's side has left-justified the aggregate in the
7368 // register, we can simply store the entire register into the stack
7369 // slot.
7370 SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7371 // The store to the fixed-stack object is needed because accessing a
7372 // field of the ByVal will use a GEP and load. Ideally we will optimize
7373 // to extracting the value from the register directly, and elide the
7374 // stores when the argument's address is not taken, but that will need
7375 // to be future work.
7376 SDValue Store = DAG.getStore(
7377 CopyFrom.getValue(1), dl, CopyFrom,
7378 DAG.getObjectPtrOffset(dl, FIN, TypeSize::getFixed(Offset)),
7379 MachinePointerInfo::getFixedStack(MF, FI, Offset));
7380
7381 MemOps.push_back(Store);
7382 };
7383
7384 unsigned Offset = 0;
7385 HandleRegLoc(VA.getLocReg(), Offset);
7386 Offset += PtrByteSize;
7387 for (; Offset != StackSize && ArgLocs[I].isRegLoc();
7388 Offset += PtrByteSize) {
7389 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7390 "RegLocs should be for ByVal argument.");
7391
7392 const CCValAssign RL = ArgLocs[I++];
7393 HandleRegLoc(RL.getLocReg(), Offset);
7395 }
7396
7397 if (Offset != StackSize) {
7398 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7399 "Expected MemLoc for remaining bytes.");
7400 assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes.");
7401 // Consume the MemLoc. The InVal has already been emitted, so nothing
7402 // more needs to be done.
7403 ++I;
7404 }
7405
7406 continue;
7407 }
7408
7409 if (VA.isRegLoc() && !VA.needsCustom()) {
7410 MVT::SimpleValueType SVT = ValVT.SimpleTy;
7411 Register VReg =
7412 MF.addLiveIn(VA.getLocReg(),
7413 getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7414 Subtarget.hasVSX()));
7415 SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7416 if (ValVT.isScalarInteger() &&
7417 (ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits())) {
7418 ArgValue =
7419 truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
7420 }
7421 InVals.push_back(ArgValue);
7422 continue;
7423 }
7424 if (VA.isMemLoc()) {
7425 HandleMemLoc();
7426 continue;
7427 }
7428 }
7429
7430 // On AIX a minimum of 8 words is saved to the parameter save area.
7431 const unsigned MinParameterSaveArea = 8 * PtrByteSize;
7432 // Area that is at least reserved in the caller of this function.
7433 unsigned CallerReservedArea = std::max<unsigned>(
7434 CCInfo.getStackSize(), LinkageSize + MinParameterSaveArea);
7435
7436 // Set the size that is at least reserved in caller of this function. Tail
7437 // call optimized function's reserved stack space needs to be aligned so
7438 // that taking the difference between two stack areas will result in an
7439 // aligned stack.
7440 CallerReservedArea =
7441 EnsureStackAlignment(Subtarget.getFrameLowering(), CallerReservedArea);
7442 FuncInfo->setMinReservedArea(CallerReservedArea);
7443
7444 if (isVarArg) {
7445 FuncInfo->setVarArgsFrameIndex(
7446 MFI.CreateFixedObject(PtrByteSize, CCInfo.getStackSize(), true));
7447 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
7448
7449 static const MCPhysReg GPR_32[] = {PPC::R3, PPC::R4, PPC::R5, PPC::R6,
7450 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
7451
7452 static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
7453 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
7454 const unsigned NumGPArgRegs = std::size(IsPPC64 ? GPR_64 : GPR_32);
7455
7456 // The fixed integer arguments of a variadic function are stored to the
7457 // VarArgsFrameIndex on the stack so that they may be loaded by
7458 // dereferencing the result of va_next.
7459 for (unsigned GPRIndex =
7460 (CCInfo.getStackSize() - LinkageSize) / PtrByteSize;
7461 GPRIndex < NumGPArgRegs; ++GPRIndex) {
7462
7463 const Register VReg =
7464 IsPPC64 ? MF.addLiveIn(GPR_64[GPRIndex], &PPC::G8RCRegClass)
7465 : MF.addLiveIn(GPR_32[GPRIndex], &PPC::GPRCRegClass);
7466
7467 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
7468 SDValue Store =
7469 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
7470 MemOps.push_back(Store);
7471 // Increment the address for the next argument to store.
7472 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
7473 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
7474 }
7475 }
7476
7477 if (!MemOps.empty())
7478 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
7479
7480 return Chain;
7481}
7482
7483SDValue PPCTargetLowering::LowerCall_AIX(
7484 SDValue Chain, SDValue Callee, CallFlags CFlags,
7485 const SmallVectorImpl<ISD::OutputArg> &Outs,
7486 const SmallVectorImpl<SDValue> &OutVals,
7487 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7488 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
7489 const CallBase *CB) const {
7490 // See PPCTargetLowering::LowerFormalArguments_AIX() for a description of the
7491 // AIX ABI stack frame layout.
7492
7493 assert((CFlags.CallConv == CallingConv::C ||
7494 CFlags.CallConv == CallingConv::Cold ||
7495 CFlags.CallConv == CallingConv::Fast) &&
7496 "Unexpected calling convention!");
7497
7498 if (CFlags.IsPatchPoint)
7499 report_fatal_error("This call type is unimplemented on AIX.");
7500
7501 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7502
7503 MachineFunction &MF = DAG.getMachineFunction();
7504 SmallVector<CCValAssign, 16> ArgLocs;
7505 AIXCCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
7506 *DAG.getContext());
7507
7508 // Reserve space for the linkage save area (LSA) on the stack.
7509 // In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
7510 // [SP][CR][LR][2 x reserved][TOC].
7511 // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
7512 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7513 const bool IsPPC64 = Subtarget.isPPC64();
7514 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
7515 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7516 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7517 CCInfo.AnalyzeCallOperands(Outs, CC_AIX);
7518
7519 // The prolog code of the callee may store up to 8 GPR argument registers to
7520 // the stack, allowing va_start to index over them in memory if the callee
7521 // is variadic.
7522 // Because we cannot tell if this is needed on the caller side, we have to
7523 // conservatively assume that it is needed. As such, make sure we have at
7524 // least enough stack space for the caller to store the 8 GPRs.
7525 const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
7526 const unsigned NumBytes = std::max<unsigned>(
7527 LinkageSize + MinParameterSaveAreaSize, CCInfo.getStackSize());
7528
7529 // Adjust the stack pointer for the new arguments...
7530 // These operations are automatically eliminated by the prolog/epilog pass.
7531 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
7532 SDValue CallSeqStart = Chain;
7533
7534 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
7535 SmallVector<SDValue, 8> MemOpChains;
7536
7537 // Set up a copy of the stack pointer for loading and storing any
7538 // arguments that may not fit in the registers available for argument
7539 // passing.
7540 const SDValue StackPtr = IsPPC64 ? DAG.getRegister(PPC::X1, MVT::i64)
7541 : DAG.getRegister(PPC::R1, MVT::i32);
7542
7543 for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
7544 const unsigned ValNo = ArgLocs[I].getValNo();
7545 SDValue Arg = OutVals[ValNo];
7546 ISD::ArgFlagsTy Flags = Outs[ValNo].Flags;
7547
7548 if (Flags.isByVal()) {
7549 const unsigned ByValSize = Flags.getByValSize();
7550
7551 // Nothing to do for zero-sized ByVals on the caller side.
7552 if (!ByValSize) {
7553 ++I;
7554 continue;
7555 }
7556
7557 auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
7558 return DAG.getExtLoad(ISD::ZEXTLOAD, dl, PtrVT, Chain,
7559 (LoadOffset != 0)
7560 ? DAG.getObjectPtrOffset(
7561 dl, Arg, TypeSize::getFixed(LoadOffset))
7562 : Arg,
7563 MachinePointerInfo(), VT);
7564 };
7565
7566 unsigned LoadOffset = 0;
7567
7568 // Initialize registers, which are fully occupied by the by-val argument.
7569 while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs[I].isRegLoc()) {
7570 SDValue Load = GetLoad(PtrVT, LoadOffset);
7571 MemOpChains.push_back(Load.getValue(1));
7572 LoadOffset += PtrByteSize;
7573 const CCValAssign &ByValVA = ArgLocs[I++];
7574 assert(ByValVA.getValNo() == ValNo &&
7575 "Unexpected location for pass-by-value argument.");
7576 RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), Load));
7577 }
7578
7579 if (LoadOffset == ByValSize)
7580 continue;
7581
7582 // There must be one more loc to handle the remainder.
7583 assert(ArgLocs[I].getValNo() == ValNo &&
7584 "Expected additional location for by-value argument.");
7585
7586 if (ArgLocs[I].isMemLoc()) {
7587 assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg.");
7588 const CCValAssign &ByValVA = ArgLocs[I++];
7589 ISD::ArgFlagsTy MemcpyFlags = Flags;
7590 // Only memcpy the bytes that don't pass in register.
7591 MemcpyFlags.setByValSize(ByValSize - LoadOffset);
7592 Chain = CallSeqStart = createMemcpyOutsideCallSeq(
7593 (LoadOffset != 0) ? DAG.getObjectPtrOffset(
7594 dl, Arg, TypeSize::getFixed(LoadOffset))
7595 : Arg,
7596 DAG.getObjectPtrOffset(
7597 dl, StackPtr, TypeSize::getFixed(ByValVA.getLocMemOffset())),
7598 CallSeqStart, MemcpyFlags, DAG, dl);
7599 continue;
7600 }
7601
7602 // Initialize the final register residue.
7603 // Any residue that occupies the final by-val arg register must be
7604 // left-justified on AIX. Loads must be a power-of-2 size and cannot be
7605 // larger than the ByValSize. For example: a 7 byte by-val arg requires 4,
7606 // 2 and 1 byte loads.
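        // As a sketch of that 7-byte case in 64-bit mode: the loop below emits
        // an i32 load at offset 0 shifted left by 32, an i16 load at offset 4
        // shifted left by 16, and an i8 load at offset 6 shifted left by 8,
        // OR'ing them together so the residue is left-justified in the final
        // register.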
7607 const unsigned ResidueBytes = ByValSize % PtrByteSize;
7608 assert(ResidueBytes != 0 && LoadOffset + PtrByteSize > ByValSize &&
7609 "Unexpected register residue for by-value argument.");
7610 SDValue ResidueVal;
7611 for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
7612 const unsigned N = llvm::bit_floor(ResidueBytes - Bytes);
7613 const MVT VT =
7614 N == 1 ? MVT::i8
7615 : ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
7616 SDValue Load = GetLoad(VT, LoadOffset);
7617 MemOpChains.push_back(Load.getValue(1));
7618 LoadOffset += N;
7619 Bytes += N;
7620
7621 // By-val arguments are passed left-justified in a register.
7622 // Every load here needs to be shifted, otherwise a full register load
7623 // should have been used.
7624 assert(PtrVT.getSimpleVT().getSizeInBits() > (Bytes * 8) &&
7625 "Unexpected load emitted during handling of pass-by-value "
7626 "argument.");
7627 unsigned NumSHLBits = PtrVT.getSimpleVT().getSizeInBits() - (Bytes * 8);
7628 EVT ShiftAmountTy =
7629 getShiftAmountTy(Load->getValueType(0), DAG.getDataLayout());
7630 SDValue SHLAmt = DAG.getConstant(NumSHLBits, dl, ShiftAmountTy);
7631 SDValue ShiftedLoad =
7632 DAG.getNode(ISD::SHL, dl, Load.getValueType(), Load, SHLAmt);
7633 ResidueVal = ResidueVal ? DAG.getNode(ISD::OR, dl, PtrVT, ResidueVal,
7634 ShiftedLoad)
7635 : ShiftedLoad;
7636 }
7637
7638 const CCValAssign &ByValVA = ArgLocs[I++];
7639 RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), ResidueVal));
7640 continue;
7641 }
7642
7643 CCValAssign &VA = ArgLocs[I++];
7644 const MVT LocVT = VA.getLocVT();
7645 const MVT ValVT = VA.getValVT();
7646
7647 switch (VA.getLocInfo()) {
7648 default:
7649 report_fatal_error("Unexpected argument extension type.");
7650 case CCValAssign::Full:
7651 break;
7652 case CCValAssign::ZExt:
7653 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7654 break;
7655 case CCValAssign::SExt:
7656 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7657 break;
7658 }
7659
7660 if (VA.isRegLoc() && !VA.needsCustom()) {
7661 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
7662 continue;
7663 }
7664
7665 // Vector arguments passed to VarArg functions need custom handling when
7666 // they are passed (at least partially) in GPRs.
7667 if (VA.isMemLoc() && VA.needsCustom() && ValVT.isVector()) {
7668 assert(CFlags.IsVarArg && "Custom MemLocs only used for Vector args.");
7669 // Store value to its stack slot.
7670 SDValue PtrOff =
7671 DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7672 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7673 SDValue Store =
7674 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
7675 MemOpChains.push_back(Store);
7676 const unsigned OriginalValNo = VA.getValNo();
7677 // Then load the GPRs from the stack
7678 unsigned LoadOffset = 0;
7679 auto HandleCustomVecRegLoc = [&]() {
7680 assert(I != E && "Unexpected end of CCvalAssigns.");
7681 assert(ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7682 "Expected custom RegLoc.");
7683 CCValAssign RegVA = ArgLocs[I++];
7684 assert(RegVA.getValNo() == OriginalValNo &&
7685 "Custom MemLoc ValNo and custom RegLoc ValNo must match.");
7686 SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
7687 DAG.getConstant(LoadOffset, dl, PtrVT));
7688 SDValue Load = DAG.getLoad(PtrVT, dl, Store, Add, MachinePointerInfo());
7689 MemOpChains.push_back(Load.getValue(1));
7690 RegsToPass.push_back(std::make_pair(RegVA.getLocReg(), Load));
7691 LoadOffset += PtrByteSize;
7692 };
7693
7694 // In 64-bit mode there will be exactly 2 custom RegLocs that follow, and
7695 // in 32-bit mode there will be 2 custom RegLocs if we are passing in R9
7696 // and R10.
7697 HandleCustomVecRegLoc();
7698 HandleCustomVecRegLoc();
7699
7700 if (I != E && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7701 ArgLocs[I].getValNo() == OriginalValNo) {
7702 assert(!IsPPC64 &&
7703 "Only 2 custom RegLocs expected for 64-bit codegen.");
7704 HandleCustomVecRegLoc();
7705 HandleCustomVecRegLoc();
7706 }
7707
7708 continue;
7709 }
7710
7711 if (VA.isMemLoc()) {
7712 SDValue PtrOff =
7713 DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7714 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7715 MemOpChains.push_back(
7716 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
7717
7718 continue;
7719 }
7720
7721 if (!ValVT.isFloatingPoint())
7723 "Unexpected register handling for calling convention.");
7724
7725 // Custom handling is used for GPR initializations for vararg float
7726 // arguments.
7727 assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
7728 LocVT.isInteger() &&
7729 "Custom register handling only expected for VarArg.");
7730
7731 SDValue ArgAsInt =
7732 DAG.getBitcast(MVT::getIntegerVT(ValVT.getSizeInBits()), Arg);
7733
7734 if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize())
7735 // f32 in 32-bit GPR
7736 // f64 in 64-bit GPR
7737 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
7738 else if (Arg.getValueType().getFixedSizeInBits() <
7739 LocVT.getFixedSizeInBits())
7740 // f32 in 64-bit GPR.
7741 RegsToPass.push_back(std::make_pair(
7742 VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, LocVT)));
7743 else {
7744 // f64 in two 32-bit GPRs
7745 // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
7746 assert(Arg.getValueType() == MVT::f64 && CFlags.IsVarArg && !IsPPC64 &&
7747 "Unexpected custom register for argument!");
7748 CCValAssign &GPR1 = VA;
7749 SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt,
7750 DAG.getConstant(32, dl, MVT::i8));
7751 RegsToPass.push_back(std::make_pair(
7752 GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32)));
7753
7754 if (I != E) {
7755 // If only 1 GPR was available, there will only be one custom GPR and
7756 // the argument will also pass in memory.
7757 CCValAssign &PeekArg = ArgLocs[I];
7758 if (PeekArg.isRegLoc() && PeekArg.getValNo() == VA.getValNo()) {
7759 assert(PeekArg.needsCustom() && "A second custom GPR is expected.");
7760 CCValAssign &GPR2 = ArgLocs[I++];
7761 RegsToPass.push_back(std::make_pair(
7762 GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
7763 }
7764 }
7765 }
7766 }
7767
7768 if (!MemOpChains.empty())
7769 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
7770
7771 // For indirect calls, we need to save the TOC base to the stack for
7772 // restoration after the call.
7773 if (CFlags.IsIndirect) {
7774 assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
7775 const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
7776 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
7777 const MVT PtrVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
7778 const unsigned TOCSaveOffset =
7779 Subtarget.getFrameLowering()->getTOCSaveOffset();
7780
7781 setUsesTOCBasePtr(DAG);
7782 SDValue Val = DAG.getCopyFromReg(Chain, dl, TOCBaseReg, PtrVT);
7783 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
7784 SDValue StackPtr = DAG.getRegister(StackPtrReg, PtrVT);
7785 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7786 Chain = DAG.getStore(
7787 Val.getValue(1), dl, Val, AddPtr,
7788 MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
7789 }
7790
7791 // Build a sequence of copy-to-reg nodes chained together with token chain
7792 // and flag operands which copy the outgoing args into the appropriate regs.
7793 SDValue InGlue;
7794 for (auto Reg : RegsToPass) {
7795 Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InGlue);
7796 InGlue = Chain.getValue(1);
7797 }
7798
7799 const int SPDiff = 0;
7800 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
7801 Callee, SPDiff, NumBytes, Ins, InVals, CB);
7802}
7803
7804bool
7805PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
7806 MachineFunction &MF, bool isVarArg,
7807 const SmallVectorImpl<ISD::OutputArg> &Outs,
7808 LLVMContext &Context) const {
7809 SmallVector<CCValAssign, 16> RVLocs;
7810 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
7811 return CCInfo.CheckReturn(
7812 Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7813 ? RetCC_PPC_Cold
7814 : RetCC_PPC);
7815}
7816
7817SDValue
7818PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
7819 bool isVarArg,
7820 const SmallVectorImpl<ISD::OutputArg> &Outs,
7821 const SmallVectorImpl<SDValue> &OutVals,
7822 const SDLoc &dl, SelectionDAG &DAG) const {
7823 SmallVector<CCValAssign, 16> RVLocs;
7824 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
7825 *DAG.getContext());
7826 CCInfo.AnalyzeReturn(Outs,
7827 (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7828 ? RetCC_PPC_Cold
7829 : RetCC_PPC);
7830
7831 SDValue Glue;
7832 SmallVector<SDValue, 4> RetOps(1, Chain);
7833
7834 // Copy the result values into the output registers.
7835 for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
7836 CCValAssign &VA = RVLocs[i];
7837 assert(VA.isRegLoc() && "Can only return in registers!");
7838
7839 SDValue Arg = OutVals[RealResIdx];
7840
7841 switch (VA.getLocInfo()) {
7842 default: llvm_unreachable("Unknown loc info!");
7843 case CCValAssign::Full: break;
7844 case CCValAssign::AExt:
7845 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
7846 break;
7847 case CCValAssign::ZExt:
7848 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7849 break;
7850 case CCValAssign::SExt:
7851 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7852 break;
7853 }
7854 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
7855 bool isLittleEndian = Subtarget.isLittleEndian();
7856 // Legalize ret f64 -> ret 2 x i32.
7857 SDValue SVal =
7858 DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7859 DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl));
7860 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
7861 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7862 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7863 DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl));
7864 Glue = Chain.getValue(1);
7865 VA = RVLocs[++i]; // skip ahead to next loc
7866 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
7867 } else
7868 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Glue);
7869 Glue = Chain.getValue(1);
7870 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7871 }
7872
7873 RetOps[0] = Chain; // Update chain.
7874
7875 // Add the glue if we have it.
7876 if (Glue.getNode())
7877 RetOps.push_back(Glue);
7878
7879 return DAG.getNode(PPCISD::RET_GLUE, dl, MVT::Other, RetOps);
7880}
7881
7882SDValue
7883PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
7884 SelectionDAG &DAG) const {
7885 SDLoc dl(Op);
7886
7887 // Get the correct type for integers.
7888 EVT IntVT = Op.getValueType();
7889
7890 // Get the inputs.
7891 SDValue Chain = Op.getOperand(0);
7892 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7893 // Build a DYNAREAOFFSET node.
7894 SDValue Ops[2] = {Chain, FPSIdx};
7895 SDVTList VTs = DAG.getVTList(IntVT);
7896 return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
7897}
7898
7899SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
7900 SelectionDAG &DAG) const {
7901 // When we pop the dynamic allocation we need to restore the SP link.
7902 SDLoc dl(Op);
7903
7904 // Get the correct type for pointers.
7905 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7906
7907 // Construct the stack pointer operand.
7908 bool isPPC64 = Subtarget.isPPC64();
7909 unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
7910 SDValue StackPtr = DAG.getRegister(SP, PtrVT);
7911
7912 // Get the operands for the STACKRESTORE.
7913 SDValue Chain = Op.getOperand(0);
7914 SDValue SaveSP = Op.getOperand(1);
7915
7916 // Load the old link SP.
7917 SDValue LoadLinkSP =
7918 DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());
7919
7920 // Restore the stack pointer.
7921 Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
7922
7923 // Store the old link SP.
7924 return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
7925}
7926
7927SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
7928 MachineFunction &MF = DAG.getMachineFunction();
7929 bool isPPC64 = Subtarget.isPPC64();
7930 EVT PtrVT = getPointerTy(MF.getDataLayout());
7931
7932 // Get the current return address save index.
7933 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7934 int RASI = FI->getReturnAddrSaveIndex();
7936
7937 // If the return address save index hasn't been defined yet.
7938 if (!RASI) {
7939 // Find out the fixed offset of the return address save slot.
7940 int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
7941 // Allocate the frame index for the return address save area.
7942 RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
7943 // Save the result.
7944 FI->setReturnAddrSaveIndex(RASI);
7945 }
7946 return DAG.getFrameIndex(RASI, PtrVT);
7947}
7948
7949SDValue
7950PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
7951 MachineFunction &MF = DAG.getMachineFunction();
7952 bool isPPC64 = Subtarget.isPPC64();
7953 EVT PtrVT = getPointerTy(MF.getDataLayout());
7954
7955 // Get current frame pointer save index. The users of this index will be
7956 // primarily DYNALLOC instructions.
7957 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7958 int FPSI = FI->getFramePointerSaveIndex();
7959
7960 // If the frame pointer save index hasn't been defined yet.
7961 if (!FPSI) {
7963 // Find out the fixed offset of the frame pointer save area.
7963 int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
7964 // Allocate the frame index for frame pointer save area.
7965 FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
7966 // Save the result.
7967 FI->setFramePointerSaveIndex(FPSI);
7968 }
7969 return DAG.getFrameIndex(FPSI, PtrVT);
7970}
7971
7972SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
7973 SelectionDAG &DAG) const {
7974 MachineFunction &MF = DAG.getMachineFunction();
7975 // Get the inputs.
7976 SDValue Chain = Op.getOperand(0);
7977 SDValue Size = Op.getOperand(1);
7978 SDLoc dl(Op);
7979
7980 // Get the correct type for pointers.
7981 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7982 // Negate the size.
7983 SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
7984 DAG.getConstant(0, dl, PtrVT), Size);
7985 // Construct a node for the frame pointer save index.
7986 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7987 SDValue Ops[3] = { Chain, NegSize, FPSIdx };
7988 SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
7989 if (hasInlineStackProbe(MF))
7990 return DAG.getNode(PPCISD::PROBED_ALLOCA, dl, VTs, Ops);
7991 return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
7992}
7993
7994SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
7995 SelectionDAG &DAG) const {
7996 MachineFunction &MF = DAG.getMachineFunction();
7997
7998 bool isPPC64 = Subtarget.isPPC64();
7999 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8000
8001 int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
8002 return DAG.getFrameIndex(FI, PtrVT);
8003}
8004
8005SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
8006 SelectionDAG &DAG) const {
8007 SDLoc DL(Op);
8008 return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
8009 DAG.getVTList(MVT::i32, MVT::Other),
8010 Op.getOperand(0), Op.getOperand(1));
8011}
8012
8013SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
8014 SelectionDAG &DAG) const {
8015 SDLoc DL(Op);
8016 return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
8017 Op.getOperand(0), Op.getOperand(1));
8018}
8019
8020SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
8021 if (Op.getValueType().isVector())
8022 return LowerVectorLoad(Op, DAG);
8023
8024 assert(Op.getValueType() == MVT::i1 &&
8025 "Custom lowering only for i1 loads");
8026
8027 // First, load the byte into a pointer-width integer, then truncate to 1 bit.
8028
8029 SDLoc dl(Op);
8030 LoadSDNode *LD = cast<LoadSDNode>(Op);
8031
8032 SDValue Chain = LD->getChain();
8033 SDValue BasePtr = LD->getBasePtr();
8034 MachineMemOperand *MMO = LD->getMemOperand();
8035
8036 SDValue NewLD =
8037 DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
8038 BasePtr, MVT::i8, MMO);
8039 SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
8040
8041 SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
8042 return DAG.getMergeValues(Ops, dl);
8043}
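// Sketch of the result on a 64-bit target: the i1 load becomes an
// any-extending i8 load producing an i64, followed by a truncate back down
// to i1, with the load's chain result forwarded unchanged.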
8044
8045SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
8046 if (Op.getOperand(1).getValueType().isVector())
8047 return LowerVectorStore(Op, DAG);
8048
8049 assert(Op.getOperand(1).getValueType() == MVT::i1 &&
8050 "Custom lowering only for i1 stores");
8051
8052 // First, zero-extend to pointer width, then use a truncating store to 8 bits.
8053
8054 SDLoc dl(Op);
8055 StoreSDNode *ST = cast<StoreSDNode>(Op);
8056
8057 SDValue Chain = ST->getChain();
8058 SDValue BasePtr = ST->getBasePtr();
8059 SDValue Value = ST->getValue();
8060 MachineMemOperand *MMO = ST->getMemOperand();
8061
8062 Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()),
8063 Value);
8064 return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
8065}
8066
8067// FIXME: Remove this once the ANDI glue bug is fixed:
8068SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
8069 assert(Op.getValueType() == MVT::i1 &&
8070 "Custom lowering only for i1 results");
8071
8072 SDLoc DL(Op);
8073 return DAG.getNode(PPCISD::ANDI_rec_1_GT_BIT, DL, MVT::i1, Op.getOperand(0));
8074}
8075
8076SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
8077 SelectionDAG &DAG) const {
8078
8079 // Implements a vector truncate that fits in a vector register as a shuffle.
8080 // We want to legalize vector truncates down to where the source fits in
8081 // a vector register (and target is therefore smaller than vector register
8082 // size). At that point legalization will try to custom lower the sub-legal
8083 // result and get here - where we can contain the truncate as a single target
8084 // operation.
8085
8086 // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
8087 // <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
8088 //
8089 // We will implement it for big-endian ordering as this (where x denotes
8090 // undefined):
8091 // < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
8092 // < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
8093 //
8094 // The same operation in little-endian ordering will be:
8095 // <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
8096 // <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
8097
8098 EVT TrgVT = Op.getValueType();
8099 assert(TrgVT.isVector() && "Vector type expected.");
8100 unsigned TrgNumElts = TrgVT.getVectorNumElements();
8101 EVT EltVT = TrgVT.getVectorElementType();
8102 if (!isOperationCustom(Op.getOpcode(), TrgVT) ||
8103 TrgVT.getSizeInBits() > 128 || !isPowerOf2_32(TrgNumElts) ||
8104 !llvm::has_single_bit<uint32_t>(EltVT.getSizeInBits()))
8105 return SDValue();
8106
8107 SDValue N1 = Op.getOperand(0);
8108 EVT SrcVT = N1.getValueType();
8109 unsigned SrcSize = SrcVT.getSizeInBits();
8110 if (SrcSize > 256 || !isPowerOf2_32(SrcVT.getVectorNumElements()) ||
8111 !llvm::has_single_bit<uint32_t>(
8112          SrcVT.getVectorElementType().getSizeInBits()))
8113    return SDValue();
8114 if (SrcSize == 256 && SrcVT.getVectorNumElements() < 2)
8115 return SDValue();
8116
8117 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8118 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8119
8120 SDLoc DL(Op);
8121 SDValue Op1, Op2;
8122 if (SrcSize == 256) {
8123 EVT VecIdxTy = getVectorIdxTy(DAG.getDataLayout());
8124 EVT SplitVT =
8125        SrcVT.getHalfNumVectorElementsVT(*DAG.getContext());
8126    unsigned SplitNumElts = SplitVT.getVectorNumElements();
8127 Op1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
8128 DAG.getConstant(0, DL, VecIdxTy));
8129 Op2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
8130 DAG.getConstant(SplitNumElts, DL, VecIdxTy));
8131 }
8132 else {
8133 Op1 = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
8134 Op2 = DAG.getUNDEF(WideVT);
8135 }
8136
8137 // First list the elements we want to keep.
8138 unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
8139 SmallVector<int, 16> ShuffV;
8140 if (Subtarget.isLittleEndian())
8141 for (unsigned i = 0; i < TrgNumElts; ++i)
8142 ShuffV.push_back(i * SizeMult);
8143 else
8144 for (unsigned i = 1; i <= TrgNumElts; ++i)
8145 ShuffV.push_back(i * SizeMult - 1);
8146
8147 // Populate the remaining elements with undefs.
8148 for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
8149 // ShuffV.push_back(i + WideNumElts);
8150 ShuffV.push_back(WideNumElts + 1);
8151
8152 Op1 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op1);
8153 Op2 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op2);
8154 return DAG.getVectorShuffle(WideVT, DL, Op1, Op2, ShuffV);
8155}
8156
8157/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
8158/// possible.
8159SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
8160 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
8161 EVT ResVT = Op.getValueType();
8162 EVT CmpVT = Op.getOperand(0).getValueType();
8163 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
8164 SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
8165 SDLoc dl(Op);
8166
8167  // Without power9-vector, we don't have a native instruction for f128 comparison.
8168  // The following transformation to a setcc (which becomes a libcall) is needed:
8169 // select_cc lhs, rhs, tv, fv, cc -> select_cc (setcc cc, x, y), 0, tv, fv, NE
8170 if (!Subtarget.hasP9Vector() && CmpVT == MVT::f128) {
8171 SDValue Z = DAG.getSetCC(
8172 dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT),
8173 LHS, RHS, CC);
8174 SDValue Zero = DAG.getConstant(0, dl, Z.getValueType());
8175 return DAG.getSelectCC(dl, Z, Zero, TV, FV, ISD::SETNE);
8176 }
8177
8178 // Not FP, or using SPE? Not a fsel.
8179 if (!CmpVT.isFloatingPoint() || !TV.getValueType().isFloatingPoint() ||
8180 Subtarget.hasSPE())
8181 return Op;
8182
8183 SDNodeFlags Flags = Op.getNode()->getFlags();
8184
8185 // We have xsmaxc[dq]p/xsminc[dq]p which are OK to emit even in the
8186 // presence of infinities.
8187 if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
8188 switch (CC) {
8189 default:
8190 break;
8191 case ISD::SETOGT:
8192 case ISD::SETGT:
8193 return DAG.getNode(PPCISD::XSMAXC, dl, Op.getValueType(), LHS, RHS);
8194 case ISD::SETOLT:
8195 case ISD::SETLT:
8196 return DAG.getNode(PPCISD::XSMINC, dl, Op.getValueType(), LHS, RHS);
8197 }
8198 }
8199
8200 // We might be able to do better than this under some circumstances, but in
8201 // general, fsel-based lowering of select is a finite-math-only optimization.
8202 // For more information, see section F.3 of the 2.06 ISA specification.
8203 // With ISA 3.0
8204 if ((!DAG.getTarget().Options.NoInfsFPMath && !Flags.hasNoInfs()) ||
8205 (!DAG.getTarget().Options.NoNaNsFPMath && !Flags.hasNoNaNs()) ||
8206 ResVT == MVT::f128)
8207 return Op;
8208
8209 // If the RHS of the comparison is a 0.0, we don't need to do the
8210 // subtraction at all.
8211 SDValue Sel1;
8212 if (isFloatingPointZero(RHS))
8213 switch (CC) {
8214 default: break; // SETUO etc aren't handled by fsel.
8215 case ISD::SETNE:
8216 std::swap(TV, FV);
8217 [[fallthrough]];
8218 case ISD::SETEQ:
8219 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8220 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8221 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8222 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
8223 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8224 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8225 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
8226 case ISD::SETULT:
8227 case ISD::SETLT:
8228 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
8229 [[fallthrough]];
8230 case ISD::SETOGE:
8231 case ISD::SETGE:
8232 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8233 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8234 return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8235 case ISD::SETUGT:
8236 case ISD::SETGT:
8237 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
8238 [[fallthrough]];
8239 case ISD::SETOLE:
8240 case ISD::SETLE:
8241 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8242 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8243 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8244 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
8245 }
8246
8247 SDValue Cmp;
8248 switch (CC) {
8249 default: break; // SETUO etc aren't handled by fsel.
8250 case ISD::SETNE:
8251 std::swap(TV, FV);
8252 [[fallthrough]];
8253 case ISD::SETEQ:
8254 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8255 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8256 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8257 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8258 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
8259 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8260 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8261 DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
8262 case ISD::SETULT:
8263 case ISD::SETLT:
8264 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8265 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8266 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8267 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8268 case ISD::SETOGE:
8269 case ISD::SETGE:
8270 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8271 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8272 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8273 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8274 case ISD::SETUGT:
8275 case ISD::SETGT:
8276 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8277 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8278 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8279 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8280 case ISD::SETOLE:
8281 case ISD::SETLE:
8282 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8283 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8284 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8285 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8286 }
8287 return Op;
8288}
8289
8290static unsigned getPPCStrictOpcode(unsigned Opc) {
8291 switch (Opc) {
8292 default:
8293 llvm_unreachable("No strict version of this opcode!");
8294 case PPCISD::FCTIDZ:
8295 return PPCISD::STRICT_FCTIDZ;
8296 case PPCISD::FCTIWZ:
8297 return PPCISD::STRICT_FCTIWZ;
8298 case PPCISD::FCTIDUZ:
8299    return PPCISD::STRICT_FCTIDUZ;
8300  case PPCISD::FCTIWUZ:
8301    return PPCISD::STRICT_FCTIWUZ;
8302 case PPCISD::FCFID:
8303 return PPCISD::STRICT_FCFID;
8304 case PPCISD::FCFIDU:
8305 return PPCISD::STRICT_FCFIDU;
8306 case PPCISD::FCFIDS:
8307 return PPCISD::STRICT_FCFIDS;
8308 case PPCISD::FCFIDUS:
8309    return PPCISD::STRICT_FCFIDUS;
8310  }
8311}
8312
8313 static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG,
8314                               const PPCSubtarget &Subtarget) {
8315 SDLoc dl(Op);
8316 bool IsStrict = Op->isStrictFPOpcode();
8317 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8318 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8319
8320 // TODO: Any other flags to propagate?
8321 SDNodeFlags Flags;
8322 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8323
8324 // For strict nodes, source is the second operand.
8325 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8326 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
8327 MVT DestTy = Op.getSimpleValueType();
8328 assert(Src.getValueType().isFloatingPoint() &&
8329 (DestTy == MVT::i8 || DestTy == MVT::i16 || DestTy == MVT::i32 ||
8330 DestTy == MVT::i64) &&
8331 "Invalid FP_TO_INT types");
8332 if (Src.getValueType() == MVT::f32) {
8333 if (IsStrict) {
8334 Src =
8335          DAG.getNode(ISD::STRICT_FP_EXTEND, dl,
8336                      DAG.getVTList(MVT::f64, MVT::Other), {Chain, Src}, Flags);
8337 Chain = Src.getValue(1);
8338 } else
8339 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
8340 }
8341 if ((DestTy == MVT::i8 || DestTy == MVT::i16) && Subtarget.hasP9Vector())
8342 DestTy = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
8343 unsigned Opc = ISD::DELETED_NODE;
8344 switch (DestTy.SimpleTy) {
8345 default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
8346 case MVT::i32:
8347 Opc = IsSigned ? PPCISD::FCTIWZ
8348 : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ);
8349 break;
8350 case MVT::i64:
8351 assert((IsSigned || Subtarget.hasFPCVT()) &&
8352 "i64 FP_TO_UINT is supported only with FPCVT");
8353 Opc = IsSigned ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;
8354 }
8355 EVT ConvTy = Src.getValueType() == MVT::f128 ? MVT::f128 : MVT::f64;
8356 SDValue Conv;
8357 if (IsStrict) {
8358 Opc = getPPCStrictOpcode(Opc);
8359 Conv = DAG.getNode(Opc, dl, DAG.getVTList(ConvTy, MVT::Other), {Chain, Src},
8360 Flags);
8361 } else {
8362 Conv = DAG.getNode(Opc, dl, ConvTy, Src);
8363 }
8364 return Conv;
8365}
8366
8367void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
8368 SelectionDAG &DAG,
8369 const SDLoc &dl) const {
8370 SDValue Tmp = convertFPToInt(Op, DAG, Subtarget);
8371 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8372 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8373 bool IsStrict = Op->isStrictFPOpcode();
8374
8375 // Convert the FP value to an int value through memory.
8376 bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
8377 (IsSigned || Subtarget.hasFPCVT());
8378 SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
8379 int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
8380 MachinePointerInfo MPI =
8381      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
8382
8383 // Emit a store to the stack slot.
8384 SDValue Chain = IsStrict ? Tmp.getValue(1) : DAG.getEntryNode();
8385 Align Alignment(DAG.getEVTAlign(Tmp.getValueType()));
8386 if (i32Stack) {
8387    MachineFunction &MF = DAG.getMachineFunction();
8388    Alignment = Align(4);
8389 MachineMemOperand *MMO =
8390 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Alignment);
8391 SDValue Ops[] = { Chain, Tmp, FIPtr };
8392 Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
8393 DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
8394 } else
8395 Chain = DAG.getStore(Chain, dl, Tmp, FIPtr, MPI, Alignment);
8396
8397 // Result is a load from the stack slot. If loading 4 bytes, make sure to
8398 // add in a bias on big endian.
8399 if (Op.getValueType() == MVT::i32 && !i32Stack) {
8400 FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
8401 DAG.getConstant(4, dl, FIPtr.getValueType()));
8402 MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4);
8403 }
8404
8405 RLI.Chain = Chain;
8406 RLI.Ptr = FIPtr;
8407 RLI.MPI = MPI;
8408 RLI.Alignment = Alignment;
8409}
8410
8411/// Custom lowers floating point to integer conversions to use
8412/// the direct move instructions available in ISA 2.07 to avoid the
8413/// need for load/store combinations.
8414SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
8415 SelectionDAG &DAG,
8416 const SDLoc &dl) const {
8417 SDValue Conv = convertFPToInt(Op, DAG, Subtarget);
8418 SDValue Mov = DAG.getNode(PPCISD::MFVSR, dl, Op.getValueType(), Conv);
8419 if (Op->isStrictFPOpcode())
8420 return DAG.getMergeValues({Mov, Conv.getValue(1)}, dl);
8421 else
8422 return Mov;
8423}
8424
8425SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
8426 const SDLoc &dl) const {
8427 bool IsStrict = Op->isStrictFPOpcode();
8428 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8429 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8430 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8431 EVT SrcVT = Src.getValueType();
8432 EVT DstVT = Op.getValueType();
8433
8434 // FP to INT conversions are legal for f128.
8435 if (SrcVT == MVT::f128)
8436 return Subtarget.hasP9Vector() ? Op : SDValue();
8437
8438 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
8439 // PPC (the libcall is not available).
8440 if (SrcVT == MVT::ppcf128) {
8441 if (DstVT == MVT::i32) {
8442 // TODO: Conservatively pass only nofpexcept flag here. Need to check and
8443 // set other fast-math flags to FP operations in both strict and
8444 // non-strict cases. (FP_TO_SINT, FSUB)
8445      SDNodeFlags Flags;
8446      Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8447
8448 if (IsSigned) {
8449 SDValue Lo, Hi;
8450 std::tie(Lo, Hi) = DAG.SplitScalar(Src, dl, MVT::f64, MVT::f64);
8451
8452 // Add the two halves of the long double in round-to-zero mode, and use
8453 // a smaller FP_TO_SINT.
8454 if (IsStrict) {
8455        SDValue Res = DAG.getNode(PPCISD::STRICT_FADDRTZ, dl,
8456                                  DAG.getVTList(MVT::f64, MVT::Other),
8457 {Op.getOperand(0), Lo, Hi}, Flags);
8458 return DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8459 DAG.getVTList(MVT::i32, MVT::Other),
8460 {Res.getValue(1), Res}, Flags);
8461 } else {
8462 SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
8463 return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
8464 }
8465 } else {
8466 const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
8467 APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
8468 SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
8469 SDValue SignMask = DAG.getConstant(0x80000000, dl, DstVT);
8470 if (IsStrict) {
8471 // Sel = Src < 0x80000000
8472 // FltOfs = select Sel, 0.0, 0x80000000
8473 // IntOfs = select Sel, 0, 0x80000000
8474 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
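 //
 // For example (illustrative value): Src = 3e9 is >= 2^31, so FltOfs = 2^31
 // and IntOfs = 0x80000000; fp_to_sint(3e9 - 2^31) = 852516352 = 0x32D05E00,
 // and 0x32D05E00 ^ 0x80000000 = 0xB2D05E00, i.e. the unsigned value
 // 3000000000.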
8475 SDValue Chain = Op.getOperand(0);
8476 EVT SetCCVT =
8477 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
8478 EVT DstSetCCVT =
8479 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
8480 SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
8481 Chain, true);
8482 Chain = Sel.getValue(1);
8483
8484 SDValue FltOfs = DAG.getSelect(
8485 dl, SrcVT, Sel, DAG.getConstantFP(0.0, dl, SrcVT), Cst);
8486 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8487
8488 SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl,
8489 DAG.getVTList(SrcVT, MVT::Other),
8490 {Chain, Src, FltOfs}, Flags);
8491 Chain = Val.getValue(1);
8492 SDValue SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8493 DAG.getVTList(DstVT, MVT::Other),
8494 {Chain, Val}, Flags);
8495 Chain = SInt.getValue(1);
8496 SDValue IntOfs = DAG.getSelect(
8497 dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), SignMask);
8498 SDValue Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
8499 return DAG.getMergeValues({Result, Chain}, dl);
8500 } else {
8501 // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
8502 // FIXME: generated code sucks.
8503 SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Cst);
8504 True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
8505 True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, SignMask);
8506 SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
8507 return DAG.getSelectCC(dl, Src, Cst, True, False, ISD::SETGE);
8508 }
8509 }
8510 }
8511
8512 return SDValue();
8513 }
8514
8515 if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
8516 return LowerFP_TO_INTDirectMove(Op, DAG, dl);
8517
8518 ReuseLoadInfo RLI;
8519 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8520
8521 return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8522 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8523}
8524
8525// We're trying to insert a regular store, S, and then a load, L. If the
8526// incoming value, O, is a load, we might just be able to have our load use the
8527// address used by O. However, we don't know if anything else will store to
8528// that address before we can load from it. To prevent this situation, we need
8529// to insert our load, L, into the chain as a peer of O. To do this, we give L
8530// the same chain operand as O, we create a token factor from the chain results
8531// of O and L, and we replace all uses of O's chain result with that token
8532// factor (see spliceIntoChain below for this last part).
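// Schematically (illustrative, not actual DAG node names):
//   before:  O.chain-result -> existing users
//   after:   L takes O's input chain; TF = TokenFactor(O.chain, L.chain);
//            the former users of O's chain result now use TF,
// so no store can be ordered between O and L on that chain path.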
8533bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
8534 ReuseLoadInfo &RLI,
8535 SelectionDAG &DAG,
8536 ISD::LoadExtType ET) const {
8537 // Conservatively skip reusing for constrained FP nodes.
8538 if (Op->isStrictFPOpcode())
8539 return false;
8540
8541 SDLoc dl(Op);
8542 bool ValidFPToUint = Op.getOpcode() == ISD::FP_TO_UINT &&
8543 (Subtarget.hasFPCVT() || Op.getValueType() == MVT::i32);
8544 if (ET == ISD::NON_EXTLOAD &&
8545 (ValidFPToUint || Op.getOpcode() == ISD::FP_TO_SINT) &&
8546 isOperationLegalOrCustom(Op.getOpcode(),
8547 Op.getOperand(0).getValueType())) {
8548
8549 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8550 return true;
8551 }
8552
8553 LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
8554 if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
8555 LD->isNonTemporal())
8556 return false;
8557 if (LD->getMemoryVT() != MemVT)
8558 return false;
8559
8560 // If the result of the load is an illegal type, then we can't build a
8561 // valid chain for reuse since the legalised loads and token factor node that
8562  // ties the legalised loads together uses a different output chain than the
8563 // illegal load.
8564 if (!isTypeLegal(LD->getValueType(0)))
8565 return false;
8566
8567 RLI.Ptr = LD->getBasePtr();
8568 if (LD->isIndexed() && !LD->getOffset().isUndef()) {
8569 assert(LD->getAddressingMode() == ISD::PRE_INC &&
8570 "Non-pre-inc AM on PPC?");
8571 RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
8572 LD->getOffset());
8573 }
8574
8575 RLI.Chain = LD->getChain();
8576 RLI.MPI = LD->getPointerInfo();
8577 RLI.IsDereferenceable = LD->isDereferenceable();
8578 RLI.IsInvariant = LD->isInvariant();
8579 RLI.Alignment = LD->getAlign();
8580 RLI.AAInfo = LD->getAAInfo();
8581 RLI.Ranges = LD->getRanges();
8582
8583 RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
8584 return true;
8585}
8586
8587// Given the head of the old chain, ResChain, insert a token factor containing
8588// it and NewResChain, and make users of ResChain now be users of that token
8589// factor.
8590// TODO: Remove and use DAG::makeEquivalentMemoryOrdering() instead.
8591void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
8592 SDValue NewResChain,
8593 SelectionDAG &DAG) const {
8594 if (!ResChain)
8595 return;
8596
8597 SDLoc dl(NewResChain);
8598
8599 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
8600 NewResChain, DAG.getUNDEF(MVT::Other));
8601 assert(TF.getNode() != NewResChain.getNode() &&
8602 "A new TF really is required here");
8603
8604 DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
8605 DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
8606}
8607
8608 /// Analyze the profitability of a direct move: prefer a float-side load over
8609 /// an integer load plus a direct move when the loaded integer value has no
8610 /// integer uses.
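///
/// For example (assumed IR, not from the source): for
///   %i = load i32, ptr %p
///   %f = sitofp i32 %i to double
/// where %i has no other users, this returns false so the lowering can use a
/// floating-point-side load (e.g. lfiwax) instead of an integer load followed
/// by a direct move.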
8611bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
8612 SDNode *Origin = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0).getNode();
8613 if (Origin->getOpcode() != ISD::LOAD)
8614 return true;
8615
8616 // If there is no LXSIBZX/LXSIHZX, like Power8,
8617 // prefer direct move if the memory size is 1 or 2 bytes.
8618 MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
8619 if (!Subtarget.hasP9Vector() &&
8620 (!MMO->getSize().hasValue() || MMO->getSize().getValue() <= 2))
8621 return true;
8622
8623 for (SDNode::use_iterator UI = Origin->use_begin(),
8624 UE = Origin->use_end();
8625 UI != UE; ++UI) {
8626
8627 // Only look at the users of the loaded value.
8628 if (UI.getUse().get().getResNo() != 0)
8629 continue;
8630
8631 if (UI->getOpcode() != ISD::SINT_TO_FP &&
8632 UI->getOpcode() != ISD::UINT_TO_FP &&
8633 UI->getOpcode() != ISD::STRICT_SINT_TO_FP &&
8634 UI->getOpcode() != ISD::STRICT_UINT_TO_FP)
8635 return true;
8636 }
8637
8638 return false;
8639}
8640
8641 static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG,
8642                               const PPCSubtarget &Subtarget,
8643 SDValue Chain = SDValue()) {
8644 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8645 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8646 SDLoc dl(Op);
8647
8648 // TODO: Any other flags to propagate?
8649 SDNodeFlags Flags;
8650 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8651
8652 // If we have FCFIDS, then use it when converting to single-precision.
8653 // Otherwise, convert to double-precision and then round.
8654 bool IsSingle = Op.getValueType() == MVT::f32 && Subtarget.hasFPCVT();
8655 unsigned ConvOpc = IsSingle ? (IsSigned ? PPCISD::FCFIDS : PPCISD::FCFIDUS)
8656 : (IsSigned ? PPCISD::FCFID : PPCISD::FCFIDU);
8657 EVT ConvTy = IsSingle ? MVT::f32 : MVT::f64;
8658 if (Op->isStrictFPOpcode()) {
8659 if (!Chain)
8660 Chain = Op.getOperand(0);
8661 return DAG.getNode(getPPCStrictOpcode(ConvOpc), dl,
8662 DAG.getVTList(ConvTy, MVT::Other), {Chain, Src}, Flags);
8663 } else
8664 return DAG.getNode(ConvOpc, dl, ConvTy, Src);
8665}
8666
8667/// Custom lowers integer to floating point conversions to use
8668/// the direct move instructions available in ISA 2.07 to avoid the
8669/// need for load/store combinations.
8670SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
8671 SelectionDAG &DAG,
8672 const SDLoc &dl) const {
8673 assert((Op.getValueType() == MVT::f32 ||
8674 Op.getValueType() == MVT::f64) &&
8675 "Invalid floating point type as target of conversion");
8676 assert(Subtarget.hasFPCVT() &&
8677 "Int to FP conversions with direct moves require FPCVT");
8678 SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
8679 bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
8680 bool Signed = Op.getOpcode() == ISD::SINT_TO_FP ||
8681 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8682 unsigned MovOpc = (WordInt && !Signed) ? PPCISD::MTVSRZ : PPCISD::MTVSRA;
8683 SDValue Mov = DAG.getNode(MovOpc, dl, MVT::f64, Src);
8684 return convertIntToFP(Op, Mov, DAG, Subtarget);
8685}
8686
8687static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
8688
8689 EVT VecVT = Vec.getValueType();
8690 assert(VecVT.isVector() && "Expected a vector type.");
8691 assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");
8692
8693 EVT EltVT = VecVT.getVectorElementType();
8694 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8695 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8696
8697 unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
8698 SmallVector<SDValue, 16> Ops(NumConcat);
8699 Ops[0] = Vec;
8700 SDValue UndefVec = DAG.getUNDEF(VecVT);
8701 for (unsigned i = 1; i < NumConcat; ++i)
8702 Ops[i] = UndefVec;
8703
8704 return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
8705}
8706
8707SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
8708 const SDLoc &dl) const {
8709 bool IsStrict = Op->isStrictFPOpcode();
8710 unsigned Opc = Op.getOpcode();
8711 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8712 assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP ||
8713          Opc == ISD::STRICT_UINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP) &&
8714         "Unexpected conversion type");
8715 assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
8716 "Supports conversions to v2f64/v4f32 only.");
8717
8718 // TODO: Any other flags to propagate?
8719  SDNodeFlags Flags;
8720  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8721
8722 bool SignedConv = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
8723 bool FourEltRes = Op.getValueType() == MVT::v4f32;
8724
8725 SDValue Wide = widenVec(DAG, Src, dl);
8726 EVT WideVT = Wide.getValueType();
8727 unsigned WideNumElts = WideVT.getVectorNumElements();
8728 MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;
8729
8730 SmallVector<int, 16> ShuffV;
8731 for (unsigned i = 0; i < WideNumElts; ++i)
8732 ShuffV.push_back(i + WideNumElts);
8733
8734 int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
8735 int SaveElts = FourEltRes ? 4 : 2;
8736 if (Subtarget.isLittleEndian())
8737 for (int i = 0; i < SaveElts; i++)
8738 ShuffV[i * Stride] = i;
8739 else
8740 for (int i = 1; i <= SaveElts; i++)
8741 ShuffV[i * Stride - 1] = i - 1;
8742
8743 SDValue ShuffleSrc2 =
8744 SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
8745 SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);
8746
8747 SDValue Extend;
8748 if (SignedConv) {
8749 Arrange = DAG.getBitcast(IntermediateVT, Arrange);
8750 EVT ExtVT = Src.getValueType();
8751 if (Subtarget.hasP9Altivec())
8752 ExtVT = EVT::getVectorVT(*DAG.getContext(), WideVT.getVectorElementType(),
8753 IntermediateVT.getVectorNumElements());
8754
8755 Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
8756 DAG.getValueType(ExtVT));
8757 } else
8758 Extend = DAG.getNode(ISD::BITCAST, dl, IntermediateVT, Arrange);
8759
8760 if (IsStrict)
8761 return DAG.getNode(Opc, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
8762 {Op.getOperand(0), Extend}, Flags);
8763
8764 return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
8765}
8766
8767SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
8768 SelectionDAG &DAG) const {
8769 SDLoc dl(Op);
8770 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8771 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8772 bool IsStrict = Op->isStrictFPOpcode();
8773 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8774 SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
8775
8776 // TODO: Any other flags to propagate?
8777  SDNodeFlags Flags;
8778  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8779
8780 EVT InVT = Src.getValueType();
8781 EVT OutVT = Op.getValueType();
8782 if (OutVT.isVector() && OutVT.isFloatingPoint() &&
8783 isOperationCustom(Op.getOpcode(), InVT))
8784 return LowerINT_TO_FPVector(Op, DAG, dl);
8785
8786 // Conversions to f128 are legal.
8787 if (Op.getValueType() == MVT::f128)
8788 return Subtarget.hasP9Vector() ? Op : SDValue();
8789
8790 // Don't handle ppc_fp128 here; let it be lowered to a libcall.
8791 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
8792 return SDValue();
8793
8794 if (Src.getValueType() == MVT::i1) {
8795 SDValue Sel = DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Src,
8796 DAG.getConstantFP(1.0, dl, Op.getValueType()),
8797 DAG.getConstantFP(0.0, dl, Op.getValueType()));
8798 if (IsStrict)
8799 return DAG.getMergeValues({Sel, Chain}, dl);
8800 else
8801 return Sel;
8802 }
8803
8804 // If we have direct moves, we can do all the conversion, skip the store/load
8805 // however, without FPCVT we can't do most conversions.
8806 if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
8807 Subtarget.isPPC64() && Subtarget.hasFPCVT())
8808 return LowerINT_TO_FPDirectMove(Op, DAG, dl);
8809
8810 assert((IsSigned || Subtarget.hasFPCVT()) &&
8811 "UINT_TO_FP is supported only with FPCVT");
8812
8813 if (Src.getValueType() == MVT::i64) {
8814 SDValue SINT = Src;
8815 // When converting to single-precision, we actually need to convert
8816 // to double-precision first and then round to single-precision.
8817 // To avoid double-rounding effects during that operation, we have
8818 // to prepare the input operand. Bits that might be truncated when
8819 // converting to double-precision are replaced by a bit that won't
8820 // be lost at this stage, but is below the single-precision rounding
8821 // position.
8822 //
8823 // However, if -enable-unsafe-fp-math is in effect, accept double
8824 // rounding to avoid the extra overhead.
8825 if (Op.getValueType() == MVT::f32 &&
8826 !Subtarget.hasFPCVT() &&
8827        !DAG.getTarget().Options.UnsafeFPMath) {
8828
8829 // Twiddle input to make sure the low 11 bits are zero. (If this
8830 // is the case, we are guaranteed the value will fit into the 53 bit
8831 // mantissa of an IEEE double-precision value without rounding.)
8832 // If any of those low 11 bits were not zero originally, make sure
8833 // bit 12 (value 2048) is set instead, so that the final rounding
8834 // to single-precision gets the correct result.
8835 SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8836 SINT, DAG.getConstant(2047, dl, MVT::i64));
8837 Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
8838 Round, DAG.getConstant(2047, dl, MVT::i64));
8839 Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
8840 Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8841 Round, DAG.getConstant(-2048, dl, MVT::i64));
8842
8843 // However, we cannot use that value unconditionally: if the magnitude
8844 // of the input value is small, the bit-twiddling we did above might
8845 // end up visibly changing the output. Fortunately, in that case, we
8846 // don't need to twiddle bits since the original input will convert
8847 // exactly to double-precision floating-point already. Therefore,
8848 // construct a conditional to use the original value if the top 11
8849 // bits are all sign-bit copies, and use the rounded value computed
8850 // above otherwise.
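      //
      // For example (illustrative value): SINT = 0x0020000020000001
      // (2^53+2^29+1) has nonzero low bits, so Round = 0x0020000020000800
      // (2^53+2^29+2^11): bits 0-10 are cleared and sticky bit 11 is set.
      // Round converts exactly to double and still rounds up to 2^53+2^30 in
      // single precision, matching a direct i64->f32 rounding, whereas
      // converting the original SINT through double first would double-round
      // down to 2^53.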
8851 SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
8852 SINT, DAG.getConstant(53, dl, MVT::i32));
8853 Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
8854 Cond, DAG.getConstant(1, dl, MVT::i64));
8855 Cond = DAG.getSetCC(
8856 dl,
8857 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
8858 Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
8859
8860 SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
8861 }
8862
8863 ReuseLoadInfo RLI;
8864 SDValue Bits;
8865
8867 if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
8868 Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8869 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8870 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8871 } else if (Subtarget.hasLFIWAX() &&
8872 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
8873 MachineMemOperand *MMO =
8874          MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8875                                  RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8876 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8877      Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
8878                                     DAG.getVTList(MVT::f64, MVT::Other),
8879 Ops, MVT::i32, MMO);
8880 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8881 } else if (Subtarget.hasFPCVT() &&
8882 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
8883 MachineMemOperand *MMO =
8884          MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8885                                  RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8886 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8887      Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
8888                                     DAG.getVTList(MVT::f64, MVT::Other),
8889 Ops, MVT::i32, MMO);
8890 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8891 } else if (((Subtarget.hasLFIWAX() &&
8892 SINT.getOpcode() == ISD::SIGN_EXTEND) ||
8893 (Subtarget.hasFPCVT() &&
8894 SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
8895 SINT.getOperand(0).getValueType() == MVT::i32) {
8896 MachineFrameInfo &MFI = MF.getFrameInfo();
8897 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8898
8899 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8900 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8901
8902 SDValue Store = DAG.getStore(Chain, dl, SINT.getOperand(0), FIdx,
8903                                   MachinePointerInfo::getFixedStack(
8904                                       DAG.getMachineFunction(), FrameIdx));
8905 Chain = Store;
8906
8907 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8908 "Expected an i32 store");
8909
8910 RLI.Ptr = FIdx;
8911 RLI.Chain = Chain;
8912 RLI.MPI =
8913          MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8914      RLI.Alignment = Align(4);
8915
8916 MachineMemOperand *MMO =
8917        MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8918                                RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8919 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8920      Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
8921                                         PPCISD::LFIWZX : PPCISD::LFIWAX,
8922                                     dl, DAG.getVTList(MVT::f64, MVT::Other),
8923 Ops, MVT::i32, MMO);
8924 Chain = Bits.getValue(1);
8925 } else
8926 Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
8927
8928 SDValue FP = convertIntToFP(Op, Bits, DAG, Subtarget, Chain);
8929 if (IsStrict)
8930 Chain = FP.getValue(1);
8931
8932 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8933 if (IsStrict)
8934        FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
8935                         DAG.getVTList(MVT::f32, MVT::Other),
8936 {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
8937 else
8938 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
8939 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
8940 }
8941 return FP;
8942 }
8943
8944 assert(Src.getValueType() == MVT::i32 &&
8945 "Unhandled INT_TO_FP type in custom expander!");
8946 // Since we only generate this in 64-bit mode, we can take advantage of
8947 // 64-bit registers. In particular, sign extend the input value into the
8948 // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
8949 // then lfd it and fcfid it.
8950  MachineFunction &MF = DAG.getMachineFunction();
8951  MachineFrameInfo &MFI = MF.getFrameInfo();
8952 EVT PtrVT = getPointerTy(MF.getDataLayout());
8953
8954 SDValue Ld;
8955 if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
8956 ReuseLoadInfo RLI;
8957 bool ReusingLoad;
8958 if (!(ReusingLoad = canReuseLoadAddress(Src, MVT::i32, RLI, DAG))) {
8959 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8960 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8961
8962 SDValue Store = DAG.getStore(Chain, dl, Src, FIdx,
8963                                   MachinePointerInfo::getFixedStack(
8964                                       DAG.getMachineFunction(), FrameIdx));
8965 Chain = Store;
8966
8967 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8968 "Expected an i32 store");
8969
8970 RLI.Ptr = FIdx;
8971 RLI.Chain = Chain;
8972 RLI.MPI =
8973          MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8974      RLI.Alignment = Align(4);
8975 }
8976
8977 MachineMemOperand *MMO =
8978        MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8979                                RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8980 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8981 Ld = DAG.getMemIntrinsicNode(IsSigned ? PPCISD::LFIWAX : PPCISD::LFIWZX, dl,
8982 DAG.getVTList(MVT::f64, MVT::Other), Ops,
8983 MVT::i32, MMO);
8984 Chain = Ld.getValue(1);
8985 if (ReusingLoad)
8986 spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
8987 } else {
8988 assert(Subtarget.isPPC64() &&
8989 "i32->FP without LFIWAX supported only on PPC64");
8990
8991 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
8992 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8993
8994 SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Src);
8995
8996 // STD the extended value into the stack slot.
8997 SDValue Store = DAG.getStore(
8998 Chain, dl, Ext64, FIdx,
8999        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
9000    Chain = Store;
9001
9002 // Load the value as a double.
9003 Ld = DAG.getLoad(
9004 MVT::f64, dl, Chain, FIdx,
9005        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
9006    Chain = Ld.getValue(1);
9007 }
9008
9009 // FCFID it and return it.
9010 SDValue FP = convertIntToFP(Op, Ld, DAG, Subtarget, Chain);
9011 if (IsStrict)
9012 Chain = FP.getValue(1);
9013 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
9014 if (IsStrict)
9015      FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
9016                       DAG.getVTList(MVT::f32, MVT::Other),
9017 {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
9018 else
9019 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
9020 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
9021 }
9022 return FP;
9023}
9024
9025SDValue PPCTargetLowering::LowerGET_ROUNDING(SDValue Op,
9026 SelectionDAG &DAG) const {
9027 SDLoc dl(Op);
9028 /*
9029  The rounding mode is in bits 30:31 of FPSCR, and has the following
9030 settings:
9031 00 Round to nearest
9032 01 Round to 0
9033 10 Round to +inf
9034 11 Round to -inf
9035
9036 GET_ROUNDING, on the other hand, expects the following:
9037 -1 Undefined
9038 0 Round to 0
9039 1 Round to nearest
9040 2 Round to +inf
9041 3 Round to -inf
9042
9043 To perform the conversion, we do:
9044 ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
9045 */
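  // Checking the formula against the table above (values illustrative):
  //   FPSCR&3 = 0b00 (nearest): 0 ^ ((~0 & 3) >> 1) = 0 ^ 1 = 1
  //   FPSCR&3 = 0b01 (to zero): 1 ^ ((~1 & 3) >> 1) = 1 ^ 1 = 0
  //   FPSCR&3 = 0b10 (to +inf): 2 ^ ((~2 & 3) >> 1) = 2 ^ 0 = 2
  //   FPSCR&3 = 0b11 (to -inf): 3 ^ ((~3 & 3) >> 1) = 3 ^ 0 = 3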
9046
9047  MachineFunction &MF = DAG.getMachineFunction();
9048  EVT VT = Op.getValueType();
9049 EVT PtrVT = getPointerTy(MF.getDataLayout());
9050
9051 // Save FP Control Word to register
9052 SDValue Chain = Op.getOperand(0);
9053 SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain);
9054 Chain = MFFS.getValue(1);
9055
9056 SDValue CWD;
9057 if (isTypeLegal(MVT::i64)) {
9058 CWD = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
9059 DAG.getNode(ISD::BITCAST, dl, MVT::i64, MFFS));
9060 } else {
9061 // Save FP register to stack slot
9062 int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
9063 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
9064 Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo());
9065
9066 // Load FP Control Word from low 32 bits of stack slot.
9067    assert(hasBigEndianPartOrdering(MVT::i64, MF.getDataLayout()) &&
9068           "Stack slot adjustment is valid only on big endian subtargets!");
9069 SDValue Four = DAG.getConstant(4, dl, PtrVT);
9070 SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
9071 CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo());
9072 Chain = CWD.getValue(1);
9073 }
9074
9075 // Transform as necessary
9076 SDValue CWD1 =
9077 DAG.getNode(ISD::AND, dl, MVT::i32,
9078 CWD, DAG.getConstant(3, dl, MVT::i32));
9079 SDValue CWD2 =
9080 DAG.getNode(ISD::SRL, dl, MVT::i32,
9081 DAG.getNode(ISD::AND, dl, MVT::i32,
9082 DAG.getNode(ISD::XOR, dl, MVT::i32,
9083 CWD, DAG.getConstant(3, dl, MVT::i32)),
9084 DAG.getConstant(3, dl, MVT::i32)),
9085 DAG.getConstant(1, dl, MVT::i32));
9086
9087 SDValue RetVal =
9088 DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
9089
9090 RetVal =
9091      DAG.getNode((VT.getSizeInBits() < 16 ? ISD::TRUNCATE : ISD::ZERO_EXTEND),
9092                  dl, VT, RetVal);
9093
9094 return DAG.getMergeValues({RetVal, Chain}, dl);
9095}
9096
9097SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9098 EVT VT = Op.getValueType();
9099 unsigned BitWidth = VT.getSizeInBits();
9100 SDLoc dl(Op);
9101 assert(Op.getNumOperands() == 3 &&
9102 VT == Op.getOperand(1).getValueType() &&
9103 "Unexpected SHL!");
9104
9105 // Expand into a bunch of logical ops. Note that these ops
9106 // depend on the PPC behavior for oversized shift amounts.
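  //
  // Worked example (illustrative): for VT == i32 (BitWidth == 32) and Amt == 40,
  //   OutLo = Lo << 40 = 0
  //   OutHi = (Hi << 40) | (Lo >> (32 - 40)) | (Lo << (40 - 32)) = Lo << 8
  // because 32-bit PPC shifts by amounts in [32, 63] produce 0 (32 - 40 wraps
  // to 56), which matches a 64-bit left shift of Hi:Lo by 40.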
9107 SDValue Lo = Op.getOperand(0);
9108 SDValue Hi = Op.getOperand(1);
9109 SDValue Amt = Op.getOperand(2);
9110 EVT AmtVT = Amt.getValueType();
9111
9112 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9113 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9114 SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
9115 SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
9116 SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
9117 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9118 DAG.getConstant(-BitWidth, dl, AmtVT));
9119 SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
9120 SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
9121 SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
9122 SDValue OutOps[] = { OutLo, OutHi };
9123 return DAG.getMergeValues(OutOps, dl);
9124}
9125
9126SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9127 EVT VT = Op.getValueType();
9128 SDLoc dl(Op);
9129 unsigned BitWidth = VT.getSizeInBits();
9130 assert(Op.getNumOperands() == 3 &&
9131 VT == Op.getOperand(1).getValueType() &&
9132 "Unexpected SRL!");
9133
9134 // Expand into a bunch of logical ops. Note that these ops
9135 // depend on the PPC behavior for oversized shift amounts.
9136 SDValue Lo = Op.getOperand(0);
9137 SDValue Hi = Op.getOperand(1);
9138 SDValue Amt = Op.getOperand(2);
9139 EVT AmtVT = Amt.getValueType();
9140
9141 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9142 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9143 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
9144 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
9145 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9146 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9147 DAG.getConstant(-BitWidth, dl, AmtVT));
9148 SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
9149 SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
9150 SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
9151 SDValue OutOps[] = { OutLo, OutHi };
9152 return DAG.getMergeValues(OutOps, dl);
9153}
9154
9155SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
9156 SDLoc dl(Op);
9157 EVT VT = Op.getValueType();
9158 unsigned BitWidth = VT.getSizeInBits();
9159 assert(Op.getNumOperands() == 3 &&
9160 VT == Op.getOperand(1).getValueType() &&
9161 "Unexpected SRA!");
9162
9163 // Expand into a bunch of logical ops, followed by a select_cc.
9164 SDValue Lo = Op.getOperand(0);
9165 SDValue Hi = Op.getOperand(1);
9166 SDValue Amt = Op.getOperand(2);
9167 EVT AmtVT = Amt.getValueType();
9168
9169 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9170 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9171 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
9172 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
9173 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9174 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9175 DAG.getConstant(-BitWidth, dl, AmtVT));
9176 SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
9177 SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
9178 SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
9179 Tmp4, Tmp6, ISD::SETLE);
9180 SDValue OutOps[] = { OutLo, OutHi };
9181 return DAG.getMergeValues(OutOps, dl);
9182}
9183
9184SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op,
9185 SelectionDAG &DAG) const {
9186 SDLoc dl(Op);
9187 EVT VT = Op.getValueType();
9188 unsigned BitWidth = VT.getSizeInBits();
9189
9190 bool IsFSHL = Op.getOpcode() == ISD::FSHL;
9191 SDValue X = Op.getOperand(0);
9192 SDValue Y = Op.getOperand(1);
9193 SDValue Z = Op.getOperand(2);
9194 EVT AmtVT = Z.getValueType();
9195
9196 // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
9197 // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
9198 // This is simpler than TargetLowering::expandFunnelShift because we can rely
9199 // on PowerPC shift by BW being well defined.
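  //
  // Worked example (illustrative values): fshl(X = 0xAABBCCDD, Y = 0x11223344,
  // Z = 8) with BitWidth == 32 gives (X << 8) | (Y >> 24)
  // = 0xBBCCDD00 | 0x00000011 = 0xBBCCDD11. When Z % 32 == 0 the second shift
  // amount is 32, which PPC defines to produce 0, so no extra masking is needed.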
9200 Z = DAG.getNode(ISD::AND, dl, AmtVT, Z,
9201 DAG.getConstant(BitWidth - 1, dl, AmtVT));
9202 SDValue SubZ =
9203 DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, dl, AmtVT), Z);
9204 X = DAG.getNode(PPCISD::SHL, dl, VT, X, IsFSHL ? Z : SubZ);
9205 Y = DAG.getNode(PPCISD::SRL, dl, VT, Y, IsFSHL ? SubZ : Z);
9206 return DAG.getNode(ISD::OR, dl, VT, X, Y);
9207}
9208
9209//===----------------------------------------------------------------------===//
9210// Vector related lowering.
9211//
9212
9213/// getCanonicalConstSplat - Build a canonical splat immediate of Val with an
9214/// element size of SplatSize. Cast the result to VT.
9215static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
9216 SelectionDAG &DAG, const SDLoc &dl) {
9217 static const MVT VTys[] = { // canonical VT to use for each size.
9218 MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
9219 };
9220
9221 EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
9222
9223 // For a splat with all ones, turn it to vspltisb 0xFF to canonicalize.
9224 if (Val == ((1LLU << (SplatSize * 8)) - 1)) {
9225 SplatSize = 1;
9226 Val = 0xFF;
9227 }
9228
9229 EVT CanonicalVT = VTys[SplatSize-1];
9230
9231 // Build a canonical splat for this value.
9232 return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT));
9233}
9234
9235/// BuildIntrinsicOp - Return a unary operator intrinsic node with the
9236/// specified intrinsic ID.
9237 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
9238                                 const SDLoc &dl, EVT DestVT = MVT::Other) {
9239 if (DestVT == MVT::Other) DestVT = Op.getValueType();
9240 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9241 DAG.getConstant(IID, dl, MVT::i32), Op);
9242}
9243
9244/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
9245/// specified intrinsic ID.
9246static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
9247 SelectionDAG &DAG, const SDLoc &dl,
9248 EVT DestVT = MVT::Other) {
9249 if (DestVT == MVT::Other) DestVT = LHS.getValueType();
9250 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9251 DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
9252}
9253
9254/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
9255/// specified intrinsic ID.
9256static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
9257 SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
9258 EVT DestVT = MVT::Other) {
9259 if (DestVT == MVT::Other) DestVT = Op0.getValueType();
9260 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9261 DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
9262}
9263
9264/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
9265/// amount. The result has the specified value type.
9266static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
9267 SelectionDAG &DAG, const SDLoc &dl) {
9268 // Force LHS/RHS to be the right type.
9269 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
9270 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
9271
9272 int Ops[16];
9273 for (unsigned i = 0; i != 16; ++i)
9274 Ops[i] = i + Amt;
9275 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
9276 return DAG.getNode(ISD::BITCAST, dl, VT, T);
9277}
9278
9279/// Do we have an efficient pattern in a .td file for this node?
9280///
9281/// \param V - pointer to the BuildVectorSDNode being matched
9282/// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
9283///
9284/// There are some patterns where it is beneficial to keep a BUILD_VECTOR
9285/// node as a BUILD_VECTOR node rather than expanding it. The patterns where
9286/// the opposite is true (expansion is beneficial) are:
9287/// - The node builds a vector out of integers that are not 32 or 64-bits
9288/// - The node builds a vector out of constants
9289/// - The node is a "load-and-splat"
9290/// In all other cases, we will choose to keep the BUILD_VECTOR.
9291 static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
9292                                             bool HasDirectMove,
9293 bool HasP8Vector) {
9294 EVT VecVT = V->getValueType(0);
9295 bool RightType = VecVT == MVT::v2f64 ||
9296 (HasP8Vector && VecVT == MVT::v4f32) ||
9297 (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
9298 if (!RightType)
9299 return false;
9300
9301 bool IsSplat = true;
9302 bool IsLoad = false;
9303 SDValue Op0 = V->getOperand(0);
9304
9305 // This function is called in a block that confirms the node is not a constant
9306 // splat. So a constant BUILD_VECTOR here means the vector is built out of
9307 // different constants.
9308 if (V->isConstant())
9309 return false;
9310 for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
9311 if (V->getOperand(i).isUndef())
9312 return false;
9313 // We want to expand nodes that represent load-and-splat even if the
9314 // loaded value is a floating point truncation or conversion to int.
9315 if (V->getOperand(i).getOpcode() == ISD::LOAD ||
9316 (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
9317 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9318 (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
9319 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9320 (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
9321 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
9322 IsLoad = true;
9323 // If the operands are different or the input is not a load and has more
9324 // uses than just this BV node, then it isn't a splat.
9325 if (V->getOperand(i) != Op0 ||
9326 (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
9327 IsSplat = false;
9328 }
9329 return !(IsSplat && IsLoad);
9330}
9331
9332// Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
9333SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
9334
9335 SDLoc dl(Op);
9336 SDValue Op0 = Op->getOperand(0);
9337
9338 if ((Op.getValueType() != MVT::f128) ||
9339 (Op0.getOpcode() != ISD::BUILD_PAIR) ||
9340 (Op0.getOperand(0).getValueType() != MVT::i64) ||
9341 (Op0.getOperand(1).getValueType() != MVT::i64))
9342 return SDValue();
9343
9344 return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Op0.getOperand(0),
9345 Op0.getOperand(1));
9346}
9347
9348static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) {
9349 const SDValue *InputLoad = &Op;
9350 while (InputLoad->getOpcode() == ISD::BITCAST)
9351 InputLoad = &InputLoad->getOperand(0);
9352 if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR ||
9353      InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED) {
9354    IsPermuted = InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED;
9355 InputLoad = &InputLoad->getOperand(0);
9356 }
9357 if (InputLoad->getOpcode() != ISD::LOAD)
9358 return nullptr;
9359 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9360 return ISD::isNormalLoad(LD) ? InputLoad : nullptr;
9361}
9362
9363// Convert the argument APFloat to a single precision APFloat if there is no
9364// loss in information during the conversion to single precision APFloat and the
9365// resulting number is not a denormal number. Return true if successful.
9366 bool llvm::convertToNonDenormSingle(APFloat &ArgAPFloat) {
9367  APFloat APFloatToConvert = ArgAPFloat;
9368 bool LosesInfo = true;
9369  APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
9370                           &LosesInfo);
9371 bool Success = (!LosesInfo && !APFloatToConvert.isDenormal());
9372 if (Success)
9373 ArgAPFloat = APFloatToConvert;
9374 return Success;
9375}
9376
9377// Bitcast the argument APInt to a double and convert it to a single precision
9378// APFloat, bitcast the APFloat to an APInt and assign it to the original
9379// argument if there is no loss in information during the conversion from
9380// double to single precision APFloat and the resulting number is not a denormal
9381// number. Return true if successful.
9382 bool llvm::convertToNonDenormSingle(APInt &ArgAPInt) {
9383  double DpValue = ArgAPInt.bitsToDouble();
9384 APFloat APFloatDp(DpValue);
9385 bool Success = convertToNonDenormSingle(APFloatDp);
9386 if (Success)
9387 ArgAPInt = APFloatDp.bitcastToAPInt();
9388 return Success;
9389}
9390
9391 // Nondestructive check for convertToNonDenormSingle.
9392 bool llvm::checkConvertToNonDenormSingle(APFloat &ArgAPFloat) {
9393  // Only convert if it loses info, since XXSPLTIDP should
9394 // handle the other case.
9395 APFloat APFloatToConvert = ArgAPFloat;
9396 bool LosesInfo = true;
9397  APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
9398                           &LosesInfo);
9399
9400 return (!LosesInfo && !APFloatToConvert.isDenormal());
9401}
9402
9403static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
9404 unsigned &Opcode) {
9405 LoadSDNode *InputNode = dyn_cast<LoadSDNode>(Op.getOperand(0));
9406 if (!InputNode || !Subtarget.hasVSX() || !ISD::isUNINDEXEDLoad(InputNode))
9407 return false;
9408
9409 EVT Ty = Op->getValueType(0);
9410 // For v2f64, v4f32 and v4i32 types, we require the load to be non-extending
9411 // as we cannot handle extending loads for these types.
9412 if ((Ty == MVT::v2f64 || Ty == MVT::v4f32 || Ty == MVT::v4i32) &&
9413 ISD::isNON_EXTLoad(InputNode))
9414 return true;
9415
9416 EVT MemVT = InputNode->getMemoryVT();
9417 // For v8i16 and v16i8 types, extending loads can be handled as long as the
9418 // memory VT is the same vector element VT type.
9419 // The loads feeding into the v8i16 and v16i8 types will be extending because
9420 // scalar i8/i16 are not legal types.
9421 if ((Ty == MVT::v8i16 || Ty == MVT::v16i8) && ISD::isEXTLoad(InputNode) &&
9422 (MemVT == Ty.getVectorElementType()))
9423 return true;
9424
9425 if (Ty == MVT::v2i64) {
9426 // Check the extend type, when the input type is i32, and the output vector
9427 // type is v2i64.
9428 if (MemVT == MVT::i32) {
9429 if (ISD::isZEXTLoad(InputNode))
9430 Opcode = PPCISD::ZEXT_LD_SPLAT;
9431 if (ISD::isSEXTLoad(InputNode))
9432 Opcode = PPCISD::SEXT_LD_SPLAT;
9433 }
9434 return true;
9435 }
9436 return false;
9437}
9438
9439// If this is a case we can't handle, return null and let the default
9440// expansion code take care of it. If we CAN select this case, and if it
9441// selects to a single instruction, return Op. Otherwise, if we can codegen
9442// this case more efficiently than a constant pool load, lower it to the
9443// sequence of ops that should be used.
9444SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
9445 SelectionDAG &DAG) const {
9446 SDLoc dl(Op);
9447 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
9448 assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
9449
9450 // Check if this is a splat of a constant value.
9451 APInt APSplatBits, APSplatUndef;
9452 unsigned SplatBitSize;
9453 bool HasAnyUndefs;
9454 bool BVNIsConstantSplat =
9455 BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
9456 HasAnyUndefs, 0, !Subtarget.isLittleEndian());
9457
9458 // If it is a splat of a double, check if we can shrink it to a 32 bit
9459 // non-denormal float which when converted back to double gives us the same
9460 // double. This is to exploit the XXSPLTIDP instruction.
9461 // If we lose precision, we use XXSPLTI32DX.
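  // For example (illustrative), a v2f64 splat of 1.0 (0x3FF0000000000000)
  // converts losslessly to single-precision 1.0f (0x3F800000), so XXSPLTIDP can
  // materialize it; a splat of 0.1 is not exactly representable in single
  // precision and takes the XXSPLTI32DX path below.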
9462 if (BVNIsConstantSplat && (SplatBitSize == 64) &&
9463 Subtarget.hasPrefixInstrs()) {
9464 // Check the type first to short-circuit so we don't modify APSplatBits if
9465 // this block isn't executed.
9466 if ((Op->getValueType(0) == MVT::v2f64) &&
9467 convertToNonDenormSingle(APSplatBits)) {
9468 SDValue SplatNode = DAG.getNode(
9469 PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
9470 DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
9471 return DAG.getBitcast(Op.getValueType(), SplatNode);
9472 } else {
9473 // We may lose precision, so we have to use XXSPLTI32DX.
9474
9475 uint32_t Hi =
9476 (uint32_t)((APSplatBits.getZExtValue() & 0xFFFFFFFF00000000LL) >> 32);
9477 uint32_t Lo =
9478 (uint32_t)(APSplatBits.getZExtValue() & 0xFFFFFFFF);
9479 SDValue SplatNode = DAG.getUNDEF(MVT::v2i64);
9480
9481 if (!Hi || !Lo)
9482 // If either load is 0, then we should generate XXLXOR to set to 0.
9483 SplatNode = DAG.getTargetConstant(0, dl, MVT::v2i64);
9484
9485 if (Hi)
9486 SplatNode = DAG.getNode(
9487 PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9488 DAG.getTargetConstant(0, dl, MVT::i32),
9489 DAG.getTargetConstant(Hi, dl, MVT::i32));
9490
9491 if (Lo)
9492 SplatNode =
9493 DAG.getNode(PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9494 DAG.getTargetConstant(1, dl, MVT::i32),
9495 DAG.getTargetConstant(Lo, dl, MVT::i32));
9496
9497 return DAG.getBitcast(Op.getValueType(), SplatNode);
9498 }
9499 }
9500
9501 if (!BVNIsConstantSplat || SplatBitSize > 32) {
9502 unsigned NewOpcode = PPCISD::LD_SPLAT;
9503
9504 // Handle load-and-splat patterns as we have instructions that will do this
9505 // in one go.
9506 if (DAG.isSplatValue(Op, true) &&
9507 isValidSplatLoad(Subtarget, Op, NewOpcode)) {
9508 const SDValue *InputLoad = &Op.getOperand(0);
9509 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9510
9511 // If the input load is an extending load, it will be an i32 -> i64
9512 // extending load and isValidSplatLoad() will update NewOpcode.
9513 unsigned MemorySize = LD->getMemoryVT().getScalarSizeInBits();
9514 unsigned ElementSize =
9515 MemorySize * ((NewOpcode == PPCISD::LD_SPLAT) ? 1 : 2);
9516
9517 assert(((ElementSize == 2 * MemorySize)
9518 ? (NewOpcode == PPCISD::ZEXT_LD_SPLAT ||
9519 NewOpcode == PPCISD::SEXT_LD_SPLAT)
9520 : (NewOpcode == PPCISD::LD_SPLAT)) &&
9521 "Unmatched element size and opcode!\n");
9522
9523 // Checking for a single use of this load, we have to check for vector
9524 // width (128 bits) / ElementSize uses (since each operand of the
9525      // BUILD_VECTOR is a separate use of the value).
9526 unsigned NumUsesOfInputLD = 128 / ElementSize;
9527 for (SDValue BVInOp : Op->ops())
9528 if (BVInOp.isUndef())
9529 NumUsesOfInputLD--;
9530
9531      // Exclude some cases where LD_SPLAT is worse than scalar_to_vector:
9532 // Below cases should also happen for "lfiwzx/lfiwax + LE target + index
9533 // 1" and "lxvrhx + BE target + index 7" and "lxvrbx + BE target + index
9534      // 15", but isValidSplatLoad() will only return true when
9535 // the data at index 0 is not nullptr. So we will not get into trouble for
9536 // these cases.
9537 //
9538 // case 1 - lfiwzx/lfiwax
9539 // 1.1: load result is i32 and is sign/zero extend to i64;
9540 // 1.2: build a v2i64 vector type with above loaded value;
9541 // 1.3: the vector has only one value at index 0, others are all undef;
9542 // 1.4: on BE target, so that lfiwzx/lfiwax does not need any permute.
9543 if (NumUsesOfInputLD == 1 &&
9544 (Op->getValueType(0) == MVT::v2i64 && NewOpcode != PPCISD::LD_SPLAT &&
9545 !Subtarget.isLittleEndian() && Subtarget.hasVSX() &&
9546 Subtarget.hasLFIWAX()))
9547 return SDValue();
9548
9549 // case 2 - lxvr[hb]x
9550 // 2.1: load result is at most i16;
9551 // 2.2: build a vector with above loaded value;
9552 // 2.3: the vector has only one value at index 0, others are all undef;
9553 // 2.4: on LE target, so that lxvr[hb]x does not need any permute.
9554 if (NumUsesOfInputLD == 1 && Subtarget.isLittleEndian() &&
9555 Subtarget.isISA3_1() && ElementSize <= 16)
9556 return SDValue();
9557
9558 assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?");
9559 if (InputLoad->getNode()->hasNUsesOfValue(NumUsesOfInputLD, 0) &&
9560 Subtarget.hasVSX()) {
9561 SDValue Ops[] = {
9562 LD->getChain(), // Chain
9563 LD->getBasePtr(), // Ptr
9564 DAG.getValueType(Op.getValueType()) // VT
9565 };
9566 SDValue LdSplt = DAG.getMemIntrinsicNode(
9567 NewOpcode, dl, DAG.getVTList(Op.getValueType(), MVT::Other), Ops,
9568 LD->getMemoryVT(), LD->getMemOperand());
9569 // Replace all uses of the output chain of the original load with the
9570 // output chain of the new load.
9571 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1),
9572 LdSplt.getValue(1));
9573 return LdSplt;
9574 }
9575 }
9576
9577 // In 64-bit mode BUILD_VECTOR nodes that are not constant splats of up to
9578 // 32-bits can be lowered to VSX instructions under certain conditions.
9579 // Without VSX, there is no pattern more efficient than expanding the node.
9580 if (Subtarget.hasVSX() && Subtarget.isPPC64() &&
9581 haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
9582 Subtarget.hasP8Vector()))
9583 return Op;
9584 return SDValue();
9585 }
9586
9587 uint64_t SplatBits = APSplatBits.getZExtValue();
9588 uint64_t SplatUndef = APSplatUndef.getZExtValue();
9589 unsigned SplatSize = SplatBitSize / 8;
9590
9591 // First, handle single instruction cases.
9592
9593 // All zeros?
9594 if (SplatBits == 0) {
9595 // Canonicalize all zero vectors to be v4i32.
9596 if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
9597 SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
9598 Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
9599 }
9600 return Op;
9601 }
9602
9603 // We have XXSPLTIW for constant splats four bytes wide.
9604 // Given vector length is a multiple of 4, 2-byte splats can be replaced
9605 // with 4-byte splats. We replicate the SplatBits in case of 2-byte splat to
9606 // make a 4-byte splat element. For example: 2-byte splat of 0xABAB can be
9607 // turned into a 4-byte splat of 0xABABABAB.
9608 if (Subtarget.hasPrefixInstrs() && SplatSize == 2)
9609 return getCanonicalConstSplat(SplatBits | (SplatBits << 16), SplatSize * 2,
9610 Op.getValueType(), DAG, dl);
9611
9612 if (Subtarget.hasPrefixInstrs() && SplatSize == 4)
9613 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9614 dl);
9615
9616 // We have XXSPLTIB for constant splats one byte wide.
9617 if (Subtarget.hasP9Vector() && SplatSize == 1)
9618 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9619 dl);
9620
9621 // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
9622 int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
9623 (32-SplatBitSize));
9624 if (SextVal >= -16 && SextVal <= 15)
9625 return getCanonicalConstSplat(SextVal, SplatSize, Op.getValueType(), DAG,
9626 dl);
9627
9628 // Two instruction sequences.
9629
9630 // If this value is in the range [-32,30] and is even, use:
9631 // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
9632 // If this value is in the range [17,31] and is odd, use:
9633 // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
9634 // If this value is in the range [-31,-17] and is odd, use:
9635 // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
9636 // Note the last two are three-instruction sequences.
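// For example, a splat of 30 can be formed as VSPLTI(15) + VSPLTI(15), and a
// splat of 27 as VSPLTI(11) - VSPLTI(-16), since 11 - (-16) == 27.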
9637 if (SextVal >= -32 && SextVal <= 31) {
9638 // To avoid having these optimizations undone by constant folding,
9639 // we convert to a pseudo that will be expanded later into one of
9640 // the above forms.
9641 SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32);
9642 EVT VT = (SplatSize == 1 ? MVT::v16i8 :
9643 (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
9644 SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
9645 SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
9646 if (VT == Op.getValueType())
9647 return RetVal;
9648 else
9649 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
9650 }
9651
9652 // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
9653 // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
9654 // for fneg/fabs.
9655 if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
9656 // Make -1 and vspltisw -1:
9657 SDValue OnesV = getCanonicalConstSplat(-1, 4, MVT::v4i32, DAG, dl);
9658
9659 // Make the VSLW intrinsic, computing 0x8000_0000.
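// (Every element of OnesV is 0xFFFFFFFF; vslw only uses the low 5 bits of
// each shift-amount element, i.e. 31, so each word becomes 0x8000_0000.)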
9660 SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
9661 OnesV, DAG, dl);
9662
9663 // xor by OnesV to invert it.
9664 Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
9665 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9666 }
9667
9668 // Check to see if this is a wide variety of vsplti*, binop self cases.
9669 static const signed char SplatCsts[] = {
9670 -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
9671 -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
9672 };
9673
9674 for (unsigned idx = 0; idx < std::size(SplatCsts); ++idx) {
9675 // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
9676 // cases which are ambiguous (e.g. formation of 0x8000_0000).
9677 int i = SplatCsts[idx];
9678
9679 // Figure out what shift amount will be used by altivec if shifted by i in
9680 // this splat size.
9681 unsigned TypeShiftAmt = i & (SplatBitSize-1);
9682
9683 // vsplti + shl self.
9684 if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
9685 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9686 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9687 Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
9688 Intrinsic::ppc_altivec_vslw
9689 };
9690 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9691 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9692 }
9693
9694 // vsplti + srl self.
9695 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
9696 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9697 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9698 Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
9699 Intrinsic::ppc_altivec_vsrw
9700 };
9701 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9702 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9703 }
9704
9705 // vsplti + rol self.
9706 if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
9707 ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
9708 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9709 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9710 Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
9711 Intrinsic::ppc_altivec_vrlw
9712 };
9713 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9714 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9715 }
9716
9717 // t = vsplti c, result = vsldoi t, t, 1
9718 if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
9719 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9720 unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
9721 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9722 }
9723 // t = vsplti c, result = vsldoi t, t, 2
9724 if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
9725 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9726 unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
9727 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9728 }
9729 // t = vsplti c, result = vsldoi t, t, 3
9730 if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
9731 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9732 unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
9733 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9734 }
9735 }
9736
9737 return SDValue();
9738}
9739
9740/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
9741/// the specified operations to build the shuffle.
9742static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
9743 SDValue RHS, SelectionDAG &DAG,
9744 const SDLoc &dl) {
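// Each perfect-shuffle entry packs the cost in bits 31-30, the operation in
// bits 29-26 and two 13-bit operand IDs; each ID is a base-9 encoding of a
// four-element mask in which digits 0-7 select an element and 8 means undef.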
9745 unsigned OpNum = (PFEntry >> 26) & 0x0F;
9746 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
9747 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
9748
9749 enum {
9750 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
9751 OP_VMRGHW,
9752 OP_VMRGLW,
9753 OP_VSPLTISW0,
9754 OP_VSPLTISW1,
9755 OP_VSPLTISW2,
9756 OP_VSPLTISW3,
9757 OP_VSLDOI4,
9758 OP_VSLDOI8,
9759 OP_VSLDOI12
9760 };
9761
9762 if (OpNum == OP_COPY) {
9763 if (LHSID == (1*9+2)*9+3) return LHS;
9764 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
9765 return RHS;
9766 }
9767
9768 SDValue OpLHS, OpRHS;
9769 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
9770 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
9771
9772 int ShufIdxs[16];
9773 switch (OpNum) {
9774 default: llvm_unreachable("Unknown i32 permute!");
9775 case OP_VMRGHW:
9776 ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;
9777 ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
9778 ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;
9779 ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
9780 break;
9781 case OP_VMRGLW:
9782 ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
9783 ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
9784 ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
9785 ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
9786 break;
9787 case OP_VSPLTISW0:
9788 for (unsigned i = 0; i != 16; ++i)
9789 ShufIdxs[i] = (i&3)+0;
9790 break;
9791 case OP_VSPLTISW1:
9792 for (unsigned i = 0; i != 16; ++i)
9793 ShufIdxs[i] = (i&3)+4;
9794 break;
9795 case OP_VSPLTISW2:
9796 for (unsigned i = 0; i != 16; ++i)
9797 ShufIdxs[i] = (i&3)+8;
9798 break;
9799 case OP_VSPLTISW3:
9800 for (unsigned i = 0; i != 16; ++i)
9801 ShufIdxs[i] = (i&3)+12;
9802 break;
9803 case OP_VSLDOI4:
9804 return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
9805 case OP_VSLDOI8:
9806 return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
9807 case OP_VSLDOI12:
9808 return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
9809 }
9810 EVT VT = OpLHS.getValueType();
9811 OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
9812 OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
9813 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
9814 return DAG.getNode(ISD::BITCAST, dl, VT, T);
9815}
9816
9817/// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
9818/// by the VINSERTB instruction introduced in ISA 3.0, else just return default
9819/// SDValue.
9820SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
9821 SelectionDAG &DAG) const {
9822 const unsigned BytesInVector = 16;
9823 bool IsLE = Subtarget.isLittleEndian();
9824 SDLoc dl(N);
9825 SDValue V1 = N->getOperand(0);
9826 SDValue V2 = N->getOperand(1);
9827 unsigned ShiftElts = 0, InsertAtByte = 0;
9828 bool Swap = false;
9829
9830 // Shifts required to get the byte we want at element 7.
9831 unsigned LittleEndianShifts[] = {8, 7, 6, 5, 4, 3, 2, 1,
9832 0, 15, 14, 13, 12, 11, 10, 9};
9833 unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
9834 1, 2, 3, 4, 5, 6, 7, 8};
9835
9836 ArrayRef<int> Mask = N->getMask();
9837 int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
9838
9839 // For each mask element, find out if we're just inserting something
9840 // from V2 into V1 or vice versa.
9841 // Possible permutations inserting an element from V2 into V1:
9842 // X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
9843 // 0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
9844 // ...
9845 // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
9846 // Inserting from V1 into V2 will be similar, except mask range will be
9847 // [16,31].
9848
9849 bool FoundCandidate = false;
9850 // If both vector operands for the shuffle are the same vector, the mask
9851 // will contain only elements from the first one and the second one will be
9852 // undef.
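// vinsertb reads byte 7 (big-endian numbering) of its source, which is byte 8
// when counted from the little-endian end.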
9853 unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
9854 // Go through the mask of bytes to find an element that's being moved
9855 // from one vector to the other.
9856 for (unsigned i = 0; i < BytesInVector; ++i) {
9857 unsigned CurrentElement = Mask[i];
9858 // If 2nd operand is undefined, we should only look for element 7 in the
9859 // Mask.
9860 if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
9861 continue;
9862
9863 bool OtherElementsInOrder = true;
9864 // Examine the other elements in the Mask to see if they're in original
9865 // order.
9866 for (unsigned j = 0; j < BytesInVector; ++j) {
9867 if (j == i)
9868 continue;
9869 // If CurrentElement is from V1 [0,15], then we expect the rest of the Mask
9870 // to be from V2 [16,31] and vice versa. Unless the 2nd operand is undefined,
9871 // in which case we assume we're always picking from the 1st operand.
9872 int MaskOffset =
9873 (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
9874 if (Mask[j] != OriginalOrder[j] + MaskOffset) {
9875 OtherElementsInOrder = false;
9876 break;
9877 }
9878 }
9879 // If other elements are in original order, we record the number of shifts
9880 // we need to get the element we want into element 7. Also record which byte
9881 // in the vector we should insert into.
9882 if (OtherElementsInOrder) {
9883 // If 2nd operand is undefined, we assume no shifts and no swapping.
9884 if (V2.isUndef()) {
9885 ShiftElts = 0;
9886 Swap = false;
9887 } else {
9888 // Only need the last 4-bits for shifts because operands will be swapped if CurrentElement is >= 2^4.
9889 ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
9890 : BigEndianShifts[CurrentElement & 0xF];
9891 Swap = CurrentElement < BytesInVector;
9892 }
9893 InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
9894 FoundCandidate = true;
9895 break;
9896 }
9897 }
9898
9899 if (!FoundCandidate)
9900 return SDValue();
9901
9902 // Candidate found, construct the proper SDAG sequence with VINSERTB,
9903 // optionally with VECSHL if shift is required.
9904 if (Swap)
9905 std::swap(V1, V2);
9906 if (V2.isUndef())
9907 V2 = V1;
9908 if (ShiftElts) {
9909 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
9910 DAG.getConstant(ShiftElts, dl, MVT::i32));
9911 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
9912 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9913 }
9914 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
9915 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9916}
9917
9918/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
9919/// by the VINSERTH instruction introduced in ISA 3.0, else just return default
9920/// SDValue.
9921SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
9922 SelectionDAG &DAG) const {
9923 const unsigned NumHalfWords = 8;
9924 const unsigned BytesInVector = NumHalfWords * 2;
9925 // Check that the shuffle is on half-words.
9926 if (!isNByteElemShuffleMask(N, 2, 1))
9927 return SDValue();
9928
9929 bool IsLE = Subtarget.isLittleEndian();
9930 SDLoc dl(N);
9931 SDValue V1 = N->getOperand(0);
9932 SDValue V2 = N->getOperand(1);
9933 unsigned ShiftElts = 0, InsertAtByte = 0;
9934 bool Swap = false;
9935
9936 // Shifts required to get the half-word we want at element 3.
9937 unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
9938 unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};
9939
9940 uint32_t Mask = 0;
9941 uint32_t OriginalOrderLow = 0x1234567;
9942 uint32_t OriginalOrderHigh = 0x89ABCDEF;
9943 // Now we look at mask elements 0,2,4,6,8,10,12,14. Pack the mask into a
9944 // 32-bit space, only need 4-bit nibbles per element.
9945 for (unsigned i = 0; i < NumHalfWords; ++i) {
9946 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
9947 Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);
9948 }
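// For example, the identity half-word mask <0,1,2,3,4,5,6,7> packs to
// 0x01234567, which matches OriginalOrderLow.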
9949
9950 // For each mask element, find out if we're just inserting something
9951 // from V2 into V1 or vice versa. Possible permutations inserting an element
9952 // from V2 into V1:
9953 // X, 1, 2, 3, 4, 5, 6, 7
9954 // 0, X, 2, 3, 4, 5, 6, 7
9955 // 0, 1, X, 3, 4, 5, 6, 7
9956 // 0, 1, 2, X, 4, 5, 6, 7
9957 // 0, 1, 2, 3, X, 5, 6, 7
9958 // 0, 1, 2, 3, 4, X, 6, 7
9959 // 0, 1, 2, 3, 4, 5, X, 7
9960 // 0, 1, 2, 3, 4, 5, 6, X
9961 // Inserting from V1 into V2 will be similar, except mask range will be [8,15].
9962
9963 bool FoundCandidate = false;
9964 // Go through the mask of half-words to find an element that's being moved
9965 // from one vector to the other.
9966 for (unsigned i = 0; i < NumHalfWords; ++i) {
9967 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
9968 uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
9969 uint32_t MaskOtherElts = ~(0xF << MaskShift);
9970 uint32_t TargetOrder = 0x0;
9971
9972 // If both vector operands for the shuffle are the same vector, the mask
9973 // will contain only elements from the first one and the second one will be
9974 // undef.
9975 if (V2.isUndef()) {
9976 ShiftElts = 0;
9977 unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
9978 TargetOrder = OriginalOrderLow;
9979 Swap = false;
9980 // Skip if this is not the correct element, or if the mask of the other
9981 // elements doesn't match our expected order.
9982 if (MaskOneElt == VINSERTHSrcElem &&
9983 (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
9984 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
9985 FoundCandidate = true;
9986 break;
9987 }
9988 } else { // If both operands are defined.
9989 // Target order is [8,15] if the current mask is between [0,7].
9990 TargetOrder =
9991 (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
9992 // Skip if the mask of the other elements doesn't match our expected order.
9993 if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
9994 // We only need the last 3 bits for the number of shifts.
9995 ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
9996 : BigEndianShifts[MaskOneElt & 0x7];
9997 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
9998 Swap = MaskOneElt < NumHalfWords;
9999 FoundCandidate = true;
10000 break;
10001 }
10002 }
10003 }
10004
10005 if (!FoundCandidate)
10006 return SDValue();
10007
10008 // Candidate found, construct the proper SDAG sequence with VINSERTH,
10009 // optionally with VECSHL if shift is required.
10010 if (Swap)
10011 std::swap(V1, V2);
10012 if (V2.isUndef())
10013 V2 = V1;
10014 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
10015 if (ShiftElts) {
10016 // Double ShiftElts because we're left shifting on v16i8 type.
10017 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
10018 DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
10019 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl);
10020 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
10021 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10022 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10023 }
10024 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
10025 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
10026 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10027 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10028}
10029
10030/// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
10031/// handled by the XXSPLTI32DX instruction introduced in ISA 3.1, otherwise
10032/// return the default SDValue.
10033SDValue PPCTargetLowering::lowerToXXSPLTI32DX(ShuffleVectorSDNode *SVN,
10034 SelectionDAG &DAG) const {
10035 // The LHS and RHS may be bitcasts to v16i8 as we canonicalize shuffles
10036 // to v16i8. Peek through the bitcasts to get the actual operands.
10037 SDValue LHS = peekThroughBitcasts(SVN->getOperand(0));
10038 SDValue RHS = peekThroughBitcasts(SVN->getOperand(1));
10039
10040 auto ShuffleMask = SVN->getMask();
10041 SDValue VecShuffle(SVN, 0);
10042 SDLoc DL(SVN);
10043
10044 // Check that we have a four byte shuffle.
10045 if (!isNByteElemShuffleMask(SVN, 4, 1))
10046 return SDValue();
10047
10048 // Canonicalize the RHS being a BUILD_VECTOR when lowering to xxsplti32dx.
10049 if (RHS->getOpcode() != ISD::BUILD_VECTOR) {
10050 std::swap(LHS, RHS);
10051 VecShuffle = DAG.getCommutedVectorShuffle(*SVN);
10052 ShuffleVectorSDNode *CommutedSV = dyn_cast<ShuffleVectorSDNode>(VecShuffle);
10053 if (!CommutedSV)
10054 return SDValue();
10055 ShuffleMask = CommutedSV->getMask();
10056 }
10057
10058 // Ensure that the RHS is a vector of constants.
10059 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
10060 if (!BVN)
10061 return SDValue();
10062
10063 // Check if RHS is a splat of 4-bytes (or smaller).
10064 APInt APSplatValue, APSplatUndef;
10065 unsigned SplatBitSize;
10066 bool HasAnyUndefs;
10067 if (!BVN->isConstantSplat(APSplatValue, APSplatUndef, SplatBitSize,
10068 HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
10069 SplatBitSize > 32)
10070 return SDValue();
10071
10072 // Check that the shuffle mask matches the semantics of XXSPLTI32DX.
10073 // The instruction splats a constant C into two words of the source vector
10074 // producing { C, Unchanged, C, Unchanged } or { Unchanged, C, Unchanged, C }.
10075 // Thus we check that the shuffle mask is the equivalent of
10076 // <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively.
10077 // Note: the check above of isNByteElemShuffleMask() ensures that the bytes
10078 // within each word are consecutive, so we only need to check the first byte.
10079 SDValue Index;
10080 bool IsLE = Subtarget.isLittleEndian();
10081 if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) &&
10082 (ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 &&
10083 ShuffleMask[4] > 15 && ShuffleMask[12] > 15))
10084 Index = DAG.getTargetConstant(IsLE ? 0 : 1, DL, MVT::i32);
10085 else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) &&
10086 (ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 &&
10087 ShuffleMask[0] > 15 && ShuffleMask[8] > 15))
10088 Index = DAG.getTargetConstant(IsLE ? 1 : 0, DL, MVT::i32);
10089 else
10090 return SDValue();
10091
10092 // If the splat is narrower than 32-bits, we need to get the 32-bit value
10093 // for XXSPLTI32DX.
10094 unsigned SplatVal = APSplatValue.getZExtValue();
10095 for (; SplatBitSize < 32; SplatBitSize <<= 1)
10096 SplatVal |= (SplatVal << SplatBitSize);
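// For example, an 8-bit splat of 0xAB is widened to 0xABAB and then to
// 0xABABABAB.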
10097
10098 SDValue SplatNode = DAG.getNode(
10099 PPCISD::XXSPLTI32DX, DL, MVT::v2i64, DAG.getBitcast(MVT::v2i64, LHS),
10100 Index, DAG.getTargetConstant(SplatVal, DL, MVT::i32));
10101 return DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, SplatNode);
10102}
10103
10104/// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8).
10105/// We lower ROTL(v1i128) to vector_shuffle(v16i8) only if shift amount is
10106/// a multiple of 8. Otherwise convert it to a scalar rotation(i128)
10107/// i.e (or (shl x, C1), (srl x, 128-C1)).
10108SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
10109 assert(Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL");
10110 assert(Op.getValueType() == MVT::v1i128 &&
10111 "Only set v1i128 as custom, other type shouldn't reach here!");
10112 SDLoc dl(Op);
10113 SDValue N0 = peekThroughBitcasts(Op.getOperand(0));
10114 SDValue N1 = peekThroughBitcasts(Op.getOperand(1));
10115 unsigned SHLAmt = N1.getConstantOperandVal(0);
10116 if (SHLAmt % 8 == 0) {
10117 std::array<int, 16> Mask;
10118 std::iota(Mask.begin(), Mask.end(), 0);
10119 std::rotate(Mask.begin(), Mask.begin() + SHLAmt / 8, Mask.end());
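// For example, SHLAmt == 8 produces the byte-shuffle mask <1, 2, ..., 15, 0>.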
10120 if (SDValue Shuffle =
10121 DAG.getVectorShuffle(MVT::v16i8, dl, DAG.getBitcast(MVT::v16i8, N0),
10122 DAG.getUNDEF(MVT::v16i8), Mask))
10123 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, Shuffle);
10124 }
10125 SDValue ArgVal = DAG.getBitcast(MVT::i128, N0);
10126 SDValue SHLOp = DAG.getNode(ISD::SHL, dl, MVT::i128, ArgVal,
10127 DAG.getConstant(SHLAmt, dl, MVT::i32));
10128 SDValue SRLOp = DAG.getNode(ISD::SRL, dl, MVT::i128, ArgVal,
10129 DAG.getConstant(128 - SHLAmt, dl, MVT::i32));
10130 SDValue OROp = DAG.getNode(ISD::OR, dl, MVT::i128, SHLOp, SRLOp);
10131 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, OROp);
10132}
10133
10134/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
10135/// is a shuffle we can handle in a single instruction, return it. Otherwise,
10136/// return the code it can be lowered into. Worst case, it can always be
10137/// lowered into a vperm.
10138SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
10139 SelectionDAG &DAG) const {
10140 SDLoc dl(Op);
10141 SDValue V1 = Op.getOperand(0);
10142 SDValue V2 = Op.getOperand(1);
10143 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
10144
10145 // Any nodes that were combined in the target-independent combiner prior
10146 // to vector legalization will not be sent to the target combine. Try to
10147 // combine it here.
10148 if (SDValue NewShuffle = combineVectorShuffle(SVOp, DAG)) {
10149 if (!isa<ShuffleVectorSDNode>(NewShuffle))
10150 return NewShuffle;
10151 Op = NewShuffle;
10152 SVOp = cast<ShuffleVectorSDNode>(Op);
10153 V1 = Op.getOperand(0);
10154 V2 = Op.getOperand(1);
10155 }
10156 EVT VT = Op.getValueType();
10157 bool isLittleEndian = Subtarget.isLittleEndian();
10158
10159 unsigned ShiftElts, InsertAtByte;
10160 bool Swap = false;
10161
10162 // If this is a load-and-splat, we can do that with a single instruction
10163 // in some cases. However if the load has multiple uses, we don't want to
10164 // combine it because that will just produce multiple loads.
10165 bool IsPermutedLoad = false;
10166 const SDValue *InputLoad = getNormalLoadInput(V1, IsPermutedLoad);
10167 if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
10168 (PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) &&
10169 InputLoad->hasOneUse()) {
10170 bool IsFourByte = PPC::isSplatShuffleMask(SVOp, 4);
10171 int SplatIdx =
10172 PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG);
10173
10174 // The splat index for permuted loads will be in the left half of the vector
10175 // which is strictly wider than the loaded value by 8 bytes. So we need to
10176 // adjust the splat index to point to the correct address in memory.
10177 if (IsPermutedLoad) {
10178 assert((isLittleEndian || IsFourByte) &&
10179 "Unexpected size for permuted load on big endian target");
10180 SplatIdx += IsFourByte ? 2 : 1;
10181 assert((SplatIdx < (IsFourByte ? 4 : 2)) &&
10182 "Splat of a value outside of the loaded memory");
10183 }
10184
10185 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
10186 // For 4-byte load-and-splat, we need Power9.
10187 if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {
10188 uint64_t Offset = 0;
10189 if (IsFourByte)
10190 Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
10191 else
10192 Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;
10193
10194 // If the width of the load is the same as the width of the splat,
10195 // loading with an offset would load the wrong memory.
10196 if (LD->getValueType(0).getSizeInBits() == (IsFourByte ? 32 : 64))
10197 Offset = 0;
10198
10199 SDValue BasePtr = LD->getBasePtr();
10200 if (Offset != 0)
10201 BasePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
10202 BasePtr, DAG.getIntPtrConstant(Offset, dl));
10203 SDValue Ops[] = {
10204 LD->getChain(), // Chain
10205 BasePtr, // BasePtr
10206 DAG.getValueType(Op.getValueType()) // VT
10207 };
10208 SDVTList VTL =
10209 DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other);
10210 SDValue LdSplt =
10211 DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, VTL,
10212 Ops, LD->getMemoryVT(), LD->getMemOperand());
10213 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1), LdSplt.getValue(1));
10214 if (LdSplt.getValueType() != SVOp->getValueType(0))
10215 LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt);
10216 return LdSplt;
10217 }
10218 }
10219
10220 // All v2i64 and v2f64 shuffles are legal
10221 if (VT == MVT::v2i64 || VT == MVT::v2f64)
10222 return Op;
10223
10224 if (Subtarget.hasP9Vector() &&
10225 PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
10226 isLittleEndian)) {
10227 if (V2.isUndef())
10228 V2 = V1;
10229 else if (Swap)
10230 std::swap(V1, V2);
10231 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10232 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
10233 if (ShiftElts) {
10234 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
10235 DAG.getConstant(ShiftElts, dl, MVT::i32));
10236 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,
10237 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10238 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10239 }
10240 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,
10241 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10242 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10243 }
10244
10245 if (Subtarget.hasPrefixInstrs()) {
10246 SDValue SplatInsertNode;
10247 if ((SplatInsertNode = lowerToXXSPLTI32DX(SVOp, DAG)))
10248 return SplatInsertNode;
10249 }
10250
10251 if (Subtarget.hasP9Altivec()) {
10252 SDValue NewISDNode;
10253 if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
10254 return NewISDNode;
10255
10256 if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
10257 return NewISDNode;
10258 }
10259
10260 if (Subtarget.hasVSX() &&
10261 PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10262 if (Swap)
10263 std::swap(V1, V2);
10264 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10265 SDValue Conv2 =
10266 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);
10267
10268 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
10269 DAG.getConstant(ShiftElts, dl, MVT::i32));
10270 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
10271 }
10272
10273 if (Subtarget.hasVSX() &&
10274 PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10275 if (Swap)
10276 std::swap(V1, V2);
10277 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10278 SDValue Conv2 =
10279 DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);
10280
10281 SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
10282 DAG.getConstant(ShiftElts, dl, MVT::i32));
10283 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
10284 }
10285
10286 if (Subtarget.hasP9Vector()) {
10287 if (PPC::isXXBRHShuffleMask(SVOp)) {
10288 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
10289 SDValue ReveHWord = DAG.getNode(ISD::BSWAP, dl, MVT::v8i16, Conv);
10290 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
10291 } else if (PPC::isXXBRWShuffleMask(SVOp)) {
10292 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10293 SDValue ReveWord = DAG.getNode(ISD::BSWAP, dl, MVT::v4i32, Conv);
10294 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
10295 } else if (PPC::isXXBRDShuffleMask(SVOp)) {
10296 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10297 SDValue ReveDWord = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Conv);
10298 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
10299 } else if (PPC::isXXBRQShuffleMask(SVOp)) {
10300 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
10301 SDValue ReveQWord = DAG.getNode(ISD::BSWAP, dl, MVT::v1i128, Conv);
10302 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
10303 }
10304 }
10305
10306 if (Subtarget.hasVSX()) {
10307 if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
10308 int SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG);
10309
10310 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10311 SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
10312 DAG.getConstant(SplatIdx, dl, MVT::i32));
10313 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
10314 }
10315
10316 // Left shifts of 8 bytes are actually swaps. Convert accordingly.
10317 if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
10318 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
10319 SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
10320 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
10321 }
10322 }
10323
10324 // Cases that are handled by instructions that take permute immediates
10325 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
10326 // selected by the instruction selector.
10327 if (V2.isUndef()) {
10328 if (PPC::isSplatShuffleMask(SVOp, 1) ||
10329 PPC::isSplatShuffleMask(SVOp, 2) ||
10330 PPC::isSplatShuffleMask(SVOp, 4) ||
10331 PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
10332 PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
10333 PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
10334 PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
10335 PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
10336 PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
10337 PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
10338 PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
10339 PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
10340 (Subtarget.hasP8Altivec() && (
10341 PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
10342 PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
10343 PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
10344 return Op;
10345 }
10346 }
10347
10348 // Altivec has a variety of "shuffle immediates" that take two vector inputs
10349 // and produce a fixed permutation. If any of these match, do not lower to
10350 // VPERM.
10351 unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
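// Here ShuffleKind 0 denotes a big-endian shuffle of two distinct inputs,
// 1 a shuffle of two identical inputs on either endianness, and 2 a
// little-endian shuffle of two distinct inputs.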
10352 if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10353 PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10354 PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
10355 PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10356 PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10357 PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10358 PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10359 PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10360 PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10361 (Subtarget.hasP8Altivec() && (
10362 PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10363 PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
10364 PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
10365 return Op;
10366
10367 // Check to see if this is a shuffle of 4-byte values. If so, we can use our
10368 // perfect shuffle table to emit an optimal matching sequence.
10369 ArrayRef<int> PermMask = SVOp->getMask();
10370
10371 if (!DisablePerfectShuffle && !isLittleEndian) {
10372 unsigned PFIndexes[4];
10373 bool isFourElementShuffle = true;
10374 for (unsigned i = 0; i != 4 && isFourElementShuffle;
10375 ++i) { // Element number
10376 unsigned EltNo = 8; // Start out undef.
10377 for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
10378 if (PermMask[i * 4 + j] < 0)
10379 continue; // Undef, ignore it.
10380
10381 unsigned ByteSource = PermMask[i * 4 + j];
10382 if ((ByteSource & 3) != j) {
10383 isFourElementShuffle = false;
10384 break;
10385 }
10386
10387 if (EltNo == 8) {
10388 EltNo = ByteSource / 4;
10389 } else if (EltNo != ByteSource / 4) {
10390 isFourElementShuffle = false;
10391 break;
10392 }
10393 }
10394 PFIndexes[i] = EltNo;
10395 }
10396
10397 // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
10398 // perfect shuffle table to determine if it is cost effective to do this as
10399 // discrete instructions, or whether we should use a vperm.
10400 // For now, we skip this for little endian until such time as we have a
10401 // little-endian perfect shuffle table.
10402 if (isFourElementShuffle) {
10403 // Compute the index in the perfect shuffle table.
10404 unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
10405 PFIndexes[2] * 9 + PFIndexes[3];
10406
10407 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
10408 unsigned Cost = (PFEntry >> 30);
10409
10410 // Determining when to avoid vperm is tricky. Many things affect the cost
10411 // of vperm, particularly how many times the perm mask needs to be
10412 // computed. For example, if the perm mask can be hoisted out of a loop or
10413 // is already used (perhaps because there are multiple permutes with the
10414 // same shuffle mask?) the vperm has a cost of 1. OTOH, hoisting the
10415 // permute mask out of the loop requires an extra register.
10416 //
10417 // As a compromise, we only emit discrete instructions if the shuffle can
10418 // be generated in 3 or fewer operations. When we have loop information
10419 // available, if this block is within a loop, we should avoid using vperm
10420 // for 3-operation perms and use a constant pool load instead.
10421 if (Cost < 3)
10422 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
10423 }
10424 }
10425
10426 // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
10427 // vector that will get spilled to the constant pool.
10428 if (V2.isUndef()) V2 = V1;
10429
10430 return LowerVPERM(Op, DAG, PermMask, VT, V1, V2);
10431}
10432
10433SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG,
10434 ArrayRef<int> PermMask, EVT VT,
10435 SDValue V1, SDValue V2) const {
10436 unsigned Opcode = PPCISD::VPERM;
10437 EVT ValType = V1.getValueType();
10438 SDLoc dl(Op);
10439 bool NeedSwap = false;
10440 bool isLittleEndian = Subtarget.isLittleEndian();
10441 bool isPPC64 = Subtarget.isPPC64();
10442
10443 if (Subtarget.hasVSX() && Subtarget.hasP9Vector() &&
10444 (V1->hasOneUse() || V2->hasOneUse())) {
10445 LLVM_DEBUG(dbgs() << "At least one of two input vectors are dead - using "
10446 "XXPERM instead\n");
10447 Opcode = PPCISD::XXPERM;
10448
10449 // The second input to XXPERM is also an output, so if the second input has
10450 // multiple uses then copying is necessary. As a result, we want the
10451 // single-use operand to be used as the second input to prevent copying.
10452 if ((!isLittleEndian && !V2->hasOneUse() && V1->hasOneUse()) ||
10453 (isLittleEndian && !V1->hasOneUse() && V2->hasOneUse())) {
10454 std::swap(V1, V2);
10455 NeedSwap = !NeedSwap;
10456 }
10457 }
10458
10459 // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
10460 // that it is in input element units, not in bytes. Convert now.
10461
10462 // For little endian, the order of the input vectors is reversed, and
10463 // the permutation mask is complemented with respect to 31. This is
10464 // necessary to produce proper semantics with the big-endian-based vperm
10465 // instruction.
10466 EVT EltVT = V1.getValueType().getVectorElementType();
10467 unsigned BytesPerElement = EltVT.getSizeInBits() / 8;
10468
10469 bool V1HasXXSWAPD = V1->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
10470 bool V2HasXXSWAPD = V2->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
10471
10472 /*
10473 Vectors will be appended like so: [ V1 | V2 ]
10474 XXSWAPD on V1:
10475 [ A | B | C | D ] -> [ C | D | A | B ]
10476 0-3 4-7 8-11 12-15 0-3 4-7 8-11 12-15
10477 i.e. index of A, B += 8, and index of C, D -= 8.
10478 XXSWAPD on V2:
10479 [ E | F | G | H ] -> [ G | H | E | F ]
10480 16-19 20-23 24-27 28-31 16-19 20-23 24-27 28-31
10481 i.e. index of E, F += 8, index of G, H -= 8
10482 Swap V1 and V2:
10483 [ V1 | V2 ] -> [ V2 | V1 ]
10484 0-15 16-31 0-15 16-31
10485 i.e. index of V1 += 16, index of V2 -= 16
10486 */
10487
10488 SmallVector<SDValue, 16> ResultMask;
10489 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
10490 unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
10491
10492 if (V1HasXXSWAPD) {
10493 if (SrcElt < 8)
10494 SrcElt += 8;
10495 else if (SrcElt < 16)
10496 SrcElt -= 8;
10497 }
10498 if (V2HasXXSWAPD) {
10499 if (SrcElt > 23)
10500 SrcElt -= 8;
10501 else if (SrcElt > 15)
10502 SrcElt += 8;
10503 }
10504 if (NeedSwap) {
10505 if (SrcElt < 16)
10506 SrcElt += 16;
10507 else
10508 SrcElt -= 16;
10509 }
10510 for (unsigned j = 0; j != BytesPerElement; ++j)
10511 if (isLittleEndian)
10512 ResultMask.push_back(
10513 DAG.getConstant(31 - (SrcElt * BytesPerElement + j), dl, MVT::i32));
10514 else
10515 ResultMask.push_back(
10516 DAG.getConstant(SrcElt * BytesPerElement + j, dl, MVT::i32));
10517 }
10518
10519 if (V1HasXXSWAPD) {
10520 dl = SDLoc(V1->getOperand(0));
10521 V1 = V1->getOperand(0)->getOperand(1);
10522 }
10523 if (V2HasXXSWAPD) {
10524 dl = SDLoc(V2->getOperand(0));
10525 V2 = V2->getOperand(0)->getOperand(1);
10526 }
10527
10528 if (isPPC64 && (V1HasXXSWAPD || V2HasXXSWAPD)) {
10529 if (ValType != MVT::v2f64)
10530 V1 = DAG.getBitcast(MVT::v2f64, V1);
10531 if (V2.getValueType() != MVT::v2f64)
10532 V2 = DAG.getBitcast(MVT::v2f64, V2);
10533 }
10534
10535 ShufflesHandledWithVPERM++;
10536 SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
10537 LLVM_DEBUG({
10538 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
10539 if (Opcode == PPCISD::XXPERM) {
10540 dbgs() << "Emitting a XXPERM for the following shuffle:\n";
10541 } else {
10542 dbgs() << "Emitting a VPERM for the following shuffle:\n";
10543 }
10544 SVOp->dump();
10545 dbgs() << "With the following permute control vector:\n";
10546 VPermMask.dump();
10547 });
10548
10549 if (Opcode == PPCISD::XXPERM)
10550 VPermMask = DAG.getBitcast(MVT::v4i32, VPermMask);
10551
10552 // For LE we only need to swap the operand order here; the permute mask
10553 // was already calculated accordingly above.
10554 if (isLittleEndian)
10555 std::swap(V1, V2);
10556
10557 SDValue VPERMNode =
10558 DAG.getNode(Opcode, dl, V1.getValueType(), V1, V2, VPermMask);
10559
10560 VPERMNode = DAG.getBitcast(ValType, VPERMNode);
10561 return VPERMNode;
10562}
10563
10564/// getVectorCompareInfo - Given an intrinsic, return false if it is not a
10565 /// vector comparison. If it is, return true and fill in CompareOpc/isDot with
10566/// information about the intrinsic.
10567static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
10568 bool &isDot, const PPCSubtarget &Subtarget) {
10569 unsigned IntrinsicID = Intrin.getConstantOperandVal(0);
10570 CompareOpc = -1;
10571 isDot = false;
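// CompareOpc is set to the extended-opcode field of the matching vector
// compare instruction; isDot selects the record (dot) form, which also
// updates CR6.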
10572 switch (IntrinsicID) {
10573 default:
10574 return false;
10575 // Comparison predicates.
10576 case Intrinsic::ppc_altivec_vcmpbfp_p:
10577 CompareOpc = 966;
10578 isDot = true;
10579 break;
10580 case Intrinsic::ppc_altivec_vcmpeqfp_p:
10581 CompareOpc = 198;
10582 isDot = true;
10583 break;
10584 case Intrinsic::ppc_altivec_vcmpequb_p:
10585 CompareOpc = 6;
10586 isDot = true;
10587 break;
10588 case Intrinsic::ppc_altivec_vcmpequh_p:
10589 CompareOpc = 70;
10590 isDot = true;
10591 break;
10592 case Intrinsic::ppc_altivec_vcmpequw_p:
10593 CompareOpc = 134;
10594 isDot = true;
10595 break;
10596 case Intrinsic::ppc_altivec_vcmpequd_p:
10597 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10598 CompareOpc = 199;
10599 isDot = true;
10600 } else
10601 return false;
10602 break;
10603 case Intrinsic::ppc_altivec_vcmpneb_p:
10604 case Intrinsic::ppc_altivec_vcmpneh_p:
10605 case Intrinsic::ppc_altivec_vcmpnew_p:
10606 case Intrinsic::ppc_altivec_vcmpnezb_p:
10607 case Intrinsic::ppc_altivec_vcmpnezh_p:
10608 case Intrinsic::ppc_altivec_vcmpnezw_p:
10609 if (Subtarget.hasP9Altivec()) {
10610 switch (IntrinsicID) {
10611 default:
10612 llvm_unreachable("Unknown comparison intrinsic.");
10613 case Intrinsic::ppc_altivec_vcmpneb_p:
10614 CompareOpc = 7;
10615 break;
10616 case Intrinsic::ppc_altivec_vcmpneh_p:
10617 CompareOpc = 71;
10618 break;
10619 case Intrinsic::ppc_altivec_vcmpnew_p:
10620 CompareOpc = 135;
10621 break;
10622 case Intrinsic::ppc_altivec_vcmpnezb_p:
10623 CompareOpc = 263;
10624 break;
10625 case Intrinsic::ppc_altivec_vcmpnezh_p:
10626 CompareOpc = 327;
10627 break;
10628 case Intrinsic::ppc_altivec_vcmpnezw_p:
10629 CompareOpc = 391;
10630 break;
10631 }
10632 isDot = true;
10633 } else
10634 return false;
10635 break;
10636 case Intrinsic::ppc_altivec_vcmpgefp_p:
10637 CompareOpc = 454;
10638 isDot = true;
10639 break;
10640 case Intrinsic::ppc_altivec_vcmpgtfp_p:
10641 CompareOpc = 710;
10642 isDot = true;
10643 break;
10644 case Intrinsic::ppc_altivec_vcmpgtsb_p:
10645 CompareOpc = 774;
10646 isDot = true;
10647 break;
10648 case Intrinsic::ppc_altivec_vcmpgtsh_p:
10649 CompareOpc = 838;
10650 isDot = true;
10651 break;
10652 case Intrinsic::ppc_altivec_vcmpgtsw_p:
10653 CompareOpc = 902;
10654 isDot = true;
10655 break;
10656 case Intrinsic::ppc_altivec_vcmpgtsd_p:
10657 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10658 CompareOpc = 967;
10659 isDot = true;
10660 } else
10661 return false;
10662 break;
10663 case Intrinsic::ppc_altivec_vcmpgtub_p:
10664 CompareOpc = 518;
10665 isDot = true;
10666 break;
10667 case Intrinsic::ppc_altivec_vcmpgtuh_p:
10668 CompareOpc = 582;
10669 isDot = true;
10670 break;
10671 case Intrinsic::ppc_altivec_vcmpgtuw_p:
10672 CompareOpc = 646;
10673 isDot = true;
10674 break;
10675 case Intrinsic::ppc_altivec_vcmpgtud_p:
10676 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10677 CompareOpc = 711;
10678 isDot = true;
10679 } else
10680 return false;
10681 break;
10682
10683 case Intrinsic::ppc_altivec_vcmpequq:
10684 case Intrinsic::ppc_altivec_vcmpgtsq:
10685 case Intrinsic::ppc_altivec_vcmpgtuq:
10686 if (!Subtarget.isISA3_1())
10687 return false;
10688 switch (IntrinsicID) {
10689 default:
10690 llvm_unreachable("Unknown comparison intrinsic.");
10691 case Intrinsic::ppc_altivec_vcmpequq:
10692 CompareOpc = 455;
10693 break;
10694 case Intrinsic::ppc_altivec_vcmpgtsq:
10695 CompareOpc = 903;
10696 break;
10697 case Intrinsic::ppc_altivec_vcmpgtuq:
10698 CompareOpc = 647;
10699 break;
10700 }
10701 break;
10702
10703 // VSX predicate comparisons use the same infrastructure
10704 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10705 case Intrinsic::ppc_vsx_xvcmpgedp_p:
10706 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10707 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10708 case Intrinsic::ppc_vsx_xvcmpgesp_p:
10709 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10710 if (Subtarget.hasVSX()) {
10711 switch (IntrinsicID) {
10712 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10713 CompareOpc = 99;
10714 break;
10715 case Intrinsic::ppc_vsx_xvcmpgedp_p:
10716 CompareOpc = 115;
10717 break;
10718 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10719 CompareOpc = 107;
10720 break;
10721 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10722 CompareOpc = 67;
10723 break;
10724 case Intrinsic::ppc_vsx_xvcmpgesp_p:
10725 CompareOpc = 83;
10726 break;
10727 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10728 CompareOpc = 75;
10729 break;
10730 }
10731 isDot = true;
10732 } else
10733 return false;
10734 break;
10735
10736 // Normal Comparisons.
10737 case Intrinsic::ppc_altivec_vcmpbfp:
10738 CompareOpc = 966;
10739 break;
10740 case Intrinsic::ppc_altivec_vcmpeqfp:
10741 CompareOpc = 198;
10742 break;
10743 case Intrinsic::ppc_altivec_vcmpequb:
10744 CompareOpc = 6;
10745 break;
10746 case Intrinsic::ppc_altivec_vcmpequh:
10747 CompareOpc = 70;
10748 break;
10749 case Intrinsic::ppc_altivec_vcmpequw:
10750 CompareOpc = 134;
10751 break;
10752 case Intrinsic::ppc_altivec_vcmpequd:
10753 if (Subtarget.hasP8Altivec())
10754 CompareOpc = 199;
10755 else
10756 return false;
10757 break;
10758 case Intrinsic::ppc_altivec_vcmpneb:
10759 case Intrinsic::ppc_altivec_vcmpneh:
10760 case Intrinsic::ppc_altivec_vcmpnew:
10761 case Intrinsic::ppc_altivec_vcmpnezb:
10762 case Intrinsic::ppc_altivec_vcmpnezh:
10763 case Intrinsic::ppc_altivec_vcmpnezw:
10764 if (Subtarget.hasP9Altivec())
10765 switch (IntrinsicID) {
10766 default:
10767 llvm_unreachable("Unknown comparison intrinsic.");
10768 case Intrinsic::ppc_altivec_vcmpneb:
10769 CompareOpc = 7;
10770 break;
10771 case Intrinsic::ppc_altivec_vcmpneh:
10772 CompareOpc = 71;
10773 break;
10774 case Intrinsic::ppc_altivec_vcmpnew:
10775 CompareOpc = 135;
10776 break;
10777 case Intrinsic::ppc_altivec_vcmpnezb:
10778 CompareOpc = 263;
10779 break;
10780 case Intrinsic::ppc_altivec_vcmpnezh:
10781 CompareOpc = 327;
10782 break;
10783 case Intrinsic::ppc_altivec_vcmpnezw:
10784 CompareOpc = 391;
10785 break;
10786 }
10787 else
10788 return false;
10789 break;
10790 case Intrinsic::ppc_altivec_vcmpgefp:
10791 CompareOpc = 454;
10792 break;
10793 case Intrinsic::ppc_altivec_vcmpgtfp:
10794 CompareOpc = 710;
10795 break;
10796 case Intrinsic::ppc_altivec_vcmpgtsb:
10797 CompareOpc = 774;
10798 break;
10799 case Intrinsic::ppc_altivec_vcmpgtsh:
10800 CompareOpc = 838;
10801 break;
10802 case Intrinsic::ppc_altivec_vcmpgtsw:
10803 CompareOpc = 902;
10804 break;
10805 case Intrinsic::ppc_altivec_vcmpgtsd:
10806 if (Subtarget.hasP8Altivec())
10807 CompareOpc = 967;
10808 else
10809 return false;
10810 break;
10811 case Intrinsic::ppc_altivec_vcmpgtub:
10812 CompareOpc = 518;
10813 break;
10814 case Intrinsic::ppc_altivec_vcmpgtuh:
10815 CompareOpc = 582;
10816 break;
10817 case Intrinsic::ppc_altivec_vcmpgtuw:
10818 CompareOpc = 646;
10819 break;
10820 case Intrinsic::ppc_altivec_vcmpgtud:
10821 if (Subtarget.hasP8Altivec())
10822 CompareOpc = 711;
10823 else
10824 return false;
10825 break;
10826 case Intrinsic::ppc_altivec_vcmpequq_p:
10827 case Intrinsic::ppc_altivec_vcmpgtsq_p:
10828 case Intrinsic::ppc_altivec_vcmpgtuq_p:
10829 if (!Subtarget.isISA3_1())
10830 return false;
10831 switch (IntrinsicID) {
10832 default:
10833 llvm_unreachable("Unknown comparison intrinsic.");
10834 case Intrinsic::ppc_altivec_vcmpequq_p:
10835 CompareOpc = 455;
10836 break;
10837 case Intrinsic::ppc_altivec_vcmpgtsq_p:
10838 CompareOpc = 903;
10839 break;
10840 case Intrinsic::ppc_altivec_vcmpgtuq_p:
10841 CompareOpc = 647;
10842 break;
10843 }
10844 isDot = true;
10845 break;
10846 }
10847 return true;
10848}
10849
10850/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
10851/// lower, do it, otherwise return null.
10852SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
10853 SelectionDAG &DAG) const {
10854 unsigned IntrinsicID = Op.getConstantOperandVal(0);
10855
10856 SDLoc dl(Op);
10857
10858 switch (IntrinsicID) {
10859 case Intrinsic::thread_pointer:
10860 // Reads the thread pointer register, used for __builtin_thread_pointer.
10861 if (Subtarget.isPPC64())
10862 return DAG.getRegister(PPC::X13, MVT::i64);
10863 return DAG.getRegister(PPC::R2, MVT::i32);
10864
10865 case Intrinsic::ppc_rldimi: {
10866 assert(Subtarget.isPPC64() && "rldimi is only available in 64-bit!");
10867 SDValue Src = Op.getOperand(1);
10868 APInt Mask = Op.getConstantOperandAPInt(4);
10869 if (Mask.isZero())
10870 return Op.getOperand(2);
10871 if (Mask.isAllOnes())
10872 return DAG.getNode(ISD::ROTL, dl, MVT::i64, Src, Op.getOperand(3));
10873 uint64_t SH = Op.getConstantOperandVal(3);
10874 unsigned MB = 0, ME = 0;
10875 if (!isRunOfOnes64(Mask.getZExtValue(), MB, ME))
10876 report_fatal_error("invalid rldimi mask!");
10877 // rldimi requires ME=63-SH, otherwise rotation is needed before rldimi.
10878 if (ME < 63 - SH) {
10879 Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
10880 DAG.getConstant(ME + SH + 1, dl, MVT::i32));
10881 } else if (ME > 63 - SH) {
10882 Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
10883 DAG.getConstant(ME + SH - 63, dl, MVT::i32));
10884 }
10885 return SDValue(
10886 DAG.getMachineNode(PPC::RLDIMI, dl, MVT::i64,
10887 {Op.getOperand(2), Src,
10888 DAG.getTargetConstant(63 - ME, dl, MVT::i32),
10889 DAG.getTargetConstant(MB, dl, MVT::i32)}),
10890 0);
10891 }
10892
10893 case Intrinsic::ppc_rlwimi: {
10894 APInt Mask = Op.getConstantOperandAPInt(4);
10895 if (Mask.isZero())
10896 return Op.getOperand(2);
10897 if (Mask.isAllOnes())
10898 return DAG.getNode(ISD::ROTL, dl, MVT::i32, Op.getOperand(1),
10899 Op.getOperand(3));
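// isRunOfOnes reports the mask bounds in PPC (big-endian) bit numbering;
// for example, a mask of 0x00FF0000 yields MB = 8 and ME = 15.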
10900 unsigned MB = 0, ME = 0;
10901 if (!isRunOfOnes(Mask.getZExtValue(), MB, ME))
10902 report_fatal_error("invalid rlwimi mask!");
10903 return SDValue(DAG.getMachineNode(
10904 PPC::RLWIMI, dl, MVT::i32,
10905 {Op.getOperand(2), Op.getOperand(1), Op.getOperand(3),
10906 DAG.getTargetConstant(MB, dl, MVT::i32),
10907 DAG.getTargetConstant(ME, dl, MVT::i32)}),
10908 0);
10909 }
10910
10911 case Intrinsic::ppc_rlwnm: {
10912 if (Op.getConstantOperandVal(3) == 0)
10913 return DAG.getConstant(0, dl, MVT::i32);
10914 unsigned MB = 0, ME = 0;
10915 if (!isRunOfOnes(Op.getConstantOperandVal(3), MB, ME))
10916 report_fatal_error("invalid rlwnm mask!");
10917 return SDValue(
10918 DAG.getMachineNode(PPC::RLWNM, dl, MVT::i32,
10919 {Op.getOperand(1), Op.getOperand(2),
10920 DAG.getTargetConstant(MB, dl, MVT::i32),
10921 DAG.getTargetConstant(ME, dl, MVT::i32)}),
10922 0);
10923 }
10924
10925 case Intrinsic::ppc_mma_disassemble_acc: {
10926 if (Subtarget.isISAFuture()) {
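// On ISA Future subtargets, extract the 512-bit accumulator into two v256i1
// halves and then pull four v16i8 vectors out of those halves below; the
// extraction order depends on endianness.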
10927 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
10928 SDValue WideVec = SDValue(DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl,
10929 ArrayRef(ReturnTypes, 2),
10930 Op.getOperand(1)),
10931 0);
10932 SmallVector<SDValue, 4> RetOps;
10933 SDValue Value = SDValue(WideVec.getNode(), 0);
10934 SDValue Value2 = SDValue(WideVec.getNode(), 1);
10935
10936 SDValue Extract;
10937 Extract = DAG.getNode(
10938 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
10939 Subtarget.isLittleEndian() ? Value2 : Value,
10940 DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
10941 dl, getPointerTy(DAG.getDataLayout())));
10942 RetOps.push_back(Extract);
10943 Extract = DAG.getNode(
10944 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
10945 Subtarget.isLittleEndian() ? Value2 : Value,
10946 DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
10947 dl, getPointerTy(DAG.getDataLayout())));
10948 RetOps.push_back(Extract);
10949 Extract = DAG.getNode(
10950 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
10951 Subtarget.isLittleEndian() ? Value : Value2,
10952 DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
10953 dl, getPointerTy(DAG.getDataLayout())));
10954 RetOps.push_back(Extract);
10955 Extract = DAG.getNode(
10956 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
10957 Subtarget.isLittleEndian() ? Value : Value2,
10958 DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
10959 dl, getPointerTy(DAG.getDataLayout())));
10960 RetOps.push_back(Extract);
10961 return DAG.getMergeValues(RetOps, dl);
10962 }
10963 [[fallthrough]];
10964 }
10965 case Intrinsic::ppc_vsx_disassemble_pair: {
10966 int NumVecs = 2;
10967 SDValue WideVec = Op.getOperand(1);
10968 if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {
10969 NumVecs = 4;
10970 WideVec = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, WideVec);
10971 }
10972 SmallVector<SDValue, 4> RetOps;
10973 for (int VecNo = 0; VecNo < NumVecs; VecNo++) {
10974 SDValue Extract = DAG.getNode(
10975 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, WideVec,
10976 DAG.getConstant(Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo
10977 : VecNo,
10978 dl, getPointerTy(DAG.getDataLayout())));
10979 RetOps.push_back(Extract);
10980 }
10981 return DAG.getMergeValues(RetOps, dl);
10982 }
10983
10984 case Intrinsic::ppc_mma_xxmfacc:
10985 case Intrinsic::ppc_mma_xxmtacc: {
10986 // Allow pre-isa-future subtargets to lower as normal.
10987 if (!Subtarget.isISAFuture())
10988 return SDValue();
10989 // The intrinsics for xxmtacc and xxmfacc take one argument of
10990 // type v512i1. For future CPUs the corresponding wacc instruction
10991 // dmxx[inst|extf]dmr512 is always generated for type v512i1, negating
10992 // the need to produce xxm[t|f]acc.
10993 SDValue WideVec = Op.getOperand(1);
10994 DAG.ReplaceAllUsesWith(Op, WideVec);
10995 return SDValue();
10996 }
10997
10998 case Intrinsic::ppc_unpack_longdouble: {
10999 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11000 assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
11001 "Argument of long double unpack must be 0 or 1!");
11002 return DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Op.getOperand(1),
11003 DAG.getConstant(!!(Idx->getSExtValue()), dl,
11004 Idx->getValueType(0)));
11005 }
11006
11007 case Intrinsic::ppc_compare_exp_lt:
11008 case Intrinsic::ppc_compare_exp_gt:
11009 case Intrinsic::ppc_compare_exp_eq:
11010 case Intrinsic::ppc_compare_exp_uo: {
11011 unsigned Pred;
11012 switch (IntrinsicID) {
11013 case Intrinsic::ppc_compare_exp_lt:
11014 Pred = PPC::PRED_LT;
11015 break;
11016 case Intrinsic::ppc_compare_exp_gt:
11017 Pred = PPC::PRED_GT;
11018 break;
11019 case Intrinsic::ppc_compare_exp_eq:
11020 Pred = PPC::PRED_EQ;
11021 break;
11022 case Intrinsic::ppc_compare_exp_uo:
11023 Pred = PPC::PRED_UN;
11024 break;
11025 }
11026 return SDValue(
11027 DAG.getMachineNode(
11028 PPC::SELECT_CC_I4, dl, MVT::i32,
11029 {SDValue(DAG.getMachineNode(PPC::XSCMPEXPDP, dl, MVT::i32,
11030 Op.getOperand(1), Op.getOperand(2)),
11031 0),
11032 DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
11033 DAG.getTargetConstant(Pred, dl, MVT::i32)}),
11034 0);
11035 }
11036 case Intrinsic::ppc_test_data_class: {
11037 EVT OpVT = Op.getOperand(1).getValueType();
11038 unsigned CmprOpc = OpVT == MVT::f128 ? PPC::XSTSTDCQP
11039 : (OpVT == MVT::f64 ? PPC::XSTSTDCDP
11040 : PPC::XSTSTDCSP);
11041 return SDValue(
11042 DAG.getMachineNode(
11043 PPC::SELECT_CC_I4, dl, MVT::i32,
11044 {SDValue(DAG.getMachineNode(CmprOpc, dl, MVT::i32, Op.getOperand(2),
11045 Op.getOperand(1)),
11046 0),
11047 DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
11048 DAG.getTargetConstant(PPC::PRED_EQ, dl, MVT::i32)}),
11049 0);
11050 }
11051 case Intrinsic::ppc_fnmsub: {
11052 EVT VT = Op.getOperand(1).getValueType();
11053 if (!Subtarget.hasVSX() || (!Subtarget.hasFloat128() && VT == MVT::f128))
11054 return DAG.getNode(
11055 ISD::FNEG, dl, VT,
11056 DAG.getNode(ISD::FMA, dl, VT, Op.getOperand(1), Op.getOperand(2),
11057 DAG.getNode(ISD::FNEG, dl, VT, Op.getOperand(3))));
11058 return DAG.getNode(PPCISD::FNMSUB, dl, VT, Op.getOperand(1),
11059 Op.getOperand(2), Op.getOperand(3));
11060 }
11061 case Intrinsic::ppc_convert_f128_to_ppcf128:
11062 case Intrinsic::ppc_convert_ppcf128_to_f128: {
11063 RTLIB::Libcall LC = IntrinsicID == Intrinsic::ppc_convert_ppcf128_to_f128
11064 ? RTLIB::CONVERT_PPCF128_F128
11065 : RTLIB::CONVERT_F128_PPCF128;
11066 MakeLibCallOptions CallOptions;
11067 std::pair<SDValue, SDValue> Result =
11068 makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(1), CallOptions,
11069 dl, SDValue());
11070 return Result.first;
11071 }
11072 case Intrinsic::ppc_maxfe:
11073 case Intrinsic::ppc_maxfl:
11074 case Intrinsic::ppc_maxfs:
11075 case Intrinsic::ppc_minfe:
11076 case Intrinsic::ppc_minfl:
11077 case Intrinsic::ppc_minfs: {
11078 EVT VT = Op.getValueType();
11079 assert(
11080 all_of(Op->ops().drop_front(4),
11081 [VT](const SDUse &Use) { return Use.getValueType() == VT; }) &&
11082 "ppc_[max|min]f[e|l|s] must have uniform type arguments");
11083 (void)VT;
11084     ISD::CondCode CC = ISD::SETGT;
11085     if (IntrinsicID == Intrinsic::ppc_minfe ||
11086 IntrinsicID == Intrinsic::ppc_minfl ||
11087 IntrinsicID == Intrinsic::ppc_minfs)
11088 CC = ISD::SETLT;
11089 unsigned I = Op.getNumOperands() - 2, Cnt = I;
11090 SDValue Res = Op.getOperand(I);
11091 for (--I; Cnt != 0; --Cnt, I = (--I == 0 ? (Op.getNumOperands() - 1) : I)) {
11092 Res =
11093 DAG.getSelectCC(dl, Res, Op.getOperand(I), Res, Op.getOperand(I), CC);
11094 }
11095 return Res;
11096 }
11097 }
11098
11099 // If this is a lowered altivec predicate compare, CompareOpc is set to the
11100 // opcode number of the comparison.
11101 int CompareOpc;
11102 bool isDot;
11103 if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
11104 return SDValue(); // Don't custom lower most intrinsics.
11105
11106 // If this is a non-dot comparison, make the VCMP node and we are done.
11107 if (!isDot) {
11108 SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
11109 Op.getOperand(1), Op.getOperand(2),
11110 DAG.getConstant(CompareOpc, dl, MVT::i32));
11111 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
11112 }
11113
11114 // Create the PPCISD altivec 'dot' comparison node.
11115 SDValue Ops[] = {
11116 Op.getOperand(2), // LHS
11117 Op.getOperand(3), // RHS
11118 DAG.getConstant(CompareOpc, dl, MVT::i32)
11119 };
11120 EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
11121 SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
11122
11123 // Now that we have the comparison, emit a copy from the CR to a GPR.
11124 // This is flagged to the above dot comparison.
11125 SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
11126 DAG.getRegister(PPC::CR6, MVT::i32),
11127 CompNode.getValue(1));
11128
11129 // Unpack the result based on how the target uses it.
11130 unsigned BitNo; // Bit # of CR6.
11131 bool InvertBit; // Invert result?
11132 switch (Op.getConstantOperandVal(1)) {
11133 default: // Can't happen, don't crash on invalid number though.
11134 case 0: // Return the value of the EQ bit of CR6.
11135 BitNo = 0; InvertBit = false;
11136 break;
11137 case 1: // Return the inverted value of the EQ bit of CR6.
11138 BitNo = 0; InvertBit = true;
11139 break;
11140 case 2: // Return the value of the LT bit of CR6.
11141 BitNo = 2; InvertBit = false;
11142 break;
11143 case 3: // Return the inverted value of the LT bit of CR6.
11144 BitNo = 2; InvertBit = true;
11145 break;
11146 }
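  // For example, predicate 1 (inverted EQ) selects BitNo = 0 and InvertBit =
  // true: the chosen CR6 bit is shifted down to bit 0 (a right shift by
  // 8 - (3 - BitNo) = 5 here), masked to a single bit, and then XORed with 1.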
11147
11148 // Shift the bit into the low position.
11149 Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
11150 DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
11151 // Isolate the bit.
11152 Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
11153 DAG.getConstant(1, dl, MVT::i32));
11154
11155 // If we are supposed to, toggle the bit.
11156 if (InvertBit)
11157 Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
11158 DAG.getConstant(1, dl, MVT::i32));
11159 return Flags;
11160}
11161
11162SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
11163 SelectionDAG &DAG) const {
11164 // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
11165 // the beginning of the argument list.
11166 int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
11167 SDLoc DL(Op);
11168 switch (Op.getConstantOperandVal(ArgStart)) {
11169 case Intrinsic::ppc_cfence: {
11170 assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
11171 SDValue Val = Op.getOperand(ArgStart + 1);
11172 EVT Ty = Val.getValueType();
11173 if (Ty == MVT::i128) {
11174 // FIXME: Testing one of two paired registers is sufficient to guarantee
11175 // ordering?
11176 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, Val);
11177 }
11178 unsigned Opcode = Subtarget.isPPC64() ? PPC::CFENCE8 : PPC::CFENCE;
11179 EVT FTy = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
11180 return SDValue(
11181 DAG.getMachineNode(Opcode, DL, MVT::Other,
11182 DAG.getNode(ISD::ANY_EXTEND, DL, FTy, Val),
11183 Op.getOperand(0)),
11184 0);
11185 }
11186 default:
11187 break;
11188 }
11189 return SDValue();
11190}
11191
11192// Lower scalar BSWAP64 to xxbrd.
11193SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
11194 SDLoc dl(Op);
11195 if (!Subtarget.isPPC64())
11196 return Op;
11197 // MTVSRDD
11198 Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
11199 Op.getOperand(0));
11200 // XXBRD
11201 Op = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Op);
11202 // MFVSRD
11203 int VectorIndex = 0;
11204 if (Subtarget.isLittleEndian())
11205 VectorIndex = 1;
11206 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,
11207 DAG.getTargetConstant(VectorIndex, dl, MVT::i32));
11208 return Op;
11209}
11210
11211// ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
11212// compared to a value that is atomically loaded (atomic loads zero-extend).
11213SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
11214 SelectionDAG &DAG) const {
11215 assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
11216 "Expecting an atomic compare-and-swap here.");
11217 SDLoc dl(Op);
11218 auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
11219 EVT MemVT = AtomicNode->getMemoryVT();
11220 if (MemVT.getSizeInBits() >= 32)
11221 return Op;
11222
11223 SDValue CmpOp = Op.getOperand(2);
11224 // If this is already correctly zero-extended, leave it alone.
11225 auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
11226 if (DAG.MaskedValueIsZero(CmpOp, HighBits))
11227 return Op;
11228
11229 // Clear the high bits of the compare operand.
11230 unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
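  // For i8 this mask is 0xFF and for i16 it is 0xFFFF.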
11231 SDValue NewCmpOp =
11232 DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
11233 DAG.getConstant(MaskVal, dl, MVT::i32));
11234
11235 // Replace the existing compare operand with the properly zero-extended one.
11236   SmallVector<SDValue, 4> Ops;
11237   for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
11238 Ops.push_back(AtomicNode->getOperand(i));
11239 Ops[2] = NewCmpOp;
11240 MachineMemOperand *MMO = AtomicNode->getMemOperand();
11241 SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
11242   auto NodeTy =
11243       (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
11244   return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
11245}
11246
11247SDValue PPCTargetLowering::LowerATOMIC_LOAD_STORE(SDValue Op,
11248 SelectionDAG &DAG) const {
11249 AtomicSDNode *N = cast<AtomicSDNode>(Op.getNode());
11250 EVT MemVT = N->getMemoryVT();
11251 assert(MemVT.getSimpleVT() == MVT::i128 &&
11252 "Expect quadword atomic operations");
11253 SDLoc dl(N);
11254 unsigned Opc = N->getOpcode();
11255 switch (Opc) {
11256 case ISD::ATOMIC_LOAD: {
11257 // Lower quadword atomic load to int_ppc_atomic_load_i128 which will be
11258 // lowered to ppc instructions by pattern matching instruction selector.
11259 SDVTList Tys = DAG.getVTList(MVT::i64, MVT::i64, MVT::Other);
11260     SmallVector<SDValue, 4> Ops{
11261         N->getOperand(0),
11262 DAG.getConstant(Intrinsic::ppc_atomic_load_i128, dl, MVT::i32)};
11263 for (int I = 1, E = N->getNumOperands(); I < E; ++I)
11264 Ops.push_back(N->getOperand(I));
11265 SDValue LoadedVal = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, Tys,
11266 Ops, MemVT, N->getMemOperand());
11267 SDValue ValLo = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal);
11268 SDValue ValHi =
11269 DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal.getValue(1));
11270 ValHi = DAG.getNode(ISD::SHL, dl, MVT::i128, ValHi,
11271 DAG.getConstant(64, dl, MVT::i32));
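    // Recombine the two 64-bit halves into a single i128 value:
    // Val = zext(Lo) | (zext(Hi) << 64).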
11272 SDValue Val =
11273 DAG.getNode(ISD::OR, dl, {MVT::i128, MVT::Other}, {ValLo, ValHi});
11274 return DAG.getNode(ISD::MERGE_VALUES, dl, {MVT::i128, MVT::Other},
11275 {Val, LoadedVal.getValue(2)});
11276 }
11277 case ISD::ATOMIC_STORE: {
11278 // Lower quadword atomic store to int_ppc_atomic_store_i128 which will be
11279 // lowered to ppc instructions by pattern matching instruction selector.
11280 SDVTList Tys = DAG.getVTList(MVT::Other);
11281     SmallVector<SDValue, 4> Ops{
11282         N->getOperand(0),
11283 DAG.getConstant(Intrinsic::ppc_atomic_store_i128, dl, MVT::i32)};
11284 SDValue Val = N->getOperand(1);
11285 SDValue ValLo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, Val);
11286 SDValue ValHi = DAG.getNode(ISD::SRL, dl, MVT::i128, Val,
11287 DAG.getConstant(64, dl, MVT::i32));
11288 ValHi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, ValHi);
11289 Ops.push_back(ValLo);
11290 Ops.push_back(ValHi);
11291 Ops.push_back(N->getOperand(2));
11292 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops, MemVT,
11293 N->getMemOperand());
11294 }
11295 default:
11296 llvm_unreachable("Unexpected atomic opcode");
11297 }
11298}
11299
11300 static SDValue getDataClassTest(SDValue Op, FPClassTest Mask, const SDLoc &Dl,
11301                                 SelectionDAG &DAG,
11302 const PPCSubtarget &Subtarget) {
11303 assert(Mask <= fcAllFlags && "Invalid fp_class flags!");
11304
11305 enum DataClassMask {
11306 DC_NAN = 1 << 6,
11307 DC_NEG_INF = 1 << 4,
11308 DC_POS_INF = 1 << 5,
11309 DC_NEG_ZERO = 1 << 2,
11310 DC_POS_ZERO = 1 << 3,
11311 DC_NEG_SUBNORM = 1,
11312 DC_POS_SUBNORM = 1 << 1,
11313 };
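  // These values follow the data-class mask (DCMX) operand encoding of the
  // VSX test-data-class instructions selected via TestOp below.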
11314
11315 EVT VT = Op.getValueType();
11316
11317 unsigned TestOp = VT == MVT::f128 ? PPC::XSTSTDCQP
11318 : VT == MVT::f64 ? PPC::XSTSTDCDP
11319 : PPC::XSTSTDCSP;
11320
11321 if (Mask == fcAllFlags)
11322 return DAG.getBoolConstant(true, Dl, MVT::i1, VT);
11323 if (Mask == 0)
11324 return DAG.getBoolConstant(false, Dl, MVT::i1, VT);
11325
11326 // When it's cheaper or necessary to test reverse flags.
11327 if ((Mask & fcNormal) == fcNormal || Mask == ~fcQNan || Mask == ~fcSNan) {
11328 SDValue Rev = getDataClassTest(Op, ~Mask, Dl, DAG, Subtarget);
11329 return DAG.getNOT(Dl, Rev, MVT::i1);
11330 }
11331
11332 // Power doesn't support testing whether a value is 'normal'. Test the rest
11333 // first, and test if it's 'not not-normal' with expected sign.
11334 if (Mask & fcNormal) {
11335 SDValue Rev(DAG.getMachineNode(
11336 TestOp, Dl, MVT::i32,
11337 DAG.getTargetConstant(DC_NAN | DC_NEG_INF | DC_POS_INF |
11338 DC_NEG_ZERO | DC_POS_ZERO |
11339 DC_NEG_SUBNORM | DC_POS_SUBNORM,
11340 Dl, MVT::i32),
11341 Op),
11342 0);
11343       // The sign is stored in CR bit 0, the result in CR bit 2.
11344 SDValue Sign(
11345 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
11346 DAG.getTargetConstant(PPC::sub_lt, Dl, MVT::i32)),
11347 0);
11348 SDValue Normal(DAG.getNOT(
11349 Dl,
11350         SDValue(DAG.getMachineNode(
11351                     TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
11352 DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
11353 0),
11354 MVT::i1));
11355 if (Mask & fcPosNormal)
11356 Sign = DAG.getNOT(Dl, Sign, MVT::i1);
11357 SDValue Result = DAG.getNode(ISD::AND, Dl, MVT::i1, Sign, Normal);
11358 if (Mask == fcPosNormal || Mask == fcNegNormal)
11359 return Result;
11360
11361 return DAG.getNode(
11362 ISD::OR, Dl, MVT::i1,
11363 getDataClassTest(Op, Mask & ~fcNormal, Dl, DAG, Subtarget), Result);
11364 }
11365
11366   // The instruction doesn't differentiate between signaling and quiet NaNs.
11367   // Test the rest first, then test whether it 'is NaN and is signaling/quiet'.
11368 if ((Mask & fcNan) == fcQNan || (Mask & fcNan) == fcSNan) {
11369 bool IsQuiet = Mask & fcQNan;
11370 SDValue NanCheck = getDataClassTest(Op, fcNan, Dl, DAG, Subtarget);
11371
11372 // Quietness is determined by the first bit in fraction field.
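    // That is the most significant fraction bit of the high 32-bit word:
    // bit 15 (0x8000) for f128, bit 19 (0x80000) for f64, and bit 22
    // (0x400000) for f32, as selected below.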
11373 uint64_t QuietMask = 0;
11374 SDValue HighWord;
11375 if (VT == MVT::f128) {
11376 HighWord = DAG.getNode(
11377 ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, DAG.getBitcast(MVT::v4i32, Op),
11378 DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 3 : 0, Dl));
11379 QuietMask = 0x8000;
11380 } else if (VT == MVT::f64) {
11381 if (Subtarget.isPPC64()) {
11382 HighWord = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32,
11383 DAG.getBitcast(MVT::i64, Op),
11384 DAG.getConstant(1, Dl, MVT::i32));
11385 } else {
11386 SDValue Vec = DAG.getBitcast(
11387 MVT::v4i32, DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v2f64, Op));
11388 HighWord = DAG.getNode(
11389 ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, Vec,
11390 DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 1 : 0, Dl));
11391 }
11392 QuietMask = 0x80000;
11393 } else if (VT == MVT::f32) {
11394 HighWord = DAG.getBitcast(MVT::i32, Op);
11395 QuietMask = 0x400000;
11396 }
11397 SDValue NanRes = DAG.getSetCC(
11398 Dl, MVT::i1,
11399 DAG.getNode(ISD::AND, Dl, MVT::i32, HighWord,
11400 DAG.getConstant(QuietMask, Dl, MVT::i32)),
11401 DAG.getConstant(0, Dl, MVT::i32), IsQuiet ? ISD::SETNE : ISD::SETEQ);
11402 NanRes = DAG.getNode(ISD::AND, Dl, MVT::i1, NanCheck, NanRes);
11403 if (Mask == fcQNan || Mask == fcSNan)
11404 return NanRes;
11405
11406 return DAG.getNode(ISD::OR, Dl, MVT::i1,
11407 getDataClassTest(Op, Mask & ~fcNan, Dl, DAG, Subtarget),
11408 NanRes);
11409 }
11410
11411 unsigned NativeMask = 0;
11412 if ((Mask & fcNan) == fcNan)
11413 NativeMask |= DC_NAN;
11414 if (Mask & fcNegInf)
11415 NativeMask |= DC_NEG_INF;
11416 if (Mask & fcPosInf)
11417 NativeMask |= DC_POS_INF;
11418 if (Mask & fcNegZero)
11419 NativeMask |= DC_NEG_ZERO;
11420 if (Mask & fcPosZero)
11421 NativeMask |= DC_POS_ZERO;
11422 if (Mask & fcNegSubnormal)
11423 NativeMask |= DC_NEG_SUBNORM;
11424 if (Mask & fcPosSubnormal)
11425 NativeMask |= DC_POS_SUBNORM;
11426 return SDValue(
11427 DAG.getMachineNode(
11428 TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1,
11429           SDValue(DAG.getMachineNode(
11430                       TestOp, Dl, MVT::i32,
11431 DAG.getTargetConstant(NativeMask, Dl, MVT::i32), Op),
11432 0),
11433 DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
11434 0);
11435}
11436
11437SDValue PPCTargetLowering::LowerIS_FPCLASS(SDValue Op,
11438 SelectionDAG &DAG) const {
11439 assert(Subtarget.hasP9Vector() && "Test data class requires Power9");
11440 SDValue LHS = Op.getOperand(0);
11441 uint64_t RHSC = Op.getConstantOperandVal(1);
11442 SDLoc Dl(Op);
11443 FPClassTest Category = static_cast<FPClassTest>(RHSC);
11444 return getDataClassTest(LHS, Category, Dl, DAG, Subtarget);
11445}
11446
11447SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
11448 SelectionDAG &DAG) const {
11449 SDLoc dl(Op);
11450 // Create a stack slot that is 16-byte aligned.
11451   MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
11452   int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
11453 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11454 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
11455
11456 // Store the input value into Value#0 of the stack slot.
11457 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
11458                                MachinePointerInfo());
11459   // Load it out.
11460 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
11461}
11462
11463SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
11464 SelectionDAG &DAG) const {
11465 assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
11466 "Should only be called for ISD::INSERT_VECTOR_ELT");
11467
11468 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11469
11470 EVT VT = Op.getValueType();
11471 SDLoc dl(Op);
11472 SDValue V1 = Op.getOperand(0);
11473 SDValue V2 = Op.getOperand(1);
11474
11475 if (VT == MVT::v2f64 && C)
11476 return Op;
11477
11478 if (Subtarget.hasP9Vector()) {
11479 // A f32 load feeding into a v4f32 insert_vector_elt is handled in this way
11480 // because on P10, it allows this specific insert_vector_elt load pattern to
11481 // utilize the refactored load and store infrastructure in order to exploit
11482 // prefixed loads.
11483 // On targets with inexpensive direct moves (Power9 and up), a
11484 // (insert_vector_elt v4f32:$vec, (f32 load)) is always better as an integer
11485 // load since a single precision load will involve conversion to double
11486 // precision on the load followed by another conversion to single precision.
11487 if ((VT == MVT::v4f32) && (V2.getValueType() == MVT::f32) &&
11488 (isa<LoadSDNode>(V2))) {
11489 SDValue BitcastVector = DAG.getBitcast(MVT::v4i32, V1);
11490 SDValue BitcastLoad = DAG.getBitcast(MVT::i32, V2);
11491 SDValue InsVecElt =
11492 DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v4i32, BitcastVector,
11493 BitcastLoad, Op.getOperand(2));
11494 return DAG.getBitcast(MVT::v4f32, InsVecElt);
11495 }
11496 }
11497
11498 if (Subtarget.isISA3_1()) {
11499 if ((VT == MVT::v2i64 || VT == MVT::v2f64) && !Subtarget.isPPC64())
11500 return SDValue();
11501 // On P10, we have legal lowering for constant and variable indices for
11502 // all vectors.
11503 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
11504 VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64)
11505 return Op;
11506 }
11507
11508 // Before P10, we have legal lowering for constant indices but not for
11509 // variable ones.
11510 if (!C)
11511 return SDValue();
11512
11513 // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
11514 if (VT == MVT::v8i16 || VT == MVT::v16i8) {
11515 SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2);
11516 unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
11517 unsigned InsertAtElement = C->getZExtValue();
11518 unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
11519 if (Subtarget.isLittleEndian()) {
11520 InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
11521 }
11522 return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz,
11523 DAG.getConstant(InsertAtByte, dl, MVT::i32));
11524 }
11525 return Op;
11526}
11527
11528SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
11529 SelectionDAG &DAG) const {
11530 SDLoc dl(Op);
11531 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
11532 SDValue LoadChain = LN->getChain();
11533 SDValue BasePtr = LN->getBasePtr();
11534 EVT VT = Op.getValueType();
11535
11536 if (VT != MVT::v256i1 && VT != MVT::v512i1)
11537 return Op;
11538
11539 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
11540 // Here we create 2 or 4 v16i8 loads to load the pair or accumulator value in
11541 // 2 or 4 vsx registers.
11542 assert((VT != MVT::v512i1 || Subtarget.hasMMA()) &&
11543 "Type unsupported without MMA");
11544 assert((VT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
11545 "Type unsupported without paired vector support");
11546 Align Alignment = LN->getAlign();
11547   SmallVector<SDValue, 4> Loads;
11548   SmallVector<SDValue, 4> LoadChains;
11549 unsigned NumVecs = VT.getSizeInBits() / 128;
11550 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
11551 SDValue Load =
11552 DAG.getLoad(MVT::v16i8, dl, LoadChain, BasePtr,
11553 LN->getPointerInfo().getWithOffset(Idx * 16),
11554 commonAlignment(Alignment, Idx * 16),
11555 LN->getMemOperand()->getFlags(), LN->getAAInfo());
11556 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
11557 DAG.getConstant(16, dl, BasePtr.getValueType()));
11558 Loads.push_back(Load);
11559 LoadChains.push_back(Load.getValue(1));
11560 }
11561 if (Subtarget.isLittleEndian()) {
11562 std::reverse(Loads.begin(), Loads.end());
11563 std::reverse(LoadChains.begin(), LoadChains.end());
11564 }
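  // On little-endian targets the underlying registers of a pair/accumulator
  // map to memory in the opposite order, so the loaded values (and their
  // chains) are reversed before building the wide result.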
11565 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
11566 SDValue Value =
11567 DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD,
11568 dl, VT, Loads);
11569 SDValue RetOps[] = {Value, TF};
11570 return DAG.getMergeValues(RetOps, dl);
11571}
11572
11573SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
11574 SelectionDAG &DAG) const {
11575 SDLoc dl(Op);
11576 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
11577 SDValue StoreChain = SN->getChain();
11578 SDValue BasePtr = SN->getBasePtr();
11579 SDValue Value = SN->getValue();
11580 SDValue Value2 = SN->getValue();
11581 EVT StoreVT = Value.getValueType();
11582
11583 if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
11584 return Op;
11585
11586 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
11587 // Here we create 2 or 4 v16i8 stores to store the pair or accumulator
11588 // underlying registers individually.
11589 assert((StoreVT != MVT::v512i1 || Subtarget.hasMMA()) &&
11590 "Type unsupported without MMA");
11591 assert((StoreVT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
11592 "Type unsupported without paired vector support");
11593 Align Alignment = SN->getAlign();
11594   SmallVector<SDValue, 4> Stores;
11595   unsigned NumVecs = 2;
11596 if (StoreVT == MVT::v512i1) {
11597 if (Subtarget.isISAFuture()) {
11598 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
11599 MachineSDNode *ExtNode = DAG.getMachineNode(
11600 PPC::DMXXEXTFDMR512, dl, ArrayRef(ReturnTypes, 2), Op.getOperand(1));
11601
11602 Value = SDValue(ExtNode, 0);
11603 Value2 = SDValue(ExtNode, 1);
11604 } else
11605 Value = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, Value);
11606 NumVecs = 4;
11607 }
11608 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
11609 unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx;
11610 SDValue Elt;
11611 if (Subtarget.isISAFuture()) {
11612 VecNum = Subtarget.isLittleEndian() ? 1 - (Idx % 2) : (Idx % 2);
11613 Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11614 Idx > 1 ? Value2 : Value,
11615 DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
11616 } else
11617 Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, Value,
11618 DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
11619
11620 SDValue Store =
11621 DAG.getStore(StoreChain, dl, Elt, BasePtr,
11622 SN->getPointerInfo().getWithOffset(Idx * 16),
11623 commonAlignment(Alignment, Idx * 16),
11624 SN->getMemOperand()->getFlags(), SN->getAAInfo());
11625 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
11626 DAG.getConstant(16, dl, BasePtr.getValueType()));
11627 Stores.push_back(Store);
11628 }
11629 SDValue TF = DAG.getTokenFactor(dl, Stores);
11630 return TF;
11631}
11632
11633SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
11634 SDLoc dl(Op);
11635 if (Op.getValueType() == MVT::v4i32) {
11636 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
11637
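    // The 32 x 32 -> 32 bit multiply is decomposed into 16-bit halves:
    // lo(LHS)*lo(RHS) forms the low product, and the cross terms
    // lo(LHS)*hi(RHS) + hi(LHS)*lo(RHS), shifted left by 16, supply the rest;
    // hi(LHS)*hi(RHS) only affects bits above 32 and is dropped.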
11638 SDValue Zero = getCanonicalConstSplat(0, 1, MVT::v4i32, DAG, dl);
11639 // +16 as shift amt.
11640 SDValue Neg16 = getCanonicalConstSplat(-16, 4, MVT::v4i32, DAG, dl);
11641 SDValue RHSSwap = // = vrlw RHS, 16
11642 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
11643
11644 // Shrinkify inputs to v8i16.
11645 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
11646 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
11647 RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
11648
11649 // Low parts multiplied together, generating 32-bit results (we ignore the
11650 // top parts).
11651 SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
11652 LHS, RHS, DAG, dl, MVT::v4i32);
11653
11654 SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
11655 LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
11656 // Shift the high parts up 16 bits.
11657 HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
11658 Neg16, DAG, dl);
11659 return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
11660 } else if (Op.getValueType() == MVT::v16i8) {
11661 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
11662 bool isLittleEndian = Subtarget.isLittleEndian();
11663
11664 // Multiply the even 8-bit parts, producing 16-bit sums.
11665 SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
11666 LHS, RHS, DAG, dl, MVT::v8i16);
11667 EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
11668
11669 // Multiply the odd 8-bit parts, producing 16-bit sums.
11670 SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
11671 LHS, RHS, DAG, dl, MVT::v8i16);
11672 OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
11673
11674 // Merge the results together. Because vmuleub and vmuloub are
11675 // instructions with a big-endian bias, we must reverse the
11676 // element numbering and reverse the meaning of "odd" and "even"
11677 // when generating little endian code.
11678 int Ops[16];
11679 for (unsigned i = 0; i != 8; ++i) {
11680 if (isLittleEndian) {
11681 Ops[i*2 ] = 2*i;
11682 Ops[i*2+1] = 2*i+16;
11683 } else {
11684 Ops[i*2 ] = 2*i+1;
11685 Ops[i*2+1] = 2*i+1+16;
11686 }
11687 }
11688 if (isLittleEndian)
11689 return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
11690 else
11691 return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
11692 } else {
11693 llvm_unreachable("Unknown mul to lower!");
11694 }
11695}
11696
11697SDValue PPCTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
11698 bool IsStrict = Op->isStrictFPOpcode();
11699 if (Op.getOperand(IsStrict ? 1 : 0).getValueType() == MVT::f128 &&
11700 !Subtarget.hasP9Vector())
11701 return SDValue();
11702
11703 return Op;
11704}
11705
11706// Custom lowering for fpext v2f32 to v2f64
11707SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
11708
11709 assert(Op.getOpcode() == ISD::FP_EXTEND &&
11710 "Should only be called for ISD::FP_EXTEND");
11711
11712 // FIXME: handle extends from half precision float vectors on P9.
11713 // We only want to custom lower an extend from v2f32 to v2f64.
11714 if (Op.getValueType() != MVT::v2f64 ||
11715 Op.getOperand(0).getValueType() != MVT::v2f32)
11716 return SDValue();
11717
11718 SDLoc dl(Op);
11719 SDValue Op0 = Op.getOperand(0);
11720
11721 switch (Op0.getOpcode()) {
11722 default:
11723 return SDValue();
11724   case ISD::EXTRACT_SUBVECTOR: {
11725     assert(Op0.getNumOperands() == 2 &&
11726 isa<ConstantSDNode>(Op0->getOperand(1)) &&
11727 "Node should have 2 operands with second one being a constant!");
11728
11729 if (Op0.getOperand(0).getValueType() != MVT::v4f32)
11730 return SDValue();
11731
11732 // Custom lower is only done for high or low doubleword.
11733 int Idx = Op0.getConstantOperandVal(1);
11734 if (Idx % 2 != 0)
11735 return SDValue();
11736
11737 // Since input is v4f32, at this point Idx is either 0 or 2.
11738 // Shift to get the doubleword position we want.
11739 int DWord = Idx >> 1;
11740
11741 // High and low word positions are different on little endian.
11742 if (Subtarget.isLittleEndian())
11743 DWord ^= 0x1;
11744
11745 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64,
11746 Op0.getOperand(0), DAG.getConstant(DWord, dl, MVT::i32));
11747 }
11748 case ISD::FADD:
11749 case ISD::FMUL:
11750 case ISD::FSUB: {
11751 SDValue NewLoad[2];
11752 for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
11753 // Ensure both input are loads.
11754 SDValue LdOp = Op0.getOperand(i);
11755 if (LdOp.getOpcode() != ISD::LOAD)
11756 return SDValue();
11757 // Generate new load node.
11758 LoadSDNode *LD = cast<LoadSDNode>(LdOp);
11759 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
11760 NewLoad[i] = DAG.getMemIntrinsicNode(
11761 PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
11762 LD->getMemoryVT(), LD->getMemOperand());
11763 }
11764 SDValue NewOp =
11765 DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, NewLoad[0],
11766 NewLoad[1], Op0.getNode()->getFlags());
11767 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewOp,
11768 DAG.getConstant(0, dl, MVT::i32));
11769 }
11770 case ISD::LOAD: {
11771 LoadSDNode *LD = cast<LoadSDNode>(Op0);
11772 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
11773 SDValue NewLd = DAG.getMemIntrinsicNode(
11774 PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
11775 LD->getMemoryVT(), LD->getMemOperand());
11776 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewLd,
11777 DAG.getConstant(0, dl, MVT::i32));
11778 }
11779 }
11780   llvm_unreachable("ERROR: Should return for all cases within switch.");
11781}
11782
11783/// LowerOperation - Provide custom lowering hooks for some operations.
11784///
11785 SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
11786   switch (Op.getOpcode()) {
11787 default: llvm_unreachable("Wasn't expecting to be able to lower this!");
11788 case ISD::FPOW: return lowerPow(Op, DAG);
11789 case ISD::FSIN: return lowerSin(Op, DAG);
11790 case ISD::FCOS: return lowerCos(Op, DAG);
11791 case ISD::FLOG: return lowerLog(Op, DAG);
11792 case ISD::FLOG10: return lowerLog10(Op, DAG);
11793 case ISD::FEXP: return lowerExp(Op, DAG);
11794 case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
11795 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
11796 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
11797 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
11798 case ISD::JumpTable: return LowerJumpTable(Op, DAG);
11799 case ISD::STRICT_FSETCC:
11800   case ISD::STRICT_FSETCCS:
11801   case ISD::SETCC: return LowerSETCC(Op, DAG);
11802 case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
11803 case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
11804
11805 case ISD::INLINEASM:
11806 case ISD::INLINEASM_BR: return LowerINLINEASM(Op, DAG);
11807 // Variable argument lowering.
11808 case ISD::VASTART: return LowerVASTART(Op, DAG);
11809 case ISD::VAARG: return LowerVAARG(Op, DAG);
11810 case ISD::VACOPY: return LowerVACOPY(Op, DAG);
11811
11812 case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG);
11813 case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
11814   case ISD::GET_DYNAMIC_AREA_OFFSET:
11815     return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
11816
11817 // Exception handling lowering.
11818 case ISD::EH_DWARF_CFA: return LowerEH_DWARF_CFA(Op, DAG);
11819 case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
11820 case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
11821
11822 case ISD::LOAD: return LowerLOAD(Op, DAG);
11823 case ISD::STORE: return LowerSTORE(Op, DAG);
11824 case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
11825 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
11826   case ISD::STRICT_FP_TO_UINT:
11827   case ISD::STRICT_FP_TO_SINT:
11828   case ISD::FP_TO_UINT:
11829   case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
11830   case ISD::STRICT_UINT_TO_FP:
11831   case ISD::STRICT_SINT_TO_FP:
11832   case ISD::UINT_TO_FP:
11833 case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
11834 case ISD::GET_ROUNDING: return LowerGET_ROUNDING(Op, DAG);
11835
11836 // Lower 64-bit shifts.
11837 case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
11838 case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
11839 case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);
11840
11841 case ISD::FSHL: return LowerFunnelShift(Op, DAG);
11842 case ISD::FSHR: return LowerFunnelShift(Op, DAG);
11843
11844 // Vector-related lowering.
11845 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
11846 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
11847 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
11848 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
11849 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
11850 case ISD::MUL: return LowerMUL(Op, DAG);
11851 case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
11852   case ISD::STRICT_FP_ROUND:
11853   case ISD::FP_ROUND:
11854 return LowerFP_ROUND(Op, DAG);
11855 case ISD::ROTL: return LowerROTL(Op, DAG);
11856
11857 // For counter-based loop handling.
11858 case ISD::INTRINSIC_W_CHAIN: return SDValue();
11859
11860 case ISD::BITCAST: return LowerBITCAST(Op, DAG);
11861
11862 // Frame & Return address.
11863 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
11864 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
11865
11866   case ISD::INTRINSIC_VOID:
11867     return LowerINTRINSIC_VOID(Op, DAG);
11868   case ISD::BSWAP:
11869     return LowerBSWAP(Op, DAG);
11870   case ISD::ATOMIC_CMP_SWAP:
11871     return LowerATOMIC_CMP_SWAP(Op, DAG);
11872 case ISD::ATOMIC_STORE:
11873 return LowerATOMIC_LOAD_STORE(Op, DAG);
11874 case ISD::IS_FPCLASS:
11875 return LowerIS_FPCLASS(Op, DAG);
11876 }
11877}
11878
11879 void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
11880                                            SmallVectorImpl<SDValue> &Results,
11881                                            SelectionDAG &DAG) const {
11882 SDLoc dl(N);
11883 switch (N->getOpcode()) {
11884 default:
11885 llvm_unreachable("Do not know how to custom type legalize this operation!");
11886 case ISD::ATOMIC_LOAD: {
11887 SDValue Res = LowerATOMIC_LOAD_STORE(SDValue(N, 0), DAG);
11888 Results.push_back(Res);
11889 Results.push_back(Res.getValue(1));
11890 break;
11891 }
11892 case ISD::READCYCLECOUNTER: {
11893 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
11894 SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
11895
11896 Results.push_back(
11897 DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, RTB, RTB.getValue(1)));
11898 Results.push_back(RTB.getValue(2));
11899 break;
11900 }
11901   case ISD::INTRINSIC_W_CHAIN: {
11902     if (N->getConstantOperandVal(1) != Intrinsic::loop_decrement)
11903 break;
11904
11905 assert(N->getValueType(0) == MVT::i1 &&
11906 "Unexpected result type for CTR decrement intrinsic");
11907 EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
11908 N->getValueType(0));
11909 SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
11910 SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
11911 N->getOperand(1));
11912
11913 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt));
11914 Results.push_back(NewInt.getValue(1));
11915 break;
11916 }
11917   case ISD::INTRINSIC_WO_CHAIN: {
11918     switch (N->getConstantOperandVal(0)) {
11919 case Intrinsic::ppc_pack_longdouble:
11920 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
11921 N->getOperand(2), N->getOperand(1)));
11922 break;
11923 case Intrinsic::ppc_maxfe:
11924 case Intrinsic::ppc_minfe:
11925 case Intrinsic::ppc_fnmsub:
11926 case Intrinsic::ppc_convert_f128_to_ppcf128:
11927 Results.push_back(LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), DAG));
11928 break;
11929 }
11930 break;
11931 }
11932 case ISD::VAARG: {
11933 if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
11934 return;
11935
11936 EVT VT = N->getValueType(0);
11937
11938 if (VT == MVT::i64) {
11939 SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);
11940
11941 Results.push_back(NewNode);
11942 Results.push_back(NewNode.getValue(1));
11943 }
11944 return;
11945 }
11946   case ISD::STRICT_FP_TO_SINT:
11947   case ISD::STRICT_FP_TO_UINT:
11948   case ISD::FP_TO_SINT:
11949 case ISD::FP_TO_UINT: {
11950 // LowerFP_TO_INT() can only handle f32 and f64.
11951 if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() ==
11952 MVT::ppcf128)
11953 return;
11954 SDValue LoweredValue = LowerFP_TO_INT(SDValue(N, 0), DAG, dl);
11955 Results.push_back(LoweredValue);
11956 if (N->isStrictFPOpcode())
11957 Results.push_back(LoweredValue.getValue(1));
11958 return;
11959 }
11960 case ISD::TRUNCATE: {
11961 if (!N->getValueType(0).isVector())
11962 return;
11963 SDValue Lowered = LowerTRUNCATEVector(SDValue(N, 0), DAG);
11964 if (Lowered)
11965 Results.push_back(Lowered);
11966 return;
11967 }
11968 case ISD::FSHL:
11969 case ISD::FSHR:
11970 // Don't handle funnel shifts here.
11971 return;
11972 case ISD::BITCAST:
11973 // Don't handle bitcast here.
11974 return;
11975 case ISD::FP_EXTEND:
11976 SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);
11977 if (Lowered)
11978 Results.push_back(Lowered);
11979 return;
11980 }
11981}
11982
11983//===----------------------------------------------------------------------===//
11984// Other Lowering Code
11985//===----------------------------------------------------------------------===//
11986
11987 static Instruction *callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id) {
11988   Module *M = Builder.GetInsertBlock()->getParent()->getParent();
11989 Function *Func = Intrinsic::getDeclaration(M, Id);
11990 return Builder.CreateCall(Func, {});
11991}
11992
11993// The mappings for emitLeading/TrailingFence are taken from
11994// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
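// In short: the leading fence is a sync for seq_cst and an lwsync for release
// (or stronger) operations; the trailing fence for acquire (or stronger)
// operations is an lwsync, except that plain loads use the llvm.ppc.cfence
// intrinsic instead.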
11995 Instruction *PPCTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
11996                                                  Instruction *Inst,
11997 AtomicOrdering Ord) const {
11998   if (Ord == AtomicOrdering::SequentiallyConsistent)
11999     return callIntrinsic(Builder, Intrinsic::ppc_sync);
12000 if (isReleaseOrStronger(Ord))
12001 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
12002 return nullptr;
12003}
12004
12005 Instruction *PPCTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
12006                                                   Instruction *Inst,
12007 AtomicOrdering Ord) const {
12008 if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
12009 // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
12010 // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
12011 // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
12012 if (isa<LoadInst>(Inst))
12013 return Builder.CreateCall(
12014           Intrinsic::getDeclaration(
12015               Builder.GetInsertBlock()->getParent()->getParent(),
12016 Intrinsic::ppc_cfence, {Inst->getType()}),
12017 {Inst});
12018 // FIXME: Can use isync for rmw operation.
12019 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
12020 }
12021 return nullptr;
12022}
12023
12024 MachineBasicBlock *PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI,
12025                                                        MachineBasicBlock *BB,
12026                                                        unsigned AtomicSize,
12027 unsigned BinOpcode,
12028 unsigned CmpOpcode,
12029 unsigned CmpPred) const {
12030 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
12031 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12032
12033 auto LoadMnemonic = PPC::LDARX;
12034 auto StoreMnemonic = PPC::STDCX;
12035 switch (AtomicSize) {
12036 default:
12037 llvm_unreachable("Unexpected size of atomic entity");
12038 case 1:
12039 LoadMnemonic = PPC::LBARX;
12040 StoreMnemonic = PPC::STBCX;
12041 assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
12042 break;
12043 case 2:
12044 LoadMnemonic = PPC::LHARX;
12045 StoreMnemonic = PPC::STHCX;
12046 assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
12047 break;
12048 case 4:
12049 LoadMnemonic = PPC::LWARX;
12050 StoreMnemonic = PPC::STWCX;
12051 break;
12052 case 8:
12053 LoadMnemonic = PPC::LDARX;
12054 StoreMnemonic = PPC::STDCX;
12055 break;
12056 }
12057
12058 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12059 MachineFunction *F = BB->getParent();
12060   MachineFunction::iterator It = ++BB->getIterator();
12061
12062 Register dest = MI.getOperand(0).getReg();
12063 Register ptrA = MI.getOperand(1).getReg();
12064 Register ptrB = MI.getOperand(2).getReg();
12065 Register incr = MI.getOperand(3).getReg();
12066 DebugLoc dl = MI.getDebugLoc();
12067
12068 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
12069 MachineBasicBlock *loop2MBB =
12070 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
12071 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
12072 F->insert(It, loopMBB);
12073 if (CmpOpcode)
12074 F->insert(It, loop2MBB);
12075 F->insert(It, exitMBB);
12076 exitMBB->splice(exitMBB->begin(), BB,
12077 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12078   exitMBB->transferSuccessorsAndUpdatePHIs(BB);
12079
12080 MachineRegisterInfo &RegInfo = F->getRegInfo();
12081 Register TmpReg = (!BinOpcode) ? incr :
12082 RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
12083 : &PPC::GPRCRegClass);
12084
12085 // thisMBB:
12086 // ...
12087 // fallthrough --> loopMBB
12088 BB->addSuccessor(loopMBB);
12089
12090 // loopMBB:
12091 // l[wd]arx dest, ptr
12092 // add r0, dest, incr
12093 // st[wd]cx. r0, ptr
12094 // bne- loopMBB
12095 // fallthrough --> exitMBB
12096
12097 // For max/min...
12098 // loopMBB:
12099 // l[wd]arx dest, ptr
12100 // cmpl?[wd] dest, incr
12101 // bgt exitMBB
12102 // loop2MBB:
12103 // st[wd]cx. dest, ptr
12104 // bne- loopMBB
12105 // fallthrough --> exitMBB
12106
12107 BB = loopMBB;
12108 BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
12109 .addReg(ptrA).addReg(ptrB);
12110 if (BinOpcode)
12111 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
12112 if (CmpOpcode) {
12113 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12114 // Signed comparisons of byte or halfword values must be sign-extended.
12115 if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
12116 Register ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
12117 BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
12118 ExtReg).addReg(dest);
12119 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ExtReg).addReg(incr);
12120 } else
12121 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(dest).addReg(incr);
12122
12123 BuildMI(BB, dl, TII->get(PPC::BCC))
12124 .addImm(CmpPred)
12125 .addReg(CrReg)
12126 .addMBB(exitMBB);
12127 BB->addSuccessor(loop2MBB);
12128 BB->addSuccessor(exitMBB);
12129 BB = loop2MBB;
12130 }
12131 BuildMI(BB, dl, TII->get(StoreMnemonic))
12132 .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
12133 BuildMI(BB, dl, TII->get(PPC::BCC))
12134 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
12135 BB->addSuccessor(loopMBB);
12136 BB->addSuccessor(exitMBB);
12137
12138 // exitMBB:
12139 // ...
12140 BB = exitMBB;
12141 return BB;
12142}
12143
12144 static bool isSignExtended(MachineInstr &MI, const PPCInstrInfo *TII) {
12145   switch(MI.getOpcode()) {
12146 default:
12147 return false;
12148 case PPC::COPY:
12149 return TII->isSignExtended(MI.getOperand(1).getReg(),
12150 &MI.getMF()->getRegInfo());
12151 case PPC::LHA:
12152 case PPC::LHA8:
12153 case PPC::LHAU:
12154 case PPC::LHAU8:
12155 case PPC::LHAUX:
12156 case PPC::LHAUX8:
12157 case PPC::LHAX:
12158 case PPC::LHAX8:
12159 case PPC::LWA:
12160 case PPC::LWAUX:
12161 case PPC::LWAX:
12162 case PPC::LWAX_32:
12163 case PPC::LWA_32:
12164 case PPC::PLHA:
12165 case PPC::PLHA8:
12166 case PPC::PLHA8pc:
12167 case PPC::PLHApc:
12168 case PPC::PLWA:
12169 case PPC::PLWA8:
12170 case PPC::PLWA8pc:
12171 case PPC::PLWApc:
12172 case PPC::EXTSB:
12173 case PPC::EXTSB8:
12174 case PPC::EXTSB8_32_64:
12175 case PPC::EXTSB8_rec:
12176 case PPC::EXTSB_rec:
12177 case PPC::EXTSH:
12178 case PPC::EXTSH8:
12179 case PPC::EXTSH8_32_64:
12180 case PPC::EXTSH8_rec:
12181 case PPC::EXTSH_rec:
12182 case PPC::EXTSW:
12183 case PPC::EXTSWSLI:
12184 case PPC::EXTSWSLI_32_64:
12185 case PPC::EXTSWSLI_32_64_rec:
12186 case PPC::EXTSWSLI_rec:
12187 case PPC::EXTSW_32:
12188 case PPC::EXTSW_32_64:
12189 case PPC::EXTSW_32_64_rec:
12190 case PPC::EXTSW_rec:
12191 case PPC::SRAW:
12192 case PPC::SRAWI:
12193 case PPC::SRAWI_rec:
12194 case PPC::SRAW_rec:
12195 return true;
12196 }
12197 return false;
12198}
12199
12200 MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
12201     MachineInstr &MI, MachineBasicBlock *BB,
12202     bool is8bit, // operation
12203 unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
12204 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
12205 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
12206
12207 // If this is a signed comparison and the value being compared is not known
12208 // to be sign extended, sign extend it here.
12209 DebugLoc dl = MI.getDebugLoc();
12210 MachineFunction *F = BB->getParent();
12211 MachineRegisterInfo &RegInfo = F->getRegInfo();
12212 Register incr = MI.getOperand(3).getReg();
12213 bool IsSignExtended =
12214 incr.isVirtual() && isSignExtended(*RegInfo.getVRegDef(incr), TII);
12215
12216 if (CmpOpcode == PPC::CMPW && !IsSignExtended) {
12217 Register ValueReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
12218 BuildMI(*BB, MI, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueReg)
12219 .addReg(MI.getOperand(3).getReg());
12220 MI.getOperand(3).setReg(ValueReg);
12221 incr = ValueReg;
12222 }
12223 // If we support part-word atomic mnemonics, just use them
12224 if (Subtarget.hasPartwordAtomics())
12225 return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
12226 CmpPred);
12227
12228 // In 64 bit mode we have to use 64 bits for addresses, even though the
12229 // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address
12230 // registers without caring whether they're 32 or 64, but here we're
12231 // doing actual arithmetic on the addresses.
12232 bool is64bit = Subtarget.isPPC64();
12233 bool isLittleEndian = Subtarget.isLittleEndian();
12234 unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
12235
12236 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12237   MachineFunction::iterator It = ++BB->getIterator();
12238
12239 Register dest = MI.getOperand(0).getReg();
12240 Register ptrA = MI.getOperand(1).getReg();
12241 Register ptrB = MI.getOperand(2).getReg();
12242
12243 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
12244 MachineBasicBlock *loop2MBB =
12245 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
12246 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
12247 F->insert(It, loopMBB);
12248 if (CmpOpcode)
12249 F->insert(It, loop2MBB);
12250 F->insert(It, exitMBB);
12251 exitMBB->splice(exitMBB->begin(), BB,
12252 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12253   exitMBB->transferSuccessorsAndUpdatePHIs(BB);
12254
12255 const TargetRegisterClass *RC =
12256 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
12257 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
12258
12259 Register PtrReg = RegInfo.createVirtualRegister(RC);
12260 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
12261 Register ShiftReg =
12262 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
12263 Register Incr2Reg = RegInfo.createVirtualRegister(GPRC);
12264 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
12265 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
12266 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
12267 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
12268 Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
12269 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
12270 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
12271 Register SrwDestReg = RegInfo.createVirtualRegister(GPRC);
12272 Register Ptr1Reg;
12273 Register TmpReg =
12274 (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);
12275
12276 // thisMBB:
12277 // ...
12278 // fallthrough --> loopMBB
12279 BB->addSuccessor(loopMBB);
12280
12281 // The 4-byte load must be aligned, while a char or short may be
12282 // anywhere in the word. Hence all this nasty bookkeeping code.
12283 // add ptr1, ptrA, ptrB [copy if ptrA==0]
12284 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
12285 // xori shift, shift1, 24 [16]
12286 // rlwinm ptr, ptr1, 0, 0, 29
12287 // slw incr2, incr, shift
12288 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
12289 // slw mask, mask2, shift
12290 // loopMBB:
12291 // lwarx tmpDest, ptr
12292 // add tmp, tmpDest, incr2
12293 // andc tmp2, tmpDest, mask
12294 // and tmp3, tmp, mask
12295 // or tmp4, tmp3, tmp2
12296 // stwcx. tmp4, ptr
12297 // bne- loopMBB
12298 // fallthrough --> exitMBB
12299 // srw SrwDest, tmpDest, shift
12300 // rlwinm SrwDest, SrwDest, 0, 24 [16], 31
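  // In short: lwarx loads the containing word, the operation is applied to a
  // shifted copy of the operand, the affected byte/halfword lanes are merged
  // back under the mask, and stwcx. retries until the store succeeds; the old
  // value is then shifted back down and masked to form the result.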
12301 if (ptrA != ZeroReg) {
12302 Ptr1Reg = RegInfo.createVirtualRegister(RC);
12303 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
12304 .addReg(ptrA)
12305 .addReg(ptrB);
12306 } else {
12307 Ptr1Reg = ptrB;
12308 }
12309   // We need to use the 32-bit subregister here to avoid a register class
12310   // mismatch in 64-bit mode.
12311 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
12312 .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
12313 .addImm(3)
12314 .addImm(27)
12315 .addImm(is8bit ? 28 : 27);
12316 if (!isLittleEndian)
12317 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
12318 .addReg(Shift1Reg)
12319 .addImm(is8bit ? 24 : 16);
12320 if (is64bit)
12321 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
12322 .addReg(Ptr1Reg)
12323 .addImm(0)
12324 .addImm(61);
12325 else
12326 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
12327 .addReg(Ptr1Reg)
12328 .addImm(0)
12329 .addImm(0)
12330 .addImm(29);
12331 BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
12332 if (is8bit)
12333 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
12334 else {
12335 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
12336 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
12337 .addReg(Mask3Reg)
12338 .addImm(65535);
12339 }
12340 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
12341 .addReg(Mask2Reg)
12342 .addReg(ShiftReg);
12343
12344 BB = loopMBB;
12345 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
12346 .addReg(ZeroReg)
12347 .addReg(PtrReg);
12348 if (BinOpcode)
12349 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
12350 .addReg(Incr2Reg)
12351 .addReg(TmpDestReg);
12352 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
12353 .addReg(TmpDestReg)
12354 .addReg(MaskReg);
12355 BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);
12356 if (CmpOpcode) {
12357 // For unsigned comparisons, we can directly compare the shifted values.
12358 // For signed comparisons we shift and sign extend.
12359 Register SReg = RegInfo.createVirtualRegister(GPRC);
12360 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12361 BuildMI(BB, dl, TII->get(PPC::AND), SReg)
12362 .addReg(TmpDestReg)
12363 .addReg(MaskReg);
12364 unsigned ValueReg = SReg;
12365 unsigned CmpReg = Incr2Reg;
12366 if (CmpOpcode == PPC::CMPW) {
12367 ValueReg = RegInfo.createVirtualRegister(GPRC);
12368 BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
12369 .addReg(SReg)
12370 .addReg(ShiftReg);
12371 Register ValueSReg = RegInfo.createVirtualRegister(GPRC);
12372 BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
12373 .addReg(ValueReg);
12374 ValueReg = ValueSReg;
12375 CmpReg = incr;
12376 }
12377 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ValueReg).addReg(CmpReg);
12378 BuildMI(BB, dl, TII->get(PPC::BCC))
12379 .addImm(CmpPred)
12380 .addReg(CrReg)
12381 .addMBB(exitMBB);
12382 BB->addSuccessor(loop2MBB);
12383 BB->addSuccessor(exitMBB);
12384 BB = loop2MBB;
12385 }
12386 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
12387 BuildMI(BB, dl, TII->get(PPC::STWCX))
12388 .addReg(Tmp4Reg)
12389 .addReg(ZeroReg)
12390 .addReg(PtrReg);
12391 BuildMI(BB, dl, TII->get(PPC::BCC))
12392       .addImm(PPC::PRED_NE)
12393       .addReg(PPC::CR0)
12394 .addMBB(loopMBB);
12395 BB->addSuccessor(loopMBB);
12396 BB->addSuccessor(exitMBB);
12397
12398 // exitMBB:
12399 // ...
12400 BB = exitMBB;
12401 // Since the shift amount is not a constant, we need to clear
12402 // the upper bits with a separate RLWINM.
12403 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::RLWINM), dest)
12404 .addReg(SrwDestReg)
12405 .addImm(0)
12406 .addImm(is8bit ? 24 : 16)
12407 .addImm(31);
12408 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), SrwDestReg)
12409 .addReg(TmpDestReg)
12410 .addReg(ShiftReg);
12411 return BB;
12412}
12413
12414 MachineBasicBlock *
12415 PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
12416                                     MachineBasicBlock *MBB) const {
12417 DebugLoc DL = MI.getDebugLoc();
12418 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12419 const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
12420
12421 MachineFunction *MF = MBB->getParent();
12422   MachineRegisterInfo &MRI = MF->getRegInfo();
12423
12424   const BasicBlock *BB = MBB->getBasicBlock();
12425   MachineFunction::iterator I = ++MBB->getIterator();
12426
12427 Register DstReg = MI.getOperand(0).getReg();
12428 const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
12429 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
12430 Register mainDstReg = MRI.createVirtualRegister(RC);
12431 Register restoreDstReg = MRI.createVirtualRegister(RC);
12432
12433 MVT PVT = getPointerTy(MF->getDataLayout());
12434 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
12435 "Invalid Pointer Size!");
12436 // For v = setjmp(buf), we generate
12437 //
12438 // thisMBB:
12439 // SjLjSetup mainMBB
12440 // bl mainMBB
12441 // v_restore = 1
12442 // b sinkMBB
12443 //
12444 // mainMBB:
12445 // buf[LabelOffset] = LR
12446 // v_main = 0
12447 //
12448 // sinkMBB:
12449 // v = phi(main, restore)
12450 //
12451
12452 MachineBasicBlock *thisMBB = MBB;
12453 MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
12454 MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
12455 MF->insert(I, mainMBB);
12456 MF->insert(I, sinkMBB);
12457
12458   MachineInstrBuilder MIB;
12459
12460 // Transfer the remainder of BB and its successor edges to sinkMBB.
12461 sinkMBB->splice(sinkMBB->begin(), MBB,
12462 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
12463   sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
12464
12465 // Note that the structure of the jmp_buf used here is not compatible
12466 // with that used by libc, and is not designed to be. Specifically, it
12467 // stores only those 'reserved' registers that LLVM does not otherwise
12468 // understand how to spill. Also, by convention, by the time this
12469 // intrinsic is called, Clang has already stored the frame address in the
12470 // first slot of the buffer and stack address in the third. Following the
12471 // X86 target code, we'll store the jump address in the second slot. We also
12472 // need to save the TOC pointer (R2) to handle jumps between shared
12473 // libraries, and that will be stored in the fourth slot. The thread
12474 // identifier (R13) is not affected.
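  // Resulting slot layout (in pointer-sized words): 0 = frame address (stored
  // by Clang), 1 = resume IP, 2 = stack pointer (stored by Clang), 3 = TOC
  // (R2), 4 = base pointer.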
12475
12476 // thisMBB:
12477 const int64_t LabelOffset = 1 * PVT.getStoreSize();
12478 const int64_t TOCOffset = 3 * PVT.getStoreSize();
12479 const int64_t BPOffset = 4 * PVT.getStoreSize();
12480
12481   // Prepare the IP in a register.
12482 const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
12483 Register LabelReg = MRI.createVirtualRegister(PtrRC);
12484 Register BufReg = MI.getOperand(1).getReg();
12485
12486 if (Subtarget.is64BitELFABI()) {
12487     setUsesTOCBasePtr(*MBB->getParent());
12488     MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
12489 .addReg(PPC::X2)
12490 .addImm(TOCOffset)
12491 .addReg(BufReg)
12492 .cloneMemRefs(MI);
12493 }
12494
12495   // Naked functions never have a base pointer, and so we use r1. For all
12496   // other functions, this decision must be delayed until PEI.
12497 unsigned BaseReg;
12498 if (MF->getFunction().hasFnAttribute(Attribute::Naked))
12499 BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
12500 else
12501 BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
12502
12503 MIB = BuildMI(*thisMBB, MI, DL,
12504 TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
12505 .addReg(BaseReg)
12506 .addImm(BPOffset)
12507 .addReg(BufReg)
12508 .cloneMemRefs(MI);
12509
12510 // Setup
12511 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
12512 MIB.addRegMask(TRI->getNoPreservedMask());
12513
12514 BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
12515
12516 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
12517 .addMBB(mainMBB);
12518 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
12519
12520 thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
12521 thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());
12522
12523 // mainMBB:
12524 // mainDstReg = 0
12525 MIB =
12526 BuildMI(mainMBB, DL,
12527 TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
12528
12529 // Store IP
12530 if (Subtarget.isPPC64()) {
12531 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
12532 .addReg(LabelReg)
12533 .addImm(LabelOffset)
12534 .addReg(BufReg);
12535 } else {
12536 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
12537 .addReg(LabelReg)
12538 .addImm(LabelOffset)
12539 .addReg(BufReg);
12540 }
12541 MIB.cloneMemRefs(MI);
12542
12543 BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
12544 mainMBB->addSuccessor(sinkMBB);
12545
12546 // sinkMBB:
12547 BuildMI(*sinkMBB, sinkMBB->begin(), DL,
12548 TII->get(PPC::PHI), DstReg)
12549 .addReg(mainDstReg).addMBB(mainMBB)
12550 .addReg(restoreDstReg).addMBB(thisMBB);
12551
12552 MI.eraseFromParent();
12553 return sinkMBB;
12554}
12555
12556 MachineBasicBlock *
12557 PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
12558                                      MachineBasicBlock *MBB) const {
12559 DebugLoc DL = MI.getDebugLoc();
12560 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12561
12562 MachineFunction *MF = MBB->getParent();
12563   MachineRegisterInfo &MRI = MF->getRegInfo();
12564
12565 MVT PVT = getPointerTy(MF->getDataLayout());
12566 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
12567 "Invalid Pointer Size!");
12568
12569 const TargetRegisterClass *RC =
12570 (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
12571 Register Tmp = MRI.createVirtualRegister(RC);
12572 // Since FP is only updated here but NOT referenced, it's treated as GPR.
12573 unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
12574 unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
12575 unsigned BP =
12576 (PVT == MVT::i64)
12577 ? PPC::X30
12578 : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
12579 : PPC::R30);
12580
12581   MachineInstrBuilder MIB;
12582
12583 const int64_t LabelOffset = 1 * PVT.getStoreSize();
12584 const int64_t SPOffset = 2 * PVT.getStoreSize();
12585 const int64_t TOCOffset = 3 * PVT.getStoreSize();
12586 const int64_t BPOffset = 4 * PVT.getStoreSize();
12587
12588 Register BufReg = MI.getOperand(0).getReg();
12589
12590 // Reload FP (the jumped-to function may not have had a
12591 // frame pointer, and if so, then its r31 will be restored
12592 // as necessary).
12593 if (PVT == MVT::i64) {
12594 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
12595 .addImm(0)
12596 .addReg(BufReg);
12597 } else {
12598 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
12599 .addImm(0)
12600 .addReg(BufReg);
12601 }
12602 MIB.cloneMemRefs(MI);
12603
12604 // Reload IP
12605 if (PVT == MVT::i64) {
12606 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
12607 .addImm(LabelOffset)
12608 .addReg(BufReg);
12609 } else {
12610 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
12611 .addImm(LabelOffset)
12612 .addReg(BufReg);
12613 }
12614 MIB.cloneMemRefs(MI);
12615
12616 // Reload SP
12617 if (PVT == MVT::i64) {
12618 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
12619 .addImm(SPOffset)
12620 .addReg(BufReg);
12621 } else {
12622 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
12623 .addImm(SPOffset)
12624 .addReg(BufReg);
12625 }
12626 MIB.cloneMemRefs(MI);
12627
12628 // Reload BP
12629 if (PVT == MVT::i64) {
12630 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
12631 .addImm(BPOffset)
12632 .addReg(BufReg);
12633 } else {
12634 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
12635 .addImm(BPOffset)
12636 .addReg(BufReg);
12637 }
12638 MIB.cloneMemRefs(MI);
12639
12640 // Reload TOC
12641 if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
12642 setUsesTOCBasePtr(*MBB->getParent());
12643 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
12644 .addImm(TOCOffset)
12645 .addReg(BufReg)
12646 .cloneMemRefs(MI);
12647 }
12648
12649 // Jump
12650 BuildMI(*MBB, MI, DL,
12651 TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
12652 BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
12653
12654 MI.eraseFromParent();
12655 return MBB;
12656}
12657
12658bool PPCTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const {
12659 // If the function specifically requests inline stack probes, emit them.
12660 if (MF.getFunction().hasFnAttribute("probe-stack"))
12661 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
12662 "inline-asm";
12663 return false;
12664}
12665
12666unsigned PPCTargetLowering::getStackProbeSize(const MachineFunction &MF) const {
12667 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
12668 unsigned StackAlign = TFI->getStackAlignment();
12669 assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
12670 "Unexpected stack alignment");
12671 // The default stack probe size is 4096 if the function has no
12672 // stack-probe-size attribute.
12673 const Function &Fn = MF.getFunction();
12674 unsigned StackProbeSize =
12675 Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
12676 // Round down to the stack alignment.
12677 StackProbeSize &= ~(StackAlign - 1);
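 // For example, with a 16-byte stack alignment a "stack-probe-size" of 4100
 // is rounded down to 4096; a value smaller than the alignment rounds to 0,
 // in which case the alignment itself is used instead (see below).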
12678 return StackProbeSize ? StackProbeSize : StackAlign;
12679}
12680
12681// Lower dynamic stack allocation with probing. `emitProbedAlloca` is split
12682// into three phases. In the first phase, it uses the pseudo instruction
12683// PREPARE_PROBED_ALLOCA to get the future result of the actual FramePointer
12684// and FinalStackPtr. In the second phase, it generates a loop for probing
12685// blocks. At last, it uses the pseudo instruction DYNAREAOFFSET to get the
12686// future result of MaxCallFrameSize so it can calculate the correct data area pointer.
12687MachineBasicBlock *
12688PPCTargetLowering::emitProbedAlloca(MachineInstr &MI,
12689 MachineBasicBlock *MBB) const {
12690 const bool isPPC64 = Subtarget.isPPC64();
12691 MachineFunction *MF = MBB->getParent();
12692 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12693 DebugLoc DL = MI.getDebugLoc();
12694 const unsigned ProbeSize = getStackProbeSize(*MF);
12695 const BasicBlock *ProbedBB = MBB->getBasicBlock();
12696 MachineRegisterInfo &MRI = MF->getRegInfo();
12697 // The CFG of stack probing looks as follows:
12698 // +-----+
12699 // | MBB |
12700 // +--+--+
12701 // |
12702 // +----v----+
12703 // +--->+ TestMBB +---+
12704 // | +----+----+ |
12705 // | | |
12706 // | +-----v----+ |
12707 // +---+ BlockMBB | |
12708 // +----------+ |
12709 // |
12710 // +---------+ |
12711 // | TailMBB +<--+
12712 // +---------+
12713 // In MBB, calculate previous frame pointer and final stack pointer.
12714 // In TestMBB, test if sp is equal to final stack pointer, if so, jump to
12715 // TailMBB. In BlockMBB, update the sp atomically and jump back to TestMBB.
12716 // TailMBB is spliced via \p MI.
12717 MachineBasicBlock *TestMBB = MF->CreateMachineBasicBlock(ProbedBB);
12718 MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(ProbedBB);
12719 MachineBasicBlock *BlockMBB = MF->CreateMachineBasicBlock(ProbedBB);
12720
12721 MachineFunction::iterator MBBIter = ++MBB->getIterator();
12722 MF->insert(MBBIter, TestMBB);
12723 MF->insert(MBBIter, BlockMBB);
12724 MF->insert(MBBIter, TailMBB);
12725
12726 const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
12727 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
12728
12729 Register DstReg = MI.getOperand(0).getReg();
12730 Register NegSizeReg = MI.getOperand(1).getReg();
12731 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
12732 Register FinalStackPtr = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12733 Register FramePointer = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12734 Register ActualNegSizeReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12735
12736 // Since the value of NegSizeReg might be realigned during prologue/epilogue
12737 // insertion, insert a PREPARE_PROBED_ALLOCA pseudo instruction to get the
12738 // actual FramePointer and NegSize.
12739 unsigned ProbeOpc;
12740 if (!MRI.hasOneNonDBGUse(NegSizeReg))
12741 ProbeOpc =
12742 isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;
12743 else
12744 // By using the NEGSIZE_SAME_REG variants below, ActualNegSizeReg and
12745 // NegSizeReg will be allocated to the same physical register, avoiding a
12746 // redundant copy when the only use of NegSizeReg is the current MI, which
12747 // is replaced by PREPARE_PROBED_ALLOCA anyway.
12748 ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64
12749 : PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;
12750 BuildMI(*MBB, {MI}, DL, TII->get(ProbeOpc), FramePointer)
12751 .addDef(ActualNegSizeReg)
12752 .addReg(NegSizeReg)
12753 .add(MI.getOperand(2))
12754 .add(MI.getOperand(3));
12755
12756 // Calculate the final stack pointer, which equals SP + ActualNegSize.
12757 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
12758 FinalStackPtr)
12759 .addReg(SPReg)
12760 .addReg(ActualNegSizeReg);
12761
12762 // Materialize a scratch register for update.
12763 int64_t NegProbeSize = -(int64_t)ProbeSize;
12764 assert(isInt<32>(NegProbeSize) && "Unhandled probe size!");
12765 Register ScratchReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12766 if (!isInt<16>(NegProbeSize)) {
12767 Register TempReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12768 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
12769 .addImm(NegProbeSize >> 16);
12770 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
12771 ScratchReg)
12772 .addReg(TempReg)
12773 .addImm(NegProbeSize & 0xFFFF);
12774 } else
12775 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LI8 : PPC::LI), ScratchReg)
12776 .addImm(NegProbeSize);
12777
12778 {
12779 // Probing leading residual part.
12780 Register Div = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12781 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div)
12782 .addReg(ActualNegSizeReg)
12783 .addReg(ScratchReg);
12784 Register Mul = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12785 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::MULLD : PPC::MULLW), Mul)
12786 .addReg(Div)
12787 .addReg(ScratchReg);
12788 Register NegMod = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12789 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod)
12790 .addReg(Mul)
12791 .addReg(ActualNegSizeReg);
12792 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
12793 .addReg(FramePointer)
12794 .addReg(SPReg)
12795 .addReg(NegMod);
12796 }
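 // A quick numeric sketch of the residual computation above (assuming
 // ProbeSize == 4096): if ActualNegSize is -10000, Div = -10000 / -4096 = 2,
 // Mul = 2 * -4096 = -8192, and NegMod = -10000 - (-8192) = -1808. The
 // stdux/stwux then probes and moves SP by that residual, so the loop below
 // only has to handle whole ProbeSize blocks.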
12797
12798 {
12799 // The remaining part should be a multiple of ProbeSize.
12800 Register CmpResult = MRI.createVirtualRegister(&PPC::CRRCRegClass);
12801 BuildMI(TestMBB, DL, TII->get(isPPC64 ? PPC::CMPD : PPC::CMPW), CmpResult)
12802 .addReg(SPReg)
12803 .addReg(FinalStackPtr);
12804 BuildMI(TestMBB, DL, TII->get(PPC::BCC))
12805 .addImm(PPC::PRED_EQ)
12806 .addReg(CmpResult)
12807 .addMBB(TailMBB);
12808 TestMBB->addSuccessor(BlockMBB);
12809 TestMBB->addSuccessor(TailMBB);
12810 }
12811
12812 {
12813 // Touch the block.
12814 // |P...|P...|P...
12815 BuildMI(BlockMBB, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
12816 .addReg(FramePointer)
12817 .addReg(SPReg)
12818 .addReg(ScratchReg);
12819 BuildMI(BlockMBB, DL, TII->get(PPC::B)).addMBB(TestMBB);
12820 BlockMBB->addSuccessor(TestMBB);
12821 }
12822
12823 // Calculation of MaxCallFrameSize is deferred to prologue/epilogue
12824 // insertion, so use the DYNAREAOFFSET pseudo instruction to get the future result.
12825 Register MaxCallFrameSizeReg =
12826 MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12827 BuildMI(TailMBB, DL,
12828 TII->get(isPPC64 ? PPC::DYNAREAOFFSET8 : PPC::DYNAREAOFFSET),
12829 MaxCallFrameSizeReg)
12830 .add(MI.getOperand(2))
12831 .add(MI.getOperand(3));
12832 BuildMI(TailMBB, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), DstReg)
12833 .addReg(SPReg)
12834 .addReg(MaxCallFrameSizeReg);
12835
12836 // Splice instructions after MI to TailMBB.
12837 TailMBB->splice(TailMBB->end(), MBB,
12838 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
12839 TailMBB->transferSuccessorsAndUpdatePHIs(MBB);
12840 MBB->addSuccessor(TestMBB);
12841
12842 // Delete the pseudo instruction.
12843 MI.eraseFromParent();
12844
12845 ++NumDynamicAllocaProbed;
12846 return TailMBB;
12847}
12848
12849static bool IsSelectCC(MachineInstr &MI) {
12850 switch (MI.getOpcode()) {
12851 case PPC::SELECT_CC_I4:
12852 case PPC::SELECT_CC_I8:
12853 case PPC::SELECT_CC_F4:
12854 case PPC::SELECT_CC_F8:
12855 case PPC::SELECT_CC_F16:
12856 case PPC::SELECT_CC_VRRC:
12857 case PPC::SELECT_CC_VSFRC:
12858 case PPC::SELECT_CC_VSSRC:
12859 case PPC::SELECT_CC_VSRC:
12860 case PPC::SELECT_CC_SPE4:
12861 case PPC::SELECT_CC_SPE:
12862 return true;
12863 default:
12864 return false;
12865 }
12866}
12867
12868static bool IsSelect(MachineInstr &MI) {
12869 switch (MI.getOpcode()) {
12870 case PPC::SELECT_I4:
12871 case PPC::SELECT_I8:
12872 case PPC::SELECT_F4:
12873 case PPC::SELECT_F8:
12874 case PPC::SELECT_F16:
12875 case PPC::SELECT_SPE:
12876 case PPC::SELECT_SPE4:
12877 case PPC::SELECT_VRRC:
12878 case PPC::SELECT_VSFRC:
12879 case PPC::SELECT_VSSRC:
12880 case PPC::SELECT_VSRC:
12881 return true;
12882 default:
12883 return false;
12884 }
12885}
12886
12887MachineBasicBlock *
12888PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
12889 MachineBasicBlock *BB) const {
12890 if (MI.getOpcode() == TargetOpcode::STACKMAP ||
12891 MI.getOpcode() == TargetOpcode::PATCHPOINT) {
12892 if (Subtarget.is64BitELFABI() &&
12893 MI.getOpcode() == TargetOpcode::PATCHPOINT &&
12894 !Subtarget.isUsingPCRelativeCalls()) {
12895 // Call lowering should have added an r2 operand to indicate a dependence
12896 // on the TOC base pointer value. It can't, however, because there is no
12897 // way to mark the dependence as implicit there, and so the stackmap code
12898 // will confuse it with a regular operand. Instead, add the dependence
12899 // here.
12900 MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
12901 }
12902
12903 return emitPatchPoint(MI, BB);
12904 }
12905
12906 if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
12907 MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
12908 return emitEHSjLjSetJmp(MI, BB);
12909 } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
12910 MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
12911 return emitEHSjLjLongJmp(MI, BB);
12912 }
12913
12914 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12915
12916 // To "insert" these instructions we actually have to insert their
12917 // control-flow patterns.
12918 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12919 MachineFunction::iterator It = ++BB->getIterator();
12920
12921 MachineFunction *F = BB->getParent();
12922 MachineRegisterInfo &MRI = F->getRegInfo();
12923
12924 if (Subtarget.hasISEL() &&
12925 (MI.getOpcode() == PPC::SELECT_CC_I4 ||
12926 MI.getOpcode() == PPC::SELECT_CC_I8 ||
12927 MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8)) {
12928 SmallVector<MachineOperand, 2> Cond;
12929 if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
12930 MI.getOpcode() == PPC::SELECT_CC_I8)
12931 Cond.push_back(MI.getOperand(4));
12932 else
12933 Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
12934 Cond.push_back(MI.getOperand(1));
12935
12936 DebugLoc dl = MI.getDebugLoc();
12937 TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
12938 MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
12939 } else if (IsSelectCC(MI) || IsSelect(MI)) {
12940 // The incoming instruction knows the destination vreg to set, the
12941 // condition code register to branch on, the true/false values to
12942 // select between, and a branch opcode to use.
12943
12944 // thisMBB:
12945 // ...
12946 // TrueVal = ...
12947 // cmpTY ccX, r1, r2
12948 // bCC sinkMBB
12949 // fallthrough --> copy0MBB
12950 MachineBasicBlock *thisMBB = BB;
12951 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
12952 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
12953 DebugLoc dl = MI.getDebugLoc();
12954 F->insert(It, copy0MBB);
12955 F->insert(It, sinkMBB);
12956
12957 // Set the call frame size on entry to the new basic blocks.
12958 // See https://reviews.llvm.org/D156113.
12959 unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
12960 copy0MBB->setCallFrameSize(CallFrameSize);
12961 sinkMBB->setCallFrameSize(CallFrameSize);
12962
12963 // Transfer the remainder of BB and its successor edges to sinkMBB.
12964 sinkMBB->splice(sinkMBB->begin(), BB,
12965 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12966 sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
12967
12968 // Next, add the true and fallthrough blocks as its successors.
12969 BB->addSuccessor(copy0MBB);
12970 BB->addSuccessor(sinkMBB);
12971
12972 if (IsSelect(MI)) {
12973 BuildMI(BB, dl, TII->get(PPC::BC))
12974 .addReg(MI.getOperand(1).getReg())
12975 .addMBB(sinkMBB);
12976 } else {
12977 unsigned SelectPred = MI.getOperand(4).getImm();
12978 BuildMI(BB, dl, TII->get(PPC::BCC))
12979 .addImm(SelectPred)
12980 .addReg(MI.getOperand(1).getReg())
12981 .addMBB(sinkMBB);
12982 }
12983
12984 // copy0MBB:
12985 // %FalseValue = ...
12986 // # fallthrough to sinkMBB
12987 BB = copy0MBB;
12988
12989 // Update machine-CFG edges
12990 BB->addSuccessor(sinkMBB);
12991
12992 // sinkMBB:
12993 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
12994 // ...
12995 BB = sinkMBB;
12996 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
12997 .addReg(MI.getOperand(3).getReg())
12998 .addMBB(copy0MBB)
12999 .addReg(MI.getOperand(2).getReg())
13000 .addMBB(thisMBB);
13001 } else if (MI.getOpcode() == PPC::ReadTB) {
13002 // To read the 64-bit time-base register on a 32-bit target, we read the
13003 // two halves. Should the counter have wrapped while it was being read, we
13004 // need to try again.
13005 // ...
13006 // readLoop:
13007 // mfspr Rx,TBU # load from TBU
13008 // mfspr Ry,TB # load from TB
13009 // mfspr Rz,TBU # load from TBU
13010 // cmpw crX,Rx,Rz # check if 'old'='new'
13011 // bne readLoop # branch if they're not equal
13012 // ...
13013
13014 MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
13015 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
13016 DebugLoc dl = MI.getDebugLoc();
13017 F->insert(It, readMBB);
13018 F->insert(It, sinkMBB);
13019
13020 // Transfer the remainder of BB and its successor edges to sinkMBB.
13021 sinkMBB->splice(sinkMBB->begin(), BB,
13022 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13023 sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
13024
13025 BB->addSuccessor(readMBB);
13026 BB = readMBB;
13027
13028 MachineRegisterInfo &RegInfo = F->getRegInfo();
13029 Register ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
13030 Register LoReg = MI.getOperand(0).getReg();
13031 Register HiReg = MI.getOperand(1).getReg();
13032
13033 BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
13034 BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
13035 BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
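 // SPR 269 is TBU (upper 32 bits of the time base) and SPR 268 is TB/TBL
 // (lower 32 bits), matching the mfspr pseudo-assembly in the comment above.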
13036
13037 Register CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13038
13039 BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
13040 .addReg(HiReg)
13041 .addReg(ReadAgainReg);
13042 BuildMI(BB, dl, TII->get(PPC::BCC))
13043 .addImm(PPC::PRED_NE)
13044 .addReg(CmpReg)
13045 .addMBB(readMBB);
13046
13047 BB->addSuccessor(readMBB);
13048 BB->addSuccessor(sinkMBB);
13049 } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
13050 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
13051 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
13052 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
13053 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
13054 BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
13055 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
13056 BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
13057
13058 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
13059 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
13060 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
13061 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
13062 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
13063 BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
13064 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
13065 BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
13066
13067 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
13068 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
13069 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
13070 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
13071 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
13072 BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
13073 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
13074 BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
13075
13076 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
13077 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
13078 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
13079 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
13080 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
13081 BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
13082 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
13083 BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
13084
13085 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
13086 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
13087 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
13088 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
13089 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
13090 BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
13091 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
13092 BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
13093
13094 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
13095 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
13096 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
13097 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
13098 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
13099 BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
13100 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
13101 BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
13102
13103 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
13104 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LT);
13105 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
13106 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LT);
13107 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
13108 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LT);
13109 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
13110 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LT);
13111
13112 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
13113 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GT);
13114 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
13115 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GT);
13116 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
13117 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GT);
13118 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
13119 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GT);
13120
13121 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
13122 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LT);
13123 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
13124 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LT);
13125 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
13126 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LT);
13127 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
13128 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LT);
13129
13130 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
13131 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GT);
13132 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
13133 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GT);
13134 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
13135 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GT);
13136 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
13137 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GT);
13138
13139 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
13140 BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
13141 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
13142 BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
13143 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
13144 BB = EmitAtomicBinary(MI, BB, 4, 0);
13145 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
13146 BB = EmitAtomicBinary(MI, BB, 8, 0);
13147 else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
13148 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
13149 (Subtarget.hasPartwordAtomics() &&
13150 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
13151 (Subtarget.hasPartwordAtomics() &&
13152 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
13153 bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
13154
13155 auto LoadMnemonic = PPC::LDARX;
13156 auto StoreMnemonic = PPC::STDCX;
13157 switch (MI.getOpcode()) {
13158 default:
13159 llvm_unreachable("Compare and swap of unknown size");
13160 case PPC::ATOMIC_CMP_SWAP_I8:
13161 LoadMnemonic = PPC::LBARX;
13162 StoreMnemonic = PPC::STBCX;
13163 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
13164 break;
13165 case PPC::ATOMIC_CMP_SWAP_I16:
13166 LoadMnemonic = PPC::LHARX;
13167 StoreMnemonic = PPC::STHCX;
13168 assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
13169 break;
13170 case PPC::ATOMIC_CMP_SWAP_I32:
13171 LoadMnemonic = PPC::LWARX;
13172 StoreMnemonic = PPC::STWCX;
13173 break;
13174 case PPC::ATOMIC_CMP_SWAP_I64:
13175 LoadMnemonic = PPC::LDARX;
13176 StoreMnemonic = PPC::STDCX;
13177 break;
13178 }
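 // The l[bhwd]arx/st[bhwd]cx. pair implements load-reserve/store-conditional:
 // the store only succeeds (CR0.EQ set) if the reservation taken by the load
 // is still held; otherwise the BCC after the store retries the whole loop.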
13179 MachineRegisterInfo &RegInfo = F->getRegInfo();
13180 Register dest = MI.getOperand(0).getReg();
13181 Register ptrA = MI.getOperand(1).getReg();
13182 Register ptrB = MI.getOperand(2).getReg();
13183 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13184 Register oldval = MI.getOperand(3).getReg();
13185 Register newval = MI.getOperand(4).getReg();
13186 DebugLoc dl = MI.getDebugLoc();
13187
13188 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
13189 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
13190 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
13191 F->insert(It, loop1MBB);
13192 F->insert(It, loop2MBB);
13193 F->insert(It, exitMBB);
13194 exitMBB->splice(exitMBB->begin(), BB,
13195 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13196 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
13197
13198 // thisMBB:
13199 // ...
13200 // fallthrough --> loopMBB
13201 BB->addSuccessor(loop1MBB);
13202
13203 // loop1MBB:
13204 // l[bhwd]arx dest, ptr
13205 // cmp[wd] dest, oldval
13206 // bne- exitBB
13207 // loop2MBB:
13208 // st[bhwd]cx. newval, ptr
13209 // bne- loopMBB
13210 // b exitBB
13211 // exitBB:
13212 BB = loop1MBB;
13213 BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
13214 BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), CrReg)
13215 .addReg(dest)
13216 .addReg(oldval);
13217 BuildMI(BB, dl, TII->get(PPC::BCC))
13218 .addImm(PPC::PRED_NE)
13219 .addReg(CrReg)
13220 .addMBB(exitMBB);
13221 BB->addSuccessor(loop2MBB);
13222 BB->addSuccessor(exitMBB);
13223
13224 BB = loop2MBB;
13225 BuildMI(BB, dl, TII->get(StoreMnemonic))
13226 .addReg(newval)
13227 .addReg(ptrA)
13228 .addReg(ptrB);
13229 BuildMI(BB, dl, TII->get(PPC::BCC))
13230 .addImm(PPC::PRED_NE)
13231 .addReg(PPC::CR0)
13232 .addMBB(loop1MBB);
13233 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
13234 BB->addSuccessor(loop1MBB);
13235 BB->addSuccessor(exitMBB);
13236
13237 // exitMBB:
13238 // ...
13239 BB = exitMBB;
13240 } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
13241 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
13242 // We must use 64-bit registers for addresses when targeting 64-bit,
13243 // since we're actually doing arithmetic on them. Other registers
13244 // can be 32-bit.
13245 bool is64bit = Subtarget.isPPC64();
13246 bool isLittleEndian = Subtarget.isLittleEndian();
13247 bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
13248
13249 Register dest = MI.getOperand(0).getReg();
13250 Register ptrA = MI.getOperand(1).getReg();
13251 Register ptrB = MI.getOperand(2).getReg();
13252 Register oldval = MI.getOperand(3).getReg();
13253 Register newval = MI.getOperand(4).getReg();
13254 DebugLoc dl = MI.getDebugLoc();
13255
13256 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
13257 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
13258 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
13259 F->insert(It, loop1MBB);
13260 F->insert(It, loop2MBB);
13261 F->insert(It, exitMBB);
13262 exitMBB->splice(exitMBB->begin(), BB,
13263 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13264 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
13265
13266 MachineRegisterInfo &RegInfo = F->getRegInfo();
13267 const TargetRegisterClass *RC =
13268 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
13269 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
13270
13271 Register PtrReg = RegInfo.createVirtualRegister(RC);
13272 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
13273 Register ShiftReg =
13274 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
13275 Register NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
13276 Register NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
13277 Register OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
13278 Register OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
13279 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
13280 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
13281 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
13282 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
13283 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
13284 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
13285 Register Ptr1Reg;
13286 Register TmpReg = RegInfo.createVirtualRegister(GPRC);
13287 Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
13288 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13289 // thisMBB:
13290 // ...
13291 // fallthrough --> loopMBB
13292 BB->addSuccessor(loop1MBB);
13293
13294 // The 4-byte load must be aligned, while a char or short may be
13295 // anywhere in the word. Hence all this nasty bookkeeping code.
13296 // add ptr1, ptrA, ptrB [copy if ptrA==0]
13297 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
13298 // xori shift, shift1, 24 [16]
13299 // rlwinm ptr, ptr1, 0, 0, 29
13300 // slw newval2, newval, shift
13301 // slw oldval2, oldval,shift
13302 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
13303 // slw mask, mask2, shift
13304 // and newval3, newval2, mask
13305 // and oldval3, oldval2, mask
13306 // loop1MBB:
13307 // lwarx tmpDest, ptr
13308 // and tmp, tmpDest, mask
13309 // cmpw tmp, oldval3
13310 // bne- exitBB
13311 // loop2MBB:
13312 // andc tmp2, tmpDest, mask
13313 // or tmp4, tmp2, newval3
13314 // stwcx. tmp4, ptr
13315 // bne- loop1MBB
13316 // b exitBB
13317 // exitBB:
13318 // srw dest, tmpDest, shift
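 // Concretely, shift1 = (ptr & 3) * 8 for bytes or (ptr & 2) * 8 for
 // halfwords; on little-endian that is already the lane shift, while on
 // big-endian the xori turns it into 24 - shift1 (bytes) or 16 - shift1
 // (halfwords).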
13319 if (ptrA != ZeroReg) {
13320 Ptr1Reg = RegInfo.createVirtualRegister(RC);
13321 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
13322 .addReg(ptrA)
13323 .addReg(ptrB);
13324 } else {
13325 Ptr1Reg = ptrB;
13326 }
13327
13328 // We need to use a 32-bit subregister here to avoid a register-class
13329 // mismatch in 64-bit mode.
13330 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
13331 .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
13332 .addImm(3)
13333 .addImm(27)
13334 .addImm(is8bit ? 28 : 27);
13335 if (!isLittleEndian)
13336 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
13337 .addReg(Shift1Reg)
13338 .addImm(is8bit ? 24 : 16);
13339 if (is64bit)
13340 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
13341 .addReg(Ptr1Reg)
13342 .addImm(0)
13343 .addImm(61);
13344 else
13345 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
13346 .addReg(Ptr1Reg)
13347 .addImm(0)
13348 .addImm(0)
13349 .addImm(29);
13350 BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
13351 .addReg(newval)
13352 .addReg(ShiftReg);
13353 BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
13354 .addReg(oldval)
13355 .addReg(ShiftReg);
13356 if (is8bit)
13357 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
13358 else {
13359 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
13360 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
13361 .addReg(Mask3Reg)
13362 .addImm(65535);
13363 }
13364 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
13365 .addReg(Mask2Reg)
13366 .addReg(ShiftReg);
13367 BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
13368 .addReg(NewVal2Reg)
13369 .addReg(MaskReg);
13370 BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
13371 .addReg(OldVal2Reg)
13372 .addReg(MaskReg);
13373
13374 BB = loop1MBB;
13375 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
13376 .addReg(ZeroReg)
13377 .addReg(PtrReg);
13378 BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
13379 .addReg(TmpDestReg)
13380 .addReg(MaskReg);
13381 BuildMI(BB, dl, TII->get(PPC::CMPW), CrReg)
13382 .addReg(TmpReg)
13383 .addReg(OldVal3Reg);
13384 BuildMI(BB, dl, TII->get(PPC::BCC))
13385 .addImm(PPC::PRED_NE)
13386 .addReg(CrReg)
13387 .addMBB(exitMBB);
13388 BB->addSuccessor(loop2MBB);
13389 BB->addSuccessor(exitMBB);
13390
13391 BB = loop2MBB;
13392 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
13393 .addReg(TmpDestReg)
13394 .addReg(MaskReg);
13395 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
13396 .addReg(Tmp2Reg)
13397 .addReg(NewVal3Reg);
13398 BuildMI(BB, dl, TII->get(PPC::STWCX))
13399 .addReg(Tmp4Reg)
13400 .addReg(ZeroReg)
13401 .addReg(PtrReg);
13402 BuildMI(BB, dl, TII->get(PPC::BCC))
13403 .addImm(PPC::PRED_NE)
13404 .addReg(PPC::CR0)
13405 .addMBB(loop1MBB);
13406 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
13407 BB->addSuccessor(loop1MBB);
13408 BB->addSuccessor(exitMBB);
13409
13410 // exitMBB:
13411 // ...
13412 BB = exitMBB;
13413 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
13414 .addReg(TmpReg)
13415 .addReg(ShiftReg);
13416 } else if (MI.getOpcode() == PPC::FADDrtz) {
13417 // This pseudo performs an FADD with rounding mode temporarily forced
13418 // to round-to-zero. We emit this via custom inserter since the FPSCR
13419 // is not modeled at the SelectionDAG level.
13420 Register Dest = MI.getOperand(0).getReg();
13421 Register Src1 = MI.getOperand(1).getReg();
13422 Register Src2 = MI.getOperand(2).getReg();
13423 DebugLoc dl = MI.getDebugLoc();
13424
13425 MachineRegisterInfo &RegInfo = F->getRegInfo();
13426 Register MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
13427
13428 // Save FPSCR value.
13429 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
13430
13431 // Set rounding mode to round-to-zero.
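 // FPSCR bits 30:31 form the RN field; MTFSB1 31 / MTFSB0 30 leave RN = 0b01,
 // which is round-toward-zero (see the rounding-mode table in SETRNDi below).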
13432 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1))
13433 .addImm(31)
13434 .addReg(PPC::RM, RegState::ImplicitDefine);
13435
13436 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0))
13437 .addImm(30)
13438 .addReg(PPC::RM, RegState::ImplicitDefine);
13439
13440 // Perform addition.
13441 auto MIB = BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest)
13442 .addReg(Src1)
13443 .addReg(Src2);
13444 if (MI.getFlag(MachineInstr::NoFPExcept))
13445 MIB.setFlag(MachineInstr::NoFPExcept);
13446
13447 // Restore FPSCR value.
13448 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
13449 } else if (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
13450 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT ||
13451 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
13452 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) {
13453 unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
13454 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
13455 ? PPC::ANDI8_rec
13456 : PPC::ANDI_rec;
13457 bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
13458 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);
13459
13460 MachineRegisterInfo &RegInfo = F->getRegInfo();
13461 Register Dest = RegInfo.createVirtualRegister(
13462 Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
13463
13464 DebugLoc Dl = MI.getDebugLoc();
13465 BuildMI(*BB, MI, Dl, TII->get(Opcode), Dest)
13466 .addReg(MI.getOperand(1).getReg())
13467 .addImm(1);
13468 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
13469 MI.getOperand(0).getReg())
13470 .addReg(IsEQ ? PPC::CR0EQ : PPC::CR0GT);
13471 } else if (MI.getOpcode() == PPC::TCHECK_RET) {
13472 DebugLoc Dl = MI.getDebugLoc();
13473 MachineRegisterInfo &RegInfo = F->getRegInfo();
13474 Register CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13475 BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
13476 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
13477 MI.getOperand(0).getReg())
13478 .addReg(CRReg);
13479 } else if (MI.getOpcode() == PPC::TBEGIN_RET) {
13480 DebugLoc Dl = MI.getDebugLoc();
13481 unsigned Imm = MI.getOperand(1).getImm();
13482 BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);
13483 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
13484 MI.getOperand(0).getReg())
13485 .addReg(PPC::CR0EQ);
13486 } else if (MI.getOpcode() == PPC::SETRNDi) {
13487 DebugLoc dl = MI.getDebugLoc();
13488 Register OldFPSCRReg = MI.getOperand(0).getReg();
13489
13490 // Save FPSCR value.
13491 if (MRI.use_empty(OldFPSCRReg))
13492 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
13493 else
13494 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
13495
13496 // The floating point rounding mode is in bits 62:63 of FPSCR, and has
13497 // the following settings:
13498 // 00 Round to nearest
13499 // 01 Round to 0
13500 // 10 Round to +inf
13501 // 11 Round to -inf
13502
13503 // When the operand is an immediate, use its two least significant bits to
13504 // set bits 62:63 of FPSCR.
13505 unsigned Mode = MI.getOperand(1).getImm();
13506 BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
13507 .addImm(31)
13508 .addReg(PPC::RM, RegState::ImplicitDefine);
13509
13510 BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
13511 .addImm(30)
13512 .addReg(PPC::RM, RegState::ImplicitDefine);
13513 } else if (MI.getOpcode() == PPC::SETRND) {
13514 DebugLoc dl = MI.getDebugLoc();
13515
13516 // Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg
13517 // or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
13518 // If the target doesn't have DirectMove, we should use the stack to do the
13519 // conversion, because the target doesn't have instructions like mtvsrd or
13520 // mfvsrd to do this conversion directly.
13521 auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
13522 if (Subtarget.hasDirectMove()) {
13523 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)
13524 .addReg(SrcReg);
13525 } else {
13526 // Use stack to do the register copy.
13527 unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
13528 MachineRegisterInfo &RegInfo = F->getRegInfo();
13529 const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);
13530 if (RC == &PPC::F8RCRegClass) {
13531 // Copy register from F8RCRegClass to G8RCRegclass.
13532 assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
13533 "Unsupported RegClass.");
13534
13535 StoreOp = PPC::STFD;
13536 LoadOp = PPC::LD;
13537 } else {
13538 // Copy register from G8RCRegClass to F8RCRegclass.
13539 assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
13540 (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
13541 "Unsupported RegClass.");
13542 }
13543
13544 MachineFrameInfo &MFI = F->getFrameInfo();
13545 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
13546
13547 MachineMemOperand *MMOStore = F->getMachineMemOperand(
13548 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
13549 MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
13550 MFI.getObjectAlign(FrameIdx));
13551
13552 // Store the SrcReg into the stack.
13553 BuildMI(*BB, MI, dl, TII->get(StoreOp))
13554 .addReg(SrcReg)
13555 .addImm(0)
13556 .addFrameIndex(FrameIdx)
13557 .addMemOperand(MMOStore);
13558
13559 MachineMemOperand *MMOLoad = F->getMachineMemOperand(
13560 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
13561 MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
13562 MFI.getObjectAlign(FrameIdx));
13563
13564 // Load from the stack where SrcReg is stored, and save to DestReg,
13565 // so we have done the RegClass conversion from RegClass::SrcReg to
13566 // RegClass::DestReg.
13567 BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)
13568 .addImm(0)
13569 .addFrameIndex(FrameIdx)
13570 .addMemOperand(MMOLoad);
13571 }
13572 };
13573
13574 Register OldFPSCRReg = MI.getOperand(0).getReg();
13575
13576 // Save FPSCR value.
13577 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
13578
13579 // When the operand is a gprc register, use its two least significant bits
13580 // and the mtfsf instruction to set bits 62:63 of FPSCR.
13581 //
13582 // copy OldFPSCRTmpReg, OldFPSCRReg
13583 // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
13584 // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
13585 // copy NewFPSCRReg, NewFPSCRTmpReg
13586 // mtfsf 255, NewFPSCRReg
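 // The rldimi with SH=0, MB=62 replaces only bits 62:63 (the RN field) of the
 // old FPSCR image with the two low bits of the operand; all other FPSCR bits
 // are carried over unchanged before being written back with mtfsf.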
13587 MachineOperand SrcOp = MI.getOperand(1);
13588 MachineRegisterInfo &RegInfo = F->getRegInfo();
13589 Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13590
13591 copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
13592
13593 Register ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13594 Register ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13595
13596 // The first operand of INSERT_SUBREG should be a register that has
13597 // subregisters; since we only care about its RegClass, an
13598 // IMPLICIT_DEF register is sufficient.
13599 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
13600 BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)
13601 .addReg(ImDefReg)
13602 .add(SrcOp)
13603 .addImm(1);
13604
13605 Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13606 BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
13607 .addReg(OldFPSCRTmpReg)
13608 .addReg(ExtSrcReg)
13609 .addImm(0)
13610 .addImm(62);
13611
13612 Register NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
13613 copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
13614
13615 // The mask 255 means that bits 32:63 of NewFPSCRReg are written to bits
13616 // 32:63 of FPSCR.
13617 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))
13618 .addImm(255)
13619 .addReg(NewFPSCRReg)
13620 .addImm(0)
13621 .addImm(0);
13622 } else if (MI.getOpcode() == PPC::SETFLM) {
13623 DebugLoc Dl = MI.getDebugLoc();
13624
13625 // Result of setflm is previous FPSCR content, so we need to save it first.
13626 Register OldFPSCRReg = MI.getOperand(0).getReg();
13627 if (MRI.use_empty(OldFPSCRReg))
13628 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
13629 else
13630 BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);
13631
13632 // Put bits in 32:63 to FPSCR.
13633 Register NewFPSCRReg = MI.getOperand(1).getReg();
13634 BuildMI(*BB, MI, Dl, TII->get(PPC::MTFSF))
13635 .addImm(255)
13636 .addReg(NewFPSCRReg)
13637 .addImm(0)
13638 .addImm(0);
13639 } else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||
13640 MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
13641 return emitProbedAlloca(MI, BB);
13642 } else if (MI.getOpcode() == PPC::SPLIT_QUADWORD) {
13643 DebugLoc DL = MI.getDebugLoc();
13644 Register Src = MI.getOperand(2).getReg();
13645 Register Lo = MI.getOperand(0).getReg();
13646 Register Hi = MI.getOperand(1).getReg();
13647 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
13648 .addDef(Lo)
13649 .addUse(Src, 0, PPC::sub_gp8_x1);
13650 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
13651 .addDef(Hi)
13652 .addUse(Src, 0, PPC::sub_gp8_x0);
13653 } else if (MI.getOpcode() == PPC::LQX_PSEUDO ||
13654 MI.getOpcode() == PPC::STQX_PSEUDO) {
13655 DebugLoc DL = MI.getDebugLoc();
13656 // Ptr holds the sum of RA and RB and is used as the
13657 // ptr_rc_no_r0 part of LQ/STQ's memory operand,
13658 // so it has to be g8rc_and_g8rc_nox0.
13659 Register Ptr =
13660 F->getRegInfo().createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
13661 Register Val = MI.getOperand(0).getReg();
13662 Register RA = MI.getOperand(1).getReg();
13663 Register RB = MI.getOperand(2).getReg();
13664 BuildMI(*BB, MI, DL, TII->get(PPC::ADD8), Ptr).addReg(RA).addReg(RB);
13665 BuildMI(*BB, MI, DL,
13666 MI.getOpcode() == PPC::LQX_PSEUDO ? TII->get(PPC::LQ)
13667 : TII->get(PPC::STQ))
13668 .addReg(Val, MI.getOpcode() == PPC::LQX_PSEUDO ? RegState::Define : 0)
13669 .addImm(0)
13670 .addReg(Ptr);
13671 } else {
13672 llvm_unreachable("Unexpected instr type to insert");
13673 }
13674
13675 MI.eraseFromParent(); // The pseudo instruction is gone now.
13676 return BB;
13677}
13678
13679//===----------------------------------------------------------------------===//
13680// Target Optimization Hooks
13681//===----------------------------------------------------------------------===//
13682
13683static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
13684 // For the estimates, convergence is quadratic, so we essentially double the
13685 // number of digits correct after every iteration. For both FRE and FRSQRTE,
13686 // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
13687 // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
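 // For example, starting from 2^-14 (hasRecipPrec), one Newton-Raphson step
 // reaches roughly 2^-28 (enough for f32) and a second step roughly 2^-56
 // (enough for f64); from 2^-5 it takes 3 and 4 steps respectively.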
13688 int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
13689 if (VT.getScalarType() == MVT::f64)
13690 RefinementSteps++;
13691 return RefinementSteps;
13692}
13693
13694SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
13695 const DenormalMode &Mode) const {
13696 // We only have VSX Vector Test for software Square Root.
13697 EVT VT = Op.getValueType();
13698 if (!isTypeLegal(MVT::i1) ||
13699 (VT != MVT::f64 &&
13700 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX())))
13701 return TargetLowering::getSqrtInputTest(Op, DAG, Mode);
13702
13703 SDLoc DL(Op);
13704 // The output register of FTSQRT is a CR field.
13705 SDValue FTSQRT = DAG.getNode(PPCISD::FTSQRT, DL, MVT::i32, Op);
13706 // ftsqrt BF,FRB
13707 // Let e_b be the unbiased exponent of the double-precision
13708 // floating-point operand in register FRB.
13709 // fe_flag is set to 1 if either of the following conditions occurs.
13710 // - The double-precision floating-point operand in register FRB is a zero,
13711 // a NaN, or an infinity, or a negative value.
13712 // - e_b is less than or equal to -970.
13713 // Otherwise fe_flag is set to 0.
13714 // Both VSX and non-VSX versions would set EQ bit in the CR if the number is
13715 // not eligible for iteration. (zero/negative/infinity/nan or unbiased
13716 // exponent is less than -970)
13717 SDValue SRIdxVal = DAG.getTargetConstant(PPC::sub_eq, DL, MVT::i32);
13718 return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i1,
13719 FTSQRT, SRIdxVal),
13720 0);
13721}
13722
13723SDValue
13724PPCTargetLowering::getSqrtResultForDenormInput(SDValue Op,
13725 SelectionDAG &DAG) const {
13726 // We only have VSX Vector Square Root.
13727 EVT VT = Op.getValueType();
13728 if (VT != MVT::f64 &&
13729 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX()))
13730 return TargetLowering::getSqrtResultForDenormInput(Op, DAG);
13731
13732 return DAG.getNode(PPCISD::FSQRT, SDLoc(Op), VT, Op);
13733}
13734
13735SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
13736 int Enabled, int &RefinementSteps,
13737 bool &UseOneConstNR,
13738 bool Reciprocal) const {
13739 EVT VT = Operand.getValueType();
13740 if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
13741 (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
13742 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
13743 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
13744 if (RefinementSteps == ReciprocalEstimate::Unspecified)
13745 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
13746
13747 // The Newton-Raphson computation with a single constant does not provide
13748 // enough accuracy on some CPUs.
13749 UseOneConstNR = !Subtarget.needsTwoConstNR();
13750 return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
13751 }
13752 return SDValue();
13753}
13754
13755SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
13756 int Enabled,
13757 int &RefinementSteps) const {
13758 EVT VT = Operand.getValueType();
13759 if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
13760 (VT == MVT::f64 && Subtarget.hasFRE()) ||
13761 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
13762 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
13763 if (RefinementSteps == ReciprocalEstimate::Unspecified)
13764 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
13765 return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
13766 }
13767 return SDValue();
13768}
13769
13770unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
13771 // Note: This functionality is used only when unsafe-fp-math is enabled, and
13772 // on cores with reciprocal estimates (which are used when unsafe-fp-math is
13773 // enabled for division), this functionality is redundant with the default
13774 // combiner logic (once the division -> reciprocal/multiply transformation
13775 // has taken place). As a result, this matters more for older cores than for
13776 // newer ones.
13777
13778 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
13779 // reciprocal if there are two or more FDIVs (for embedded cores with only
13780 // one FP pipeline) or three or more FDIVs (for generic OOO cores).
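 // For example, with a divisor d that feeds three FDIVs on a generic core,
 // x/d, y/d and z/d are rewritten as r = 1.0/d; x*r, y*r, z*r once the
 // count returned here is reached.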
13781 switch (Subtarget.getCPUDirective()) {
13782 default:
13783 return 3;
13784 case PPC::DIR_440:
13785 case PPC::DIR_A2:
13786 case PPC::DIR_E500:
13787 case PPC::DIR_E500mc:
13788 case PPC::DIR_E5500:
13789 return 2;
13790 }
13791}
13792
13793// isConsecutiveLSLoc needs to work even if all adds have not yet been
13794// collapsed, and so we need to look through chains of them.
13795static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
13796 int64_t& Offset, SelectionDAG &DAG) {
13797 if (DAG.isBaseWithConstantOffset(Loc)) {
13798 Base = Loc.getOperand(0);
13799 Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
13800
13801 // The base might itself be a base plus an offset, and if so, accumulate
13802 // that as well.
13803 getBaseWithConstantOffset(Base, Base, Offset, DAG);
13804 }
13805}
13806
13807static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
13808 unsigned Bytes, int Dist,
13809 SelectionDAG &DAG) {
13810 if (VT.getSizeInBits() / 8 != Bytes)
13811 return false;
13812
13813 SDValue BaseLoc = Base->getBasePtr();
13814 if (Loc.getOpcode() == ISD::FrameIndex) {
13815 if (BaseLoc.getOpcode() != ISD::FrameIndex)
13816 return false;
13817 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
13818 int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
13819 int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
13820 int FS = MFI.getObjectSize(FI);
13821 int BFS = MFI.getObjectSize(BFI);
13822 if (FS != BFS || FS != (int)Bytes) return false;
13823 return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
13824 }
13825
13826 SDValue Base1 = Loc, Base2 = BaseLoc;
13827 int64_t Offset1 = 0, Offset2 = 0;
13828 getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
13829 getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
13830 if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
13831 return true;
13832
13833 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13834 const GlobalValue *GV1 = nullptr;
13835 const GlobalValue *GV2 = nullptr;
13836 Offset1 = 0;
13837 Offset2 = 0;
13838 bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
13839 bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
13840 if (isGA1 && isGA2 && GV1 == GV2)
13841 return Offset1 == (Offset2 + Dist*Bytes);
13842 return false;
13843}
13844
13845// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
13846// not enforce equality of the chain operands.
13847static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
13848 unsigned Bytes, int Dist,
13849 SelectionDAG &DAG) {
13850 if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
13851 EVT VT = LS->getMemoryVT();
13852 SDValue Loc = LS->getBasePtr();
13853 return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
13854 }
13855
13856 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
13857 EVT VT;
13858 switch (N->getConstantOperandVal(1)) {
13859 default: return false;
13860 case Intrinsic::ppc_altivec_lvx:
13861 case Intrinsic::ppc_altivec_lvxl:
13862 case Intrinsic::ppc_vsx_lxvw4x:
13863 case Intrinsic::ppc_vsx_lxvw4x_be:
13864 VT = MVT::v4i32;
13865 break;
13866 case Intrinsic::ppc_vsx_lxvd2x:
13867 case Intrinsic::ppc_vsx_lxvd2x_be:
13868 VT = MVT::v2f64;
13869 break;
13870 case Intrinsic::ppc_altivec_lvebx:
13871 VT = MVT::i8;
13872 break;
13873 case Intrinsic::ppc_altivec_lvehx:
13874 VT = MVT::i16;
13875 break;
13876 case Intrinsic::ppc_altivec_lvewx:
13877 VT = MVT::i32;
13878 break;
13879 }
13880
13881 return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
13882 }
13883
13884 if (N->getOpcode() == ISD::INTRINSIC_VOID) {
13885 EVT VT;
13886 switch (N->getConstantOperandVal(1)) {
13887 default: return false;
13888 case Intrinsic::ppc_altivec_stvx:
13889 case Intrinsic::ppc_altivec_stvxl:
13890 case Intrinsic::ppc_vsx_stxvw4x:
13891 VT = MVT::v4i32;
13892 break;
13893 case Intrinsic::ppc_vsx_stxvd2x:
13894 VT = MVT::v2f64;
13895 break;
13896 case Intrinsic::ppc_vsx_stxvw4x_be:
13897 VT = MVT::v4i32;
13898 break;
13899 case Intrinsic::ppc_vsx_stxvd2x_be:
13900 VT = MVT::v2f64;
13901 break;
13902 case Intrinsic::ppc_altivec_stvebx:
13903 VT = MVT::i8;
13904 break;
13905 case Intrinsic::ppc_altivec_stvehx:
13906 VT = MVT::i16;
13907 break;
13908 case Intrinsic::ppc_altivec_stvewx:
13909 VT = MVT::i32;
13910 break;
13911 }
13912
13913 return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
13914 }
13915
13916 return false;
13917}
13918
13919// Return true if there is a nearby consecutive load to the one provided
13920// (regardless of alignment). We search up and down the chain, looking through
13921// token factors and other loads (but nothing else). As a result, a true result
13922// indicates that it is safe to create a new consecutive load adjacent to the
13923// load provided.
13924static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
13925 SDValue Chain = LD->getChain();
13926 EVT VT = LD->getMemoryVT();
13927
13928 SmallSet<SDNode *, 16> LoadRoots;
13929 SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
13930 SmallSet<SDNode *, 16> Visited;
13931
13932 // First, search up the chain, branching to follow all token-factor operands.
13933 // If we find a consecutive load, then we're done, otherwise, record all
13934 // nodes just above the top-level loads and token factors.
13935 while (!Queue.empty()) {
13936 SDNode *ChainNext = Queue.pop_back_val();
13937 if (!Visited.insert(ChainNext).second)
13938 continue;
13939
13940 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
13941 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
13942 return true;
13943
13944 if (!Visited.count(ChainLD->getChain().getNode()))
13945 Queue.push_back(ChainLD->getChain().getNode());
13946 } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
13947 for (const SDUse &O : ChainNext->ops())
13948 if (!Visited.count(O.getNode()))
13949 Queue.push_back(O.getNode());
13950 } else
13951 LoadRoots.insert(ChainNext);
13952 }
13953
13954 // Second, search down the chain, starting from the top-level nodes recorded
13955 // in the first phase. These top-level nodes are the nodes just above all
13956 // loads and token factors. Starting with their uses, recursively look through
13957 // all loads (just the chain uses) and token factors to find a consecutive
13958 // load.
13959 Visited.clear();
13960 Queue.clear();
13961
13962 for (SDNode *I : LoadRoots) {
13963 Queue.push_back(I);
13964
13965 while (!Queue.empty()) {
13966 SDNode *LoadRoot = Queue.pop_back_val();
13967 if (!Visited.insert(LoadRoot).second)
13968 continue;
13969
13970 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
13971 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
13972 return true;
13973
13974 for (SDNode *U : LoadRoot->uses())
13975 if (((isa<MemSDNode>(U) &&
13976 cast<MemSDNode>(U)->getChain().getNode() == LoadRoot) ||
13977 U->getOpcode() == ISD::TokenFactor) &&
13978 !Visited.count(U))
13979 Queue.push_back(U);
13980 }
13981 }
13982
13983 return false;
13984}
13985
13986/// This function is called when we have proved that a SETCC node can be replaced
13987/// by subtraction (and other supporting instructions) so that the result of
13988/// comparison is kept in a GPR instead of CR. This function is purely for
13989/// codegen purposes and has some flags to guide the codegen process.
13990static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
13991 bool Swap, SDLoc &DL, SelectionDAG &DAG) {
13992 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
13993
13994 // Zero extend the operands to the largest legal integer. Originally, they
13995 // must be of a strictly smaller size.
13996 auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
13997 DAG.getConstant(Size, DL, MVT::i32));
13998 auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
13999 DAG.getConstant(Size, DL, MVT::i32));
14000
14001 // Swap if needed. Depends on the condition code.
14002 if (Swap)
14003 std::swap(Op0, Op1);
14004
14005 // Subtract extended integers.
14006 auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
14007
14008 // Move the sign bit to the least significant position and zero out the rest.
14009 // Now the least significant bit carries the result of the original comparison.
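 // Because both operands are zero extended from a strictly narrower type, the
 // difference fits in Size bits and bit Size-1 is set exactly when
 // Op0 <u Op1; Swap and Complement then turn that ULT result into UGT, UGE or
 // ULE as chosen by the caller (see ConvertSETCCToSubtract).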
14010 auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
14011 DAG.getConstant(Size - 1, DL, MVT::i32));
14012 auto Final = Shifted;
14013
14014 // Complement the result if needed. Based on the condition code.
14015 if (Complement)
14016 Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
14017 DAG.getConstant(1, DL, MVT::i64));
14018
14019 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
14020}
14021
14022SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
14023 DAGCombinerInfo &DCI) const {
14024 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
14025
14026 SelectionDAG &DAG = DCI.DAG;
14027 SDLoc DL(N);
14028
14029 // The size of the integers being compared has a critical role in the
14030 // following analysis, so we prefer to do this when all types are legal.
14031 if (!DCI.isAfterLegalizeDAG())
14032 return SDValue();
14033
14034 // If all users of SETCC extend its value to a legal integer type
14035 // then we replace SETCC with a subtraction
14036 for (const SDNode *U : N->uses())
14037 if (U->getOpcode() != ISD::ZERO_EXTEND)
14038 return SDValue();
14039
14040 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
14041 auto OpSize = N->getOperand(0).getValueSizeInBits();
14042
14043 unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();
14044
14045 if (OpSize < Size) {
14046 switch (CC) {
14047 default: break;
14048 case ISD::SETULT:
14049 return generateEquivalentSub(N, Size, false, false, DL, DAG);
14050 case ISD::SETULE:
14051 return generateEquivalentSub(N, Size, true, true, DL, DAG);
14052 case ISD::SETUGT:
14053 return generateEquivalentSub(N, Size, false, true, DL, DAG);
14054 case ISD::SETUGE:
14055 return generateEquivalentSub(N, Size, true, false, DL, DAG);
14056 }
14057 }
14058
14059 return SDValue();
14060}
14061
14062SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
14063 DAGCombinerInfo &DCI) const {
14064 SelectionDAG &DAG = DCI.DAG;
14065 SDLoc dl(N);
14066
14067 assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
14068 // If we're tracking CR bits, we need to be careful that we don't have:
14069 // trunc(binary-ops(zext(x), zext(y)))
14070 // or
14071 // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
14072 // such that we're unnecessarily moving things into GPRs when it would be
14073 // better to keep them in CR bits.
14074
14075 // Note that trunc here can be an actual i1 trunc, or can be the effective
14076 // truncation that comes from a setcc or select_cc.
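 // For example, (trunc (or (zext i1 %a), (zext i1 %b))) can stay as a CR-bit
 // OR of %a and %b rather than moving both values into GPRs and back.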
14077 if (N->getOpcode() == ISD::TRUNCATE &&
14078 N->getValueType(0) != MVT::i1)
14079 return SDValue();
14080
14081 if (N->getOperand(0).getValueType() != MVT::i32 &&
14082 N->getOperand(0).getValueType() != MVT::i64)
14083 return SDValue();
14084
14085 if (N->getOpcode() == ISD::SETCC ||
14086 N->getOpcode() == ISD::SELECT_CC) {
14087 // If we're looking at a comparison, then we need to make sure that the
14088 // high bits (all except for the first) don't affect the result.
14089 ISD::CondCode CC =
14090 cast<CondCodeSDNode>(N->getOperand(
14091 N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
14092 unsigned OpBits = N->getOperand(0).getValueSizeInBits();
14093
14094 if (ISD::isSignedIntSetCC(CC)) {
14095 if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
14096 DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
14097 return SDValue();
14098 } else if (ISD::isUnsignedIntSetCC(CC)) {
14099 if (!DAG.MaskedValueIsZero(N->getOperand(0),
14100 APInt::getHighBitsSet(OpBits, OpBits-1)) ||
14101 !DAG.MaskedValueIsZero(N->getOperand(1),
14102 APInt::getHighBitsSet(OpBits, OpBits-1)))
14103 return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
14104 : SDValue());
14105 } else {
14106 // This is neither a signed nor an unsigned comparison; just make sure
14107 // that the high bits are equal.
14108 KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0));
14109 KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));
14110
14111 // We don't really care about what is known about the first bit (if
14112 // anything), so pretend that it is known zero for both to ensure they can
14113 // be compared as constants.
14114 Op1Known.Zero.setBit(0); Op1Known.One.clearBit(0);
14115 Op2Known.Zero.setBit(0); Op2Known.One.clearBit(0);
14116
14117 if (!Op1Known.isConstant() || !Op2Known.isConstant() ||
14118 Op1Known.getConstant() != Op2Known.getConstant())
14119 return SDValue();
14120 }
14121 }
14122
14123 // We now know that the higher-order bits are irrelevant, we just need to
14124 // make sure that all of the intermediate operations are bit operations, and
14125 // all inputs are extensions.
14126 if (N->getOperand(0).getOpcode() != ISD::AND &&
14127 N->getOperand(0).getOpcode() != ISD::OR &&
14128 N->getOperand(0).getOpcode() != ISD::XOR &&
14129 N->getOperand(0).getOpcode() != ISD::SELECT &&
14130 N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
14131 N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
14132 N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
14133 N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
14134 N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
14135 return SDValue();
14136
14137 if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
14138 N->getOperand(1).getOpcode() != ISD::AND &&
14139 N->getOperand(1).getOpcode() != ISD::OR &&
14140 N->getOperand(1).getOpcode() != ISD::XOR &&
14141 N->getOperand(1).getOpcode() != ISD::SELECT &&
14142 N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
14143 N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
14144 N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
14145 N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
14146 N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
14147 return SDValue();
14148
14149 SmallVector<SDValue, 4> Inputs;
14150 SmallVector<SDValue, 8> BinOps, PromOps;
14151 SmallPtrSet<SDNode *, 16> Visited;
14152
14153 for (unsigned i = 0; i < 2; ++i) {
14154 if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
14155 N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
14156 N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
14157 N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
14158 isa<ConstantSDNode>(N->getOperand(i)))
14159 Inputs.push_back(N->getOperand(i));
14160 else
14161 BinOps.push_back(N->getOperand(i));
14162
14163 if (N->getOpcode() == ISD::TRUNCATE)
14164 break;
14165 }
14166
14167 // Visit all inputs, collect all binary operations (and, or, xor and
14168 // select) that are all fed by extensions.
14169 while (!BinOps.empty()) {
14170 SDValue BinOp = BinOps.pop_back_val();
14171
14172 if (!Visited.insert(BinOp.getNode()).second)
14173 continue;
14174
14175 PromOps.push_back(BinOp);
14176
14177 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
14178 // The condition of the select is not promoted.
14179 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
14180 continue;
14181 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
14182 continue;
14183
14184 if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
14185 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
14186 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
14187 BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
14188 isa<ConstantSDNode>(BinOp.getOperand(i))) {
14189 Inputs.push_back(BinOp.getOperand(i));
14190 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
14191 BinOp.getOperand(i).getOpcode() == ISD::OR ||
14192 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
14193 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
14194 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
14195 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
14196 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
14197 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
14198 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
14199 BinOps.push_back(BinOp.getOperand(i));
14200 } else {
14201 // We have an input that is not an extension or another binary
14202 // operation; we'll abort this transformation.
14203 return SDValue();
14204 }
14205 }
14206 }
14207
14208 // Make sure that this is a self-contained cluster of operations (which
14209 // is not quite the same thing as saying that everything has only one
14210 // use).
14211 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14212 if (isa<ConstantSDNode>(Inputs[i]))
14213 continue;
14214
14215 for (const SDNode *User : Inputs[i].getNode()->uses()) {
14216 if (User != N && !Visited.count(User))
14217 return SDValue();
14218
14219 // Make sure that we're not going to promote the non-output-value
14220 // operand(s) of SELECT or SELECT_CC.
14221 // FIXME: Although we could sometimes handle this, and it does occur in
14222 // practice that one of the condition inputs to the select is also one of
14223 // the outputs, we currently can't deal with this.
14224 if (User->getOpcode() == ISD::SELECT) {
14225 if (User->getOperand(0) == Inputs[i])
14226 return SDValue();
14227 } else if (User->getOpcode() == ISD::SELECT_CC) {
14228 if (User->getOperand(0) == Inputs[i] ||
14229 User->getOperand(1) == Inputs[i])
14230 return SDValue();
14231 }
14232 }
14233 }
14234
14235 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
14236 for (const SDNode *User : PromOps[i].getNode()->uses()) {
14237 if (User != N && !Visited.count(User))
14238 return SDValue();
14239
14240 // Make sure that we're not going to promote the non-output-value
14241 // operand(s) of SELECT or SELECT_CC.
14242 // FIXME: Although we could sometimes handle this, and it does occur in
14243 // practice that one of the condition inputs to the select is also one of
14244 // the outputs, we currently can't deal with this.
14245 if (User->getOpcode() == ISD::SELECT) {
14246 if (User->getOperand(0) == PromOps[i])
14247 return SDValue();
14248 } else if (User->getOpcode() == ISD::SELECT_CC) {
14249 if (User->getOperand(0) == PromOps[i] ||
14250 User->getOperand(1) == PromOps[i])
14251 return SDValue();
14252 }
14253 }
14254 }
14255
14256 // Replace all inputs with the extension operand.
14257 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14258 // Constants may have users outside the cluster of to-be-promoted nodes,
14259 // and so we need to replace those as we do the promotions.
14260 if (isa<ConstantSDNode>(Inputs[i]))
14261 continue;
14262 else
14263 DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
14264 }
14265
14266 std::list<HandleSDNode> PromOpHandles;
14267 for (auto &PromOp : PromOps)
14268 PromOpHandles.emplace_back(PromOp);
14269
14270 // Replace all operations (these are all the same, but have a different
14271 // (i1) return type). DAG.getNode will validate that the types of
14272 // a binary operator match, so go through the list in reverse so that
14273 // we've likely promoted both operands first. Any intermediate truncations or
14274 // extensions disappear.
14275 while (!PromOpHandles.empty()) {
14276 SDValue PromOp = PromOpHandles.back().getValue();
14277 PromOpHandles.pop_back();
14278
14279 if (PromOp.getOpcode() == ISD::TRUNCATE ||
14280 PromOp.getOpcode() == ISD::SIGN_EXTEND ||
14281 PromOp.getOpcode() == ISD::ZERO_EXTEND ||
14282 PromOp.getOpcode() == ISD::ANY_EXTEND) {
14283 if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
14284 PromOp.getOperand(0).getValueType() != MVT::i1) {
14285 // The operand is not yet ready (see comment below).
14286 PromOpHandles.emplace_front(PromOp);
14287 continue;
14288 }
14289
14290 SDValue RepValue = PromOp.getOperand(0);
14291 if (isa<ConstantSDNode>(RepValue))
14292 RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
14293
14294 DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
14295 continue;
14296 }
14297
14298 unsigned C;
14299 switch (PromOp.getOpcode()) {
14300 default: C = 0; break;
14301 case ISD::SELECT: C = 1; break;
14302 case ISD::SELECT_CC: C = 2; break;
14303 }
14304
14305 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
14306 PromOp.getOperand(C).getValueType() != MVT::i1) ||
14307 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
14308 PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
14309 // The to-be-promoted operands of this node have not yet been
14310 // promoted (this should be rare because we're going through the
14311 // list backward, but if one of the operands has several users in
14312 // this cluster of to-be-promoted nodes, it is possible).
14313 PromOpHandles.emplace_front(PromOp);
14314 continue;
14315 }
14316
14317 SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
14318 PromOp.getNode()->op_end());
14319
14320 // If there are any constant inputs, make sure they're replaced now.
14321 for (unsigned i = 0; i < 2; ++i)
14322 if (isa<ConstantSDNode>(Ops[C+i]))
14323 Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
14324
14325 DAG.ReplaceAllUsesOfValueWith(PromOp,
14326 DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
14327 }
14328
14329 // Now we're left with the initial truncation itself.
14330 if (N->getOpcode() == ISD::TRUNCATE)
14331 return N->getOperand(0);
14332
14333 // Otherwise, this is a comparison. The operands to be compared have just
14334 // changed type (to i1), but everything else is the same.
14335 return SDValue(N, 0);
14336}
14337
14338SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
14339 DAGCombinerInfo &DCI) const {
14340 SelectionDAG &DAG = DCI.DAG;
14341 SDLoc dl(N);
14342
14343 // If we're tracking CR bits, we need to be careful that we don't have:
14344 // zext(binary-ops(trunc(x), trunc(y)))
14345 // or
14346 // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...))
14347 // such that we're unnecessarily moving things into CR bits that can more
14348 // efficiently stay in GPRs. Note that if we're not certain that the high
14349 // bits are set as required by the final extension, we still may need to do
14350 // some masking to get the proper behavior.
14351
14352 // This same functionality is important on PPC64 when dealing with
14353 // 32-to-64-bit extensions; these occur often when 32-bit values are used as
14354 // the return values of functions. Because it is so similar, it is handled
14355 // here as well.
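// Rough illustrative example (added for clarity): on PPC64,
//   (zext i64 (xor i32 (trunc i32 %a), (trunc i32 %b)))
// with %a and %b already i64 can have the truncations stripped and the XOR
// performed directly on i64; if the high bits are not known to match the
// final zero/sign extension, a masking AND or an SHL/SRA pair is emitted at
// the end of this function.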
14356
14357 if (N->getValueType(0) != MVT::i32 &&
14358 N->getValueType(0) != MVT::i64)
14359 return SDValue();
14360
14361 if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
14362 (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
14363 return SDValue();
14364
14365 if (N->getOperand(0).getOpcode() != ISD::AND &&
14366 N->getOperand(0).getOpcode() != ISD::OR &&
14367 N->getOperand(0).getOpcode() != ISD::XOR &&
14368 N->getOperand(0).getOpcode() != ISD::SELECT &&
14369 N->getOperand(0).getOpcode() != ISD::SELECT_CC)
14370 return SDValue();
14371
14372 SmallVector<SDValue, 4> Inputs;
14373 SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
14374 SmallPtrSet<SDNode *, 16> Visited;
14375
14376 // Visit all inputs, collect all binary operations (and, or, xor and
14377 // select) that are all fed by truncations.
14378 while (!BinOps.empty()) {
14379 SDValue BinOp = BinOps.pop_back_val();
14380
14381 if (!Visited.insert(BinOp.getNode()).second)
14382 continue;
14383
14384 PromOps.push_back(BinOp);
14385
14386 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
14387 // The condition of the select is not promoted.
14388 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
14389 continue;
14390 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
14391 continue;
14392
14393 if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
14394 isa<ConstantSDNode>(BinOp.getOperand(i))) {
14395 Inputs.push_back(BinOp.getOperand(i));
14396 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
14397 BinOp.getOperand(i).getOpcode() == ISD::OR ||
14398 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
14399 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
14400 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
14401 BinOps.push_back(BinOp.getOperand(i));
14402 } else {
14403 // We have an input that is not a truncation or another binary
14404 // operation; we'll abort this transformation.
14405 return SDValue();
14406 }
14407 }
14408 }
14409
14410 // The operands of a select that must be truncated when the select is
14411 // promoted because the operand is actually part of the to-be-promoted set.
14412 DenseMap<SDNode *, EVT> SelectTruncOp[2];
14413
14414 // Make sure that this is a self-contained cluster of operations (which
14415 // is not quite the same thing as saying that everything has only one
14416 // use).
14417 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14418 if (isa<ConstantSDNode>(Inputs[i]))
14419 continue;
14420
14421 for (SDNode *User : Inputs[i].getNode()->uses()) {
14422 if (User != N && !Visited.count(User))
14423 return SDValue();
14424
14425 // If we're going to promote the non-output-value operand(s) of SELECT or
14426 // SELECT_CC, record them for truncation.
14427 if (User->getOpcode() == ISD::SELECT) {
14428 if (User->getOperand(0) == Inputs[i])
14429 SelectTruncOp[0].insert(std::make_pair(User,
14430 User->getOperand(0).getValueType()));
14431 } else if (User->getOpcode() == ISD::SELECT_CC) {
14432 if (User->getOperand(0) == Inputs[i])
14433 SelectTruncOp[0].insert(std::make_pair(User,
14434 User->getOperand(0).getValueType()));
14435 if (User->getOperand(1) == Inputs[i])
14436 SelectTruncOp[1].insert(std::make_pair(User,
14437 User->getOperand(1).getValueType()));
14438 }
14439 }
14440 }
14441
14442 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
14443 for (SDNode *User : PromOps[i].getNode()->uses()) {
14444 if (User != N && !Visited.count(User))
14445 return SDValue();
14446
14447 // If we're going to promote the non-output-value operand(s) of SELECT or
14448 // SELECT_CC, record them for truncation.
14449 if (User->getOpcode() == ISD::SELECT) {
14450 if (User->getOperand(0) == PromOps[i])
14451 SelectTruncOp[0].insert(std::make_pair(User,
14452 User->getOperand(0).getValueType()));
14453 } else if (User->getOpcode() == ISD::SELECT_CC) {
14454 if (User->getOperand(0) == PromOps[i])
14455 SelectTruncOp[0].insert(std::make_pair(User,
14456 User->getOperand(0).getValueType()));
14457 if (User->getOperand(1) == PromOps[i])
14458 SelectTruncOp[1].insert(std::make_pair(User,
14459 User->getOperand(1).getValueType()));
14460 }
14461 }
14462 }
14463
14464 unsigned PromBits = N->getOperand(0).getValueSizeInBits();
14465 bool ReallyNeedsExt = false;
14466 if (N->getOpcode() != ISD::ANY_EXTEND) {
14467 // If not all of the inputs are already sign/zero-extended, then
14468 // we'll still need to do that at the end.
14469 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14470 if (isa<ConstantSDNode>(Inputs[i]))
14471 continue;
14472
14473 unsigned OpBits =
14474 Inputs[i].getOperand(0).getValueSizeInBits();
14475 assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
14476
14477 if ((N->getOpcode() == ISD::ZERO_EXTEND &&
14478 !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
14479 APInt::getHighBitsSet(OpBits,
14480 OpBits-PromBits))) ||
14481 (N->getOpcode() == ISD::SIGN_EXTEND &&
14482 DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
14483 (OpBits-(PromBits-1)))) {
14484 ReallyNeedsExt = true;
14485 break;
14486 }
14487 }
14488 }
14489
14490 // Replace all inputs, either with the truncation operand, or a
14491 // truncation or extension to the final output type.
14492 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14493 // Constant inputs need to be replaced with the to-be-promoted nodes that
14494 // use them because they might have users outside of the cluster of
14495 // promoted nodes.
14496 if (isa<ConstantSDNode>(Inputs[i]))
14497 continue;
14498
14499 SDValue InSrc = Inputs[i].getOperand(0);
14500 if (Inputs[i].getValueType() == N->getValueType(0))
14501 DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
14502 else if (N->getOpcode() == ISD::SIGN_EXTEND)
14503 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
14504 DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
14505 else if (N->getOpcode() == ISD::ZERO_EXTEND)
14506 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
14507 DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
14508 else
14509 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
14510 DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
14511 }
14512
14513 std::list<HandleSDNode> PromOpHandles;
14514 for (auto &PromOp : PromOps)
14515 PromOpHandles.emplace_back(PromOp);
14516
14517 // Replace all operations (these are all the same, but have a different
14518 // (promoted) return type). DAG.getNode will validate that the types of
14519 // a binary operator match, so go through the list in reverse so that
14520 // we've likely promoted both operands first.
14521 while (!PromOpHandles.empty()) {
14522 SDValue PromOp = PromOpHandles.back().getValue();
14523 PromOpHandles.pop_back();
14524
14525 unsigned C;
14526 switch (PromOp.getOpcode()) {
14527 default: C = 0; break;
14528 case ISD::SELECT: C = 1; break;
14529 case ISD::SELECT_CC: C = 2; break;
14530 }
14531
14532 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
14533 PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
14534 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
14535 PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
14536 // The to-be-promoted operands of this node have not yet been
14537 // promoted (this should be rare because we're going through the
14538 // list backward, but if one of the operands has several users in
14539 // this cluster of to-be-promoted nodes, it is possible).
14540 PromOpHandles.emplace_front(PromOp);
14541 continue;
14542 }
14543
14544 // For SELECT and SELECT_CC nodes, we do a similar check for any
14545 // to-be-promoted comparison inputs.
14546 if (PromOp.getOpcode() == ISD::SELECT ||
14547 PromOp.getOpcode() == ISD::SELECT_CC) {
14548 if ((SelectTruncOp[0].count(PromOp.getNode()) &&
14549 PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
14550 (SelectTruncOp[1].count(PromOp.getNode()) &&
14551 PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
14552 PromOpHandles.emplace_front(PromOp);
14553 continue;
14554 }
14555 }
14556
14557 SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
14558 PromOp.getNode()->op_end());
14559
14560 // If this node has constant inputs, then they'll need to be promoted here.
14561 for (unsigned i = 0; i < 2; ++i) {
14562 if (!isa<ConstantSDNode>(Ops[C+i]))
14563 continue;
14564 if (Ops[C+i].getValueType() == N->getValueType(0))
14565 continue;
14566
14567 if (N->getOpcode() == ISD::SIGN_EXTEND)
14568 Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
14569 else if (N->getOpcode() == ISD::ZERO_EXTEND)
14570 Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
14571 else
14572 Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
14573 }
14574
14575 // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
14576 // truncate them again to the original value type.
14577 if (PromOp.getOpcode() == ISD::SELECT ||
14578 PromOp.getOpcode() == ISD::SELECT_CC) {
14579 auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
14580 if (SI0 != SelectTruncOp[0].end())
14581 Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
14582 auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
14583 if (SI1 != SelectTruncOp[1].end())
14584 Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
14585 }
14586
14587 DAG.ReplaceAllUsesOfValueWith(PromOp,
14588 DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
14589 }
14590
14591 // Now we're left with the initial extension itself.
14592 if (!ReallyNeedsExt)
14593 return N->getOperand(0);
14594
14595 // To zero extend, just mask off everything except for the first bit (in the
14596 // i1 case).
14597 if (N->getOpcode() == ISD::ZERO_EXTEND)
14598 return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
14599 DAG.getConstant(APInt::getLowBitsSet(
14600 N->getValueSizeInBits(0), PromBits),
14601 dl, N->getValueType(0)));
14602
14603 assert(N->getOpcode() == ISD::SIGN_EXTEND &&
14604 "Invalid extension type");
14605 EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
14606 SDValue ShiftCst =
14607 DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
14608 return DAG.getNode(
14609 ISD::SRA, dl, N->getValueType(0),
14610 DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
14611 ShiftCst);
14612}
14613
14614SDValue PPCTargetLowering::combineSetCC(SDNode *N,
14615 DAGCombinerInfo &DCI) const {
14616 assert(N->getOpcode() == ISD::SETCC &&
14617 "Should be called with a SETCC node");
14618
14619 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
14620 if (CC == ISD::SETNE || CC == ISD::SETEQ) {
14621 SDValue LHS = N->getOperand(0);
14622 SDValue RHS = N->getOperand(1);
14623
14624 // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
14625 if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
14626 LHS.hasOneUse())
14627 std::swap(LHS, RHS);
14628
14629 // x == 0-y --> x+y == 0
14630 // x != 0-y --> x+y != 0
14631 if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
14632 RHS.hasOneUse()) {
14633 SDLoc DL(N);
14634 SelectionDAG &DAG = DCI.DAG;
14635 EVT VT = N->getValueType(0);
14636 EVT OpVT = LHS.getValueType();
14637 SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
14638 return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
14639 }
14640 }
14641
14642 return DAGCombineTruncBoolExt(N, DCI);
14643}
14644
14645// Is this an extending load from an f32 to an f64?
14646static bool isFPExtLoad(SDValue Op) {
14647 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
14648 return LD->getExtensionType() == ISD::EXTLOAD &&
14649 Op.getValueType() == MVT::f64;
14650 return false;
14651}
14652
14653/// Reduces the number of fp-to-int conversion when building a vector.
14654///
14655/// If this vector is built out of floating to integer conversions,
14656/// transform it to a vector built out of floating point values followed by a
14657/// single floating to integer conversion of the vector.
14658/// Namely (build_vector (fptosi $A), (fptosi $B), ...)
14659/// becomes (fptosi (build_vector ($A, $B, ...)))
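///
/// A concrete (illustrative) instance: four f32 values loaded from memory and
/// converted individually,
///   (v4i32 (build_vector (fptosi %a), (fptosi %b), (fptosi %c), (fptosi %d))),
/// can be rebuilt as (v4i32 (fptosi (v4f32 (build_vector %a, %b, %c, %d)))),
/// i.e. a single vector conversion (plus FP_ROUNDs for the 32-bit case, as
/// handled below).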
14660SDValue PPCTargetLowering::
14661combineElementTruncationToVectorTruncation(SDNode *N,
14662 DAGCombinerInfo &DCI) const {
14663 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
14664 "Should be called with a BUILD_VECTOR node");
14665
14666 SelectionDAG &DAG = DCI.DAG;
14667 SDLoc dl(N);
14668
14669 SDValue FirstInput = N->getOperand(0);
14670 assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
14671 "The input operand must be an fp-to-int conversion.");
14672
14673 // This combine happens after legalization so the fp_to_[su]i nodes are
14674 // already converted to PPCISD nodes.
14675 unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
14676 if (FirstConversion == PPCISD::FCTIDZ ||
14677 FirstConversion == PPCISD::FCTIDUZ ||
14678 FirstConversion == PPCISD::FCTIWZ ||
14679 FirstConversion == PPCISD::FCTIWUZ) {
14680 bool IsSplat = true;
14681 bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
14682 FirstConversion == PPCISD::FCTIWUZ;
14683 EVT SrcVT = FirstInput.getOperand(0).getValueType();
14684 SmallVector<SDValue, 4> Ops;
14685 EVT TargetVT = N->getValueType(0);
14686 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
14687 SDValue NextOp = N->getOperand(i);
14688 if (NextOp.getOpcode() != PPCISD::MFVSR)
14689 return SDValue();
14690 unsigned NextConversion = NextOp.getOperand(0).getOpcode();
14691 if (NextConversion != FirstConversion)
14692 return SDValue();
14693 // If we are converting to 32-bit integers, we need to add an FP_ROUND.
14694 // This is not valid if the input was originally double precision. It is
14695 // also not profitable to do unless this is an extending load in which
14696 // case doing this combine will allow us to combine consecutive loads.
14697 if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
14698 return SDValue();
14699 if (N->getOperand(i) != FirstInput)
14700 IsSplat = false;
14701 }
14702
14703 // If this is a splat, we leave it as-is since there will be only a single
14704 // fp-to-int conversion followed by a splat of the integer. This is better
14705 // for 32-bit and smaller ints and neutral for 64-bit ints.
14706 if (IsSplat)
14707 return SDValue();
14708
14709 // Now that we know we have the right type of node, get its operands
14710 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
14711 SDValue In = N->getOperand(i).getOperand(0);
14712 if (Is32Bit) {
14713 // For 32-bit values, we need to add an FP_ROUND node (if we made it
14714 // here, we know that all inputs are extending loads so this is safe).
14715 if (In.isUndef())
14716 Ops.push_back(DAG.getUNDEF(SrcVT));
14717 else {
14718 SDValue Trunc =
14719 DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, In.getOperand(0),
14720 DAG.getIntPtrConstant(1, dl, /*isTarget=*/true));
14721 Ops.push_back(Trunc);
14722 }
14723 } else
14724 Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
14725 }
14726
14727 unsigned Opcode;
14728 if (FirstConversion == PPCISD::FCTIDZ ||
14729 FirstConversion == PPCISD::FCTIWZ)
14730 Opcode = ISD::FP_TO_SINT;
14731 else
14732 Opcode = ISD::FP_TO_UINT;
14733
14734 EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
14735 SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
14736 return DAG.getNode(Opcode, dl, TargetVT, BV);
14737 }
14738 return SDValue();
14739}
14740
14741/// Reduce the number of loads when building a vector.
14742///
14743/// Building a vector out of multiple loads can be converted to a load
14744/// of the vector type if the loads are consecutive. If the loads are
14745/// consecutive but in descending order, a shuffle is added at the end
14746/// to reorder the vector.
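///
/// Illustrative example: a v4i32 build_vector whose operands are i32 loads
/// from p, p+4, p+8 and p+12 becomes a single v4i32 load from p; if the loads
/// instead run from p+12 down to p, the wide load is followed by a reversing
/// vector_shuffle.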
14747 static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
14748 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
14749 "Should be called with a BUILD_VECTOR node");
14750
14751 SDLoc dl(N);
14752
14753 // Return early for non-byte-sized types, as they can't be consecutive.
14754 if (!N->getValueType(0).getVectorElementType().isByteSized())
14755 return SDValue();
14756
14757 bool InputsAreConsecutiveLoads = true;
14758 bool InputsAreReverseConsecutive = true;
14759 unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
14760 SDValue FirstInput = N->getOperand(0);
14761 bool IsRoundOfExtLoad = false;
14762 LoadSDNode *FirstLoad = nullptr;
14763
14764 if (FirstInput.getOpcode() == ISD::FP_ROUND &&
14765 FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
14766 FirstLoad = cast<LoadSDNode>(FirstInput.getOperand(0));
14767 IsRoundOfExtLoad = FirstLoad->getExtensionType() == ISD::EXTLOAD;
14768 }
14769 // Not a build vector of (possibly fp_rounded) loads.
14770 if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
14771 N->getNumOperands() == 1)
14772 return SDValue();
14773
14774 if (!IsRoundOfExtLoad)
14775 FirstLoad = cast<LoadSDNode>(FirstInput);
14776
14777 SmallVector<LoadSDNode *, 4> InputLoads;
14778 InputLoads.push_back(FirstLoad);
14779 for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
14780 // If any inputs are fp_round(extload), they all must be.
14781 if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
14782 return SDValue();
14783
14784 SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
14785 N->getOperand(i);
14786 if (NextInput.getOpcode() != ISD::LOAD)
14787 return SDValue();
14788
14789 SDValue PreviousInput =
14790 IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
14791 LoadSDNode *LD1 = cast<LoadSDNode>(PreviousInput);
14792 LoadSDNode *LD2 = cast<LoadSDNode>(NextInput);
14793
14794 // If any inputs are fp_round(extload), they all must be.
14795 if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
14796 return SDValue();
14797
14798 // We only care about regular loads. The PPC-specific load intrinsics
14799 // will not lead to a merge opportunity.
14800 if (!DAG.areNonVolatileConsecutiveLoads(LD2, LD1, ElemSize, 1))
14801 InputsAreConsecutiveLoads = false;
14802 if (!DAG.areNonVolatileConsecutiveLoads(LD1, LD2, ElemSize, 1))
14803 InputsAreReverseConsecutive = false;
14804
14805 // Exit early if the loads are neither consecutive nor reverse consecutive.
14806 if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
14807 return SDValue();
14808 InputLoads.push_back(LD2);
14809 }
14810
14811 assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
14812 "The loads cannot be both consecutive and reverse consecutive.");
14813
14814 SDValue WideLoad;
14815 SDValue ReturnSDVal;
14816 if (InputsAreConsecutiveLoads) {
14817 assert(FirstLoad && "Input needs to be a LoadSDNode.");
14818 WideLoad = DAG.getLoad(N->getValueType(0), dl, FirstLoad->getChain(),
14819 FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
14820 FirstLoad->getAlign());
14821 ReturnSDVal = WideLoad;
14822 } else if (InputsAreReverseConsecutive) {
14823 LoadSDNode *LastLoad = InputLoads.back();
14824 assert(LastLoad && "Input needs to be a LoadSDNode.");
14825 WideLoad = DAG.getLoad(N->getValueType(0), dl, LastLoad->getChain(),
14826 LastLoad->getBasePtr(), LastLoad->getPointerInfo(),
14827 LastLoad->getAlign());
14828 SmallVector<int, 16> Ops;
14829 for (int i = N->getNumOperands() - 1; i >= 0; i--)
14830 Ops.push_back(i);
14831
14832 ReturnSDVal = DAG.getVectorShuffle(N->getValueType(0), dl, WideLoad,
14833 DAG.getUNDEF(N->getValueType(0)), Ops);
14834 } else
14835 return SDValue();
14836
14837 for (auto *LD : InputLoads)
14838 DAG.makeEquivalentMemoryOrdering(LD, WideLoad);
14839 return ReturnSDVal;
14840}
14841
14842// This function adds the required vector_shuffle needed to get
14843// the elements of the vector extract in the correct position
14844// as specified by the CorrectElems encoding.
14845 static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
14846 SDValue Input, uint64_t Elems,
14847 uint64_t CorrectElems) {
14848 SDLoc dl(N);
14849
14850 unsigned NumElems = Input.getValueType().getVectorNumElements();
14851 SmallVector<int, 16> ShuffleMask(NumElems, -1);
14852
14853 // Knowing the element indices being extracted from the original
14854 // vector and the order in which they're being inserted, just put
14855 // them at element indices required for the instruction.
14856 for (unsigned i = 0; i < N->getNumOperands(); i++) {
14857 if (DAG.getDataLayout().isLittleEndian())
14858 ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
14859 else
14860 ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
14861 CorrectElems = CorrectElems >> 8;
14862 Elems = Elems >> 8;
14863 }
14864
14865 SDValue Shuffle =
14866 DAG.getVectorShuffle(Input.getValueType(), dl, Input,
14867 DAG.getUNDEF(Input.getValueType()), ShuffleMask);
14868
14869 EVT VT = N->getValueType(0);
14870 SDValue Conv = DAG.getBitcast(VT, Shuffle);
14871
14872 EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
14873 Input.getValueType().getVectorElementType(),
14874 VT.getVectorNumElements());
14875 return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Conv,
14876 DAG.getValueType(ExtVT));
14877}
14878
14879// Look for build vector patterns where input operands come from sign
14880// extended vector_extract elements of specific indices. If the correct indices
14881// aren't used, add a vector shuffle to fix up the indices and create
14882// SIGN_EXTEND_INREG node which selects the vector sign extend instructions
14883// during instruction selection.
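// Illustrative example (added for clarity): with P9 Altivec, a build_vector
//   (v4i32 (build_vector (sext (extractelt v16i8 %v, 1)),
//                        (sext (extractelt %v, 5)),
//                        (sext (extractelt %v, 9)),
//                        (sext (extractelt %v, 13))))
// on little endian extracts from the "wrong" byte indices (the byte-to-word
// extend instruction expects 0, 4, 8 and 12), so a vector_shuffle is inserted
// to move those bytes into the expected lanes before the SIGN_EXTEND_INREG
// node is formed.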
14884 static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
14885 // This array encodes the indices that the vector sign extend instructions
14886 // extract from when extending from one type to another for both BE and LE.
14887 // The right nibble of each byte corresponds to the LE indices,
14888 // and the left nibble of each byte corresponds to the BE indices.
14889 // For example: 0x3074B8FC byte->word
14890 // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
14891 // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
14892 // For example: 0x000070F8 byte->double word
14893 // For LE: the allowed indices are: 0x0,0x8
14894 // For BE: the allowed indices are: 0x7,0xF
14895 uint64_t TargetElems[] = {
14896 0x3074B8FC, // b->w
14897 0x000070F8, // b->d
14898 0x10325476, // h->w
14899 0x00003074, // h->d
14900 0x00001032, // w->d
14901 };
14902
14903 uint64_t Elems = 0;
14904 int Index;
14905 SDValue Input;
14906
14907 auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
14908 if (!Op)
14909 return false;
14910 if (Op.getOpcode() != ISD::SIGN_EXTEND &&
14911 Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
14912 return false;
14913
14914 // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
14915 // of the right width.
14916 SDValue Extract = Op.getOperand(0);
14917 if (Extract.getOpcode() == ISD::ANY_EXTEND)
14918 Extract = Extract.getOperand(0);
14919 if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
14920 return false;
14921
14922 ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
14923 if (!ExtOp)
14924 return false;
14925
14926 Index = ExtOp->getZExtValue();
14927 if (Input && Input != Extract.getOperand(0))
14928 return false;
14929
14930 if (!Input)
14931 Input = Extract.getOperand(0);
14932
14933 Elems = Elems << 8;
14934 Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
14935 Elems |= Index;
14936
14937 return true;
14938 };
14939
14940 // If the build vector operands aren't sign extended vector extracts
14941 // of the same input vector, then return.
14942 for (unsigned i = 0; i < N->getNumOperands(); i++) {
14943 if (!isSExtOfVecExtract(N->getOperand(i))) {
14944 return SDValue();
14945 }
14946 }
14947
14948 // If the vector extract indices are not correct, add the appropriate
14949 // vector_shuffle.
14950 int TgtElemArrayIdx;
14951 int InputSize = Input.getValueType().getScalarSizeInBits();
14952 int OutputSize = N->getValueType(0).getScalarSizeInBits();
14953 if (InputSize + OutputSize == 40)
14954 TgtElemArrayIdx = 0;
14955 else if (InputSize + OutputSize == 72)
14956 TgtElemArrayIdx = 1;
14957 else if (InputSize + OutputSize == 48)
14958 TgtElemArrayIdx = 2;
14959 else if (InputSize + OutputSize == 80)
14960 TgtElemArrayIdx = 3;
14961 else if (InputSize + OutputSize == 96)
14962 TgtElemArrayIdx = 4;
14963 else
14964 return SDValue();
14965
14966 uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
14967 CorrectElems = DAG.getDataLayout().isLittleEndian()
14968 ? CorrectElems & 0x0F0F0F0F0F0F0F0F
14969 : CorrectElems & 0xF0F0F0F0F0F0F0F0;
14970 if (Elems != CorrectElems) {
14971 return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
14972 }
14973
14974 // Regular lowering will catch cases where a shuffle is not needed.
14975 return SDValue();
14976}
14977
14978// Look for the pattern of a load from a narrow width to i128, feeding
14979// into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node
14980// (LXVRZX). This node represents a zero extending load that will be matched
14981// to the Load VSX Vector Rightmost instructions.
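// Illustrative example (added for clarity):
//   (v1i128 (build_vector (i128 (zextload i32, %p))))
// becomes a PPCISD::LXVRZX memory node that loads 32 bits into the rightmost
// part of the vector register and zeroes the rest, which later selects to one
// of the Power10 lxvr* instructions.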
14982 static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG) {
14983 SDLoc DL(N);
14984
14985 // This combine is only eligible for a BUILD_VECTOR of v1i128.
14986 if (N->getValueType(0) != MVT::v1i128)
14987 return SDValue();
14988
14989 SDValue Operand = N->getOperand(0);
14990 // Proceed with the transformation if the operand to the BUILD_VECTOR
14991 // is a load instruction.
14992 if (Operand.getOpcode() != ISD::LOAD)
14993 return SDValue();
14994
14995 auto *LD = cast<LoadSDNode>(Operand);
14996 EVT MemoryType = LD->getMemoryVT();
14997
14998 // This transformation is only valid if we are loading either a byte,
14999 // halfword, word, or doubleword.
15000 bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 ||
15001 MemoryType == MVT::i32 || MemoryType == MVT::i64;
15002
15003 // Ensure that the load from the narrow width is being zero extended to i128.
15004 if (!ValidLDType ||
15005 (LD->getExtensionType() != ISD::ZEXTLOAD &&
15006 LD->getExtensionType() != ISD::EXTLOAD))
15007 return SDValue();
15008
15009 SDValue LoadOps[] = {
15010 LD->getChain(), LD->getBasePtr(),
15011 DAG.getIntPtrConstant(MemoryType.getScalarSizeInBits(), DL)};
15012
15013 return DAG.getMemIntrinsicNode(PPCISD::LXVRZX, DL,
15014 DAG.getVTList(MVT::v1i128, MVT::Other),
15015 LoadOps, MemoryType, LD->getMemOperand());
15016}
15017
15018SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
15019 DAGCombinerInfo &DCI) const {
15020 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
15021 "Should be called with a BUILD_VECTOR node");
15022
15023 SelectionDAG &DAG = DCI.DAG;
15024 SDLoc dl(N);
15025
15026 if (!Subtarget.hasVSX())
15027 return SDValue();
15028
15029 // The target independent DAG combiner will leave a build_vector of
15030 // float-to-int conversions intact. We can generate MUCH better code for
15031 // a float-to-int conversion of a vector of floats.
15032 SDValue FirstInput = N->getOperand(0);
15033 if (FirstInput.getOpcode() == PPCISD::MFVSR) {
15034 SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
15035 if (Reduced)
15036 return Reduced;
15037 }
15038
15039 // If we're building a vector out of consecutive loads, just load that
15040 // vector type.
15041 SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
15042 if (Reduced)
15043 return Reduced;
15044
15045 // If we're building a vector out of extended elements from another vector
15046 // we have P9 vector integer extend instructions. The code assumes legal
15047 // input types (i.e. it can't handle things like v4i16) so do not run before
15048 // legalization.
15049 if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
15050 Reduced = combineBVOfVecSExt(N, DAG);
15051 if (Reduced)
15052 return Reduced;
15053 }
15054
15055 // On Power10, the Load VSX Vector Rightmost instructions can be utilized
15056 // if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR
15057 // is a load from <valid narrow width> to i128.
15058 if (Subtarget.isISA3_1()) {
15059 SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG);
15060 if (BVOfZLoad)
15061 return BVOfZLoad;
15062 }
15063
15064 if (N->getValueType(0) != MVT::v2f64)
15065 return SDValue();
15066
15067 // Looking for:
15068 // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
15069 if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
15070 FirstInput.getOpcode() != ISD::UINT_TO_FP)
15071 return SDValue();
15072 if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
15073 N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
15074 return SDValue();
15075 if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
15076 return SDValue();
15077
15078 SDValue Ext1 = FirstInput.getOperand(0);
15079 SDValue Ext2 = N->getOperand(1).getOperand(0);
15080 if (Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
15081 Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
15082 return SDValue();
15083
15084 ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
15085 ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
15086 if (!Ext1Op || !Ext2Op)
15087 return SDValue();
15088 if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
15089 Ext1.getOperand(0) != Ext2.getOperand(0))
15090 return SDValue();
15091
15092 int FirstElem = Ext1Op->getZExtValue();
15093 int SecondElem = Ext2Op->getZExtValue();
15094 int SubvecIdx;
15095 if (FirstElem == 0 && SecondElem == 1)
15096 SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
15097 else if (FirstElem == 2 && SecondElem == 3)
15098 SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
15099 else
15100 return SDValue();
15101
15102 SDValue SrcVec = Ext1.getOperand(0);
15103 auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
15104 PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
15105 return DAG.getNode(NodeType, dl, MVT::v2f64,
15106 SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
15107}
15108
15109SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
15110 DAGCombinerInfo &DCI) const {
15111 assert((N->getOpcode() == ISD::SINT_TO_FP ||
15112 N->getOpcode() == ISD::UINT_TO_FP) &&
15113 "Need an int -> FP conversion node here");
15114
15115 if (useSoftFloat() || !Subtarget.has64BitSupport())
15116 return SDValue();
15117
15118 SelectionDAG &DAG = DCI.DAG;
15119 SDLoc dl(N);
15120 SDValue Op(N, 0);
15121
15122 // Don't handle ppc_fp128 here or conversions that are out-of-range capable
15123 // from the hardware.
15124 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
15125 return SDValue();
15126 if (!Op.getOperand(0).getValueType().isSimple())
15127 return SDValue();
15128 if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
15129 Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64))
15130 return SDValue();
15131
15132 SDValue FirstOperand(Op.getOperand(0));
15133 bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
15134 (FirstOperand.getValueType() == MVT::i8 ||
15135 FirstOperand.getValueType() == MVT::i16);
15136 if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
15137 bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
15138 bool DstDouble = Op.getValueType() == MVT::f64;
15139 unsigned ConvOp = Signed ?
15140 (DstDouble ? PPCISD::FCFID : PPCISD::FCFIDS) :
15141 (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
15142 SDValue WidthConst =
15143 DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
15144 dl, false);
15145 LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
15146 SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
15147 SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
15148 DAG.getVTList(MVT::f64, MVT::Other),
15149 Ops, MVT::i8, LDN->getMemOperand());
15150 DAG.makeEquivalentMemoryOrdering(LDN, Ld);
15151
15152 // For signed conversion, we need to sign-extend the value in the VSR
15153 if (Signed) {
15154 SDValue ExtOps[] = { Ld, WidthConst };
15155 SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
15156 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
15157 } else
15158 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
15159 }
15160
15161
15162 // For i32 intermediate values, unfortunately, the conversion functions
15163 // leave the upper 32 bits of the value undefined. Within the set of
15164 // scalar instructions, we have no method for zero- or sign-extending the
15165 // value. Thus, we cannot handle i32 intermediate values here.
15166 if (Op.getOperand(0).getValueType() == MVT::i32)
15167 return SDValue();
15168
15169 assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
15170 "UINT_TO_FP is supported only with FPCVT");
15171
15172 // If we have FCFIDS, then use it when converting to single-precision.
15173 // Otherwise, convert to double-precision and then round.
15174 unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
15175 ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
15176 : PPCISD::FCFIDS)
15177 : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
15178 : PPCISD::FCFID);
15179 MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
15180 ? MVT::f32
15181 : MVT::f64;
15182
15183 // If we're converting from a float, to an int, and back to a float again,
15184 // then we don't need the store/load pair at all.
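// Illustrative example (added for clarity):
//   (f64 (sint_to_fp (i64 (fp_to_sint f64 %x))))
// becomes (FCFID (FCTIDZ %x)), so the value never leaves the FP/VSX registers
// and no stack temporary is needed.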
15185 if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
15186 Subtarget.hasFPCVT()) ||
15187 (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
15188 SDValue Src = Op.getOperand(0).getOperand(0);
15189 if (Src.getValueType() == MVT::f32) {
15190 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
15191 DCI.AddToWorklist(Src.getNode());
15192 } else if (Src.getValueType() != MVT::f64) {
15193 // Make sure that we don't pick up a ppc_fp128 source value.
15194 return SDValue();
15195 }
15196
15197 unsigned FCTOp =
15198 Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
15199 PPCISD::FCTIDUZ;
15200
15201 SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
15202 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);
15203
15204 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
15205 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
15206 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
15207 DCI.AddToWorklist(FP.getNode());
15208 }
15209
15210 return FP;
15211 }
15212
15213 return SDValue();
15214}
15215
15216// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
15217// builtins) into loads with swaps.
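// Illustrative example (added for clarity): on little endian,
//   (v4i32 (load %p))
// becomes (bitcast (XXSWAPD (LXVD2X %p))), with the bitcast present because
// the memory intrinsic is produced as v2f64.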
15218 SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
15219 DAGCombinerInfo &DCI) const {
15220 // Delay VSX load for LE combine until after LegalizeOps to prioritize other
15221 // load combines.
15222 if (DCI.isBeforeLegalizeOps())
15223 return SDValue();
15224
15225 SelectionDAG &DAG = DCI.DAG;
15226 SDLoc dl(N);
15227 SDValue Chain;
15228 SDValue Base;
15229 MachineMemOperand *MMO;
15230
15231 switch (N->getOpcode()) {
15232 default:
15233 llvm_unreachable("Unexpected opcode for little endian VSX load");
15234 case ISD::LOAD: {
15235 LoadSDNode *LD = cast<LoadSDNode>(N);
15236 Chain = LD->getChain();
15237 Base = LD->getBasePtr();
15238 MMO = LD->getMemOperand();
15239 // If the MMO suggests this isn't a load of a full vector, leave
15240 // things alone. For a built-in, we have to make the change for
15241 // correctness, so if there is a size problem that will be a bug.
15242 if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
15243 return SDValue();
15244 break;
15245 }
15246 case ISD::INTRINSIC_W_CHAIN: {
15247 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
15248 Chain = Intrin->getChain();
15249 // Similarly to the store case below, Intrin->getBasePtr() doesn't get
15250 // us what we want. Get operand 2 instead.
15251 Base = Intrin->getOperand(2);
15252 MMO = Intrin->getMemOperand();
15253 break;
15254 }
15255 }
15256
15257 MVT VecTy = N->getValueType(0).getSimpleVT();
15258
15259 SDValue LoadOps[] = { Chain, Base };
15260 SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
15261 DAG.getVTList(MVT::v2f64, MVT::Other),
15262 LoadOps, MVT::v2f64, MMO);
15263
15264 DCI.AddToWorklist(Load.getNode());
15265 Chain = Load.getValue(1);
15266 SDValue Swap = DAG.getNode(
15267 PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
15268 DCI.AddToWorklist(Swap.getNode());
15269
15270 // Add a bitcast if the resulting load type doesn't match v2f64.
15271 if (VecTy != MVT::v2f64) {
15272 SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
15273 DCI.AddToWorklist(N.getNode());
15274 // Package {bitcast value, swap's chain} to match Load's shape.
15275 return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
15276 N, Swap.getValue(1));
15277 }
15278
15279 return Swap;
15280}
15281
15282// expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
15283// builtins) into stores with swaps.
15284 SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
15285 DAGCombinerInfo &DCI) const {
15286 // Delay VSX store for LE combine until after LegalizeOps to prioritize other
15287 // store combines.
15288 if (DCI.isBeforeLegalizeOps())
15289 return SDValue();
15290
15291 SelectionDAG &DAG = DCI.DAG;
15292 SDLoc dl(N);
15293 SDValue Chain;
15294 SDValue Base;
15295 unsigned SrcOpnd;
15296 MachineMemOperand *MMO;
15297
15298 switch (N->getOpcode()) {
15299 default:
15300 llvm_unreachable("Unexpected opcode for little endian VSX store");
15301 case ISD::STORE: {
15302 StoreSDNode *ST = cast<StoreSDNode>(N);
15303 Chain = ST->getChain();
15304 Base = ST->getBasePtr();
15305 MMO = ST->getMemOperand();
15306 SrcOpnd = 1;
15307 // If the MMO suggests this isn't a store of a full vector, leave
15308 // things alone. For a built-in, we have to make the change for
15309 // correctness, so if there is a size problem that will be a bug.
15310 if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
15311 return SDValue();
15312 break;
15313 }
15314 case ISD::INTRINSIC_VOID: {
15315 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
15316 Chain = Intrin->getChain();
15317 // Intrin->getBasePtr() oddly does not get what we want.
15318 Base = Intrin->getOperand(3);
15319 MMO = Intrin->getMemOperand();
15320 SrcOpnd = 2;
15321 break;
15322 }
15323 }
15324
15325 SDValue Src = N->getOperand(SrcOpnd);
15326 MVT VecTy = Src.getValueType().getSimpleVT();
15327
15328 // All stores are done as v2f64 and possible bit cast.
15329 if (VecTy != MVT::v2f64) {
15330 Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
15331 DCI.AddToWorklist(Src.getNode());
15332 }
15333
15334 SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
15335 DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
15336 DCI.AddToWorklist(Swap.getNode());
15337 Chain = Swap.getValue(1);
15338 SDValue StoreOps[] = { Chain, Swap, Base };
15339 SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
15340 DAG.getVTList(MVT::Other),
15341 StoreOps, VecTy, MMO);
15342 DCI.AddToWorklist(Store.getNode());
15343 return Store;
15344}
15345
15346// Handle DAG combine for STORE (FP_TO_INT F).
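// Illustrative example (added for clarity):
//   (store (i32 (fp_to_sint f64 %x)), %p)
// is rewritten so that the conversion result stays in a VSR and is stored
// from there via a PPCISD::ST_VSR_SCAL_INT node, instead of being moved to a
// GPR and stored with an integer store.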
15347SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
15348 DAGCombinerInfo &DCI) const {
15349 SelectionDAG &DAG = DCI.DAG;
15350 SDLoc dl(N);
15351 unsigned Opcode = N->getOperand(1).getOpcode();
15352 (void)Opcode;
15353 bool Strict = N->getOperand(1)->isStrictFPOpcode();
15354
15355 assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
15356 Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT)
15357 && "Not a FP_TO_INT Instruction!");
15358
15359 SDValue Val = N->getOperand(1).getOperand(Strict ? 1 : 0);
15360 EVT Op1VT = N->getOperand(1).getValueType();
15361 EVT ResVT = Val.getValueType();
15362
15363 if (!Subtarget.hasVSX() || !Subtarget.hasFPCVT() || !isTypeLegal(ResVT))
15364 return SDValue();
15365
15366 // Only perform combine for conversion to i64/i32 or power9 i16/i8.
15367 bool ValidTypeForStoreFltAsInt =
15368 (Op1VT == MVT::i32 || (Op1VT == MVT::i64 && Subtarget.isPPC64()) ||
15369 (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));
15370
15371 // TODO: Lower conversion from f128 on all VSX targets
15372 if (ResVT == MVT::ppcf128 || (ResVT == MVT::f128 && !Subtarget.hasP9Vector()))
15373 return SDValue();
15374
15375 if ((Op1VT != MVT::i64 && !Subtarget.hasP8Vector()) ||
15376 cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
15377 return SDValue();
15378
15379 Val = convertFPToInt(N->getOperand(1), DAG, Subtarget);
15380
15381 // Set number of bytes being converted.
15382 unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
15383 SDValue Ops[] = {N->getOperand(0), Val, N->getOperand(2),
15384 DAG.getIntPtrConstant(ByteSize, dl, false),
15385 DAG.getValueType(Op1VT)};
15386
15387 Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl,
15388 DAG.getVTList(MVT::Other), Ops,
15389 cast<StoreSDNode>(N)->getMemoryVT(),
15390 cast<StoreSDNode>(N)->getMemOperand());
15391
15392 return Val;
15393}
15394
15395static bool isAlternatingShuffMask(const ArrayRef<int> &Mask, int NumElts) {
15396 // Check that the source of the element keeps flipping
15397 // (i.e. Mask[i] < NumElts -> Mask[i+1] >= NumElts).
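// For example (illustrative), with NumElts == 4 the mask <0, 4, 1, 5>
// alternates between the two source vectors, while <0, 1, 4, 5> does not.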
15398 bool PrevElemFromFirstVec = Mask[0] < NumElts;
15399 for (int i = 1, e = Mask.size(); i < e; i++) {
15400 if (PrevElemFromFirstVec && Mask[i] < NumElts)
15401 return false;
15402 if (!PrevElemFromFirstVec && Mask[i] >= NumElts)
15403 return false;
15404 PrevElemFromFirstVec = !PrevElemFromFirstVec;
15405 }
15406 return true;
15407}
15408
15409static bool isSplatBV(SDValue Op) {
15410 if (Op.getOpcode() != ISD::BUILD_VECTOR)
15411 return false;
15412 SDValue FirstOp;
15413
15414 // Find first non-undef input.
15415 for (int i = 0, e = Op.getNumOperands(); i < e; i++) {
15416 FirstOp = Op.getOperand(i);
15417 if (!FirstOp.isUndef())
15418 break;
15419 }
15420
15421 // All inputs are undef or the same as the first non-undef input.
15422 for (int i = 1, e = Op.getNumOperands(); i < e; i++)
15423 if (Op.getOperand(i) != FirstOp && !Op.getOperand(i).isUndef())
15424 return false;
15425 return true;
15426}
15427
15428 static SDValue isScalarToVec(SDValue Op) {
15429 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
15430 return Op;
15431 if (Op.getOpcode() != ISD::BITCAST)
15432 return SDValue();
15433 Op = Op.getOperand(0);
15434 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
15435 return Op;
15436 return SDValue();
15437}
15438
15439// Fix up the shuffle mask to account for the fact that the result of
15440// scalar_to_vector is not in lane zero. This just takes all values in
15441// the ranges specified by the min/max indices and adds the number of
15442// elements required to ensure each element comes from the respective
15443// position in the valid lane.
15444// On little endian, that's just the corresponding element in the other
15445// half of the vector. On big endian, it is in the same half but right
15446// justified rather than left justified in that half.
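// Illustrative example (added for clarity): for a v4i32 shuffle on little
// endian where the LHS has been turned into a permuted scalar_to_vector
// (LHSMaxIdx == 1, HalfVec == 2), a mask entry of 0 is rewritten to 2 because
// the scalar actually sits in element 2 of the permuted vector rather than in
// element 0.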
15447 static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl<int> &ShuffV,
15448 int LHSMaxIdx, int RHSMinIdx,
15449 int RHSMaxIdx, int HalfVec,
15450 unsigned ValidLaneWidth,
15451 const PPCSubtarget &Subtarget) {
15452 for (int i = 0, e = ShuffV.size(); i < e; i++) {
15453 int Idx = ShuffV[i];
15454 if ((Idx >= 0 && Idx < LHSMaxIdx) || (Idx >= RHSMinIdx && Idx < RHSMaxIdx))
15455 ShuffV[i] +=
15456 Subtarget.isLittleEndian() ? HalfVec : HalfVec - ValidLaneWidth;
15457 }
15458}
15459
15460// Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if
15461// the original is:
15462// (<n x Ty> (scalar_to_vector (Ty (extract_elt <n x Ty> %a, C))))
15463// In such a case, just change the shuffle mask to extract the element
15464// from the permuted index.
15465 static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG,
15466 const PPCSubtarget &Subtarget) {
15467 SDLoc dl(OrigSToV);
15468 EVT VT = OrigSToV.getValueType();
15469 assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR &&
15470 "Expecting a SCALAR_TO_VECTOR here");
15471 SDValue Input = OrigSToV.getOperand(0);
15472
15473 if (Input.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
15474 ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(Input.getOperand(1));
15475 SDValue OrigVector = Input.getOperand(0);
15476
15477 // Can't handle non-const element indices or different vector types
15478 // for the input to the extract and the output of the scalar_to_vector.
15479 if (Idx && VT == OrigVector.getValueType()) {
15480 unsigned NumElts = VT.getVectorNumElements();
15481 assert(
15482 NumElts > 1 &&
15483 "Cannot produce a permuted scalar_to_vector for one element vector");
15484 SmallVector<int, 16> NewMask(NumElts, -1);
15485 unsigned ResultInElt = NumElts / 2;
15486 ResultInElt -= Subtarget.isLittleEndian() ? 0 : 1;
15487 NewMask[ResultInElt] = Idx->getZExtValue();
15488 return DAG.getVectorShuffle(VT, dl, OrigVector, OrigVector, NewMask);
15489 }
15490 }
15491 return DAG.getNode(PPCISD::SCALAR_TO_VECTOR_PERMUTED, dl, VT,
15492 OrigSToV.getOperand(0));
15493}
15494
15495// On little endian subtargets, combine shuffles such as:
15496// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
15497// into:
15498// vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7>, <zero>, %b
15499// because the latter can be matched to a single instruction merge.
15500// Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute
15501// to put the value into element zero. Adjust the shuffle mask so that the
15502// vector can remain in permuted form (to prevent a swap prior to a shuffle).
15503// On big endian targets, this is still useful for SCALAR_TO_VECTOR
15504// nodes with elements smaller than doubleword because all the ways
15505// of getting scalar data into a vector register put the value in the
15506// rightmost element of the left half of the vector.
15507SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
15508 SelectionDAG &DAG) const {
15509 SDValue LHS = SVN->getOperand(0);
15510 SDValue RHS = SVN->getOperand(1);
15511 auto Mask = SVN->getMask();
15512 int NumElts = LHS.getValueType().getVectorNumElements();
15513 SDValue Res(SVN, 0);
15514 SDLoc dl(SVN);
15515 bool IsLittleEndian = Subtarget.isLittleEndian();
15516
15517 // On big endian targets this is only useful for subtargets with direct moves.
15518 // On little endian targets it would be useful for all subtargets with VSX.
15519 // However adding special handling for LE subtargets without direct moves
15520 // would be wasted effort since the minimum arch for LE is ISA 2.07 (Power8)
15521 // which includes direct moves.
15522 if (!Subtarget.hasDirectMove())
15523 return Res;
15524
15525 // If this is not a shuffle of a shuffle and the first element comes from
15526 // the second vector, canonicalize to the commuted form. This will make it
15527 // more likely to match one of the single instruction patterns.
15528 if (Mask[0] >= NumElts && LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
15529 RHS.getOpcode() != ISD::VECTOR_SHUFFLE) {
15530 std::swap(LHS, RHS);
15531 Res = DAG.getCommutedVectorShuffle(*SVN);
15532 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
15533 }
15534
15535 // Adjust the shuffle mask if either input vector comes from a
15536 // SCALAR_TO_VECTOR and keep the respective input vector in permuted
15537 // form (to prevent the need for a swap).
15538 SmallVector<int, 16> ShuffV(Mask);
15539 SDValue SToVLHS = isScalarToVec(LHS);
15540 SDValue SToVRHS = isScalarToVec(RHS);
15541 if (SToVLHS || SToVRHS) {
15542 // FIXME: If both LHS and RHS are SCALAR_TO_VECTOR, but are not the
15543 // same type and have differing element sizes, then do not perform
15544 // the following transformation. The current transformation for
15545 // SCALAR_TO_VECTOR assumes that both input vectors have the same
15546 // element size. This will be updated in the future to account for
15547 // differing sizes of the LHS and RHS.
15548 if (SToVLHS && SToVRHS &&
15549 (SToVLHS.getValueType().getScalarSizeInBits() !=
15550 SToVRHS.getValueType().getScalarSizeInBits()))
15551 return Res;
15552
15553 int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements()
15554 : SToVRHS.getValueType().getVectorNumElements();
15555 int NumEltsOut = ShuffV.size();
15556 // The width of the "valid lane" (i.e. the lane that contains the value that
15557 // is vectorized) needs to be expressed in terms of the number of elements
15558 // of the shuffle. It is therefore the ratio of the scalar element sizes
15559 // before and after any bitcast.
15560 unsigned ValidLaneWidth =
15561 SToVLHS ? SToVLHS.getValueType().getScalarSizeInBits() /
15562 LHS.getValueType().getScalarSizeInBits()
15563 : SToVRHS.getValueType().getScalarSizeInBits() /
15564 RHS.getValueType().getScalarSizeInBits();
15565
15566 // Initially assume that neither input is permuted. These will be adjusted
15567 // accordingly if either input is.
15568 int LHSMaxIdx = -1;
15569 int RHSMinIdx = -1;
15570 int RHSMaxIdx = -1;
15571 int HalfVec = LHS.getValueType().getVectorNumElements() / 2;
15572
15573 // Get the permuted scalar to vector nodes for the source(s) that come from
15574 // ISD::SCALAR_TO_VECTOR.
15575 // On big endian systems, this only makes sense for element sizes smaller
15576 // than 64 bits since for 64-bit elements, all instructions already put
15577 // the value into element zero. Since scalar size of LHS and RHS may differ
15578 // after isScalarToVec, this should be checked using their own sizes.
15579 if (SToVLHS) {
15580 if (!IsLittleEndian && SToVLHS.getValueType().getScalarSizeInBits() >= 64)
15581 return Res;
15582 // Set up the values for the shuffle vector fixup.
15583 LHSMaxIdx = NumEltsOut / NumEltsIn;
15584 SToVLHS = getSToVPermuted(SToVLHS, DAG, Subtarget);
15585 if (SToVLHS.getValueType() != LHS.getValueType())
15586 SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS);
15587 LHS = SToVLHS;
15588 }
15589 if (SToVRHS) {
15590 if (!IsLittleEndian && SToVRHS.getValueType().getScalarSizeInBits() >= 64)
15591 return Res;
15592 RHSMinIdx = NumEltsOut;
15593 RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx;
15594 SToVRHS = getSToVPermuted(SToVRHS, DAG, Subtarget);
15595 if (SToVRHS.getValueType() != RHS.getValueType())
15596 SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS);
15597 RHS = SToVRHS;
15598 }
15599
15600 // Fix up the shuffle mask to reflect where the desired element actually is.
15601 // The minimum and maximum indices that correspond to element zero for both
15602 // the LHS and RHS are computed and will control which shuffle mask entries
15603 // are to be changed. For example, if the RHS is permuted, any shuffle mask
15604 // entries in the range [RHSMinIdx,RHSMaxIdx) will be adjusted.
15605 fixupShuffleMaskForPermutedSToV(ShuffV, LHSMaxIdx, RHSMinIdx, RHSMaxIdx,
15606 HalfVec, ValidLaneWidth, Subtarget);
15607 Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
15608
15609 // We may have simplified away the shuffle. We won't be able to do anything
15610 // further with it here.
15611 if (!isa<ShuffleVectorSDNode>(Res))
15612 return Res;
15613 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
15614 }
15615
15616 SDValue TheSplat = IsLittleEndian ? RHS : LHS;
15617 // The common case after we commuted the shuffle is that the RHS is a splat
15618 // and we have elements coming in from the splat at indices that are not
15619 // conducive to using a merge.
15620 // Example:
15621 // vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, <zero>
15622 if (!isSplatBV(TheSplat))
15623 return Res;
15624
15625 // We are looking for a mask such that all even elements are from
15626 // one vector and all odd elements from the other.
15627 if (!isAlternatingShuffMask(Mask, NumElts))
15628 return Res;
15629
15630 // Adjust the mask so we are pulling in the same index from the splat
15631 // as the index from the interesting vector in consecutive elements.
15632 if (IsLittleEndian) {
15633 // Example (even elements from first vector):
15634 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>
15635 if (Mask[0] < NumElts)
15636 for (int i = 1, e = Mask.size(); i < e; i += 2) {
15637 if (ShuffV[i] < 0)
15638 continue;
15639 // If element from non-splat is undef, pick first element from splat.
15640 ShuffV[i] = (ShuffV[i - 1] >= 0 ? ShuffV[i - 1] : 0) + NumElts;
15641 }
15642 // Example (odd elements from first vector):
15643 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero>
15644 else
15645 for (int i = 0, e = Mask.size(); i < e; i += 2) {
15646 if (ShuffV[i] < 0)
15647 continue;
15648 // If element from non-splat is undef, pick first element from splat.
15649 ShuffV[i] = (ShuffV[i + 1] >= 0 ? ShuffV[i + 1] : 0) + NumElts;
15650 }
15651 } else {
15652 // Example (even elements from first vector):
15653 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> <zero>, t1
15654 if (Mask[0] < NumElts)
15655 for (int i = 0, e = Mask.size(); i < e; i += 2) {
15656 if (ShuffV[i] < 0)
15657 continue;
15658 // If element from non-splat is undef, pick first element from splat.
15659 ShuffV[i] = ShuffV[i + 1] >= 0 ? ShuffV[i + 1] - NumElts : 0;
15660 }
15661 // Example (odd elements from first vector):
15662 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> <zero>, t1
15663 else
15664 for (int i = 1, e = Mask.size(); i < e; i += 2) {
15665 if (ShuffV[i] < 0)
15666 continue;
15667 // If element from non-splat is undef, pick first element from splat.
15668 ShuffV[i] = ShuffV[i - 1] >= 0 ? ShuffV[i - 1] - NumElts : 0;
15669 }
15670 }
15671
15672 // If the RHS has undefs, we need to remove them since we may have created
15673 // a shuffle that adds those instead of the splat value.
15674 SDValue SplatVal =
15675 cast<BuildVectorSDNode>(TheSplat.getNode())->getSplatValue();
15676 TheSplat = DAG.getSplatBuildVector(TheSplat.getValueType(), dl, SplatVal);
15677
15678 if (IsLittleEndian)
15679 RHS = TheSplat;
15680 else
15681 LHS = TheSplat;
15682 return DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
15683}
15684
15685SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
15686 LSBaseSDNode *LSBase,
15687 DAGCombinerInfo &DCI) const {
15688 assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) &&
15689 "Not a reverse memop pattern!");
15690
15691 auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool {
15692 auto Mask = SVN->getMask();
15693 int i = 0;
15694 auto I = Mask.rbegin();
15695 auto E = Mask.rend();
15696
15697 for (; I != E; ++I) {
15698 if (*I != i)
15699 return false;
15700 i++;
15701 }
15702 return true;
15703 };
15704
15705 SelectionDAG &DAG = DCI.DAG;
15706 EVT VT = SVN->getValueType(0);
15707
15708 if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX())
15709 return SDValue();
15710
15711 // Before P9, the PPCVSXSwapRemoval pass is used to fix up the element order.
15712 // See the comment in PPCVSXSwapRemoval.cpp.
15713 // This combine conflicts with that optimization, so we don't do it there.
15714 if (!Subtarget.hasP9Vector())
15715 return SDValue();
15716
15717 if (!IsElementReverse(SVN))
15718 return SDValue();
15719
15720 if (LSBase->getOpcode() == ISD::LOAD) {
15721 // If result value 0 of the load has any user other than the
15722 // shufflevector instruction, it is not profitable to replace the
15723 // shufflevector with a reverse load.
15724 for (SDNode::use_iterator UI = LSBase->use_begin(), UE = LSBase->use_end();
15725 UI != UE; ++UI)
15726 if (UI.getUse().getResNo() == 0 && UI->getOpcode() != ISD::VECTOR_SHUFFLE)
15727 return SDValue();
15728
15729 SDLoc dl(LSBase);
15730 SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
15731 return DAG.getMemIntrinsicNode(
15732 PPCISD::LOAD_VEC_BE, dl, DAG.getVTList(VT, MVT::Other), LoadOps,
15733 LSBase->getMemoryVT(), LSBase->getMemOperand());
15734 }
15735
15736 if (LSBase->getOpcode() == ISD::STORE) {
15737 // If there are other uses of the shuffle, the swap cannot be avoided.
15738 // Forcing the use of an X-Form (since swapped stores only have
15739 // X-Forms) without removing the swap is unprofitable.
15740 if (!SVN->hasOneUse())
15741 return SDValue();
15742
15743 SDLoc dl(LSBase);
15744 SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0),
15745 LSBase->getBasePtr()};
15746 return DAG.getMemIntrinsicNode(
15747 PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps,
15748 LSBase->getMemoryVT(), LSBase->getMemOperand());
15749 }
15750
15751 llvm_unreachable("Expected a load or store node here");
15752}
15753
15754static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth) {
15755 unsigned IntrinsicID = Intrin.getConstantOperandVal(1);
15756 if (IntrinsicID == Intrinsic::ppc_stdcx)
15757 StoreWidth = 8;
15758 else if (IntrinsicID == Intrinsic::ppc_stwcx)
15759 StoreWidth = 4;
15760 else if (IntrinsicID == Intrinsic::ppc_sthcx)
15761 StoreWidth = 2;
15762 else if (IntrinsicID == Intrinsic::ppc_stbcx)
15763 StoreWidth = 1;
15764 else
15765 return false;
15766 return true;
15767}
15768
15769SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
15770 DAGCombinerInfo &DCI) const {
15771 SelectionDAG &DAG = DCI.DAG;
15772 SDLoc dl(N);
15773 switch (N->getOpcode()) {
15774 default: break;
15775 case ISD::ADD:
15776 return combineADD(N, DCI);
15777 case ISD::AND: {
15778 // We don't want (and (zext (shift...)), C) if C fits in the width of the
15779 // original input as that will prevent us from selecting optimal rotates.
15780 // This only matters if the input to the extend is i32 widened to i64.
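 // Illustrative example (sketch): for an i32 value %x,
 //   (and (zext i32 -> i64 (srl %x, 4)), 255)
 // is rewritten below as
 //   (zext i32 -> i64 (and (srl %x, 4), 255))
 // so the AND stays in i32 and rotate-and-mask selection still applies.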
15781 SDValue Op1 = N->getOperand(0);
15782 SDValue Op2 = N->getOperand(1);
15783 if ((Op1.getOpcode() != ISD::ZERO_EXTEND &&
15784 Op1.getOpcode() != ISD::ANY_EXTEND) ||
15785 !isa<ConstantSDNode>(Op2) || N->getValueType(0) != MVT::i64 ||
15786 Op1.getOperand(0).getValueType() != MVT::i32)
15787 break;
15788 SDValue NarrowOp = Op1.getOperand(0);
15789 if (NarrowOp.getOpcode() != ISD::SHL && NarrowOp.getOpcode() != ISD::SRL &&
15790 NarrowOp.getOpcode() != ISD::ROTL && NarrowOp.getOpcode() != ISD::ROTR)
15791 break;
15792
15793 uint64_t Imm = Op2->getAsZExtVal();
15794 // Make sure that the constant is narrow enough to fit in the narrow type.
15795 if (!isUInt<32>(Imm))
15796 break;
15797 SDValue ConstOp = DAG.getConstant(Imm, dl, MVT::i32);
15798 SDValue NarrowAnd = DAG.getNode(ISD::AND, dl, MVT::i32, NarrowOp, ConstOp);
15799 return DAG.getZExtOrTrunc(NarrowAnd, dl, N->getValueType(0));
15800 }
15801 case ISD::SHL:
15802 return combineSHL(N, DCI);
15803 case ISD::SRA:
15804 return combineSRA(N, DCI);
15805 case ISD::SRL:
15806 return combineSRL(N, DCI);
15807 case ISD::MUL:
15808 return combineMUL(N, DCI);
15809 case ISD::FMA:
15810 case PPCISD::FNMSUB:
15811 return combineFMALike(N, DCI);
15812 case PPCISD::SHL:
15813 if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
15814 return N->getOperand(0);
15815 break;
15816 case PPCISD::SRL:
15817 if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
15818 return N->getOperand(0);
15819 break;
15820 case PPCISD::SRA:
15821 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
15822 if (C->isZero() || // 0 >>s V -> 0.
15823 C->isAllOnes()) // -1 >>s V -> -1.
15824 return N->getOperand(0);
15825 }
15826 break;
15827 case ISD::SIGN_EXTEND:
15828 case ISD::ZERO_EXTEND:
15829 case ISD::ANY_EXTEND:
15830 return DAGCombineExtBoolTrunc(N, DCI);
15831 case ISD::TRUNCATE:
15832 return combineTRUNCATE(N, DCI);
15833 case ISD::SETCC:
15834 if (SDValue CSCC = combineSetCC(N, DCI))
15835 return CSCC;
15836 [[fallthrough]];
15837 case ISD::SELECT_CC:
15838 return DAGCombineTruncBoolExt(N, DCI);
15839 case ISD::SINT_TO_FP:
15840 case ISD::UINT_TO_FP:
15841 return combineFPToIntToFP(N, DCI);
15842 case ISD::VECTOR_SHUFFLE:
15843 if (ISD::isNormalLoad(N->getOperand(0).getNode())) {
15844 LSBaseSDNode* LSBase = cast<LSBaseSDNode>(N->getOperand(0));
15845 return combineVReverseMemOP(cast<ShuffleVectorSDNode>(N), LSBase, DCI);
15846 }
15847 return combineVectorShuffle(cast<ShuffleVectorSDNode>(N), DCI.DAG);
15848 case ISD::STORE: {
15849
15850 EVT Op1VT = N->getOperand(1).getValueType();
15851 unsigned Opcode = N->getOperand(1).getOpcode();
15852
15853 if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
15854 Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT) {
15855 SDValue Val = combineStoreFPToInt(N, DCI);
15856 if (Val)
15857 return Val;
15858 }
15859
15860 if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) {
15861 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N->getOperand(1));
15862 SDValue Val = combineVReverseMemOP(SVN, cast<LSBaseSDNode>(N), DCI);
15863 if (Val)
15864 return Val;
15865 }
15866
15867 // Turn STORE (BSWAP) -> sthbrx/stwbrx.
15868 if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
15869 N->getOperand(1).getNode()->hasOneUse() &&
15870 (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
15871 (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
15872
15873 // STBRX can only handle simple types and it makes no sense to store less
15874 // than two bytes in byte-reversed order.
15875 EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
15876 if (mVT.isExtended() || mVT.getSizeInBits() < 16)
15877 break;
15878
15879 SDValue BSwapOp = N->getOperand(1).getOperand(0);
15880 // Do an any-extend to 32-bits if this is a half-word input.
15881 if (BSwapOp.getValueType() == MVT::i16)
15882 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
15883
15884 // If the type of the BSWAP operand is wider than the stored memory width,
15885 // it needs to be shifted right before STBRX.
15886 if (Op1VT.bitsGT(mVT)) {
15887 int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
15888 BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
15889 DAG.getConstant(Shift, dl, MVT::i32));
15890 // Need to truncate if this is a bswap of i64 stored as i32/i16.
15891 if (Op1VT == MVT::i64)
15892 BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
15893 }
15894
15895 SDValue Ops[] = {
15896 N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
15897 };
15898 return
15899 DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
15900 Ops, cast<StoreSDNode>(N)->getMemoryVT(),
15901 cast<StoreSDNode>(N)->getMemOperand());
15902 }
15903
15904 // STORE Constant:i32<0> -> STORE<trunc to i32> Constant:i64<0>
15905 // This increases the chance of CSEing the constant materialization.
15906 if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
15907 isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) {
15908 // Need to sign-extend to 64 bits to handle negative values.
15909 EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT();
15910 uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1),
15911 MemVT.getSizeInBits());
15912 SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64);
15913
15914 // DAG.getTruncStore() can't be used here because it doesn't accept
15915 // the general (base + offset) addressing mode.
15916 // So we use UpdateNodeOperands and setTruncatingStore instead.
15917 DAG.UpdateNodeOperands(N, N->getOperand(0), Const64, N->getOperand(2),
15918 N->getOperand(3));
15919 cast<StoreSDNode>(N)->setTruncatingStore(true);
15920 return SDValue(N, 0);
15921 }
15922
15923 // For little endian, VSX stores require generating xxswapd/stxvd2x.
15924 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
15925 if (Op1VT.isSimple()) {
15926 MVT StoreVT = Op1VT.getSimpleVT();
15927 if (Subtarget.needsSwapsForVSXMemOps() &&
15928 (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
15929 StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
15930 return expandVSXStoreForLE(N, DCI);
15931 }
15932 break;
15933 }
15934 case ISD::LOAD: {
15935 LoadSDNode *LD = cast<LoadSDNode>(N);
15936 EVT VT = LD->getValueType(0);
15937
15938 // For little endian, VSX loads require generating lxvd2x/xxswapd.
15939 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
15940 if (VT.isSimple()) {
15941 MVT LoadVT = VT.getSimpleVT();
15942 if (Subtarget.needsSwapsForVSXMemOps() &&
15943 (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
15944 LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
15945 return expandVSXLoadForLE(N, DCI);
15946 }
15947
15948 // We sometimes end up with a 64-bit integer load, from which we extract
15949 // two single-precision floating-point numbers. This happens with
15950 // std::complex<float>, and other similar structures, because of the way we
15951 // canonicalize structure copies. However, if we lack direct moves,
15952 // then the final bitcasts from the extracted integer values to the
15953 // floating-point numbers turn into store/load pairs. Even with direct moves,
15954 // just loading the two floating-point numbers is likely better.
15955 auto ReplaceTwoFloatLoad = [&]() {
15956 if (VT != MVT::i64)
15957 return false;
15958
15959 if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
15960 LD->isVolatile())
15961 return false;
15962
15963 // We're looking for a sequence like this:
15964 // t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
15965 // t16: i64 = srl t13, Constant:i32<32>
15966 // t17: i32 = truncate t16
15967 // t18: f32 = bitcast t17
15968 // t19: i32 = truncate t13
15969 // t20: f32 = bitcast t19
15970
15971 if (!LD->hasNUsesOfValue(2, 0))
15972 return false;
15973
15974 auto UI = LD->use_begin();
15975 while (UI.getUse().getResNo() != 0) ++UI;
15976 SDNode *Trunc = *UI++;
15977 while (UI.getUse().getResNo() != 0) ++UI;
15978 SDNode *RightShift = *UI;
15979 if (Trunc->getOpcode() != ISD::TRUNCATE)
15980 std::swap(Trunc, RightShift);
15981
15982 if (Trunc->getOpcode() != ISD::TRUNCATE ||
15983 Trunc->getValueType(0) != MVT::i32 ||
15984 !Trunc->hasOneUse())
15985 return false;
15986 if (RightShift->getOpcode() != ISD::SRL ||
15987 !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
15988 RightShift->getConstantOperandVal(1) != 32 ||
15989 !RightShift->hasOneUse())
15990 return false;
15991
15992 SDNode *Trunc2 = *RightShift->use_begin();
15993 if (Trunc2->getOpcode() != ISD::TRUNCATE ||
15994 Trunc2->getValueType(0) != MVT::i32 ||
15995 !Trunc2->hasOneUse())
15996 return false;
15997
15998 SDNode *Bitcast = *Trunc->use_begin();
15999 SDNode *Bitcast2 = *Trunc2->use_begin();
16000
16001 if (Bitcast->getOpcode() != ISD::BITCAST ||
16002 Bitcast->getValueType(0) != MVT::f32)
16003 return false;
16004 if (Bitcast2->getOpcode() != ISD::BITCAST ||
16005 Bitcast2->getValueType(0) != MVT::f32)
16006 return false;
16007
16008 if (Subtarget.isLittleEndian())
16009 std::swap(Bitcast, Bitcast2);
16010
16011 // Bitcast has the second float (in memory-layout order) and Bitcast2
16012 // has the first one.
16013
16014 SDValue BasePtr = LD->getBasePtr();
16015 if (LD->isIndexed()) {
16016 assert(LD->getAddressingMode() == ISD::PRE_INC &&
16017 "Non-pre-inc AM on PPC?");
16018 BasePtr =
16019 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
16020 LD->getOffset());
16021 }
16022
16023 auto MMOFlags =
16024 LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
16025 SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
16026 LD->getPointerInfo(), LD->getAlign(),
16027 MMOFlags, LD->getAAInfo());
16028 SDValue AddPtr =
16029 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
16030 BasePtr, DAG.getIntPtrConstant(4, dl));
16031 SDValue FloatLoad2 = DAG.getLoad(
16032 MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
16033 LD->getPointerInfo().getWithOffset(4),
16034 commonAlignment(LD->getAlign(), 4), MMOFlags, LD->getAAInfo());
16035
16036 if (LD->isIndexed()) {
16037 // Note that DAGCombine should re-form any pre-increment load(s) from
16038 // what is produced here if that makes sense.
16039 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
16040 }
16041
16042 DCI.CombineTo(Bitcast2, FloatLoad);
16043 DCI.CombineTo(Bitcast, FloatLoad2);
16044
16045 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
16046 SDValue(FloatLoad2.getNode(), 1));
16047 return true;
16048 };
16049
16050 if (ReplaceTwoFloatLoad())
16051 return SDValue(N, 0);
16052
16053 EVT MemVT = LD->getMemoryVT();
16054 Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
16055 Align ABIAlignment = DAG.getDataLayout().getABITypeAlign(Ty);
16056 if (LD->isUnindexed() && VT.isVector() &&
16057 ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
16058 // P8 and later hardware should just use LOAD.
16059 !Subtarget.hasP8Vector() &&
16060 (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
16061 VT == MVT::v4f32))) &&
16062 LD->getAlign() < ABIAlignment) {
16063 // This is a type-legal unaligned Altivec load.
16064 SDValue Chain = LD->getChain();
16065 SDValue Ptr = LD->getBasePtr();
16066 bool isLittleEndian = Subtarget.isLittleEndian();
16067
16068 // This implements the loading of unaligned vectors as described in
16069 // the venerable Apple Velocity Engine overview. Specifically:
16070 // https://developer.apple.com/hardwaredrivers/ve/alignment.html
16071 // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
16072 //
16073 // The general idea is to expand a sequence of one or more unaligned
16074 // loads into an alignment-based permutation-control instruction (lvsl
16075 // or lvsr), a series of regular vector loads (which always truncate
16076 // their input address to an aligned address), and a series of
16077 // permutations. The results of these permutations are the requested
16078 // loaded values. The trick is that the last "extra" load is not taken
16079 // from the address you might suspect (sizeof(vector) bytes after the
16080 // last requested load), but rather sizeof(vector) - 1 bytes after the
16081 // last requested vector. The point of this is to avoid a page fault if
16082 // the base address happened to be aligned. This works because if the
16083 // base address is aligned, then adding less than a full vector length
16084 // will cause the last vector in the sequence to be (re)loaded.
16085 // Otherwise, the next vector will be fetched as you might suspect was
16086 // necessary.
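 // Illustrative expansion (sketch) of a single unaligned 16-byte load from Ptr:
 //   PermCntl  = lvsl(Ptr)                    ; lvsr on little endian
 //   BaseLoad  = lvx(Ptr)                     ; lvx truncates to an aligned addr
 //   ExtraLoad = lvx(Ptr + 15)                ; or Ptr + 16, see IncValue below
 //   Result    = vperm(BaseLoad, ExtraLoad, PermCntl)  ; operands swapped on LE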
16087
16088 // We might be able to reuse the permutation generation from
16089 // a different base address offset from this one by an aligned amount.
16090 // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
16091 // optimization later.
16092 Intrinsic::ID Intr, IntrLD, IntrPerm;
16093 MVT PermCntlTy, PermTy, LDTy;
16094 Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr
16095 : Intrinsic::ppc_altivec_lvsl;
16096 IntrLD = Intrinsic::ppc_altivec_lvx;
16097 IntrPerm = Intrinsic::ppc_altivec_vperm;
16098 PermCntlTy = MVT::v16i8;
16099 PermTy = MVT::v4i32;
16100 LDTy = MVT::v4i32;
16101
16102 SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
16103
16104 // Create the new MMO for the new base load. It is like the original MMO,
16105 // but represents an area in memory almost twice the vector size centered
16106 // on the original address. If the address is unaligned, we might start
16107 // reading up to (sizeof(vector)-1) bytes below the address of the
16108 // original unaligned load.
16109 MachineFunction &MF = DAG.getMachineFunction();
16110 MachineMemOperand *BaseMMO =
16111 MF.getMachineMemOperand(LD->getMemOperand(),
16112 -(int64_t)MemVT.getStoreSize()+1,
16113 2*MemVT.getStoreSize()-1);
16114
16115 // Create the new base load.
16116 SDValue LDXIntID =
16117 DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
16118 SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
16119 SDValue BaseLoad =
16120 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
16121 DAG.getVTList(PermTy, MVT::Other),
16122 BaseLoadOps, LDTy, BaseMMO);
16123
16124 // Note that the value of IncOffset (which is provided to the next
16125 // load's pointer info offset value, and thus used to calculate the
16126 // alignment), and the value of IncValue (which is actually used to
16127 // increment the pointer value) are different! This is because we
16128 // require the next load to appear to be aligned, even though it
16129 // is actually offset from the base pointer by a lesser amount.
16130 int IncOffset = VT.getSizeInBits() / 8;
16131 int IncValue = IncOffset;
16132
16133 // Walk (both up and down) the chain looking for another load at the real
16134 // (aligned) offset (the alignment of the other load does not matter in
16135 // this case). If found, then do not use the offset reduction trick, as
16136 // that will prevent the loads from being later combined (as they would
16137 // otherwise be duplicates).
16138 if (!findConsecutiveLoad(LD, DAG))
16139 --IncValue;
16140
16141 SDValue Increment =
16142 DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
16143 Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
16144
16145 MachineMemOperand *ExtraMMO =
16146 MF.getMachineMemOperand(LD->getMemOperand(),
16147 1, 2*MemVT.getStoreSize()-1);
16148 SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
16149 SDValue ExtraLoad =
16150 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
16151 DAG.getVTList(PermTy, MVT::Other),
16152 ExtraLoadOps, LDTy, ExtraMMO);
16153
16154 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
16155 BaseLoad.getValue(1), ExtraLoad.getValue(1));
16156
16157 // Because vperm has a big-endian bias, we must reverse the order
16158 // of the input vectors and complement the permute control vector
16159 // when generating little endian code. We have already handled the
16160 // latter by using lvsr instead of lvsl, so just reverse BaseLoad
16161 // and ExtraLoad here.
16162 SDValue Perm;
16163 if (isLittleEndian)
16164 Perm = BuildIntrinsicOp(IntrPerm,
16165 ExtraLoad, BaseLoad, PermCntl, DAG, dl);
16166 else
16167 Perm = BuildIntrinsicOp(IntrPerm,
16168 BaseLoad, ExtraLoad, PermCntl, DAG, dl);
16169
16170 if (VT != PermTy)
16171 Perm = Subtarget.hasAltivec()
16172 ? DAG.getNode(ISD::BITCAST, dl, VT, Perm)
16173 : DAG.getNode(ISD::FP_ROUND, dl, VT, Perm,
16174 DAG.getTargetConstant(1, dl, MVT::i64));
16175 // second argument is 1 because this rounding
16176 // is always exact.
16177
16178 // The output of the permutation is our loaded result, the TokenFactor is
16179 // our new chain.
16180 DCI.CombineTo(N, Perm, TF);
16181 return SDValue(N, 0);
16182 }
16183 }
16184 break;
16185 case ISD::INTRINSIC_WO_CHAIN: {
16186 bool isLittleEndian = Subtarget.isLittleEndian();
16187 unsigned IID = N->getConstantOperandVal(0);
16188 Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
16189 : Intrinsic::ppc_altivec_lvsl);
16190 if (IID == Intr && N->getOperand(1)->getOpcode() == ISD::ADD) {
16191 SDValue Add = N->getOperand(1);
16192
16193 int Bits = 4 /* 16 byte alignment */;
16194
16195 if (DAG.MaskedValueIsZero(Add->getOperand(1),
16196 APInt::getAllOnes(Bits /* alignment */)
16197 .zext(Add.getScalarValueSizeInBits()))) {
16198 SDNode *BasePtr = Add->getOperand(0).getNode();
16199 for (SDNode *U : BasePtr->uses()) {
16200 if (U->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
16201 U->getConstantOperandVal(0) == IID) {
16202 // We've found another LVSL/LVSR, and this address is an aligned
16203 // multiple of that one. The results will be the same, so use the
16204 // one we've just found instead.
16205
16206 return SDValue(U, 0);
16207 }
16208 }
16209 }
16210
16211 if (isa<ConstantSDNode>(Add->getOperand(1))) {
16212 SDNode *BasePtr = Add->getOperand(0).getNode();
16213 for (SDNode *U : BasePtr->uses()) {
16214 if (U->getOpcode() == ISD::ADD &&
16215 isa<ConstantSDNode>(U->getOperand(1)) &&
16216 (Add->getConstantOperandVal(1) - U->getConstantOperandVal(1)) %
16217 (1ULL << Bits) ==
16218 0) {
16219 SDNode *OtherAdd = U;
16220 for (SDNode *V : OtherAdd->uses()) {
16221 if (V->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
16222 V->getConstantOperandVal(0) == IID) {
16223 return SDValue(V, 0);
16224 }
16225 }
16226 }
16227 }
16228 }
16229 }
16230
16231 // Combine vmaxsw/h/b(a, negation of a) into abs(a).
16232 // This exposes the vabsduw/h/b opportunity to downstream combines.
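 // Illustrative examples (sketch) of the patterns handled below:
 //   vmaxsw((sub 0, %a), %a)            -> (abs %a)
 //   vmaxsw((sub %x, %y), (sub %y, %x)) -> (abs (sub %x, %y))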
16233 if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
16234 (IID == Intrinsic::ppc_altivec_vmaxsw ||
16235 IID == Intrinsic::ppc_altivec_vmaxsh ||
16236 IID == Intrinsic::ppc_altivec_vmaxsb)) {
16237 SDValue V1 = N->getOperand(1);
16238 SDValue V2 = N->getOperand(2);
16239 if ((V1.getSimpleValueType() == MVT::v4i32 ||
16240 V1.getSimpleValueType() == MVT::v8i16 ||
16241 V1.getSimpleValueType() == MVT::v16i8) &&
16242 V1.getSimpleValueType() == V2.getSimpleValueType()) {
16243 // (0-a, a)
16244 if (V1.getOpcode() == ISD::SUB &&
16245 ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&
16246 V1.getOperand(1) == V2) {
16247 return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
16248 }
16249 // (a, 0-a)
16250 if (V2.getOpcode() == ISD::SUB &&
16251 ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
16252 V2.getOperand(1) == V1) {
16253 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
16254 }
16255 // (x-y, y-x)
16256 if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
16257 V1.getOperand(0) == V2.getOperand(1) &&
16258 V1.getOperand(1) == V2.getOperand(0)) {
16259 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
16260 }
16261 }
16262 }
16263 }
16264
16265 break;
16266 case ISD::INTRINSIC_W_CHAIN:
16267 switch (N->getConstantOperandVal(1)) {
16268 default:
16269 break;
16270 case Intrinsic::ppc_altivec_vsum4sbs:
16271 case Intrinsic::ppc_altivec_vsum4shs:
16272 case Intrinsic::ppc_altivec_vsum4ubs: {
16273 // These sum-across intrinsics only have a chain due to the side effect
16274 // that they may set the SAT bit. If we know the SAT bit will not be set
16275 // for some inputs, we can replace any uses of their chain with the
16276 // input chain.
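 // Illustrative example (sketch): vsum4sbs(%v, <4 x i32> zeroinitializer)
 // cannot saturate, so users of its chain can be rewired to the input chain.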
16277 if (BuildVectorSDNode *BVN =
16278 dyn_cast<BuildVectorSDNode>(N->getOperand(3))) {
16279 APInt APSplatBits, APSplatUndef;
16280 unsigned SplatBitSize;
16281 bool HasAnyUndefs;
16282 bool BVNIsConstantSplat = BVN->isConstantSplat(
16283 APSplatBits, APSplatUndef, SplatBitSize, HasAnyUndefs, 0,
16284 !Subtarget.isLittleEndian());
16285 // If the constant splat vector is 0, the SAT bit will not be set.
16286 if (BVNIsConstantSplat && APSplatBits == 0)
16287 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), N->getOperand(0));
16288 }
16289 return SDValue();
16290 }
16291 case Intrinsic::ppc_vsx_lxvw4x:
16292 case Intrinsic::ppc_vsx_lxvd2x:
16293 // For little endian, VSX loads require generating lxvd2x/xxswapd.
16294 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
16295 if (Subtarget.needsSwapsForVSXMemOps())
16296 return expandVSXLoadForLE(N, DCI);
16297 break;
16298 }
16299 break;
16300 case ISD::INTRINSIC_VOID:
16301 // For little endian, VSX stores require generating xxswapd/stxvd2x.
16302 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
16303 if (Subtarget.needsSwapsForVSXMemOps()) {
16304 switch (N->getConstantOperandVal(1)) {
16305 default:
16306 break;
16307 case Intrinsic::ppc_vsx_stxvw4x:
16308 case Intrinsic::ppc_vsx_stxvd2x:
16309 return expandVSXStoreForLE(N, DCI);
16310 }
16311 }
16312 break;
16313 case ISD::BSWAP: {
16314 // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
16315 // For subtargets without LDBRX, we can still do better than the default
16316 // expansion even for 64-bit BSWAP (LOAD).
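 // Illustrative examples (sketch):
 //   (i32 bswap (i32 load addr)) -> (lwbrx addr)
 //   (i16 bswap (i16 load addr)) -> (trunc (lhbrx addr))
 //   (i64 bswap (i64 load addr)) -> (ldbrx addr), or two lwbrx loads combined
 //   with BUILD_PAIR when ldbrx is unavailable (handled further below).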
16317 bool Is64BitBswapOn64BitTgt =
16318 Subtarget.isPPC64() && N->getValueType(0) == MVT::i64;
16319 bool IsSingleUseNormalLd = ISD::isNormalLoad(N->getOperand(0).getNode()) &&
16320 N->getOperand(0).hasOneUse();
16321 if (IsSingleUseNormalLd &&
16322 (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
16323 (Subtarget.hasLDBRX() && Is64BitBswapOn64BitTgt))) {
16324 SDValue Load = N->getOperand(0);
16325 LoadSDNode *LD = cast<LoadSDNode>(Load);
16326 // Create the byte-swapping load.
16327 SDValue Ops[] = {
16328 LD->getChain(), // Chain
16329 LD->getBasePtr(), // Ptr
16330 DAG.getValueType(N->getValueType(0)) // VT
16331 };
16332 SDValue BSLoad =
16333 DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
16334 DAG.getVTList(N->getValueType(0) == MVT::i64 ?
16335 MVT::i64 : MVT::i32, MVT::Other),
16336 Ops, LD->getMemoryVT(), LD->getMemOperand());
16337
16338 // If this is an i16 load, insert the truncate.
16339 SDValue ResVal = BSLoad;
16340 if (N->getValueType(0) == MVT::i16)
16341 ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
16342
16343 // First, combine the bswap away. This makes the value produced by the
16344 // load dead.
16345 DCI.CombineTo(N, ResVal);
16346
16347 // Next, combine the load away; we give it a bogus result value but a real
16348 // chain result. The result value is dead because the bswap is dead.
16349 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
16350
16351 // Return N so it doesn't get rechecked!
16352 return SDValue(N, 0);
16353 }
16354 // Convert this to two 32-bit bswap loads and a BUILD_PAIR. Do this only
16355 // before legalization so that the BUILD_PAIR is handled correctly.
16356 if (!DCI.isBeforeLegalize() || !Is64BitBswapOn64BitTgt ||
16357 !IsSingleUseNormalLd)
16358 return SDValue();
16359 LoadSDNode *LD = cast<LoadSDNode>(N->getOperand(0));
16360
16361 // Can't split volatile or atomic loads.
16362 if (!LD->isSimple())
16363 return SDValue();
16364 SDValue BasePtr = LD->getBasePtr();
16365 SDValue Lo = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr,
16366 LD->getPointerInfo(), LD->getAlign());
16367 Lo = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Lo);
16368 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
16369 DAG.getIntPtrConstant(4, dl));
16370 MachineMemOperand *NewMMO = DAG.getMachineFunction().getMachineMemOperand(
16371 LD->getMemOperand(), 4, 4);
16372 SDValue Hi = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr, NewMMO);
16373 Hi = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Hi);
16374 SDValue Res;
16375 if (Subtarget.isLittleEndian())
16376 Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Hi, Lo);
16377 else
16378 Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
16379 SDValue TF =
16380 DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
16381 Hi.getOperand(0).getValue(1), Lo.getOperand(0).getValue(1));
16382 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), TF);
16383 return Res;
16384 }
16385 case PPCISD::VCMP:
16386 // If a VCMP_rec node already exists with exactly the same operands as this
16387 // node, use its result instead of this node (VCMP_rec computes both a CR6
16388 // and a normal output).
16389 //
16390 if (!N->getOperand(0).hasOneUse() &&
16391 !N->getOperand(1).hasOneUse() &&
16392 !N->getOperand(2).hasOneUse()) {
16393
16394 // Scan all of the users of the LHS, looking for VCMP_rec's that match.
16395 SDNode *VCMPrecNode = nullptr;
16396
16397 SDNode *LHSN = N->getOperand(0).getNode();
16398 for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
16399 UI != E; ++UI)
16400 if (UI->getOpcode() == PPCISD::VCMP_rec &&
16401 UI->getOperand(1) == N->getOperand(1) &&
16402 UI->getOperand(2) == N->getOperand(2) &&
16403 UI->getOperand(0) == N->getOperand(0)) {
16404 VCMPrecNode = *UI;
16405 break;
16406 }
16407
16408 // If there is no VCMP_rec node, or if the flag value has a single use,
16409 // don't transform this.
16410 if (!VCMPrecNode || VCMPrecNode->hasNUsesOfValue(0, 1))
16411 break;
16412
16413 // Look at the (necessarily single) use of the flag value. If it has a
16414 // chain, this transformation is more complex. Note that multiple things
16415 // could use the value result, which we should ignore.
16416 SDNode *FlagUser = nullptr;
16417 for (SDNode::use_iterator UI = VCMPrecNode->use_begin();
16418 FlagUser == nullptr; ++UI) {
16419 assert(UI != VCMPrecNode->use_end() && "Didn't find user!");
16420 SDNode *User = *UI;
16421 for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
16422 if (User->getOperand(i) == SDValue(VCMPrecNode, 1)) {
16423 FlagUser = User;
16424 break;
16425 }
16426 }
16427 }
16428
16429 // If the user is a MFOCRF instruction, we know this is safe.
16430 // Otherwise we give up for right now.
16431 if (FlagUser->getOpcode() == PPCISD::MFOCRF)
16432 return SDValue(VCMPrecNode, 0);
16433 }
16434 break;
16435 case ISD::BR_CC: {
16436 // If this is a branch on an altivec predicate comparison, lower this so
16437 // that we don't have to do a MFOCRF: instead, branch directly on CR6. This
16438 // lowering is done pre-legalize, because the legalizer lowers the predicate
16439 // compare down to code that is difficult to reassemble.
16440 // This code also handles branches that depend on the result of a store
16441 // conditional.
16442 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
16443 SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
16444
16445 int CompareOpc;
16446 bool isDot;
16447
16448 if (!isa<ConstantSDNode>(RHS) || (CC != ISD::SETEQ && CC != ISD::SETNE))
16449 break;
16450
16451 // Since we are doing this pre-legalize, the RHS can be a constant of
16452 // arbitrary bitwidth which may cause issues when trying to get the value
16453 // from the underlying APInt.
16454 auto RHSAPInt = RHS->getAsAPIntVal();
16455 if (!RHSAPInt.isIntN(64))
16456 break;
16457
16458 unsigned Val = RHSAPInt.getZExtValue();
16459 auto isImpossibleCompare = [&]() {
16460 // If this is a comparison against something other than 0/1, then we know
16461 // that the condition is never/always true.
16462 if (Val != 0 && Val != 1) {
16463 if (CC == ISD::SETEQ) // Cond never true, remove branch.
16464 return N->getOperand(0);
16465 // Always !=, turn it into an unconditional branch.
16466 return DAG.getNode(ISD::BR, dl, MVT::Other,
16467 N->getOperand(0), N->getOperand(4));
16468 }
16469 return SDValue();
16470 };
16471 // Combine branches fed by store conditional instructions (st[bhwd]cx).
16472 unsigned StoreWidth = 0;
16473 if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
16474 isStoreConditional(LHS, StoreWidth)) {
16475 if (SDValue Impossible = isImpossibleCompare())
16476 return Impossible;
16477 PPC::Predicate CompOpc;
16478 // eq 0 => ne
16479 // ne 0 => eq
16480 // eq 1 => eq
16481 // ne 1 => ne
16482 if (Val == 0)
16483 CompOpc = CC == ISD::SETEQ ? PPC::PRED_NE : PPC::PRED_EQ;
16484 else
16485 CompOpc = CC == ISD::SETEQ ? PPC::PRED_EQ : PPC::PRED_NE;
16486
16487 SDValue Ops[] = {LHS.getOperand(0), LHS.getOperand(2), LHS.getOperand(3),
16488 DAG.getConstant(StoreWidth, dl, MVT::i32)};
16489 auto *MemNode = cast<MemSDNode>(LHS);
16490 SDValue ConstSt = DAG.getMemIntrinsicNode(
16491 PPCISD::STORE_COND, dl,
16492 DAG.getVTList(MVT::i32, MVT::Other, MVT::Glue), Ops,
16493 MemNode->getMemoryVT(), MemNode->getMemOperand());
16494
16495 SDValue InChain;
16496 // Unchain the branch from the original store conditional.
16497 if (N->getOperand(0) == LHS.getValue(1))
16498 InChain = LHS.getOperand(0);
16499 else if (N->getOperand(0).getOpcode() == ISD::TokenFactor) {
16500 SmallVector<SDValue, 4> InChains;
16501 SDValue InTF = N->getOperand(0);
16502 for (int i = 0, e = InTF.getNumOperands(); i < e; i++)
16503 if (InTF.getOperand(i) != LHS.getValue(1))
16504 InChains.push_back(InTF.getOperand(i));
16505 InChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, InChains);
16506 }
16507
16508 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, InChain,
16509 DAG.getConstant(CompOpc, dl, MVT::i32),
16510 DAG.getRegister(PPC::CR0, MVT::i32), N->getOperand(4),
16511 ConstSt.getValue(2));
16512 }
16513
16514 if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
16515 getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
16516 assert(isDot && "Can't compare against a vector result!");
16517
16518 if (SDValue Impossible = isImpossibleCompare())
16519 return Impossible;
16520
16521 bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
16522 // Create the PPCISD altivec 'dot' comparison node.
16523 SDValue Ops[] = {
16524 LHS.getOperand(2), // LHS of compare
16525 LHS.getOperand(3), // RHS of compare
16526 DAG.getConstant(CompareOpc, dl, MVT::i32)
16527 };
16528 EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
16529 SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
16530
16531 // Unpack the result based on how the target uses it.
16532 PPC::Predicate CompOpc;
16533 switch (LHS.getConstantOperandVal(1)) {
16534 default: // Can't happen, don't crash on invalid number though.
16535 case 0: // Branch on the value of the EQ bit of CR6.
16536 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
16537 break;
16538 case 1: // Branch on the inverted value of the EQ bit of CR6.
16539 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
16540 break;
16541 case 2: // Branch on the value of the LT bit of CR6.
16542 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
16543 break;
16544 case 3: // Branch on the inverted value of the LT bit of CR6.
16545 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
16546 break;
16547 }
16548
16549 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
16550 DAG.getConstant(CompOpc, dl, MVT::i32),
16551 DAG.getRegister(PPC::CR6, MVT::i32),
16552 N->getOperand(4), CompNode.getValue(1));
16553 }
16554 break;
16555 }
16556 case ISD::BUILD_VECTOR:
16557 return DAGCombineBuildVector(N, DCI);
16558 }
16559
16560 return SDValue();
16561}
16562
16563SDValue
16564PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
16565 SelectionDAG &DAG,
16566 SmallVectorImpl<SDNode *> &Created) const {
16567 // fold (sdiv X, pow2)
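 // Illustrative examples (sketch) of the expansion below:
 //   (sdiv %x, 4)  -> (PPCISD::SRA_ADDZE %x, 2)
 //   (sdiv %x, -4) -> (sub 0, (PPCISD::SRA_ADDZE %x, 2))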
16568 EVT VT = N->getValueType(0);
16569 if (VT == MVT::i64 && !Subtarget.isPPC64())
16570 return SDValue();
16571 if ((VT != MVT::i32 && VT != MVT::i64) ||
16572 !(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()))
16573 return SDValue();
16574
16575 SDLoc DL(N);
16576 SDValue N0 = N->getOperand(0);
16577
16578 bool IsNegPow2 = Divisor.isNegatedPowerOf2();
16579 unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countr_zero();
16580 SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
16581
16582 SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
16583 Created.push_back(Op.getNode());
16584
16585 if (IsNegPow2) {
16586 Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
16587 Created.push_back(Op.getNode());
16588 }
16589
16590 return Op;
16591}
16592
16593//===----------------------------------------------------------------------===//
16594// Inline Assembly Support
16595//===----------------------------------------------------------------------===//
16596
16597void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
16598 KnownBits &Known,
16599 const APInt &DemandedElts,
16600 const SelectionDAG &DAG,
16601 unsigned Depth) const {
16602 Known.resetAll();
16603 switch (Op.getOpcode()) {
16604 default: break;
16605 case PPCISD::LBRX: {
16606 // lhbrx is known to have the top bits cleared out.
16607 if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
16608 Known.Zero = 0xFFFF0000;
16609 break;
16610 }
16611 case ISD::INTRINSIC_WO_CHAIN: {
16612 switch (Op.getConstantOperandVal(0)) {
16613 default: break;
16614 case Intrinsic::ppc_altivec_vcmpbfp_p:
16615 case Intrinsic::ppc_altivec_vcmpeqfp_p:
16616 case Intrinsic::ppc_altivec_vcmpequb_p:
16617 case Intrinsic::ppc_altivec_vcmpequh_p:
16618 case Intrinsic::ppc_altivec_vcmpequw_p:
16619 case Intrinsic::ppc_altivec_vcmpequd_p:
16620 case Intrinsic::ppc_altivec_vcmpequq_p:
16621 case Intrinsic::ppc_altivec_vcmpgefp_p:
16622 case Intrinsic::ppc_altivec_vcmpgtfp_p:
16623 case Intrinsic::ppc_altivec_vcmpgtsb_p:
16624 case Intrinsic::ppc_altivec_vcmpgtsh_p:
16625 case Intrinsic::ppc_altivec_vcmpgtsw_p:
16626 case Intrinsic::ppc_altivec_vcmpgtsd_p:
16627 case Intrinsic::ppc_altivec_vcmpgtsq_p:
16628 case Intrinsic::ppc_altivec_vcmpgtub_p:
16629 case Intrinsic::ppc_altivec_vcmpgtuh_p:
16630 case Intrinsic::ppc_altivec_vcmpgtuw_p:
16631 case Intrinsic::ppc_altivec_vcmpgtud_p:
16632 case Intrinsic::ppc_altivec_vcmpgtuq_p:
16633 Known.Zero = ~1U; // All bits but the low one are known to be zero.
16634 break;
16635 }
16636 break;
16637 }
16638 case ISD::INTRINSIC_W_CHAIN: {
16639 switch (Op.getConstantOperandVal(1)) {
16640 default:
16641 break;
16642 case Intrinsic::ppc_load2r:
16643 // Top bits are cleared for load2r (which is the same as lhbrx).
16644 Known.Zero = 0xFFFF0000;
16645 break;
16646 }
16647 break;
16648 }
16649 }
16650}
16651
16652Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
16653 switch (Subtarget.getCPUDirective()) {
16654 default: break;
16655 case PPC::DIR_970:
16656 case PPC::DIR_PWR4:
16657 case PPC::DIR_PWR5:
16658 case PPC::DIR_PWR5X:
16659 case PPC::DIR_PWR6:
16660 case PPC::DIR_PWR6X:
16661 case PPC::DIR_PWR7:
16662 case PPC::DIR_PWR8:
16663 case PPC::DIR_PWR9:
16664 case PPC::DIR_PWR10:
16665 case PPC::DIR_PWR_FUTURE: {
16666 if (!ML)
16667 break;
16668
16669 if (!DisableInnermostLoopAlign32) {
16670 // If the nested loop is an innermost loop, prefer a 32-byte alignment
16671 // so that we can decrease cache misses and branch-prediction misses.
16672 // Actual alignment of the loop will depend on the hotness check and other
16673 // logic in alignBlocks.
16674 if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
16675 return Align(32);
16676 }
16677
16678 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
16679
16680 // For small loops (between 5 and 8 instructions), align to a 32-byte
16681 // boundary so that the entire loop fits in one instruction-cache line.
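 // Illustrative example (sketch): a 7-instruction loop is 28 bytes (4 bytes
 // per instruction), so LoopSize falls in (16, 32] below and the loop gets
 // Align(32); a 4-instruction (16-byte) loop is left alone.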
16682 uint64_t LoopSize = 0;
16683 for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
16684 for (const MachineInstr &J : **I) {
16685 LoopSize += TII->getInstSizeInBytes(J);
16686 if (LoopSize > 32)
16687 break;
16688 }
16689
16690 if (LoopSize > 16 && LoopSize <= 32)
16691 return Align(32);
16692
16693 break;
16694 }
16695 }
16696
16697 return TargetLowering::getPrefLoopAlignment(ML);
16698}
16699
16700/// getConstraintType - Given a constraint, return the type of
16701/// constraint it is for this target.
16702PPCTargetLowering::ConstraintType
16703PPCTargetLowering::getConstraintType(StringRef Constraint) const {
16704 if (Constraint.size() == 1) {
16705 switch (Constraint[0]) {
16706 default: break;
16707 case 'b':
16708 case 'r':
16709 case 'f':
16710 case 'd':
16711 case 'v':
16712 case 'y':
16713 return C_RegisterClass;
16714 case 'Z':
16715 // FIXME: While Z does indicate a memory constraint, it specifically
16716 // indicates an r+r address (used in conjunction with the 'y' modifier
16717 // in the replacement string). Currently, we're forcing the base
16718 // register to be r0 in the asm printer (which is interpreted as zero)
16719 // and forming the complete address in the second register. This is
16720 // suboptimal.
16721 return C_Memory;
16722 }
16723 } else if (Constraint == "wc") { // individual CR bits.
16724 return C_RegisterClass;
16725 } else if (Constraint == "wa" || Constraint == "wd" ||
16726 Constraint == "wf" || Constraint == "ws" ||
16727 Constraint == "wi" || Constraint == "ww") {
16728 return C_RegisterClass; // VSX registers.
16729 }
16730 return TargetLowering::getConstraintType(Constraint);
16731}
16732
16733/// Examine constraint type and operand type and determine a weight value.
16734/// This object must already have been set up with the operand type
16735/// and the current alternative constraint selected.
16736TargetLowering::ConstraintWeight
16737PPCTargetLowering::getSingleConstraintMatchWeight(
16738 AsmOperandInfo &info, const char *constraint) const {
16739 ConstraintWeight weight = CW_Invalid;
16740 Value *CallOperandVal = info.CallOperandVal;
16741 // If we don't have a value, we can't do a match,
16742 // but allow it at the lowest weight.
16743 if (!CallOperandVal)
16744 return CW_Default;
16745 Type *type = CallOperandVal->getType();
16746
16747 // Look at the constraint type.
16748 if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
16749 return CW_Register; // an individual CR bit.
16750 else if ((StringRef(constraint) == "wa" ||
16751 StringRef(constraint) == "wd" ||
16752 StringRef(constraint) == "wf") &&
16753 type->isVectorTy())
16754 return CW_Register;
16755 else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
16756 return CW_Register; // registers that just hold 64-bit integer data.
16757 else if (StringRef(constraint) == "ws" && type->isDoubleTy())
16758 return CW_Register;
16759 else if (StringRef(constraint) == "ww" && type->isFloatTy())
16760 return CW_Register;
16761
16762 switch (*constraint) {
16763 default:
16764 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
16765 break;
16766 case 'b':
16767 if (type->isIntegerTy())
16768 weight = CW_Register;
16769 break;
16770 case 'f':
16771 if (type->isFloatTy())
16772 weight = CW_Register;
16773 break;
16774 case 'd':
16775 if (type->isDoubleTy())
16776 weight = CW_Register;
16777 break;
16778 case 'v':
16779 if (type->isVectorTy())
16780 weight = CW_Register;
16781 break;
16782 case 'y':
16783 weight = CW_Register;
16784 break;
16785 case 'Z':
16786 weight = CW_Memory;
16787 break;
16788 }
16789 return weight;
16790}
16791
16792std::pair<unsigned, const TargetRegisterClass *>
16793PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
16794 StringRef Constraint,
16795 MVT VT) const {
16796 if (Constraint.size() == 1) {
16797 // GCC RS6000 Constraint Letters
16798 switch (Constraint[0]) {
16799 case 'b': // R1-R31
16800 if (VT == MVT::i64 && Subtarget.isPPC64())
16801 return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
16802 return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
16803 case 'r': // R0-R31
16804 if (VT == MVT::i64 && Subtarget.isPPC64())
16805 return std::make_pair(0U, &PPC::G8RCRegClass);
16806 return std::make_pair(0U, &PPC::GPRCRegClass);
16807 // 'd' and 'f' constraints are both defined to be "the floating point
16808 // registers", where one is for 32-bit and the other for 64-bit. We don't
16809 // really care overly much here so just give them all the same reg classes.
16810 case 'd':
16811 case 'f':
16812 if (Subtarget.hasSPE()) {
16813 if (VT == MVT::f32 || VT == MVT::i32)
16814 return std::make_pair(0U, &PPC::GPRCRegClass);
16815 if (VT == MVT::f64 || VT == MVT::i64)
16816 return std::make_pair(0U, &PPC::SPERCRegClass);
16817 } else {
16818 if (VT == MVT::f32 || VT == MVT::i32)
16819 return std::make_pair(0U, &PPC::F4RCRegClass);
16820 if (VT == MVT::f64 || VT == MVT::i64)
16821 return std::make_pair(0U, &PPC::F8RCRegClass);
16822 }
16823 break;
16824 case 'v':
16825 if (Subtarget.hasAltivec() && VT.isVector())
16826 return std::make_pair(0U, &PPC::VRRCRegClass);
16827 else if (Subtarget.hasVSX())
16828 // Scalars in Altivec registers only make sense with VSX.
16829 return std::make_pair(0U, &PPC::VFRCRegClass);
16830 break;
16831 case 'y': // crrc
16832 return std::make_pair(0U, &PPC::CRRCRegClass);
16833 }
16834 } else if (Constraint == "wc" && Subtarget.useCRBits()) {
16835 // An individual CR bit.
16836 return std::make_pair(0U, &PPC::CRBITRCRegClass);
16837 } else if ((Constraint == "wa" || Constraint == "wd" ||
16838 Constraint == "wf" || Constraint == "wi") &&
16839 Subtarget.hasVSX()) {
16840 // A VSX register for either a scalar (FP) or vector. There is no
16841 // support for single precision scalars on subtargets prior to Power8.
16842 if (VT.isVector())
16843 return std::make_pair(0U, &PPC::VSRCRegClass);
16844 if (VT == MVT::f32 && Subtarget.hasP8Vector())
16845 return std::make_pair(0U, &PPC::VSSRCRegClass);
16846 return std::make_pair(0U, &PPC::VSFRCRegClass);
16847 } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
16848 if (VT == MVT::f32 && Subtarget.hasP8Vector())
16849 return std::make_pair(0U, &PPC::VSSRCRegClass);
16850 else
16851 return std::make_pair(0U, &PPC::VSFRCRegClass);
16852 } else if (Constraint == "lr") {
16853 if (VT == MVT::i64)
16854 return std::make_pair(0U, &PPC::LR8RCRegClass);
16855 else
16856 return std::make_pair(0U, &PPC::LRRCRegClass);
16857 }
16858
16859 // Handle special cases of physical registers that are not properly handled
16860 // by the base class.
16861 if (Constraint[0] == '{' && Constraint[Constraint.size() - 1] == '}') {
16862 // If we name a VSX register, we can't defer to the base class because it
16863 // will not recognize the correct register (their names will be VSL{0-31}
16864 // and V{0-31} so they won't match). So we match them here.
16865 if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {
16866 int VSNum = atoi(Constraint.data() + 3);
16867 assert(VSNum >= 0 && VSNum <= 63 &&
16868 "Attempted to access a vsr out of range");
16869 if (VSNum < 32)
16870 return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass);
16871 return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass);
16872 }
16873
16874 // For float registers, we can't defer to the base class as it will match
16875 // the SPILLTOVSRRC class.
16876 if (Constraint.size() > 3 && Constraint[1] == 'f') {
16877 int RegNum = atoi(Constraint.data() + 2);
16878 if (RegNum > 31 || RegNum < 0)
16879 report_fatal_error("Invalid floating point register number");
16880 if (VT == MVT::f32 || VT == MVT::i32)
16881 return Subtarget.hasSPE()
16882 ? std::make_pair(PPC::R0 + RegNum, &PPC::GPRCRegClass)
16883 : std::make_pair(PPC::F0 + RegNum, &PPC::F4RCRegClass);
16884 if (VT == MVT::f64 || VT == MVT::i64)
16885 return Subtarget.hasSPE()
16886 ? std::make_pair(PPC::S0 + RegNum, &PPC::SPERCRegClass)
16887 : std::make_pair(PPC::F0 + RegNum, &PPC::F8RCRegClass);
16888 }
16889 }
16890
16891 std::pair<unsigned, const TargetRegisterClass *> R =
16892 TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
16893
16894 // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
16895 // (which we call X[0-9]+). If a 64-bit value has been requested, and a
16896 // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
16897 // register.
16898 // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
16899 // the AsmName field from *RegisterInfo.td, then this would not be necessary.
16900 if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
16901 PPC::GPRCRegClass.contains(R.first))
16902 return std::make_pair(TRI->getMatchingSuperReg(R.first,
16903 PPC::sub_32, &PPC::G8RCRegClass),
16904 &PPC::G8RCRegClass);
16905
16906 // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
16907 if (!R.second && StringRef("{cc}").equals_insensitive(Constraint)) {
16908 R.first = PPC::CR0;
16909 R.second = &PPC::CRRCRegClass;
16910 }
16911 // FIXME: This warning should ideally be emitted in the front end.
16912 const auto &TM = getTargetMachine();
16913 if (Subtarget.isAIXABI() && !TM.getAIXExtendedAltivecABI()) {
16914 if (((R.first >= PPC::V20 && R.first <= PPC::V31) ||
16915 (R.first >= PPC::VF20 && R.first <= PPC::VF31)) &&
16916 (R.second == &PPC::VSRCRegClass || R.second == &PPC::VSFRCRegClass))
16917 errs() << "warning: vector registers 20 to 32 are reserved in the "
16918 "default AIX AltiVec ABI and cannot be used\n";
16919 }
16920
16921 return R;
16922}
16923
16924/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
16925/// vector. If it is invalid, don't add anything to Ops.
16926void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
16927 StringRef Constraint,
16928 std::vector<SDValue> &Ops,
16929 SelectionDAG &DAG) const {
16930 SDValue Result;
16931
16932 // Only support length 1 constraints.
16933 if (Constraint.size() > 1)
16934 return;
16935
16936 char Letter = Constraint[0];
16937 switch (Letter) {
16938 default: break;
16939 case 'I':
16940 case 'J':
16941 case 'K':
16942 case 'L':
16943 case 'M':
16944 case 'N':
16945 case 'O':
16946 case 'P': {
16947 ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
16948 if (!CST) return; // Must be an immediate to match.
16949 SDLoc dl(Op);
16950 int64_t Value = CST->getSExtValue();
16951 EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
16952 // numbers are printed as such.
16953 switch (Letter) {
16954 default: llvm_unreachable("Unknown constraint letter!");
16955 case 'I': // "I" is a signed 16-bit constant.
16956 if (isInt<16>(Value))
16957 Result = DAG.getTargetConstant(Value, dl, TCVT);
16958 break;
16959 case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
16960 if (isShiftedUInt<16, 16>(Value))
16961 Result = DAG.getTargetConstant(Value, dl, TCVT);
16962 break;
16963 case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
16964 if (isShiftedInt<16, 16>(Value))
16965 Result = DAG.getTargetConstant(Value, dl, TCVT);
16966 break;
16967 case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
16968 if (isUInt<16>(Value))
16969 Result = DAG.getTargetConstant(Value, dl, TCVT);
16970 break;
16971 case 'M': // "M" is a constant that is greater than 31.
16972 if (Value > 31)
16973 Result = DAG.getTargetConstant(Value, dl, TCVT);
16974 break;
16975 case 'N': // "N" is a positive constant that is an exact power of two.
16976 if (Value > 0 && isPowerOf2_64(Value))
16977 Result = DAG.getTargetConstant(Value, dl, TCVT);
16978 break;
16979 case 'O': // "O" is the constant zero.
16980 if (Value == 0)
16981 Result = DAG.getTargetConstant(Value, dl, TCVT);
16982 break;
16983 case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
16984 if (isInt<16>(-Value))
16985 Result = DAG.getTargetConstant(Value, dl, TCVT);
16986 break;
16987 }
16988 break;
16989 }
16990 }
16991
16992 if (Result.getNode()) {
16993 Ops.push_back(Result);
16994 return;
16995 }
16996
16997 // Handle standard constraint letters.
16998 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
16999}
17000
17001void PPCTargetLowering::CollectTargetIntrinsicOperands(const CallInst &I,
17002 SmallVectorImpl<SDValue> &Ops,
17003 SelectionDAG &DAG) const {
17004 if (I.getNumOperands() <= 1)
17005 return;
17006 if (!isa<ConstantSDNode>(Ops[1].getNode()))
17007 return;
17008 auto IntrinsicID = Ops[1].getNode()->getAsZExtVal();
17009 if (IntrinsicID != Intrinsic::ppc_tdw && IntrinsicID != Intrinsic::ppc_tw &&
17010 IntrinsicID != Intrinsic::ppc_trapd && IntrinsicID != Intrinsic::ppc_trap)
17011 return;
17012
17013 if (MDNode *MDN = I.getMetadata(LLVMContext::MD_annotation))
17014 Ops.push_back(DAG.getMDNode(MDN));
17015}
17016
17017// isLegalAddressingMode - Return true if the addressing mode represented
17018// by AM is legal for this target, for a load/store of the specified type.
17019 bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
17020 const AddrMode &AM, Type *Ty,
17021 unsigned AS,
17022 Instruction *I) const {
17023 // The vector r+i form is supported as the DQ form since Power9. We don't
17024 // check the DQ-form offset requirement (offset % 16 == 0) because, on
17025 // PowerPC, the immediate form is preferred and the offset can be adjusted
17026 // later by the PPCLoopInstrFormPrep pass. Also, LSR checks legal addressing
17027 // modes using the min and max offsets of an LSRUse, so we should be a little
17028 // aggressive here so that the LSRUse can also cover its other offsets.
17029 if (Ty->isVectorTy() && AM.BaseOffs != 0 && !Subtarget.hasP9Vector())
17030 return false;
17031
17032 // PPC allows a sign-extended 16-bit immediate field.
17033 if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
17034 return false;
17035
17036 // No global is ever allowed as a base.
17037 if (AM.BaseGV)
17038 return false;
17039
17040 // PPC only supports r+r,
17041 switch (AM.Scale) {
17042 case 0: // "r+i" or just "i", depending on HasBaseReg.
17043 break;
17044 case 1:
17045 if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
17046 return false;
17047 // Otherwise we have r+r or r+i.
17048 break;
17049 case 2:
17050 if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
17051 return false;
17052 // Allow 2*r as r+r.
17053 break;
17054 default:
17055 // No other scales are supported.
17056 return false;
17057 }
17058
17059 return true;
17060}
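// For illustration (a sketch, not exhaustive), some addressing modes and the
// result of the check above for a scalar integer type:
//   base + 32          -> legal     (r+i, offset fits in signed 16 bits)
//   base + index       -> legal     (Scale == 1, no offset)
//   base + index + 8   -> not legal (r+r+i is rejected)
//   2*index            -> legal     (treated as r+r)
//   base + 0x10000     -> not legal (offset does not fit in 16 bits)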
17061
17062SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
17063 SelectionDAG &DAG) const {
17064 MachineFunction &MF = DAG.getMachineFunction();
17065 MachineFrameInfo &MFI = MF.getFrameInfo();
17066 MFI.setReturnAddressIsTaken(true);
17067
17068 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
17069 return SDValue();
17070
17071 SDLoc dl(Op);
17072 unsigned Depth = Op.getConstantOperandVal(0);
17073
17074 // Make sure the function does not optimize away the store of the RA to
17075 // the stack.
17076 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
17077 FuncInfo->setLRStoreRequired();
17078 bool isPPC64 = Subtarget.isPPC64();
17079 auto PtrVT = getPointerTy(MF.getDataLayout());
17080
17081 if (Depth > 0) {
17082 // The link register (return address) is saved in the caller's frame
17083 // not the callee's stack frame. So we must get the caller's frame
17084 // address and load the return address at the LR offset from there.
17085 SDValue FrameAddr =
17086 DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
17087 LowerFRAMEADDR(Op, DAG), MachinePointerInfo());
17088 SDValue Offset =
17089 DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
17090 isPPC64 ? MVT::i64 : MVT::i32);
17091 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
17092 DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
17093 MachinePointerInfo());
17094 }
17095
17096 // Just load the return address off the stack.
17097 SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
17098 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
17099 MachinePointerInfo());
17100}
17101
17102SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
17103 SelectionDAG &DAG) const {
17104 SDLoc dl(Op);
17105 unsigned Depth = Op.getConstantOperandVal(0);
17106
17107 MachineFunction &MF = DAG.getMachineFunction();
17108 MachineFrameInfo &MFI = MF.getFrameInfo();
17109 MFI.setFrameAddressIsTaken(true);
17110
17111 EVT PtrVT = getPointerTy(MF.getDataLayout());
17112 bool isPPC64 = PtrVT == MVT::i64;
17113
17114 // Naked functions never have a frame pointer, and so we use r1. For all
17115 // other functions, this decision must be delayed until during PEI.
17116 unsigned FrameReg;
17117 if (MF.getFunction().hasFnAttribute(Attribute::Naked))
17118 FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
17119 else
17120 FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
17121
17122 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
17123 PtrVT);
17124 while (Depth--)
17125 FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
17126 FrameAddr, MachinePointerInfo());
17127 return FrameAddr;
17128}
17129
17130// FIXME? Maybe this could be a TableGen attribute on some registers and
17131// this table could be generated automatically from RegInfo.
17132 Register PPCTargetLowering::getRegisterByName(const char* RegName, LLT VT,
17133 const MachineFunction &MF) const {
17134 bool isPPC64 = Subtarget.isPPC64();
17135
17136 bool is64Bit = isPPC64 && VT == LLT::scalar(64);
17137 if (!is64Bit && VT != LLT::scalar(32))
17138 report_fatal_error("Invalid register global variable type");
17139
17141 .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
17142 .Case("r2", isPPC64 ? Register() : PPC::R2)
17143 .Case("r13", (is64Bit ? PPC::X13 : PPC::R13))
17144 .Default(Register());
17145
17146 if (Reg)
17147 return Reg;
17148 report_fatal_error("Invalid register name global variable");
17149}
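// For illustration (a hypothetical IR snippet, not from this file), a
// named-register read resolved through the table above:
//
//   declare i64 @llvm.read_register.i64(metadata)
//   %sp = call i64 @llvm.read_register.i64(metadata !0)   ; !0 = !{!"r1"}
//
// On a 64-bit subtarget "r1" maps to X1; asking for "r2" there yields an
// invalid Register and hits the fatal error above.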
17150
17151 bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
17152 // The 32-bit SVR4 ABI accesses everything as got-indirect.
17153 if (Subtarget.is32BitELFABI())
17154 return true;
17155
17156 // AIX accesses everything indirectly through the TOC, which is similar to
17157 // the GOT.
17158 if (Subtarget.isAIXABI())
17159 return true;
17160
17161 CodeModel::Model CModel = getTargetMachine().getCodeModel();
17162 // Under the small or large code model, module locals are accessed
17163 // indirectly by loading their address from the .toc/.got.
17164 if (CModel == CodeModel::Small || CModel == CodeModel::Large)
17165 return true;
17166
17167 // JumpTable and BlockAddress are accessed as got-indirect.
17168 if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
17169 return true;
17170
17171 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA))
17172 return Subtarget.isGVIndirectSymbol(G->getGlobal());
17173
17174 return false;
17175}
17176
17177bool
17179 // The PowerPC target isn't yet aware of offsets.
17180 return false;
17181}
17182
17184 const CallInst &I,
17185 MachineFunction &MF,
17186 unsigned Intrinsic) const {
17187 switch (Intrinsic) {
17188 case Intrinsic::ppc_atomicrmw_xchg_i128:
17189 case Intrinsic::ppc_atomicrmw_add_i128:
17190 case Intrinsic::ppc_atomicrmw_sub_i128:
17191 case Intrinsic::ppc_atomicrmw_nand_i128:
17192 case Intrinsic::ppc_atomicrmw_and_i128:
17193 case Intrinsic::ppc_atomicrmw_or_i128:
17194 case Intrinsic::ppc_atomicrmw_xor_i128:
17195 case Intrinsic::ppc_cmpxchg_i128:
17197 Info.memVT = MVT::i128;
17198 Info.ptrVal = I.getArgOperand(0);
17199 Info.offset = 0;
17200 Info.align = Align(16);
17203 return true;
17204 case Intrinsic::ppc_atomic_load_i128:
17206 Info.memVT = MVT::i128;
17207 Info.ptrVal = I.getArgOperand(0);
17208 Info.offset = 0;
17209 Info.align = Align(16);
17211 return true;
17212 case Intrinsic::ppc_atomic_store_i128:
17214 Info.memVT = MVT::i128;
17215 Info.ptrVal = I.getArgOperand(2);
17216 Info.offset = 0;
17217 Info.align = Align(16);
17219 return true;
17220 case Intrinsic::ppc_altivec_lvx:
17221 case Intrinsic::ppc_altivec_lvxl:
17222 case Intrinsic::ppc_altivec_lvebx:
17223 case Intrinsic::ppc_altivec_lvehx:
17224 case Intrinsic::ppc_altivec_lvewx:
17225 case Intrinsic::ppc_vsx_lxvd2x:
17226 case Intrinsic::ppc_vsx_lxvw4x:
17227 case Intrinsic::ppc_vsx_lxvd2x_be:
17228 case Intrinsic::ppc_vsx_lxvw4x_be:
17229 case Intrinsic::ppc_vsx_lxvl:
17230 case Intrinsic::ppc_vsx_lxvll: {
17231 EVT VT;
17232 switch (Intrinsic) {
17233 case Intrinsic::ppc_altivec_lvebx:
17234 VT = MVT::i8;
17235 break;
17236 case Intrinsic::ppc_altivec_lvehx:
17237 VT = MVT::i16;
17238 break;
17239 case Intrinsic::ppc_altivec_lvewx:
17240 VT = MVT::i32;
17241 break;
17242 case Intrinsic::ppc_vsx_lxvd2x:
17243 case Intrinsic::ppc_vsx_lxvd2x_be:
17244 VT = MVT::v2f64;
17245 break;
17246 default:
17247 VT = MVT::v4i32;
17248 break;
17249 }
17250
17252 Info.memVT = VT;
17253 Info.ptrVal = I.getArgOperand(0);
17254 Info.offset = -VT.getStoreSize()+1;
17255 Info.size = 2*VT.getStoreSize()-1;
17256 Info.align = Align(1);
17258 return true;
17259 }
17260 case Intrinsic::ppc_altivec_stvx:
17261 case Intrinsic::ppc_altivec_stvxl:
17262 case Intrinsic::ppc_altivec_stvebx:
17263 case Intrinsic::ppc_altivec_stvehx:
17264 case Intrinsic::ppc_altivec_stvewx:
17265 case Intrinsic::ppc_vsx_stxvd2x:
17266 case Intrinsic::ppc_vsx_stxvw4x:
17267 case Intrinsic::ppc_vsx_stxvd2x_be:
17268 case Intrinsic::ppc_vsx_stxvw4x_be:
17269 case Intrinsic::ppc_vsx_stxvl:
17270 case Intrinsic::ppc_vsx_stxvll: {
17271 EVT VT;
17272 switch (Intrinsic) {
17273 case Intrinsic::ppc_altivec_stvebx:
17274 VT = MVT::i8;
17275 break;
17276 case Intrinsic::ppc_altivec_stvehx:
17277 VT = MVT::i16;
17278 break;
17279 case Intrinsic::ppc_altivec_stvewx:
17280 VT = MVT::i32;
17281 break;
17282 case Intrinsic::ppc_vsx_stxvd2x:
17283 case Intrinsic::ppc_vsx_stxvd2x_be:
17284 VT = MVT::v2f64;
17285 break;
17286 default:
17287 VT = MVT::v4i32;
17288 break;
17289 }
17290
17292 Info.memVT = VT;
17293 Info.ptrVal = I.getArgOperand(1);
17294 Info.offset = -VT.getStoreSize()+1;
17295 Info.size = 2*VT.getStoreSize()-1;
17296 Info.align = Align(1);
17298 return true;
17299 }
17300 case Intrinsic::ppc_stdcx:
17301 case Intrinsic::ppc_stwcx:
17302 case Intrinsic::ppc_sthcx:
17303 case Intrinsic::ppc_stbcx: {
17304 EVT VT;
17305 auto Alignment = Align(8);
17306 switch (Intrinsic) {
17307 case Intrinsic::ppc_stdcx:
17308 VT = MVT::i64;
17309 break;
17310 case Intrinsic::ppc_stwcx:
17311 VT = MVT::i32;
17312 Alignment = Align(4);
17313 break;
17314 case Intrinsic::ppc_sthcx:
17315 VT = MVT::i16;
17316 Alignment = Align(2);
17317 break;
17318 case Intrinsic::ppc_stbcx:
17319 VT = MVT::i8;
17320 Alignment = Align(1);
17321 break;
17322 }
17324 Info.memVT = VT;
17325 Info.ptrVal = I.getArgOperand(0);
17326 Info.offset = 0;
17327 Info.align = Alignment;
17329 return true;
17330 }
17331 default:
17332 break;
17333 }
17334
17335 return false;
17336}
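// A note on the lvx/stvx entries above: those instructions ignore the low
// four bits of the effective address, so a (possibly misaligned) pointer
// accesses the whole 16-byte block that contains it. Reporting the access as
// offset = -(size-1) with width 2*size-1 conservatively covers every byte
// the instruction might touch; e.g. for a 16-byte lvx the reported window is
// [ptr - 15, ptr + 15], which includes the 16-byte-aligned block holding ptr.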
17337
17338/// It returns EVT::Other if the type should be determined using generic
17339/// target-independent logic.
17341 const MemOp &Op, const AttributeList &FuncAttributes) const {
17342 if (getTargetMachine().getOptLevel() != CodeGenOptLevel::None) {
17343 // We should use Altivec/VSX loads and stores when available. For unaligned
17344 // addresses, unaligned VSX loads are only fast starting with the P8.
17345 if (Subtarget.hasAltivec() && Op.size() >= 16) {
17346 if (Op.isMemset() && Subtarget.hasVSX()) {
17347 uint64_t TailSize = Op.size() % 16;
17348 // For memset lowering, EXTRACT_VECTOR_ELT tries to return a constant
17349 // element if the vector element type matches the tail store. For a tail
17350 // size of 3 or 4 the tail store is an i32, so pick v8i16 instead of v4i32.
17351 if (TailSize > 2 && TailSize <= 4) {
17352 return MVT::v8i16;
17353 }
17354 return MVT::v4i32;
17355 }
17356 if (Op.isAligned(Align(16)) || Subtarget.hasP8Vector())
17357 return MVT::v4i32;
17358 }
17359 }
17360
17361 if (Subtarget.isPPC64()) {
17362 return MVT::i64;
17363 }
17364
17365 return MVT::i32;
17366}
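// For illustration, assuming optimization is enabled on a VSX subtarget,
// some memory-op sizes and the type chosen above:
//   memset of 32 bytes       -> MVT::v4i32 (tail size 0)
//   memset of 19 bytes       -> MVT::v8i16 (tail size 3)
//   16-byte unaligned memcpy -> MVT::v4i32 only with P8 vector, otherwise
//                               MVT::i64 (PPC64) or MVT::i32
//   8-byte memcpy            -> MVT::i64 on PPC64, MVT::i32 otherwise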
17367
17368/// Returns true if it is beneficial to convert a load of a constant
17369/// to just the constant itself.
17371 Type *Ty) const {
17372 assert(Ty->isIntegerTy());
17373
17374 unsigned BitSize = Ty->getPrimitiveSizeInBits();
17375 return !(BitSize == 0 || BitSize > 64);
17376}
17377
17379 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
17380 return false;
17381 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
17382 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
17383 return NumBits1 == 64 && NumBits2 == 32;
17384}
17385
17387 if (!VT1.isInteger() || !VT2.isInteger())
17388 return false;
17389 unsigned NumBits1 = VT1.getSizeInBits();
17390 unsigned NumBits2 = VT2.getSizeInBits();
17391 return NumBits1 == 64 && NumBits2 == 32;
17392}
17393
17395 // Generally speaking, zexts are not free, but they are free when they can be
17396 // folded with other operations.
17397 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
17398 EVT MemVT = LD->getMemoryVT();
17399 if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
17400 (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
17401 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
17402 LD->getExtensionType() == ISD::ZEXTLOAD))
17403 return true;
17404 }
17405
17406 // FIXME: Add other cases...
17407 // - 32-bit shifts with a zext to i64
17408 // - zext after ctlz, bswap, etc.
17409 // - zext after and by a constant mask
17410
17411 return TargetLowering::isZExtFree(Val, VT2);
17412}
17413
17414bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
17415 assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
17416 "invalid fpext types");
17417 // Extending to float128 is not free.
17418 if (DestVT == MVT::f128)
17419 return false;
17420 return true;
17421}
17422
17424 return isInt<16>(Imm) || isUInt<16>(Imm);
17425}
17426
17428 return isInt<16>(Imm) || isUInt<16>(Imm);
17429}
17430
17433 unsigned *Fast) const {
17435 return false;
17436
17437 // PowerPC supports unaligned memory access for simple non-vector types.
17438 // Although accessing unaligned addresses is not as efficient as accessing
17439 // aligned addresses, it is generally more efficient than manual expansion,
17440 // and generally only traps for software emulation when crossing page
17441 // boundaries.
17442
17443 if (!VT.isSimple())
17444 return false;
17445
17446 if (VT.isFloatingPoint() && !VT.isVector() &&
17447 !Subtarget.allowsUnalignedFPAccess())
17448 return false;
17449
17450 if (VT.getSimpleVT().isVector()) {
17451 if (Subtarget.hasVSX()) {
17452 if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
17453 VT != MVT::v4f32 && VT != MVT::v4i32)
17454 return false;
17455 } else {
17456 return false;
17457 }
17458 }
17459
17460 if (VT == MVT::ppcf128)
17461 return false;
17462
17463 if (Fast)
17464 *Fast = 1;
17465
17466 return true;
17467}
17468
17470 SDValue C) const {
17471 // Check integral scalar types.
17472 if (!VT.isScalarInteger())
17473 return false;
17474 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
17475 if (!ConstNode->getAPIntValue().isSignedIntN(64))
17476 return false;
17477 // This transformation will generate >= 2 operations. But the following
17478 // cases need <= 2 instructions during ISEL, so exclude them:
17479 // 1. If the constant multiplier fits in 16 bits, a single MULLI
17480 //    handles it.
17481 // 2. If the multiplier fits in 16 bits after shifting out trailing zeros,
17482 //    only one extra shift is needed, i.e. MULLI plus RLDICR.
17483 int64_t Imm = ConstNode->getSExtValue();
17484 unsigned Shift = llvm::countr_zero<uint64_t>(Imm);
17485 Imm >>= Shift;
17486 if (isInt<16>(Imm))
17487 return false;
17488 uint64_t UImm = static_cast<uint64_t>(Imm);
17489 if (isPowerOf2_64(UImm + 1) || isPowerOf2_64(UImm - 1) ||
17490 isPowerOf2_64(1 - UImm) || isPowerOf2_64(-1 - UImm))
17491 return true;
17492 }
17493 return false;
17494}
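// A few worked examples of the check above (illustrative only):
//   mul x, 10      -> false (10 = 5 << 1 and 5 fits MULLI)
//   mul x, 65537   -> true  (65537 = 2^16 + 1, so the generic combiner may
//                            rewrite it as (x << 16) + x)
//   mul x, 65535   -> true  (65535 = 2^16 - 1  ->  (x << 16) - x)
//   mul x, 1234567 -> false (no cheap shift/add form)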
17495
17497 EVT VT) const {
17500}
17501
17503 Type *Ty) const {
17504 if (Subtarget.hasSPE() || Subtarget.useSoftFloat())
17505 return false;
17506 switch (Ty->getScalarType()->getTypeID()) {
17507 case Type::FloatTyID:
17508 case Type::DoubleTyID:
17509 return true;
17510 case Type::FP128TyID:
17511 return Subtarget.hasP9Vector();
17512 default:
17513 return false;
17514 }
17515}
17516
17517// FIXME: add more patterns which are not profitable to hoist.
17519 if (!I->hasOneUse())
17520 return true;
17521
17522 Instruction *User = I->user_back();
17523 assert(User && "A single use instruction with no uses.");
17524
17525 switch (I->getOpcode()) {
17526 case Instruction::FMul: {
17527 // Don't break FMA, PowerPC prefers FMA.
17528 if (User->getOpcode() != Instruction::FSub &&
17529 User->getOpcode() != Instruction::FAdd)
17530 return true;
17531
17533 const Function *F = I->getFunction();
17534 const DataLayout &DL = F->getParent()->getDataLayout();
17535 Type *Ty = User->getOperand(0)->getType();
17536
17537 return !(
17540 (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
17541 }
17542 case Instruction::Load: {
17543 // Don't break the "store (load float*)" pattern; it will be combined
17544 // into "store (load int32)" by a later InstCombine pass (see
17545 // combineLoadToOperationType). On PowerPC, loading a floating-point
17546 // value takes more cycles than loading a 32-bit integer.
17547 LoadInst *LI = cast<LoadInst>(I);
17548 // For loads that combineLoadToOperationType leaves alone, such as
17549 // ordered loads, hoisting should be profitable.
17550 // A swifterror load can only be of pointer-to-pointer type, so the
17551 // type check below rules that case out.
17552 if (!LI->isUnordered())
17553 return true;
17554
17555 if (User->getOpcode() != Instruction::Store)
17556 return true;
17557
17558 if (I->getType()->getTypeID() != Type::FloatTyID)
17559 return true;
17560
17561 return false;
17562 }
17563 default:
17564 return true;
17565 }
17566 return true;
17567}
17568
17569const MCPhysReg *
17571 // LR is a callee-save register, but we must treat it as clobbered by any call
17572 // site. Hence we include LR in the scratch registers, which are in turn added
17573 // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
17574 // to CTR, which is used by any indirect call.
17575 static const MCPhysReg ScratchRegs[] = {
17576 PPC::X12, PPC::LR8, PPC::CTR8, 0
17577 };
17578
17579 return ScratchRegs;
17580}
17581
17583 const Constant *PersonalityFn) const {
17584 return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
17585}
17586
17588 const Constant *PersonalityFn) const {
17589 return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
17590}
17591
17592bool
17594 EVT VT , unsigned DefinedValues) const {
17595 if (VT == MVT::v2i64)
17596 return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
17597
17598 if (Subtarget.hasVSX())
17599 return true;
17600
17602}
17603
17604 Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
17605 if (DisableILPPref || Subtarget.enableMachineScheduler())
17606 return TargetLowering::getSchedulingPreference(N);
17607
17608 return Sched::ILP;
17609}
17610
17611// Create a fast isel object.
17612FastISel *
17614 const TargetLibraryInfo *LibInfo) const {
17615 return PPC::createFastISel(FuncInfo, LibInfo);
17616}
17617
17618// 'Inverted' means the FMA opcode after negating one multiplicand.
17619// For example, (fma -a b c) = (fnmsub a b c)
17620static unsigned invertFMAOpcode(unsigned Opc) {
17621 switch (Opc) {
17622 default:
17623 llvm_unreachable("Invalid FMA opcode for PowerPC!");
17624 case ISD::FMA:
17625 return PPCISD::FNMSUB;
17626 case PPCISD::FNMSUB:
17627 return ISD::FMA;
17628 }
17629}
17630
17632 bool LegalOps, bool OptForSize,
17634 unsigned Depth) const {
17636 return SDValue();
17637
17638 unsigned Opc = Op.getOpcode();
17639 EVT VT = Op.getValueType();
17640 SDNodeFlags Flags = Op.getNode()->getFlags();
17641
17642 switch (Opc) {
17643 case PPCISD::FNMSUB:
17644 if (!Op.hasOneUse() || !isTypeLegal(VT))
17645 break;
17646
17648 SDValue N0 = Op.getOperand(0);
17649 SDValue N1 = Op.getOperand(1);
17650 SDValue N2 = Op.getOperand(2);
17651 SDLoc Loc(Op);
17652
17654 SDValue NegN2 =
17655 getNegatedExpression(N2, DAG, LegalOps, OptForSize, N2Cost, Depth + 1);
17656
17657 if (!NegN2)
17658 return SDValue();
17659
17660 // (fneg (fnmsub a b c)) => (fnmsub (fneg a) b (fneg c))
17661 // (fneg (fnmsub a b c)) => (fnmsub a (fneg b) (fneg c))
17662 // These transformations may change sign of zeroes. For example,
17663 // -(-ab-(-c))=-0 while -(-(ab-c))=+0 when a=b=c=1.
17664 if (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) {
17665 // Try and choose the cheaper one to negate.
17667 SDValue NegN0 = getNegatedExpression(N0, DAG, LegalOps, OptForSize,
17668 N0Cost, Depth + 1);
17669
17671 SDValue NegN1 = getNegatedExpression(N1, DAG, LegalOps, OptForSize,
17672 N1Cost, Depth + 1);
17673
17674 if (NegN0 && N0Cost <= N1Cost) {
17675 Cost = std::min(N0Cost, N2Cost);
17676 return DAG.getNode(Opc, Loc, VT, NegN0, N1, NegN2, Flags);
17677 } else if (NegN1) {
17678 Cost = std::min(N1Cost, N2Cost);
17679 return DAG.getNode(Opc, Loc, VT, N0, NegN1, NegN2, Flags);
17680 }
17681 }
17682
17683 // (fneg (fnmsub a b c)) => (fma a b (fneg c))
17684 if (isOperationLegal(ISD::FMA, VT)) {
17685 Cost = N2Cost;
17686 return DAG.getNode(ISD::FMA, Loc, VT, N0, N1, NegN2, Flags);
17687 }
17688
17689 break;
17690 }
17691
17692 return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize,
17693 Cost, Depth);
17694}
17695
17696// Override to enable LOAD_STACK_GUARD lowering on Linux.
17698 if (!Subtarget.isTargetLinux())
17700 return true;
17701}
17702
17703// Override to disable global variable loading on Linux and insert AIX canary
17704// word declaration.
17706 if (Subtarget.isAIXABI()) {
17707 M.getOrInsertGlobal(AIXSSPCanaryWordName,
17708 PointerType::getUnqual(M.getContext()));
17709 return;
17710 }
17711 if (!Subtarget.isTargetLinux())
17713}
17714
17716 if (Subtarget.isAIXABI())
17717 return M.getGlobalVariable(AIXSSPCanaryWordName);
17719}
17720
17722 bool ForCodeSize) const {
17723 if (!VT.isSimple() || !Subtarget.hasVSX())
17724 return false;
17725
17726 switch(VT.getSimpleVT().SimpleTy) {
17727 default:
17728 // Return false for FP types that are currently not supported by the PPC
17729 // backend, e.g. f16 and f80.
17730 return false;
17731 case MVT::f32:
17732 case MVT::f64: {
17733 if (Subtarget.hasPrefixInstrs()) {
17734 // We can materialize all immediates via XXSPLTI32DX and XXSPLTIDP.
17735 return true;
17736 }
17737 bool IsExact;
17738 APSInt IntResult(16, false);
17739 // The rounding mode doesn't really matter because we only care about floats
17740 // that can be converted to integers exactly.
17741 Imm.convertToInteger(IntResult, APFloat::rmTowardZero, &IsExact);
17742 // For exact values in the range [-16, 15] we can materialize the float.
17743 if (IsExact && IntResult <= 15 && IntResult >= -16)
17744 return true;
17745 return Imm.isZero();
17746 }
17747 case MVT::ppcf128:
17748 return Imm.isPosZero();
17749 }
17750}
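// For illustration, with VSX but without prefixed instructions:
//   +0.0, -16.0, 4.0  -> legal     (exact integers in [-16, 15], or zero)
//   16.0, 0.5         -> not legal (out of range / not an exact integer)
// With prefixed instructions (XXSPLTI32DX/XXSPLTIDP) every f32/f64 immediate
// is considered legal.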
17751
17752// For vector shift operation op, fold
17753// (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
17754 static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,
17755 SelectionDAG &DAG) {
17756 SDValue N0 = N->getOperand(0);
17757 SDValue N1 = N->getOperand(1);
17758 EVT VT = N0.getValueType();
17759 unsigned OpSizeInBits = VT.getScalarSizeInBits();
17760 unsigned Opcode = N->getOpcode();
17761 unsigned TargetOpcode;
17762
17763 switch (Opcode) {
17764 default:
17765 llvm_unreachable("Unexpected shift operation");
17766 case ISD::SHL:
17767 TargetOpcode = PPCISD::SHL;
17768 break;
17769 case ISD::SRL:
17770 TargetOpcode = PPCISD::SRL;
17771 break;
17772 case ISD::SRA:
17773 TargetOpcode = PPCISD::SRA;
17774 break;
17775 }
17776
17777 if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
17778 N1->getOpcode() == ISD::AND)
17779 if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
17780 if (Mask->getZExtValue() == OpSizeInBits - 1)
17781 return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));
17782
17783 return SDValue();
17784}
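// An illustrative instance of the fold above for v4i32, where the hardware
// shift already uses only the low log2(32) bits of each element:
//   (shl v4i32:x, (and v4i32:y, splat(31)))  -->  (PPCISD::SHL x, y)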
17785
17786SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
17787 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
17788 return Value;
17789
17790 SDValue N0 = N->getOperand(0);
17791 ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
17792 if (!Subtarget.isISA3_0() || !Subtarget.isPPC64() ||
17793 N0.getOpcode() != ISD::SIGN_EXTEND ||
17794 N0.getOperand(0).getValueType() != MVT::i32 || CN1 == nullptr ||
17795 N->getValueType(0) != MVT::i64)
17796 return SDValue();
17797
17798 // We can't save an operation here if the value is already extended, and
17799 // the existing shift is easier to combine.
17800 SDValue ExtsSrc = N0.getOperand(0);
17801 if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
17802 ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
17803 return SDValue();
17804
17805 SDLoc DL(N0);
17806 SDValue ShiftBy = SDValue(CN1, 0);
17807 // We want the shift amount to be i32 on the extswsli, but the shift
17808 // amount could be an i64.
17809 if (ShiftBy.getValueType() == MVT::i64)
17810 ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);
17811
17812 return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
17813 ShiftBy);
17814}
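// An illustrative instance of the combine above (ISA 3.0, 64-bit only):
//   (shl (sign_extend i32:x to i64), 3)  -->  (PPCISD::EXTSWSLI x, 3)
// so the extend and the shift are done by a single extswsli.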
17815
17816SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
17817 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
17818 return Value;
17819
17820 return SDValue();
17821}
17822
17823SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
17824 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
17825 return Value;
17826
17827 return SDValue();
17828}
17829
17830// Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
17831// Transform (add X, (zext(sete Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
17832 // When C is zero, the expression (addi Z, -C) simplifies to Z
17833// Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
17834 static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,
17835 const PPCSubtarget &Subtarget) {
17836 if (!Subtarget.isPPC64())
17837 return SDValue();
17838
17839 SDValue LHS = N->getOperand(0);
17840 SDValue RHS = N->getOperand(1);
17841
17842 auto isZextOfCompareWithConstant = [](SDValue Op) {
17843 if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
17844 Op.getValueType() != MVT::i64)
17845 return false;
17846
17847 SDValue Cmp = Op.getOperand(0);
17848 if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
17849 Cmp.getOperand(0).getValueType() != MVT::i64)
17850 return false;
17851
17852 if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
17853 int64_t NegConstant = 0 - Constant->getSExtValue();
17854 // Due to the limitations of the addi instruction,
17855 // -C is required to be in [-32768, 32767].
17856 return isInt<16>(NegConstant);
17857 }
17858
17859 return false;
17860 };
17861
17862 bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
17863 bool RHSHasPattern = isZextOfCompareWithConstant(RHS);
17864
17865 // If there is a pattern, canonicalize a zext operand to the RHS.
17866 if (LHSHasPattern && !RHSHasPattern)
17867 std::swap(LHS, RHS);
17868 else if (!LHSHasPattern && !RHSHasPattern)
17869 return SDValue();
17870
17871 SDLoc DL(N);
17872 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Glue);
17873 SDValue Cmp = RHS.getOperand(0);
17874 SDValue Z = Cmp.getOperand(0);
17875 auto *Constant = cast<ConstantSDNode>(Cmp.getOperand(1));
17876 int64_t NegConstant = 0 - Constant->getSExtValue();
17877
17878 switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
17879 default: break;
17880 case ISD::SETNE: {
17881 // when C == 0
17882 // --> addze X, (addic Z, -1).carry
17883 // /
17884 // add X, (zext(setne Z, C))--
17885 // \ when -32768 <= -C <= 32767 && C != 0
17886 // --> addze X, (addic (addi Z, -C), -1).carry
17887 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
17888 DAG.getConstant(NegConstant, DL, MVT::i64));
17889 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
17890 SDValue Addc = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
17891 AddOrZ, DAG.getConstant(-1ULL, DL, MVT::i64));
17892 return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
17893 SDValue(Addc.getNode(), 1));
17894 }
17895 case ISD::SETEQ: {
17896 // when C == 0
17897 // --> addze X, (subfic Z, 0).carry
17898 // /
17899 // add X, (zext(sete Z, C))--
17900 // \ when -32768 <= -C <= 32767 && C != 0
17901 // --> addze X, (subfic (addi Z, -C), 0).carry
17902 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
17903 DAG.getConstant(NegConstant, DL, MVT::i64));
17904 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
17905 SDValue Subc = DAG.getNode(ISD::SUBC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
17906 DAG.getConstant(0, DL, MVT::i64), AddOrZ);
17907 return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
17908 SDValue(Subc.getNode(), 1));
17909 }
17910 }
17911
17912 return SDValue();
17913}
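// A worked instance of the SETNE case above (illustrative only), with X and
// Z of type i64:
//   X + zext(setne Z, 5)
//     t = addi  Z, -5
//     addic t, -1          ; carry is set iff t != 0
//     addze X              ; X + carry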
17914
17915// Transform
17916// (add C1, (MAT_PCREL_ADDR GlobalAddr+C2)) to
17917// (MAT_PCREL_ADDR GlobalAddr+(C1+C2))
17918// In this case both C1 and C2 must be known constants.
17919 // C1+C2 must fit into a 34-bit signed integer.
17920 static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG,
17921 const PPCSubtarget &Subtarget) {
17922 if (!Subtarget.isUsingPCRelativeCalls())
17923 return SDValue();
17924
17925 // Check both Operand 0 and Operand 1 of the ADD node for the PCRel node.
17926 // If we find that node try to cast the Global Address and the Constant.
17927 SDValue LHS = N->getOperand(0);
17928 SDValue RHS = N->getOperand(1);
17929
17930 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
17931 std::swap(LHS, RHS);
17932
17933 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
17934 return SDValue();
17935
17936 // Operand zero of PPCISD::MAT_PCREL_ADDR is the GA node.
17937 GlobalAddressSDNode *GSDN = dyn_cast<GlobalAddressSDNode>(LHS.getOperand(0));
17938 ConstantSDNode* ConstNode = dyn_cast<ConstantSDNode>(RHS);
17939
17940 // Check that both casts succeeded.
17941 if (!GSDN || !ConstNode)
17942 return SDValue();
17943
17944 int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue();
17945 SDLoc DL(GSDN);
17946
17947 // The signed int offset needs to fit in 34 bits.
17948 if (!isInt<34>(NewOffset))
17949 return SDValue();
17950
17951 // The new global address is a copy of the old global address except
17952 // that it has the updated Offset.
17953 SDValue GA =
17954 DAG.getTargetGlobalAddress(GSDN->getGlobal(), DL, GSDN->getValueType(0),
17955 NewOffset, GSDN->getTargetFlags());
17956 SDValue MatPCRel =
17957 DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, GSDN->getValueType(0), GA);
17958 return MatPCRel;
17959}
17960
17961SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
17962 if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
17963 return Value;
17964
17965 if (auto Value = combineADDToMAT_PCREL_ADDR(N, DCI.DAG, Subtarget))
17966 return Value;
17967
17968 return SDValue();
17969}
17970
17971// Detect TRUNCATE operations on bitcasts of float128 values.
17972 // What we are looking for here is the situation where we extract a subset
17973 // of bits from a 128-bit float.
17974// This can be of two forms:
17975// 1) BITCAST of f128 feeding TRUNCATE
17976// 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
17977 // This is required because we do not have a legal i128 type, and so we
17978 // want to avoid having to store the f128 and then reload part
17979 // of it.
17980SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
17981 DAGCombinerInfo &DCI) const {
17982 // If we are using CRBits then try that first.
17983 if (Subtarget.useCRBits()) {
17984 // Check if CRBits did anything and return that if it did.
17985 if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
17986 return CRTruncValue;
17987 }
17988
17989 SDLoc dl(N);
17990 SDValue Op0 = N->getOperand(0);
17991
17992 // Looking for a truncate of i128 to i64.
17993 if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
17994 return SDValue();
17995
17996 int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;
17997
17998 // SRL feeding TRUNCATE.
17999 if (Op0.getOpcode() == ISD::SRL) {
18000 ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
18001 // The right shift has to be by 64 bits.
18002 if (!ConstNode || ConstNode->getZExtValue() != 64)
18003 return SDValue();
18004
18005 // Switch the element number to extract.
18006 EltToExtract = EltToExtract ? 0 : 1;
18007 // Update Op0 past the SRL.
18008 Op0 = Op0.getOperand(0);
18009 }
18010
18011 // BITCAST feeding a TRUNCATE possibly via SRL.
18012 if (Op0.getOpcode() == ISD::BITCAST &&
18013 Op0.getValueType() == MVT::i128 &&
18014 Op0.getOperand(0).getValueType() == MVT::f128) {
18015 SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
18016 return DCI.DAG.getNode(
18017 ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
18018 DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
18019 }
18020 return SDValue();
18021}
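// An illustrative instance of form 2 above on a little-endian subtarget:
//   (trunc (srl (bitcast f128:v to i128), 64) to i64)
//     --> (extract_vector_elt (bitcast v to v2i64), 1)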
18022
18023SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
18024 SelectionDAG &DAG = DCI.DAG;
18025
18026 ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1));
18027 if (!ConstOpOrElement)
18028 return SDValue();
18029
18030 // An imul is usually smaller than the alternative sequence for a legal type.
18032 isOperationLegal(ISD::MUL, N->getValueType(0)))
18033 return SDValue();
18034
18035 auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
18036 switch (this->Subtarget.getCPUDirective()) {
18037 default:
18038 // TODO: enhance the condition for subtarget before pwr8
18039 return false;
18040 case PPC::DIR_PWR8:
18041 //   type     mul  add  shl
18042 //   scalar    4    1    1
18043 //   vector    7    2    2
18044 return true;
18045 case PPC::DIR_PWR9:
18046 case PPC::DIR_PWR10:
18048 //   type     mul  add  shl
18049 //   scalar    5    2    2
18050 //   vector    7    2    2
18051
18052 // The cycle counts of the related operations are shown in the table above:
18053 // mul is 5 (scalar) / 7 (vector), while add/sub/shl are all 2 for both
18054 // scalar and vector types. For two-instruction patterns, add/sub + shl
18055 // costs 4, so the transform is always profitable; but for the three-instruction
18056 // pattern (mul x, -(2^N + 1)) => -(add (shl x, N), x), sub + add + shl cost 6,
18057 // so we should only do it for vector types.
18058 return IsAddOne && IsNeg ? VT.isVector() : true;
18059 }
18060 };
18061
18062 EVT VT = N->getValueType(0);
18063 SDLoc DL(N);
18064
18065 const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
18066 bool IsNeg = MulAmt.isNegative();
18067 APInt MulAmtAbs = MulAmt.abs();
18068
18069 if ((MulAmtAbs - 1).isPowerOf2()) {
18070 // (mul x, 2^N + 1) => (add (shl x, N), x)
18071 // (mul x, -(2^N + 1)) => -(add (shl x, N), x)
18072
18073 if (!IsProfitable(IsNeg, true, VT))
18074 return SDValue();
18075
18076 SDValue Op0 = N->getOperand(0);
18077 SDValue Op1 =
18078 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
18079 DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT));
18080 SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
18081
18082 if (!IsNeg)
18083 return Res;
18084
18085 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
18086 } else if ((MulAmtAbs + 1).isPowerOf2()) {
18087 // (mul x, 2^N - 1) => (sub (shl x, N), x)
18088 // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
18089
18090 if (!IsProfitable(IsNeg, false, VT))
18091 return SDValue();
18092
18093 SDValue Op0 = N->getOperand(0);
18094 SDValue Op1 =
18095 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
18096 DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT));
18097
18098 if (!IsNeg)
18099 return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0);
18100 else
18101 return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
18102
18103 } else {
18104 return SDValue();
18105 }
18106}
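// Worked examples of the decompositions above (subject to the profitability
// check):
//   mul x, 5   -> add (shl x, 2), x               ; 5 = 2^2 + 1
//   mul x, -5  -> sub 0, (add (shl x, 2), x)
//   mul x, 7   -> sub (shl x, 3), x               ; 7 = 2^3 - 1
//   mul x, -7  -> sub x, (shl x, 3)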
18107
18108 // Combine an fma-like op (such as fnmsub) with fnegs into the appropriate
18109 // op. Do this in the combiner since we need SD flags and subtarget features.
18110SDValue PPCTargetLowering::combineFMALike(SDNode *N,
18111 DAGCombinerInfo &DCI) const {
18112 SDValue N0 = N->getOperand(0);
18113 SDValue N1 = N->getOperand(1);
18114 SDValue N2 = N->getOperand(2);
18115 SDNodeFlags Flags = N->getFlags();
18116 EVT VT = N->getValueType(0);
18117 SelectionDAG &DAG = DCI.DAG;
18119 unsigned Opc = N->getOpcode();
18120 bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
18121 bool LegalOps = !DCI.isBeforeLegalizeOps();
18122 SDLoc Loc(N);
18123
18124 if (!isOperationLegal(ISD::FMA, VT))
18125 return SDValue();
18126
18127 // Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0
18128 // since (fnmsub a b c)=-0 while c-ab=+0.
18129 if (!Flags.hasNoSignedZeros() && !Options.NoSignedZerosFPMath)
18130 return SDValue();
18131
18132 // (fma (fneg a) b c) => (fnmsub a b c)
18133 // (fnmsub (fneg a) b c) => (fma a b c)
18134 if (SDValue NegN0 = getCheaperNegatedExpression(N0, DAG, LegalOps, CodeSize))
18135 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, NegN0, N1, N2, Flags);
18136
18137 // (fma a (fneg b) c) => (fnmsub a b c)
18138 // (fnmsub a (fneg b) c) => (fma a b c)
18139 if (SDValue NegN1 = getCheaperNegatedExpression(N1, DAG, LegalOps, CodeSize))
18140 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, N0, NegN1, N2, Flags);
18141
18142 return SDValue();
18143}
18144
18145bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
18146 // Only duplicate to increase tail calls for the 64-bit SysV ABIs.
18147 if (!Subtarget.is64BitELFABI())
18148 return false;
18149
18150 // If not a tail call then no need to proceed.
18151 if (!CI->isTailCall())
18152 return false;
18153
18154 // If sibling calls have been disabled and tail-calls aren't guaranteed
18155 // there is no reason to duplicate.
18156 auto &TM = getTargetMachine();
18157 if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
18158 return false;
18159
18160 // Can't tail call a function called indirectly, or if it has variadic args.
18161 const Function *Callee = CI->getCalledFunction();
18162 if (!Callee || Callee->isVarArg())
18163 return false;
18164
18165 // Make sure the callee and caller calling conventions are eligible for tco.
18166 const Function *Caller = CI->getParent()->getParent();
18167 if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
18168 CI->getCallingConv()))
18169 return false;
18170
18171 // If the function is local then we have a good chance at tail-calling it
18172 return getTargetMachine().shouldAssumeDSOLocal(Callee);
18173}
18174
18175bool PPCTargetLowering::
18176isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
18177 const Value *Mask = AndI.getOperand(1);
18178 // If the mask is suitable for andi. or andis. we should sink the and.
18179 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
18180 // Can't handle constants wider than 64-bits.
18181 if (CI->getBitWidth() > 64)
18182 return false;
18183 int64_t ConstVal = CI->getZExtValue();
18184 return isUInt<16>(ConstVal) ||
18185 (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
18186 }
18187
18188 // For non-constant masks, we can always use the record-form and.
18189 return true;
18190}
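// For illustration, some masks in (and X, Mask) == 0 and the decision above:
//   0x0000FFFF     -> sink: andi. covers the mask
//   0xFFFF0000     -> sink: andis. covers the mask
//   0x00010001     -> don't sink: would need more than one instruction
//   non-constant Y -> sink: the record-form and. sets CR0 for free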
18191
18192/// getAddrModeForFlags - Based on the set of address flags, select the most
18193/// optimal instruction format to match by.
18194PPC::AddrMode PPCTargetLowering::getAddrModeForFlags(unsigned Flags) const {
18195 // This is not a node we should be handling here.
18196 if (Flags == PPC::MOF_None)
18197 return PPC::AM_None;
18198 // Unaligned D-Forms are tried first, followed by the aligned D-Forms.
18199 for (auto FlagSet : AddrModesMap.at(PPC::AM_DForm))
18200 if ((Flags & FlagSet) == FlagSet)
18201 return PPC::AM_DForm;
18202 for (auto FlagSet : AddrModesMap.at(PPC::AM_DSForm))
18203 if ((Flags & FlagSet) == FlagSet)
18204 return PPC::AM_DSForm;
18205 for (auto FlagSet : AddrModesMap.at(PPC::AM_DQForm))
18206 if ((Flags & FlagSet) == FlagSet)
18207 return PPC::AM_DQForm;
18208 for (auto FlagSet : AddrModesMap.at(PPC::AM_PrefixDForm))
18209 if ((Flags & FlagSet) == FlagSet)
18210 return PPC::AM_PrefixDForm;
18211 // If no other forms are selected, return an X-Form as it is the most
18212 // general addressing mode.
18213 return PPC::AM_XForm;
18214}
18215
18216/// Set alignment flags based on whether or not the Frame Index is aligned.
18217/// Utilized when computing flags for address computation when selecting
18218/// load and store instructions.
18219static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet,
18220 SelectionDAG &DAG) {
18221 bool IsAdd = ((N.getOpcode() == ISD::ADD) || (N.getOpcode() == ISD::OR));
18222 FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(IsAdd ? N.getOperand(0) : N);
18223 if (!FI)
18224 return;
18226 unsigned FrameIndexAlign = MFI.getObjectAlign(FI->getIndex()).value();
18227 // If this is (add $FI, $S16Imm), the alignment flags are already set
18228 // based on the immediate. We just need to clear the alignment flags
18229 // if the FI alignment is weaker.
18230 if ((FrameIndexAlign % 4) != 0)
18231 FlagSet &= ~PPC::MOF_RPlusSImm16Mult4;
18232 if ((FrameIndexAlign % 16) != 0)
18233 FlagSet &= ~PPC::MOF_RPlusSImm16Mult16;
18234 // If the address is a plain FrameIndex, set alignment flags based on
18235 // FI alignment.
18236 if (!IsAdd) {
18237 if ((FrameIndexAlign % 4) == 0)
18238 FlagSet |= PPC::MOF_RPlusSImm16Mult4;
18239 if ((FrameIndexAlign % 16) == 0)
18240 FlagSet |= PPC::MOF_RPlusSImm16Mult16;
18241 }
18242}
18243
18244/// Given a node, compute flags that are used for address computation when
18245/// selecting load and store instructions. The flags computed are stored in
18246/// FlagSet. This function takes into account whether the node is a constant,
18247 /// an ADD or an OR, or neither, and computes the address flags accordingly.
18248static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet,
18249 SelectionDAG &DAG) {
18250 // Set the alignment flags for the node depending on if the node is
18251 // 4-byte or 16-byte aligned.
18252 auto SetAlignFlagsForImm = [&](uint64_t Imm) {
18253 if ((Imm & 0x3) == 0)
18254 FlagSet |= PPC::MOF_RPlusSImm16Mult4;
18255 if ((Imm & 0xf) == 0)
18256 FlagSet |= PPC::MOF_RPlusSImm16Mult16;
18257 };
18258
18259 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
18260 // All 32-bit constants can be computed as LIS + Disp.
18261 const APInt &ConstImm = CN->getAPIntValue();
18262 if (ConstImm.isSignedIntN(32)) { // Flag to handle 32-bit constants.
18263 FlagSet |= PPC::MOF_AddrIsSImm32;
18264 SetAlignFlagsForImm(ConstImm.getZExtValue());
18265 setAlignFlagsForFI(N, FlagSet, DAG);
18266 }
18267 if (ConstImm.isSignedIntN(34)) // Flag to handle 34-bit constants.
18268 FlagSet |= PPC::MOF_RPlusSImm34;
18269 else // Let constant materialization handle large constants.
18270 FlagSet |= PPC::MOF_NotAddNorCst;
18271 } else if (N.getOpcode() == ISD::ADD || provablyDisjointOr(DAG, N)) {
18272 // This address can be represented as an addition of:
18273 // - Register + Imm16 (possibly a multiple of 4/16)
18274 // - Register + Imm34
18275 // - Register + PPCISD::Lo
18276 // - Register + Register
18277 // In any case, we won't have to match this as Base + Zero.
18278 SDValue RHS = N.getOperand(1);
18279 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(RHS)) {
18280 const APInt &ConstImm = CN->getAPIntValue();
18281 if (ConstImm.isSignedIntN(16)) {
18282 FlagSet |= PPC::MOF_RPlusSImm16; // Signed 16-bit immediates.
18283 SetAlignFlagsForImm(ConstImm.getZExtValue());
18284 setAlignFlagsForFI(N, FlagSet, DAG);
18285 }
18286 if (ConstImm.isSignedIntN(34))
18287 FlagSet |= PPC::MOF_RPlusSImm34; // Signed 34-bit immediates.
18288 else
18289 FlagSet |= PPC::MOF_RPlusR; // Register.
18290 } else if (RHS.getOpcode() == PPCISD::Lo && !RHS.getConstantOperandVal(1))
18291 FlagSet |= PPC::MOF_RPlusLo; // PPCISD::Lo.
18292 else
18293 FlagSet |= PPC::MOF_RPlusR;
18294 } else { // The address computation is not a constant or an addition.
18295 setAlignFlagsForFI(N, FlagSet, DAG);
18296 FlagSet |= PPC::MOF_NotAddNorCst;
18297 }
18298}
18299
18300static bool isPCRelNode(SDValue N) {
18301 return (N.getOpcode() == PPCISD::MAT_PCREL_ADDR ||
18302 isValidPCRelNode<ConstantPoolSDNode>(N) ||
18303 isValidPCRelNode<GlobalAddressSDNode>(N) ||
18304 isValidPCRelNode<JumpTableSDNode>(N) ||
18305 isValidPCRelNode<BlockAddressSDNode>(N));
18306}
18307
18308 /// computeMOFlags - Given a node N and its Parent (a MemSDNode), compute
18309/// the address flags of the load/store instruction that is to be matched.
18310unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N,
18311 SelectionDAG &DAG) const {
18312 unsigned FlagSet = PPC::MOF_None;
18313
18314 // Compute subtarget flags.
18315 if (!Subtarget.hasP9Vector())
18316 FlagSet |= PPC::MOF_SubtargetBeforeP9;
18317 else {
18318 FlagSet |= PPC::MOF_SubtargetP9;
18319 if (Subtarget.hasPrefixInstrs())
18320 FlagSet |= PPC::MOF_SubtargetP10;
18321 }
18322 if (Subtarget.hasSPE())
18323 FlagSet |= PPC::MOF_SubtargetSPE;
18324
18325 // Check if we have a PCRel node and return early.
18326 if ((FlagSet & PPC::MOF_SubtargetP10) && isPCRelNode(N))
18327 return FlagSet;
18328
18329 // If the node is one of the paired load/store intrinsics, compute flags for
18330 // address computation and return early.
18331 unsigned ParentOp = Parent->getOpcode();
18332 if (Subtarget.isISA3_1() && ((ParentOp == ISD::INTRINSIC_W_CHAIN) ||
18333 (ParentOp == ISD::INTRINSIC_VOID))) {
18334 unsigned ID = Parent->getConstantOperandVal(1);
18335 if ((ID == Intrinsic::ppc_vsx_lxvp) || (ID == Intrinsic::ppc_vsx_stxvp)) {
18336 SDValue IntrinOp = (ID == Intrinsic::ppc_vsx_lxvp)
18337 ? Parent->getOperand(2)
18338 : Parent->getOperand(3);
18339 computeFlagsForAddressComputation(IntrinOp, FlagSet, DAG);
18340 FlagSet |= PPC::MOF_Vector;
18341 return FlagSet;
18342 }
18343 }
18344
18345 // Mark this as something we don't want to handle here if it is an atomic
18346 // or a pre-increment instruction.
18347 if (const LSBaseSDNode *LSB = dyn_cast<LSBaseSDNode>(Parent))
18348 if (LSB->isIndexed())
18349 return PPC::MOF_None;
18350
18351 // Compute in-memory type flags. This is based on if there are scalars,
18352 // floats or vectors.
18353 const MemSDNode *MN = dyn_cast<MemSDNode>(Parent);
18354 assert(MN && "Parent should be a MemSDNode!");
18355 EVT MemVT = MN->getMemoryVT();
18356 unsigned Size = MemVT.getSizeInBits();
18357 if (MemVT.isScalarInteger()) {
18358 assert(Size <= 128 &&
18359 "Not expecting scalar integers larger than 16 bytes!");
18360 if (Size < 32)
18361 FlagSet |= PPC::MOF_SubWordInt;
18362 else if (Size == 32)
18363 FlagSet |= PPC::MOF_WordInt;
18364 else
18365 FlagSet |= PPC::MOF_DoubleWordInt;
18366 } else if (MemVT.isVector() && !MemVT.isFloatingPoint()) { // Integer vectors.
18367 if (Size == 128)
18368 FlagSet |= PPC::MOF_Vector;
18369 else if (Size == 256) {
18370 assert(Subtarget.pairedVectorMemops() &&
18371 "256-bit vectors are only available when paired vector memops is "
18372 "enabled!");
18373 FlagSet |= PPC::MOF_Vector;
18374 } else
18375 llvm_unreachable("Not expecting illegal vectors!");
18376 } else { // Floating point type: can be scalar, f128 or vector types.
18377 if (Size == 32 || Size == 64)
18378 FlagSet |= PPC::MOF_ScalarFloat;
18379 else if (MemVT == MVT::f128 || MemVT.isVector())
18380 FlagSet |= PPC::MOF_Vector;
18381 else
18382 llvm_unreachable("Not expecting illegal scalar floats!");
18383 }
18384
18385 // Compute flags for address computation.
18386 computeFlagsForAddressComputation(N, FlagSet, DAG);
18387
18388 // Compute type extension flags.
18389 if (const LoadSDNode *LN = dyn_cast<LoadSDNode>(Parent)) {
18390 switch (LN->getExtensionType()) {
18391 case ISD::SEXTLOAD:
18392 FlagSet |= PPC::MOF_SExt;
18393 break;
18394 case ISD::EXTLOAD:
18395 case ISD::ZEXTLOAD:
18396 FlagSet |= PPC::MOF_ZExt;
18397 break;
18398 case ISD::NON_EXTLOAD:
18399 FlagSet |= PPC::MOF_NoExt;
18400 break;
18401 }
18402 } else
18403 FlagSet |= PPC::MOF_NoExt;
18404
18405 // For integers, no extension is the same as zero extension.
18406 // We set the extension mode to zero extension so we don't have
18407 // to add separate entries in AddrModesMap for loads and stores.
18408 if (MemVT.isScalarInteger() && (FlagSet & PPC::MOF_NoExt)) {
18409 FlagSet |= PPC::MOF_ZExt;
18410 FlagSet &= ~PPC::MOF_NoExt;
18411 }
18412
18413 // If we don't have prefixed instructions, 34-bit constants should be
18414 // treated as PPC::MOF_NotAddNorCst so they can match D-Forms.
18415 bool IsNonP1034BitConst =
18417 FlagSet) == PPC::MOF_RPlusSImm34;
18418 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::OR &&
18419 IsNonP1034BitConst)
18420 FlagSet |= PPC::MOF_NotAddNorCst;
18421
18422 return FlagSet;
18423}
18424
18425/// SelectForceXFormMode - Given the specified address, force it to be
18426/// represented as an indexed [r+r] operation (an XForm instruction).
18428 SDValue &Base,
18429 SelectionDAG &DAG) const {
18430
18432 int16_t ForceXFormImm = 0;
18433 if (provablyDisjointOr(DAG, N) &&
18434 !isIntS16Immediate(N.getOperand(1), ForceXFormImm)) {
18435 Disp = N.getOperand(0);
18436 Base = N.getOperand(1);
18437 return Mode;
18438 }
18439
18440 // If the address is the result of an add, we will utilize the fact that the
18441 // address calculation includes an implicit add. However, we can reduce
18442 // register pressure if we do not materialize a constant just for use as the
18443 // index register. We only get rid of the add if it is not an add of a
18444 // value and a 16-bit signed constant and both have a single use.
18445 if (N.getOpcode() == ISD::ADD &&
18446 (!isIntS16Immediate(N.getOperand(1), ForceXFormImm) ||
18447 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
18448 Disp = N.getOperand(0);
18449 Base = N.getOperand(1);
18450 return Mode;
18451 }
18452
18453 // Otherwise, use R0 as the base register.
18454 Disp = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
18455 N.getValueType());
18456 Base = N;
18457
18458 return Mode;
18459}
18460
18462 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
18463 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
18464 EVT ValVT = Val.getValueType();
18465 // If we are splitting a scalar integer into f64 parts (i.e. so they
18466 // can be placed into VFRC registers), we need to zero extend and
18467 // bitcast the values. This will ensure the value is placed into a
18468 // VSR using direct moves or stack operations as needed.
18469 if (PartVT == MVT::f64 &&
18470 (ValVT == MVT::i32 || ValVT == MVT::i16 || ValVT == MVT::i8)) {
18471 Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
18472 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Val);
18473 Parts[0] = Val;
18474 return true;
18475 }
18476 return false;
18477}
18478
18479SDValue PPCTargetLowering::lowerToLibCall(const char *LibCallName, SDValue Op,
18480 SelectionDAG &DAG) const {
18481 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18483 EVT RetVT = Op.getValueType();
18484 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
18485 SDValue Callee =
18486 DAG.getExternalSymbol(LibCallName, TLI.getPointerTy(DAG.getDataLayout()));
18487 bool SignExtend = TLI.shouldSignExtendTypeInLibCall(RetVT, false);
18490 for (const SDValue &N : Op->op_values()) {
18491 EVT ArgVT = N.getValueType();
18492 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
18493 Entry.Node = N;
18494 Entry.Ty = ArgTy;
18495 Entry.IsSExt = TLI.shouldSignExtendTypeInLibCall(ArgVT, SignExtend);
18496 Entry.IsZExt = !Entry.IsSExt;
18497 Args.push_back(Entry);
18498 }
18499
18500 SDValue InChain = DAG.getEntryNode();
18501 SDValue TCChain = InChain;
18502 const Function &F = DAG.getMachineFunction().getFunction();
18503 bool isTailCall =
18504 TLI.isInTailCallPosition(DAG, Op.getNode(), TCChain) &&
18505 (RetTy == F.getReturnType() || F.getReturnType()->isVoidTy());
18506 if (isTailCall)
18507 InChain = TCChain;
18508 CLI.setDebugLoc(SDLoc(Op))
18509 .setChain(InChain)
18510 .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args))
18511 .setTailCall(isTailCall)
18512 .setSExtResult(SignExtend)
18513 .setZExtResult(!SignExtend)
18515 return TLI.LowerCallTo(CLI).first;
18516}
18517
18518SDValue PPCTargetLowering::lowerLibCallBasedOnType(
18519 const char *LibCallFloatName, const char *LibCallDoubleName, SDValue Op,
18520 SelectionDAG &DAG) const {
18521 if (Op.getValueType() == MVT::f32)
18522 return lowerToLibCall(LibCallFloatName, Op, DAG);
18523
18524 if (Op.getValueType() == MVT::f64)
18525 return lowerToLibCall(LibCallDoubleName, Op, DAG);
18526
18527 return SDValue();
18528}
18529
18530bool PPCTargetLowering::isLowringToMASSFiniteSafe(SDValue Op) const {
18531 SDNodeFlags Flags = Op.getNode()->getFlags();
18532 return isLowringToMASSSafe(Op) && Flags.hasNoSignedZeros() &&
18533 Flags.hasNoNaNs() && Flags.hasNoInfs();
18534}
18535
18536bool PPCTargetLowering::isLowringToMASSSafe(SDValue Op) const {
18537 return Op.getNode()->getFlags().hasApproximateFuncs();
18538}
18539
18540bool PPCTargetLowering::isScalarMASSConversionEnabled() const {
18542}
18543
18544SDValue PPCTargetLowering::lowerLibCallBase(const char *LibCallDoubleName,
18545 const char *LibCallFloatName,
18546 const char *LibCallDoubleNameFinite,
18547 const char *LibCallFloatNameFinite,
18548 SDValue Op,
18549 SelectionDAG &DAG) const {
18550 if (!isScalarMASSConversionEnabled() || !isLowringToMASSSafe(Op))
18551 return SDValue();
18552
18553 if (!isLowringToMASSFiniteSafe(Op))
18554 return lowerLibCallBasedOnType(LibCallFloatName, LibCallDoubleName, Op,
18555 DAG);
18556
18557 return lowerLibCallBasedOnType(LibCallFloatNameFinite,
18558 LibCallDoubleNameFinite, Op, DAG);
18559}
18560
18561SDValue PPCTargetLowering::lowerPow(SDValue Op, SelectionDAG &DAG) const {
18562 return lowerLibCallBase("__xl_pow", "__xl_powf", "__xl_pow_finite",
18563 "__xl_powf_finite", Op, DAG);
18564}
18565
18566SDValue PPCTargetLowering::lowerSin(SDValue Op, SelectionDAG &DAG) const {
18567 return lowerLibCallBase("__xl_sin", "__xl_sinf", "__xl_sin_finite",
18568 "__xl_sinf_finite", Op, DAG);
18569}
18570
18571SDValue PPCTargetLowering::lowerCos(SDValue Op, SelectionDAG &DAG) const {
18572 return lowerLibCallBase("__xl_cos", "__xl_cosf", "__xl_cos_finite",
18573 "__xl_cosf_finite", Op, DAG);
18574}
18575
18576SDValue PPCTargetLowering::lowerLog(SDValue Op, SelectionDAG &DAG) const {
18577 return lowerLibCallBase("__xl_log", "__xl_logf", "__xl_log_finite",
18578 "__xl_logf_finite", Op, DAG);
18579}
18580
18581SDValue PPCTargetLowering::lowerLog10(SDValue Op, SelectionDAG &DAG) const {
18582 return lowerLibCallBase("__xl_log10", "__xl_log10f", "__xl_log10_finite",
18583 "__xl_log10f_finite", Op, DAG);
18584}
18585
18586SDValue PPCTargetLowering::lowerExp(SDValue Op, SelectionDAG &DAG) const {
18587 return lowerLibCallBase("__xl_exp", "__xl_expf", "__xl_exp_finite",
18588 "__xl_expf_finite", Op, DAG);
18589}
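// For illustration (a sketch of the overall effect, not a definitive
// contract): when scalar MASS conversion is enabled and a call carries the
// approximate-functions flag, e.g.
//   %r = call afn double @pow(double %a, double %b)
// lowerPow above redirects it to __xl_pow; if the call additionally has
// nnan, ninf and nsz, the finite variant __xl_pow_finite is used instead.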
18590
18591// If we happen to match to an aligned D-Form, check if the Frame Index is
18592// adequately aligned. If it is not, reset the mode to match to X-Form.
18593static void setXFormForUnalignedFI(SDValue N, unsigned Flags,
18594 PPC::AddrMode &Mode) {
18595 if (!isa<FrameIndexSDNode>(N))
18596 return;
18597 if ((Mode == PPC::AM_DSForm && !(Flags & PPC::MOF_RPlusSImm16Mult4)) ||
18598 (Mode == PPC::AM_DQForm && !(Flags & PPC::MOF_RPlusSImm16Mult16)))
18599 Mode = PPC::AM_XForm;
18600}
18601
18602 /// SelectOptimalAddrMode - Based on a node N and its Parent (a MemSDNode),
18603/// compute the address flags of the node, get the optimal address mode based
18604/// on the flags, and set the Base and Disp based on the address mode.
18606 SDValue N, SDValue &Disp,
18607 SDValue &Base,
18608 SelectionDAG &DAG,
18609 MaybeAlign Align) const {
18610 SDLoc DL(Parent);
18611
18612 // Compute the address flags.
18613 unsigned Flags = computeMOFlags(Parent, N, DAG);
18614
18615 // Get the optimal address mode based on the Flags.
18616 PPC::AddrMode Mode = getAddrModeForFlags(Flags);
18617
18618 // If the address mode is DS-Form or DQ-Form, check if the FI is aligned.
18619 // Select an X-Form load if it is not.
18620 setXFormForUnalignedFI(N, Flags, Mode);
18621
18622 // Set the mode to PC-Relative addressing mode if we have a valid PC-Rel node.
18623 if ((Mode == PPC::AM_XForm) && isPCRelNode(N)) {
18624 assert(Subtarget.isUsingPCRelativeCalls() &&
18625 "Must be using PC-Relative calls when a valid PC-Relative node is "
18626 "present!");
18627 Mode = PPC::AM_PCRel;
18628 }
18629
18630 // Set Base and Disp accordingly depending on the address mode.
18631 switch (Mode) {
18632 case PPC::AM_DForm:
18633 case PPC::AM_DSForm:
18634 case PPC::AM_DQForm: {
18635 // This is a register plus a 16-bit immediate. The base will be the
18636 // register and the displacement will be the immediate, unless the
18637 // immediate is not sufficiently aligned.
18638 if (Flags & PPC::MOF_RPlusSImm16) {
18639 SDValue Op0 = N.getOperand(0);
18640 SDValue Op1 = N.getOperand(1);
18641 int16_t Imm = Op1->getAsZExtVal();
18642 if (!Align || isAligned(*Align, Imm)) {
18643 Disp = DAG.getTargetConstant(Imm, DL, N.getValueType());
18644 Base = Op0;
18645 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op0)) {
18646 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
18647 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
18648 }
18649 break;
18650 }
18651 }
18652 // This is a register plus the @lo relocation. The base is the register
18653 // and the displacement is the global address.
18654 else if (Flags & PPC::MOF_RPlusLo) {
18655 Disp = N.getOperand(1).getOperand(0); // The global address.
18660 Base = N.getOperand(0);
18661 break;
18662 }
18663 // This is a constant address at most 32 bits. The base will be
18664 // zero or load-immediate-shifted and the displacement will be
18665 // the low 16 bits of the address.
18666 else if (Flags & PPC::MOF_AddrIsSImm32) {
18667 auto *CN = cast<ConstantSDNode>(N);
18668 EVT CNType = CN->getValueType(0);
18669 uint64_t CNImm = CN->getZExtValue();
18670 // If this address fits entirely in a 16-bit sext immediate field, codegen
18671 // this as "d, 0".
18672 int16_t Imm;
18673 if (isIntS16Immediate(CN, Imm) && (!Align || isAligned(*Align, Imm))) {
18674 Disp = DAG.getTargetConstant(Imm, DL, CNType);
18675 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
18676 CNType);
18677 break;
18678 }
18679 // Handle 32-bit sext immediate with LIS + Addr mode.
18680 if ((CNType == MVT::i32 || isInt<32>(CNImm)) &&
18681 (!Align || isAligned(*Align, CNImm))) {
18682 int32_t Addr = (int32_t)CNImm;
18683 // Otherwise, break this down into LIS + Disp.
18684 Disp = DAG.getTargetConstant((int16_t)Addr, DL, MVT::i32);
18685 Base =
18686 DAG.getTargetConstant((Addr - (int16_t)Addr) >> 16, DL, MVT::i32);
18687 uint32_t LIS = CNType == MVT::i32 ? PPC::LIS : PPC::LIS8;
18688 Base = SDValue(DAG.getMachineNode(LIS, DL, CNType, Base), 0);
18689 break;
18690 }
18691 }
18692 // Otherwise, the PPC::MOF_NotAdd flag is set; the load/store is non-foldable.
18693 Disp = DAG.getTargetConstant(0, DL, getPointerTy(DAG.getDataLayout()));
18694 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
18695 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
18696 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
18697 } else
18698 Base = N;
18699 break;
18700 }
18701 case PPC::AM_PrefixDForm: {
18702 int64_t Imm34 = 0;
18703 unsigned Opcode = N.getOpcode();
18704 if (((Opcode == ISD::ADD) || (Opcode == ISD::OR)) &&
18705 (isIntS34Immediate(N.getOperand(1), Imm34))) {
18706 // N is an ADD or OR node, and its second operand is a 34-bit signed immediate.
18707 Disp = DAG.getTargetConstant(Imm34, DL, N.getValueType());
18708 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
18709 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
18710 else
18711 Base = N.getOperand(0);
18712 } else if (isIntS34Immediate(N, Imm34)) {
18713 // The address is a 34-bit signed immediate.
18714 Disp = DAG.getTargetConstant(Imm34, DL, N.getValueType());
18715 Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
18716 }
18717 break;
18718 }
18719 case PPC::AM_PCRel: {
18720 // When selecting PC-Relative instructions, "Base" is not utilized as
18721 // we select the address as [PC+imm].
18722 Disp = N;
18723 break;
18724 }
18725 case PPC::AM_None:
18726 break;
18727 default: { // By default, X-Form is always available to be selected.
18728 // When a frame index is not aligned, we also match by XForm.
18729 FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N);
18730 Base = FI ? N : N.getOperand(1);
18731 Disp = FI ? DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
18732 N.getValueType())
18733 : N.getOperand(0);
18734 break;
18735 }
18736 }
18737 return Mode;
18738}
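// [Editorial note, not part of the upstream source] In the MOF_AddrIsSImm32 case
// above, a 32-bit constant address is split into a LIS-materialized high part
// and a sign-extended 16-bit displacement; subtracting the sign-extended low
// half before dividing out 2^16 compensates for that sign extension. A
// standalone check of the round-trip arithmetic (helper name is ours,
// illustration only):
#include <cstdint>
static bool lisDispSplitRoundTrips(int32_t Addr) {
  int16_t Lo = static_cast<int16_t>(Addr);                // displacement field
  int64_t Hi = (static_cast<int64_t>(Addr) - Lo) / 65536; // value LIS materializes
  return static_cast<int64_t>(Addr) == Hi * 65536 + Lo;   // always true
}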
18739
18740CCAssignFn *PPCTargetLowering::ccAssignFnForCall(CallingConv::ID CC,
18741 bool Return,
18742 bool IsVarArg) const {
18743 switch (CC) {
18744 case CallingConv::Cold:
18745 return (Return ? RetCC_PPC_Cold : CC_PPC64_ELF);
18746 default:
18747 return CC_PPC64_ELF;
18748 }
18749}
18750
18751bool PPCTargetLowering::shouldInlineQuadwordAtomics() const {
18752 return Subtarget.isPPC64() && Subtarget.hasQuadwordAtomics();
18753}
18754
18755TargetLowering::AtomicExpansionKind
18756PPCTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
18757 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
18758 if (shouldInlineQuadwordAtomics() && Size == 128)
18759 return AtomicExpansionKind::MaskedIntrinsic;
18760
18761 switch (AI->getOperation()) {
18762 case AtomicRMWInst::UIncWrap:
18763 case AtomicRMWInst::UDecWrap:
18764 return AtomicExpansionKind::CmpXChg;
18765 default:
18766 return TargetLowering::shouldExpandAtomicRMWInIR(AI);
18767 }
18768
18769 llvm_unreachable("unreachable atomicrmw operation");
18770}
18771
18772TargetLowering::AtomicExpansionKind
18773PPCTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
18774 unsigned Size = AI->getNewValOperand()->getType()->getPrimitiveSizeInBits();
18775 if (shouldInlineQuadwordAtomics() && Size == 128)
18776 return AtomicExpansionKind::MaskedIntrinsic;
18777 return TargetLowering::shouldExpandAtomicCmpXchgInIR(AI);
18778}
18779
18780static Intrinsic::ID
18781getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp) {
18782 switch (BinOp) {
18783 default:
18784 llvm_unreachable("Unexpected AtomicRMW BinOp");
18785 case AtomicRMWInst::Xchg:
18786 return Intrinsic::ppc_atomicrmw_xchg_i128;
18787 case AtomicRMWInst::Add:
18788 return Intrinsic::ppc_atomicrmw_add_i128;
18789 case AtomicRMWInst::Sub:
18790 return Intrinsic::ppc_atomicrmw_sub_i128;
18791 case AtomicRMWInst::And:
18792 return Intrinsic::ppc_atomicrmw_and_i128;
18793 case AtomicRMWInst::Or:
18794 return Intrinsic::ppc_atomicrmw_or_i128;
18795 case AtomicRMWInst::Xor:
18796 return Intrinsic::ppc_atomicrmw_xor_i128;
18797 case AtomicRMWInst::Nand:
18798 return Intrinsic::ppc_atomicrmw_nand_i128;
18799 }
18800}
18801
18802Value *PPCTargetLowering::emitMaskedAtomicRMWIntrinsic(
18803 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
18804 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
18805 assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
18806 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
18807 Type *ValTy = Incr->getType();
18808 assert(ValTy->getPrimitiveSizeInBits() == 128);
18809 Function *RMW = Intrinsic::getDeclaration(
18810 M, getIntrinsicForAtomicRMWBinOp128(AI->getOperation()));
18811 Type *Int64Ty = Type::getInt64Ty(M->getContext());
18812 Value *IncrLo = Builder.CreateTrunc(Incr, Int64Ty, "incr_lo");
18813 Value *IncrHi =
18814 Builder.CreateTrunc(Builder.CreateLShr(Incr, 64), Int64Ty, "incr_hi");
18815 Value *LoHi = Builder.CreateCall(RMW, {AlignedAddr, IncrLo, IncrHi});
18816 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
18817 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
18818 Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
18819 Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
18820 return Builder.CreateOr(
18821 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
18822}
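// [Editorial note, not part of the upstream source] The IR built above hands the
// 128-bit operand to the llvm.ppc.atomicrmw.*.i128 intrinsic as two i64 halves
// and then glues the {lo, hi} result back together with zext/shl/or. The same
// split-and-reassemble arithmetic on plain integers (helper name is ours; uses
// the Clang/GCC __int128 extension, illustration only):
static unsigned __int128 splitAndReassemble128(unsigned __int128 V) {
  unsigned long long Lo = static_cast<unsigned long long>(V);       // CreateTrunc(V, i64)
  unsigned long long Hi = static_cast<unsigned long long>(V >> 64); // CreateTrunc(CreateLShr(V, 64), i64)
  // CreateOr(ZExt(Lo), Shl(ZExt(Hi), 64)) -- reassembles the original value.
  return (static_cast<unsigned __int128>(Hi) << 64) | Lo;
}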
18823
18824Value *PPCTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
18825 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
18826 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
18827 assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
18828 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
18829 Type *ValTy = CmpVal->getType();
18830 assert(ValTy->getPrimitiveSizeInBits() == 128);
18831 Function *IntCmpXchg =
18832 Intrinsic::getDeclaration(M, Intrinsic::ppc_cmpxchg_i128);
18833 Type *Int64Ty = Type::getInt64Ty(M->getContext());
18834 Value *CmpLo = Builder.CreateTrunc(CmpVal, Int64Ty, "cmp_lo");
18835 Value *CmpHi =
18836 Builder.CreateTrunc(Builder.CreateLShr(CmpVal, 64), Int64Ty, "cmp_hi");
18837 Value *NewLo = Builder.CreateTrunc(NewVal, Int64Ty, "new_lo");
18838 Value *NewHi =
18839 Builder.CreateTrunc(Builder.CreateLShr(NewVal, 64), Int64Ty, "new_hi");
18840 emitLeadingFence(Builder, CI, Ord);
18841 Value *LoHi =
18842 Builder.CreateCall(IntCmpXchg, {AlignedAddr, CmpLo, CmpHi, NewLo, NewHi});
18843 emitTrailingFence(Builder, CI, Ord);
18844 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
18845 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
18846 Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
18847 Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
18848 return Builder.CreateOr(
18849 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
18850}
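// [Editorial note, not part of the upstream source] A source-level operation
// that can reach this hook is a 16-byte compare-and-swap on a 64-bit subtarget
// with quadword atomics (see shouldInlineQuadwordAtomics above), for example on
// a target such as -mcpu=pwr8 or newer (assumption; helper name is ours,
// illustration only):
#include <atomic>
static bool cas128(std::atomic<unsigned __int128> &Obj,
                   unsigned __int128 &Expected, unsigned __int128 Desired) {
  // When the cmpxchg is expanded via the masked-intrinsic path above, the
  // compare and store are carried out by llvm.ppc.cmpxchg.i128 on the two
  // i64 halves of Expected and Desired.
  return Obj.compare_exchange_strong(Expected, Desired);
}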
unsigned const MachineRegisterInfo * MRI
static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect, bool IsTailCall)
#define Success
static SDValue GeneratePerfectShuffle(unsigned ID, SDValue V1, SDValue V2, unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &dl)
GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit the specified operations t...
static bool isSignExtended(SDValue N, SelectionDAG &DAG)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
unsigned Intr
static std::pair< Register, unsigned > getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg)
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
static bool isLoad(int Opcode)
@ OP_COPY
static bool isFloatingPointZero(SDValue Op)
isFloatingPointZero - Return true if this is +0.0.
Function Alias Analysis Results
Atomic ordering constants.
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
This file contains the declarations for the subclasses of Constant, which represent the different fla...
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
Given that RA is a live propagate it s liveness to any other values it uses(according to Uses). void DeadArgumentEliminationPass
#define LLVM_DEBUG(X)
Definition: Debug.h:101
static RegisterPass< DebugifyModulePass > DM("debugify", "Attach debug info to everything")
This file defines the DenseMap class.
uint64_t Addr
uint64_t Size
bool End
Definition: ELF_riscv.cpp:480
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
const HexagonInstrInfo * TII
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, const SDLoc &dl)
CreateCopyOfByValArgument - Make a copy of an aggregate at address specified by "Src" to address "Dst...
IRTranslator LLVM IR MI
#define RegName(no)
static LVOptions Options
Definition: LVOptions.cpp:25
lazy value info
static bool isConstantOrUndef(const SDValue Op)
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
unsigned const TargetRegisterInfo * TRI
Module.h This file contains the declarations for the Module class.
LLVMContext & Context
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
cl::opt< bool > ANDIGlueBug("expose-ppc-andi-glue-bug", cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden)
static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
getCanonicalConstSplat - Build a canonical splat immediate of Val with an element size of SplatSize.
static bool IsSelectCC(MachineInstr &MI)
static const TargetRegisterClass * getRegClassForSVT(MVT::SimpleValueType SVT, bool IsPPC64, bool HasP8Vector, bool HasVSX)
static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign)
static bool needStackSlotPassParameters(const PPCSubtarget &Subtarget, const SmallVectorImpl< ISD::OutputArg > &Outs)
static bool isAlternatingShuffMask(const ArrayRef< int > &Mask, int NumElts)
static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG, SDValue Input, uint64_t Elems, uint64_t CorrectElems)
static cl::opt< bool > DisablePPCUnaligned("disable-ppc-unaligned", cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden)
static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG)
static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement, bool Swap, SDLoc &DL, SelectionDAG &DAG)
This function is called when we have proved that a SETCC node can be replaced by subtraction (and oth...
static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL)
static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet, SelectionDAG &DAG)
Set alignment flags based on whether or not the Frame Index is aligned.
static bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget)
static void updateForAIXShLibTLSModelOpt(TLSModel::Model &Model, SelectionDAG &DAG, const TargetMachine &TM)
updateForAIXShLibTLSModelOpt - Helper to initialize TLS model opt settings, and then apply the update...
static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N)
Used when computing address flags for selecting loads and stores.
static void CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64, SDValue Arg, int SPDiff, unsigned ArgOffset, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
CalculateTailCallArgDest - Remember Argument for later processing.
static bool callsShareTOCBase(const Function *Caller, const GlobalValue *CalleeGV, const TargetMachine &TM)
constexpr uint64_t AIXSmallTlsPolicySizeLimit
static bool isPCRelNode(SDValue N)
static void LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg, SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64, bool isTailCall, bool isVector, SmallVectorImpl< SDValue > &MemOpChains, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments, const SDLoc &dl)
LowerMemOpCallTo - Store the argument to the stack or remember it in case of tail calls.
static cl::opt< unsigned > PPCGatherAllAliasesMaxDepth("ppc-gather-alias-max-depth", cl::init(18), cl::Hidden, cl::desc("max depth when checking alias info in GatherAllAliases()"))
static bool areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC, CallingConv::ID CalleeCC)
static const MCPhysReg FPR[]
FPR - The set of FP registers that should be allocated for arguments on Darwin and AIX.
static SDNode * isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG)
isCallCompatibleAddress - Return the immediate to use if the specified 32-bit value is representable ...
static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotAlignment - Calculates the alignment of this argument on the stack.
static bool IsSelect(MachineInstr &MI)
static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V, bool HasDirectMove, bool HasP8Vector)
Do we have an efficient pattern in a .td file for this node?
static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &S)
static void setUsesTOCBasePtr(MachineFunction &MF)
static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG, const SDLoc &dl, const PPCSubtarget &Subtarget)
static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering, unsigned NumBytes)
EnsureStackAlignment - Round stack frame size up from NumBytes to ensure minimum alignment required f...
static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N, SelectionDAG &DAG)
static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth)
static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB)
static bool isFPExtLoad(SDValue Op)
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG, const SDLoc &dl, EVT DestVT=MVT::Other)
BuildIntrinsicOp - Return a unary operator intrinsic node with the specified intrinsic ID.
static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static void StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, SDValue Chain, const SmallVectorImpl< TailCallArgumentInfo > &TailCallArgs, SmallVectorImpl< SDValue > &MemOpChains, const SDLoc &dl)
StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
static const char AIXSSPCanaryWordName[]
static cl::opt< bool > UseAbsoluteJumpTables("ppc-use-absolute-jumptables", cl::desc("use absolute jump tables on ppc"), cl::Hidden)
static void setXFormForUnalignedFI(SDValue N, unsigned Flags, PPC::AddrMode &Mode)
static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign)
getMaxByValAlign - Helper for getByValTypeAlignment to determine the desired ByVal argument alignment...
static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned LHSStart, unsigned RHSStart)
isVMerge - Common function, used to match vmrg* shuffles.
static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget, unsigned &HiOpFlags, unsigned &LoOpFlags, const GlobalValue *GV=nullptr)
Return true if we should reference labels using a PICBase, set the HiOpFlags and LoOpFlags to the tar...
cl::opt< bool > DisableAutoPairedVecSt("disable-auto-paired-vec-st", cl::desc("disable automatically generated 32byte paired vector stores"), cl::init(true), cl::Hidden)
static void buildCallOperands(SmallVectorImpl< SDValue > &Ops, PPCTargetLowering::CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG, SmallVector< std::pair< unsigned, SDValue >, 8 > &RegsToPass, SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32", cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden)
static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget &ST)
Returns true if we should use a direct load into vector instruction (such as lxsd or lfd),...
static SDValue getDataClassTest(SDValue Op, FPClassTest Mask, const SDLoc &Dl, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableSCO("disable-ppc-sco", cl::desc("disable sibling call optimization on ppc"), cl::Hidden)
static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT)
static cl::opt< bool > DisablePPCPreinc("disable-ppc-preinc", cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden)
static Intrinsic::ID getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp)
static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotSize - Calculates the size reserved for this argument on the stack.
static int CalculateTailCallSPDiff(SelectionDAG &DAG, bool isTailCall, unsigned ParamSize)
CalculateTailCallSPDiff - Get the amount the stack pointer has to be adjusted to accommodate the argu...
static Instruction * callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id)
static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl< int > &ShuffV, int LHSMaxIdx, int RHSMinIdx, int RHSMaxIdx, int HalfVec, unsigned ValidLaneWidth, const PPCSubtarget &Subtarget)
static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, const SDLoc &dl)
static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC, SelectionDAG &DAG)
static SDValue isScalarToVec(SDValue Op)
static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl)
static cl::opt< bool > DisablePerfectShuffle("ppc-disable-perfect-shuffle", cl::desc("disable vector permute decomposition"), cl::init(true), cl::Hidden)
static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, bool &isDot, const PPCSubtarget &Subtarget)
getVectorCompareInfo - Given an intrinsic, return false if it is not a vector comparison.
static unsigned invertFMAOpcode(unsigned Opc)
static const SDValue * getNormalLoadInput(const SDValue &Op, bool &IsPermuted)
static cl::opt< unsigned > PPCMinimumJumpTableEntries("ppc-min-jump-table-entries", cl::init(64), cl::Hidden, cl::desc("Set minimum number of entries to use a jump table on PPC"))
static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op, unsigned &Opcode)
static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG, const PPCSubtarget &Subtarget, SDValue Chain=SDValue())
static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget)
static void PrepareTailCall(SelectionDAG &DAG, SDValue &InGlue, SDValue &Chain, const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp, SDValue FPOp, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain, SDValue OldRetAddr, SDValue OldFP, int SPDiff, const SDLoc &dl)
EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to the appropriate stack sl...
static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified amount.
static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG)
static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT, SelectionDAG &DAG, SDValue ArgValue, MVT LocVT, const SDLoc &dl)
static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet, SelectionDAG &DAG)
Given a node, compute flags that are used for address computation when selecting load and store instr...
cl::opt< bool > ANDIGlueBug
static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart)
static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize, unsigned LinkageSize, unsigned ParamAreaSize, unsigned &ArgOffset, unsigned &AvailableFPRs, unsigned &AvailableVRs)
CalculateStackSlotUsed - Return whether this argument will use its stack slot (instead of being passe...
static cl::opt< unsigned > PPCAIXTLSModelOptUseIEForLDLimit("ppc-aix-shared-lib-tls-model-opt-limit", cl::init(1), cl::Hidden, cl::desc("Set inclusive limit count of TLS local-dynamic access(es) in a " "function to use initial-exec"))
static unsigned getPPCStrictOpcode(unsigned Opc)
static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, SDValue CallSeqStart, const CallBase *CB, const SDLoc &dl, bool hasNest, const PPCSubtarget &Subtarget)
static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width)
static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV)
static bool isSplatBV(SDValue Op)
static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG)
static cl::opt< bool > DisableILPPref("disable-ppc-ilp-pref", cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden)
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int)
Check that the mask is shuffling N byte elements.
static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG)
Reduce the number of loads when building a vector.
static bool isValidPCRelNode(SDValue N)
const char LLVMTargetMachineRef TM
pre isel intrinsic Pre ISel Intrinsic Lowering
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
SI optimize exec mask operations pre RA
static const MCExpr * MaskShift(const MCExpr *Val, uint32_t Mask, uint32_t Shift, MCContext &Ctx)
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
static bool Enabled
Definition: Statistic.cpp:46
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
This file describes how to lower LLVM code to machine code.
This defines the Use class.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
static bool is64Bit(const char *name)
Value * RHS
Value * LHS
bool isFixed(unsigned ValNo) const
Definition: PPCCCState.h:68
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition: APFloat.cpp:5196
bool isDenormal() const
Definition: APFloat.h:1296
APInt bitcastToAPInt() const
Definition: APFloat.h:1210
Class for arbitrary precision integers.
Definition: APInt.h:76
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:212
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition: APInt.h:1385
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition: APInt.h:427
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:981
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1491
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1308
APInt abs() const
Get the absolute value.
Definition: APInt.h:1737
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:307
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition: APInt.h:413
bool getBoolValue() const
Convert APInt to a boolean value.
Definition: APInt.h:449
double bitsToDouble() const
Converts APInt bits to a double.
Definition: APInt.h:1671
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:418
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:284
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:274
An arbitrary precision integer that knows its signedness.
Definition: APSInt.h:23
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:539
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:748
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:760
@ Add
*p = old + v
Definition: Instructions.h:764
@ Or
*p = old | v
Definition: Instructions.h:772
@ Sub
*p = old - v
Definition: Instructions.h:766
@ And
*p = old & v
Definition: Instructions.h:768
@ Xor
*p = old ^ v
Definition: Instructions.h:774
@ UIncWrap
Increment one up to a maximum value.
Definition: Instructions.h:800
@ UDecWrap
Decrement one until a minimum value or zero.
Definition: Instructions.h:804
@ Nand
*p = ~(old & v)
Definition: Instructions.h:770
BinOp getOperation() const
Definition: Instructions.h:845
This is an SDNode representing atomic operations.
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:349
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
InstListType::const_iterator const_iterator
Definition: BasicBlock.h:166
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:206
const BlockAddress * getBlockAddress() const
The address of a basic block.
Definition: Constants.h:889
static BranchProbability getOne()
static BranchProbability getZero()
A "pseudo-class" with methods for operating on BUILD_VECTORs.
bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
CCState - This class holds information needed while lowering arguments and return values.
MachineFunction & getMachineFunction() const
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
MCRegister AllocateReg(MCPhysReg Reg)
AllocateReg - Attempt to allocate one register.
int64_t AllocateStack(unsigned Size, Align Alignment)
AllocateStack - Allocate a chunk of stack space with the specified size and alignment.
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
bool isVarArg() const
void addLoc(const CCValAssign &V)
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getReg(unsigned ValNo, MVT ValVT, unsigned RegNo, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
bool isMemLoc() const
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, unsigned RegNo, MVT LocVT, LocInfo HTP)
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1494
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1742
bool isStrictFP() const
Determine if the call requires strict floating point semantics.
Definition: InstrTypes.h:2239
CallingConv::ID getCallingConv() const
Definition: InstrTypes.h:1800
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Definition: InstrTypes.h:1662
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
Value * getCalledOperand() const
Definition: InstrTypes.h:1735
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
Definition: InstrTypes.h:1668
unsigned arg_size() const
Definition: InstrTypes.h:1685
Function * getCaller()
Helper to get the caller (the parent function).
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:268
This is the shared class of boolean and integer constants.
Definition: Constants.h:80
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition: Constant.h:41
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:238
unsigned getLargestLegalIntTypeSizeInBits() const
Returns the size of largest legal integer type size, or 0 if none are set.
Definition: DataLayout.cpp:900
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space.
Definition: DataLayout.cpp:878
Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
Definition: DataLayout.cpp:865
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:504
A debug info location.
Definition: DebugLoc.h:33
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition: FastISel.h:66
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:685
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:703
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition: Function.cpp:715
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:682
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:264
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:340
BasicBlockListType::const_iterator const_iterator
Definition: Function.h:69
arg_iterator arg_begin()
Definition: Function.h:818
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:358
size_t arg_size() const
Definition: Function.h:851
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:207
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition: Function.h:215
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:677
const GlobalValue * getGlobal() const
const GlobalObject * getAliaseeObject() const
Definition: Globals.cpp:563
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
Definition: GlobalValue.h:263
void setThreadLocalMode(ThreadLocalMode Val)
Definition: GlobalValue.h:267
bool hasHiddenVisibility() const
Definition: GlobalValue.h:250
StringRef getSection() const
Definition: Globals.cpp:174
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
bool isStrongDefinitionForLinker() const
Returns true if this global's definition will be the one chosen by the linker.
Definition: GlobalValue.h:631
bool hasComdat() const
Definition: GlobalValue.h:241
Type * getValueType() const
Definition: GlobalValue.h:296
bool hasProtectedVisibility() const
Definition: GlobalValue.h:251
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:94
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2516
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1437
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:174
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1416
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2021
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2007
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1497
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2412
const BasicBlock * getParent() const
Definition: Instruction.h:152
bool hasAtomicLoad() const LLVM_READONLY
Return true if this atomic instruction loads from memory.
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelType.h:42
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
Base class for LoadSDNode and StoreSDNode.
An instruction for reading from memory.
Definition: Instructions.h:184
bool isUnordered() const
Definition: Instructions.h:274
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
bool hasValue() const
TypeSize getValue() const
Context object for machine code objects.
Definition: MCContext.h:81
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:35
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
MCSymbolXCOFF * getQualNameSymbol() const
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:397
Metadata node.
Definition: Metadata.h:1067
Machine Value Type.
SimpleValueType SimpleTy
@ INVALID_SIMPLE_VALUE_TYPE
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool hasVAStart() const
Returns true if the function calls the llvm.va_start intrinsic.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
MCSymbol * getPICBaseSymbol() const
getPICBaseSymbol - Return a function-local symbol to represent the PIC base.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
MachineModuleInfo & getMMI() const
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
Definition: MachineInstr.h:69
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
A description of a memory reference used in the backend.
LocationSize getSize() const
Return the size in bytes of the memory reference.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value,.
const MCContext & getContext() const
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateImm(int64_t Val)
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Register getLiveInVirtReg(MCRegister PReg) const
getLiveInVirtReg - If PReg is a live-in physical register, return the corresponding live-in virtual r...
An SDNode that represents everything that will be needed to construct a MachineInstr.
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
This is an abstract virtual class for memory operations.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getBasePtr() const
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.h:293
uint64_t getReturnSaveOffset() const
getReturnSaveOffset - Return the previous frame offset to save the return address.
uint64_t getFramePointerSaveOffset() const
getFramePointerSaveOffset - Return the previous frame offset to save the frame pointer.
unsigned getLinkageSize() const
getLinkageSize - Return the size of the PowerPC ABI linkage area.
uint64_t getTOCSaveOffset() const
getTOCSaveOffset - Return the previous frame offset to save the TOC register – 64-bit SVR4 ABI only.
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
void setVarArgsNumFPR(unsigned Num)
unsigned getVarArgsNumFPR() const
void setVarArgsNumGPR(unsigned Num)
void appendParameterType(ParamType Type)
bool isAIXFuncTLSModelOptInitDone() const
void setMinReservedArea(unsigned size)
unsigned getVarArgsNumGPR() const
unsigned getMinReservedArea() const
void setVarArgsStackOffset(int Offset)
void setVarArgsFrameIndex(int Index)
void addLiveInAttr(Register VReg, ISD::ArgFlagsTy Flags)
This function associates attributes for each live-in virtual register.
void setFramePointerSaveIndex(int Idx)
static bool hasPCRelFlag(unsigned TF)
Definition: PPCInstrInfo.h:300
bool is32BitELFABI() const
Definition: PPCSubtarget.h:219
unsigned descriptorTOCAnchorOffset() const
Definition: PPCSubtarget.h:259
bool isAIXABI() const
Definition: PPCSubtarget.h:214
bool useSoftFloat() const
Definition: PPCSubtarget.h:174
const PPCFrameLowering * getFrameLowering() const override
Definition: PPCSubtarget.h:142
bool needsSwapsForVSXMemOps() const
Definition: PPCSubtarget.h:202
bool isPPC64() const
isPPC64 - Return true if we are generating code for 64-bit pointer mode.
bool isUsingPCRelativeCalls() const
bool usesFunctionDescriptors() const
True if the ABI is descriptor based.
Definition: PPCSubtarget.h:253
MCRegister getEnvironmentPointerRegister() const
Definition: PPCSubtarget.h:271
const PPCInstrInfo * getInstrInfo() const override
Definition: PPCSubtarget.h:145
bool isSVR4ABI() const
Definition: PPCSubtarget.h:215
unsigned getCPUDirective() const
getCPUDirective - Returns the -m directive specified for the cpu.
Definition: PPCSubtarget.h:134
POPCNTDKind hasPOPCNTD() const
Definition: PPCSubtarget.h:206
bool isLittleEndian() const
Definition: PPCSubtarget.h:181
bool isTargetLinux() const
Definition: PPCSubtarget.h:212
MCRegister getTOCPointerRegister() const
Definition: PPCSubtarget.h:277
MCRegister getStackPointerRegister() const
Definition: PPCSubtarget.h:289
bool is64BitELFABI() const
Definition: PPCSubtarget.h:218
bool isELFv2ABI() const
const PPCTargetMachine & getTargetMachine() const
Definition: PPCSubtarget.h:155
bool isPredictableSelectIsExpensive() const
Definition: PPCSubtarget.h:295
bool enableMachineScheduler() const override
Scheduling customization.
const PPCRegisterInfo * getRegisterInfo() const override
Definition: PPCSubtarget.h:152
bool isGVIndirectSymbol(const GlobalValue *GV) const
True if the GV will be accessed via an indirect symbol.
unsigned descriptorEnvironmentPointerOffset() const
Definition: PPCSubtarget.h:265
MachineBasicBlock * emitEHSjLjLongJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
CCAssignFn * ccAssignFnForCall(CallingConv::ID CC, bool Return, bool IsVarArg) const
bool isTruncateFree(Type *Ty1, Type *Ty2) const override
isTruncateFree - Return true if it's free to truncate a value of type Ty1 to type Ty2.
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
bool isFPExtFree(EVT DestVT, EVT SrcVT) const override
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
PPC::AddrMode SelectForceXFormMode(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
SelectForceXFormMode - Given the specified address, force it to be represented as an indexed [r+r] op...
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
bool hasInlineStackProbe(const MachineFunction &MF) const override
MachineBasicBlock * emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
const char * getTargetNodeName(unsigned Opcode) const override
getTargetNodeName() - This method returns the name of a target specific DAG node.
bool supportsTailCallFor(const CallBase *CB) const
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
MachineBasicBlock * emitProbedAlloca(MachineInstr &MI, MachineBasicBlock *MBB) const
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
MachineBasicBlock * EmitPartwordAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, bool is8bit, unsigned Opcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const override
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign EncodingAlignment) const
SelectAddressRegImm - Returns true if the address N can be represented by a base register plus a sign...
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
SDValue expandVSXLoadForLE(SDNode *N, DAGCombinerInfo &DCI) const
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
LowerAsmOperandForConstraint - Lower the specified operand into the Ops vector.
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
ReplaceNodeResults - Replace the results of node with an illegal result type with new values built ou...
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG, MaybeAlign EncodingAlignment=std::nullopt) const
SelectAddressRegReg - Given the specified addressed, check to see if it can be more efficiently repre...
MachineBasicBlock * EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, unsigned AtomicSize, unsigned BinOpcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const override
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressRegRegOnly - Given the specified addressed, force it to be represented as an indexed [r+...
bool useSoftFloat() const override
SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const override
Returns relocation base for the given PIC jumptable.
void insertSSPDeclarations(Module &M) const override
Inserts necessary declarations for SSP (stack protection) purpose.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
uint64_t getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override
getByValTypeAlignment - Return the desired alignment for ByVal aggregate function arguments in the ca...
bool enableAggressiveFMAFusion(EVT VT) const override
Return true if target always benefits from combining into FMA for a given value type.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
isLegalAddressingMode - Return true if the addressing mode represented by AM is legal for this target...
bool preferIncOfAddToSubOfNot(EVT VT) const override
These two forms are equivalent: sub y, (xor x, -1) add (add x, 1), y The variant with two add's is IR...
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself.
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPreIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mod...
bool isProfitableToHoist(Instruction *I) const override
isProfitableToHoist - Check if it is profitable to hoist instruction I to its dominator block.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint, return the type of constraint it is for this target.
const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const override
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
bool shallExtractConstSplatVectorElementToStore(Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const override
Return true if the target shall perform extract vector element and store given that the vector is kno...
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
It returns EVT::Other if the type should be determined using generic target-independent logic.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue expandVSXStoreForLE(SDNode *N, DAGCombinerInfo &DCI) const
void CollectTargetIntrinsicOperands(const CallInst &I, SmallVectorImpl< SDValue > &Ops, SelectionDAG &DAG) const override
bool useLoadStackGuardNode() const override
Override to support customized stack guard loading.
unsigned getStackProbeSize(const MachineFunction &MF) const
PPCTargetLowering(const PPCTargetMachine &TM, const PPCSubtarget &STI)
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster than a pair of fmul and fadd i...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Is unaligned memory access allowed for the given type, and is it fast relative to software emulation.
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
bool SelectAddressRegImm34(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
Similar to the 16-bit case but for instructions that take a 34-bit displacement field (prefixed loads...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
bool isJumpTableRelative() const override
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
LowerOperation - Provide custom lowering hooks for some operations.
PPC::AddrMode SelectOptimalAddrMode(const SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign Align) const
SelectOptimalAddrMode - Based on a node N and its Parent (a MemSDNode), compute the address flags of...
Value * getSDagStackGuard(const Module &M) const override
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
bool SelectAddressPCRel(SDValue N, SDValue &Base) const
SelectAddressPCRel - Represent the specified address as pc relative to be represented as [pc+imm].
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
getSetCCResultType - Return the ISD::SETCC ValueType
bool SelectAddressEVXRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressEVXRegReg - Given the specified address, check to see if it can be more efficiently re...
bool isLegalICmpImmediate(int64_t Imm) const override
isLegalICmpImmediate - Return true if the specified immediate is legal icmp immediate,...
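Sketch of a plausible override, assuming compare instructions whose immediate field holds a 16-bit signed or unsigned value (that range is an assumption for illustration, not a statement about the PPC encodings):

#include "llvm/Support/MathExtras.h"

// Hypothetical override: accept immediates that fit a 16-bit compare field.
bool MyTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  return isInt<16>(Imm) || isUInt<16>(Imm);
}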
bool isAccessedAsGotIndirect(SDValue N) const
Align getPrefLoopAlignment(MachineLoop *ML) const override
Return the preferred loop alignment.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const override
createFastISel - This method returns a target-specific FastISel object, or null if the target does no...
bool shouldInlineQuadwordAtomics() const
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isLegalAddImmediate(int64_t Imm) const override
isLegalAddImmediate - Return true if the specified immediate is legal add immediate,...
Common code between 32-bit and 64-bit PowerPC targets.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Definition: DerivedTypes.h:662
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
void dump() const
Dump this node, for debugging.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
op_iterator op_end() const
op_iterator op_begin() const
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
void dump() const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
static SectionKind getMetadata()
Definition: SectionKind.h:188
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:225
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:722
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS)
Helper function to make it easier to build Select's if you just have operands and don't want to check...
SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack.
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:474
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
Align getEVTAlign(EVT MemoryVT) const
Compute the default alignment value for the given type.
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
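Usage sketch with assumed names (DAG, DL, X, Y and VT come from a surrounding combine): getNOT builds the (xor Val, -1) form directly.

// Sketch: materialize NOT(Y) explicitly, then AND it with X.
SDValue NotY   = DAG.getNOT(DL, Y, VT);              // (xor Y, -1)
SDValue AndNot = DAG.getNode(ISD::AND, DL, VT, X, NotY);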
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:478
static constexpr unsigned MaxRecursionDepth
Definition: SelectionDAG.h:448
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:732
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:828
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), AAResults *AA=nullptr)
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:472
SDValue getTargetFrameIndex(int FI, EVT VT)
Definition: SelectionDAG.h:727
SDValue getTokenFactor(const SDLoc &DL, SmallVectorImpl< SDValue > &Vals)
Creates a new TokenFactor containing Vals.
bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const
Return true if loads are next to each other and can be merged.
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
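Quick usage sketch (DAG, DL, Ptr and PtrVT are assumed from a surrounding lowering routine); getTargetConstant is the variant to use when the value must survive selection as an immediate operand.

// Sketch: add 4 to a pointer-sized value.
SDValue Four   = DAG.getConstant(4, DL, PtrVT);
SDValue Bumped = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, Four);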
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getMDNode(const MDNode *MD)
Return an MDNodeSDNode which holds an MDNode.
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
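Usage sketch with assumed names (MF, FI, Chain, Val, DL and PtrVT come from the surrounding lowering code): store a value to a known stack slot.

// Sketch: spill Val to frame index FI with an assumed 8-byte alignment.
SDValue FIN   = DAG.getFrameIndex(FI, PtrVT);
SDValue Store = DAG.getStore(Chain, DL, Val, FIN,
                             MachinePointerInfo::getFixedStack(MF, FI),
                             Align(8));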
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node that starts a new call frame, in which InSize bytes are set up inside ...
SDValue getRegister(unsigned Reg, EVT VT)
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:473
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:773
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
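Usage sketch (A, B, DL and DAG assumed): a signed maximum can be expressed as a single SELECT_CC node instead of a separate SETCC feeding a SELECT.

// Sketch: Max = (A > B) ? A : B as one node.
SDValue Max = DAG.getSelectCC(DL, A, B, A, B, ISD::SETGT);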
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:676
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:768
bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:469
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:799
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:845
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
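Sketch of a common pattern (Op and DAG assumed from a surrounding combine): query the known bits and only proceed when enough leading zeros are proven.

#include "llvm/Support/KnownBits.h"

KnownBits Known = DAG.computeKnownBits(Op);
if (Known.countMinLeadingZeros() >= 32) {
  // At least the top 32 bits of Op are known zero; narrowing to i32 is safe.
}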
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
SDValue getCondCode(ISD::CondCode Cond)
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
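Typical use, sketched with assumed names: prove the top half of a 64-bit value is already zero before treating a truncate-then-zero-extend round trip as a no-op.

// Sketch: Op is an i64 SDValue from a surrounding combine.
APInt HighHalf = APInt::getHighBitsSet(64, 32);
if (DAG.MaskedValueIsZero(Op, HighHalf)) {
  // Upper 32 bits are known zero, so (zext (trunc Op)) reproduces Op.
}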
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
Definition: SelectionDAG.h:485
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue getMCSymbol(MCSymbol *Sym, EVT VT)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:739
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:554
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
size_type size() const
Definition: SmallPtrSet.h:94
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:360
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:342
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:427
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:166
void clear()
Definition: SmallSet.h:218
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getValue() const
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:131
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
Class to represent struct types.
Definition: DerivedTypes.h:216
Information about stack frame layout on the target.
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
TargetInstrInfo - Interface to description of machine instruction set.
Provides information about what library functions are available for the current target.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
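Illustrative fragment only (the operations and types below are placeholders, not a description of the PPC configuration): a target-lowering constructor typically strings together calls like these.

// Sketch of constructor-time legalization configuration.
setOperationAction(ISD::SELECT_CC, MVT::i32,   Expand); // lower via branches
setOperationAction(ISD::BR_JT,     MVT::Other, Expand); // no jump tables
setOperationAction(ISD::FSIN,      MVT::f64,   Expand); // expand to a libcall
setOperationAction(ISD::BITCAST,   MVT::f32,   Custom); // handled in LowerOperation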
virtual bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool shouldExpandBuildVectorWithShuffles(EVT, unsigned DefinedValues) const
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
void setMinStackArgumentAlignment(Align Alignment)
Set the minimum stack alignment of an argument.
virtual MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
const TargetMachine & getTargetMachine() const
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual Value * getSDagStackGuard(const Module &M) const
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
Sched::Preference getSchedulingPreference() const
Return target scheduling preference.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const
When splitting a value of the specified type into parts, does the Lo or Hi part come first?...
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
virtual Align getPrefLoopAlignment(MachineLoop *ML=nullptr) const
Return the preferred loop alignment.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL, bool LegalTypes=true) const
Returns the type for the shift amount of a shift opcode.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual bool isJumpTableRelative() const
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
virtual AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setMinimumJumpTableEntries(unsigned Val)
Indicate the minimum number of blocks to generate jump tables.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
unsigned GatherAllAliasesMaxDepth
Depth that GatherAllAliases should continue looking for chain dependencies when trying to find a more...
NegatibleCost
Enum that specifies when a float negation is beneficial.
std::vector< ArgListEntry > ArgListTy
void setHasMultipleConditionRegisters(bool hasManyRegs=true)
Tells the code generator that the target has multiple (allocatable) condition registers that can be u...
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
virtual void insertSSPDeclarations(Module &M) const
Inserts necessary declarations for SSP (stack protection) purpose.
void setJumpIsExpensive(bool isExpensive=true)
Tells the code generator not to expand logic operations on comparison predicates into separate sequen...
virtual MCSymbol * getFunctionEntryPointSymbol(const GlobalValue *Func, const TargetMachine &TM) const
If supported, return the function entry point symbol.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
virtual bool useLoadStackGuardNode() const
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const
Check whether a given call node is in tail position within its function.
virtual SDValue getSqrtResultForDenormInput(SDValue Operand, SelectionDAG &DAG) const
Return a target-dependent result if the input operand is not suitable for use with a square root esti...
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:76
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
Reloc::Model getRelocationModel() const
Returns the code generation relocation model.
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
TargetOptions Options
CodeModel::Model getCodeModel() const
Returns the code model.
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
unsigned NoInfsFPMath
NoInfsFPMath - This flag is enabled when the -enable-no-infs-fp-math flag is specified on the command...
unsigned PPCGenScalarMASSEntries
Enables scalar MASS conversions.
unsigned NoNaNsFPMath
NoNaNsFPMath - This flag is enabled when the -enable-no-nans-fp-math flag is specified on the command...
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:342
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:265
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:154
bool isEmptyTy() const
Return true if this type is empty, that is, it has no elements or all of its elements are empty.
@ FloatTyID
32-bit floating point type
Definition: Type.h:58
@ DoubleTyID
64-bit floating point type
Definition: Type.h:59
@ FP128TyID
128-bit floating point type (112-bit significand)
Definition: Type.h:61
static Type * getVoidTy(LLVMContext &C)
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition: Type.h:302
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:157
bool isFunctionTy() const
True if this is an instance of FunctionType.
Definition: Type.h:246
static IntegerType * getInt64Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
TypeID getTypeID() const
Return the type id for the type.
Definition: Type.h:137
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:348
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
self_iterator getIterator()
Definition: ilist_node.h:109
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
Definition: CallingConv.h:47
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:751
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:237
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to; it returns an output chain.
Definition: ISDOpcodes.h:1133
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1129
@ TargetConstantPool
Definition: ISDOpcodes.h:168
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:477
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition: ISDOpcodes.h:44
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
Definition: ISDOpcodes.h:147
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:251
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:715
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1162
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1248
@ STRICT_FCEIL
Definition: ISDOpcodes.h:427
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:270
@ FMAXNUM_IEEE
Definition: ISDOpcodes.h:986
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:240
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1038
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:784
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:484
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:199
@ RETURNADDR
Definition: ISDOpcodes.h:95
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:791
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:544
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:391
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:689
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:256
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:478
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:914
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:904
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:230
@ INIT_TRAMPOLINE
INIT_TRAMPOLINE - This corresponds to the init_trampoline intrinsic.
Definition: ISDOpcodes.h:1206
@ FLDEXP
FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
Definition: ISDOpcodes.h:940
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition: ISDOpcodes.h:412
@ GlobalTLSAddress
Definition: ISDOpcodes.h:79
@ FrameIndex
Definition: ISDOpcodes.h:80
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:775
@ STRICT_UINT_TO_FP
Definition: ISDOpcodes.h:451
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:621
@ TargetExternalSymbol
Definition: ISDOpcodes.h:169
@ BR
Control flow instructions. These all have token chains.
Definition: ISDOpcodes.h:1054
@ TargetJumpTable
Definition: ISDOpcodes.h:167
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1228
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:995
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:931
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1084
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1063
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition: ISDOpcodes.h:508
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:350
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:728
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1244
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:223
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1158
@ TargetGlobalAddress
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition: ISDOpcodes.h:164
@ STRICT_FTRUNC
Definition: ISDOpcodes.h:431
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined 0 Round to 0 1 Round to nearest, ties to even 2 Round to ...
Definition: ISDOpcodes.h:881
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:652
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:706
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:601
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:574
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
Definition: ISDOpcodes.h:985
@ STRICT_FMAXNUM
Definition: ISDOpcodes.h:425
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:536
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:781
@ STRICT_FMINNUM
Definition: ISDOpcodes.h:426
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:743
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1255
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:972
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:1048
@ ConstantPool
Definition: ISDOpcodes.h:82
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:799
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:675
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:889
@ STRICT_FROUND
Definition: ISDOpcodes.h:429
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:737
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition: ISDOpcodes.h:450
@ STRICT_FFLOOR
Definition: ISDOpcodes.h:428
@ INLINEASM_BR
INLINEASM_BR - Branching version of inline asm. Used by asm-goto.
Definition: ISDOpcodes.h:1104
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition: ISDOpcodes.h:129
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:94
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:444
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition: ISDOpcodes.h:466
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:443
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:837
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:1189
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:471
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:681
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1215
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:184
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:280
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:401
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:525
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:870
@ INLINEASM
INLINEASM - Represents an inline asm block.
Definition: ISDOpcodes.h:1101
@ STRICT_FNEARBYINT
Definition: ISDOpcodes.h:424
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj....
Definition: ISDOpcodes.h:141
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:787
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:1153
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1077
@ BlockAddress
Definition: ISDOpcodes.h:84
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:764
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:61
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:494
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:341
@ AssertZext
Definition: ISDOpcodes.h:62
@ CALLSEQ_START
CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end of a call sequence,...
Definition: ISDOpcodes.h:1147
@ STRICT_FRINT
Definition: ISDOpcodes.h:423
@ GET_DYNAMIC_AREA_OFFSET
GET_DYNAMIC_AREA_OFFSET - get offset from native SP to the address of the most recent dynamic alloca.
Definition: ISDOpcodes.h:1327
@ ADJUST_TRAMPOLINE
ADJUST_TRAMPOLINE - This corresponds to the adjust_trampoline intrinsic.
Definition: ISDOpcodes.h:1212
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:192
@ TargetGlobalTLSAddress
Definition: ISDOpcodes.h:165
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:516
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
bool isUNINDEXEDLoad(const SDNode *N)
Returns true if the specified node is an unindexed load.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is an EXTLOAD.
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
Definition: ISDOpcodes.h:1563
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1479
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1530
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1510
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
Definition: ISDOpcodes.h:1569
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1471
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ VecShuffle
Definition: NVPTX.h:96
@ MO_TLSLDM_FLAG
MO_TLSLDM_FLAG - on AIX the ML relocation type is only valid for a reference to a TOC symbol from the...
Definition: PPC.h:146
@ MO_PIC_LO_FLAG
MO_PIC_LO_FLAG = MO_PIC_FLAG | MO_LO.
Definition: PPC.h:194
@ MO_TPREL_PCREL_FLAG
MO_TPREL_PCREL_FLAG = MO_PCREL_FLAG | MO_TPREL_FLAG.
Definition: PPC.h:197
@ MO_GOT_TPREL_PCREL_FLAG
MO_GOT_TPREL_PCREL_FLAG - A combination of flags; if these bits are set, they should produce the reloc...
Definition: PPC.h:172
@ MO_GOT_PCREL_FLAG
MO_GOT_PCREL_FLAG = MO_PCREL_FLAG | MO_GOT_FLAG.
Definition: PPC.h:203
@ MO_TLSGDM_FLAG
MO_TLSGDM_FLAG - If this bit is set the symbol reference is relative to the region handle of TLS Gene...
Definition: PPC.h:154
@ MO_PCREL_FLAG
MO_PCREL_FLAG - If this bit is set, the symbol reference is relative to the current instruction addre...
Definition: PPC.h:121
@ MO_TLSLD_FLAG
MO_TLSLD_FLAG - If this bit is set the symbol reference is relative to TLS Local Dynamic model.
Definition: PPC.h:150
@ MO_TLS_PCREL_FLAG
MO_TLS_PCREL_FLAG = MO_PCREL_FLAG | MO_TLS.
Definition: PPC.h:200
@ MO_TPREL_HA
Definition: PPC.h:179
@ MO_PLT
On PPC, the 12 bits are not enough for all target operand flags.
Definition: PPC.h:113
@ MO_TLS
Symbol for VK_PPC_TLS fixup attached to an ADD instruction.
Definition: PPC.h:188
@ MO_TPREL_FLAG
MO_TPREL_FLAG - If this bit is set, the symbol reference is relative to the thread pointer and the sy...
Definition: PPC.h:140
@ MO_TPREL_LO
Definition: PPC.h:178
@ MO_LO
MO_LO, MO_HA - lo16(symbol) and ha16(symbol)
Definition: PPC.h:175
@ MO_GOT_TLSLD_PCREL_FLAG
MO_GOT_TLSLD_PCREL_FLAG - A combination of flags; if these bits are set, they should produce the reloc...
Definition: PPC.h:166
@ MO_PIC_HA_FLAG
MO_PIC_HA_FLAG = MO_PIC_FLAG | MO_HA.
Definition: PPC.h:191
@ MO_TLSGD_FLAG
MO_TLSGD_FLAG - If this bit is set the symbol reference is relative to TLS General Dynamic model for ...
Definition: PPC.h:135
@ MO_GOT_TLSGD_PCREL_FLAG
MO_GOT_TLSGD_PCREL_FLAG - A combination of flags; if these bits are set, they should produce the reloc...
Definition: PPC.h:160
@ MO_HA
Definition: PPC.h:176
@ MO_PIC_FLAG
MO_PIC_FLAG - If this bit is set, the symbol reference is relative to the function's picbase,...
Definition: PPC.h:117
@ SEXT_LD_SPLAT
VSRC, CHAIN = SEXT_LD_SPLAT, CHAIN, Ptr - a splatting load memory that sign-extends.
@ FCTIDUZ
Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for unsigned integers with round ...
@ ADDI_TLSGD_L_ADDR
G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSGD_L and GET_TLS_ADDR unti...
@ FSQRT
Square root instruction.
@ STRICT_FCFID
Constrained integer-to-floating-point conversion instructions.
@ DYNALLOC
The following two target-specific nodes are used for calls through function pointers in the 64-bit SV...
@ COND_BRANCH
CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This corresponds to the COND_BRANCH pseudo ...
@ TLSLD_AIX
[GP|G8]RC = TLSLD_AIX, TOC_ENTRY(module handle) Op that requires a single input of the module handle ...
@ CALL_RM
The variants that implicitly define rounding mode for calls with strictfp semantics.
@ STORE_VEC_BE
CHAIN = STORE_VEC_BE CHAIN, VSRC, Ptr - Occurs only for little endian.
@ BDNZ
CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based loops.
@ MTVSRZ
Direct move from a GPR to a VSX register (zero)
@ SRL
These nodes represent PPC shifts.
@ VECINSERT
VECINSERT - The PPC vector insert instruction.
@ LXSIZX
GPRC, CHAIN = LXSIZX, CHAIN, Ptr, ByteWidth - This is a load of an integer smaller than 64 bits into ...
@ FNMSUB
FNMSUB - Negated multiply-subtract instruction.
@ RFEBB
CHAIN = RFEBB CHAIN, State - Return from event-based branch.
@ FCTIDZ
FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64 operand, producing an f64 value...
@ SC
CHAIN = SC CHAIN, Imm128 - System call.
@ GET_TLS_ADDR
x3 = GET_TLS_ADDR x3, Symbol - For the general-dynamic TLS model, produces a call to __tls_get_addr(s...
@ XXSPLTI32DX
XXSPLTI32DX - The PPC XXSPLTI32DX instruction.
@ ANDI_rec_1_EQ_BIT
i1 = ANDI_rec_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the eq or gt bit of CR0 after ex...
@ FRE
Reciprocal estimate instructions (unary FP ops).
@ ADDIS_GOT_TPREL_HA
G8RC = ADDIS_GOT_TPREL_HA x2, Symbol - Used by the initial-exec TLS model, produces an ADDIS8 instruc...
@ CLRBHRB
CHAIN = CLRBHRB CHAIN - Clear branch history rolling buffer.
@ STORE_COND
CHAIN,Glue = STORE_COND CHAIN, GPR, Ptr The store conditional instruction ST[BHWD]ARX that produces a...
@ SINT_VEC_TO_FP
Extract a subvector from signed integer vector and convert to FP.
@ EXTRACT_SPE
Extract SPE register component, second argument is high or low.
@ XXSWAPD
VSRC, CHAIN = XXSWAPD CHAIN, VSRC - Occurs only for little endian.
@ ADDI_TLSLD_L_ADDR
G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSLD_L and GET_TLSLD_ADDR un...
@ ATOMIC_CMP_SWAP_8
ATOMIC_CMP_SWAP - the exact same as the target-independent nodes except they ensure that the compare ...
@ ST_VSR_SCAL_INT
Store scalar integers from VSR.
@ VCMP
RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP* instructions.
@ BCTRL
CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a BCTRL instruction.
@ BUILD_SPE64
BUILD_SPE64 and EXTRACT_SPE are analogous to BUILD_PAIR and EXTRACT_ELEMENT but take f64 arguments in...
@ LFIWZX
GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point load which zero-extends from a 32-bit inte...
@ RET_GLUE
Return with a glue operand, matched by 'blr'.
@ SCALAR_TO_VECTOR_PERMUTED
PowerPC instructions that have SCALAR_TO_VECTOR semantics tend to place the value into the least sign...
@ EXTRACT_VSX_REG
EXTRACT_VSX_REG = Extract one of the underlying vsx registers of an accumulator or pair register.
@ STXSIX
STXSIX - The STXSI[bh]X instruction.
@ MAT_PCREL_ADDR
MAT_PCREL_ADDR = Materialize a PC Relative address.
@ MFOCRF
R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
@ XXSPLT
XXSPLT - The PPC VSX splat instructions.
@ TOC_ENTRY
GPRC = TOC_ENTRY GA, TOC Loads the entry for GA from the TOC, where the TOC base is given by the last...
@ XXPERMDI
XXPERMDI - The PPC XXPERMDI instruction.
@ ADDIS_DTPREL_HA
G8RC = ADDIS_DTPREL_HA x3, Symbol - For the local-dynamic TLS model, produces an ADDIS8 instruction t...
@ ADD_TLS
G8RC = ADD_TLS G8RReg, Symbol - Can be used by the initial-exec and local-exec TLS models,...
@ MTVSRA
Direct move from a GPR to a VSX register (algebraic)
@ VADD_SPLAT
VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded during instruction selection to optimi...
@ PPC32_GOT
GPRC = address of GLOBAL_OFFSET_TABLE.
@ ADDI_DTPREL_L
G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS model, produces an ADDI8 instruction ...
@ BCTRL_LOAD_TOC
CHAIN,FLAG = BCTRL(CHAIN, ADDR, INFLAG) - The combination of a bctrl instruction and the TOC reload r...
@ PPC32_PICGOT
GPRC = address of GLOBAL_OFFSET_TABLE.
@ FCFID
FCFID - The FCFID instruction, taking an f64 operand and producing and f64 value containing the FP re...
@ CR6SET
ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg calls
@ LBRX
GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a byte-swapping load instruction.
@ GET_TLS_MOD_AIX
x3 = GET_TLS_MOD_AIX _$TLSML - For the AIX local-dynamic TLS model, produces a call to ....
@ LD_VSX_LH
VSRC, CHAIN = LD_VSX_LH CHAIN, Ptr - This is a floating-point load of a v2f32 value into the lower ha...
@ PROBED_ALLOCA
To avoid stack clash, allocation is performed by block and each block is probed.
@ XXMFACC
XXMFACC = This corresponds to the xxmfacc instruction.
@ ADDIS_TLSGD_HA
G8RC = ADDIS_TLSGD_HA x2, Symbol - For the general-dynamic TLS model, produces an ADDIS8 instruction ...
@ ACC_BUILD
ACC_BUILD = Build an accumulator register from 4 VSX registers.
@ GlobalBaseReg
The result of the mflr at function entry, used for PIC code.
@ LXVD2X
VSRC, CHAIN = LXVD2X_LE CHAIN, Ptr - Occurs only for little endian.
@ XSMAXC
XSMAXC[DQ]P, XSMINC[DQ]P - C-type min/max instructions.
@ CALL
CALL - A direct function call.
@ MTCTR
CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a MTCTR instruction.
@ TC_RETURN
TC_RETURN - A tail call return.
@ STFIWX
STFIWX - The STFIWX instruction.
@ LD_SPLAT
VSRC, CHAIN = LD_SPLAT, CHAIN, Ptr - a splatting load memory instructions such as LXVDSX,...
@ VCMP_rec
RESVEC, OUTFLAG = VCMP_rec(LHS, RHS, OPC) - Represents one of the altivec VCMP*_rec instructions.
@ MFFS
F8RC = MFFS - This moves the FPSCR (not modeled) into the register.
@ PADDI_DTPREL
G8RC = PADDI_DTPREL x3, Symbol - For the pc-rel based local-dynamic TLS model, produces a PADDI8 inst...
@ BUILD_FP128
Direct move of 2 consecutive GPR to a VSX register.
@ VEXTS
VEXTS, ByteWidth - takes an input in VSFRC and produces an output in VSFRC that is sign-extended from...
@ TLS_LOCAL_EXEC_MAT_ADDR
TLS_LOCAL_EXEC_MAT_ADDR = Materialize an address for TLS global address when using local exec access ...
@ VPERM
VPERM - The PPC VPERM Instruction.
@ ADDIS_TLSLD_HA
G8RC = ADDIS_TLSLD_HA x2, Symbol - For the local-dynamic TLS model, produces an ADDIS8 instruction th...
@ XXSPLTI_SP_TO_DP
XXSPLTI_SP_TO_DP - The PPC VSX splat instructions for immediates for converting immediate single prec...
@ GET_TLSLD_ADDR
x3 = GET_TLSLD_ADDR x3, Symbol - For the local-dynamic TLS model, produces a call to __tls_get_addr(s...
@ ADDI_TLSGD_L
x3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS model, produces an ADDI8 instruction t...
@ DYNAREAOFFSET
This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to compute an offset from native ...
@ PAIR_BUILD
PAIR_BUILD = Build a vector pair register from 2 VSX registers.
@ STRICT_FADDRTZ
Constrained floating point add in round-to-zero mode.
@ FTSQRT
Test instruction for software square root.
@ FP_EXTEND_HALF
FP_EXTEND_HALF(VECTOR, IDX) - Custom extend upper (IDX=0) half or lower (IDX=1) half of v4f32 to v2f6...
@ CMPB
The CMPB instruction (takes two operands of i32 or i64).
@ VECSHL
VECSHL - The PPC vector shift left instruction.
@ ADDI_TLSLD_L
x3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS model, produces an ADDI8 instruction tha...
@ FADDRTZ
F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding towards zero.
@ ZEXT_LD_SPLAT
VSRC, CHAIN = ZEXT_LD_SPLAT, CHAIN, Ptr - a splatting load memory that zero-extends.
@ SRA_ADDZE
The combination of sra[wd]i and addze used to implement signed integer division by a power of 2.
@ EXTSWSLI
EXTSWSLI = The PPC extswsli instruction, which does an extend-sign word and shift left immediate.
@ STXVD2X
CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
@ TLSGD_AIX
GPRC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY / G8RC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY - Op that combines two re...
@ UINT_VEC_TO_FP
Extract a subvector from unsigned integer vector and convert to FP.
@ GET_TPOINTER
x3 = GET_TPOINTER - Used for the local- and initial-exec TLS model on 32-bit AIX, produces a call to ...
@ LXVRZX
LXVRZX - Load VSX Vector Rightmost and Zero Extend. This node represents v1i128 BUILD_VECTOR of a zero...
@ MFBHRBE
GPRC, CHAIN = MFBHRBE CHAIN, Entry, Dummy - Move from branch history rolling buffer entry.
@ FCFIDU
Newer FCFID[US] integer-to-floating-point conversion instructions for unsigned integers and single-pr...
@ FSEL
FSEL - Traditional three-operand fsel node.
@ SWAP_NO_CHAIN
An SDNode for swaps that are not associated with any loads/stores and thereby have no chain.
@ LOAD_VEC_BE
VSRC, CHAIN = LOAD_VEC_BE CHAIN, Ptr - Occurs only for little endian.
@ LFIWAX
GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point load which sign-extends from a 32-bit inte...
@ STBRX
CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a byte-swapping store instruction.
@ LD_GOT_TPREL_L
G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec TLS model, produces a LD instruction ...
@ MFVSR
Direct move from a VSX register to a GPR.
@ TLS_DYNAMIC_MAT_PCREL_ADDR
TLS_DYNAMIC_MAT_PCREL_ADDR = Materialize a PC Relative address for TLS global address when using dyna...
@ Hi
Hi/Lo - These represent the high and low 16-bit parts of a global address respectively.
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG)
get_VSPLTI_elt - If this is a build_vector of constants which can be formed by using a vspltis[bhw] i...
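As a rough, hypothetical sketch of how a BUILD_VECTOR lowering could query this helper: BVN and DAG are assumed to be the BUILD_VECTOR node and the current SelectionDAG, and ByteSize 4 selects the vspltisw form.
// Sketch only: if the build_vector can be materialized with a single
// vspltisw, get_VSPLTI_elt returns the splat immediate as an SDValue.
if (SDValue SplatImm = PPC::get_VSPLTI_elt(BVN, /*ByteSize=*/4, DAG)) {
  // Emit a splat-immediate instead of loading the vector constant from memory.
}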
bool isXXBRDShuffleMask(ShuffleVectorSDNode *N)
isXXBRDShuffleMask - Return true if this is a shuffle mask suitable for a XXBRD instruction.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo)
bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for a VMRGH* instruction with the ...
bool isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a VPKUDUM instruction.
bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGEOShuffleMask - Return true if this is a shuffle mask suitable for a VMRGEW or VMRGOW instructi...
bool isXXBRQShuffleMask(ShuffleVectorSDNode *N)
isXXBRQShuffleMask - Return true if this is a shuffle mask suitable for a XXBRQ instruction.
bool isXXBRWShuffleMask(ShuffleVectorSDNode *N)
isXXBRWShuffleMask - Return true if this is a shuffle mask suitable for a XXBRW instruction.
@ DIR_PWR_FUTURE
Definition: PPCSubtarget.h:64
bool isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXPERMDIShuffleMask - Return true if this is a shuffle mask suitable for a XXPERMDI instruction.
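A hedged usage sketch; SVN and Subtarget are assumed to be the ShuffleVectorSDNode being lowered and the current PPCSubtarget.
// Sketch: on a match, ShiftElts holds the XXPERMDI DM field and Swap says
// whether the two source vectors must be exchanged first.
unsigned ShiftElts;
bool Swap;
if (PPC::isXXPERMDIShuffleMask(SVN, ShiftElts, Swap,
                               Subtarget.isLittleEndian())) {
  // Lower the VECTOR_SHUFFLE to a single XXPERMDI.
}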
bool isXXBRHShuffleMask(ShuffleVectorSDNode *N)
isXXBRHShuffleMask - Return true if this is a shuffle mask suitable for a XXBRH instruction.
unsigned getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize, SelectionDAG &DAG)
getSplatIdxForPPCMnemonics - Return the splat index as a value that is appropriate for PPC mnemonics ...
bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXSLDWIShuffleMask - Return true if this is a shuffle mask suitable for a XXSLDWI instruction.
int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift amount, otherwise return -1.
bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for a VMRGL* instruction with the ...
bool isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, unsigned &InsertAtByte, bool &Swap, bool IsLE)
isXXINSERTWMask - Return true if this VECTOR_SHUFFLE can be handled by the XXINSERTW instruction intr...
bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize)
isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand specifies a splat of a singl...
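A small assumed example combining this predicate with getSplatIdxForPPCMnemonics above; SVN and DAG are assumed, and EltSize 4 corresponds to word splats.
// Sketch: detect a word splat and compute the index as VSPLTW expects it.
if (PPC::isSplatShuffleMask(SVN, /*EltSize=*/4)) {
  unsigned SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVN, 4, DAG);
  // SplatIdx can be used directly as the UIM field of a VSPLTW.
}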
bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a VPKUWUM instruction.
bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a VPKUHUM instruction.
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
@ Define
Register definition.
@ GeneralDynamic
Definition: CodeGen.h:46
Reg
All possible values of the reg field in the ModR/M byte.
@ XMC_PR
Program Code.
Definition: XCOFF.h:105
@ XTY_ER
External reference.
Definition: XCOFF.h:241
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
constexpr double e
Definition: MathExtras.h:31
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:236
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:456
static bool isIndirectCall(const MachineInstr &MI)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
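A minimal, self-contained illustration of the range-based wrapper; the helper name and the undef-mask check are made up for this sketch.
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"

// Returns true when every mask element is negative, i.e. every element of a
// VECTOR_SHUFFLE mask is undef. Purely illustrative.
static bool allUndef(const llvm::SmallVectorImpl<int> &Mask) {
  return llvm::all_of(Mask, [](int M) { return M < 0; });
}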
bool checkConvertToNonDenormSingle(APFloat &ArgAPFloat)
void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr, SmallVectorImpl< ISD::OutputArg > &Outs, const TargetLowering &TLI, const DataLayout &DL)
Given an LLVM IR type and return type attributes, compute the return value EVTs and flags,...
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
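A hedged fragment showing the general shape of a BuildMI call as it might appear in a custom inserter; BB, MI, DL, DestReg, SrcReg, and TII are assumed to come from the surrounding MachineFunction context and are not taken from this file.
// Sketch: emit "addi DestReg, SrcReg, 1" before MI. Opcode and operands are
// illustrative only.
BuildMI(*BB, MI, DL, TII->get(PPC::ADDI), DestReg)
    .addReg(SrcReg)
    .addImm(1);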
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
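An assumed DAG-combine-style fragment using this helper together with isNullConstant above; Op is assumed to be the SDValue under inspection.
// Sketch: look through any bitcasts and test for an integer zero constant.
SDValue Src = peekThroughBitcasts(Op.getOperand(0));
if (isNullConstant(Src)) {
  // The (possibly bitcast) operand is the constant zero.
}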
bool CC_PPC32_SVR4_ByVal(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:145
bool isIntS16Immediate(SDNode *N, int16_t &Imm)
isIntS16Immediate - This method tests to see if the node is either a 32-bit or 64-bit immediate,...
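A small assumed sketch; N is taken to be a node whose second operand is being tested.
// Sketch: check whether the RHS fits the signed 16-bit SI field used by
// addi and D-form memory instructions.
int16_t Imm;
if (isIntS16Immediate(N->getOperand(1).getNode(), Imm)) {
  // Fold the constant into the immediate field instead of materializing it.
}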
bool CC_PPC32_SVR4_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64-bit edition).
Definition: MathExtras.h:280
static bool isRunOfOnes64(uint64_t Val, unsigned &MB, unsigned &ME)
int countr_zero(T Val)
Count the number of 0s from the least significant bit to the most significant bit, stopping at the first 1.
Definition: bit.h:215
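A minimal illustration of this helper together with isPowerOf2_64 above; the value 4096 is arbitrary.
#include "llvm/ADT/bit.h"
#include "llvm/Support/MathExtras.h"
#include <cstdint>

// 4096 is a power of two; its only set bit is bit 12, so counting trailing
// zeros recovers log2 of the value.
static_assert(llvm::isPowerOf2_64(4096), "4096 is a power of two");
static const int Log2Of4096 = llvm::countr_zero(uint64_t(4096)); // 12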
unsigned M1(unsigned Val)
Definition: VE.h:376
bool isReleaseOrStronger(AtomicOrdering AO)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
bool RetCC_PPC_Cold(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:275
bool convertToNonDenormSingle(APInt &ArgAPInt)
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
bool CC_PPC32_SVR4(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
bool CC_PPC64_ELF(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool RetCC_PPC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:125
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Mod
The access may modify the value stored in memory.
bool isIntS34Immediate(SDNode *N, int64_t &Imm)
isIntS34Immediate - This method tests whether the value of the given node can be accurately represented as a sign ...
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
@ Mul
Product of integers.
@ Add
Sum of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
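A small assumed example of rounding a byte count up to a 16-byte boundary with this helper and the isAligned check above; the sizes are arbitrary.
#include "llvm/Support/Alignment.h"
#include <cstdint>

// alignTo rounds 13 up to the next multiple of 16; isAligned then confirms it.
static const uint64_t PaddedSize = llvm::alignTo(13, llvm::Align(16)); // 16
static const bool SixteenAligned = llvm::isAligned(llvm::Align(16), PaddedSize); // true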
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1914
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
Definition: VE.h:375
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
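An assumed combine-style fragment; N is taken to be the node being combined.
// Sketch: handle a constant RHS uniformly for scalars and splat vectors.
if (ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1))) {
  const APInt &Val = C->getAPIntValue();
  // Val is the scalar constant or the splatted element value.
  (void)Val;
}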
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr int32_t SignExtend32(uint32_t X)
Sign-extend the number in the bottom B bits of X to a 32-bit integer.
Definition: MathExtras.h:447
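A tiny illustration of the compile-time form with 16 as the bit width; the input values are arbitrary.
#include "llvm/Support/MathExtras.h"
#include <cstdint>

// Interpret the low 16 bits of each value as a signed number.
static const int32_t NegOne = llvm::SignExtend32<16>(0xFFFFu); // -1
static const int32_t MaxPos = llvm::SignExtend32<16>(0x7FFFu); // 32767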
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition: MathExtras.h:465
static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME)
Returns true iff Val consists of one contiguous run of 1s with any number of 0s on either side.
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition: bit.h:327
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
static const unsigned PerfectShuffleTable[6561+1]
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
This is used by foldLoadsRecursive() to capture a Root Load node which is of type or(load,...
static const fltSemantics & IEEEsingle() LLVM_READNONE
Definition: APFloat.cpp:249
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:230
static const fltSemantics & PPCDoubleDouble() LLVM_READNONE
Definition: APFloat.cpp:252
static constexpr roundingMode rmTowardZero
Definition: APFloat.h:234
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Represent subnormal handling kind for floating point instruction inputs and outputs.
Extended Value Type.
Definition: ValueTypes.h:34
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:93
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:380
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:136
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:73
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:274
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:146
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:358
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:370
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:306
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:64
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:366
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:167
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:313
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:202
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:318
bool isExtended() const
Test if the given EVT is extended (as opposed to being simple).
Definition: ValueTypes.h:141
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:156
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:326
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition: ValueTypes.h:438
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:151
unsigned getByValSize() const
void setByValSize(unsigned S)
Align getNonZeroByValAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
OutputArg - This struct carries flags and a value for a single outgoing (actual) argument or outgoing...
bool isConstant() const
Returns true if we know the value of all bits.
Definition: KnownBits.h:50
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:71
const APInt & getConstant() const
Returns the value when all bits have a known value.
Definition: KnownBits.h:57
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
Structure that collects some common arguments that get passed around between the functions for call l...
These are IR-level optimization flags that may be propagated to SDNodes.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setIsPostTypeLegalization(bool Value=true)
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
SmallVector< ISD::InputArg, 32 > Ins
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)