LLVM 19.0.0git
TargetLowering.cpp
Go to the documentation of this file.
1//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This implements the TargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
14#include "llvm/ADT/STLExtras.h"
25#include "llvm/IR/DataLayout.h"
28#include "llvm/IR/LLVMContext.h"
29#include "llvm/MC/MCAsmInfo.h"
30#include "llvm/MC/MCExpr.h"
36#include <cctype>
37using namespace llvm;
38
39/// NOTE: The TargetMachine owns TLOF.
// Constructor: forwards the TargetMachine to the TargetLoweringBase
// initializer list and adds no state of its own.
// NOTE(review): the signature line (original line 40) is missing from this
// extraction — presumably `TargetLowering::TargetLowering(const TargetMachine &tm)`;
// confirm against upstream TargetLowering.cpp.
41 : TargetLoweringBase(tm) {}
42
// Base-class default: this generic implementation knows no target-specific
// DAG node names, so it reports none. Targets override this to return a
// printable name for their custom ISD opcodes (used by DAG dumps).
43const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
 44  return nullptr;
 45}
46
// NOTE(review): original lines 47-48 are missing from this extraction; the
// brace below closes a definition whose signature and body are not visible
// here. Recover the full text from upstream before editing.
 49}
50
51/// Check whether a given call node is in tail position within its function. If
52/// so, it sets Chain to the input chain of the tail call.
// NOTE(review): the signature (original line 53) and the line binding F to the
// caller's Function (original line 55) are missing from this extraction;
// upstream this is `bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG,
// SDNode *Node, SDValue &Chain) const` — confirm before editing.
 54                                          SDValue &Chain) const {
56
 57  // First, check if tail calls have been disabled in this function.
 58  if (F.getFnAttribute("disable-tail-calls").getValueAsBool())
 59    return false;
60
 61  // Conservatively require the attributes of the call to match those of
 62  // the return. Ignore following attributes because they don't affect the
 63  // call sequence.
 64  AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs());
 65  for (const auto &Attr :
 66       {Attribute::Alignment, Attribute::Dereferenceable,
 67        Attribute::DereferenceableOrNull, Attribute::NoAlias,
 68        Attribute::NonNull, Attribute::NoUndef, Attribute::Range})
 69    CallerAttrs.removeAttribute(Attr);
70
 71  if (CallerAttrs.hasAttributes())
 72    return false;
73
 74  // It's not safe to eliminate the sign / zero extension of the return value.
 // NOTE(review): this check looks unreachable — if ZExt/SExt survived the
 // removal loop above, hasAttributes() already returned false. Kept as-is.
 75  if (CallerAttrs.contains(Attribute::ZExt) ||
 76      CallerAttrs.contains(Attribute::SExt))
 77    return false;
78
 79  // Check if the only use is a function return node.
 80  return isUsedByReturnOnly(Node, Chain);
 81}
82
// Verifies that every argument assigned to a callee-saved register is the
// same value the caller received in that register (a CopyFromReg of the
// matching live-in virtual register), so a tail call will not clobber it.
// NOTE(review): the first signature line (original line 83) is missing from
// this extraction; upstream it begins
// `bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,` — confirm.
 84    const uint32_t *CallerPreservedMask,
 85    const SmallVectorImpl<CCValAssign> &ArgLocs,
 86    const SmallVectorImpl<SDValue> &OutVals) const {
 87  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
 88    const CCValAssign &ArgLoc = ArgLocs[I];
 89    if (!ArgLoc.isRegLoc())
 90      continue;
 91    MCRegister Reg = ArgLoc.getLocReg();
 92    // Only look at callee saved registers.
 93    if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
 94      continue;
 95    // Check that we pass the value used for the caller.
 96    // (We look for a CopyFromReg reading a virtual register that is used
 97    //  for the function live-in value of register Reg)
 98    SDValue Value = OutVals[I];
 99    if (Value->getOpcode() == ISD::AssertZext)
 100      Value = Value.getOperand(0);
 101    if (Value->getOpcode() != ISD::CopyFromReg)
 102      return false;
 103    Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
 104    if (MRI.getLiveInPhysReg(ArgReg) != Reg)
 105      return false;
 106  }
 107  return true;
 108}
109
110/// Set CallLoweringInfo attribute flags based on a call instruction
111/// and called function attributes.
// Copies the per-argument ABI attribute flags (sext/zext/inreg/sret/byval/...)
// from the call site into this ArgListEntry, and records the pointee type for
// the indirect-passing attributes (byval/preallocated/inalloca/sret).
// NOTE(review): the signature (original line 112) and the first line of the
// mutual-exclusion assert (original line 128) are missing from this
// extraction; confirm against upstream before editing.
 113                                     unsigned ArgIdx) {
 114  IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
 115  IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
 116  IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
 117  IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
 118  IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
 119  IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
 120  IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
 121  IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
 122  IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
 123  IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
 124  IsSwiftAsync = Call->paramHasAttr(ArgIdx, Attribute::SwiftAsync);
 125  IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
 126  Alignment = Call->getParamStackAlign(ArgIdx);
 127  IndirectType = nullptr;
 129         "multiple ABI attributes?");
 130  if (IsByVal) {
 131    IndirectType = Call->getParamByValType(ArgIdx);
 // Stack alignment takes precedence; fall back to the param alignment.
 132    if (!Alignment)
 133      Alignment = Call->getParamAlign(ArgIdx);
 134  }
 135  if (IsPreallocated)
 136    IndirectType = Call->getParamPreallocatedType(ArgIdx);
 137  if (IsInAlloca)
 138    IndirectType = Call->getParamInAllocaType(ArgIdx);
 139  if (IsSRet)
 140    IndirectType = Call->getParamStructRetType(ArgIdx);
 141}
142
143/// Generate a libcall taking the given operands as arguments and returning a
144/// result of type RetVT.
// Builds an ArgList from Ops (choosing sign/zero extension per operand),
// resolves the libcall symbol, and lowers the call, returning {result, chain}.
// NOTE(review): several original lines are missing from this extraction —
// the signature (146-147), the Args/Entry declarations (154, 157), the
// soften-float conditions (167, 184), the Callee/CLI setup (175-176, 179),
// and two CLI setters (192-193). Recover from upstream before editing.
145std::pair<SDValue, SDValue>
 148                            MakeLibCallOptions CallOptions,
 149                            const SDLoc &dl,
 150                            SDValue InChain) const {
 // Default to the entry token when the caller supplied no chain.
 151  if (!InChain)
 152    InChain = DAG.getEntryNode();
153
 155  Args.reserve(Ops.size());
156
 158  for (unsigned i = 0; i < Ops.size(); ++i) {
 159    SDValue NewOp = Ops[i];
 160    Entry.Node = NewOp;
 161    Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
 162    Entry.IsSExt = shouldSignExtendTypeInLibCall(NewOp.getValueType(),
 163                                                 CallOptions.IsSExt);
 164    Entry.IsZExt = !Entry.IsSExt;
165
 166    if (CallOptions.IsSoften &&
 168      Entry.IsSExt = Entry.IsZExt = false;
 169    }
 170    Args.push_back(Entry);
 171  }
172
 173  if (LC == RTLIB::UNKNOWN_LIBCALL)
 174    report_fatal_error("Unsupported library call operation!");
177
 178  Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
 // Decide how the libcall result should be extended to the caller's type.
 180  bool signExtend = shouldSignExtendTypeInLibCall(RetVT, CallOptions.IsSExt);
 181  bool zeroExtend = !signExtend;
182
 183  if (CallOptions.IsSoften &&
 185    signExtend = zeroExtend = false;
 186  }
187
 188  CLI.setDebugLoc(dl)
 189      .setChain(InChain)
 190      .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
 191      .setNoReturn(CallOptions.DoesNotReturn)
 194      .setSExtResult(signExtend)
 195      .setZExtResult(zeroExtend);
 196  return LowerCallTo(CLI);
 197}
198
// Chooses a sequence of value types (appended to MemOps) that covers a
// memcpy/memset of Op.size() bytes, honoring alignment and the Limit on the
// number of operations. Returns false if no sequence within Limit exists.
// NOTE(review): missing from this extraction — the signature (original line
// 199: `bool TargetLowering::findOptimalMemOpLowering(`), the VT-shrinking
// statement in the alignment loop (216), the memset/NewVT legality check
// (243-244), and parts of the allowsMisalignedMemoryAccesses call (268, 270).
// Confirm against upstream before editing.
 200    std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
 201    unsigned SrcAS, const AttributeList &FuncAttributes) const {
 // A fixed-dst-align memcpy whose source is less aligned than its destination
 // cannot be optimally lowered here when an op limit is in force.
 202  if (Limit != ~unsigned(0) && Op.isMemcpyWithFixedDstAlign() &&
 203      Op.getSrcAlign() < Op.getDstAlign())
 204    return false;
205
 // Let the target pick a preferred type first; MVT::Other means "no opinion".
 206  EVT VT = getOptimalMemOpType(Op, FuncAttributes);
207
 208  if (VT == MVT::Other) {
 209    // Use the largest integer type whose alignment constraints are satisfied.
 210    // We only need to check DstAlign here as SrcAlign is always greater or
 211    // equal to DstAlign (or zero).
 212    VT = MVT::i64;
 213    if (Op.isFixedDstAlign())
 214      while (Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
 215             !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign()))
 217    assert(VT.isInteger());
218
 219    // Find the largest legal integer type.
 220    MVT LVT = MVT::i64;
 221    while (!isTypeLegal(LVT))
 222      LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
 223    assert(LVT.isInteger());
224
 225    // If the type we've chosen is larger than the largest legal integer type
 226    // then use that instead.
 227    if (VT.bitsGT(LVT))
 228      VT = LVT;
 229  }
230
 231  unsigned NumMemOps = 0;
 232  uint64_t Size = Op.size();
 // Greedily emit the widest type that still fits in the remaining bytes,
 // shrinking VT (or overlapping stores) for the tail.
 233  while (Size) {
 234    unsigned VTSize = VT.getSizeInBits() / 8;
 235    while (VTSize > Size) {
 236      // For now, only use non-vector load / store's for the left-over pieces.
 237      EVT NewVT = VT;
 238      unsigned NewVTSize;
239
 240      bool Found = false;
 241      if (VT.isVector() || VT.isFloatingPoint()) {
 242        NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
 245          Found = true;
 246        else if (NewVT == MVT::i64 &&
 248                 isSafeMemOpType(MVT::f64)) {
 249          // i64 is usually not legal on 32-bit targets, but f64 may be.
 250          NewVT = MVT::f64;
 251          Found = true;
 252        }
 253      }
254
 255      if (!Found) {
 // Walk down the simple-type ladder until a safe mem-op type is found,
 // bottoming out at i8 (always acceptable).
 256        do {
 257          NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
 258          if (NewVT == MVT::i8)
 259            break;
 260        } while (!isSafeMemOpType(NewVT.getSimpleVT()));
 261      }
 262      NewVTSize = NewVT.getSizeInBits() / 8;
263
 264      // If the new VT cannot cover all of the remaining bits, then consider
 265      // issuing a (or a pair of) unaligned and overlapping load / store.
 266      unsigned Fast;
 267      if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
 269              VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
 271          Fast)
 272        VTSize = Size;
 273      else {
 274        VT = NewVT;
 275        VTSize = NewVTSize;
 276      }
 277    }
278
 279    if (++NumMemOps > Limit)
 280      return false;
281
 282    MemOps.push_back(VT);
 283    Size -= VTSize;
 284  }
285
 286  return true;
 287}
288
289/// Soften the operands of a comparison. This code is shared among BR_CC,
290/// SELECT_CC, and SETCC handlers.
// Convenience overload: delegates to the chained variant below with a
// default-constructed (null) Chain.
// NOTE(review): the signature (original line 291) is missing from this
// extraction; upstream it begins
// `void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,` — confirm.
 292                                         SDValue &NewLHS, SDValue &NewRHS,
 293                                         ISD::CondCode &CCCode,
 294                                         const SDLoc &dl, const SDValue OldLHS,
 295                                         const SDValue OldRHS) const {
 296  SDValue Chain;
 297  return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
 298                             OldRHS, Chain);
 299}
300
// Soften a floating-point SETCC into one or two integer comparison libcalls
// (__eqsf2 etc.). On return, NewLHS/NewRHS hold the new integer operands (or
// NewLHS holds a combined boolean with NewRHS null when two calls were
// needed), and CCCode holds the integer condition to apply.
// NOTE(review): the signature line (original 301) and the RetVT/CallOptions
// declarations (404, 406) are missing from this extraction; confirm against
// upstream before editing.
 302                                         SDValue &NewLHS, SDValue &NewRHS,
 303                                         ISD::CondCode &CCCode,
 304                                         const SDLoc &dl, const SDValue OldLHS,
 305                                         const SDValue OldRHS,
 306                                         SDValue &Chain,
 307                                         bool IsSignaling) const {
 308  // FIXME: Currently we cannot really respect all IEEE predicates due to libgcc
 309  // not supporting it. We can update this code when libgcc provides such
 310  // functions.
311
 312  assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
 313         && "Unsupported setcc type!");
314
 315  // Expand into one or more soft-fp libcall(s).
 316  RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
 317  bool ShouldInvertCC = false;
 // Map each FP condition code to the libcall(s) implementing it; see
 // RuntimeLibcalls for the comparison-return conventions.
 318  switch (CCCode) {
 319  case ISD::SETEQ:
 320  case ISD::SETOEQ:
 321    LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
 322          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
 323          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
 324    break;
 325  case ISD::SETNE:
 326  case ISD::SETUNE:
 327    LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
 328          (VT == MVT::f64) ? RTLIB::UNE_F64 :
 329          (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
 330    break;
 331  case ISD::SETGE:
 332  case ISD::SETOGE:
 333    LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
 334          (VT == MVT::f64) ? RTLIB::OGE_F64 :
 335          (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
 336    break;
 337  case ISD::SETLT:
 338  case ISD::SETOLT:
 339    LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
 340          (VT == MVT::f64) ? RTLIB::OLT_F64 :
 341          (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
 342    break;
 343  case ISD::SETLE:
 344  case ISD::SETOLE:
 345    LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
 346          (VT == MVT::f64) ? RTLIB::OLE_F64 :
 347          (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
 348    break;
 349  case ISD::SETGT:
 350  case ISD::SETOGT:
 351    LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
 352          (VT == MVT::f64) ? RTLIB::OGT_F64 :
 353          (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
 354    break;
 355  case ISD::SETO:
 // SETO == !SETUO, so invert the unordered test.
 356    ShouldInvertCC = true;
 357    [[fallthrough]];
 358  case ISD::SETUO:
 359    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
 360          (VT == MVT::f64) ? RTLIB::UO_F64 :
 361          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
 362    break;
 363  case ISD::SETONE:
 364    // SETONE = O && UNE
 365    ShouldInvertCC = true;
 366    [[fallthrough]];
 367  case ISD::SETUEQ:
 // Two libcalls are required: unordered test plus equality test.
 368    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
 369          (VT == MVT::f64) ? RTLIB::UO_F64 :
 370          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
 371    LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
 372          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
 373          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
 374    break;
 375  default:
 376    // Invert CC for unordered comparisons
 377    ShouldInvertCC = true;
 378    switch (CCCode) {
 379    case ISD::SETULT:
 380      LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
 381            (VT == MVT::f64) ? RTLIB::OGE_F64 :
 382            (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
 383      break;
 384    case ISD::SETULE:
 385      LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
 386            (VT == MVT::f64) ? RTLIB::OGT_F64 :
 387            (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
 388      break;
 389    case ISD::SETUGT:
 390      LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
 391            (VT == MVT::f64) ? RTLIB::OLE_F64 :
 392            (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
 393      break;
 394    case ISD::SETUGE:
 395      LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
 396            (VT == MVT::f64) ? RTLIB::OLT_F64 :
 397            (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
 398      break;
 399    default: llvm_unreachable("Do not know how to soften this setcc!");
 400    }
 401  }
402
 403  // Use the target specific return value for comparison lib calls.
 405  SDValue Ops[2] = {NewLHS, NewRHS};
 407  EVT OpsVT[2] = { OldLHS.getValueType(),
 408                   OldRHS.getValueType() };
 409  CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true);
 // First libcall: produces an integer to compare against zero.
 410  auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
 411  NewLHS = Call.first;
 412  NewRHS = DAG.getConstant(0, dl, RetVT);
413
 414  CCCode = getCmpLibcallCC(LC1);
 415  if (ShouldInvertCC) {
 416    assert(RetVT.isInteger());
 417    CCCode = getSetCCInverse(CCCode, RetVT);
 418  }
419
 420  if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
 421    // Update Chain.
 422    Chain = Call.second;
 423  } else {
 // Second libcall path: materialize both setccs and combine with AND/OR.
 424    EVT SetCCVT =
 425        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
 426    SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
 427    auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
 428    CCCode = getCmpLibcallCC(LC2);
 429    if (ShouldInvertCC)
 430      CCCode = getSetCCInverse(CCCode, RetVT);
 431    NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
 432    if (Chain)
 433      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
 434                          Call2.second);
 435    NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
 436                         Tmp.getValueType(), Tmp, NewLHS);
 // Signal the combined-result form to the caller.
 437    NewRHS = SDValue();
 438  }
 439}
440
441/// Return the entry encoding for a jump table in the current function. The
442/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
// NOTE(review): the signature (original line 443) and all three return
// statements (446, 450, 453 — presumably EK_BlockAddress, EK_GPRel32BlockAddress,
// and EK_LabelDifference32) are missing from this extraction; confirm upstream.
 444  // In non-pic modes, just use the address of a block.
 445  if (!isPositionIndependent())
447
 448  // In PIC mode, if the target supports a GPRel32 directive, use it.
 449  if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr)
451
 452  // Otherwise, use a label difference.
 454}
455
// Returns the base SDValue that PIC jump-table entries are relative to.
// NOTE(review): the signature (original line 456) and the lines that compute
// Table for the GPRel encodings (461-463) are missing from this extraction;
// confirm against upstream before editing.
 457                                                 SelectionDAG &DAG) const {
 458  // If our PIC model is GP relative, use the global offset table as the base.
 459  unsigned JTEncoding = getJumpTableEncoding();
460
464
 465  return Table;
 466}
467
468/// This returns the relocation base for the given PIC jumptable, the same as
469/// getPICJumpTableRelocBase, but as an MCExpr.
470const MCExpr *
// NOTE(review): the first signature line (original 471, naming the MF
// parameter) is missing from this extraction; confirm upstream.
 472                                             unsigned JTI,MCContext &Ctx) const{
 473  // The normal PIC reloc base is the label at the start of the jump table.
 474  return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
 475}
476
// Expands an indirect jump-table branch into a BRIND node, first threading
// jump-table debug info into the chain when needed.
// NOTE(review): the signature (original line 477) and the guard condition
// (original line 482 — per the comment, a CodeView-enabled check) are missing
// from this extraction; confirm against upstream before editing.
 478                                               SDValue Addr, int JTI,
 479                                               SelectionDAG &DAG) const {
 480  SDValue Chain = Value;
 481  // Jump table debug info is only needed if CodeView is enabled.
 483    Chain = DAG.getJumpTableDebugInfo(JTI, Chain, dl);
 484  }
 485  return DAG.getNode(ISD::BRIND, dl, MVT::Other, Chain, Addr);
 486}
487
// Returns true when a constant offset can be folded directly into a global
// address reference: the global must be DSO-local and the code non-PIC.
// NOTE(review): the signature line (original 489, naming the GlobalAddressSDNode
// *GA parameter) is missing from this extraction; confirm upstream.
488bool
 490  const TargetMachine &TM = getTargetMachine();
 491  const GlobalValue *GV = GA->getGlobal();
492
 493  // If the address is not even local to this DSO we will have to load it from
 494  // a got and then add the offset.
 495  if (!TM.shouldAssumeDSOLocal(GV))
 496    return false;
497
 498  // If the code is position independent we will have to add a base register.
 499  if (isPositionIndependent())
 500    return false;
501
 502  // Otherwise we can do it.
 503  return true;
 504}
505
506//===----------------------------------------------------------------------===//
507// Optimization Methods
508//===----------------------------------------------------------------------===//
509
510/// If the specified instruction has a constant integer operand and there are
511/// bits set in that constant that are not demanded, then clear those bits and
512/// return true.
// NOTE(review): the signature line (original 513,
// `bool TargetLowering::ShrinkDemandedConstant(SDValue Op,`) is missing from
// this extraction; confirm upstream before editing.
 514                                            const APInt &DemandedBits,
 515                                            const APInt &DemandedElts,
 516                                            TargetLoweringOpt &TLO) const {
 517  SDLoc DL(Op);
 518  unsigned Opcode = Op.getOpcode();
519
 520  // Early-out if we've ended up calling an undemanded node, leave this to
 521  // constant folding.
 522  if (DemandedBits.isZero() || DemandedElts.isZero())
 523    return false;
524
 525  // Do target-specific constant optimization.
 526  if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
 // Report whether the target hook actually produced a replacement node.
 527    return TLO.New.getNode();
528
 529  // FIXME: ISD::SELECT, ISD::SELECT_CC
 530  switch (Opcode) {
 531  default:
 532    break;
 533  case ISD::XOR:
 534  case ISD::AND:
 535  case ISD::OR: {
 536    auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
 537    if (!Op1C || Op1C->isOpaque())
 538      return false;
539
 540    // If this is a 'not' op, don't touch it because that's a canonical form.
 541    const APInt &C = Op1C->getAPIntValue();
 542    if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(C))
 543      return false;
544
 // The constant has bits outside the demanded set — mask them off and
 // rebuild the node with the narrower constant.
 545    if (!C.isSubsetOf(DemandedBits)) {
 546      EVT VT = Op.getValueType();
 547      SDValue NewC = TLO.DAG.getConstant(DemandedBits & C, DL, VT);
 548      SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC,
 549                                      Op->getFlags());
 550      return TLO.CombineTo(Op, NewOp);
 551    }
552
 553    break;
 554  }
 555  }
556
 557  return false;
 558}
559
// Overload without explicit DemandedElts: demands all elements of a fixed
// vector (or the single scalar lane) and delegates to the full variant.
// NOTE(review): the signature line (original 560) and the
// getAllOnes(...NumElements) arm of the ternary (original 565) are missing
// from this extraction; confirm upstream.
 561                                            const APInt &DemandedBits,
 562                                            TargetLoweringOpt &TLO) const {
 563  EVT VT = Op.getValueType();
 564  APInt DemandedElts = VT.isVector()
 566                           : APInt(1, 1);
 567  return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
 568}
569
570/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
571/// This uses isTruncateFree/isZExtFree and ANY_EXTEND for the widening cast,
572/// but it could be generalized for targets with other types of implicit
573/// widening casts.
// NOTE(review): the signature line (original 574, which also declares the
// BitWidth parameter used in the loop below) is missing from this extraction;
// confirm upstream before editing.
 575                                      const APInt &DemandedBits,
 576                                      TargetLoweringOpt &TLO) const {
 577  assert(Op.getNumOperands() == 2 &&
 578         "ShrinkDemandedOp only supports binary operators!");
 579  assert(Op.getNode()->getNumValues() == 1 &&
 580         "ShrinkDemandedOp only supports nodes with one result!");
581
 582  EVT VT = Op.getValueType();
 583  SelectionDAG &DAG = TLO.DAG;
 584  SDLoc dl(Op);
585
 586  // Early return, as this function cannot handle vector types.
 587  if (VT.isVector())
 588    return false;
589
 590  // Don't do this if the node has another user, which may require the
 591  // full value.
 592  if (!Op.getNode()->hasOneUse())
 593    return false;
594
 595  // Search for the smallest integer type with free casts to and from
 596  // Op's type. For expedience, just check power-of-2 integer types.
 597  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
 598  unsigned DemandedSize = DemandedBits.getActiveBits();
 // Try each power-of-two width from the demanded size up to (exclusive) the
 // original width.
 599  for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
 600       SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
 601    EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
 602    if (TLI.isTruncateFree(VT, SmallVT) && TLI.isZExtFree(SmallVT, VT)) {
 603      // We found a type with free casts.
 604      SDValue X = DAG.getNode(
 605          Op.getOpcode(), dl, SmallVT,
 606          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
 607          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)));
 608      assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
 609      SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, VT, X);
 610      return TLO.CombineTo(Op, Z);
 611    }
 612  }
 613  return false;
 614}
615
// DAG-combiner entry point: runs SimplifyDemandedBits with a TargetLoweringOpt
// built from the combiner's legalization state, and commits the result.
// NOTE(review): the signature line (original 616) and the CommitTargetLoweringOpt
// call inside the if-body (original 626) are missing from this extraction;
// confirm upstream.
 617                                          DAGCombinerInfo &DCI) const {
 618  SelectionDAG &DAG = DCI.DAG;
 619  TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
 620                        !DCI.isBeforeLegalizeOps());
 621  KnownBits Known;
622
 623  bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
 624  if (Simplified) {
 625    DCI.AddToWorklist(Op.getNode());
 627  }
 628  return Simplified;
 629}
630
// Same as the overload above, but with an explicit per-element demand mask.
// NOTE(review): the signature line (original 631) and the
// CommitTargetLoweringOpt call (original 643) are missing from this
// extraction; confirm upstream.
 632                                          const APInt &DemandedElts,
 633                                          DAGCombinerInfo &DCI) const {
 634  SelectionDAG &DAG = DCI.DAG;
 635  TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
 636                        !DCI.isBeforeLegalizeOps());
 637  KnownBits Known;
638
 639  bool Simplified =
 640      SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO);
 641  if (Simplified) {
 642    DCI.AddToWorklist(Op.getNode());
 644  }
 645  return Simplified;
 646}
647
// TLO-based overload: derives a DemandedElts mask (all lanes of a fixed
// vector, one implicit broadcast lane otherwise) and forwards to the full
// implementation.
// NOTE(review): the signature lines (original 648, 650) and the
// getAllOnes(...NumElements) ternary arm (original 659) are missing from this
// extraction; confirm upstream.
 649                                          KnownBits &Known,
 651                                          unsigned Depth,
 652                                          bool AssumeSingleUse) const {
 653  EVT VT = Op.getValueType();
654
 655  // Since the number of lanes in a scalable vector is unknown at compile time,
 656  // we track one bit which is implicitly broadcast to all lanes. This means
 657  // that all lanes in a scalable vector are considered demanded.
 658  APInt DemandedElts = VT.isFixedLengthVector()
 660                           : APInt(1, 1);
 661  return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
 662                              AssumeSingleUse);
 663}
664
665// TODO: Under what circumstances can we create nodes? Constant folding?
// Attempts to look through Op and return an existing, simpler SDValue that
// produces the same demanded bits/elements — without creating new nodes or
// mutating the DAG (safe even when Op has multiple uses). Returns a null
// SDValue when no bypass is found.
// NOTE(review): multiple original lines are missing from this extraction —
// the signature (666), the depth-limit condition (672), parts of the SETCC
// (815-817, 823) and SIGN_EXTEND_INREG (828, 834) cases, the case labels for
// the extend-vector-inreg / insert-element / insert-subvector / shuffle cases
// (842-844, 860, 873, 888). Recover from upstream before editing logic.
 667    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
 668    SelectionDAG &DAG, unsigned Depth) const {
 669  EVT VT = Op.getValueType();
670
 671  // Limit search depth.
 673    return SDValue();
674
 675  // Ignore UNDEFs.
 676  if (Op.isUndef())
 677    return SDValue();
678
 679  // Not demanding any bits/elts from Op.
 680  if (DemandedBits == 0 || DemandedElts == 0)
 681    return DAG.getUNDEF(VT);
682
 683  bool IsLE = DAG.getDataLayout().isLittleEndian();
 684  unsigned NumElts = DemandedElts.getBitWidth();
 685  unsigned BitWidth = DemandedBits.getBitWidth();
 686  KnownBits LHSKnown, RHSKnown;
 687  switch (Op.getOpcode()) {
 688  case ISD::BITCAST: {
 689    if (VT.isScalableVector())
 690      return SDValue();
691
 692    SDValue Src = peekThroughBitcasts(Op.getOperand(0));
 693    EVT SrcVT = Src.getValueType();
 694    EVT DstVT = Op.getValueType();
 695    if (SrcVT == DstVT)
 696      return Src;
697
 698    unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
 699    unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
 // Same element width: recurse straight through the bitcast.
 700    if (NumSrcEltBits == NumDstEltBits)
 701      if (SDValue V = SimplifyMultipleUseDemandedBits(
 702              Src, DemandedBits, DemandedElts, DAG, Depth + 1))
 703        return DAG.getBitcast(DstVT, V);
704
 // Wide dst elements built from several narrow src elements: translate the
 // demanded dst bits/elts into the equivalent src masks.
 705    if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) {
 706      unsigned Scale = NumDstEltBits / NumSrcEltBits;
 707      unsigned NumSrcElts = SrcVT.getVectorNumElements();
 708      APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
 709      APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
 710      for (unsigned i = 0; i != Scale; ++i) {
 711        unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
 712        unsigned BitOffset = EltOffset * NumSrcEltBits;
 713        APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
 714        if (!Sub.isZero()) {
 715          DemandedSrcBits |= Sub;
 716          for (unsigned j = 0; j != NumElts; ++j)
 717            if (DemandedElts[j])
 718              DemandedSrcElts.setBit((j * Scale) + i);
 719        }
 720      }
721
 722      if (SDValue V = SimplifyMultipleUseDemandedBits(
 723              Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
 724        return DAG.getBitcast(DstVT, V);
 725    }
726
 727    // TODO - bigendian once we have test coverage.
 // Narrow dst elements carved out of wide src elements (little-endian only).
 728    if (IsLE && (NumSrcEltBits % NumDstEltBits) == 0) {
 729      unsigned Scale = NumSrcEltBits / NumDstEltBits;
 730      unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
 731      APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
 732      APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
 733      for (unsigned i = 0; i != NumElts; ++i)
 734        if (DemandedElts[i]) {
 735          unsigned Offset = (i % Scale) * NumDstEltBits;
 736          DemandedSrcBits.insertBits(DemandedBits, Offset);
 737          DemandedSrcElts.setBit(i / Scale);
 738        }
739
 740      if (SDValue V = SimplifyMultipleUseDemandedBits(
 741              Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
 742        return DAG.getBitcast(DstVT, V);
 743    }
744
 745    break;
 746  }
 747  case ISD::FREEZE: {
 748    SDValue N0 = Op.getOperand(0);
 // A freeze of a value proven neither undef nor poison is a no-op.
 749    if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, DemandedElts,
 750                                             /*PoisonOnly=*/false))
 751      return N0;
 752    break;
 753  }
 754  case ISD::AND: {
 755    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
 756    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
757
 758    // If all of the demanded bits are known 1 on one side, return the other.
 759    // These bits cannot contribute to the result of the 'and' in this
 760    // context.
 761    if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
 762      return Op.getOperand(0);
 763    if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
 764      return Op.getOperand(1);
 765    break;
 766  }
 767  case ISD::OR: {
 768    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
 769    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
770
 771    // If all of the demanded bits are known zero on one side, return the
 772    // other.  These bits cannot contribute to the result of the 'or' in this
 773    // context.
 774    if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
 775      return Op.getOperand(0);
 776    if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
 777      return Op.getOperand(1);
 778    break;
 779  }
 780  case ISD::XOR: {
 781    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
 782    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
783
 784    // If all of the demanded bits are known zero on one side, return the
 785    // other.
 786    if (DemandedBits.isSubsetOf(RHSKnown.Zero))
 787      return Op.getOperand(0);
 788    if (DemandedBits.isSubsetOf(LHSKnown.Zero))
 789      return Op.getOperand(1);
 790    break;
 791  }
 792  case ISD::SHL: {
 793    // If we are only demanding sign bits then we can use the shift source
 794    // directly.
 795    if (const APInt *MaxSA =
 796            DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
 797      SDValue Op0 = Op.getOperand(0);
 798      unsigned ShAmt = MaxSA->getZExtValue();
 799      unsigned NumSignBits =
 800          DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
 801      unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
 // Enough sign bits survive the shift to cover every demanded upper bit.
 802      if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
 803        return Op0;
 804    }
 805    break;
 806  }
 807  case ISD::SETCC: {
 808    SDValue Op0 = Op.getOperand(0);
 809    SDValue Op1 = Op.getOperand(1);
 810    ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
 811    // If (1) we only need the sign-bit, (2) the setcc operands are the same
 812    // width as the setcc result, and (3) the result of a setcc conforms to 0 or
 813    // -1, we may be able to bypass the setcc.
 814    if (DemandedBits.isSignMask() &&
 818      // If we're testing X < 0, then this compare isn't needed - just use X!
 819      // FIXME: We're limiting to integer types here, but this should also work
 820      // if we don't care about FP signed-zero. The use of SETLT with FP means
 821      // that we don't care about NaNs.
 822      if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
 824        return Op0;
 825    }
 826    break;
 827  }
 829    // If none of the extended bits are demanded, eliminate the sextinreg.
 830    SDValue Op0 = Op.getOperand(0);
 831    EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
 832    unsigned ExBits = ExVT.getScalarSizeInBits();
 833    if (DemandedBits.getActiveBits() <= ExBits &&
 835      return Op0;
 836    // If the input is already sign extended, just drop the extension.
 837    unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
 838    if (NumSignBits >= (BitWidth - ExBits + 1))
 839      return Op0;
 840    break;
 841  }
 845    if (VT.isScalableVector())
 846      return SDValue();
847
 848    // If we only want the lowest element and none of extended bits, then we can
 849    // return the bitcasted source vector.
 850    SDValue Src = Op.getOperand(0);
 851    EVT SrcVT = Src.getValueType();
 852    EVT DstVT = Op.getValueType();
 853    if (IsLE && DemandedElts == 1 &&
 854        DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
 855        DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
 856      return DAG.getBitcast(DstVT, Src);
 857    }
 858    break;
 859  }
 861    if (VT.isScalableVector())
 862      return SDValue();
863
 864    // If we don't demand the inserted element, return the base vector.
 865    SDValue Vec = Op.getOperand(0);
 866    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
 867    EVT VecVT = Vec.getValueType();
 868    if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
 869        !DemandedElts[CIdx->getZExtValue()])
 870      return Vec;
 871    break;
 872  }
 874    if (VT.isScalableVector())
 875      return SDValue();
876
 877    SDValue Vec = Op.getOperand(0);
 878    SDValue Sub = Op.getOperand(1);
 879    uint64_t Idx = Op.getConstantOperandVal(2);
 880    unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
 881    APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
 882    // If we don't demand the inserted subvector, return the base vector.
 883    if (DemandedSubElts == 0)
 884      return Vec;
 885    break;
 886  }
 887  case ISD::VECTOR_SHUFFLE: {
 889    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
890
 891    // If all the demanded elts are from one operand and are inline,
 892    // then we can use the operand directly.
 893    bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
 894    for (unsigned i = 0; i != NumElts; ++i) {
 895      int M = ShuffleMask[i];
 896      if (M < 0 || !DemandedElts[i])
 897        continue;
 898      AllUndef = false;
 899      IdentityLHS &= (M == (int)i);
 900      IdentityRHS &= ((M - NumElts) == i);
 901    }
902
 903    if (AllUndef)
 904      return DAG.getUNDEF(Op.getValueType());
 905    if (IdentityLHS)
 906      return Op.getOperand(0);
 907    if (IdentityRHS)
 908      return Op.getOperand(1);
 909    break;
 910  }
 911  default:
 912    // TODO: Probably okay to remove after audit; here to reduce change size
 913    // in initial enablement patch for scalable vectors
 914    if (VT.isScalableVector())
 915      return SDValue();
916
 // Give targets a chance to simplify their custom nodes.
 917    if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
 918      if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
 919              Op, DemandedBits, DemandedElts, DAG, Depth))
 920        return V;
 921    break;
 922  }
 923  return SDValue();
 924}
925
// Convenience overload: derives the DemandedElts mask (all lanes of a fixed
// vector, one broadcast lane otherwise) and forwards to the main variant.
// NOTE(review): the signature lines (original 926-927) and the
// getAllOnes(...NumElements) ternary arm (original 934) are missing from this
// extraction; confirm upstream.
 928    unsigned Depth) const {
 929  EVT VT = Op.getValueType();
 930  // Since the number of lanes in a scalable vector is unknown at compile time,
 931  // we track one bit which is implicitly broadcast to all lanes. This means
 932  // that all lanes in a scalable vector are considered demanded.
 933  APInt DemandedElts = VT.isFixedLengthVector()
 935                           : APInt(1, 1);
 936  return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
 937                                         Depth);
 938}
939
// Element-only variant: demands every bit of each lane and forwards to the
// main bits+elts implementation.
// NOTE(review): the signature line (original 940,
// `SDValue TargetLowering::SimplifyMultipleUseDemandedVectorElts(`) is
// missing from this extraction; confirm upstream.
 941    SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
 942    unsigned Depth) const {
 943  APInt DemandedBits = APInt::getAllOnes(Op.getScalarValueSizeInBits());
 944  return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
 945                                         Depth);
 946}
947
948// Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1).
949// or to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1).
951 const TargetLowering &TLI,
952 const APInt &DemandedBits,
953 const APInt &DemandedElts,
954 unsigned Depth) {
955 assert((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) &&
956 "SRL or SRA node is required here!");
957 // Is the right shift using an immediate value of 1?
958 ConstantSDNode *N1C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
959 if (!N1C || !N1C->isOne())
960 return SDValue();
961
962 // We are looking for an avgfloor
963 // add(ext, ext)
964 // or one of these as a avgceil
965 // add(add(ext, ext), 1)
966 // add(add(ext, 1), ext)
967 // add(ext, add(ext, 1))
968 SDValue Add = Op.getOperand(0);
969 if (Add.getOpcode() != ISD::ADD)
970 return SDValue();
971
972 SDValue ExtOpA = Add.getOperand(0);
973 SDValue ExtOpB = Add.getOperand(1);
974 SDValue Add2;
975 auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3, SDValue A) {
976 ConstantSDNode *ConstOp;
977 if ((ConstOp = isConstOrConstSplat(Op2, DemandedElts)) &&
978 ConstOp->isOne()) {
979 ExtOpA = Op1;
980 ExtOpB = Op3;
981 Add2 = A;
982 return true;
983 }
984 if ((ConstOp = isConstOrConstSplat(Op3, DemandedElts)) &&
985 ConstOp->isOne()) {
986 ExtOpA = Op1;
987 ExtOpB = Op2;
988 Add2 = A;
989 return true;
990 }
991 return false;
992 };
993 bool IsCeil =
994 (ExtOpA.getOpcode() == ISD::ADD &&
995 MatchOperands(ExtOpA.getOperand(0), ExtOpA.getOperand(1), ExtOpB, ExtOpA)) ||
996 (ExtOpB.getOpcode() == ISD::ADD &&
997 MatchOperands(ExtOpB.getOperand(0), ExtOpB.getOperand(1), ExtOpA, ExtOpB));
998
999 // If the shift is signed (sra):
1000 // - Needs >= 2 sign bit for both operands.
1001 // - Needs >= 2 zero bits.
1002 // If the shift is unsigned (srl):
1003 // - Needs >= 1 zero bit for both operands.
1004 // - Needs 1 demanded bit zero and >= 2 sign bits.
1005 unsigned ShiftOpc = Op.getOpcode();
1006 bool IsSigned = false;
1007 unsigned KnownBits;
1008 unsigned NumSignedA = DAG.ComputeNumSignBits(ExtOpA, DemandedElts, Depth);
1009 unsigned NumSignedB = DAG.ComputeNumSignBits(ExtOpB, DemandedElts, Depth);
1010 unsigned NumSigned = std::min(NumSignedA, NumSignedB) - 1;
1011 unsigned NumZeroA =
1012 DAG.computeKnownBits(ExtOpA, DemandedElts, Depth).countMinLeadingZeros();
1013 unsigned NumZeroB =
1014 DAG.computeKnownBits(ExtOpB, DemandedElts, Depth).countMinLeadingZeros();
1015 unsigned NumZero = std::min(NumZeroA, NumZeroB);
1016
1017 switch (ShiftOpc) {
1018 default:
1019 llvm_unreachable("Unexpected ShiftOpc in combineShiftToAVG");
1020 case ISD::SRA: {
1021 if (NumZero >= 2 && NumSigned < NumZero) {
1022 IsSigned = false;
1023 KnownBits = NumZero;
1024 break;
1025 }
1026 if (NumSigned >= 1) {
1027 IsSigned = true;
1028 KnownBits = NumSigned;
1029 break;
1030 }
1031 return SDValue();
1032 }
1033 case ISD::SRL: {
1034 if (NumZero >= 1 && NumSigned < NumZero) {
1035 IsSigned = false;
1036 KnownBits = NumZero;
1037 break;
1038 }
1039 if (NumSigned >= 1 && DemandedBits.isSignBitClear()) {
1040 IsSigned = true;
1041 KnownBits = NumSigned;
1042 break;
1043 }
1044 return SDValue();
1045 }
1046 }
1047
1048 unsigned AVGOpc = IsCeil ? (IsSigned ? ISD::AVGCEILS : ISD::AVGCEILU)
1049 : (IsSigned ? ISD::AVGFLOORS : ISD::AVGFLOORU);
1050
1051 // Find the smallest power-2 type that is legal for this vector size and
1052 // operation, given the original type size and the number of known sign/zero
1053 // bits.
1054 EVT VT = Op.getValueType();
1055 unsigned MinWidth =
1056 std::max<unsigned>(VT.getScalarSizeInBits() - KnownBits, 8);
1057 EVT NVT = EVT::getIntegerVT(*DAG.getContext(), llvm::bit_ceil(MinWidth));
1058 if (VT.isVector())
1059 NVT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount());
1060 if (!TLI.isOperationLegalOrCustom(AVGOpc, NVT)) {
1061 // If we could not transform, and (both) adds are nuw/nsw, we can use the
1062 // larger type size to do the transform.
1063 if (!TLI.isOperationLegalOrCustom(AVGOpc, VT))
1064 return SDValue();
1065 if (DAG.willNotOverflowAdd(IsSigned, Add.getOperand(0),
1066 Add.getOperand(1)) &&
1067 (!Add2 || DAG.willNotOverflowAdd(IsSigned, Add2.getOperand(0),
1068 Add2.getOperand(1))))
1069 NVT = VT;
1070 else
1071 return SDValue();
1072 }
1073
1074 SDLoc DL(Op);
1075 SDValue ResultAVG =
1076 DAG.getNode(AVGOpc, DL, NVT, DAG.getExtOrTrunc(IsSigned, ExtOpA, DL, NVT),
1077 DAG.getExtOrTrunc(IsSigned, ExtOpB, DL, NVT));
1078 return DAG.getExtOrTrunc(IsSigned, ResultAVG, DL, VT);
1079}
1080
1081/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
1082/// result of Op are ever used downstream. If we can use this information to
1083/// simplify Op, create a new simplified DAG node and return true, returning the
1084/// original and new nodes in Old and New. Otherwise, analyze the expression and
1085/// return a mask of Known bits for the expression (used to simplify the
1086/// caller). The Known bits may only be accurate for those bits in the
1087/// OriginalDemandedBits and OriginalDemandedElts.
1089 SDValue Op, const APInt &OriginalDemandedBits,
1090 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
1091 unsigned Depth, bool AssumeSingleUse) const {
1092 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
1093 assert(Op.getScalarValueSizeInBits() == BitWidth &&
1094 "Mask size mismatches value type size!");
1095
1096 // Don't know anything.
1097 Known = KnownBits(BitWidth);
1098
1099 EVT VT = Op.getValueType();
1100 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
1101 unsigned NumElts = OriginalDemandedElts.getBitWidth();
1102 assert((!VT.isFixedLengthVector() || NumElts == VT.getVectorNumElements()) &&
1103 "Unexpected vector size");
1104
1105 APInt DemandedBits = OriginalDemandedBits;
1106 APInt DemandedElts = OriginalDemandedElts;
1107 SDLoc dl(Op);
1108
1109 // Undef operand.
1110 if (Op.isUndef())
1111 return false;
1112
1113 // We can't simplify target constants.
1114 if (Op.getOpcode() == ISD::TargetConstant)
1115 return false;
1116
1117 if (Op.getOpcode() == ISD::Constant) {
1118 // We know all of the bits for a constant!
1119 Known = KnownBits::makeConstant(Op->getAsAPIntVal());
1120 return false;
1121 }
1122
1123 if (Op.getOpcode() == ISD::ConstantFP) {
1124 // We know all of the bits for a floating point constant!
1126 cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt());
1127 return false;
1128 }
1129
1130 // Other users may use these bits.
1131 bool HasMultiUse = false;
1132 if (!AssumeSingleUse && !Op.getNode()->hasOneUse()) {
1134 // Limit search depth.
1135 return false;
1136 }
1137 // Allow multiple uses, just set the DemandedBits/Elts to all bits.
1139 DemandedElts = APInt::getAllOnes(NumElts);
1140 HasMultiUse = true;
1141 } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
1142 // Not demanding any bits/elts from Op.
1143 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1144 } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
1145 // Limit search depth.
1146 return false;
1147 }
1148
1149 KnownBits Known2;
1150 switch (Op.getOpcode()) {
1151 case ISD::SCALAR_TO_VECTOR: {
1152 if (VT.isScalableVector())
1153 return false;
1154 if (!DemandedElts[0])
1155 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1156
1157 KnownBits SrcKnown;
1158 SDValue Src = Op.getOperand(0);
1159 unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
1160 APInt SrcDemandedBits = DemandedBits.zext(SrcBitWidth);
1161 if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
1162 return true;
1163
1164 // Upper elements are undef, so only get the knownbits if we just demand
1165 // the bottom element.
1166 if (DemandedElts == 1)
1167 Known = SrcKnown.anyextOrTrunc(BitWidth);
1168 break;
1169 }
1170 case ISD::BUILD_VECTOR:
1171 // Collect the known bits that are shared by every demanded element.
1172 // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
1173 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1174 return false; // Don't fall through, will infinitely loop.
1175 case ISD::SPLAT_VECTOR: {
1176 SDValue Scl = Op.getOperand(0);
1177 APInt DemandedSclBits = DemandedBits.zextOrTrunc(Scl.getValueSizeInBits());
1178 KnownBits KnownScl;
1179 if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1180 return true;
1181
1182 // Implicitly truncate the bits to match the official semantics of
1183 // SPLAT_VECTOR.
1184 Known = KnownScl.trunc(BitWidth);
1185 break;
1186 }
1187 case ISD::LOAD: {
1188 auto *LD = cast<LoadSDNode>(Op);
1189 if (getTargetConstantFromLoad(LD)) {
1190 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1191 return false; // Don't fall through, will infinitely loop.
1192 }
1193 if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
1194 // If this is a ZEXTLoad and we are looking at the loaded value.
1195 EVT MemVT = LD->getMemoryVT();
1196 unsigned MemBits = MemVT.getScalarSizeInBits();
1197 Known.Zero.setBitsFrom(MemBits);
1198 return false; // Don't fall through, will infinitely loop.
1199 }
1200 break;
1201 }
1203 if (VT.isScalableVector())
1204 return false;
1205 SDValue Vec = Op.getOperand(0);
1206 SDValue Scl = Op.getOperand(1);
1207 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
1208 EVT VecVT = Vec.getValueType();
1209
1210 // If index isn't constant, assume we need all vector elements AND the
1211 // inserted element.
1212 APInt DemandedVecElts(DemandedElts);
1213 if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
1214 unsigned Idx = CIdx->getZExtValue();
1215 DemandedVecElts.clearBit(Idx);
1216
1217 // Inserted element is not required.
1218 if (!DemandedElts[Idx])
1219 return TLO.CombineTo(Op, Vec);
1220 }
1221
1222 KnownBits KnownScl;
1223 unsigned NumSclBits = Scl.getScalarValueSizeInBits();
1224 APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
1225 if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1226 return true;
1227
1228 Known = KnownScl.anyextOrTrunc(BitWidth);
1229
1230 KnownBits KnownVec;
1231 if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
1232 Depth + 1))
1233 return true;
1234
1235 if (!!DemandedVecElts)
1236 Known = Known.intersectWith(KnownVec);
1237
1238 return false;
1239 }
1240 case ISD::INSERT_SUBVECTOR: {
1241 if (VT.isScalableVector())
1242 return false;
1243 // Demand any elements from the subvector and the remainder from the src its
1244 // inserted into.
1245 SDValue Src = Op.getOperand(0);
1246 SDValue Sub = Op.getOperand(1);
1247 uint64_t Idx = Op.getConstantOperandVal(2);
1248 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
1249 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
1250 APInt DemandedSrcElts = DemandedElts;
1251 DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
1252
1253 KnownBits KnownSub, KnownSrc;
1254 if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
1255 Depth + 1))
1256 return true;
1257 if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, KnownSrc, TLO,
1258 Depth + 1))
1259 return true;
1260
1261 Known.Zero.setAllBits();
1262 Known.One.setAllBits();
1263 if (!!DemandedSubElts)
1264 Known = Known.intersectWith(KnownSub);
1265 if (!!DemandedSrcElts)
1266 Known = Known.intersectWith(KnownSrc);
1267
1268 // Attempt to avoid multi-use src if we don't need anything from it.
1269 if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() ||
1270 !DemandedSrcElts.isAllOnes()) {
1271 SDValue NewSub = SimplifyMultipleUseDemandedBits(
1272 Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
1273 SDValue NewSrc = SimplifyMultipleUseDemandedBits(
1274 Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1275 if (NewSub || NewSrc) {
1276 NewSub = NewSub ? NewSub : Sub;
1277 NewSrc = NewSrc ? NewSrc : Src;
1278 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub,
1279 Op.getOperand(2));
1280 return TLO.CombineTo(Op, NewOp);
1281 }
1282 }
1283 break;
1284 }
1286 if (VT.isScalableVector())
1287 return false;
1288 // Offset the demanded elts by the subvector index.
1289 SDValue Src = Op.getOperand(0);
1290 if (Src.getValueType().isScalableVector())
1291 break;
1292 uint64_t Idx = Op.getConstantOperandVal(1);
1293 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
1294 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
1295
1296 if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO,
1297 Depth + 1))
1298 return true;
1299
1300 // Attempt to avoid multi-use src if we don't need anything from it.
1301 if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
1302 SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
1303 Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1304 if (DemandedSrc) {
1305 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc,
1306 Op.getOperand(1));
1307 return TLO.CombineTo(Op, NewOp);
1308 }
1309 }
1310 break;
1311 }
1312 case ISD::CONCAT_VECTORS: {
1313 if (VT.isScalableVector())
1314 return false;
1315 Known.Zero.setAllBits();
1316 Known.One.setAllBits();
1317 EVT SubVT = Op.getOperand(0).getValueType();
1318 unsigned NumSubVecs = Op.getNumOperands();
1319 unsigned NumSubElts = SubVT.getVectorNumElements();
1320 for (unsigned i = 0; i != NumSubVecs; ++i) {
1321 APInt DemandedSubElts =
1322 DemandedElts.extractBits(NumSubElts, i * NumSubElts);
1323 if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
1324 Known2, TLO, Depth + 1))
1325 return true;
1326 // Known bits are shared by every demanded subvector element.
1327 if (!!DemandedSubElts)
1328 Known = Known.intersectWith(Known2);
1329 }
1330 break;
1331 }
1332 case ISD::VECTOR_SHUFFLE: {
1333 assert(!VT.isScalableVector());
1334 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
1335
1336 // Collect demanded elements from shuffle operands..
1337 APInt DemandedLHS, DemandedRHS;
1338 if (!getShuffleDemandedElts(NumElts, ShuffleMask, DemandedElts, DemandedLHS,
1339 DemandedRHS))
1340 break;
1341
1342 if (!!DemandedLHS || !!DemandedRHS) {
1343 SDValue Op0 = Op.getOperand(0);
1344 SDValue Op1 = Op.getOperand(1);
1345
1346 Known.Zero.setAllBits();
1347 Known.One.setAllBits();
1348 if (!!DemandedLHS) {
1349 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
1350 Depth + 1))
1351 return true;
1352 Known = Known.intersectWith(Known2);
1353 }
1354 if (!!DemandedRHS) {
1355 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
1356 Depth + 1))
1357 return true;
1358 Known = Known.intersectWith(Known2);
1359 }
1360
1361 // Attempt to avoid multi-use ops if we don't need anything from them.
1362 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1363 Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
1364 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1365 Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
1366 if (DemandedOp0 || DemandedOp1) {
1367 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1368 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1369 SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
1370 return TLO.CombineTo(Op, NewOp);
1371 }
1372 }
1373 break;
1374 }
1375 case ISD::AND: {
1376 SDValue Op0 = Op.getOperand(0);
1377 SDValue Op1 = Op.getOperand(1);
1378
1379 // If the RHS is a constant, check to see if the LHS would be zero without
1380 // using the bits from the RHS. Below, we use knowledge about the RHS to
1381 // simplify the LHS, here we're using information from the LHS to simplify
1382 // the RHS.
1383 if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1)) {
1384 // Do not increment Depth here; that can cause an infinite loop.
1385 KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
1386 // If the LHS already has zeros where RHSC does, this 'and' is dead.
1387 if ((LHSKnown.Zero & DemandedBits) ==
1388 (~RHSC->getAPIntValue() & DemandedBits))
1389 return TLO.CombineTo(Op, Op0);
1390
1391 // If any of the set bits in the RHS are known zero on the LHS, shrink
1392 // the constant.
1393 if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits,
1394 DemandedElts, TLO))
1395 return true;
1396
1397 // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
1398 // constant, but if this 'and' is only clearing bits that were just set by
1399 // the xor, then this 'and' can be eliminated by shrinking the mask of
1400 // the xor. For example, for a 32-bit X:
1401 // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
1402 if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
1403 LHSKnown.One == ~RHSC->getAPIntValue()) {
1404 SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
1405 return TLO.CombineTo(Op, Xor);
1406 }
1407 }
1408
1409 // AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I)
1410 // iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits).
1411 if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR && !VT.isScalableVector() &&
1412 (Op0.getOperand(0).isUndef() ||
1414 Op0->hasOneUse()) {
1415 unsigned NumSubElts =
1417 unsigned SubIdx = Op0.getConstantOperandVal(2);
1418 APInt DemandedSub =
1419 APInt::getBitsSet(NumElts, SubIdx, SubIdx + NumSubElts);
1420 KnownBits KnownSubMask =
1421 TLO.DAG.computeKnownBits(Op1, DemandedSub & DemandedElts, Depth + 1);
1422 if (DemandedBits.isSubsetOf(KnownSubMask.One)) {
1423 SDValue NewAnd =
1424 TLO.DAG.getNode(ISD::AND, dl, VT, Op0.getOperand(0), Op1);
1425 SDValue NewInsert =
1426 TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, NewAnd,
1427 Op0.getOperand(1), Op0.getOperand(2));
1428 return TLO.CombineTo(Op, NewInsert);
1429 }
1430 }
1431
1432 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1433 Depth + 1))
1434 return true;
1435 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1436 if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
1437 Known2, TLO, Depth + 1))
1438 return true;
1439 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1440
1441 // If all of the demanded bits are known one on one side, return the other.
1442 // These bits cannot contribute to the result of the 'and'.
1443 if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
1444 return TLO.CombineTo(Op, Op0);
1445 if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
1446 return TLO.CombineTo(Op, Op1);
1447 // If all of the demanded bits in the inputs are known zeros, return zero.
1448 if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1449 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
1450 // If the RHS is a constant, see if we can simplify it.
1451 if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, DemandedElts,
1452 TLO))
1453 return true;
1454 // If the operation can be done in a smaller type, do so.
1455 if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1456 return true;
1457
1458 // Attempt to avoid multi-use ops if we don't need anything from them.
1459 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1460 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1461 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1462 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1463 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1464 if (DemandedOp0 || DemandedOp1) {
1465 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1466 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1467 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1468 return TLO.CombineTo(Op, NewOp);
1469 }
1470 }
1471
1472 Known &= Known2;
1473 break;
1474 }
1475 case ISD::OR: {
1476 SDValue Op0 = Op.getOperand(0);
1477 SDValue Op1 = Op.getOperand(1);
1478 SDNodeFlags Flags = Op.getNode()->getFlags();
1479 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1480 Depth + 1)) {
1481 if (Flags.hasDisjoint()) {
1482 Flags.setDisjoint(false);
1483 Op->setFlags(Flags);
1484 }
1485 return true;
1486 }
1487 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1488 if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
1489 Known2, TLO, Depth + 1)) {
1490 if (Flags.hasDisjoint()) {
1491 Flags.setDisjoint(false);
1492 Op->setFlags(Flags);
1493 }
1494 return true;
1495 }
1496 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1497
1498 // If all of the demanded bits are known zero on one side, return the other.
1499 // These bits cannot contribute to the result of the 'or'.
1500 if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
1501 return TLO.CombineTo(Op, Op0);
1502 if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
1503 return TLO.CombineTo(Op, Op1);
1504 // If the RHS is a constant, see if we can simplify it.
1505 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1506 return true;
1507 // If the operation can be done in a smaller type, do so.
1508 if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1509 return true;
1510
1511 // Attempt to avoid multi-use ops if we don't need anything from them.
1512 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1513 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1514 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1515 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1516 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1517 if (DemandedOp0 || DemandedOp1) {
1518 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1519 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1520 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1521 return TLO.CombineTo(Op, NewOp);
1522 }
1523 }
1524
1525 // (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1|C2), (and Y, C2))
1526 // TODO: Use SimplifyMultipleUseDemandedBits to peek through masks.
1527 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::AND &&
1528 Op0->hasOneUse() && Op1->hasOneUse()) {
1529 // Attempt to match all commutations - m_c_Or would've been useful!
1530 for (int I = 0; I != 2; ++I) {
1531 SDValue X = Op.getOperand(I).getOperand(0);
1532 SDValue C1 = Op.getOperand(I).getOperand(1);
1533 SDValue Alt = Op.getOperand(1 - I).getOperand(0);
1534 SDValue C2 = Op.getOperand(1 - I).getOperand(1);
1535 if (Alt.getOpcode() == ISD::OR) {
1536 for (int J = 0; J != 2; ++J) {
1537 if (X == Alt.getOperand(J)) {
1538 SDValue Y = Alt.getOperand(1 - J);
1539 if (SDValue C12 = TLO.DAG.FoldConstantArithmetic(ISD::OR, dl, VT,
1540 {C1, C2})) {
1541 SDValue MaskX = TLO.DAG.getNode(ISD::AND, dl, VT, X, C12);
1542 SDValue MaskY = TLO.DAG.getNode(ISD::AND, dl, VT, Y, C2);
1543 return TLO.CombineTo(
1544 Op, TLO.DAG.getNode(ISD::OR, dl, VT, MaskX, MaskY));
1545 }
1546 }
1547 }
1548 }
1549 }
1550 }
1551
1552 Known |= Known2;
1553 break;
1554 }
1555 case ISD::XOR: {
1556 SDValue Op0 = Op.getOperand(0);
1557 SDValue Op1 = Op.getOperand(1);
1558
1559 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1560 Depth + 1))
1561 return true;
1562 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1563 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
1564 Depth + 1))
1565 return true;
1566 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1567
1568 // If all of the demanded bits are known zero on one side, return the other.
1569 // These bits cannot contribute to the result of the 'xor'.
1570 if (DemandedBits.isSubsetOf(Known.Zero))
1571 return TLO.CombineTo(Op, Op0);
1572 if (DemandedBits.isSubsetOf(Known2.Zero))
1573 return TLO.CombineTo(Op, Op1);
1574 // If the operation can be done in a smaller type, do so.
1575 if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1576 return true;
1577
1578 // If all of the unknown bits are known to be zero on one side or the other
1579 // turn this into an *inclusive* or.
1580 // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
1581 if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1582 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
1583
1584 ConstantSDNode *C = isConstOrConstSplat(Op1, DemandedElts);
1585 if (C) {
1586 // If one side is a constant, and all of the set bits in the constant are
1587 // also known set on the other side, turn this into an AND, as we know
1588 // the bits will be cleared.
1589 // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
1590 // NB: it is okay if more bits are known than are requested
1591 if (C->getAPIntValue() == Known2.One) {
1592 SDValue ANDC =
1593 TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
1594 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
1595 }
1596
1597 // If the RHS is a constant, see if we can change it. Don't alter a -1
1598 // constant because that's a 'not' op, and that is better for combining
1599 // and codegen.
1600 if (!C->isAllOnes() && DemandedBits.isSubsetOf(C->getAPIntValue())) {
1601 // We're flipping all demanded bits. Flip the undemanded bits too.
1602 SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
1603 return TLO.CombineTo(Op, New);
1604 }
1605
1606 unsigned Op0Opcode = Op0.getOpcode();
1607 if ((Op0Opcode == ISD::SRL || Op0Opcode == ISD::SHL) && Op0.hasOneUse()) {
1608 if (ConstantSDNode *ShiftC =
1609 isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
1610 // Don't crash on an oversized shift. We can not guarantee that a
1611 // bogus shift has been simplified to undef.
1612 if (ShiftC->getAPIntValue().ult(BitWidth)) {
1613 uint64_t ShiftAmt = ShiftC->getZExtValue();
1615 Ones = Op0Opcode == ISD::SHL ? Ones.shl(ShiftAmt)
1616 : Ones.lshr(ShiftAmt);
1617 const TargetLowering &TLI = TLO.DAG.getTargetLoweringInfo();
1618 if ((DemandedBits & C->getAPIntValue()) == (DemandedBits & Ones) &&
1619 TLI.isDesirableToCommuteXorWithShift(Op.getNode())) {
1620 // If the xor constant is a demanded mask, do a 'not' before the
1621 // shift:
1622 // xor (X << ShiftC), XorC --> (not X) << ShiftC
1623 // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
1624 SDValue Not = TLO.DAG.getNOT(dl, Op0.getOperand(0), VT);
1625 return TLO.CombineTo(Op, TLO.DAG.getNode(Op0Opcode, dl, VT, Not,
1626 Op0.getOperand(1)));
1627 }
1628 }
1629 }
1630 }
1631 }
1632
1633 // If we can't turn this into a 'not', try to shrink the constant.
1634 if (!C || !C->isAllOnes())
1635 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1636 return true;
1637
1638 // Attempt to avoid multi-use ops if we don't need anything from them.
1639 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1640 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1641 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1642 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1643 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1644 if (DemandedOp0 || DemandedOp1) {
1645 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1646 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1647 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1648 return TLO.CombineTo(Op, NewOp);
1649 }
1650 }
1651
1652 Known ^= Known2;
1653 break;
1654 }
1655 case ISD::SELECT:
1656 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1657 Known, TLO, Depth + 1))
1658 return true;
1659 if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1660 Known2, TLO, Depth + 1))
1661 return true;
1662 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1663 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1664
1665 // If the operands are constants, see if we can simplify them.
1666 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1667 return true;
1668
1669 // Only known if known in both the LHS and RHS.
1670 Known = Known.intersectWith(Known2);
1671 break;
1672 case ISD::VSELECT:
1673 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1674 Known, TLO, Depth + 1))
1675 return true;
1676 if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1677 Known2, TLO, Depth + 1))
1678 return true;
1679 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1680 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1681
1682 // Only known if known in both the LHS and RHS.
1683 Known = Known.intersectWith(Known2);
1684 break;
1685 case ISD::SELECT_CC:
1686 if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, DemandedElts,
1687 Known, TLO, Depth + 1))
1688 return true;
1689 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1690 Known2, TLO, Depth + 1))
1691 return true;
1692 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1693 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1694
1695 // If the operands are constants, see if we can simplify them.
1696 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1697 return true;
1698
1699 // Only known if known in both the LHS and RHS.
1700 Known = Known.intersectWith(Known2);
1701 break;
1702 case ISD::SETCC: {
1703 SDValue Op0 = Op.getOperand(0);
1704 SDValue Op1 = Op.getOperand(1);
1705 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
1706 // If (1) we only need the sign-bit, (2) the setcc operands are the same
1707 // width as the setcc result, and (3) the result of a setcc conforms to 0 or
1708 // -1, we may be able to bypass the setcc.
1709 if (DemandedBits.isSignMask() &&
1713 // If we're testing X < 0, then this compare isn't needed - just use X!
1714 // FIXME: We're limiting to integer types here, but this should also work
1715 // if we don't care about FP signed-zero. The use of SETLT with FP means
1716 // that we don't care about NaNs.
1717 if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
1719 return TLO.CombineTo(Op, Op0);
1720
1721 // TODO: Should we check for other forms of sign-bit comparisons?
1722 // Examples: X <= -1, X >= 0
1723 }
1724 if (getBooleanContents(Op0.getValueType()) ==
1726 BitWidth > 1)
1727 Known.Zero.setBitsFrom(1);
1728 break;
1729 }
1730 case ISD::SHL: {
1731 SDValue Op0 = Op.getOperand(0);
1732 SDValue Op1 = Op.getOperand(1);
1733 EVT ShiftVT = Op1.getValueType();
1734
1735 if (const APInt *SA =
1736 TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
1737 unsigned ShAmt = SA->getZExtValue();
1738 if (ShAmt == 0)
1739 return TLO.CombineTo(Op, Op0);
1740
1741 // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
1742 // single shift. We can do this if the bottom bits (which are shifted
1743 // out) are never demanded.
1744 // TODO - support non-uniform vector amounts.
1745 if (Op0.getOpcode() == ISD::SRL) {
1746 if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
1747 if (const APInt *SA2 =
1748 TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
1749 unsigned C1 = SA2->getZExtValue();
1750 unsigned Opc = ISD::SHL;
1751 int Diff = ShAmt - C1;
1752 if (Diff < 0) {
1753 Diff = -Diff;
1754 Opc = ISD::SRL;
1755 }
1756 SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1757 return TLO.CombineTo(
1758 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1759 }
1760 }
1761 }
1762
1763 // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
1764 // are not demanded. This will likely allow the anyext to be folded away.
1765 // TODO - support non-uniform vector amounts.
1766 if (Op0.getOpcode() == ISD::ANY_EXTEND) {
1767 SDValue InnerOp = Op0.getOperand(0);
1768 EVT InnerVT = InnerOp.getValueType();
1769 unsigned InnerBits = InnerVT.getScalarSizeInBits();
1770 if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
1771 isTypeDesirableForOp(ISD::SHL, InnerVT)) {
1772 SDValue NarrowShl = TLO.DAG.getNode(
1773 ISD::SHL, dl, InnerVT, InnerOp,
1774 TLO.DAG.getShiftAmountConstant(ShAmt, InnerVT, dl));
1775 return TLO.CombineTo(
1776 Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1777 }
1778
1779 // Repeat the SHL optimization above in cases where an extension
1780 // intervenes: (shl (anyext (shr x, c1)), c2) to
1781 // (shl (anyext x), c2-c1). This requires that the bottom c1 bits
1782 // aren't demanded (as above) and that the shifted upper c1 bits of
1783 // x aren't demanded.
1784 // TODO - support non-uniform vector amounts.
1785 if (InnerOp.getOpcode() == ISD::SRL && Op0.hasOneUse() &&
1786 InnerOp.hasOneUse()) {
1787 if (const APInt *SA2 =
1788 TLO.DAG.getValidShiftAmountConstant(InnerOp, DemandedElts)) {
1789 unsigned InnerShAmt = SA2->getZExtValue();
1790 if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
1791 DemandedBits.getActiveBits() <=
1792 (InnerBits - InnerShAmt + ShAmt) &&
1793 DemandedBits.countr_zero() >= ShAmt) {
1794 SDValue NewSA =
1795 TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, ShiftVT);
1796 SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
1797 InnerOp.getOperand(0));
1798 return TLO.CombineTo(
1799 Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
1800 }
1801 }
1802 }
1803 }
1804
1805 APInt InDemandedMask = DemandedBits.lshr(ShAmt);
1806 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1807 Depth + 1)) {
1808 SDNodeFlags Flags = Op.getNode()->getFlags();
1809 if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
1810 // Disable the nsw and nuw flags. We can no longer guarantee that we
1811 // won't wrap after simplification.
1812 Flags.setNoSignedWrap(false);
1813 Flags.setNoUnsignedWrap(false);
1814 Op->setFlags(Flags);
1815 }
1816 return true;
1817 }
1818 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1819 Known.Zero <<= ShAmt;
1820 Known.One <<= ShAmt;
1821 // low bits known zero.
1822 Known.Zero.setLowBits(ShAmt);
1823
1824 // Attempt to avoid multi-use ops if we don't need anything from them.
1825 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1826 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1827 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
1828 if (DemandedOp0) {
1829 SDValue NewOp = TLO.DAG.getNode(ISD::SHL, dl, VT, DemandedOp0, Op1);
1830 return TLO.CombineTo(Op, NewOp);
1831 }
1832 }
1833
1834 // Try shrinking the operation as long as the shift amount will still be
1835 // in range.
1836 if ((ShAmt < DemandedBits.getActiveBits()) &&
1837 ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1838 return true;
1839
1840 // Narrow shift to lower half - similar to ShrinkDemandedOp.
1841 // (shl i64:x, K) -> (i64 zero_extend (shl (i32 (trunc i64:x)), K))
1842 // Only do this if we demand the upper half so the knownbits are correct.
1843 unsigned HalfWidth = BitWidth / 2;
1844 if ((BitWidth % 2) == 0 && !VT.isVector() && ShAmt < HalfWidth &&
1845 DemandedBits.countLeadingOnes() >= HalfWidth) {
1846 EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), HalfWidth);
1847 if (isNarrowingProfitable(VT, HalfVT) &&
1848 isTypeDesirableForOp(ISD::SHL, HalfVT) &&
1849 isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
1850 (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, HalfVT))) {
1851 // If we're demanding the upper bits at all, we must ensure
1852 // that the upper bits of the shift result are known to be zero,
1853 // which is equivalent to the narrow shift being NUW.
1854 if (bool IsNUW = (Known.countMinLeadingZeros() >= HalfWidth)) {
1855 bool IsNSW = Known.countMinSignBits() > HalfWidth;
1856 SDNodeFlags Flags;
1857 Flags.setNoSignedWrap(IsNSW);
1858 Flags.setNoUnsignedWrap(IsNUW);
1859 SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
1860 SDValue NewShiftAmt = TLO.DAG.getShiftAmountConstant(
1861 ShAmt, HalfVT, dl, TLO.LegalTypes());
1862 SDValue NewShift = TLO.DAG.getNode(ISD::SHL, dl, HalfVT, NewOp,
1863 NewShiftAmt, Flags);
1864 SDValue NewExt =
1865 TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift);
1866 return TLO.CombineTo(Op, NewExt);
1867 }
1868 }
1869 }
1870 } else {
1871 // This is a variable shift, so we can't shift the demand mask by a known
1872 // amount. But if we are not demanding high bits, then we are not
1873 // demanding those bits from the pre-shifted operand either.
1874 if (unsigned CTLZ = DemandedBits.countl_zero()) {
1875 APInt DemandedFromOp(APInt::getLowBitsSet(BitWidth, BitWidth - CTLZ));
1876 if (SimplifyDemandedBits(Op0, DemandedFromOp, DemandedElts, Known, TLO,
1877 Depth + 1)) {
1878 SDNodeFlags Flags = Op.getNode()->getFlags();
1879 if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
1880 // Disable the nsw and nuw flags. We can no longer guarantee that we
1881 // won't wrap after simplification.
1882 Flags.setNoSignedWrap(false);
1883 Flags.setNoUnsignedWrap(false);
1884 Op->setFlags(Flags);
1885 }
1886 return true;
1887 }
1888 Known.resetAll();
1889 }
1890 }
1891
1892 // If we are only demanding sign bits then we can use the shift source
1893 // directly.
1894 if (const APInt *MaxSA =
1895 TLO.DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
1896 unsigned ShAmt = MaxSA->getZExtValue();
1897 unsigned NumSignBits =
1898 TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
1899 unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
1900 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
1901 return TLO.CombineTo(Op, Op0);
1902 }
1903 break;
1904 }
1905 case ISD::SRL: {
1906 SDValue Op0 = Op.getOperand(0);
1907 SDValue Op1 = Op.getOperand(1);
1908 EVT ShiftVT = Op1.getValueType();
1909
1910 // Try to match AVG patterns.
1911 if (SDValue AVG = combineShiftToAVG(Op, TLO.DAG, *this, DemandedBits,
1912 DemandedElts, Depth + 1))
1913 return TLO.CombineTo(Op, AVG);
1914
1915 if (const APInt *SA =
1916 TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
1917 unsigned ShAmt = SA->getZExtValue();
1918 if (ShAmt == 0)
1919 return TLO.CombineTo(Op, Op0);
1920
1921 // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
1922 // single shift. We can do this if the top bits (which are shifted out)
1923 // are never demanded.
1924 // TODO - support non-uniform vector amounts.
1925 if (Op0.getOpcode() == ISD::SHL) {
1926 if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
1927 if (const APInt *SA2 =
1928 TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
1929 unsigned C1 = SA2->getZExtValue();
1930 unsigned Opc = ISD::SRL;
1931 int Diff = ShAmt - C1;
1932 if (Diff < 0) {
1933 Diff = -Diff;
1934 Opc = ISD::SHL;
1935 }
1936 SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1937 return TLO.CombineTo(
1938 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1939 }
1940 }
1941 }
1942
1943 APInt InDemandedMask = (DemandedBits << ShAmt);
1944
1945 // If the shift is exact, then it does demand the low bits (and knows that
1946 // they are zero).
1947 if (Op->getFlags().hasExact())
1948 InDemandedMask.setLowBits(ShAmt);
1949
1950 // Narrow shift to lower half - similar to ShrinkDemandedOp.
1951 // (srl i64:x, K) -> (i64 zero_extend (srl (i32 (trunc i64:x)), K))
1952 if ((BitWidth % 2) == 0 && !VT.isVector()) {
1954 EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), BitWidth / 2);
1955 if (isNarrowingProfitable(VT, HalfVT) &&
1956 isTypeDesirableForOp(ISD::SRL, HalfVT) &&
1957 isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
1958 (!TLO.LegalOperations() || isOperationLegal(ISD::SRL, HalfVT)) &&
1959 ((InDemandedMask.countLeadingZeros() >= (BitWidth / 2)) ||
1960 TLO.DAG.MaskedValueIsZero(Op0, HiBits))) {
1961 SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
1962 SDValue NewShiftAmt = TLO.DAG.getShiftAmountConstant(
1963 ShAmt, HalfVT, dl, TLO.LegalTypes());
1964 SDValue NewShift =
1965 TLO.DAG.getNode(ISD::SRL, dl, HalfVT, NewOp, NewShiftAmt);
1966 return TLO.CombineTo(
1967 Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift));
1968 }
1969 }
1970
1971 // Compute the new bits that are at the top now.
1972 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1973 Depth + 1))
1974 return true;
1975 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1976 Known.Zero.lshrInPlace(ShAmt);
1977 Known.One.lshrInPlace(ShAmt);
1978 // High bits known zero.
1979 Known.Zero.setHighBits(ShAmt);
1980
1981 // Attempt to avoid multi-use ops if we don't need anything from them.
1982 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1983 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1984 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
1985 if (DemandedOp0) {
1986 SDValue NewOp = TLO.DAG.getNode(ISD::SRL, dl, VT, DemandedOp0, Op1);
1987 return TLO.CombineTo(Op, NewOp);
1988 }
1989 }
1990 } else {
1991 // Use generic knownbits computation as it has support for non-uniform
1992 // shift amounts.
1993 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1994 }
1995 break;
1996 }
1997 case ISD::SRA: {
1998 SDValue Op0 = Op.getOperand(0);
1999 SDValue Op1 = Op.getOperand(1);
2000 EVT ShiftVT = Op1.getValueType();
2001
2002 // If we only want bits that already match the signbit then we don't need
2003 // to shift.
2004 unsigned NumHiDemandedBits = BitWidth - DemandedBits.countr_zero();
2005 if (TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1) >=
2006 NumHiDemandedBits)
2007 return TLO.CombineTo(Op, Op0);
2008
2009 // If this is an arithmetic shift right and only the low-bit is set, we can
2010 // always convert this into a logical shr, even if the shift amount is
2011 // variable. The low bit of the shift cannot be an input sign bit unless
2012 // the shift amount is >= the size of the datatype, which is undefined.
2013 if (DemandedBits.isOne())
2014 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2015
2016 // Try to match AVG patterns.
2017 if (SDValue AVG = combineShiftToAVG(Op, TLO.DAG, *this, DemandedBits,
2018 DemandedElts, Depth + 1))
2019 return TLO.CombineTo(Op, AVG);
2020
2021 if (const APInt *SA =
2022 TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
2023 unsigned ShAmt = SA->getZExtValue();
2024 if (ShAmt == 0)
2025 return TLO.CombineTo(Op, Op0);
2026
2027 // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target
2028 // supports sext_inreg.
2029 if (Op0.getOpcode() == ISD::SHL) {
2030 if (const APInt *InnerSA =
2031 TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
2032 unsigned LowBits = BitWidth - ShAmt;
2033 EVT ExtVT = EVT::getIntegerVT(*TLO.DAG.getContext(), LowBits);
2034 if (VT.isVector())
2035 ExtVT = EVT::getVectorVT(*TLO.DAG.getContext(), ExtVT,
2037
2038 if (*InnerSA == ShAmt) {
2039 if (!TLO.LegalOperations() ||
2041 return TLO.CombineTo(
2042 Op, TLO.DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT,
2043 Op0.getOperand(0),
2044 TLO.DAG.getValueType(ExtVT)));
2045
2046 // Even if we can't convert to sext_inreg, we might be able to
2047 // remove this shift pair if the input is already sign extended.
2048 unsigned NumSignBits =
2049 TLO.DAG.ComputeNumSignBits(Op0.getOperand(0), DemandedElts);
2050 if (NumSignBits > ShAmt)
2051 return TLO.CombineTo(Op, Op0.getOperand(0));
2052 }
2053 }
2054 }
2055
2056 APInt InDemandedMask = (DemandedBits << ShAmt);
2057
2058 // If the shift is exact, then it does demand the low bits (and knows that
2059 // they are zero).
2060 if (Op->getFlags().hasExact())
2061 InDemandedMask.setLowBits(ShAmt);
2062
2063 // If any of the demanded bits are produced by the sign extension, we also
2064 // demand the input sign bit.
2065 if (DemandedBits.countl_zero() < ShAmt)
2066 InDemandedMask.setSignBit();
2067
2068 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
2069 Depth + 1))
2070 return true;
2071 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2072 Known.Zero.lshrInPlace(ShAmt);
2073 Known.One.lshrInPlace(ShAmt);
2074
2075 // If the input sign bit is known to be zero, or if none of the top bits
2076 // are demanded, turn this into an unsigned shift right.
2077 if (Known.Zero[BitWidth - ShAmt - 1] ||
2078 DemandedBits.countl_zero() >= ShAmt) {
2079 SDNodeFlags Flags;
2080 Flags.setExact(Op->getFlags().hasExact());
2081 return TLO.CombineTo(
2082 Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
2083 }
2084
2085 int Log2 = DemandedBits.exactLogBase2();
2086 if (Log2 >= 0) {
2087 // The bit must come from the sign.
2088 SDValue NewSA = TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, ShiftVT);
2089 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
2090 }
2091
2092 if (Known.One[BitWidth - ShAmt - 1])
2093 // New bits are known one.
2094 Known.One.setHighBits(ShAmt);
2095
2096 // Attempt to avoid multi-use ops if we don't need anything from them.
2097 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2098 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2099 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
2100 if (DemandedOp0) {
2101 SDValue NewOp = TLO.DAG.getNode(ISD::SRA, dl, VT, DemandedOp0, Op1);
2102 return TLO.CombineTo(Op, NewOp);
2103 }
2104 }
2105 }
2106 break;
2107 }
2108 case ISD::FSHL:
2109 case ISD::FSHR: {
2110 SDValue Op0 = Op.getOperand(0);
2111 SDValue Op1 = Op.getOperand(1);
2112 SDValue Op2 = Op.getOperand(2);
2113 bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
2114
2115 if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
2116 unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2117
2118 // For fshl, 0-shift returns the 1st arg.
2119 // For fshr, 0-shift returns the 2nd arg.
2120 if (Amt == 0) {
2121 if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
2122 Known, TLO, Depth + 1))
2123 return true;
2124 break;
2125 }
2126
2127 // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
2128 // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
2129 APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
2130 APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
2131 if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2132 Depth + 1))
2133 return true;
2134 if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
2135 Depth + 1))
2136 return true;
2137
2138 Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt));
2139 Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt));
2140 Known.One.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
2141 Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
2142 Known = Known.unionWith(Known2);
2143
2144 // Attempt to avoid multi-use ops if we don't need anything from them.
2145 if (!Demanded0.isAllOnes() || !Demanded1.isAllOnes() ||
2146 !DemandedElts.isAllOnes()) {
2147 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2148 Op0, Demanded0, DemandedElts, TLO.DAG, Depth + 1);
2149 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2150 Op1, Demanded1, DemandedElts, TLO.DAG, Depth + 1);
2151 if (DemandedOp0 || DemandedOp1) {
2152 DemandedOp0 = DemandedOp0 ? DemandedOp0 : Op0;
2153 DemandedOp1 = DemandedOp1 ? DemandedOp1 : Op1;
2154 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedOp0,
2155 DemandedOp1, Op2);
2156 return TLO.CombineTo(Op, NewOp);
2157 }
2158 }
2159 }
2160
2161 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2162 if (isPowerOf2_32(BitWidth)) {
2163 APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
2164 if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts,
2165 Known2, TLO, Depth + 1))
2166 return true;
2167 }
2168 break;
2169 }
2170 case ISD::ROTL:
2171 case ISD::ROTR: {
2172 SDValue Op0 = Op.getOperand(0);
2173 SDValue Op1 = Op.getOperand(1);
2174 bool IsROTL = (Op.getOpcode() == ISD::ROTL);
2175
2176 // If we're rotating an 0/-1 value, then it stays an 0/-1 value.
2177 if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1))
2178 return TLO.CombineTo(Op, Op0);
2179
2180 if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
2181 unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2182 unsigned RevAmt = BitWidth - Amt;
2183
2184 // rotl: (Op0 << Amt) | (Op0 >> (BW - Amt))
2185 // rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt)
2186 APInt Demanded0 = DemandedBits.rotr(IsROTL ? Amt : RevAmt);
2187 if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2188 Depth + 1))
2189 return true;
2190
2191 // rot*(x, 0) --> x
2192 if (Amt == 0)
2193 return TLO.CombineTo(Op, Op0);
2194
2195 // See if we don't demand either half of the rotated bits.
2196 if ((!TLO.LegalOperations() || isOperationLegal(ISD::SHL, VT)) &&
2197 DemandedBits.countr_zero() >= (IsROTL ? Amt : RevAmt)) {
2198 Op1 = TLO.DAG.getConstant(IsROTL ? Amt : RevAmt, dl, Op1.getValueType());
2199 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, Op1));
2200 }
2201 if ((!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT)) &&
2202 DemandedBits.countl_zero() >= (IsROTL ? RevAmt : Amt)) {
2203 Op1 = TLO.DAG.getConstant(IsROTL ? RevAmt : Amt, dl, Op1.getValueType());
2204 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2205 }
2206 }
2207
2208 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2209 if (isPowerOf2_32(BitWidth)) {
2210 APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
2211 if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO,
2212 Depth + 1))
2213 return true;
2214 }
2215 break;
2216 }
2217 case ISD::SMIN:
2218 case ISD::SMAX:
2219 case ISD::UMIN:
2220 case ISD::UMAX: {
2221 unsigned Opc = Op.getOpcode();
2222 SDValue Op0 = Op.getOperand(0);
2223 SDValue Op1 = Op.getOperand(1);
2224
2225 // If we're only demanding signbits, then we can simplify to OR/AND node.
2226 unsigned BitOp =
2227 (Opc == ISD::SMIN || Opc == ISD::UMAX) ? ISD::OR : ISD::AND;
2228 unsigned NumSignBits =
2229 std::min(TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1),
2230 TLO.DAG.ComputeNumSignBits(Op1, DemandedElts, Depth + 1));
2231 unsigned NumDemandedUpperBits = BitWidth - DemandedBits.countr_zero();
2232 if (NumSignBits >= NumDemandedUpperBits)
2233 return TLO.CombineTo(Op, TLO.DAG.getNode(BitOp, SDLoc(Op), VT, Op0, Op1));
2234
2235 // Check if one arg is always less/greater than (or equal) to the other arg.
2236 KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
2237 KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
2238 switch (Opc) {
2239 case ISD::SMIN:
2240 if (std::optional<bool> IsSLE = KnownBits::sle(Known0, Known1))
2241 return TLO.CombineTo(Op, *IsSLE ? Op0 : Op1);
2242 if (std::optional<bool> IsSLT = KnownBits::slt(Known0, Known1))
2243 return TLO.CombineTo(Op, *IsSLT ? Op0 : Op1);
2244 Known = KnownBits::smin(Known0, Known1);
2245 break;
2246 case ISD::SMAX:
2247 if (std::optional<bool> IsSGE = KnownBits::sge(Known0, Known1))
2248 return TLO.CombineTo(Op, *IsSGE ? Op0 : Op1);
2249 if (std::optional<bool> IsSGT = KnownBits::sgt(Known0, Known1))
2250 return TLO.CombineTo(Op, *IsSGT ? Op0 : Op1);
2251 Known = KnownBits::smax(Known0, Known1);
2252 break;
2253 case ISD::UMIN:
2254 if (std::optional<bool> IsULE = KnownBits::ule(Known0, Known1))
2255 return TLO.CombineTo(Op, *IsULE ? Op0 : Op1);
2256 if (std::optional<bool> IsULT = KnownBits::ult(Known0, Known1))
2257 return TLO.CombineTo(Op, *IsULT ? Op0 : Op1);
2258 Known = KnownBits::umin(Known0, Known1);
2259 break;
2260 case ISD::UMAX:
2261 if (std::optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
2262 return TLO.CombineTo(Op, *IsUGE ? Op0 : Op1);
2263 if (std::optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
2264 return TLO.CombineTo(Op, *IsUGT ? Op0 : Op1);
2265 Known = KnownBits::umax(Known0, Known1);
2266 break;
2267 }
2268 break;
2269 }
2270 case ISD::BITREVERSE: {
2271 SDValue Src = Op.getOperand(0);
2272 APInt DemandedSrcBits = DemandedBits.reverseBits();
2273 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2274 Depth + 1))
2275 return true;
2276 Known.One = Known2.One.reverseBits();
2277 Known.Zero = Known2.Zero.reverseBits();
2278 break;
2279 }
2280 case ISD::BSWAP: {
2281 SDValue Src = Op.getOperand(0);
2282
2283 // If the only bits demanded come from one byte of the bswap result,
2284 // just shift the input byte into position to eliminate the bswap.
2285 unsigned NLZ = DemandedBits.countl_zero();
2286 unsigned NTZ = DemandedBits.countr_zero();
2287
2288 // Round NTZ down to the next byte. If we have 11 trailing zeros, then
2289 // we need all the bits down to bit 8. Likewise, round NLZ. If we
2290 // have 14 leading zeros, round to 8.
2291 NLZ = alignDown(NLZ, 8);
2292 NTZ = alignDown(NTZ, 8);
2293 // If we need exactly one byte, we can do this transformation.
2294 if (BitWidth - NLZ - NTZ == 8) {
2295 // Replace this with either a left or right shift to get the byte into
2296 // the right place.
2297 unsigned ShiftOpcode = NLZ > NTZ ? ISD::SRL : ISD::SHL;
2298 if (!TLO.LegalOperations() || isOperationLegal(ShiftOpcode, VT)) {
2299 unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ;
2300 SDValue ShAmt = TLO.DAG.getShiftAmountConstant(ShiftAmount, VT, dl);
2301 SDValue NewOp = TLO.DAG.getNode(ShiftOpcode, dl, VT, Src, ShAmt);
2302 return TLO.CombineTo(Op, NewOp);
2303 }
2304 }
2305
2306 APInt DemandedSrcBits = DemandedBits.byteSwap();
2307 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2308 Depth + 1))
2309 return true;
2310 Known.One = Known2.One.byteSwap();
2311 Known.Zero = Known2.Zero.byteSwap();
2312 break;
2313 }
2314 case ISD::CTPOP: {
2315 // If only 1 bit is demanded, replace with PARITY as long as we're before
2316 // op legalization.
2317 // FIXME: Limit to scalars for now.
2318 if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
2319 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
2320 Op.getOperand(0)));
2321
2322 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2323 break;
2324 }
2326 SDValue Op0 = Op.getOperand(0);
2327 EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2328 unsigned ExVTBits = ExVT.getScalarSizeInBits();
2329
2330 // If we only care about the highest bit, don't bother shifting right.
2331 if (DemandedBits.isSignMask()) {
2332 unsigned MinSignedBits =
2333 TLO.DAG.ComputeMaxSignificantBits(Op0, DemandedElts, Depth + 1);
2334 bool AlreadySignExtended = ExVTBits >= MinSignedBits;
2335 // However if the input is already sign extended we expect the sign
2336 // extension to be dropped altogether later and do not simplify.
2337 if (!AlreadySignExtended) {
2338 // Compute the correct shift amount type, which must be getShiftAmountTy
2339 // for scalar types after legalization.
2340 SDValue ShiftAmt =
2341 TLO.DAG.getShiftAmountConstant(BitWidth - ExVTBits, VT, dl);
2342 return TLO.CombineTo(Op,
2343 TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
2344 }
2345 }
2346
2347 // If none of the extended bits are demanded, eliminate the sextinreg.
2348 if (DemandedBits.getActiveBits() <= ExVTBits)
2349 return TLO.CombineTo(Op, Op0);
2350
2351 APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);
2352
2353 // Since the sign extended bits are demanded, we know that the sign
2354 // bit is demanded.
2355 InputDemandedBits.setBit(ExVTBits - 1);
2356
2357 if (SimplifyDemandedBits(Op0, InputDemandedBits, DemandedElts, Known, TLO,
2358 Depth + 1))
2359 return true;
2360 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2361
2362 // If the sign bit of the input is known set or clear, then we know the
2363 // top bits of the result.
2364
2365 // If the input sign bit is known zero, convert this into a zero extension.
2366 if (Known.Zero[ExVTBits - 1])
2367 return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT));
2368
2369 APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
2370 if (Known.One[ExVTBits - 1]) { // Input sign bit known set
2371 Known.One.setBitsFrom(ExVTBits);
2372 Known.Zero &= Mask;
2373 } else { // Input sign bit unknown
2374 Known.Zero &= Mask;
2375 Known.One &= Mask;
2376 }
2377 break;
2378 }
2379 case ISD::BUILD_PAIR: {
2380 EVT HalfVT = Op.getOperand(0).getValueType();
2381 unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
2382
2383 APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
2384 APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
2385
2386 KnownBits KnownLo, KnownHi;
2387
2388 if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
2389 return true;
2390
2391 if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
2392 return true;
2393
2394 Known = KnownHi.concat(KnownLo);
2395 break;
2396 }
2398 if (VT.isScalableVector())
2399 return false;
2400 [[fallthrough]];
2401 case ISD::ZERO_EXTEND: {
2402 SDValue Src = Op.getOperand(0);
2403 EVT SrcVT = Src.getValueType();
2404 unsigned InBits = SrcVT.getScalarSizeInBits();
2405 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2406 bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
2407
2408 // If none of the top bits are demanded, convert this into an any_extend.
2409 if (DemandedBits.getActiveBits() <= InBits) {
2410 // If we only need the non-extended bits of the bottom element
2411 // then we can just bitcast to the result.
2412 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2413 VT.getSizeInBits() == SrcVT.getSizeInBits())
2414 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2415
2416 unsigned Opc =
2418 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2419 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2420 }
2421
2422 SDNodeFlags Flags = Op->getFlags();
2423 APInt InDemandedBits = DemandedBits.trunc(InBits);
2424 APInt InDemandedElts = DemandedElts.zext(InElts);
2425 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2426 Depth + 1)) {
2427 if (Flags.hasNonNeg()) {
2428 Flags.setNonNeg(false);
2429 Op->setFlags(Flags);
2430 }
2431 return true;
2432 }
2433 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2434 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2435 Known = Known.zext(BitWidth);
2436
2437 // Attempt to avoid multi-use ops if we don't need anything from them.
2438 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2439 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2440 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2441 break;
2442 }
2444 if (VT.isScalableVector())
2445 return false;
2446 [[fallthrough]];
2447 case ISD::SIGN_EXTEND: {
2448 SDValue Src = Op.getOperand(0);
2449 EVT SrcVT = Src.getValueType();
2450 unsigned InBits = SrcVT.getScalarSizeInBits();
2451 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2452 bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
2453
2454 APInt InDemandedElts = DemandedElts.zext(InElts);
2455 APInt InDemandedBits = DemandedBits.trunc(InBits);
2456
2457 // Since some of the sign extended bits are demanded, we know that the sign
2458 // bit is demanded.
2459 InDemandedBits.setBit(InBits - 1);
2460
2461 // If none of the top bits are demanded, convert this into an any_extend.
2462 if (DemandedBits.getActiveBits() <= InBits) {
2463 // If we only need the non-extended bits of the bottom element
2464 // then we can just bitcast to the result.
2465 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2466 VT.getSizeInBits() == SrcVT.getSizeInBits())
2467 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2468
2469 // Don't lose an all signbits 0/-1 splat on targets with 0/-1 booleans.
2471 TLO.DAG.ComputeNumSignBits(Src, InDemandedElts, Depth + 1) !=
2472 InBits) {
2473 unsigned Opc =
2475 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2476 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2477 }
2478 }
2479
2480 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2481 Depth + 1))
2482 return true;
2483 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2484 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2485
2486 // If the sign bit is known one, the top bits match.
2487 Known = Known.sext(BitWidth);
2488
2489 // If the sign bit is known zero, convert this to a zero extend.
2490 if (Known.isNonNegative()) {
2491 unsigned Opc =
2493 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT)) {
2494 SDNodeFlags Flags;
2495 if (!IsVecInReg)
2496 Flags.setNonNeg(true);
2497 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src, Flags));
2498 }
2499 }
2500
2501 // Attempt to avoid multi-use ops if we don't need anything from them.
2502 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2503 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2504 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2505 break;
2506 }
2508 if (VT.isScalableVector())
2509 return false;
2510 [[fallthrough]];
2511 case ISD::ANY_EXTEND: {
2512 SDValue Src = Op.getOperand(0);
2513 EVT SrcVT = Src.getValueType();
2514 unsigned InBits = SrcVT.getScalarSizeInBits();
2515 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2516 bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
2517
2518 // If we only need the bottom element then we can just bitcast.
2519 // TODO: Handle ANY_EXTEND?
2520 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2521 VT.getSizeInBits() == SrcVT.getSizeInBits())
2522 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2523
2524 APInt InDemandedBits = DemandedBits.trunc(InBits);
2525 APInt InDemandedElts = DemandedElts.zext(InElts);
2526 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2527 Depth + 1))
2528 return true;
2529 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2530 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2531 Known = Known.anyext(BitWidth);
2532
2533 // Attempt to avoid multi-use ops if we don't need anything from them.
2534 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2535 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2536 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2537 break;
2538 }
2539 case ISD::TRUNCATE: {
2540 SDValue Src = Op.getOperand(0);
2541
2542 // Simplify the input, using demanded bit information, and compute the known
2543 // zero/one bits live out.
2544 unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
2545 APInt TruncMask = DemandedBits.zext(OperandBitWidth);
2546 if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
2547 Depth + 1))
2548 return true;
2549 Known = Known.trunc(BitWidth);
2550
2551 // Attempt to avoid multi-use ops if we don't need anything from them.
2552 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2553 Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
2554 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));
2555
2556 // If the input is only used by this truncate, see if we can shrink it based
2557 // on the known demanded bits.
2558 switch (Src.getOpcode()) {
2559 default:
2560 break;
2561 case ISD::SRL:
2562 // Shrink SRL by a constant if none of the high bits shifted in are
2563 // demanded.
2564 if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
2565 // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
2566 // undesirable.
2567 break;
2568
2569 if (Src.getNode()->hasOneUse()) {
2570 const APInt *ShAmtC =
2571 TLO.DAG.getValidShiftAmountConstant(Src, DemandedElts);
2572 if (!ShAmtC || ShAmtC->uge(BitWidth))
2573 break;
2574 uint64_t ShVal = ShAmtC->getZExtValue();
2575
2576 APInt HighBits =
2577 APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
2578 HighBits.lshrInPlace(ShVal);
2579 HighBits = HighBits.trunc(BitWidth);
2580
2581 if (!(HighBits & DemandedBits)) {
2582 // None of the shifted in bits are needed. Add a truncate of the
2583 // shift input, then shift it.
2584 SDValue NewShAmt =
2585 TLO.DAG.getShiftAmountConstant(ShVal, VT, dl, TLO.LegalTypes());
2586 SDValue NewTrunc =
2587 TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
2588 return TLO.CombineTo(
2589 Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, NewShAmt));
2590 }
2591 }
2592 break;
2593 }
2594
2595 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2596 break;
2597 }
2598 case ISD::AssertZext: {
2599 // AssertZext demands all of the high bits, plus any of the low bits
2600 // demanded by its users.
2601 EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2603 if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
2604 TLO, Depth + 1))
2605 return true;
2606 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2607
2608 Known.Zero |= ~InMask;
2609 Known.One &= (~Known.Zero);
2610 break;
2611 }
2613 SDValue Src = Op.getOperand(0);
2614 SDValue Idx = Op.getOperand(1);
2615 ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
2616 unsigned EltBitWidth = Src.getScalarValueSizeInBits();
2617
2618 if (SrcEltCnt.isScalable())
2619 return false;
2620
2621 // Demand the bits from every vector element without a constant index.
2622 unsigned NumSrcElts = SrcEltCnt.getFixedValue();
2623 APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
2624 if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
2625 if (CIdx->getAPIntValue().ult(NumSrcElts))
2626 DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
2627
2628 // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know
2629 // anything about the extended bits.
2630 APInt DemandedSrcBits = DemandedBits;
2631 if (BitWidth > EltBitWidth)
2632 DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);
2633
2634 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
2635 Depth + 1))
2636 return true;
2637
2638 // Attempt to avoid multi-use ops if we don't need anything from them.
2639 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2640 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2641 Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2642 SDValue NewOp =
2643 TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx);
2644 return TLO.CombineTo(Op, NewOp);
2645 }
2646 }
2647
2648 Known = Known2;
2649 if (BitWidth > EltBitWidth)
2650 Known = Known.anyext(BitWidth);
2651 break;
2652 }
2653 case ISD::BITCAST: {
2654 if (VT.isScalableVector())
2655 return false;
2656 SDValue Src = Op.getOperand(0);
2657 EVT SrcVT = Src.getValueType();
2658 unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
2659
2660 // If this is an FP->Int bitcast and if the sign bit is the only
2661 // thing demanded, turn this into a FGETSIGN.
2662 if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
2663 DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
2664 SrcVT.isFloatingPoint()) {
2665 bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
2666 bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
2667 if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
2668 SrcVT != MVT::f128) {
2669 // Cannot eliminate/lower SHL for f128 yet.
2670 EVT Ty = OpVTLegal ? VT : MVT::i32;
2671 // Make a FGETSIGN + SHL to move the sign bit into the appropriate
2672 // place. We expect the SHL to be eliminated by other optimizations.
2673 SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src);
2674 unsigned OpVTSizeInBits = Op.getValueSizeInBits();
2675 if (!OpVTLegal && OpVTSizeInBits > 32)
2676 Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
2677 unsigned ShVal = Op.getValueSizeInBits() - 1;
2678 SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
2679 return TLO.CombineTo(Op,
2680 TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
2681 }
2682 }
2683
2684 // Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
2685 // Demand the elt/bit if any of the original elts/bits are demanded.
2686 if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) {
2687 unsigned Scale = BitWidth / NumSrcEltBits;
2688 unsigned NumSrcElts = SrcVT.getVectorNumElements();
2689 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2690 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2691 for (unsigned i = 0; i != Scale; ++i) {
2692 unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
2693 unsigned BitOffset = EltOffset * NumSrcEltBits;
2694 APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
2695 if (!Sub.isZero()) {
2696 DemandedSrcBits |= Sub;
2697 for (unsigned j = 0; j != NumElts; ++j)
2698 if (DemandedElts[j])
2699 DemandedSrcElts.setBit((j * Scale) + i);
2700 }
2701 }
2702
2703 APInt KnownSrcUndef, KnownSrcZero;
2704 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2705 KnownSrcZero, TLO, Depth + 1))
2706 return true;
2707
2708 KnownBits KnownSrcBits;
2709 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2710 KnownSrcBits, TLO, Depth + 1))
2711 return true;
2712 } else if (IsLE && (NumSrcEltBits % BitWidth) == 0) {
2713 // TODO - bigendian once we have test coverage.
2714 unsigned Scale = NumSrcEltBits / BitWidth;
2715 unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
2716 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2717 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2718 for (unsigned i = 0; i != NumElts; ++i)
2719 if (DemandedElts[i]) {
2720 unsigned Offset = (i % Scale) * BitWidth;
2721 DemandedSrcBits.insertBits(DemandedBits, Offset);
2722 DemandedSrcElts.setBit(i / Scale);
2723 }
2724
2725 if (SrcVT.isVector()) {
2726 APInt KnownSrcUndef, KnownSrcZero;
2727 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2728 KnownSrcZero, TLO, Depth + 1))
2729 return true;
2730 }
2731
2732 KnownBits KnownSrcBits;
2733 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2734 KnownSrcBits, TLO, Depth + 1))
2735 return true;
2736
2737 // Attempt to avoid multi-use ops if we don't need anything from them.
2738 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2739 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2740 Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2741 SDValue NewOp = TLO.DAG.getBitcast(VT, DemandedSrc);
2742 return TLO.CombineTo(Op, NewOp);
2743 }
2744 }
2745 }
2746
2747 // If this is a bitcast, let computeKnownBits handle it. Only do this on a
2748 // recursive call where Known may be useful to the caller.
2749 if (Depth > 0) {
2750 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2751 return false;
2752 }
2753 break;
2754 }
2755 case ISD::MUL:
2756 if (DemandedBits.isPowerOf2()) {
2757 // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
2758 // If we demand exactly one bit N and we have "X * (C' << N)" where C' is
2759 // odd (has LSB set), then the left-shifted low bit of X is the answer.
2760 unsigned CTZ = DemandedBits.countr_zero();
2761 ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
2762 if (C && C->getAPIntValue().countr_zero() == CTZ) {
2763 SDValue AmtC = TLO.DAG.getShiftAmountConstant(CTZ, VT, dl);
2764 SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, Op.getOperand(0), AmtC);
2765 return TLO.CombineTo(Op, Shl);
2766 }
2767 }
2768 // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
2769 // X * X is odd iff X is odd.
2770 // 'Quadratic Reciprocity': X * X -> 0 for bit[1]
2771 if (Op.getOperand(0) == Op.getOperand(1) && DemandedBits.ult(4)) {
2772 SDValue One = TLO.DAG.getConstant(1, dl, VT);
2773 SDValue And1 = TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), One);
2774 return TLO.CombineTo(Op, And1);
2775 }
2776 [[fallthrough]];
2777 case ISD::ADD:
2778 case ISD::SUB: {
2779 // Add, Sub, and Mul don't demand any bits in positions beyond that
2780 // of the highest bit demanded of them.
2781 SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
2782 SDNodeFlags Flags = Op.getNode()->getFlags();
2783 unsigned DemandedBitsLZ = DemandedBits.countl_zero();
2784 APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
2785 KnownBits KnownOp0, KnownOp1;
2786 if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, KnownOp0, TLO,
2787 Depth + 1) ||
2788 SimplifyDemandedBits(Op1, LoMask, DemandedElts, KnownOp1, TLO,
2789 Depth + 1) ||
2790 // See if the operation should be performed at a smaller bit width.
2791 ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
2792 if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
2793 // Disable the nsw and nuw flags. We can no longer guarantee that we
2794 // won't wrap after simplification.
2795 Flags.setNoSignedWrap(false);
2796 Flags.setNoUnsignedWrap(false);
2797 Op->setFlags(Flags);
2798 }
2799 return true;
2800 }
2801
2802 // neg x with only low bit demanded is simply x.
2803 if (Op.getOpcode() == ISD::SUB && DemandedBits.isOne() &&
2804 isNullConstant(Op0))
2805 return TLO.CombineTo(Op, Op1);
2806
2807 // Attempt to avoid multi-use ops if we don't need anything from them.
2808 if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2809 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2810 Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2811 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2812 Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2813 if (DemandedOp0 || DemandedOp1) {
2814 Flags.setNoSignedWrap(false);
2815 Flags.setNoUnsignedWrap(false);
2816 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
2817 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
2818 SDValue NewOp =
2819 TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
2820 return TLO.CombineTo(Op, NewOp);
2821 }
2822 }
2823
2824 // If we have a constant operand, we may be able to turn it into -1 if we
2825 // do not demand the high bits. This can make the constant smaller to
2826 // encode, allow more general folding, or match specialized instruction
2827 // patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
2828 // is probably not useful (and could be detrimental).
2830 APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
2831 if (C && !C->isAllOnes() && !C->isOne() &&
2832 (C->getAPIntValue() | HighMask).isAllOnes()) {
2833 SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
2834 // Disable the nsw and nuw flags. We can no longer guarantee that we
2835 // won't wrap after simplification.
2836 Flags.setNoSignedWrap(false);
2837 Flags.setNoUnsignedWrap(false);
2838 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1, Flags);
2839 return TLO.CombineTo(Op, NewOp);
2840 }
2841
2842 // Match a multiply with a disguised negated-power-of-2 and convert to a
2843 // an equivalent shift-left amount.
2844 // Example: (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2845 auto getShiftLeftAmt = [&HighMask](SDValue Mul) -> unsigned {
2846 if (Mul.getOpcode() != ISD::MUL || !Mul.hasOneUse())
2847 return 0;
2848
2849 // Don't touch opaque constants. Also, ignore zero and power-of-2
2850 // multiplies. Those will get folded later.
2851 ConstantSDNode *MulC = isConstOrConstSplat(Mul.getOperand(1));
2852 if (MulC && !MulC->isOpaque() && !MulC->isZero() &&
2853 !MulC->getAPIntValue().isPowerOf2()) {
2854 APInt UnmaskedC = MulC->getAPIntValue() | HighMask;
2855 if (UnmaskedC.isNegatedPowerOf2())
2856 return (-UnmaskedC).logBase2();
2857 }
2858 return 0;
2859 };
2860
2861 auto foldMul = [&](ISD::NodeType NT, SDValue X, SDValue Y,
2862 unsigned ShlAmt) {
2863 SDValue ShlAmtC = TLO.DAG.getShiftAmountConstant(ShlAmt, VT, dl);
2864 SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, X, ShlAmtC);
2865 SDValue Res = TLO.DAG.getNode(NT, dl, VT, Y, Shl);
2866 return TLO.CombineTo(Op, Res);
2867 };
2868
2870 if (Op.getOpcode() == ISD::ADD) {
2871 // (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2872 if (unsigned ShAmt = getShiftLeftAmt(Op0))
2873 return foldMul(ISD::SUB, Op0.getOperand(0), Op1, ShAmt);
2874 // Op0 + (X * MulC) --> Op0 - (X << log2(-MulC))
2875 if (unsigned ShAmt = getShiftLeftAmt(Op1))
2876 return foldMul(ISD::SUB, Op1.getOperand(0), Op0, ShAmt);
2877 }
2878 if (Op.getOpcode() == ISD::SUB) {
2879 // Op0 - (X * MulC) --> Op0 + (X << log2(-MulC))
2880 if (unsigned ShAmt = getShiftLeftAmt(Op1))
2881 return foldMul(ISD::ADD, Op1.getOperand(0), Op0, ShAmt);
2882 }
2883 }
2884
2885 if (Op.getOpcode() == ISD::MUL) {
2886 Known = KnownBits::mul(KnownOp0, KnownOp1);
2887 } else { // Op.getOpcode() is either ISD::ADD or ISD::SUB.
2889 Op.getOpcode() == ISD::ADD, Flags.hasNoSignedWrap(),
2890 Flags.hasNoUnsignedWrap(), KnownOp0, KnownOp1);
2891 }
2892 break;
2893 }
2894 default:
2895 // We also ask the target about intrinsics (which could be specific to it).
2896 if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2897 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
2898 // TODO: Probably okay to remove after audit; here to reduce change size
2899 // in initial enablement patch for scalable vectors
2900 if (Op.getValueType().isScalableVector())
2901 break;
2902 if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
2903 Known, TLO, Depth))
2904 return true;
2905 break;
2906 }
2907
2908 // Just use computeKnownBits to compute output bits.
2909 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2910 break;
2911 }
2912
2913 // If we know the value of all of the demanded bits, return this as a
2914 // constant.
2915 if (!isTargetCanonicalConstantNode(Op) &&
2916 DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
2917 // Avoid folding to a constant if any OpaqueConstant is involved.
2918 const SDNode *N = Op.getNode();
2919 for (SDNode *Op :
2921 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
2922 if (C->isOpaque())
2923 return false;
2924 }
2925 if (VT.isInteger())
2926 return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
2927 if (VT.isFloatingPoint())
2928 return TLO.CombineTo(
2929 Op,
2930 TLO.DAG.getConstantFP(
2931 APFloat(TLO.DAG.EVTToAPFloatSemantics(VT), Known.One), dl, VT));
2932 }
2933
2934 // A multi use 'all demanded elts' simplify failed to find any knownbits.
2935 // Try again just for the original demanded elts.
2936 // Ensure we do this AFTER constant folding above.
2937 if (HasMultiUse && Known.isUnknown() && !OriginalDemandedElts.isAllOnes())
2938 Known = TLO.DAG.computeKnownBits(Op, OriginalDemandedElts, Depth);
2939
2940 return false;
2941}
// Wrapper used from the DAG combiner: runs SimplifyDemandedVectorElts with a
// TargetLoweringOpt built from the combiner's legalization state and, when a
// simplification was made, re-queues the node and commits the replacement
// recorded in TLO back to the DAG.
// NOTE(review): the opening line of this definition (original line 2943,
// carrying the function name and the first parameter) was dropped by the
// extraction; presumably it read
// "bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op," -- confirm
// against the original TargetLowering.cpp.
2942
2944 const APInt &DemandedElts,
2945 DAGCombinerInfo &DCI) const {
2946 SelectionDAG &DAG = DCI.DAG;
// Pass along whether type legalization / operation legalization have already
// run (derived from the combiner phase flags).
2947 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
2948 !DCI.isBeforeLegalizeOps());
2949
// KnownUndef/KnownZero are computed by the callee but not surfaced here;
// this wrapper only reports whether anything was simplified.
2950 APInt KnownUndef, KnownZero;
2951 bool Simplified =
2952 SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
2953 if (Simplified) {
// Revisit the node and apply the queued replacement to the DAG.
2954 DCI.AddToWorklist(Op.getNode());
2955 DCI.CommitTargetLoweringOpt(TLO);
2956 }
2957
2958 return Simplified;
2959}
2960
2961/// Given a vector binary operation and known undefined elements for each input
2962/// operand, compute whether each element of the output is undefined.
/// \returns a bitmask with one bit per vector element; a bit is set when the
/// corresponding result element constant-folds to undef.
/// NOTE(review): the opening line of this definition (original line 2963,
/// carrying the function name and the BO/SelectionDAG parameters) and the
/// first half of the "Vector binop only" assertion (original line 2967) were
/// dropped by the extraction -- confirm against the original
/// TargetLowering.cpp.
2964 const APInt &UndefOp0,
2965 const APInt &UndefOp1) {
2966 EVT VT = BO.getValueType();
2968 "Vector binop only");
2969
2970 EVT EltVT = VT.getVectorElementType();
// Non-fixed-length vectors are treated as a single lane here; the undef
// masks must be sized to match whatever count we use.
2971 unsigned NumElts = VT.isFixedLengthVector() ? VT.getVectorNumElements() : 1;
2972 assert(UndefOp0.getBitWidth() == NumElts &&
2973 UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
2974
// Fetch element Index of V as something foldable: UNDEF when the mask marks
// the element undef, the BUILD_VECTOR operand when it is a usable
// (FP, undef, or non-opaque integer) constant, and a null SDValue otherwise.
2975 auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
2976 const APInt &UndefVals) {
2977 if (UndefVals[Index])
2978 return DAG.getUNDEF(EltVT);
2979
2980 if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
2981 // Try hard to make sure that the getNode() call is not creating temporary
2982 // nodes. Ignore opaque integers because they do not constant fold.
2983 SDValue Elt = BV->getOperand(Index);
2984 auto *C = dyn_cast<ConstantSDNode>(Elt);
2985 if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
2986 return Elt;
2987 }
2988
2989 return SDValue();
2990 };
2991
2992 APInt KnownUndef = APInt::getZero(NumElts);
2993 for (unsigned i = 0; i != NumElts; ++i) {
2994 // If both inputs for this element are either constant or undef and match
2995 // the element type, compute the constant/undef result for this element of
2996 // the vector.
2997 // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
2998 // not handle FP constants. The code within getNode() should be refactored
2999 // to avoid the danger of creating a bogus temporary node here.
3000 SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
3001 SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
3002 if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
3003 if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
3004 KnownUndef.setBit(i);
3005 }
3006 return KnownUndef;
3007}
3008
3010 SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
3011 APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
3012 bool AssumeSingleUse) const {
3013 EVT VT = Op.getValueType();
3014 unsigned Opcode = Op.getOpcode();
3015 APInt DemandedElts = OriginalDemandedElts;
3016 unsigned NumElts = DemandedElts.getBitWidth();
3017 assert(VT.isVector() && "Expected vector op");
3018
3019 KnownUndef = KnownZero = APInt::getZero(NumElts);
3020
3021 const TargetLowering &TLI = TLO.DAG.getTargetLoweringInfo();
3022 if (!TLI.shouldSimplifyDemandedVectorElts(Op, TLO))
3023 return false;
3024
3025 // TODO: For now we assume we know nothing about scalable vectors.
3026 if (VT.isScalableVector())
3027 return false;
3028
3029 assert(VT.getVectorNumElements() == NumElts &&
3030 "Mask size mismatches value type element count!");
3031
3032 // Undef operand.
3033 if (Op.isUndef()) {
3034 KnownUndef.setAllBits();
3035 return false;
3036 }
3037
3038 // If Op has other users, assume that all elements are needed.
3039 if (!AssumeSingleUse && !Op.getNode()->hasOneUse())
3040 DemandedElts.setAllBits();
3041
3042 // Not demanding any elements from Op.
3043 if (DemandedElts == 0) {
3044 KnownUndef.setAllBits();
3045 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3046 }
3047
3048 // Limit search depth.
3050 return false;
3051
3052 SDLoc DL(Op);
3053 unsigned EltSizeInBits = VT.getScalarSizeInBits();
3054 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
3055
3056 // Helper for demanding the specified elements and all the bits of both binary
3057 // operands.
3058 auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
3059 SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op0, DemandedElts,
3060 TLO.DAG, Depth + 1);
3061 SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op1, DemandedElts,
3062 TLO.DAG, Depth + 1);
3063 if (NewOp0 || NewOp1) {
3064 SDValue NewOp =
3065 TLO.DAG.getNode(Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0,
3066 NewOp1 ? NewOp1 : Op1, Op->getFlags());
3067 return TLO.CombineTo(Op, NewOp);
3068 }
3069 return false;
3070 };
3071
3072 switch (Opcode) {
3073 case ISD::SCALAR_TO_VECTOR: {
3074 if (!DemandedElts[0]) {
3075 KnownUndef.setAllBits();
3076 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3077 }
3078 SDValue ScalarSrc = Op.getOperand(0);
3079 if (ScalarSrc.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
3080 SDValue Src = ScalarSrc.getOperand(0);
3081 SDValue Idx = ScalarSrc.getOperand(1);
3082 EVT SrcVT = Src.getValueType();
3083
3084 ElementCount SrcEltCnt = SrcVT.getVectorElementCount();
3085
3086 if (SrcEltCnt.isScalable())
3087 return false;
3088
3089 unsigned NumSrcElts = SrcEltCnt.getFixedValue();
3090 if (isNullConstant(Idx)) {
3091 APInt SrcDemandedElts = APInt::getOneBitSet(NumSrcElts, 0);
3092 APInt SrcUndef = KnownUndef.zextOrTrunc(NumSrcElts);
3093 APInt SrcZero = KnownZero.zextOrTrunc(NumSrcElts);
3094 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3095 TLO, Depth + 1))
3096 return true;
3097 }
3098 }
3099 KnownUndef.setHighBits(NumElts - 1);
3100 break;
3101 }
3102 case ISD::BITCAST: {
3103 SDValue Src = Op.getOperand(0);
3104 EVT SrcVT = Src.getValueType();
3105
3106 // We only handle vectors here.
3107 // TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits?
3108 if (!SrcVT.isVector())
3109 break;
3110
3111 // Fast handling of 'identity' bitcasts.
3112 unsigned NumSrcElts = SrcVT.getVectorNumElements();
3113 if (NumSrcElts == NumElts)
3114 return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
3115 KnownZero, TLO, Depth + 1);
3116
3117 APInt SrcDemandedElts, SrcZero, SrcUndef;
3118
3119 // Bitcast from 'large element' src vector to 'small element' vector, we
3120 // must demand a source element if any DemandedElt maps to it.
3121 if ((NumElts % NumSrcElts) == 0) {
3122 unsigned Scale = NumElts / NumSrcElts;
3123 SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3124 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3125 TLO, Depth + 1))
3126 return true;
3127
3128 // Try calling SimplifyDemandedBits, converting demanded elts to the bits
3129 // of the large element.
3130 // TODO - bigendian once we have test coverage.
3131 if (IsLE) {
3132 unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
3133 APInt SrcDemandedBits = APInt::getZero(SrcEltSizeInBits);
3134 for (unsigned i = 0; i != NumElts; ++i)
3135 if (DemandedElts[i]) {
3136 unsigned Ofs = (i % Scale) * EltSizeInBits;
3137 SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
3138 }
3139
3140 KnownBits Known;
3141 if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known,
3142 TLO, Depth + 1))
3143 return true;
3144
3145 // The bitcast has split each wide element into a number of
3146 // narrow subelements. We have just computed the Known bits
3147 // for wide elements. See if element splitting results in
3148 // some subelements being zero. Only for demanded elements!
3149 for (unsigned SubElt = 0; SubElt != Scale; ++SubElt) {
3150 if (!Known.Zero.extractBits(EltSizeInBits, SubElt * EltSizeInBits)
3151 .isAllOnes())
3152 continue;
3153 for (unsigned SrcElt = 0; SrcElt != NumSrcElts; ++SrcElt) {
3154 unsigned Elt = Scale * SrcElt + SubElt;
3155 if (DemandedElts[Elt])
3156 KnownZero.setBit(Elt);
3157 }
3158 }
3159 }
3160
3161 // If the src element is zero/undef then all the output elements will be -
3162 // only demanded elements are guaranteed to be correct.
3163 for (unsigned i = 0; i != NumSrcElts; ++i) {
3164 if (SrcDemandedElts[i]) {
3165 if (SrcZero[i])
3166 KnownZero.setBits(i * Scale, (i + 1) * Scale);
3167 if (SrcUndef[i])
3168 KnownUndef.setBits(i * Scale, (i + 1) * Scale);
3169 }
3170 }
3171 }
3172
3173 // Bitcast from 'small element' src vector to 'large element' vector, we
3174 // demand all smaller source elements covered by the larger demanded element
3175 // of this vector.
3176 if ((NumSrcElts % NumElts) == 0) {
3177 unsigned Scale = NumSrcElts / NumElts;
3178 SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3179 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3180 TLO, Depth + 1))
3181 return true;
3182
3183 // If all the src elements covering an output element are zero/undef, then
3184 // the output element will be as well, assuming it was demanded.
3185 for (unsigned i = 0; i != NumElts; ++i) {
3186 if (DemandedElts[i]) {
3187 if (SrcZero.extractBits(Scale, i * Scale).isAllOnes())
3188 KnownZero.setBit(i);
3189 if (SrcUndef.extractBits(Scale, i * Scale).isAllOnes())
3190 KnownUndef.setBit(i);
3191 }
3192 }
3193 }
3194 break;
3195 }
3196 case ISD::FREEZE: {
3197 SDValue N0 = Op.getOperand(0);
3198 if (TLO.DAG.isGuaranteedNotToBeUndefOrPoison(N0, DemandedElts,
3199 /*PoisonOnly=*/false))
3200 return TLO.CombineTo(Op, N0);
3201
3202 // TODO: Replace this with the general fold from DAGCombiner::visitFREEZE
3203 // freeze(op(x, ...)) -> op(freeze(x), ...).
3204 if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && DemandedElts == 1)
3205 return TLO.CombineTo(
3207 TLO.DAG.getFreeze(N0.getOperand(0))));
3208 break;
3209 }
3210 case ISD::BUILD_VECTOR: {
3211 // Check all elements and simplify any unused elements with UNDEF.
3212 if (!DemandedElts.isAllOnes()) {
3213 // Don't simplify BROADCASTS.
3214 if (llvm::any_of(Op->op_values(),
3215 [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
3216 SmallVector<SDValue, 32> Ops(Op->op_begin(), Op->op_end());
3217 bool Updated = false;
3218 for (unsigned i = 0; i != NumElts; ++i) {
3219 if (!DemandedElts[i] && !Ops[i].isUndef()) {
3220 Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
3221 KnownUndef.setBit(i);
3222 Updated = true;
3223 }
3224 }
3225 if (Updated)
3226 return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
3227 }
3228 }
3229 for (unsigned i = 0; i != NumElts; ++i) {
3230 SDValue SrcOp = Op.getOperand(i);
3231 if (SrcOp.isUndef()) {
3232 KnownUndef.setBit(i);
3233 } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
3235 KnownZero.setBit(i);
3236 }
3237 }
3238 break;
3239 }
3240 case ISD::CONCAT_VECTORS: {
3241 EVT SubVT = Op.getOperand(0).getValueType();
3242 unsigned NumSubVecs = Op.getNumOperands();
3243 unsigned NumSubElts = SubVT.getVectorNumElements();
3244 for (unsigned i = 0; i != NumSubVecs; ++i) {
3245 SDValue SubOp = Op.getOperand(i);
3246 APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3247 APInt SubUndef, SubZero;
3248 if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
3249 Depth + 1))
3250 return true;
3251 KnownUndef.insertBits(SubUndef, i * NumSubElts);
3252 KnownZero.insertBits(SubZero, i * NumSubElts);
3253 }
3254
3255 // Attempt to avoid multi-use ops if we don't need anything from them.
3256 if (!DemandedElts.isAllOnes()) {
3257 bool FoundNewSub = false;
3258 SmallVector<SDValue, 2> DemandedSubOps;
3259 for (unsigned i = 0; i != NumSubVecs; ++i) {
3260 SDValue SubOp = Op.getOperand(i);
3261 APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3262 SDValue NewSubOp = SimplifyMultipleUseDemandedVectorElts(
3263 SubOp, SubElts, TLO.DAG, Depth + 1);
3264 DemandedSubOps.push_back(NewSubOp ? NewSubOp : SubOp);
3265 FoundNewSub = NewSubOp ? true : FoundNewSub;
3266 }
3267 if (FoundNewSub) {
3268 SDValue NewOp =
3269 TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, DemandedSubOps);
3270 return TLO.CombineTo(Op, NewOp);
3271 }
3272 }
3273 break;
3274 }
3275 case ISD::INSERT_SUBVECTOR: {
3276 // Demand any elements from the subvector and the remainder from the src its
3277 // inserted into.
3278 SDValue Src = Op.getOperand(0);
3279 SDValue Sub = Op.getOperand(1);
3280 uint64_t Idx = Op.getConstantOperandVal(2);
3281 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
3282 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
3283 APInt DemandedSrcElts = DemandedElts;
3284 DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
3285
3286 APInt SubUndef, SubZero;
3287 if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
3288 Depth + 1))
3289 return true;
3290
3291 // If none of the src operand elements are demanded, replace it with undef.
3292 if (!DemandedSrcElts && !Src.isUndef())
3293 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
3294 TLO.DAG.getUNDEF(VT), Sub,
3295 Op.getOperand(2)));
3296
3297 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownUndef, KnownZero,
3298 TLO, Depth + 1))
3299 return true;
3300 KnownUndef.insertBits(SubUndef, Idx);
3301 KnownZero.insertBits(SubZero, Idx);
3302
3303 // Attempt to avoid multi-use ops if we don't need anything from them.
3304 if (!DemandedSrcElts.isAllOnes() || !DemandedSubElts.isAllOnes()) {
3305 SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3306 Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3307 SDValue NewSub = SimplifyMultipleUseDemandedVectorElts(
3308 Sub, DemandedSubElts, TLO.DAG, Depth + 1);
3309 if (NewSrc || NewSub) {
3310 NewSrc = NewSrc ? NewSrc : Src;
3311 NewSub = NewSub ? NewSub : Sub;
3312 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3313 NewSub, Op.getOperand(2));
3314 return TLO.CombineTo(Op, NewOp);
3315 }
3316 }
3317 break;
3318 }
3320 // Offset the demanded elts by the subvector index.
3321 SDValue Src = Op.getOperand(0);
3322 if (Src.getValueType().isScalableVector())
3323 break;
3324 uint64_t Idx = Op.getConstantOperandVal(1);
3325 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3326 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
3327
3328 APInt SrcUndef, SrcZero;
3329 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3330 Depth + 1))
3331 return true;
3332 KnownUndef = SrcUndef.extractBits(NumElts, Idx);
3333 KnownZero = SrcZero.extractBits(NumElts, Idx);
3334
3335 // Attempt to avoid multi-use ops if we don't need anything from them.
3336 if (!DemandedElts.isAllOnes()) {
3337 SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3338 Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3339 if (NewSrc) {
3340 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3341 Op.getOperand(1));
3342 return TLO.CombineTo(Op, NewOp);
3343 }
3344 }
3345 break;
3346 }
3348 SDValue Vec = Op.getOperand(0);
3349 SDValue Scl = Op.getOperand(1);
3350 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
3351
3352 // For a legal, constant insertion index, if we don't need this insertion
3353 // then strip it, else remove it from the demanded elts.
3354 if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
3355 unsigned Idx = CIdx->getZExtValue();
3356 if (!DemandedElts[Idx])
3357 return TLO.CombineTo(Op, Vec);
3358
3359 APInt DemandedVecElts(DemandedElts);
3360 DemandedVecElts.clearBit(Idx);
3361 if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
3362 KnownZero, TLO, Depth + 1))
3363 return true;
3364
3365 KnownUndef.setBitVal(Idx, Scl.isUndef());
3366
3367 KnownZero.setBitVal(Idx, isNullConstant(Scl) || isNullFPConstant(Scl));
3368 break;
3369 }
3370
3371 APInt VecUndef, VecZero;
3372 if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
3373 Depth + 1))
3374 return true;
3375 // Without knowing the insertion index we can't set KnownUndef/KnownZero.
3376 break;
3377 }
3378 case ISD::VSELECT: {
3379 SDValue Sel = Op.getOperand(0);
3380 SDValue LHS = Op.getOperand(1);
3381 SDValue RHS = Op.getOperand(2);
3382
3383 // Try to transform the select condition based on the current demanded
3384 // elements.
3385 APInt UndefSel, ZeroSel;
3386 if (SimplifyDemandedVectorElts(Sel, DemandedElts, UndefSel, ZeroSel, TLO,
3387 Depth + 1))
3388 return true;
3389
3390 // See if we can simplify either vselect operand.
3391 APInt DemandedLHS(DemandedElts);
3392 APInt DemandedRHS(DemandedElts);
3393 APInt UndefLHS, ZeroLHS;
3394 APInt UndefRHS, ZeroRHS;
3395 if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3396 Depth + 1))
3397 return true;
3398 if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3399 Depth + 1))
3400 return true;
3401
3402 KnownUndef = UndefLHS & UndefRHS;
3403 KnownZero = ZeroLHS & ZeroRHS;
3404
3405 // If we know that the selected element is always zero, we don't need the
3406 // select value element.
3407 APInt DemandedSel = DemandedElts & ~KnownZero;
3408 if (DemandedSel != DemandedElts)
3409 if (SimplifyDemandedVectorElts(Sel, DemandedSel, UndefSel, ZeroSel, TLO,
3410 Depth + 1))
3411 return true;
3412
3413 break;
3414 }
3415 case ISD::VECTOR_SHUFFLE: {
3416 SDValue LHS = Op.getOperand(0);
3417 SDValue RHS = Op.getOperand(1);
3418 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
3419
3420 // Collect demanded elements from shuffle operands..
3421 APInt DemandedLHS(NumElts, 0);
3422 APInt DemandedRHS(NumElts, 0);
3423 for (unsigned i = 0; i != NumElts; ++i) {
3424 int M = ShuffleMask[i];
3425 if (M < 0 || !DemandedElts[i])
3426 continue;
3427 assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
3428 if (M < (int)NumElts)
3429 DemandedLHS.setBit(M);
3430 else
3431 DemandedRHS.setBit(M - NumElts);
3432 }
3433
3434 // See if we can simplify either shuffle operand.
3435 APInt UndefLHS, ZeroLHS;
3436 APInt UndefRHS, ZeroRHS;
3437 if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3438 Depth + 1))
3439 return true;
3440 if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3441 Depth + 1))
3442 return true;
3443
3444 // Simplify mask using undef elements from LHS/RHS.
3445 bool Updated = false;
3446 bool IdentityLHS = true, IdentityRHS = true;
3447 SmallVector<int, 32> NewMask(ShuffleMask);
3448 for (unsigned i = 0; i != NumElts; ++i) {
3449 int &M = NewMask[i];
3450 if (M < 0)
3451 continue;
3452 if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
3453 (M >= (int)NumElts && UndefRHS[M - NumElts])) {
3454 Updated = true;
3455 M = -1;
3456 }
3457 IdentityLHS &= (M < 0) || (M == (int)i);
3458 IdentityRHS &= (M < 0) || ((M - NumElts) == i);
3459 }
3460
3461 // Update legal shuffle masks based on demanded elements if it won't reduce
3462 // to Identity which can cause premature removal of the shuffle mask.
3463 if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
3464 SDValue LegalShuffle =
3465 buildLegalVectorShuffle(VT, DL, LHS, RHS, NewMask, TLO.DAG);
3466 if (LegalShuffle)
3467 return TLO.CombineTo(Op, LegalShuffle);
3468 }
3469
3470 // Propagate undef/zero elements from LHS/RHS.
3471 for (unsigned i = 0; i != NumElts; ++i) {
3472 int M = ShuffleMask[i];
3473 if (M < 0) {
3474 KnownUndef.setBit(i);
3475 } else if (M < (int)NumElts) {
3476 if (UndefLHS[M])
3477 KnownUndef.setBit(i);
3478 if (ZeroLHS[M])
3479 KnownZero.setBit(i);
3480 } else {
3481 if (UndefRHS[M - NumElts])
3482 KnownUndef.setBit(i);
3483 if (ZeroRHS[M - NumElts])
3484 KnownZero.setBit(i);
3485 }
3486 }
3487 break;
3488 }
3492 APInt SrcUndef, SrcZero;
3493 SDValue Src = Op.getOperand(0);
3494 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3495 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts);
3496 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3497 Depth + 1))
3498 return true;
3499 KnownZero = SrcZero.zextOrTrunc(NumElts);
3500 KnownUndef = SrcUndef.zextOrTrunc(NumElts);
3501
3502 if (IsLE && Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
3503 Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
3504 DemandedSrcElts == 1) {
3505 // aext - if we just need the bottom element then we can bitcast.
3506 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
3507 }
3508
3509 if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
3510 // zext(undef) upper bits are guaranteed to be zero.
3511 if (DemandedElts.isSubsetOf(KnownUndef))
3512 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3513 KnownUndef.clearAllBits();
3514
3515 // zext - if we just need the bottom element then we can mask:
3516 // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
3517 if (IsLE && DemandedSrcElts == 1 && Src.getOpcode() == ISD::AND &&
3518 Op->isOnlyUserOf(Src.getNode()) &&
3519 Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
3520 SDLoc DL(Op);
3521 EVT SrcVT = Src.getValueType();
3522 EVT SrcSVT = SrcVT.getScalarType();
3523 SmallVector<SDValue> MaskElts;
3524 MaskElts.push_back(TLO.DAG.getAllOnesConstant(DL, SrcSVT));
3525 MaskElts.append(NumSrcElts - 1, TLO.DAG.getConstant(0, DL, SrcSVT));
3526 SDValue Mask = TLO.DAG.getBuildVector(SrcVT, DL, MaskElts);
3527 if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(
3528 ISD::AND, DL, SrcVT, {Src.getOperand(1), Mask})) {
3529 Fold = TLO.DAG.getNode(ISD::AND, DL, SrcVT, Src.getOperand(0), Fold);
3530 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Fold));
3531 }
3532 }
3533 }
3534 break;
3535 }
3536
3537 // TODO: There are more binop opcodes that could be handled here - MIN,
3538 // MAX, saturated math, etc.
3539 case ISD::ADD: {
3540 SDValue Op0 = Op.getOperand(0);
3541 SDValue Op1 = Op.getOperand(1);
3542 if (Op0 == Op1 && Op->isOnlyUserOf(Op0.getNode())) {
3543 APInt UndefLHS, ZeroLHS;
3544 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3545 Depth + 1, /*AssumeSingleUse*/ true))
3546 return true;
3547 }
3548 [[fallthrough]];
3549 }
3550 case ISD::AVGCEILS:
3551 case ISD::AVGCEILU:
3552 case ISD::AVGFLOORS:
3553 case ISD::AVGFLOORU:
3554 case ISD::OR:
3555 case ISD::XOR:
3556 case ISD::SUB:
3557 case ISD::FADD:
3558 case ISD::FSUB:
3559 case ISD::FMUL:
3560 case ISD::FDIV:
3561 case ISD::FREM: {
3562 SDValue Op0 = Op.getOperand(0);
3563 SDValue Op1 = Op.getOperand(1);
3564
3565 APInt UndefRHS, ZeroRHS;
3566 if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3567 Depth + 1))
3568 return true;
3569 APInt UndefLHS, ZeroLHS;
3570 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3571 Depth + 1))
3572 return true;
3573
3574 KnownZero = ZeroLHS & ZeroRHS;
3575 KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
3576
3577 // Attempt to avoid multi-use ops if we don't need anything from them.
3578 // TODO - use KnownUndef to relax the demandedelts?
3579 if (!DemandedElts.isAllOnes())
3580 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3581 return true;
3582 break;
3583 }
3584 case ISD::SHL:
3585 case ISD::SRL:
3586 case ISD::SRA:
3587 case ISD::ROTL:
3588 case ISD::ROTR: {
3589 SDValue Op0 = Op.getOperand(0);
3590 SDValue Op1 = Op.getOperand(1);
3591
3592 APInt UndefRHS, ZeroRHS;
3593 if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3594 Depth + 1))
3595 return true;
3596 APInt UndefLHS, ZeroLHS;
3597 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3598 Depth + 1))
3599 return true;
3600
3601 KnownZero = ZeroLHS;
3602 KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
3603
3604 // Attempt to avoid multi-use ops if we don't need anything from them.
3605 // TODO - use KnownUndef to relax the demandedelts?
3606 if (!DemandedElts.isAllOnes())
3607 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3608 return true;
3609 break;
3610 }
3611 case ISD::MUL:
3612 case ISD::MULHU:
3613 case ISD::MULHS:
3614 case ISD::AND: {
3615 SDValue Op0 = Op.getOperand(0);
3616 SDValue Op1 = Op.getOperand(1);
3617
3618 APInt SrcUndef, SrcZero;
3619 if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO,
3620 Depth + 1))
3621 return true;
3622 // If we know that a demanded element was zero in Op1 we don't need to
3623 // demand it in Op0 - its guaranteed to be zero.
3624 APInt DemandedElts0 = DemandedElts & ~SrcZero;
3625 if (SimplifyDemandedVectorElts(Op0, DemandedElts0, KnownUndef, KnownZero,
3626 TLO, Depth + 1))
3627 return true;
3628
3629 KnownUndef &= DemandedElts0;
3630 KnownZero &= DemandedElts0;
3631
3632 // If every element pair has a zero/undef then just fold to zero.
3633 // fold (and x, undef) -> 0 / (and x, 0) -> 0
3634 // fold (mul x, undef) -> 0 / (mul x, 0) -> 0
3635 if (DemandedElts.isSubsetOf(SrcZero | KnownZero | SrcUndef | KnownUndef))
3636 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3637
3638 // If either side has a zero element, then the result element is zero, even
3639 // if the other is an UNDEF.
3640 // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
3641 // and then handle 'and' nodes with the rest of the binop opcodes.
3642 KnownZero |= SrcZero;
3643 KnownUndef &= SrcUndef;
3644 KnownUndef &= ~KnownZero;
3645
3646 // Attempt to avoid multi-use ops if we don't need anything from them.
3647 if (!DemandedElts.isAllOnes())
3648 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3649 return true;
3650 break;
3651 }
3652 case ISD::TRUNCATE:
3653 case ISD::SIGN_EXTEND:
3654 case ISD::ZERO_EXTEND:
3655 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3656 KnownZero, TLO, Depth + 1))
3657 return true;
3658
3659 if (Op.getOpcode() == ISD::ZERO_EXTEND) {
3660 // zext(undef) upper bits are guaranteed to be zero.
3661 if (DemandedElts.isSubsetOf(KnownUndef))
3662 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3663 KnownUndef.clearAllBits();
3664 }
3665 break;
3666 default: {
3667 if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
3668 if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
3669 KnownZero, TLO, Depth))
3670 return true;
3671 } else {
3672 KnownBits Known;
3673 APInt DemandedBits = APInt::getAllOnes(EltSizeInBits);
3674 if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
3675 TLO, Depth, AssumeSingleUse))
3676 return true;
3677 }
3678 break;
3679 }
3680 }
3681 assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
3682
3683 // Constant fold all undef cases.
3684 // TODO: Handle zero cases as well.
3685 if (DemandedElts.isSubsetOf(KnownUndef))
3686 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3687
3688 return false;
3689}
3690
3691/// Determine which of the bits specified in Mask are known to be either zero or
3692/// one and return them in the Known.
3694 KnownBits &Known,
3695 const APInt &DemandedElts,
3696 const SelectionDAG &DAG,
3697 unsigned Depth) const {
3698 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3699 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3700 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3701 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3702 "Should use MaskedValueIsZero if you don't know whether Op"
3703 " is a target node!");
3704 Known.resetAll();
3705}
3706
3709 const APInt &DemandedElts, const MachineRegisterInfo &MRI,
3710 unsigned Depth) const {
3711 Known.resetAll();
3712}
3713
3715 const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
3716 // The low bits are known zero if the pointer is aligned.
3717 Known.Zero.setLowBits(Log2(MF.getFrameInfo().getObjectAlign(FrameIdx)));
3718}
3719
3722 unsigned Depth) const {
3723 return Align(1);
3724}
3725
3726/// This method can be implemented by targets that want to expose additional
3727/// information about sign bits to the DAG Combiner.
3729 const APInt &,
3730 const SelectionDAG &,
3731 unsigned Depth) const {
3732 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3733 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3734 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3735 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3736 "Should use ComputeNumSignBits if you don't know whether Op"
3737 " is a target node!");
3738 return 1;
3739}
3740
3742 GISelKnownBits &Analysis, Register R, const APInt &DemandedElts,
3743 const MachineRegisterInfo &MRI, unsigned Depth) const {
3744 return 1;
3745}
3746
3748 SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
3749 TargetLoweringOpt &TLO, unsigned Depth) const {
3750 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3751 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3752 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3753 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3754 "Should use SimplifyDemandedVectorElts if you don't know whether Op"
3755 " is a target node!");
3756 return false;
3757}
3758
3760 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3761 KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
3762 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3763 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3764 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3765 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3766 "Should use SimplifyDemandedBits if you don't know whether Op"
3767 " is a target node!");
3768 computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
3769 return false;
3770}
3771
3773 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3774 SelectionDAG &DAG, unsigned Depth) const {
3775 assert(
3776 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3777 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3778 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3779 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3780 "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
3781 " is a target node!");
3782 return SDValue();
3783}
3784
3785SDValue
3788 SelectionDAG &DAG) const {
3789 bool LegalMask = isShuffleMaskLegal(Mask, VT);
3790 if (!LegalMask) {
3791 std::swap(N0, N1);
3793 LegalMask = isShuffleMaskLegal(Mask, VT);
3794 }
3795
3796 if (!LegalMask)
3797 return SDValue();
3798
3799 return DAG.getVectorShuffle(VT, DL, N0, N1, Mask);
3800}
3801
3803 return nullptr;
3804}
3805
3807 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
3808 bool PoisonOnly, unsigned Depth) const {
3809 assert(
3810 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3811 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3812 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3813 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3814 "Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op"
3815 " is a target node!");
3816
3817 // If Op can't create undef/poison and none of its operands are undef/poison
3818 // then Op is never undef/poison.
3819 return !canCreateUndefOrPoisonForTargetNode(Op, DemandedElts, DAG, PoisonOnly,
3820 /*ConsiderFlags*/ true, Depth) &&
3821 all_of(Op->ops(), [&](SDValue V) {
3822 return DAG.isGuaranteedNotToBeUndefOrPoison(V, PoisonOnly,
3823 Depth + 1);
3824 });
3825}
3826
3828 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
3829 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
3830 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3831 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3832 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3833 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3834 "Should use canCreateUndefOrPoison if you don't know whether Op"
3835 " is a target node!");
3836 // Be conservative and return true.
3837 return true;
3838}
3839
3841 const SelectionDAG &DAG,
3842 bool SNaN,
3843 unsigned Depth) const {
3844 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3845 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3846 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3847 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3848 "Should use isKnownNeverNaN if you don't know whether Op"
3849 " is a target node!");
3850 return false;
3851}
3852
3854 const APInt &DemandedElts,
3855 APInt &UndefElts,
3856 const SelectionDAG &DAG,
3857 unsigned Depth) const {
3858 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3859 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3860 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3861 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3862 "Should use isSplatValue if you don't know whether Op"
3863 " is a target node!");
3864 return false;
3865}
3866
3867// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
3868// work with truncating build vectors and vectors with elements of less than
3869// 8 bits.
3871 if (!N)
3872 return false;
3873
3874 unsigned EltWidth;
3875 APInt CVal;
3876 if (ConstantSDNode *CN = isConstOrConstSplat(N, /*AllowUndefs=*/false,
3877 /*AllowTruncation=*/true)) {
3878 CVal = CN->getAPIntValue();
3879 EltWidth = N.getValueType().getScalarSizeInBits();
3880 } else
3881 return false;
3882
3883 // If this is a truncating splat, truncate the splat value.
3884 // Otherwise, we may fail to match the expected values below.
3885 if (EltWidth < CVal.getBitWidth())
3886 CVal = CVal.trunc(EltWidth);
3887
3888 switch (getBooleanContents(N.getValueType())) {
3890 return CVal[0];
3892 return CVal.isOne();
3894 return CVal.isAllOnes();
3895 }
3896
3897 llvm_unreachable("Invalid boolean contents");
3898}
3899
3901 if (!N)
3902 return false;
3903
3904 const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
3905 if (!CN) {
3906 const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
3907 if (!BV)
3908 return false;
3909
3910 // Only interested in constant splats, we don't care about undef
3911 // elements in identifying boolean constants and getConstantSplatNode
3912 // returns NULL if all ops are undef;
3913 CN = BV->getConstantSplatNode();
3914 if (!CN)
3915 return false;
3916 }
3917
3918 if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
3919 return !CN->getAPIntValue()[0];
3920
3921 return CN->isZero();
3922}
3923
3925 bool SExt) const {
3926 if (VT == MVT::i1)
3927 return N->isOne();
3928
3930 switch (Cnt) {
3932 // An extended value of 1 is always true, unless its original type is i1,
3933 // in which case it will be sign extended to -1.
3934 return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
3937 return N->isAllOnes() && SExt;
3938 }
3939 llvm_unreachable("Unexpected enumeration.");
3940}
3941
3942/// This helper function of SimplifySetCC tries to optimize the comparison when
3943/// either operand of the SetCC node is a bitwise-and instruction.
3944SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
3945 ISD::CondCode Cond, const SDLoc &DL,
3946 DAGCombinerInfo &DCI) const {
3947 if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
3948 std::swap(N0, N1);
3949
3950 SelectionDAG &DAG = DCI.DAG;
3951 EVT OpVT = N0.getValueType();
3952 if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
3953 (Cond != ISD::SETEQ && Cond != ISD::SETNE))
3954 return SDValue();
3955
3956 // (X & Y) != 0 --> zextOrTrunc(X & Y)
3957 // iff everything but LSB is known zero:
3958 if (Cond == ISD::SETNE && isNullConstant(N1) &&
3961 unsigned NumEltBits = OpVT.getScalarSizeInBits();
3962 APInt UpperBits = APInt::getHighBitsSet(NumEltBits, NumEltBits - 1);
3963 if (DAG.MaskedValueIsZero(N0, UpperBits))
3964 return DAG.getBoolExtOrTrunc(N0, DL, VT, OpVT);
3965 }
3966
3967 // Try to eliminate a power-of-2 mask constant by converting to a signbit
3968 // test in a narrow type that we can truncate to with no cost. Examples:
3969 // (i32 X & 32768) == 0 --> (trunc X to i16) >= 0
3970 // (i32 X & 32768) != 0 --> (trunc X to i16) < 0
3971 // TODO: This conservatively checks for type legality on the source and
3972 // destination types. That may inhibit optimizations, but it also
3973 // allows setcc->shift transforms that may be more beneficial.
3974 auto *AndC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
3975 if (AndC && isNullConstant(N1) && AndC->getAPIntValue().isPowerOf2() &&
3976 isTypeLegal(OpVT) && N0.hasOneUse()) {
3977 EVT NarrowVT = EVT::getIntegerVT(*DAG.getContext(),
3978 AndC->getAPIntValue().getActiveBits());
3979 if (isTruncateFree(OpVT, NarrowVT) && isTypeLegal(NarrowVT)) {
3980 SDValue Trunc = DAG.getZExtOrTrunc(N0.getOperand(0), DL, NarrowVT);
3981 SDValue Zero = DAG.getConstant(0, DL, NarrowVT);
3982 return DAG.getSetCC(DL, VT, Trunc, Zero,
3984 }
3985 }
3986
3987 // Match these patterns in any of their permutations:
3988 // (X & Y) == Y
3989 // (X & Y) != Y
3990 SDValue X, Y;
3991 if (N0.getOperand(0) == N1) {
3992 X = N0.getOperand(1);
3993 Y = N0.getOperand(0);
3994 } else if (N0.getOperand(1) == N1) {
3995 X = N0.getOperand(0);
3996 Y = N0.getOperand(1);
3997 } else {
3998 return SDValue();
3999 }
4000
4001 // TODO: We should invert (X & Y) eq/ne 0 -> (X & Y) ne/eq Y if
4002 // `isXAndYEqZeroPreferableToXAndYEqY` is false. This is a bit difficult as
4003 // its liable to create and infinite loop.
4004 SDValue Zero = DAG.getConstant(0, DL, OpVT);
4005 if (isXAndYEqZeroPreferableToXAndYEqY(Cond, OpVT) &&
4007 // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
4008 // Note that where Y is variable and is known to have at most one bit set
4009 // (for example, if it is Z & 1) we cannot do this; the expressions are not
4010 // equivalent when Y == 0.
4011 assert(OpVT.isInteger());
4013 if (DCI.isBeforeLegalizeOps() ||
4015 return DAG.getSetCC(DL, VT, N0, Zero, Cond);
4016 } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
4017 // If the target supports an 'and-not' or 'and-complement' logic operation,
4018 // try to use that to make a comparison operation more efficient.
4019 // But don't do this transform if the mask is a single bit because there are
4020 // more efficient ways to deal with that case (for example, 'bt' on x86 or
4021 // 'rlwinm' on PPC).
4022
4023 // Bail out if the compare operand that we want to turn into a zero is
4024 // already a zero (otherwise, infinite loop).
4025 if (isNullConstant(Y))
4026 return SDValue();
4027
4028 // Transform this into: ~X & Y == 0.
4029 SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
4030 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
4031 return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
4032 }
4033
4034 return SDValue();
4035}
4036
4037/// There are multiple IR patterns that could be checking whether certain
4038/// truncation of a signed number would be lossy or not. The pattern which is
4039/// best at IR level, may not lower optimally. Thus, we want to unfold it.
4040/// We are looking for the following pattern: (KeptBits is a constant)
4041/// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
4042/// KeptBits won't be bitwidth(x), that will be constant-folded to true/false.
4043/// KeptBits also can't be 1, that would have been folded to %x dstcond 0
4044/// We will unfold it into the natural trunc+sext pattern:
4045/// ((%x << C) a>> C) dstcond %x
4046/// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x)
4047SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
4048 EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
4049 const SDLoc &DL) const {
4050 // We must be comparing with a constant.
4051 ConstantSDNode *C1;
4052 if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
4053 return SDValue();
4054
4055 // N0 should be: add %x, (1 << (KeptBits-1))
4056 if (N0->getOpcode() != ISD::ADD)
4057 return SDValue();
4058
4059 // And we must be 'add'ing a constant.
4060 ConstantSDNode *C01;
4061 if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
4062 return SDValue();
4063
4064 SDValue X = N0->getOperand(0);
4065 EVT XVT = X.getValueType();
4066
4067 // Validate constants ...
4068
4069 APInt I1 = C1->getAPIntValue();
4070
4071 ISD::CondCode NewCond;
4072 if (Cond == ISD::CondCode::SETULT) {
4073 NewCond = ISD::CondCode::SETEQ;
4074 } else if (Cond == ISD::CondCode::SETULE) {
4075 NewCond = ISD::CondCode::SETEQ;
4076 // But need to 'canonicalize' the constant.
4077 I1 += 1;
4078 } else if (Cond == ISD::CondCode::SETUGT) {
4079 NewCond = ISD::CondCode::SETNE;
4080 // But need to 'canonicalize' the constant.
4081 I1 += 1;
4082 } else if (Cond == ISD::CondCode::SETUGE) {
4083 NewCond = ISD::CondCode::SETNE;
4084 } else
4085 return SDValue();
4086
4087 APInt I01 = C01->getAPIntValue();
4088
4089 auto checkConstants = [&I1, &I01]() -> bool {
4090 // Both of them must be power-of-two, and the constant from setcc is bigger.
4091 return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
4092 };
4093
4094 if (checkConstants()) {
4095 // Great, e.g. got icmp ult i16 (add i16 %x, 128), 256
4096 } else {
4097 // What if we invert constants? (and the target predicate)
4098 I1.negate();
4099 I01.negate();
4100 assert(XVT.isInteger());
4101 NewCond = getSetCCInverse(NewCond, XVT);
4102 if (!checkConstants())
4103 return SDValue();
4104 // Great, e.g. got icmp uge i16 (add i16 %x, -128), -256
4105 }
4106
4107 // They are power-of-two, so which bit is set?
4108 const unsigned KeptBits = I1.logBase2();
4109 const unsigned KeptBitsMinusOne = I01.logBase2();
4110
4111 // Magic!
4112 if (KeptBits != (KeptBitsMinusOne + 1))
4113 return SDValue();
4114 assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");
4115
4116 // We don't want to do this in every single case.
4117 SelectionDAG &DAG = DCI.DAG;
4119 XVT, KeptBits))
4120 return SDValue();
4121
4122 // Unfold into: sext_inreg(%x) cond %x
4123 // Where 'cond' will be either 'eq' or 'ne'.
4124 SDValue SExtInReg = DAG.getNode(
4126 DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), KeptBits)));
4127 return DAG.getSetCC(DL, SCCVT, SExtInReg, X, NewCond);
4128}
4129
4130// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
4131SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
4132 EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
4133 DAGCombinerInfo &DCI, const SDLoc &DL) const {
4135 "Should be a comparison with 0.");
4136 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4137 "Valid only for [in]equality comparisons.");
4138
4139 unsigned NewShiftOpcode;
4140 SDValue X, C, Y;
4141
4142 SelectionDAG &DAG = DCI.DAG;
4143 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4144
4145 // Look for '(C l>>/<< Y)'.
4146 auto Match = [&NewShiftOpcode, &X, &C, &Y, &TLI, &DAG](SDValue V) {
4147 // The shift should be one-use.
4148 if (!V.hasOneUse())
4149 return false;
4150 unsigned OldShiftOpcode = V.getOpcode();
4151 switch (OldShiftOpcode) {
4152 case ISD::SHL:
4153 NewShiftOpcode = ISD::SRL;
4154 break;
4155 case ISD::SRL:
4156 NewShiftOpcode = ISD::SHL;
4157 break;
4158 default:
4159 return false; // must be a logical shift.
4160 }
4161 // We should be shifting a constant.
4162 // FIXME: best to use isConstantOrConstantVector().
4163 C = V.getOperand(0);
4165 isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
4166 if (!CC)
4167 return false;
4168 Y = V.getOperand(1);
4169
4171 isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
4172 return TLI.shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
4173 X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
4174 };
4175
4176 // LHS of comparison should be an one-use 'and'.
4177 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
4178 return SDValue();
4179
4180 X = N0.getOperand(0);
4181 SDValue Mask = N0.getOperand(1);
4182
4183 // 'and' is commutative!
4184 if (!Match(Mask)) {
4185 std::swap(X, Mask);
4186 if (!Match(Mask))
4187 return SDValue();
4188 }
4189
4190 EVT VT = X.getValueType();
4191
4192 // Produce:
4193 // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
4194 SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
4195 SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
4196 SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
4197 return T2;
4198}
4199
4200/// Try to fold an equality comparison with a {add/sub/xor} binary operation as
4201/// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
4202/// handle the commuted versions of these patterns.
4203SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
4204 ISD::CondCode Cond, const SDLoc &DL,
4205 DAGCombinerInfo &DCI) const {
4206 unsigned BOpcode = N0.getOpcode();
4207 assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
4208 "Unexpected binop");
4209 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
4210
4211 // (X + Y) == X --> Y == 0
4212 // (X - Y) == X --> Y == 0
4213 // (X ^ Y) == X --> Y == 0
4214 SelectionDAG &DAG = DCI.DAG;
4215 EVT OpVT = N0.getValueType();
4216 SDValue X = N0.getOperand(0);
4217 SDValue Y = N0.getOperand(1);
4218 if (X == N1)
4219 return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);
4220
4221 if (Y != N1)
4222 return SDValue();
4223
4224 // (X + Y) == Y --> X == 0
4225 // (X ^ Y) == Y --> X == 0
4226 if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
4227 return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);
4228
4229 // The shift would not be valid if the operands are boolean (i1).
4230 if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
4231 return SDValue();
4232
4233 // (X - Y) == Y --> X == Y << 1
4234 SDValue One =
4235 DAG.getShiftAmountConstant(1, OpVT, DL, !DCI.isBeforeLegalize());
4236 SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
4237 if (!DCI.isCalledByLegalizer())
4238 DCI.AddToWorklist(YShl1.getNode());
4239 return DAG.getSetCC(DL, VT, X, YShl1, Cond);
4240}
4241
4243 SDValue N0, const APInt &C1,
4244 ISD::CondCode Cond, const SDLoc &dl,
4245 SelectionDAG &DAG) {
4246 // Look through truncs that don't change the value of a ctpop.
4247 // FIXME: Add vector support? Need to be careful with setcc result type below.
4248 SDValue CTPOP = N0;
4249 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
4251 CTPOP = N0.getOperand(0);
4252
4253 if (CTPOP.getOpcode() != ISD::CTPOP || !CTPOP.hasOneUse())
4254 return SDValue();
4255
4256 EVT CTVT = CTPOP.getValueType();
4257 SDValue CTOp = CTPOP.getOperand(0);
4258
4259 // Expand a power-of-2-or-zero comparison based on ctpop:
4260 // (ctpop x) u< 2 -> (x & x-1) == 0
4261 // (ctpop x) u> 1 -> (x & x-1) != 0
4262 if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
4263 // Keep the CTPOP if it is a cheap vector op.
4264 if (CTVT.isVector() && TLI.isCtpopFast(CTVT))
4265 return SDValue();
4266
4267 unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond);
4268 if (C1.ugt(CostLimit + (Cond == ISD::SETULT)))
4269 return SDValue();
4270 if (C1 == 0 && (Cond == ISD::SETULT))
4271 return SDValue(); // This is handled elsewhere.
4272
4273 unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT);
4274
4275 SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4276 SDValue Result = CTOp;
4277 for (unsigned i = 0; i < Passes; i++) {
4278 SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, Result, NegOne);
4279 Result = DAG.getNode(ISD::AND, dl, CTVT, Result, Add);
4280 }
4282 return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC);
4283 }
4284
4285 // Expand a power-of-2 comparison based on ctpop
4286 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
4287 // Keep the CTPOP if it is cheap.
4288 if (TLI.isCtpopFast(CTVT))
4289 return SDValue();
4290
4291 SDValue Zero = DAG.getConstant(0, dl, CTVT);
4292 SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4293 assert(CTVT.isInteger());
4294 SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
4295
4296 // Its not uncommon for known-never-zero X to exist in (ctpop X) eq/ne 1, so
4297 // check before emitting a potentially unnecessary op.
4298 if (DAG.isKnownNeverZero(CTOp)) {
4299 // (ctpop x) == 1 --> (x & x-1) == 0
4300 // (ctpop x) != 1 --> (x & x-1) != 0
4301 SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
4302 SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
4303 return RHS;
4304 }
4305
4306 // (ctpop x) == 1 --> (x ^ x-1) > x-1
4307 // (ctpop x) != 1 --> (x ^ x-1) <= x-1
4308 SDValue Xor = DAG.getNode(ISD::XOR, dl, CTVT, CTOp, Add);
4310 return DAG.getSetCC(dl, VT, Xor, Add, CmpCond);
4311 }
4312
4313 return SDValue();
4314}
4315
4317 ISD::CondCode Cond, const SDLoc &dl,
4318 SelectionDAG &DAG) {
4319 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4320 return SDValue();
4321
4322 auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4323 if (!C1 || !(C1->isZero() || C1->isAllOnes()))
4324 return SDValue();
4325
4326 auto getRotateSource = [](SDValue X) {
4327 if (X.getOpcode() == ISD::ROTL || X.getOpcode() == ISD::ROTR)
4328 return X.getOperand(0);
4329 return SDValue();
4330 };
4331
4332 // Peek through a rotated value compared against 0 or -1:
4333 // (rot X, Y) == 0/-1 --> X == 0/-1
4334 // (rot X, Y) != 0/-1 --> X != 0/-1
4335 if (SDValue R = getRotateSource(N0))
4336 return DAG.getSetCC(dl, VT, R, N1, Cond);
4337
4338 // Peek through an 'or' of a rotated value compared against 0:
4339 // or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0
4340 // or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0
4341 //
4342 // TODO: Add the 'and' with -1 sibling.
4343 // TODO: Recurse through a series of 'or' ops to find the rotate.
4344 EVT OpVT = N0.getValueType();
4345 if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) {
4346 if (SDValue R = getRotateSource(N0.getOperand(0))) {
4347 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(1));
4348 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4349 }
4350 if (SDValue R = getRotateSource(N0.getOperand(1))) {
4351 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(0));
4352 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4353 }
4354 }
4355
4356 return SDValue();
4357}
4358
4360 ISD::CondCode Cond, const SDLoc &dl,
4361 SelectionDAG &DAG) {
4362 // If we are testing for all-bits-clear, we might be able to do that with
4363 // less shifting since bit-order does not matter.
4364 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4365 return SDValue();
4366
4367 auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4368 if (!C1 || !C1->isZero())
4369 return SDValue();
4370
4371 if (!N0.hasOneUse() ||
4372 (N0.getOpcode() != ISD::FSHL && N0.getOpcode() != ISD::FSHR))
4373 return SDValue();
4374
4375 unsigned BitWidth = N0.getScalarValueSizeInBits();
4376 auto *ShAmtC = isConstOrConstSplat(N0.getOperand(2));
4377 if (!ShAmtC || ShAmtC->getAPIntValue().uge(BitWidth))
4378 return SDValue();
4379
4380 // Canonicalize fshr as fshl to reduce pattern-matching.
4381 unsigned ShAmt = ShAmtC->getZExtValue();
4382 if (N0.getOpcode() == ISD::FSHR)
4383 ShAmt = BitWidth - ShAmt;
4384
4385 // Match an 'or' with a specific operand 'Other' in either commuted variant.
4386 SDValue X, Y;
4387 auto matchOr = [&X, &Y](SDValue Or, SDValue Other) {
4388 if (Or.getOpcode() != ISD::OR || !Or.hasOneUse())
4389 return false;
4390 if (Or.getOperand(0) == Other) {
4391 X = Or.getOperand(0);
4392 Y = Or.getOperand(1);
4393 return true;
4394 }
4395 if (Or.getOperand(1) == Other) {
4396 X = Or.getOperand(1);
4397 Y = Or.getOperand(0);
4398 return true;
4399 }
4400 return false;
4401 };
4402
4403 EVT OpVT = N0.getValueType();
4404 EVT ShAmtVT = N0.getOperand(2).getValueType();
4405 SDValue F0 = N0.getOperand(0);
4406 SDValue F1 = N0.getOperand(1);
4407 if (matchOr(F0, F1)) {
4408 // fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0
4409 SDValue NewShAmt = DAG.getConstant(ShAmt, dl, ShAmtVT);
4410 SDValue Shift = DAG.getNode(ISD::SHL, dl, OpVT, Y, NewShAmt);
4411 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4412 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4413 }
4414 if (matchOr(F1, F0)) {
4415 // fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0
4416 SDValue NewShAmt = DAG.getConstant(BitWidth - ShAmt, dl, ShAmtVT);
4417 SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, Y, NewShAmt);
4418 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4419 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4420 }
4421
4422 return SDValue();
4423}
4424
4425/// Try to simplify a setcc built with the specified operands and cc. If it is
4426/// unable to simplify it, return a null SDValue.
4428 ISD::CondCode Cond, bool foldBooleans,
4429 DAGCombinerInfo &DCI,
4430 const SDLoc &dl) const {
4431 SelectionDAG &DAG = DCI.DAG;
4432 const DataLayout &Layout = DAG.getDataLayout();
4433 EVT OpVT = N0.getValueType();
4435
4436 // Constant fold or commute setcc.
4437 if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
4438 return Fold;
4439
4440 bool N0ConstOrSplat =
4441 isConstOrConstSplat(N0, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4442 bool N1ConstOrSplat =
4443 isConstOrConstSplat(N1, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4444
4445 // Canonicalize toward having the constant on the RHS.
4446 // TODO: Handle non-splat vector constants. All undef causes trouble.
4447 // FIXME: We can't yet fold constant scalable vector splats, so avoid an
4448 // infinite loop here when we encounter one.
4450 if (N0ConstOrSplat && !N1ConstOrSplat &&
4451 (DCI.isBeforeLegalizeOps() ||
4452 isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
4453 return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4454
4455 // If we have a subtract with the same 2 non-constant operands as this setcc
4456 // -- but in reverse order -- then try to commute the operands of this setcc
4457 // to match. A matching pair of setcc (cmp) and sub may be combined into 1
4458 // instruction on some targets.
4459 if (!N0ConstOrSplat && !N1ConstOrSplat &&
4460 (DCI.isBeforeLegalizeOps() ||
4461 isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
4462 DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N1, N0}) &&
4463 !DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N0, N1}))
4464 return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4465
4466 if (SDValue V = foldSetCCWithRotate(VT, N0, N1, Cond, dl, DAG))
4467 return V;
4468
4469 if (SDValue V = foldSetCCWithFunnelShift(VT, N0, N1, Cond, dl, DAG))
4470 return V;
4471
4472 if (auto *N1C = isConstOrConstSplat(N1)) {
4473 const APInt &C1 = N1C->getAPIntValue();
4474
4475 // Optimize some CTPOP cases.
4476 if (SDValue V = simplifySetCCWithCTPOP(*this, VT, N0, C1, Cond, dl, DAG))
4477 return V;
4478
4479 // For equality to 0 of a no-wrap multiply, decompose and test each op:
4480 // X * Y == 0 --> (X == 0) || (Y == 0)
4481 // X * Y != 0 --> (X != 0) && (Y != 0)
4482 // TODO: This bails out if minsize is set, but if the target doesn't have a
4483 // single instruction multiply for this type, it would likely be
4484 // smaller to decompose.
4485 if (C1.isZero() && (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4486 N0.getOpcode() == ISD::MUL && N0.hasOneUse() &&
4487 (N0->getFlags().hasNoUnsignedWrap() ||
4488 N0->getFlags().hasNoSignedWrap()) &&
4489 !Attr.hasFnAttr(Attribute::MinSize)) {
4490 SDValue IsXZero = DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
4491 SDValue IsYZero = DAG.getSetCC(dl, VT, N0.getOperand(1), N1, Cond);
4492 unsigned LogicOp = Cond == ISD::SETEQ ? ISD::OR : ISD::AND;
4493 return DAG.getNode(LogicOp, dl, VT, IsXZero, IsYZero);
4494 }
4495
4496 // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
4497 // equality comparison, then we're just comparing whether X itself is
4498 // zero.
4499 if (N0.getOpcode() == ISD::SRL && (C1.isZero() || C1.isOne()) &&
4500 N0.getOperand(0).getOpcode() == ISD::CTLZ &&
4501 llvm::has_single_bit<uint32_t>(N0.getScalarValueSizeInBits())) {
4502 if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) {
4503 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4504 ShAmt->getAPIntValue() == Log2_32(N0.getScalarValueSizeInBits())) {
4505 if ((C1 == 0) == (Cond == ISD::SETEQ)) {
4506 // (srl (ctlz x), 5) == 0 -> X != 0
4507 // (srl (ctlz x), 5) != 1 -> X != 0
4508 Cond = ISD::SETNE;
4509 } else {
4510 // (srl (ctlz x), 5) != 0 -> X == 0
4511 // (srl (ctlz x), 5) == 1 -> X == 0
4512 Cond = ISD::SETEQ;
4513 }
4514 SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
4515 return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), Zero,
4516 Cond);
4517 }
4518 }
4519 }
4520 }
4521
4522 // FIXME: Support vectors.
4523 if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
4524 const APInt &C1 = N1C->getAPIntValue();
4525
4526 // (zext x) == C --> x == (trunc C)
4527 // (sext x) == C --> x == (trunc C)
4528 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4529 DCI.isBeforeLegalize() && N0->hasOneUse()) {
4530 unsigned MinBits = N0.getValueSizeInBits();
4531 SDValue PreExt;
4532 bool Signed = false;
4533 if (N0->getOpcode() == ISD::ZERO_EXTEND) {
4534 // ZExt
4535 MinBits = N0->getOperand(0).getValueSizeInBits();
4536 PreExt = N0->getOperand(0);
4537 } else if (N0->getOpcode() == ISD::AND) {
4538 // DAGCombine turns costly ZExts into ANDs
4539 if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
4540 if ((C->getAPIntValue()+1).isPowerOf2()) {
4541 MinBits = C->getAPIntValue().countr_one();
4542 PreExt = N0->getOperand(0);
4543 }
4544 } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
4545 // SExt
4546 MinBits = N0->getOperand(0).getValueSizeInBits();
4547 PreExt = N0->getOperand(0);
4548 Signed = true;
4549 } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
4550 // ZEXTLOAD / SEXTLOAD
4551 if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
4552 MinBits = LN0->getMemoryVT().getSizeInBits();
4553 PreExt = N0;
4554 } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
4555 Signed = true;
4556 MinBits = LN0->getMemoryVT().getSizeInBits();
4557 PreExt = N0;
4558 }
4559 }
4560
4561 // Figure out how many bits we need to preserve this constant.
4562 unsigned ReqdBits = Signed ? C1.getSignificantBits() : C1.getActiveBits();
4563
4564 // Make sure we're not losing bits from the constant.
4565 if (MinBits > 0 &&
4566 MinBits < C1.getBitWidth() &&
4567 MinBits >= ReqdBits) {
4568 EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
4569 if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
4570 // Will get folded away.
4571 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
4572 if (MinBits == 1 && C1 == 1)
4573 // Invert the condition.
4574 return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
4576 SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
4577 return DAG.getSetCC(dl, VT, Trunc, C, Cond);
4578 }
4579
4580 // If truncating the setcc operands is not desirable, we can still
4581 // simplify the expression in some cases:
4582 // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
4583 // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
4584 // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
4585 // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
4586 // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
4587 // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
4588 SDValue TopSetCC = N0->getOperand(0);
4589 unsigned N0Opc = N0->getOpcode();
4590 bool SExt = (N0Opc == ISD::SIGN_EXTEND);
4591 if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
4592 TopSetCC.getOpcode() == ISD::SETCC &&
4593 (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
4594 (isConstFalseVal(N1) ||
4595 isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
4596
4597 bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) ||
4598 (!N1C->isZero() && Cond == ISD::SETNE);
4599
4600 if (!Inverse)
4601 return TopSetCC;
4602
4604 cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
4605 TopSetCC.getOperand(0).getValueType());
4606 return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
4607 TopSetCC.getOperand(1),
4608 InvCond);
4609 }
4610 }
4611 }
4612
4613 // If the LHS is '(and load, const)', the RHS is 0, the test is for
4614 // equality or unsigned, and all 1 bits of the const are in the same
4615 // partial word, see if we can shorten the load.
4616 if (DCI.isBeforeLegalize() &&
4618 N0.getOpcode() == ISD::AND && C1 == 0 &&
4619 N0.getNode()->hasOneUse() &&
4620 isa<LoadSDNode>(N0.getOperand(0)) &&
4621 N0.getOperand(0).getNode()->hasOneUse() &&
4622 isa<ConstantSDNode>(N0.getOperand(1))) {
4623 LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0));
4624 APInt bestMask;
4625 unsigned bestWidth = 0, bestOffset = 0;
4626 if (Lod->isSimple() && Lod->isUnindexed() &&
4627 (Lod->getMemoryVT().isByteSized() ||
4629 unsigned memWidth = Lod->getMemoryVT().getStoreSizeInBits();
4630 unsigned origWidth = N0.getValueSizeInBits();
4631 unsigned maskWidth = origWidth;
4632 // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
4633 // 8 bits, but have to be careful...
4634 if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
4635 origWidth = Lod->getMemoryVT().getSizeInBits();
4636 const APInt &Mask = N0.getConstantOperandAPInt(1);
 4637 // Only consider power-of-2 widths (and at least one byte) as candidates
4638 // for the narrowed load.
4639 for (unsigned width = 8; width < origWidth; width *= 2) {
4640 EVT newVT = EVT::getIntegerVT(*DAG.getContext(), width);
4641 if (!shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT))
4642 continue;
4643 APInt newMask = APInt::getLowBitsSet(maskWidth, width);
4644 // Avoid accessing any padding here for now (we could use memWidth
4645 // instead of origWidth here otherwise).
4646 unsigned maxOffset = origWidth - width;
4647 for (unsigned offset = 0; offset <= maxOffset; offset += 8) {
4648 if (Mask.isSubsetOf(newMask)) {
4649 unsigned ptrOffset =
4650 Layout.isLittleEndian() ? offset : memWidth - width - offset;
4651 unsigned IsFast = 0;
4652 Align NewAlign = commonAlignment(Lod->getAlign(), ptrOffset / 8);
4654 *DAG.getContext(), Layout, newVT, Lod->getAddressSpace(),
4655 NewAlign, Lod->getMemOperand()->getFlags(), &IsFast) &&
4656 IsFast) {
4657 bestOffset = ptrOffset / 8;
4658 bestMask = Mask.lshr(offset);
4659 bestWidth = width;
4660 break;
4661 }
4662 }
4663 newMask <<= 8;
4664 }
4665 if (bestWidth)
4666 break;
4667 }
4668 }
4669 if (bestWidth) {
4670 EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
4671 SDValue Ptr = Lod->getBasePtr();
4672 if (bestOffset != 0)
4673 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(bestOffset));
4674 SDValue NewLoad =
4675 DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
4676 Lod->getPointerInfo().getWithOffset(bestOffset),
4677 Lod->getOriginalAlign());
4678 SDValue And =
4679 DAG.getNode(ISD::AND, dl, newVT, NewLoad,
4680 DAG.getConstant(bestMask.trunc(bestWidth), dl, newVT));
4681 return DAG.getSetCC(dl, VT, And, DAG.getConstant(0LL, dl, newVT), Cond);
4682 }
4683 }
4684
4685 // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
4686 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
4687 unsigned InSize = N0.getOperand(0).getValueSizeInBits();
4688
4689 // If the comparison constant has bits in the upper part, the
4690 // zero-extended value could never match.
4692 C1.getBitWidth() - InSize))) {
4693 switch (Cond) {
4694 case ISD::SETUGT:
4695 case ISD::SETUGE:
4696 case ISD::SETEQ:
4697 return DAG.getConstant(0, dl, VT);
4698 case ISD::SETULT:
4699 case ISD::SETULE:
4700 case ISD::SETNE:
4701 return DAG.getConstant(1, dl, VT);
4702 case ISD::SETGT:
4703 case ISD::SETGE:
4704 // True if the sign bit of C1 is set.
4705 return DAG.getConstant(C1.isNegative(), dl, VT);
4706 case ISD::SETLT:
4707 case ISD::SETLE:
4708 // True if the sign bit of C1 isn't set.
4709 return DAG.getConstant(C1.isNonNegative(), dl, VT);
4710 default:
4711 break;
4712 }
4713 }
4714
4715 // Otherwise, we can perform the comparison with the low bits.
4716 switch (Cond) {
4717 case ISD::SETEQ:
4718 case ISD::SETNE:
4719 case ISD::SETUGT:
4720 case ISD::SETUGE:
4721 case ISD::SETULT:
4722 case ISD::SETULE: {
4723 EVT newVT = N0.getOperand(0).getValueType();
4724 if (DCI.isBeforeLegalizeOps() ||
4725 (isOperationLegal(ISD::SETCC, newVT) &&
4726 isCondCodeLegal(Cond, newVT.getSimpleVT()))) {
4727 EVT NewSetCCVT = getSetCCResultType(Layout, *DAG.getContext(), newVT);
4728 SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);
4729
4730 SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
4731 NewConst, Cond);
4732 return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
4733 }
4734 break;
4735 }
4736 default:
4737 break; // todo, be more careful with signed comparisons
4738 }
4739 } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
4740 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4741 !isSExtCheaperThanZExt(cast<VTSDNode>(N0.getOperand(1))->getVT(),
4742 OpVT)) {
4743 EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
4744 unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
4745 EVT ExtDstTy = N0.getValueType();
4746 unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
4747
4748 // If the constant doesn't fit into the number of bits for the source of
4749 // the sign extension, it is impossible for both sides to be equal.
4750 if (C1.getSignificantBits() > ExtSrcTyBits)
4751 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
4752
4753 assert(ExtDstTy == N0.getOperand(0).getValueType() &&
4754 ExtDstTy != ExtSrcTy && "Unexpected types!");
4755 APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
4756 SDValue ZextOp = DAG.getNode(ISD::AND, dl, ExtDstTy, N0.getOperand(0),
4757 DAG.getConstant(Imm, dl, ExtDstTy));
4758 if (!DCI.isCalledByLegalizer())
4759 DCI.AddToWorklist(ZextOp.getNode());
4760 // Otherwise, make this a use of a zext.
4761 return DAG.getSetCC(dl, VT, ZextOp,
4762 DAG.getConstant(C1 & Imm, dl, ExtDstTy), Cond);
4763 } else if ((N1C->isZero() || N1C->isOne()) &&
4764 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
4765 // SETCC (X), [0|1], [EQ|NE] -> X if X is known 0/1. i1 types are
4766 // excluded as they are handled below whilst checking for foldBooleans.
4767 if ((N0.getOpcode() == ISD::SETCC || VT.getScalarType() != MVT::i1) &&
4768 isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
4769 (N0.getValueType() == MVT::i1 ||
4773 bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
4774 if (TrueWhenTrue)
4775 return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
4776 // Invert the condition.
4777 if (N0.getOpcode() == ISD::SETCC) {
4778 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
4780 if (DCI.isBeforeLegalizeOps() ||
4782 return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
4783 }
4784 }
4785
4786 if ((N0.getOpcode() == ISD::XOR ||
4787 (N0.getOpcode() == ISD::AND &&
4788 N0.getOperand(0).getOpcode() == ISD::XOR &&
4789 N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
4790 isOneConstant(N0.getOperand(1))) {
4791 // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
4792 // can only do this if the top bits are known zero.
4793 unsigned BitWidth = N0.getValueSizeInBits();
4794 if (DAG.MaskedValueIsZero(N0,
4796 BitWidth-1))) {
4797 // Okay, get the un-inverted input value.
4798 SDValue Val;
4799 if (N0.getOpcode() == ISD::XOR) {
4800 Val = N0.getOperand(0);
4801 } else {
4802 assert(N0.getOpcode() == ISD::AND &&
4803 N0.getOperand(0).getOpcode() == ISD::XOR);
4804 // ((X^1)&1)^1 -> X & 1
4805 Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
4806 N0.getOperand(0).getOperand(0),
4807 N0.getOperand(1));
4808 }
4809
4810 return DAG.getSetCC(dl, VT, Val, N1,
4812 }
4813 } else if (N1C->isOne()) {
4814 SDValue Op0 = N0;
4815 if (Op0.getOpcode() == ISD::TRUNCATE)
4816 Op0 = Op0.getOperand(0);
4817
4818 if ((Op0.getOpcode() == ISD::XOR) &&
4819 Op0.getOperand(0).getOpcode() == ISD::SETCC &&
4820 Op0.getOperand(1).getOpcode() == ISD::SETCC) {
4821 SDValue XorLHS = Op0.getOperand(0);
4822 SDValue XorRHS = Op0.getOperand(1);
4823 // Ensure that the input setccs return an i1 type or 0/1 value.
4824 if (Op0.getValueType() == MVT::i1 ||
4829 // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
4831 return DAG.getSetCC(dl, VT, XorLHS, XorRHS, Cond);
4832 }
4833 }
4834 if (Op0.getOpcode() == ISD::AND && isOneConstant(Op0.getOperand(1))) {
4835 // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
4836 if (Op0.getValueType().bitsGT(VT))
4837 Op0 = DAG.getNode(ISD::AND, dl, VT,
4838 DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
4839 DAG.getConstant(1, dl, VT));
4840 else if (Op0.getValueType().bitsLT(VT))
4841 Op0 = DAG.getNode(ISD::AND, dl, VT,
4842 DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
4843 DAG.getConstant(1, dl, VT));
4844
4845 return DAG.getSetCC(dl, VT, Op0,
4846 DAG.getConstant(0, dl, Op0.getValueType()),
4848 }
4849 if (Op0.getOpcode() == ISD::AssertZext &&
4850 cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
4851 return DAG.getSetCC(dl, VT, Op0,
4852 DAG.getConstant(0, dl, Op0.getValueType()),
4854 }
4855 }
4856
4857 // Given:
4858 // icmp eq/ne (urem %x, %y), 0
4859 // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
4860 // icmp eq/ne %x, 0
4861 if (N0.getOpcode() == ISD::UREM && N1C->isZero() &&
4862 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
4863 KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));
4864 KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));
4865 if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
4866 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
4867 }
4868
4869 // Fold set_cc seteq (ashr X, BW-1), -1 -> set_cc setlt X, 0
4870 // and set_cc setne (ashr X, BW-1), -1 -> set_cc setge X, 0
4871 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4872 N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(N0.getOperand(1)) &&
4873 N0.getConstantOperandAPInt(1) == OpVT.getScalarSizeInBits() - 1 &&
4874 N1C && N1C->isAllOnes()) {
4875 return DAG.getSetCC(dl, VT, N0.getOperand(0),
4876 DAG.getConstant(0, dl, OpVT),
4878 }
4879
4880 if (SDValue V =
4881 optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
4882 return V;
4883 }
4884
4885 // These simplifications apply to splat vectors as well.
4886 // TODO: Handle more splat vector cases.
4887 if (auto *N1C = isConstOrConstSplat(N1)) {
4888 const APInt &C1 = N1C->getAPIntValue();
4889
4890 APInt MinVal, MaxVal;
4891 unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits();
4893 MinVal = APInt::getSignedMinValue(OperandBitSize);
4894 MaxVal = APInt::getSignedMaxValue(OperandBitSize);
4895 } else {
4896 MinVal = APInt::getMinValue(OperandBitSize);
4897 MaxVal = APInt::getMaxValue(OperandBitSize);
4898 }
4899
4900 // Canonicalize GE/LE comparisons to use GT/LT comparisons.
4901 if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
4902 // X >= MIN --> true
4903 if (C1 == MinVal)
4904 return DAG.getBoolConstant(true, dl, VT, OpVT);
4905
4906 if (!VT.isVector()) { // TODO: Support this for vectors.
4907 // X >= C0 --> X > (C0 - 1)
4908 APInt C = C1 - 1;
4910 if ((DCI.isBeforeLegalizeOps() ||
4911 isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
4912 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
4913 isLegalICmpImmediate(C.getSExtValue())))) {
4914 return DAG.getSetCC(dl, VT, N0,
4915 DAG.getConstant(C, dl, N1.getValueType()),
4916 NewCC);
4917 }
4918 }
4919 }
4920
4921 if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
4922 // X <= MAX --> true
4923 if (C1 == MaxVal)
4924 return DAG.getBoolConstant(true, dl, VT, OpVT);
4925
4926 // X <= C0 --> X < (C0 + 1)
4927 if (!VT.isVector()) { // TODO: Support this for vectors.
4928 APInt C = C1 + 1;
4930 if ((DCI.isBeforeLegalizeOps() ||
4931 isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
4932 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
4933 isLegalICmpImmediate(C.getSExtValue())))) {
4934 return DAG.getSetCC(dl, VT, N0,
4935 DAG.getConstant(C, dl, N1.getValueType()),
4936 NewCC);
4937 }
4938 }
4939 }
4940
4941 if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
4942 if (C1 == MinVal)
4943 return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false
4944
4945 // TODO: Support this for vectors after legalize ops.
4946 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
4947 // Canonicalize setlt X, Max --> setne X, Max
4948 if (C1 == MaxVal)
4949 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
4950
4951 // If we have setult X, 1, turn it into seteq X, 0
4952 if (C1 == MinVal+1)
4953 return DAG.getSetCC(dl, VT, N0,
4954 DAG.getConstant(MinVal, dl, N0.getValueType()),
4955 ISD::SETEQ);
4956 }
4957 }
4958
4959 if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
4960 if (C1 == MaxVal)
4961 return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false
4962
4963 // TODO: Support this for vectors after legalize ops.
4964 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
4965 // Canonicalize setgt X, Min --> setne X, Min
4966 if (C1 == MinVal)
4967 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
4968
4969 // If we have setugt X, Max-1, turn it into seteq X, Max
4970 if (C1 == MaxVal-1)
4971 return DAG.getSetCC(dl, VT, N0,
4972 DAG.getConstant(MaxVal, dl, N0.getValueType()),
4973 ISD::SETEQ);
4974 }
4975 }
4976
4977 if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
4978 // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
4979 if (C1.isZero())
4980 if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
4981 VT, N0, N1, Cond, DCI, dl))
4982 return CC;
4983
4984 // For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y).
4985 // For example, when high 32-bits of i64 X are known clear:
4986 // all bits clear: (X | (Y<<32)) == 0 --> (X | Y) == 0
4987 // all bits set: (X | (Y<<32)) == -1 --> (X & Y) == -1
4988 bool CmpZero = N1C->isZero();
4989 bool CmpNegOne = N1C->isAllOnes();
4990 if ((CmpZero || CmpNegOne) && N0.hasOneUse()) {
4991 // Match or(lo,shl(hi,bw/2)) pattern.
4992 auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
4993 unsigned EltBits = V.getScalarValueSizeInBits();
4994 if (V.getOpcode() != ISD::OR || (EltBits % 2) != 0)
4995 return false;
4996 SDValue LHS = V.getOperand(0);
4997 SDValue RHS = V.getOperand(1);
4998 APInt HiBits = APInt::getHighBitsSet(EltBits, EltBits / 2);
 4999 // Unshifted element must have zero upper bits.
5000 if (RHS.getOpcode() == ISD::SHL &&
5001 isa<ConstantSDNode>(RHS.getOperand(1)) &&
5002 RHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5003 DAG.MaskedValueIsZero(LHS, HiBits)) {
5004 Lo = LHS;
5005 Hi = RHS.getOperand(0);
5006 return true;
5007 }
5008 if (LHS.getOpcode() == ISD::SHL &&
5009 isa<ConstantSDNode>(LHS.getOperand(1)) &&
5010 LHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5011 DAG.MaskedValueIsZero(RHS, HiBits)) {
5012 Lo = RHS;
5013 Hi = LHS.getOperand(0);
5014 return true;
5015 }
5016 return false;
5017 };
5018
5019 auto MergeConcat = [&](SDValue Lo, SDValue Hi) {
5020 unsigned EltBits = N0.getScalarValueSizeInBits();
5021 unsigned HalfBits = EltBits / 2;
5022 APInt HiBits = APInt::getHighBitsSet(EltBits, HalfBits);
5023 SDValue LoBits = DAG.getConstant(~HiBits, dl, OpVT);
5024 SDValue HiMask = DAG.getNode(ISD::AND, dl, OpVT, Hi, LoBits);
5025 SDValue NewN0 =
5026 DAG.getNode(CmpZero ? ISD::OR : ISD::AND, dl, OpVT, Lo, HiMask);
5027 SDValue NewN1 = CmpZero ? DAG.getConstant(0, dl, OpVT) : LoBits;
5028 return DAG.getSetCC(dl, VT, NewN0, NewN1, Cond);
5029 };
5030
5031 SDValue Lo, Hi;
5032 if (IsConcat(N0, Lo, Hi))
5033 return MergeConcat(Lo, Hi);
5034
5035 if (N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR) {
5036 SDValue Lo0, Lo1, Hi0, Hi1;
5037 if (IsConcat(N0.getOperand(0), Lo0, Hi0) &&
5038 IsConcat(N0.getOperand(1), Lo1, Hi1)) {
5039 return MergeConcat(DAG.getNode(N0.getOpcode(), dl, OpVT, Lo0, Lo1),
5040 DAG.getNode(N0.getOpcode(), dl, OpVT, Hi0, Hi1));
5041 }
5042 }
5043 }
5044 }
5045
5046 // If we have "setcc X, C0", check to see if we can shrink the immediate
5047 // by changing cc.
5048 // TODO: Support this for vectors after legalize ops.
5049 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5050 // SETUGT X, SINTMAX -> SETLT X, 0
5051 // SETUGE X, SINTMIN -> SETLT X, 0
5052 if ((Cond == ISD::SETUGT && C1.isMaxSignedValue()) ||
5053 (Cond == ISD::SETUGE && C1.isMinSignedValue()))
5054 return DAG.getSetCC(dl, VT, N0,
5055 DAG.getConstant(0, dl, N1.getValueType()),
5056 ISD::SETLT);
5057
5058 // SETULT X, SINTMIN -> SETGT X, -1
5059 // SETULE X, SINTMAX -> SETGT X, -1
5060 if ((Cond == ISD::SETULT && C1.isMinSignedValue()) ||
5061 (Cond == ISD::SETULE && C1.isMaxSignedValue()))
5062 return DAG.getSetCC(dl, VT, N0,
5063 DAG.getAllOnesConstant(dl, N1.getValueType()),
5064 ISD::SETGT);
5065 }
5066 }
5067
5068 // Back to non-vector simplifications.
5069 // TODO: Can we do these for vector splats?
5070 if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
5071 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5072 const APInt &C1 = N1C->getAPIntValue();
5073 EVT ShValTy = N0.getValueType();
5074
5075 // Fold bit comparisons when we can. This will result in an
5076 // incorrect value when boolean false is negative one, unless
5077 // the bitsize is 1 in which case the false value is the same
5078 // in practice regardless of the representation.
5079 if ((VT.getSizeInBits() == 1 ||
5081 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5082 (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) &&
5083 N0.getOpcode() == ISD::AND) {
5084 if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5085 if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3
5086 // Perform the xform if the AND RHS is a single bit.
5087 unsigned ShCt = AndRHS->getAPIntValue().logBase2();
5088 if (AndRHS->getAPIntValue().isPowerOf2() &&
5089 !TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) {
5090 return DAG.getNode(
5091 ISD::TRUNCATE, dl, VT,
5092 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5094 ShCt, ShValTy, dl, !DCI.isBeforeLegalize())));
5095 }
5096 } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
5097 // (X & 8) == 8 --> (X & 8) >> 3
5098 // Perform the xform if C1 is a single bit.
5099 unsigned ShCt = C1.logBase2();
5100 if (C1.isPowerOf2() &&
5101 !TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) {
5102 return DAG.getNode(
5103 ISD::TRUNCATE, dl, VT,
5104 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5106 ShCt, ShValTy, dl, !DCI.isBeforeLegalize())));
5107 }
5108 }
5109 }
5110 }
5111
5112 if (C1.getSignificantBits() <= 64 &&
5114 // (X & -256) == 256 -> (X >> 8) == 1
5115 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5116 N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
5117 if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5118 const APInt &AndRHSC = AndRHS->getAPIntValue();
5119 if (AndRHSC.isNegatedPowerOf2() && (AndRHSC & C1) == C1) {
5120 unsigned ShiftBits = AndRHSC.countr_zero();
5121 if (!TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5122 SDValue Shift = DAG.getNode(
5123 ISD::SRL, dl, ShValTy, N0.getOperand(0),
5124 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl,
5125 !DCI.isBeforeLegalize()));
5126 SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, ShValTy);
5127 return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
5128 }
5129 }
5130 }
5131 } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
5132 Cond == ISD::SETULE || Cond == ISD::SETUGT) {
5133 bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
5134 // X < 0x100000000 -> (X >> 32) < 1
5135 // X >= 0x100000000 -> (X >> 32) >= 1
5136 // X <= 0x0ffffffff -> (X >> 32) < 1
5137 // X > 0x0ffffffff -> (X >> 32) >= 1
5138 unsigned ShiftBits;
5139 APInt NewC = C1;
5140 ISD::CondCode NewCond = Cond;
5141 if (AdjOne) {
5142 ShiftBits = C1.countr_one();
5143 NewC = NewC + 1;
5144 NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
5145 } else {
5146 ShiftBits = C1.countr_zero();
5147 }
5148 NewC.lshrInPlace(ShiftBits);
5149 if (ShiftBits && NewC.getSignificantBits() <= 64 &&
5151 !TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5152 SDValue Shift =
5153 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5154 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl,
5155 !DCI.isBeforeLegalize()));
5156 SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
5157 return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
5158 }
5159 }
5160 }
5161 }
5162
5163 if (!isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1)) {
5164 auto *CFP = cast<ConstantFPSDNode>(N1);
5165 assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");
5166
5167 // Otherwise, we know the RHS is not a NaN. Simplify the node to drop the
5168 // constant if knowing that the operand is non-nan is enough. We prefer to
5169 // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
5170 // materialize 0.0.
5171 if (Cond == ISD::SETO || Cond == ISD::SETUO)
5172 return DAG.getSetCC(dl, VT, N0, N0, Cond);
5173
5174 // setcc (fneg x), C -> setcc swap(pred) x, -C
5175 if (N0.getOpcode() == ISD::FNEG) {
5177 if (DCI.isBeforeLegalizeOps() ||
5178 isCondCodeLegal(SwapCond, N0.getSimpleValueType())) {
5179 SDValue NegN1 = DAG.getNode(ISD::FNEG, dl, N0.getValueType(), N1);
5180 return DAG.getSetCC(dl, VT, N0.getOperand(0), NegN1, SwapCond);
5181 }
5182 }
5183
5184 // setueq/setoeq X, (fabs Inf) -> is_fpclass X, fcInf
5186 !isFPImmLegal(CFP->getValueAPF(), CFP->getValueType(0))) {
5187 bool IsFabs = N0.getOpcode() == ISD::FABS;
5188 SDValue Op = IsFabs ? N0.getOperand(0) : N0;
5189 if ((Cond == ISD::SETOEQ || Cond == ISD::SETUEQ) && CFP->isInfinity()) {
5190 FPClassTest Flag = CFP->isNegative() ? (IsFabs ? fcNone : fcNegInf)
5191 : (IsFabs ? fcInf : fcPosInf);
5192 if (Cond == ISD::SETUEQ)
5193 Flag |= fcNan;
5194 return DAG.getNode(ISD::IS_FPCLASS, dl, VT, Op,
5195 DAG.getTargetConstant(Flag, dl, MVT::i32));
5196 }
5197 }
5198
5199 // If the condition is not legal, see if we can find an equivalent one
5200 // which is legal.
5202 // If the comparison was an awkward floating-point == or != and one of
5203 // the comparison operands is infinity or negative infinity, convert the
5204 // condition to a less-awkward <= or >=.
5205 if (CFP->getValueAPF().isInfinity()) {
5206 bool IsNegInf = CFP->getValueAPF().isNegative();
5208 switch (Cond) {
5209 case ISD::SETOEQ: NewCond = IsNegInf ? ISD::SETOLE : ISD::SETOGE; break;
5210 case ISD::SETUEQ: NewCond = IsNegInf ? ISD::SETULE : ISD::SETUGE; break;
5211 case ISD::SETUNE: NewCond = IsNegInf ? ISD::SETUGT : ISD::SETULT; break;
5212 case ISD::SETONE: NewCond = IsNegInf ? ISD::SETOGT : ISD::SETOLT; break;
5213 default: break;
5214 }
5215 if (NewCond != ISD::SETCC_INVALID &&
5216 isCondCodeLegal(NewCond, N0.getSimpleValueType()))
5217 return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5218 }
5219 }
5220 }
5221
5222 if (N0 == N1) {
5223 // The sext(setcc()) => setcc() optimization relies on the appropriate
5224 // constant being emitted.
5225 assert(!N0.getValueType().isInteger() &&
5226 "Integer types should be handled by FoldSetCC");
5227
5228 bool EqTrue = ISD::isTrueWhenEqual(Cond);
5229 unsigned UOF = ISD::getUnorderedFlavor(Cond);
5230 if (UOF == 2) // FP operators that are undefined on NaNs.
5231 return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5232 if (UOF == unsigned(EqTrue))
5233 return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5234 // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
5235 // if it is not already.
5236 ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
5237 if (NewCond != Cond &&
5238 (DCI.isBeforeLegalizeOps() ||
5239 isCondCodeLegal(NewCond, N0.getSimpleValueType())))
5240 return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5241 }
5242
5243 // ~X > ~Y --> Y > X
5244 // ~X < ~Y --> Y < X
5245 // ~X < C --> X > ~C
5246 // ~X > C --> X < ~C
5247 if ((isSignedIntSetCC(Cond) || isUnsignedIntSetCC(Cond)) &&
5248 N0.getValueType().isInteger()) {
5249 if (isBitwiseNot(N0)) {
5250 if (isBitwiseNot(N1))
5251 return DAG.getSetCC(dl, VT, N1.getOperand(0), N0.getOperand(0), Cond);
5252
5255 SDValue Not = DAG.getNOT(dl, N1, OpVT);
5256 return DAG.getSetCC(dl, VT, Not, N0.getOperand(0), Cond);
5257 }
5258 }
5259 }
5260
5261 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5262 N0.getValueType().isInteger()) {
5263 if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
5264 N0.getOpcode() == ISD::XOR) {
5265 // Simplify (X+Y) == (X+Z) --> Y == Z
5266 if (N0.getOpcode() == N1.getOpcode()) {
5267 if (N0.getOperand(0) == N1.getOperand(0))
5268 return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
5269 if (N0.getOperand(1) == N1.getOperand(1))
5270 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
5271 if (isCommutativeBinOp(N0.getOpcode())) {
5272 // If X op Y == Y op X, try other combinations.
5273 if (N0.getOperand(0) == N1.getOperand(1))
5274 return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
5275 Cond);
5276 if (N0.getOperand(1) == N1.getOperand(0))
5277 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
5278 Cond);
5279 }
5280 }
5281
5282 // If RHS is a legal immediate value for a compare instruction, we need
5283 // to be careful about increasing register pressure needlessly.
5284 bool LegalRHSImm = false;
5285
5286 if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
5287 if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5288 // Turn (X+C1) == C2 --> X == C2-C1
5289 if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse())
5290 return DAG.getSetCC(
5291 dl, VT, N0.getOperand(0),
5292 DAG.getConstant(RHSC->getAPIntValue() - LHSR->getAPIntValue(),
5293 dl, N0.getValueType()),
5294 Cond);
5295
5296 // Turn (X^C1) == C2 --> X == C1^C2
5297 if (N0.getOpcode() == ISD::XOR && N0.getNode()->hasOneUse())
5298 return DAG.getSetCC(
5299 dl, VT, N0.getOperand(0),
5300 DAG.getConstant(LHSR->getAPIntValue() ^ RHSC->getAPIntValue(),
5301 dl, N0.getValueType()),
5302 Cond);
5303 }
5304
5305 // Turn (C1-X) == C2 --> X == C1-C2
5306 if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
5307 if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse())
5308 return DAG.getSetCC(
5309 dl, VT, N0.getOperand(1),
5310 DAG.getConstant(SUBC->getAPIntValue() - RHSC->getAPIntValue(),
5311 dl, N0.getValueType()),
5312 Cond);
5313
5314 // Could RHSC fold directly into a compare?
5315 if (RHSC->getValueType(0).getSizeInBits() <= 64)
5316 LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
5317 }
5318
5319 // (X+Y) == X --> Y == 0 and similar folds.
5320 // Don't do this if X is an immediate that can fold into a cmp
5321 // instruction and X+Y has other uses. It could be an induction variable
5322 // chain, and the transform would increase register pressure.
5323 if (!LegalRHSImm || N0.hasOneUse())
5324 if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, dl, DCI))
5325 return V;
5326 }
5327
5328 if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
5329 N1.getOpcode() == ISD::XOR)
5330 if (SDValue V = foldSetCCWithBinOp(VT, N1, N0, Cond, dl, DCI))
5331 return V;
5332
5333 if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, dl, DCI))
5334 return V;
5335 }
5336
5337 // Fold remainder of division by a constant.
5338 if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) &&
5339 N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5340 // When division is cheap or optimizing for minimum size,
5341 // fall through to DIVREM creation by skipping this fold.
5342 if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Attribute::MinSize)) {
5343 if (N0.getOpcode() == ISD::UREM) {
5344 if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
5345 return Folded;
5346 } else if (N0.getOpcode() == ISD::SREM) {
5347 if (SDValue Folded = buildSREMEqFold(VT, N0, N1, Cond, DCI, dl))
5348 return Folded;
5349 }
5350 }
5351 }
5352
5353 // Fold away ALL boolean setcc's.
5354 if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
5355 SDValue Temp;
5356 switch (Cond) {
5357 default: llvm_unreachable("Unknown integer setcc!");
5358 case ISD::SETEQ: // X == Y -> ~(X^Y)
5359 Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5360 N0 = DAG.getNOT(dl, Temp, OpVT);
5361 if (!DCI.isCalledByLegalizer())
5362 DCI.AddToWorklist(Temp.getNode());
5363 break;
5364 case ISD::SETNE: // X != Y --> (X^Y)
5365 N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5366 break;
5367 case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
5368 case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
5369 Temp = DAG.getNOT(dl, N0, OpVT);
5370 N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp);
5371 if (!DCI.isCalledByLegalizer())
5372 DCI.AddToWorklist(Temp.getNode());
5373 break;
5374 case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
5375 case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
5376 Temp = DAG.getNOT(dl, N1, OpVT);
5377 N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp);
5378 if (!DCI.isCalledByLegalizer())
5379 DCI.AddToWorklist(Temp.getNode());
5380 break;
5381 case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
5382 case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
5383 Temp = DAG.getNOT(dl, N0, OpVT);
5384 N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp);
5385 if (!DCI.isCalledByLegalizer())
5386 DCI.AddToWorklist(Temp.getNode());
5387 break;
5388 case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
5389 case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
5390 Temp = DAG.getNOT(dl, N1, OpVT);
5391 N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp);
5392 break;
5393 }
5394 if (VT.getScalarType() != MVT::i1) {
5395 if (!DCI.isCalledByLegalizer())
5396 DCI.AddToWorklist(N0.getNode());
5397 // FIXME: If running after legalize, we probably can't do this.
5399 N0 = DAG.getNode(ExtendCode, dl, VT, N0);
5400 }
5401 return N0;
5402 }
5403
5404 // Could not fold it.
5405 return SDValue();
5406}
5407
/// Returns true (and the GlobalValue and the offset) if the node is a
/// GlobalAddress + offset.
                                    int64_t &Offset) const {

  // Strip any target-specific address-wrapper nodes before inspecting.
  SDNode *N = unwrapAddress(SDValue(WN, 0)).getNode();

  // Direct hit: the node itself is a global address. Fold the node's own
  // internal offset into the running total.
  if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
    GA = GASD->getGlobal();
    Offset += GASD->getOffset();
    return true;
  }

  // (GA + C) or (C + GA): recurse into whichever ADD operand contains the
  // global, then require the other operand to be a constant and accumulate
  // it into Offset. Recursion handles nested forms like ((GA + C1) + C2).
  if (N->getOpcode() == ISD::ADD) {
    SDValue N1 = N->getOperand(0);
    SDValue N2 = N->getOperand(1);
    if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
      if (auto *V = dyn_cast<ConstantSDNode>(N2)) {
        Offset += V->getSExtValue();
        return true;
      }
    } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
      if (auto *V = dyn_cast<ConstantSDNode>(N1)) {
        Offset += V->getSExtValue();
        return true;
      }
    }
  }

  return false;
}
5439
                                              DAGCombinerInfo &DCI) const {
  // Default implementation: no optimization. Targets override this hook to
  // perform target-specific DAG combines; an empty SDValue means "no change".
  return SDValue();
}
5445
5446//===----------------------------------------------------------------------===//
5447// Inline Assembler Implementation Methods
5448//===----------------------------------------------------------------------===//
5449
  // Classify an inline-asm constraint string: single GCC constraint letters
  // map to broad buckets; "{regname}" selects a specific physical register.
  unsigned S = Constraint.size();

  if (S == 1) {
    switch (Constraint[0]) {
    default: break;
    case 'r':
      return C_RegisterClass;
    case 'm': // memory
    case 'o': // offsetable
    case 'V': // not offsetable
      return C_Memory;
    case 'p': // Address.
      return C_Address;
    case 'n': // Simple Integer
    case 'E': // Floating Point Constant
    case 'F': // Floating Point Constant
      return C_Immediate;
    case 'i': // Simple Integer or Relocatable Constant
    case 's': // Relocatable Constant
    case 'X': // Allow ANY value.
    case 'I': // Target registers.
    case 'J':
    case 'K':
    case 'L':
    case 'M':
    case 'N':
    case 'O':
    case 'P':
    case '<':
    case '>':
      return C_Other;
    }
  }

  // Brace-enclosed names name a specific register, except the special
  // "{memory}" spelling which is treated as a memory constraint.
  if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') {
    if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}"
      return C_Memory;
    return C_Register;
  }
  return C_Unknown;
}
5493
5494/// Try to replace an X constraint, which matches anything, with another that
5495/// has more specific requirements based on the type of the corresponding
5496/// operand.
5497const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
5498 if (ConstraintVT.isInteger())
5499 return "r";
5500 if (ConstraintVT.isFloatingPoint())
5501 return "f"; // works for many targets
5502 return nullptr;
5503}
5504
                                 SDValue &Chain, SDValue &Glue, const SDLoc &DL,
                                 const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
  // Default implementation: constraint not handled here. Targets that
  // support flag-output asm operands override this hook; an empty SDValue
  // tells the caller nothing was lowered.
  return SDValue();
}
5510
/// Lower the specified operand into the Ops vector.
/// If it is invalid, don't add anything to Ops.
                                                  StringRef Constraint,
                                                  std::vector<SDValue> &Ops,
                                                  SelectionDAG &DAG) const {

  // Only single-letter constraints are handled by this default
  // implementation; multi-letter codes are target-specific.
  if (Constraint.size() > 1)
    return;

  char ConstraintLetter = Constraint[0];
  switch (ConstraintLetter) {
  default: break;
  case 'X': // Allows any operand
  case 'i': // Simple Integer or Relocatable Constant
  case 'n': // Simple Integer
  case 's': { // Relocatable Constant

    uint64_t Offset = 0;

    // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
    // etc., since getelementpointer is variadic. We can't use
    // SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible
    // while in this case the GA may be furthest from the root node which is
    // likely an ISD::ADD.
    while (true) {
      if ((C = dyn_cast<ConstantSDNode>(Op)) && ConstraintLetter != 's') {
        // gcc prints these as sign extended. Sign extend value to 64 bits
        // now; without this it would get ZExt'd later in
        // ScheduleDAGSDNodes::EmitNode, which is very generic.
        bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
        BooleanContent BCont = getBooleanContents(MVT::i64);
        ISD::NodeType ExtOpc =
            IsBool ? getExtendForContent(BCont) : ISD::SIGN_EXTEND;
        int64_t ExtVal =
            ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue() : C->getSExtValue();
        Ops.push_back(
            DAG.getTargetConstant(Offset + ExtVal, SDLoc(C), MVT::i64));
        return;
      }
      if (ConstraintLetter != 'n') {
        // Symbolic operands (globals, block addresses, basic blocks) are
        // accepted by every letter here except 'n' (pure integer).
        if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
          Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
                                                   GA->getValueType(0),
                                                   Offset + GA->getOffset()));
          return;
        }
        if (const auto *BA = dyn_cast<BlockAddressSDNode>(Op)) {
          Ops.push_back(DAG.getTargetBlockAddress(
              BA->getBlockAddress(), BA->getValueType(0),
              Offset + BA->getOffset(), BA->getTargetFlags()));
          return;
        }
        if (isa<BasicBlockSDNode>(Op)) {
          Ops.push_back(Op);
          return;
        }
      }
      // Peel one level of (symbol +/- constant): accumulate the constant
      // into Offset and keep walking toward the symbol side.
      const unsigned OpCode = Op.getOpcode();
      if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
        if ((C = dyn_cast<ConstantSDNode>(Op.getOperand(0))))
          Op = Op.getOperand(1);
        // Subtraction is not commutative.
        else if (OpCode == ISD::ADD &&
                 (C = dyn_cast<ConstantSDNode>(Op.getOperand(1))))
          Op = Op.getOperand(0);
        else
          return;
        Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
        continue;
      }
      return;
    }
    break;
  }
  }
}
5589
    const CallInst &I, SmallVectorImpl<SDValue> &Ops, SelectionDAG &DAG) const {
  // Default: no extra operands. Targets override this hook to append
  // target-specific operands when lowering intrinsic calls.
}
5593
std::pair<unsigned, const TargetRegisterClass *>
                                             StringRef Constraint,
                                             MVT VT) const {
  // Only explicit "{regname}" constraints are resolved here; everything
  // else is left to target overrides.
  if (!Constraint.starts_with("{"))
    return std::make_pair(0u, static_cast<TargetRegisterClass *>(nullptr));
  assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");

  // Remove the braces from around the name.
  StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);

  // Fallback result: first class containing the register, even if VT is not
  // legal for it.
  std::pair<unsigned, const TargetRegisterClass *> R =
      std::make_pair(0u, static_cast<const TargetRegisterClass *>(nullptr));

  // Figure out which register class contains this reg.
  for (const TargetRegisterClass *RC : RI->regclasses()) {
    // If none of the value types for this register class are valid, we
    // can't use it. For example, 64-bit reg classes on 32-bit targets.
    if (!isLegalRC(*RI, *RC))
      continue;

    for (const MCPhysReg &PR : *RC) {
      // Register asm names are matched case-insensitively.
      if (RegName.equals_insensitive(RI->getRegAsmName(PR))) {
        std::pair<unsigned, const TargetRegisterClass *> S =
            std::make_pair(PR, RC);

        // If this register class has the requested value type, return it,
        // otherwise keep searching and return the first class found
        // if no other is found which explicitly has the requested type.
        if (RI->isTypeLegalForClass(*RC, VT))
          return S;
        if (!R.second)
          R = S;
      }
    }
  }

  return R;
}
5633
5634//===----------------------------------------------------------------------===//
5635// Constraint Selection.
5636
/// Return true if this is an input operand that is a matching constraint
/// like "4" (a decimal index tying it to the same location as an output
/// operand).
  assert(!ConstraintCode.empty() && "No known constraint!");
  return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
}
5643
/// If this is an input matching constraint, this method returns the output
/// operand it matches — the leading decimal digits of the constraint code,
/// parsed with atoi.
  assert(!ConstraintCode.empty() && "No known constraint!");
  return atoi(ConstraintCode.c_str());
}
5650
/// Split up the constraint string from the inline assembly value into the
/// specific constraints and their prefixes, and also tie in the associated
/// operand values.
/// If this returns an empty vector, and if the constraint string itself
/// isn't empty, there was an error parsing.
                                 const TargetRegisterInfo *TRI,
                                 const CallBase &Call) const {
  /// Information about all of the constraints.
  AsmOperandInfoVector ConstraintOperands;
  const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
  unsigned maCount = 0; // Largest number of multiple alternative constraints.

  // Do a prepass over the constraints, canonicalizing them, and building up the
  // ConstraintOperands list.
  unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
  unsigned ResNo = 0; // ResNo - The result number of the next output.
  unsigned LabelNo = 0; // LabelNo - CallBr indirect dest number.

  for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
    ConstraintOperands.emplace_back(std::move(CI));
    AsmOperandInfo &OpInfo = ConstraintOperands.back();

    // Update multiple alternative constraint count.
    if (OpInfo.multipleAlternatives.size() > maCount)
      maCount = OpInfo.multipleAlternatives.size();

    OpInfo.ConstraintVT = MVT::Other;

    // Compute the value type for each operand.
    switch (OpInfo.Type) {
      // Indirect outputs just consume an argument.
      if (OpInfo.isIndirect) {
        OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
        break;
      }

      // The return value of the call is this value. As such, there is no
      // corresponding argument.
      assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
      if (StructType *STy = dyn_cast<StructType>(Call.getType())) {
        // Multi-result asm: each output gets the matching struct element type.
        OpInfo.ConstraintVT =
            getSimpleValueType(DL, STy->getElementType(ResNo));
      } else {
        assert(ResNo == 0 && "Asm only has one result!");
        OpInfo.ConstraintVT =
            getAsmOperandValueType(DL, Call.getType()).getSimpleVT();
      }
      ++ResNo;
      break;
    case InlineAsm::isInput:
      OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
      break;
    case InlineAsm::isLabel:
      OpInfo.CallOperandVal = cast<CallBrInst>(&Call)->getIndirectDest(LabelNo);
      ++LabelNo;
      continue;
      // Nothing to do.
      break;
    }

    if (OpInfo.CallOperandVal) {
      llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
      if (OpInfo.isIndirect) {
        // Indirect operands carry the pointee type via elementtype.
        OpTy = Call.getParamElementType(ArgNo);
        assert(OpTy && "Indirect operand must have elementtype attribute");
      }

      // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
      if (StructType *STy = dyn_cast<StructType>(OpTy))
        if (STy->getNumElements() == 1)
          OpTy = STy->getElementType(0);

      // If OpTy is not a single value, it may be a struct/union that we
      // can tile with integers.
      if (!OpTy->isSingleValueType() && OpTy->isSized()) {
        unsigned BitSize = DL.getTypeSizeInBits(OpTy);
        switch (BitSize) {
        default: break;
        case 1:
        case 8:
        case 16:
        case 32:
        case 64:
        case 128:
          OpTy = IntegerType::get(OpTy->getContext(), BitSize);
          break;
        }
      }

      EVT VT = getAsmOperandValueType(DL, OpTy, true);
      OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
      ArgNo++;
    }
  }

  // If we have multiple alternative constraints, select the best alternative.
  if (!ConstraintOperands.empty()) {
    if (maCount) {
      unsigned bestMAIndex = 0;
      int bestWeight = -1;
      // weight: -1 = invalid match, and 0 = so-so match to 5 = good match.
      int weight = -1;
      unsigned maIndex;
      // Compute the sums of the weights for each alternative, keeping track
      // of the best (highest weight) one so far.
      for (maIndex = 0; maIndex < maCount; ++maIndex) {
        int weightSum = 0;
        for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
             cIndex != eIndex; ++cIndex) {
          AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
          if (OpInfo.Type == InlineAsm::isClobber)
            continue;

          // If this is an output operand with a matching input operand,
          // look up the matching input. If their types mismatch, e.g. one
          // is an integer, the other is floating point, or their sizes are
          // different, flag it as an maCantMatch.
          if (OpInfo.hasMatchingInput()) {
            AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
            if (OpInfo.ConstraintVT != Input.ConstraintVT) {
              if ((OpInfo.ConstraintVT.isInteger() !=
                   Input.ConstraintVT.isInteger()) ||
                  (OpInfo.ConstraintVT.getSizeInBits() !=
                   Input.ConstraintVT.getSizeInBits())) {
                weightSum = -1; // Can't match.
                break;
              }
            }
          }
          weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
          if (weight == -1) {
            weightSum = -1;
            break;
          }
          weightSum += weight;
        }
        // Update best.
        if (weightSum > bestWeight) {
          bestWeight = weightSum;
          bestMAIndex = maIndex;
        }
      }

      // Now select chosen alternative in each constraint.
      for (AsmOperandInfo &cInfo : ConstraintOperands)
        if (cInfo.Type != InlineAsm::isClobber)
          cInfo.selectAlternative(bestMAIndex);
    }
  }

  // Check and hook up tied operands, choose constraint code to use.
  for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
       cIndex != eIndex; ++cIndex) {
    AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];

    // If this is an output operand with a matching input operand, look up the
    // matching input. If their types mismatch, e.g. one is an integer, the
    // other is floating point, or their sizes are different, flag it as an
    // error.
    if (OpInfo.hasMatchingInput()) {
      AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];

      if (OpInfo.ConstraintVT != Input.ConstraintVT) {
        std::pair<unsigned, const TargetRegisterClass *> MatchRC =
            getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
                                         OpInfo.ConstraintVT);
        std::pair<unsigned, const TargetRegisterClass *> InputRC =
            getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
                                         Input.ConstraintVT);
        // Tied operands must agree on integer-vs-FP and on register class.
        if ((OpInfo.ConstraintVT.isInteger() !=
             Input.ConstraintVT.isInteger()) ||
            (MatchRC.second != InputRC.second)) {
          report_fatal_error("Unsupported asm: input constraint"
                             " with a matching output constraint of"
                             " incompatible type!");
        }
      }
    }
  }

  return ConstraintOperands;
}
5837
/// Return a number indicating our preference for choosing a type of constraint
/// over another, for the purpose of sorting them. Immediates are almost always
/// preferable (when they can be emitted). A higher return value means a
/// stronger preference for one constraint type relative to another.
/// FIXME: We should prefer registers over memory but doing so may lead to
/// unrecoverable register exhaustion later.
/// https://github.com/llvm/llvm-project/issues/20571
  switch (CT) {
  // NOTE(review): the case labels were elided in this listing; upstream maps
  // C_Immediate/C_Other -> 4, C_Memory/C_Address -> 3, C_RegisterClass -> 2,
  // C_Register -> 1, C_Unknown -> 0 — confirm against the full source.
    return 4;
    return 3;
    return 2;
    return 1;
    return 0;
  }
  llvm_unreachable("Invalid constraint type");
}
5862
/// Examine constraint type and operand type and determine a weight value.
/// This object must already have been set up with the operand type
/// and the current alternative constraint selected.
                                   AsmOperandInfo &info, int maIndex) const {
  // Pick the constraint-code list for the requested alternative; an
  // out-of-range index falls back to the primary code list.
  if (maIndex >= (int)info.multipleAlternatives.size())
    rCodes = &info.Codes;
  else
    rCodes = &info.multipleAlternatives[maIndex].Codes;
  ConstraintWeight BestWeight = CW_Invalid;

  // Loop over the options, keeping track of the most general one.
  for (const std::string &rCode : *rCodes) {
    ConstraintWeight weight =
        getSingleConstraintMatchWeight(info, rCode.c_str());
    if (weight > BestWeight)
      BestWeight = weight;
  }

  return BestWeight;
}
5886
/// Examine constraint type and operand type and determine a weight value.
/// This object must already have been set up with the operand type
/// and the current alternative constraint selected.
                                  AsmOperandInfo &info, const char *constraint) const {
  ConstraintWeight weight = CW_Invalid;
  Value *CallOperandVal = info.CallOperandVal;
  // If we don't have a value, we can't do a match,
  // but allow it at the lowest weight.
  if (!CallOperandVal)
    return CW_Default;
  // Look at the constraint type.
  switch (*constraint) {
  case 'i': // immediate integer.
  case 'n': // immediate integer with a known value.
    if (isa<ConstantInt>(CallOperandVal))
      weight = CW_Constant;
    break;
  case 's': // non-explicit integral immediate.
    if (isa<GlobalValue>(CallOperandVal))
      weight = CW_Constant;
    break;
  case 'E': // immediate float if host format.
  case 'F': // immediate float.
    if (isa<ConstantFP>(CallOperandVal))
      weight = CW_Constant;
    break;
  case '<': // memory operand with autodecrement.
  case '>': // memory operand with autoincrement.
  case 'm': // memory operand.
  case 'o': // offsettable memory operand
  case 'V': // non-offsettable memory operand
    weight = CW_Memory;
    break;
  case 'r': // general register.
  case 'g': // general register, memory operand or immediate integer.
            // note: Clang converts "g" to "imr".
    if (CallOperandVal->getType()->isIntegerTy())
      weight = CW_Register;
    break;
  case 'X': // any operand.
  default:
    weight = CW_Default;
    break;
  }
  return weight;
}
5935
/// If there are multiple different constraints that we could pick for this
/// operand (e.g. "imr") try to pick the 'best' one.
/// This is somewhat tricky: constraints (TargetLowering::ConstraintType) fall
/// into seven classes:
///    Register      -> one specific register
///    RegisterClass -> a group of regs
///    Memory        -> memory
///    Address       -> a symbolic memory reference
///    Immediate     -> immediate values
///    Other         -> magic values (such as "Flag Output Operands")
///    Unknown       -> something we don't recognize yet and can't handle
/// Ideally, we would pick the most specific constraint possible: if we have
/// something that fits into a register, we would pick it.  The problem here
/// is that if we have something that could either be in a register or in
/// memory that use of the register could cause selection of *other*
/// operands to fail: they might only succeed if we pick memory.  Because of
/// this the heuristic we use is:
///
///  1) If there is an 'other' constraint, and if the operand is valid for
///     that constraint, use it.  This makes us take advantage of 'i'
///     constraints when available.
///  2) Otherwise, pick the most general constraint present.  This prefers
///     'm' over 'r', for example.
///
                                    TargetLowering::AsmOperandInfo &OpInfo) const {
  ConstraintGroup Ret;

  Ret.reserve(OpInfo.Codes.size());
  for (StringRef Code : OpInfo.Codes) {
    TargetLowering::ConstraintType CType = getConstraintType(Code);

    // Indirect 'other' or 'immediate' constraints are not allowed.
    if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory ||
                               CType == TargetLowering::C_Register ||
      continue;

    // Things with matching constraints can only be registers, per gcc
    // documentation. This mainly affects "g" constraints.
    if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
      continue;

    Ret.emplace_back(Code, CType);
  }

  // Stable sort keeps source order among constraints of equal priority.
  std::stable_sort(
      Ret.begin(), Ret.end(), [](ConstraintPair a, ConstraintPair b) {
        return getConstraintPiority(a.second) > getConstraintPiority(b.second);
      });

  return Ret;
}
5989
/// If we have an immediate, see if we can lower it. Return true if we can,
/// false otherwise.
                                     SDValue Op, SelectionDAG *DAG,
                                     const TargetLowering &TLI) {

  assert((P.second == TargetLowering::C_Other ||
          P.second == TargetLowering::C_Immediate) &&
         "need immediate or other");

  if (!Op.getNode())
    return false;

  // The constraint lowered successfully iff LowerAsmOperandForConstraint
  // produced at least one operand.
  std::vector<SDValue> ResultOps;
  TLI.LowerAsmOperandForConstraint(Op, P.first, ResultOps, *DAG);
  return !ResultOps.empty();
}
6007
/// Determines the constraint code and constraint type to use for the specific
/// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
                                            SDValue Op,
                                            SelectionDAG *DAG) const {
  assert(!OpInfo.Codes.empty() && "Must have at least one constraint");

  // Single-letter constraints ('r') are very common.
  if (OpInfo.Codes.size() == 1) {
    OpInfo.ConstraintCode = OpInfo.Codes[0];
    OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
  } else {
    // Multiple codes: rank them, then take the first Other/Immediate entry
    // that actually lowers; if none lowers, fall back to the top-ranked code.
    ConstraintGroup G = getConstraintPreferences(OpInfo);
    if (G.empty())
      return;

    unsigned BestIdx = 0;
    for (const unsigned E = G.size();
         BestIdx < E && (G[BestIdx].second == TargetLowering::C_Other ||
                         G[BestIdx].second == TargetLowering::C_Immediate);
         ++BestIdx) {
      if (lowerImmediateIfPossible(G[BestIdx], Op, DAG, *this))
        break;
      // If we're out of constraints, just pick the first one.
      if (BestIdx + 1 == E) {
        BestIdx = 0;
        break;
      }
    }

    OpInfo.ConstraintCode = G[BestIdx].first;
    OpInfo.ConstraintType = G[BestIdx].second;
  }

  // 'X' matches anything.
  if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
    // Constants are handled elsewhere.  For Functions, the type here is the
    // type of the result, which is not what we want to look at; leave them
    // alone.
    Value *v = OpInfo.CallOperandVal;
    if (isa<ConstantInt>(v) || isa<Function>(v)) {
      return;
    }

    if (isa<BasicBlock>(v) || isa<BlockAddress>(v)) {
      OpInfo.ConstraintCode = "i";
      return;
    }

    // Otherwise, try to resolve it to something we know about by looking at
    // the actual operand type.
    if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
      OpInfo.ConstraintCode = Repl;
      OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
    }
  }
}
6065
/// Given an exact SDIV by a constant, create a multiplication
/// with the multiplicative inverse of the constant.
                              const SDLoc &dl, SelectionDAG &DAG,
                              SmallVectorImpl<SDNode *> &Created) {
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();

  bool UseSRA = false;
  SmallVector<SDValue, 16> Shifts, Factors;

  // Per divisor element: strip trailing zeros (handled later with an exact
  // arithmetic shift of the dividend), then record the multiplicative
  // inverse of the odd part modulo 2^BitWidth. Zero divisors are rejected.
  auto BuildSDIVPattern = [&](ConstantSDNode *C) {
    if (C->isZero())
      return false;
    APInt Divisor = C->getAPIntValue();
    unsigned Shift = Divisor.countr_zero();
    if (Shift) {
      Divisor.ashrInPlace(Shift);
      UseSRA = true;
    }
    APInt Factor = Divisor.multiplicativeInverse();
    Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
    Factors.push_back(DAG.getConstant(Factor, dl, SVT));
    return true;
  };

  // Collect all magic values from the build vector.
  if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern))
    return SDValue();

  // Materialize the collected shifts/factors in the same shape as the
  // divisor: BUILD_VECTOR, SPLAT_VECTOR, or plain scalar.
  SDValue Shift, Factor;
  if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
    Shift = DAG.getBuildVector(ShVT, dl, Shifts);
    Factor = DAG.getBuildVector(VT, dl, Factors);
  } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(Shifts.size() == 1 && Factors.size() == 1 &&
           "Expected matchUnaryPredicate to return one element for scalable "
           "vectors");
    Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
    Factor = DAG.getSplatVector(VT, dl, Factors[0]);
  } else {
    assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
    Shift = Shifts[0];
    Factor = Factors[0];
  }

  SDValue Res = Op0;

  // Shift the value upfront if it is even, so the LSB is one.
  if (UseSRA) {
    // TODO: For UDIV use SRL instead of SRA.
    SDNodeFlags Flags;
    Flags.setExact(true);
    Res = DAG.getNode(ISD::SRA, dl, VT, Res, Shift, Flags);
    Created.push_back(Res.getNode());
  }

  return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
}
6129
                                       SelectionDAG &DAG,
                                       SmallVectorImpl<SDNode *> &Created) const {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  // If integer division is cheap on this target, keep the SDIV node as-is
  // by returning it unchanged; an empty SDValue means "not handled here".
  if (TLI.isIntDivCheap(N->getValueType(0), Attr))
    return SDValue(N, 0); // Lower SDIV as SDIV
  return SDValue();
}
6139
SDValue
                                  SelectionDAG &DAG,
                                  SmallVectorImpl<SDNode *> &Created) const {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  // Mirrors BuildSDIVPow2: if integer division is cheap, keep the SREM node
  // unchanged; otherwise an empty SDValue lets expansion proceed elsewhere.
  if (TLI.isIntDivCheap(N->getValueType(0), Attr))
    return SDValue(N, 0); // Lower SREM as SREM
  return SDValue();
}
6150
/// Build sdiv by power-of-2 with conditional move instructions
/// Ref: "Hacker's Delight" by Henry Warren 10-1
/// If conditional move/branch is preferred, we lower sdiv x, +/-2**k into:
///   bgez x, label
///   add x, x, 2**k-1
/// label:
///   sra res, x, k
///   neg res, res (when the divisor is negative)
    SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
    SmallVectorImpl<SDNode *> &Created) const {
  unsigned Lg2 = Divisor.countr_zero();
  EVT VT = N->getValueType(0);

  SDLoc DL(N);
  SDValue N0 = N->getOperand(0);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  // 2**Lg2 - 1 as a low-bits mask of the value type.
  APInt Lg2Mask = APInt::getLowBitsSet(VT.getSizeInBits(), Lg2);
  SDValue Pow2MinusOne = DAG.getConstant(Lg2Mask, DL, VT);

  // If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
  EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue Cmp = DAG.getSetCC(DL, CCVT, N0, Zero, ISD::SETLT);
  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
  SDValue CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);

  // Track new nodes so the DAG combiner revisits them.
  Created.push_back(Cmp.getNode());
  Created.push_back(Add.getNode());
  Created.push_back(CMov.getNode());

  // Divide by pow2.
  SDValue SRA =
      DAG.getNode(ISD::SRA, DL, VT, CMov, DAG.getConstant(Lg2, DL, VT));

  // If we're dividing by a positive value, we're done.  Otherwise, we must
  // negate the result.
  if (Divisor.isNonNegative())
    return SRA;

  Created.push_back(SRA.getNode());
  return DAG.getNode(ISD::SUB, DL, VT, Zero, SRA);
}
6193
6194/// Given an ISD::SDIV node expressing a divide by constant,
6195/// return a DAG expression to select that will generate the same value by
6196/// multiplying by a magic number.
6197/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
6199 bool IsAfterLegalization,
6200 SmallVectorImpl<SDNode *> &Created) const {
6201 SDLoc dl(N);
6202 EVT VT = N->getValueType(0);
6203 EVT SVT = VT.getScalarType();
6204 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6205 EVT ShSVT = ShVT.getScalarType();
6206 unsigned EltBits = VT.getScalarSizeInBits();
6207 EVT MulVT;
6208
6209 // Check to see if we can do this.
6210 // FIXME: We should be more aggressive here.
6211 if (!isTypeLegal(VT)) {
6212 // Limit this to simple scalars for now.
6213 if (VT.isVector() || !VT.isSimple())
6214 return SDValue();
6215
6216 // If this type will be promoted to a large enough type with a legal
6217 // multiply operation, we can go ahead and do this transform.
6219 return SDValue();
6220
6221 MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
6222 if (MulVT.getSizeInBits() < (2 * EltBits) ||
6223 !isOperationLegal(ISD::MUL, MulVT))
6224 return SDValue();
6225 }
6226
6227 // If the sdiv has an 'exact' bit we can use a simpler lowering.
6228 if (N->getFlags().hasExact())
6229 return BuildExactSDIV(*this, N, dl, DAG, Created);
6230
6231 SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;
6232
6233 auto BuildSDIVPattern = [&](ConstantSDNode *C) {
6234 if (C->isZero())
6235 return false;
6236
6237 const APInt &Divisor = C->getAPIntValue();
6239 int NumeratorFactor = 0;
6240 int ShiftMask = -1;
6241
6242 if (Divisor.isOne() || Divisor.isAllOnes()) {
6243 // If d is +1/-1, we just multiply the numerator by +1/-1.
6244 NumeratorFactor = Divisor.getSExtValue();
6245 magics.Magic = 0;
6246 magics.ShiftAmount = 0;
6247 ShiftMask = 0;
6248 } else if (Divisor.isStrictlyPositive() && magics.Magic.isNegative()) {
6249 // If d > 0 and m < 0, add the numerator.
6250 NumeratorFactor = 1;
6251 } else if (Divisor.isNegative() && magics.Magic.isStrictlyPositive()) {
6252 // If d < 0 and m > 0, subtract the numerator.
6253 NumeratorFactor = -1;
6254 }
6255
6256 MagicFactors.push_back(DAG.getConstant(magics.Magic, dl, SVT));
6257 Factors.push_back(DAG.getConstant(NumeratorFactor, dl, SVT));
6258 Shifts.push_back(DAG.getConstant(magics.ShiftAmount, dl, ShSVT));
6259 ShiftMasks.push_back(DAG.getConstant(ShiftMask, dl, SVT));
6260 return true;
6261 };
6262
6263 SDValue N0 = N->getOperand(0);
6264 SDValue N1 = N->getOperand(1);
6265
6266 // Collect the shifts / magic values from each element.
6267 if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern))
6268 return SDValue();
6269
6270 SDValue MagicFactor, Factor, Shift, ShiftMask;
6271 if (N1.getOpcode() == ISD::BUILD_VECTOR) {
6272 MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
6273 Factor = DAG.getBuildVector(VT, dl, Factors);
6274 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6275 ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks);
6276 } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
6277 assert(MagicFactors.size() == 1 && Factors.size() == 1 &&
6278 Shifts.size() == 1 && ShiftMasks.size() == 1 &&
6279 "Expected matchUnaryPredicate to return one element for scalable "
6280 "vectors");
6281 MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
6282 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6283 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6284 ShiftMask = DAG.getSplatVector(VT, dl, ShiftMasks[0]);
6285 } else {
6286 assert(isa<ConstantSDNode>(N1) && "Expected a constant");
6287 MagicFactor = MagicFactors[0];
6288 Factor = Factors[0];
6289 Shift = Shifts[0];
6290 ShiftMask = ShiftMasks[0];
6291 }
6292
6293 // Multiply the numerator (operand 0) by the magic value.
6294 // FIXME: We should support doing a MUL in a wider type.
6295 auto GetMULHS = [&](SDValue X, SDValue Y) {
6296 // If the type isn't legal, use a wider mul of the type calculated
6297 // earlier.
6298 if (!isTypeLegal(VT)) {
6299 X = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, X);
6300 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, Y);
6301 Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
6302 Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
6303 DAG.getShiftAmountConstant(EltBits, MulVT, dl));
6304 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6305 }
6306
6307 if (isOperationLegalOrCustom(ISD::MULHS, VT, IsAfterLegalization))
6308 return DAG.getNode(ISD::MULHS, dl, VT, X, Y);
6309 if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT, IsAfterLegalization)) {
6310 SDValue LoHi =
6311 DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
6312 return SDValue(LoHi.getNode(), 1);
6313 }
6314 // If type twice as wide legal, widen and use a mul plus a shift.
6315 unsigned Size = VT.getScalarSizeInBits();
6316 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), Size * 2);
6317 if (VT.isVector())
6318 WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
6320 if (isOperationLegalOrCustom(ISD::MUL, WideVT)) {
6321 X = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, X);
6322 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, Y);
6323 Y = DAG.getNode(ISD::MUL, dl, WideVT, X, Y);
6324 Y = DAG.getNode(ISD::SRL, dl, WideVT, Y,
6325 DAG.getShiftAmountConstant(EltBits, WideVT, dl));
6326 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6327 }
6328 return SDValue();
6329 };
6330
6331 SDValue Q = GetMULHS(N0, MagicFactor);
6332 if (!Q)
6333 return SDValue();
6334
6335 Created.push_back(Q.getNode());
6336
6337 // (Optionally) Add/subtract the numerator using Factor.
6338 Factor = DAG.getNode(ISD::MUL, dl, VT, N0, Factor);
6339 Created.push_back(Factor.getNode());
6340 Q = DAG.getNode(ISD::ADD, dl, VT, Q, Factor);
6341 Created.push_back(Q.getNode());
6342
6343 // Shift right algebraic by shift value.
6344 Q = DAG.getNode(ISD::SRA, dl, VT, Q, Shift);
6345 Created.push_back(Q.getNode());
6346
6347 // Extract the sign bit, mask it and add it to the quotient.
6348 SDValue SignShift = DAG.getConstant(EltBits - 1, dl, ShVT);
6349 SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, SignShift);
6350 Created.push_back(T.getNode());
6351 T = DAG.getNode(ISD::AND, dl, VT, T, ShiftMask);
6352 Created.push_back(T.getNode());
6353 return DAG.getNode(ISD::ADD, dl, VT, Q, T);
6354}
6355
6356/// Given an ISD::UDIV node expressing a divide by constant,
6357/// return a DAG expression to select that will generate the same value by
6358/// multiplying by a magic number.
6359/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
6361 bool IsAfterLegalization,
6362 SmallVectorImpl<SDNode *> &Created) const {
6363 SDLoc dl(N);
6364 EVT VT = N->getValueType(0);
6365 EVT SVT = VT.getScalarType();
6366 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6367 EVT ShSVT = ShVT.getScalarType();
6368 unsigned EltBits = VT.getScalarSizeInBits();
6369 EVT MulVT;
6370
6371 // Check to see if we can do this.
6372 // FIXME: We should be more aggressive here.
6373 if (!isTypeLegal(VT)) {
6374 // Limit this to simple scalars for now.
6375 if (VT.isVector() || !VT.isSimple())
6376 return SDValue();
6377
6378 // If this type will be promoted to a large enough type with a legal
6379 // multiply operation, we can go ahead and do this transform.
6381 return SDValue();
6382
6383 MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
6384 if (MulVT.getSizeInBits() < (2 * EltBits) ||
6385 !isOperationLegal(ISD::MUL, MulVT))
6386 return SDValue();
6387 }
6388
6389 SDValue N0 = N->getOperand(0);
6390 SDValue N1 = N->getOperand(1);
6391
6392 // Try to use leading zeros of the dividend to reduce the multiplier and
6393 // avoid expensive fixups.
6394 // TODO: Support vectors.
6395 unsigned LeadingZeros = 0;
6396 if (!VT.isVector() && isa<ConstantSDNode>(N1)) {
6397 assert(!isOneConstant(N1) && "Unexpected divisor");
6398 LeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros();
6399 // UnsignedDivisionByConstantInfo doesn't work correctly if leading zeros in
6400 // the dividend exceeds the leading zeros for the divisor.
6401 LeadingZeros = std::min(LeadingZeros, N1->getAsAPIntVal().countl_zero());
6402 }
6403
6404 bool UseNPQ = false, UsePreShift = false, UsePostShift = false;
6405 SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
6406
6407 auto BuildUDIVPattern = [&](ConstantSDNode *C) {
6408 if (C->isZero())
6409 return false;
6410 const APInt& Divisor = C->getAPIntValue();
6411
6412 SDValue PreShift, MagicFactor, NPQFactor, PostShift;
6413
6414 // Magic algorithm doesn't work for division by 1. We need to emit a select
6415 // at the end.
6416 if (Divisor.isOne()) {
6417 PreShift = PostShift = DAG.getUNDEF(ShSVT);
6418 MagicFactor = NPQFactor = DAG.getUNDEF(SVT);
6419 } else {
6421 UnsignedDivisionByConstantInfo::get(Divisor, LeadingZeros);
6422
6423 MagicFactor = DAG.getConstant(magics.Magic, dl, SVT);
6424
6425 assert(magics.PreShift < Divisor.getBitWidth() &&
6426 "We shouldn't generate an undefined shift!");
6427 assert(magics.PostShift < Divisor.getBitWidth() &&
6428 "We shouldn't generate an undefined shift!");
6429 assert((!magics.IsAdd || magics.PreShift == 0) &&
6430 "Unexpected pre-shift");
6431 PreShift = DAG.getConstant(magics.PreShift, dl, ShSVT);
6432 PostShift = DAG.getConstant(magics.PostShift, dl, ShSVT);
6433 NPQFactor = DAG.getConstant(
6434 magics.IsAdd ? APInt::getOneBitSet(EltBits, EltBits - 1)
6435 : APInt::getZero(EltBits),
6436 dl, SVT);
6437 UseNPQ |= magics.IsAdd;
6438 UsePreShift |= magics.PreShift != 0;
6439 UsePostShift |= magics.PostShift != 0;
6440 }
6441
6442 PreShifts.push_back(PreShift);
6443 MagicFactors.push_back(MagicFactor);
6444 NPQFactors.push_back(NPQFactor);
6445 PostShifts.push_back(PostShift);
6446 return true;
6447 };
6448
6449 // Collect the shifts/magic values from each element.
6450 if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern))
6451 return SDValue();
6452
6453 SDValue PreShift, PostShift, MagicFactor, NPQFactor;
6454 if (N1.getOpcode() == ISD::BUILD_VECTOR) {
6455 PreShift = DAG.getBuildVector(ShVT, dl, PreShifts);
6456 MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
6457 NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors);
6458 PostShift = DAG.getBuildVector(ShVT, dl, PostShifts);
6459 } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
6460 assert(PreShifts.size() == 1 && MagicFactors.size() == 1 &&
6461 NPQFactors.size() == 1 && PostShifts.size() == 1 &&
6462 "Expected matchUnaryPredicate to return one for scalable vectors");
6463 PreShift = DAG.getSplatVector(ShVT, dl, PreShifts[0]);
6464 MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
6465 NPQFactor = DAG.getSplatVector(VT, dl, NPQFactors[0]);
6466 PostShift = DAG.getSplatVector(ShVT, dl, PostShifts[0]);
6467 } else {
6468 assert(isa<ConstantSDNode>(N1) && "Expected a constant");
6469 PreShift = PreShifts[0];
6470 MagicFactor = MagicFactors[0];
6471 PostShift = PostShifts[0];
6472 }
6473
6474 SDValue Q = N0;
6475 if (UsePreShift) {
6476 Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
6477 Created.push_back(Q.getNode());
6478 }
6479
6480 // FIXME: We should support doing a MUL in a wider type.
6481 auto GetMULHU = [&](SDValue X, SDValue Y) {
6482 // If the type isn't legal, use a wider mul of the type calculated
6483 // earlier.
6484 if (!isTypeLegal(VT)) {
6485 X = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, X);
6486 Y = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, Y);
6487 Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
6488 Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
6489 DAG.getShiftAmountConstant(EltBits, MulVT, dl));
6490 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6491 }
6492
6493 if (isOperationLegalOrCustom(ISD::MULHU, VT, IsAfterLegalization))
6494 return DAG.getNode(ISD::MULHU, dl, VT, X, Y);
6495 if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT, IsAfterLegalization)) {
6496 SDValue LoHi =
6497 DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
6498 return SDValue(LoHi.getNode(), 1);
6499 }
6500 // If type twice as wide legal, widen and use a mul plus a shift.
6501 unsigned Size = VT.getScalarSizeInBits();
6502 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), Size * 2);
6503 if (VT.isVector())
6504 WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
6506 if (isOperationLegalOrCustom(ISD::MUL, WideVT)) {
6507 X = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, X);
6508 Y = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, Y);
6509 Y = DAG.getNode(ISD::MUL, dl, WideVT, X, Y);
6510 Y = DAG.getNode(ISD::SRL, dl, WideVT, Y,
6511 DAG.getShiftAmountConstant(EltBits, WideVT, dl));
6512 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6513 }
6514 return SDValue(); // No mulhu or equivalent
6515 };
6516
6517 // Multiply the numerator (operand 0) by the magic value.
6518 Q = GetMULHU(Q, MagicFactor);
6519 if (!Q)
6520 return SDValue();
6521
6522 Created.push_back(Q.getNode());
6523
6524 if (UseNPQ) {
6525 SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N0, Q);
6526 Created.push_back(NPQ.getNode());
6527
6528 // For vectors we might have a mix of non-NPQ/NPQ paths, so use
6529 // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
6530 if (VT.isVector())
6531 NPQ = GetMULHU(NPQ, NPQFactor);
6532 else
6533 NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, dl, ShVT));
6534
6535 Created.push_back(NPQ.getNode());
6536
6537 Q = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
6538 Created.push_back(Q.getNode());
6539 }
6540
6541 if (UsePostShift) {
6542 Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
6543 Created.push_back(Q.getNode());
6544 }
6545
6546 EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6547
6548 SDValue One = DAG.getConstant(1, dl, VT);
6549 SDValue IsOne = DAG.getSetCC(dl, SetCCVT, N1, One, ISD::SETEQ);
6550 return DAG.getSelect(dl, VT, IsOne, N0, Q);
6551}
6552
6553/// If all values in Values that *don't* match the predicate are same 'splat'
6554/// value, then replace all values with that splat value.
6555/// Else, if AlternativeReplacement was provided, then replace all values that
6556/// do match predicate with AlternativeReplacement value.
6557static void
6559 std::function<bool(SDValue)> Predicate,
6560 SDValue AlternativeReplacement = SDValue()) {
6561 SDValue Replacement;
6562 // Is there a value for which the Predicate does *NOT* match? What is it?
6563 auto SplatValue = llvm::find_if_not(Values, Predicate);
6564 if (SplatValue != Values.end()) {
6565 // Does Values consist only of SplatValue's and values matching Predicate?
6566 if (llvm::all_of(Values, [Predicate, SplatValue](SDValue Value) {
6567 return Value == *SplatValue || Predicate(Value);
6568 })) // Then we shall replace values matching predicate with SplatValue.
6569 Replacement = *SplatValue;
6570 }
6571 if (!Replacement) {
6572 // Oops, we did not find the "baseline" splat value.
6573 if (!AlternativeReplacement)
6574 return; // Nothing to do.
6575 // Let's replace with provided value then.
6576 Replacement = AlternativeReplacement;
6577 }
6578 std::replace_if(Values.begin(), Values.end(), Predicate, Replacement);
6579}
6580
6581/// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
6582/// where the divisor is constant and the comparison target is zero,
6583/// return a DAG expression that will generate the same comparison result
6584/// using only multiplications, additions and shifts/rotations.
6585/// Ref: "Hacker's Delight" 10-17.
6586SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
6587 SDValue CompTargetNode,
6589 DAGCombinerInfo &DCI,
6590 const SDLoc &DL) const {
6592 if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
6593 DCI, DL, Built)) {
6594 for (SDNode *N : Built)
6595 DCI.AddToWorklist(N);
6596 return Folded;
6597 }
6598
6599 return SDValue();
6600}
6601
6602SDValue
6603TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
6604 SDValue CompTargetNode, ISD::CondCode Cond,
6605 DAGCombinerInfo &DCI, const SDLoc &DL,
6606 SmallVectorImpl<SDNode *> &Created) const {
6607 // fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q)
6608 // - D must be constant, with D = D0 * 2^K where D0 is odd
6609 // - P is the multiplicative inverse of D0 modulo 2^W
6610 // - Q = floor(((2^W) - 1) / D)
6611 // where W is the width of the common type of N and D.
6612 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
6613 "Only applicable for (in)equality comparisons.");
6614
6615 SelectionDAG &DAG = DCI.DAG;
6616
6617 EVT VT = REMNode.getValueType();
6618 EVT SVT = VT.getScalarType();
6619 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout(), !DCI.isBeforeLegalize());
6620 EVT ShSVT = ShVT.getScalarType();
6621
6622 // If MUL is unavailable, we cannot proceed in any case.
6623 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
6624 return SDValue();
6625
6626 bool ComparingWithAllZeros = true;
6627 bool AllComparisonsWithNonZerosAreTautological = true;
6628 bool HadTautologicalLanes = false;
6629 bool AllLanesAreTautological = true;
6630 bool HadEvenDivisor = false;
6631 bool AllDivisorsArePowerOfTwo = true;
6632 bool HadTautologicalInvertedLanes = false;
6633 SmallVector<SDValue, 16> PAmts, KAmts, QAmts, IAmts;
6634
6635 auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
6636 // Division by 0 is UB. Leave it to be constant-folded elsewhere.
6637 if (CDiv->isZero())
6638 return false;
6639
6640 const APInt &D = CDiv->getAPIntValue();
6641 const APInt &Cmp = CCmp->getAPIntValue();
6642
6643 ComparingWithAllZeros &= Cmp.isZero();
6644
6645 // x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
6646 // if C2 is not less than C1, the comparison is always false.
6647 // But we will only be able to produce the comparison that will give the
6648 // opposive tautological answer. So this lane would need to be fixed up.
6649 bool TautologicalInvertedLane = D.ule(Cmp);
6650 HadTautologicalInvertedLanes |= TautologicalInvertedLane;
6651
6652 // If all lanes are tautological (either all divisors are ones, or divisor
6653 // is not greater than the constant we are comparing with),
6654 // we will prefer to avoid the fold.
6655 bool TautologicalLane = D.isOne() || TautologicalInvertedLane;
6656 HadTautologicalLanes |= TautologicalLane;
6657 AllLanesAreTautological &= TautologicalLane;
6658
6659 // If we are comparing with non-zero, we need'll need to subtract said
6660 // comparison value from the LHS. But there is no point in doing that if
6661 // every lane where we are comparing with non-zero is tautological..
6662 if (!Cmp.isZero())
6663 AllComparisonsWithNonZerosAreTautological &= TautologicalLane;
6664
6665 // Decompose D into D0 * 2^K
6666 unsigned K = D.countr_zero();
6667 assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
6668 APInt D0 = D.lshr(K);
6669
6670 // D is even if it has trailing zeros.
6671 HadEvenDivisor |= (K != 0);
6672 // D is a power-of-two if D0 is one.
6673 // If all divisors are power-of-two, we will prefer to avoid the fold.
6674 AllDivisorsArePowerOfTwo &= D0.isOne();
6675
6676 // P = inv(D0, 2^W)
6677 // 2^W requires W + 1 bits, so we have to extend and then truncate.
6678 unsigned W = D.getBitWidth();
6680 assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
6681
6682 // Q = floor((2^W - 1) u/ D)
6683 // R = ((2^W - 1) u% D)
6684 APInt Q, R;
6686
6687 // If we are comparing with zero, then that comparison constant is okay,
6688 // else it may need to be one less than that.
6689 if (Cmp.ugt(R))
6690 Q -= 1;
6691
6693 "We are expecting that K is always less than all-ones for ShSVT");
6694
6695 // If the lane is tautological the result can be constant-folded.
6696 if (TautologicalLane) {
6697 // Set P and K amount to a bogus values so we can try to splat them.
6698 P = 0;
6699 K = -1;
6700 // And ensure that comparison constant is tautological,
6701 // it will always compare true/false.
6702 Q = -1;
6703 }
6704
6705 PAmts.push_back(DAG.getConstant(P, DL, SVT));
6706 KAmts.push_back(
6707 DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT));
6708 QAmts.push_back(DAG.getConstant(Q, DL, SVT));
6709 return true;
6710 };
6711
6712 SDValue N = REMNode.getOperand(0);
6713 SDValue D = REMNode.getOperand(1);
6714
6715 // Collect the values from each element.
6716 if (!ISD::matchBinaryPredicate(D, CompTargetNode, BuildUREMPattern))
6717 return SDValue();
6718
6719 // If all lanes are tautological, the result can be constant-folded.
6720 if (AllLanesAreTautological)
6721 return SDValue();
6722
6723 // If this is a urem by a powers-of-two, avoid the fold since it can be
6724 // best implemented as a bit test.
6725 if (AllDivisorsArePowerOfTwo)
6726 return SDValue();
6727
6728 SDValue PVal, KVal, QVal;
6729 if (D.getOpcode() == ISD::BUILD_VECTOR) {
6730 if (HadTautologicalLanes) {
6731 // Try to turn PAmts into a splat, since we don't care about the values
6732 // that are currently '0'. If we can't, just keep '0'`s.
6734 // Try to turn KAmts into a splat, since we don't care about the values
6735 // that are currently '-1'. If we can't, change them to '0'`s.
6737 DAG.getConstant(0, DL, ShSVT));
6738 }
6739
6740 PVal = DAG.getBuildVector(VT, DL, PAmts);
6741 KVal = DAG.getBuildVector(ShVT, DL, KAmts);
6742 QVal = DAG.getBuildVector(VT, DL, QAmts);
6743 } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
6744 assert(PAmts.size() == 1 && KAmts.size() == 1 && QAmts.size() == 1 &&
6745 "Expected matchBinaryPredicate to return one element for "
6746 "SPLAT_VECTORs");
6747 PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
6748 KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
6749 QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
6750 } else {
6751 PVal = PAmts[0];
6752 KVal = KAmts[0];
6753 QVal = QAmts[0];
6754 }
6755
6756 if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
6757 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::SUB, VT))
6758 return SDValue(); // FIXME: Could/should use `ISD::ADD`?
6759 assert(CompTargetNode.getValueType() == N.getValueType() &&
6760 "Expecting that the types on LHS and RHS of comparisons match.");
6761 N = DAG.getNode(ISD::SUB, DL, VT, N, CompTargetNode);
6762 }
6763
6764 // (mul N, P)
6765 SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
6766 Created.push_back(Op0.getNode());
6767
6768 // Rotate right only if any divisor was even. We avoid rotates for all-odd
6769 // divisors as a performance improvement, since rotating by 0 is a no-op.
6770 if (HadEvenDivisor) {
6771 // We need ROTR to do this.
6772 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
6773 return SDValue();
6774 // UREM: (rotr (mul N, P), K)
6775 Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
6776 Created.push_back(Op0.getNode());
6777 }
6778
6779 // UREM: (setule/setugt (rotr (mul N, P), K), Q)
6780 SDValue NewCC =
6781 DAG.getSetCC(DL, SETCCVT, Op0, QVal,
6783 if (!HadTautologicalInvertedLanes)
6784 return NewCC;
6785
6786 // If any lanes previously compared always-false, the NewCC will give
6787 // always-true result for them, so we need to fixup those lanes.
6788 // Or the other way around for inequality predicate.
6789 assert(VT.isVector() && "Can/should only get here for vectors.");
6790 Created.push_back(NewCC.getNode());
6791
6792 // x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
6793 // if C2 is not less than C1, the comparison is always false.
6794 // But we have produced the comparison that will give the
6795 // opposive tautological answer. So these lanes would need to be fixed up.
6796 SDValue TautologicalInvertedChannels =
6797 DAG.getSetCC(DL, SETCCVT, D, CompTargetNode, ISD::SETULE);
6798 Created.push_back(TautologicalInvertedChannels.getNode());
6799
6800 // NOTE: we avoid letting illegal types through even if we're before legalize
6801 // ops – legalization has a hard time producing good code for this.
6802 if (isOperationLegalOrCustom(ISD::VSELECT, SETCCVT)) {
6803 // If we have a vector select, let's replace the comparison results in the
6804 // affected lanes with the correct tautological result.
6805 SDValue Replacement = DAG.getBoolConstant(Cond == ISD::SETEQ ? false : true,
6806 DL, SETCCVT, SETCCVT);
6807 return DAG.getNode(ISD::VSELECT, DL, SETCCVT, TautologicalInvertedChannels,
6808 Replacement, NewCC);
6809 }
6810
6811 // Else, we can just invert the comparison result in the appropriate lanes.
6812 //
6813 // NOTE: see the note above VSELECT above.
6814 if (isOperationLegalOrCustom(ISD::XOR, SETCCVT))
6815 return DAG.getNode(ISD::XOR, DL, SETCCVT, NewCC,
6816 TautologicalInvertedChannels);
6817
6818 return SDValue(); // Don't know how to lower.
6819}
6820
6821/// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
6822/// where the divisor is constant and the comparison target is zero,
6823/// return a DAG expression that will generate the same comparison result
6824/// using only multiplications, additions and shifts/rotations.
6825/// Ref: "Hacker's Delight" 10-17.
6826SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
6827 SDValue CompTargetNode,
6829 DAGCombinerInfo &DCI,
6830 const SDLoc &DL) const {
6832 if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
6833 DCI, DL, Built)) {
6834 assert(Built.size() <= 7 && "Max size prediction failed.");
6835 for (SDNode *N : Built)
6836 DCI.AddToWorklist(N);
6837 return Folded;
6838 }
6839
6840 return SDValue();
6841}
6842
6843SDValue
6844TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
6845 SDValue CompTargetNode, ISD::CondCode Cond,
6846 DAGCombinerInfo &DCI, const SDLoc &DL,
6847 SmallVectorImpl<SDNode *> &Created) const {
6848 // Derived from Hacker's Delight, 2nd Edition, by Hank Warren. Section 10-17.
6849 // Fold:
6850 // (seteq/ne (srem N, D), 0)
6851 // To:
6852 // (setule/ugt (rotr (add (mul N, P), A), K), Q)
6853 //
6854 // - D must be constant, with D = D0 * 2^K where D0 is odd
6855 // - P is the multiplicative inverse of D0 modulo 2^W
6856 // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^k)))
6857 // - Q = floor((2 * A) / (2^K))
6858 // where W is the width of the common type of N and D.
6859 //
6860 // When D is a power of two (and thus D0 is 1), the normal
6861 // formula for A and Q don't apply, because the derivation
6862 // depends on D not dividing 2^(W-1), and thus theorem ZRS
6863 // does not apply. This specifically fails when N = INT_MIN.
6864 //
6865 // Instead, for power-of-two D, we use:
6866 // - A = 2^(W-1)
6867 // |-> Order-preserving map from [-2^(W-1), 2^(W-1) - 1] to [0,2^W - 1])
6868 // - Q = 2^(W-K) - 1
6869 // |-> Test that the top K bits are zero after rotation
6870 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
6871 "Only applicable for (in)equality comparisons.");
6872
6873 SelectionDAG &DAG = DCI.DAG;
6874
6875 EVT VT = REMNode.getValueType();
6876 EVT SVT = VT.getScalarType();
6877 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout(), !DCI.isBeforeLegalize());
6878 EVT ShSVT = ShVT.getScalarType();
6879
6880 // If we are after ops legalization, and MUL is unavailable, we can not
6881 // proceed.
6882 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
6883 return SDValue();
6884
6885 // TODO: Could support comparing with non-zero too.
6886 ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
6887 if (!CompTarget || !CompTarget->isZero())
6888 return SDValue();
6889
6890 bool HadIntMinDivisor = false;
6891 bool HadOneDivisor = false;
6892 bool AllDivisorsAreOnes = true;
6893 bool HadEvenDivisor = false;
6894 bool NeedToApplyOffset = false;
6895 bool AllDivisorsArePowerOfTwo = true;
6896 SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts;
6897
6898 auto BuildSREMPattern = [&](ConstantSDNode *C) {
6899 // Division by 0 is UB. Leave it to be constant-folded elsewhere.
6900 if (C->isZero())
6901 return false;
6902
6903 // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.
6904
6905 // WARNING: this fold is only valid for positive divisors!
6906 APInt D = C->getAPIntValue();
6907 if (D.isNegative())
6908 D.negate(); // `rem %X, -C` is equivalent to `rem %X, C`
6909
6910 HadIntMinDivisor |= D.isMinSignedValue();
6911
6912 // If all divisors are ones, we will prefer to avoid the fold.
6913 HadOneDivisor |= D.isOne();
6914 AllDivisorsAreOnes &= D.isOne();
6915
6916 // Decompose D into D0 * 2^K
6917 unsigned K = D.countr_zero();
6918 assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
6919 APInt D0 = D.lshr(K);
6920
6921 if (!D.isMinSignedValue()) {
6922 // D is even if it has trailing zeros; unless it's INT_MIN, in which case
6923 // we don't care about this lane in this fold, we'll special-handle it.
6924 HadEvenDivisor |= (K != 0);
6925 }
6926
6927 // D is a power-of-two if D0 is one. This includes INT_MIN.
6928 // If all divisors are power-of-two, we will prefer to avoid the fold.
6929 AllDivisorsArePowerOfTwo &= D0.isOne();
6930
6931 // P = inv(D0, 2^W)
6932 // 2^W requires W + 1 bits, so we have to extend and then truncate.
6933 unsigned W = D.getBitWidth();
6935 assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
6936
6937 // A = floor((2^(W - 1) - 1) / D0) & -2^K
6939 A.clearLowBits(K);
6940
6941 if (!D.isMinSignedValue()) {
6942 // If divisor INT_MIN, then we don't care about this lane in this fold,
6943 // we'll special-handle it.
6944 NeedToApplyOffset |= A != 0;
6945 }
6946
6947 // Q = floor((2 * A) / (2^K))
6948 APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K));
6949
6951 "We are expecting that A is always less than all-ones for SVT");
6953 "We are expecting that K is always less than all-ones for ShSVT");
6954
6955 // If D was a power of two, apply the alternate constant derivation.
6956 if (D0.isOne()) {
6957 // A = 2^(W-1)
6959 // - Q = 2^(W-K) - 1
6960 Q = APInt::getAllOnes(W - K).zext(W);
6961 }
6962
6963 // If the divisor is 1 the result can be constant-folded. Likewise, we
6964 // don't care about INT_MIN lanes, those can be set to undef if appropriate.
6965 if (D.isOne()) {
6966 // Set P, A and K to a bogus values so we can try to splat them.
6967 P = 0;
6968 A = -1;
6969 K = -1;
6970
6971 // x ?% 1 == 0 <--> true <--> x u<= -1
6972 Q = -1;
6973 }
6974
6975 PAmts.push_back(DAG.getConstant(P, DL, SVT));
6976 AAmts.push_back(DAG.getConstant(A, DL, SVT));
6977 KAmts.push_back(
6978 DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT));
6979 QAmts.push_back(DAG.getConstant(Q, DL, SVT));
6980 return true;
6981 };
6982
6983 SDValue N = REMNode.getOperand(0);
6984 SDValue D = REMNode.getOperand(1);
6985
6986 // Collect the values from each element.
6987 if (!ISD::matchUnaryPredicate(D, BuildSREMPattern))
6988 return SDValue();
6989
6990 // If this is a srem by a one, avoid the fold since it can be constant-folded.
6991 if (AllDivisorsAreOnes)
6992 return SDValue();
6993
6994 // If this is a srem by a powers-of-two (including INT_MIN), avoid the fold
6995 // since it can be best implemented as a bit test.
6996 if (AllDivisorsArePowerOfTwo)
6997 return SDValue();
6998
6999 SDValue PVal, AVal, KVal, QVal;
7000 if (D.getOpcode() == ISD::BUILD_VECTOR) {
7001 if (HadOneDivisor) {
7002 // Try to turn PAmts into a splat, since we don't care about the values
7003 // that are currently '0'. If we can't, just keep '0'`s.
7005 // Try to turn AAmts into a splat, since we don't care about the
7006 // values that are currently '-1'. If we can't, change them to '0'`s.
7008 DAG.getConstant(0, DL, SVT));
7009 // Try to turn KAmts into a splat, since we don't care about the values
7010 // that are currently '-1'. If we can't, change them to '0'`s.
7012 DAG.getConstant(0, DL, ShSVT));
7013 }
7014
7015 PVal = DAG.getBuildVector(VT, DL, PAmts);
7016 AVal = DAG.getBuildVector(VT, DL, AAmts);
7017 KVal = DAG.getBuildVector(ShVT, DL, KAmts);
7018 QVal = DAG.getBuildVector(VT, DL, QAmts);
7019 } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
7020 assert(PAmts.size() == 1 && AAmts.size() == 1 && KAmts.size() == 1 &&
7021 QAmts.size() == 1 &&
7022 "Expected matchUnaryPredicate to return one element for scalable "
7023 "vectors");
7024 PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
7025 AVal = DAG.getSplatVector(VT, DL, AAmts[0]);
7026 KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
7027 QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
7028 } else {
7029 assert(isa<ConstantSDNode>(D) && "Expected a constant");
7030 PVal = PAmts[0];
7031 AVal = AAmts[0];
7032 KVal = KAmts[0];
7033 QVal = QAmts[0];
7034 }
7035
7036 // (mul N, P)
7037 SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
7038 Created.push_back(Op0.getNode());
7039
7040 if (NeedToApplyOffset) {
7041 // We need ADD to do this.
7042 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ADD, VT))
7043 return SDValue();
7044
7045 // (add (mul N, P), A)
7046 Op0 = DAG.getNode(ISD::ADD, DL, VT, Op0, AVal);
7047 Created.push_back(Op0.getNode());
7048 }
7049
7050 // Rotate right only if any divisor was even. We avoid rotates for all-odd
7051 // divisors as a performance improvement, since rotating by 0 is a no-op.
7052 if (HadEvenDivisor) {
7053 // We need ROTR to do this.
7054 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
7055 return SDValue();
7056 // SREM: (rotr (add (mul N, P), A), K)
7057 Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
7058 Created.push_back(Op0.getNode());
7059 }
7060
7061 // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
7062 SDValue Fold =
7063 DAG.getSetCC(DL, SETCCVT, Op0, QVal,
7065
7066 // If we didn't have lanes with INT_MIN divisor, then we're done.
7067 if (!HadIntMinDivisor)
7068 return Fold;
7069
7070 // That fold is only valid for positive divisors. Which effectively means,
7071 // it is invalid for INT_MIN divisors. So if we have such a lane,
7072 // we must fix-up results for said lanes.
7073 assert(VT.isVector() && "Can/should only get here for vectors.");
7074
7075 // NOTE: we avoid letting illegal types through even if we're before legalize
7076 // ops – legalization has a hard time producing good code for the code that
7077 // follows.
7078 if (!isOperationLegalOrCustom(ISD::SETCC, SETCCVT) ||
7082 return SDValue();
7083
7084 Created.push_back(Fold.getNode());
7085
7086 SDValue IntMin = DAG.getConstant(
7088 SDValue IntMax = DAG.getConstant(
7090 SDValue Zero =
7092
7093 // Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.
7094 SDValue DivisorIsIntMin = DAG.getSetCC(DL, SETCCVT, D, IntMin, ISD::SETEQ);
7095 Created.push_back(DivisorIsIntMin.getNode());
7096
7097 // (N s% INT_MIN) ==/!= 0 <--> (N & INT_MAX) ==/!= 0
7098 SDValue Masked = DAG.getNode(ISD::AND, DL, VT, N, IntMax);
7099 Created.push_back(Masked.getNode());
7100 SDValue MaskedIsZero = DAG.getSetCC(DL, SETCCVT, Masked, Zero, Cond);
7101 Created.push_back(MaskedIsZero.getNode());
7102
7103 // To produce final result we need to blend 2 vectors: 'SetCC' and
7104 // 'MaskedIsZero'. If the divisor for channel was *NOT* INT_MIN, we pick
7105 // from 'Fold', else pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is
7106 // constant-folded, select can get lowered to a shuffle with constant mask.
7107 SDValue Blended = DAG.getNode(ISD::VSELECT, DL, SETCCVT, DivisorIsIntMin,
7108 MaskedIsZero, Fold);
7109
7110 return Blended;
7111}
7112
7115 if (!isa<ConstantSDNode>(Op.getOperand(0))) {
7116 DAG.getContext()->emitError("argument to '__builtin_return_address' must "
7117 "be a constant integer");
7118 return true;
7119 }
7120
7121 return false;
7122}
7123
7125 const DenormalMode &Mode) const {
7126 SDLoc DL(Op);
7127 EVT VT = Op.getValueType();
7128 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7129 SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
7130
7131 // This is specifically a check for the handling of denormal inputs, not the
7132 // result.
7133 if (Mode.Input == DenormalMode::PreserveSign ||
7134 Mode.Input == DenormalMode::PositiveZero) {
7135 // Test = X == 0.0
7136 return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
7137 }
7138
7139 // Testing it with denormal inputs to avoid wrong estimate.
7140 //
7141 // Test = fabs(X) < SmallestNormal
7142 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
7143 APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
7144 SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
7145 SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
7146 return DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
7147}
7148
7150 bool LegalOps, bool OptForSize,
7152 unsigned Depth) const {
7153 // fneg is removable even if it has multiple uses.
7154 if (Op.getOpcode() == ISD::FNEG || Op.getOpcode() == ISD::VP_FNEG) {
7156 return Op.getOperand(0);
7157 }
7158
7159 // Don't recurse exponentially.
7161 return SDValue();
7162
7163 // Pre-increment recursion depth for use in recursive calls.
7164 ++Depth;
7165 const SDNodeFlags Flags = Op->getFlags();
7166 const TargetOptions &Options = DAG.getTarget().Options;
7167 EVT VT = Op.getValueType();
7168 unsigned Opcode = Op.getOpcode();
7169
7170 // Don't allow anything with multiple uses unless we know it is free.
7171 if (!Op.hasOneUse() && Opcode != ISD::ConstantFP) {
7172 bool IsFreeExtend = Opcode == ISD::FP_EXTEND &&
7173 isFPExtFree(VT, Op.getOperand(0).getValueType());
7174 if (!IsFreeExtend)
7175 return SDValue();
7176 }
7177
7178 auto RemoveDeadNode = [&](SDValue N) {
7179 if (N && N.getNode()->use_empty())
7180 DAG.RemoveDeadNode(N.getNode());
7181 };
7182
7183 SDLoc DL(Op);
7184
7185 // Because getNegatedExpression can delete nodes we need a handle to keep
7186 // temporary nodes alive in case the recursion manages to create an identical
7187 // node.
7188 std::list<HandleSDNode> Handles;
7189
7190 switch (Opcode) {
7191 case ISD::ConstantFP: {
7192 // Don't invert constant FP values after legalization unless the target says
7193 // the negated constant is legal.
7194 bool IsOpLegal =
7196 isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
7197 OptForSize);
7198
7199 if (LegalOps && !IsOpLegal)
7200 break;
7201
7202 APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
7203 V.changeSign();
7204 SDValue CFP = DAG.getConstantFP(V, DL, VT);
7205
7206 // If we already have the use of the negated floating constant, it is free
7207 // to negate it even it has multiple uses.
7208 if (!Op.hasOneUse() && CFP.use_empty())
7209 break;
7211 return CFP;
7212 }
7213 case ISD::BUILD_VECTOR: {
7214 // Only permit BUILD_VECTOR of constants.
7215 if (llvm::any_of(Op->op_values(), [&](SDValue N) {
7216 return !N.isUndef() && !isa<ConstantFPSDNode>(N);
7217 }))
7218 break;
7219
7220 bool IsOpLegal =
7223 llvm::all_of(Op->op_values(), [&](SDValue N) {
7224 return N.isUndef() ||
7225 isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
7226 OptForSize);
7227 });
7228
7229 if (LegalOps && !IsOpLegal)
7230 break;
7231
7233 for (SDValue C : Op->op_values()) {
7234 if (C.isUndef()) {
7235 Ops.push_back(C);
7236 continue;
7237 }
7238 APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
7239 V.changeSign();
7240 Ops.push_back(DAG.getConstantFP(V, DL, C.getValueType()));
7241 }
7243 return DAG.getBuildVector(VT, DL, Ops);
7244 }
7245 case ISD::FADD: {
7246 if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
7247 break;
7248
7249 // After operation legalization, it might not be legal to create new FSUBs.
7250 if (LegalOps && !isOperationLegalOrCustom(ISD::FSUB, VT))
7251 break;
7252 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7253
7254 // fold (fneg (fadd X, Y)) -> (fsub (fneg X), Y)
7256 SDValue NegX =
7257 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7258 // Prevent this node from being deleted by the next call.
7259 if (NegX)
7260 Handles.emplace_back(NegX);
7261
7262 // fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X)
7264 SDValue NegY =
7265 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7266
7267 // We're done with the handles.
7268 Handles.clear();
7269
7270 // Negate the X if its cost is less or equal than Y.
7271 if (NegX && (CostX <= CostY)) {
7272 Cost = CostX;
7273 SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegX, Y, Flags);
7274 if (NegY != N)
7275 RemoveDeadNode(NegY);
7276 return N;
7277 }
7278
7279 // Negate the Y if it is not expensive.
7280 if (NegY) {
7281 Cost = CostY;
7282 SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegY, X, Flags);
7283 if (NegX != N)
7284 RemoveDeadNode(NegX);
7285 return N;
7286 }
7287 break;
7288 }
7289 case ISD::FSUB: {
7290 // We can't turn -(A-B) into B-A when we honor signed zeros.
7291 if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
7292 break;
7293
7294 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7295 // fold (fneg (fsub 0, Y)) -> Y
7296 if (ConstantFPSDNode *C = isConstOrConstSplatFP(X, /*AllowUndefs*/ true))
7297 if (C->isZero()) {
7299 return Y;
7300 }
7301
7302 // fold (fneg (fsub X, Y)) -> (fsub Y, X)
7304 return DAG.getNode(ISD::FSUB, DL, VT, Y, X, Flags);
7305 }
7306 case ISD::FMUL:
7307 case ISD::FDIV: {
7308 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7309
7310 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
7312 SDValue NegX =
7313 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7314 // Prevent this node from being deleted by the next call.
7315 if (NegX)
7316 Handles.emplace_back(NegX);
7317
7318 // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
7320 SDValue NegY =
7321 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7322
7323 // We're done with the handles.
7324 Handles.clear();
7325
7326 // Negate the X if its cost is less or equal than Y.
7327 if (NegX && (CostX <= CostY)) {
7328 Cost = CostX;
7329 SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, Flags);
7330 if (NegY != N)
7331 RemoveDeadNode(NegY);
7332 return N;
7333 }
7334
7335 // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
7336 if (auto *C = isConstOrConstSplatFP(Op.getOperand(1)))
7337 if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL)
7338 break;
7339
7340 // Negate the Y if it is not expensive.
7341 if (NegY) {
7342 Cost = CostY;
7343 SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, Flags);
7344 if (NegX != N)
7345 RemoveDeadNode(NegX);
7346 return N;
7347 }
7348 break;
7349 }
7350 case ISD::FMA:
7351 case ISD::FMAD: {
7352 if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
7353 break;
7354
7355 SDValue X = Op.getOperand(0), Y = Op.getOperand(1), Z = Op.getOperand(2);
7357 SDValue NegZ =
7358 getNegatedExpression(Z, DAG, LegalOps, OptForSize, CostZ, Depth);
7359 // Give up if fail to negate the Z.
7360 if (!NegZ)
7361 break;
7362
7363 // Prevent this node from being deleted by the next two calls.
7364 Handles.emplace_back(NegZ);
7365
7366 // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
7368 SDValue NegX =
7369 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7370 // Prevent this node from being deleted by the next call.
7371 if (NegX)
7372 Handles.emplace_back(NegX);
7373
7374 // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
7376 SDValue NegY =
7377 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7378
7379 // We're done with the handles.
7380 Handles.clear();
7381
7382 // Negate the X if its cost is less or equal than Y.
7383 if (NegX && (CostX <= CostY)) {
7384 Cost = std::min(CostX, CostZ);
7385 SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, NegZ, Flags);
7386 if (NegY != N)
7387 RemoveDeadNode(NegY);
7388 return N;
7389 }
7390
7391 // Negate the Y if it is not expensive.
7392 if (NegY) {
7393 Cost = std::min(CostY, CostZ);
7394 SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, NegZ, Flags);
7395 if (NegX != N)
7396 RemoveDeadNode(NegX);
7397 return N;
7398 }
7399 break;
7400 }
7401
7402 case ISD::FP_EXTEND:
7403 case ISD::FSIN:
7404 if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
7405 OptForSize, Cost, Depth))
7406 return DAG.getNode(Opcode, DL, VT, NegV);
7407 break;
7408 case ISD::FP_ROUND:
7409 if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
7410 OptForSize, Cost, Depth))
7411 return DAG.getNode(ISD::FP_ROUND, DL, VT, NegV, Op.getOperand(1));
7412 break;
7413 case ISD::SELECT:
7414 case ISD::VSELECT: {
7415 // fold (fneg (select C, LHS, RHS)) -> (select C, (fneg LHS), (fneg RHS))
7416 // iff at least one cost is cheaper and the other is neutral/cheaper
7417 SDValue LHS = Op.getOperand(1);
7419 SDValue NegLHS =
7420 getNegatedExpression(LHS, DAG, LegalOps, OptForSize, CostLHS, Depth);
7421 if (!NegLHS || CostLHS > NegatibleCost::Neutral) {
7422 RemoveDeadNode(NegLHS);
7423 break;
7424 }
7425
7426 // Prevent this node from being deleted by the next call.
7427 Handles.emplace_back(NegLHS);
7428
7429 SDValue RHS = Op.getOperand(2);
7431 SDValue NegRHS =
7432 getNegatedExpression(RHS, DAG, LegalOps, OptForSize, CostRHS, Depth);
7433
7434 // We're done with the handles.
7435 Handles.clear();
7436
7437 if (!NegRHS || CostRHS > NegatibleCost::Neutral ||
7438 (CostLHS != NegatibleCost::Cheaper &&
7439 CostRHS != NegatibleCost::Cheaper)) {
7440 RemoveDeadNode(NegLHS);
7441 RemoveDeadNode(NegRHS);
7442 break;
7443 }
7444
7445 Cost = std::min(CostLHS, CostRHS);
7446 return DAG.getSelect(DL, VT, Op.getOperand(0), NegLHS, NegRHS);
7447 }
7448 }
7449
7450 return SDValue();
7451}
7452
7453//===----------------------------------------------------------------------===//
7454// Legalization Utilities
7455//===----------------------------------------------------------------------===//
7456
7457bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl,
7458 SDValue LHS, SDValue RHS,
7460 EVT HiLoVT, SelectionDAG &DAG,
7461 MulExpansionKind Kind, SDValue LL,
7462 SDValue LH, SDValue RL, SDValue RH) const {
7463 assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
7464 Opcode == ISD::SMUL_LOHI);
7465
7466 bool HasMULHS = (Kind == MulExpansionKind::Always) ||
7468 bool HasMULHU = (Kind == MulExpansionKind::Always) ||
7470 bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
7472 bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
7474
7475 if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
7476 return false;
7477
7478 unsigned OuterBitSize = VT.getScalarSizeInBits();
7479 unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();
7480
7481 // LL, LH, RL, and RH must be either all NULL or all set to a value.
7482 assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
7483 (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));
7484
7485 SDVTList VTs = DAG.getVTList(HiLoVT, HiLoVT);
7486 auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
7487 bool Signed) -> bool {
7488 if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
7489 Lo = DAG.getNode(Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, dl, VTs, L, R);
7490 Hi = SDValue(Lo.getNode(), 1);
7491 return true;
7492 }
7493 if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
7494 Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, L, R);
7495 Hi = DAG.getNode(Signed ? ISD::MULHS : ISD::MULHU, dl, HiLoVT, L, R);
7496 return true;
7497 }
7498 return false;
7499 };
7500
7501 SDValue Lo, Hi;
7502
7503 if (!LL.getNode() && !RL.getNode() &&
7505 LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LHS);
7506 RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RHS);
7507 }
7508
7509 if (!LL.getNode())
7510 return false;
7511
7512 APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
7513 if (DAG.MaskedValueIsZero(LHS, HighMask) &&
7514 DAG.MaskedValueIsZero(RHS, HighMask)) {
7515 // The inputs are both zero-extended.
7516 if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
7517 Result.push_back(Lo);
7518 Result.push_back(Hi);
7519 if (Opcode != ISD::MUL) {
7520 SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
7521 Result.push_back(Zero);
7522 Result.push_back(Zero);
7523 }
7524 return true;
7525 }
7526 }
7527
7528 if (!VT.isVector() && Opcode == ISD::MUL &&
7529 DAG.ComputeMaxSignificantBits(LHS) <= InnerBitSize &&
7530 DAG.ComputeMaxSignificantBits(RHS) <= InnerBitSize) {
7531 // The input values are both sign-extended.
7532 // TODO non-MUL case?
7533 if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
7534 Result.push_back(Lo);
7535 Result.push_back(Hi);
7536 return true;
7537 }
7538 }
7539
7540 unsigned ShiftAmount = OuterBitSize - InnerBitSize;
7541 SDValue Shift = DAG.getShiftAmountConstant(ShiftAmount, VT, dl);
7542
7543 if (!LH.getNode() && !RH.getNode() &&
7546 LH = DAG.getNode(ISD::SRL, dl, VT, LHS, Shift);
7547 LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
7548 RH = DAG.getNode(ISD::SRL, dl, VT, RHS, Shift);
7549 RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
7550 }
7551
7552 if (!LH.getNode())
7553 return false;
7554
7555 if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false))
7556 return false;
7557
7558 Result.push_back(Lo);
7559
7560 if (Opcode == ISD::MUL) {
7561 RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
7562 LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
7563 Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
7564 Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
7565 Result.push_back(Hi);
7566 return true;
7567 }
7568
7569 // Compute the full width result.
7570 auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
7571 Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
7572 Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
7573 Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
7574 return DAG.getNode(ISD::OR, dl, VT, Lo, Hi);
7575 };
7576
7577 SDValue Next = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
7578 if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false))
7579 return false;
7580
7581 // This is effectively the add part of a multiply-add of half-sized operands,
7582 // so it cannot overflow.
7583 Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
7584
7585 if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
7586 return false;
7587
7588 SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
7589 EVT BoolType = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7590
7591 bool UseGlue = (isOperationLegalOrCustom(ISD::ADDC, VT) &&
7593 if (UseGlue)
7594 Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
7595 Merge(Lo, Hi));
7596 else
7597 Next = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(VT, BoolType), Next,
7598 Merge(Lo, Hi), DAG.getConstant(0, dl, BoolType));
7599
7600 SDValue Carry = Next.getValue(1);
7601 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
7602 Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
7603
7604 if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
7605 return false;
7606
7607 if (UseGlue)
7608 Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
7609 Carry);
7610 else
7611 Hi = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(HiLoVT, BoolType), Hi,
7612 Zero, Carry);
7613
7614 Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
7615
7616 if (Opcode == ISD::SMUL_LOHI) {
7617 SDValue NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
7618 DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RL));
7619 Next = DAG.getSelectCC(dl, LH, Zero, NextSub, Next, ISD::SETLT);
7620
7621 NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
7622 DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LL));
7623 Next = DAG.getSelectCC(dl, RH, Zero, NextSub, Next, ISD::SETLT);
7624 }
7625
7626 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
7627 Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
7628 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
7629 return true;
7630}
7631
7633 SelectionDAG &DAG, MulExpansionKind Kind,
7634 SDValue LL, SDValue LH, SDValue RL,
7635 SDValue RH) const {
7637 bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), SDLoc(N),
7638 N->getOperand(0), N->getOperand(1), Result, HiLoVT,
7639 DAG, Kind, LL, LH, RL, RH);
7640 if (Ok) {
7641 assert(Result.size() == 2);
7642 Lo = Result[0];
7643 Hi = Result[1];
7644 }
7645 return Ok;
7646}
7647
7648// Optimize unsigned division or remainder by constants for types twice as large
7649// as a legal VT.
7650//
7651// If (1 << (BitWidth / 2)) % Constant == 1, then the remainder
7652// can be computed
7653// as:
7654// Sum += __builtin_uadd_overflow(Lo, High, &Sum);
7655// Remainder = Sum % Constant
7656// This is based on "Remainder by Summing Digits" from Hacker's Delight.
7657//
7658// For division, we can compute the remainder using the algorithm described
7659// above, subtract it from the dividend to get an exact multiple of Constant.
 7660 // Then multiply that exact multiple by the multiplicative inverse modulo
7661// (1 << (BitWidth / 2)) to get the quotient.
7662
7663// If Constant is even, we can shift right the dividend and the divisor by the
7664// number of trailing zeros in Constant before applying the remainder algorithm.
7665// If we're after the quotient, we can subtract this value from the shifted
7666// dividend and multiply by the multiplicative inverse of the shifted divisor.
7667// If we want the remainder, we shift the value left by the number of trailing
7668// zeros and add the bits that were shifted out of the dividend.
7671 EVT HiLoVT, SelectionDAG &DAG,
7672 SDValue LL, SDValue LH) const {
7673 unsigned Opcode = N->getOpcode();
7674 EVT VT = N->getValueType(0);
7675
7676 // TODO: Support signed division/remainder.
7677 if (Opcode == ISD::SREM || Opcode == ISD::SDIV || Opcode == ISD::SDIVREM)
7678 return false;
7679 assert(
7680 (Opcode == ISD::UREM || Opcode == ISD::UDIV || Opcode == ISD::UDIVREM) &&
7681 "Unexpected opcode");
7682
7683 auto *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
7684 if (!CN)
7685 return false;
7686
7687 APInt Divisor = CN->getAPIntValue();
7688 unsigned BitWidth = Divisor.getBitWidth();
7689 unsigned HBitWidth = BitWidth / 2;
7691 HiLoVT.getScalarSizeInBits() == HBitWidth && "Unexpected VTs");
7692
 7693 // Divisor needs to be less than (1 << HBitWidth).
7694 APInt HalfMaxPlus1 = APInt::getOneBitSet(BitWidth, HBitWidth);
7695 if (Divisor.uge(HalfMaxPlus1))
7696 return false;
7697
7698 // We depend on the UREM by constant optimization in DAGCombiner that requires
7699 // high multiply.
7700 if (!isOperationLegalOrCustom(ISD::MULHU, HiLoVT) &&
7702 return false;
7703
7704 // Don't expand if optimizing for size.
7705 if (DAG.shouldOptForSize())
7706 return false;
7707
7708 // Early out for 0 or 1 divisors.
7709 if (Divisor.ule(1))
7710 return false;
7711
7712 // If the divisor is even, shift it until it becomes odd.
7713 unsigned TrailingZeros = 0;
7714 if (!Divisor[0]) {
7715 TrailingZeros = Divisor.countr_zero();
7716 Divisor.lshrInPlace(TrailingZeros);
7717 }
7718
7719 SDLoc dl(N);
7720 SDValue Sum;
7721 SDValue PartialRem;
7722
7723 // If (1 << HBitWidth) % divisor == 1, we can add the two halves together and
7724 // then add in the carry.
7725 // TODO: If we can't split it in half, we might be able to split into 3 or
7726 // more pieces using a smaller bit width.
7727 if (HalfMaxPlus1.urem(Divisor).isOne()) {
7728 assert(!LL == !LH && "Expected both input halves or no input halves!");
7729 if (!LL)
7730 std::tie(LL, LH) = DAG.SplitScalar(N->getOperand(0), dl, HiLoVT, HiLoVT);
7731
7732 // Shift the input by the number of TrailingZeros in the divisor. The
7733 // shifted out bits will be added to the remainder later.
7734 if (TrailingZeros) {
7735 // Save the shifted off bits if we need the remainder.
7736 if (Opcode != ISD::UDIV) {
7737 APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
7738 PartialRem = DAG.getNode(ISD::AND, dl, HiLoVT, LL,
7739 DAG.getConstant(Mask, dl, HiLoVT));
7740 }
7741
7742 LL = DAG.getNode(
7743 ISD::OR, dl, HiLoVT,
7744 DAG.getNode(ISD::SRL, dl, HiLoVT, LL,
7745 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl)),
7746 DAG.getNode(ISD::SHL, dl, HiLoVT, LH,
7747 DAG.getShiftAmountConstant(HBitWidth - TrailingZeros,
7748 HiLoVT, dl)));
7749 LH = DAG.getNode(ISD::SRL, dl, HiLoVT, LH,
7750 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
7751 }
7752
7753 // Use uaddo_carry if we can, otherwise use a compare to detect overflow.
7754 EVT SetCCType =
7755 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), HiLoVT);
7757 SDVTList VTList = DAG.getVTList(HiLoVT, SetCCType);
7758 Sum = DAG.getNode(ISD::UADDO, dl, VTList, LL, LH);
7759 Sum = DAG.getNode(ISD::UADDO_CARRY, dl, VTList, Sum,
7760 DAG.getConstant(0, dl, HiLoVT), Sum.getValue(1));
7761 } else {
7762 Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, LL, LH);
7763 SDValue Carry = DAG.getSetCC(dl, SetCCType, Sum, LL, ISD::SETULT);
7764 // If the boolean for the target is 0 or 1, we can add the setcc result
7765 // directly.
7766 if (getBooleanContents(HiLoVT) ==
7768 Carry = DAG.getZExtOrTrunc(Carry, dl, HiLoVT);
7769 else
7770 Carry = DAG.getSelect(dl, HiLoVT, Carry, DAG.getConstant(1, dl, HiLoVT),
7771 DAG.getConstant(0, dl, HiLoVT));
7772 Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, Sum, Carry);
7773 }
7774 }
7775
7776 // If we didn't find a sum, we can't do the expansion.
7777 if (!Sum)
7778 return false;
7779
7780 // Perform a HiLoVT urem on the Sum using truncated divisor.
7781 SDValue RemL =
7782 DAG.getNode(ISD::UREM, dl, HiLoVT, Sum,
7783 DAG.getConstant(Divisor.trunc(HBitWidth), dl, HiLoVT));
7784 SDValue RemH = DAG.getConstant(0, dl, HiLoVT);
7785
7786 if (Opcode != ISD::UREM) {
7787 // Subtract the remainder from the shifted dividend.
7788 SDValue Dividend = DAG.getNode(ISD::BUILD_PAIR, dl, VT, LL, LH);
7789 SDValue Rem = DAG.getNode(ISD::BUILD_PAIR, dl, VT, RemL, RemH);
7790
7791 Dividend = DAG.getNode(ISD::SUB, dl, VT, Dividend, Rem);
7792
7793 // Multiply by the multiplicative inverse of the divisor modulo
7794 // (1 << BitWidth).
7795 APInt MulFactor = Divisor.multiplicativeInverse();
7796
7797 SDValue Quotient = DAG.getNode(ISD::MUL, dl, VT, Dividend,
7798 DAG.getConstant(MulFactor, dl, VT));
7799
7800 // Split the quotient into low and high parts.
7801 SDValue QuotL, QuotH;
7802 std::tie(QuotL, QuotH) = DAG.SplitScalar(Quotient, dl, HiLoVT, HiLoVT);
7803 Result.push_back(QuotL);
7804 Result.push_back(QuotH);
7805 }
7806
7807 if (Opcode != ISD::UDIV) {
7808 // If we shifted the input, shift the remainder left and add the bits we
7809 // shifted off the input.
7810 if (TrailingZeros) {
7811 APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
7812 RemL = DAG.getNode(ISD::SHL, dl, HiLoVT, RemL,
7813 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
7814 RemL = DAG.getNode(ISD::ADD, dl, HiLoVT, RemL, PartialRem);
7815 }
7816 Result.push_back(RemL);
7817 Result.push_back(DAG.getConstant(0, dl, HiLoVT));
7818 }
7819
7820 return true;
7821}
7822
7823// Check that (every element of) Z is undef or not an exact multiple of BW.
7824static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) {
7826 Z,
7827 [=](ConstantSDNode *C) { return !C || C->getAPIntValue().urem(BW) != 0; },
7828 true);
7829}
7830
7832 EVT VT = Node->getValueType(0);
7833 SDValue ShX, ShY;
7834 SDValue ShAmt, InvShAmt;
7835 SDValue X = Node->getOperand(0);
7836 SDValue Y = Node->getOperand(1);
7837 SDValue Z = Node->getOperand(2);
7838 SDValue Mask = Node->getOperand(3);
7839 SDValue VL = Node->getOperand(4);
7840
7841 unsigned BW = VT.getScalarSizeInBits();
7842 bool IsFSHL = Node->getOpcode() == ISD::VP_FSHL;
7843 SDLoc DL(SDValue(Node, 0));
7844
7845 EVT ShVT = Z.getValueType();
7846 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
7847 // fshl: X << C | Y >> (BW - C)
7848 // fshr: X << (BW - C) | Y >> C
7849 // where C = Z % BW is not zero
7850 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
7851 ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
7852 InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitWidthC, ShAmt, Mask, VL);
7853 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt, Mask,
7854 VL);
7855 ShY = DAG.getNode(ISD::VP_LSHR, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt, Mask,
7856 VL);
7857 } else {
7858 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
7859 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
7860 SDValue BitMask = DAG.getConstant(BW - 1, DL, ShVT);
7861 if (isPowerOf2_32(BW)) {
7862 // Z % BW -> Z & (BW - 1)
7863 ShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, Z, BitMask, Mask, VL);
7864 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
7865 SDValue NotZ = DAG.getNode(ISD::VP_XOR, DL, ShVT, Z,
7866 DAG.getAllOnesConstant(DL, ShVT), Mask, VL);
7867 InvShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, NotZ, BitMask, Mask, VL);
7868 } else {
7869 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
7870 ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
7871 InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitMask, ShAmt, Mask, VL);
7872 }
7873
7874 SDValue One = DAG.getConstant(1, DL, ShVT);
7875 if (IsFSHL) {
7876 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, ShAmt, Mask, VL);
7877 SDValue ShY1 = DAG.getNode(ISD::VP_LSHR, DL, VT, Y, One, Mask, VL);
7878 ShY = DAG.getNode(ISD::VP_LSHR, DL, VT, ShY1, InvShAmt, Mask, VL);
7879 } else {
7880 SDValue ShX1 = DAG.getNode(ISD::VP_SHL, DL, VT, X, One, Mask, VL);
7881 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, ShX1, InvShAmt, Mask, VL);
7882 ShY = DAG.getNode(ISD::VP_LSHR, DL, VT, Y, ShAmt, Mask, VL);
7883 }
7884 }
7885 return DAG.getNode(ISD::VP_OR, DL, VT, ShX, ShY, Mask, VL);
7886}
7887
7889 SelectionDAG &DAG) const {
7890 if (Node->isVPOpcode())
7891 return expandVPFunnelShift(Node, DAG);
7892
7893 EVT VT = Node->getValueType(0);
7894
7895 if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
7899 return SDValue();
7900
7901 SDValue X = Node->getOperand(0);
7902 SDValue Y = Node->getOperand(1);
7903 SDValue Z = Node->getOperand(2);
7904
7905 unsigned BW = VT.getScalarSizeInBits();
7906 bool IsFSHL = Node->getOpcode() == ISD::FSHL;
7907 SDLoc DL(SDValue(Node, 0));
7908
7909 EVT ShVT = Z.getValueType();
7910
7911 // If a funnel shift in the other direction is more supported, use it.
7912 unsigned RevOpcode = IsFSHL ? ISD::FSHR : ISD::FSHL;
7913 if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
7914 isOperationLegalOrCustom(RevOpcode, VT) && isPowerOf2_32(BW)) {
7915 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
7916 // fshl X, Y, Z -> fshr X, Y, -Z
7917 // fshr X, Y, Z -> fshl X, Y, -Z
7918 SDValue Zero = DAG.getConstant(0, DL, ShVT);
7919 Z = DAG.getNode(ISD::SUB, DL, VT, Zero, Z);
7920 } else {
7921 // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
7922 // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
7923 SDValue One = DAG.getConstant(1, DL, ShVT);
7924 if (IsFSHL) {
7925 Y = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
7926 X = DAG.getNode(ISD::SRL, DL, VT, X, One);
7927 } else {
7928 X = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
7929 Y = DAG.getNode(ISD::SHL, DL, VT, Y, One);
7930 }
7931 Z = DAG.getNOT(DL, Z, ShVT);
7932 }
7933 return DAG.getNode(RevOpcode, DL, VT, X, Y, Z);
7934 }
7935
7936 SDValue ShX, ShY;
7937 SDValue ShAmt, InvShAmt;
7938 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
7939 // fshl: X << C | Y >> (BW - C)
7940 // fshr: X << (BW - C) | Y >> C
7941 // where C = Z % BW is not zero
7942 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
7943 ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
7944 InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt);
7945 ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt);
7946 ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt);
7947 } else {
7948 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
7949 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
7950 SDValue Mask = DAG.getConstant(BW - 1, DL, ShVT);
7951 if (isPowerOf2_32(BW)) {
7952 // Z % BW -> Z & (BW - 1)
7953 ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask);
7954 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
7955 InvShAmt = DAG.getNode(ISD::AND, DL, ShVT, DAG.getNOT(DL, Z, ShVT), Mask);
7956 } else {
7957 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
7958 ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
7959 InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, Mask, ShAmt);
7960 }
7961
7962 SDValue One = DAG.getConstant(1, DL, ShVT);
7963 if (IsFSHL) {
7964 ShX = DAG.getNode(ISD::SHL, DL, VT, X, ShAmt);
7965 SDValue ShY1 = DAG.getNode(ISD::SRL, DL, VT, Y, One);
7966 ShY = DAG.getNode(ISD::SRL, DL, VT, ShY1, InvShAmt);
7967 } else {
7968 SDValue ShX1 = DAG.getNode(ISD::SHL, DL, VT, X, One);
7969 ShX = DAG.getNode(ISD::SHL, DL, VT, ShX1, InvShAmt);
7970 ShY = DAG.getNode(ISD::SRL, DL, VT, Y, ShAmt);
7971 }
7972 }
7973 return DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
7974}
7975
7976// TODO: Merge with expandFunnelShift.
// Expand ISD::ROTL / ISD::ROTR for targets without a native rotate of this
// direction. Prefers lowering to the opposite-direction rotate with a negated
// amount when that opcode is legal/custom; otherwise builds the rotate from
// two shifts OR'd together.
7977SDValue TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps,
7978 SelectionDAG &DAG) const {
7979 EVT VT = Node->getValueType(0);
7980 unsigned EltSizeInBits = VT.getScalarSizeInBits();
7981 bool IsLeft = Node->getOpcode() == ISD::ROTL;
7982 SDValue Op0 = Node->getOperand(0);
7983 SDValue Op1 = Node->getOperand(1);
7984 SDLoc DL(SDValue(Node, 0));
7985
7986 EVT ShVT = Op1.getValueType();
7987 SDValue Zero = DAG.getConstant(0, DL, ShVT);
7988
7989 // If a rotate in the other direction is more supported, use it.
7990 unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
7991 if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
7992 isOperationLegalOrCustom(RevRot, VT) && isPowerOf2_32(EltSizeInBits)) {
// rot(x, c) == revrot(x, w - c); with a power-of-two width the negation
// modulo the bit width is simply 0 - c.
7993 SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
7994 return DAG.getNode(RevRot, DL, VT, Op0, Sub);
7995 }
7996
// NOTE(review): the remainder of this condition (original lines 7998-8002) is
// elided in this view of the source; presumably it checks that the required
// vector shift/OR building blocks are legal — confirm against upstream.
7997 if (!AllowVectorOps && VT.isVector() &&
8003 return SDValue();
8004
8005 unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
8006 unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
8007 SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
8008 SDValue ShVal;
8009 SDValue HsVal;
8010 if (isPowerOf2_32(EltSizeInBits)) {
8011 // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
8012 // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
8013 SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
8014 SDValue ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC);
8015 ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
8016 SDValue HsAmt = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC);
8017 HsVal = DAG.getNode(HsOpc, DL, VT, Op0, HsAmt);
8018 } else {
8019 // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
8020 // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
// The extra shift-by-one keeps every individual shift amount strictly less
// than the bit width, avoiding an out-of-range shift when c % w == 0.
8021 SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
8022 SDValue ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Op1, BitWidthC);
8023 ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
8024 SDValue HsAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthMinusOneC, ShAmt);
8025 SDValue One = DAG.getConstant(1, DL, ShVT);
8026 HsVal =
8027 DAG.getNode(HsOpc, DL, VT, DAG.getNode(HsOpc, DL, VT, Op0, One), HsAmt);
8028 }
8029 return DAG.getNode(ISD::OR, DL, VT, ShVal, HsVal);
8030}
8031
// Expand SHL_PARTS / SRL_PARTS / SRA_PARTS (a double-wide shift split over a
// Lo/Hi pair of parts) into a funnel shift, a plain shift, and selects that
// pick the right values when the amount is >= the part width.
// NOTE(review): the first line of the signature (declaring Node and the Lo/Hi
// result references) is elided in this view of the source (line 8032).
8033 SelectionDAG &DAG) const {
8034 assert(Node->getNumOperands() == 3 && "Not a double-shift!");
8035 EVT VT = Node->getValueType(0);
8036 unsigned VTBits = VT.getScalarSizeInBits();
8037 assert(isPowerOf2_32(VTBits) && "Power-of-two integer type expected");
8038
8039 bool IsSHL = Node->getOpcode() == ISD::SHL_PARTS;
8040 bool IsSRA = Node->getOpcode() == ISD::SRA_PARTS;
8041 SDValue ShOpLo = Node->getOperand(0);
8042 SDValue ShOpHi = Node->getOperand(1);
8043 SDValue ShAmt = Node->getOperand(2);
8044 EVT ShAmtVT = ShAmt.getValueType();
8045 EVT ShAmtCCVT =
8046 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ShAmtVT);
8047 SDLoc dl(Node);
8048
8049 // ISD::FSHL and ISD::FSHR have defined overflow behavior but ISD::SHL and
8050 // ISD::SRA/L nodes haven't. Insert an AND to be safe, it's usually optimized
8051 // away during isel.
8052 SDValue SafeShAmt = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
8053 DAG.getConstant(VTBits - 1, dl, ShAmtVT));
// Tmp1 is the "overflow" value for the far part: for arithmetic shifts it is
// the sign of the high part replicated into every bit, otherwise zero.
8054 SDValue Tmp1 = IsSRA ? DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
8055 DAG.getConstant(VTBits - 1, dl, ShAmtVT))
8056 : DAG.getConstant(0, dl, VT);
8057
8058 SDValue Tmp2, Tmp3;
8059 if (IsSHL) {
8060 Tmp2 = DAG.getNode(ISD::FSHL, dl, VT, ShOpHi, ShOpLo, ShAmt);
8061 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, SafeShAmt);
8062 } else {
8063 Tmp2 = DAG.getNode(ISD::FSHR, dl, VT, ShOpHi, ShOpLo, ShAmt);
8064 Tmp3 = DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, SafeShAmt);
8065 }
8066
8067 // If the shift amount is larger or equal than the width of a part we don't
8068 // use the result from the FSHL/FSHR. Insert a test and select the appropriate
8069 // values for large shift amounts.
// Because VTBits is a power of two, ANDing with VTBits isolates exactly the
// bit that says "amount >= part width".
8070 SDValue AndNode = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
8071 DAG.getConstant(VTBits, dl, ShAmtVT));
8072 SDValue Cond = DAG.getSetCC(dl, ShAmtCCVT, AndNode,
8073 DAG.getConstant(0, dl, ShAmtVT), ISD::SETNE);
8074
8075 if (IsSHL) {
8076 Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
8077 Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
8078 } else {
8079 Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
8080 Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
8081 }
8082}
8083
// Expand FP_TO_SINT without using any floating-point arithmetic: bit-cast the
// source and reconstruct the integer from sign, exponent, and mantissa fields.
// Returns true and sets Result on success, false when unsupported.
// NOTE(review): the first line of the signature (Node and the Result output
// reference) is elided in this view of the source (line 8084).
8085 SelectionDAG &DAG) const {
8086 unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
8087 SDValue Src = Node->getOperand(OpNo);
8088 EVT SrcVT = Src.getValueType();
8089 EVT DstVT = Node->getValueType(0);
8090 SDLoc dl(SDValue(Node, 0));
8091
8092 // FIXME: Only f32 to i64 conversions are supported.
8093 if (SrcVT != MVT::f32 || DstVT != MVT::i64)
8094 return false;
8095
8096 if (Node->isStrictFPOpcode())
8097 // When a NaN is converted to an integer a trap is allowed. We can't
8098 // use this expansion here because it would eliminate that trap. Other
8099 // traps are also allowed and cannot be eliminated. See
8100 // IEEE 754-2008 sec 5.8.
8101 return false;
8102
8103 // Expand f32 -> i64 conversion
8104 // This algorithm comes from compiler-rt's implementation of fixsfdi:
8105 // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
8106 unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
8107 EVT IntVT = SrcVT.changeTypeToInteger();
8108 EVT IntShVT = getShiftAmountTy(IntVT, DAG.getDataLayout());
8109
// IEEE-754 binary32 field layout: 8 exponent bits starting at bit 23,
// bias 127, 23 mantissa bits.
8110 SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT);
8111 SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT);
8112 SDValue Bias = DAG.getConstant(127, dl, IntVT);
8113 SDValue SignMask = DAG.getConstant(APInt::getSignMask(SrcEltBits), dl, IntVT);
8114 SDValue SignLowBit = DAG.getConstant(SrcEltBits - 1, dl, IntVT);
8115 SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT);
8116
8117 SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Src);
8118
8119 SDValue ExponentBits = DAG.getNode(
8120 ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
8121 DAG.getZExtOrTrunc(ExponentLoBit, dl, IntShVT));
8122 SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);
8123
// Arithmetic right shift of the sign bit yields all-ones for negative
// inputs and zero otherwise, i.e. -1 or 0.
8124 SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT,
8125 DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
8126 DAG.getZExtOrTrunc(SignLowBit, dl, IntShVT));
8127 Sign = DAG.getSExtOrTrunc(Sign, dl, DstVT);
8128
// Re-attach the implicit leading 1 of the normalized mantissa.
8129 SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
8130 DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
8131 DAG.getConstant(0x00800000, dl, IntVT));
8132
8133 R = DAG.getZExtOrTrunc(R, dl, DstVT);
8134
// Scale the mantissa left or right depending on whether the unbiased
// exponent exceeds the mantissa width.
8135 R = DAG.getSelectCC(
8136 dl, Exponent, ExponentLoBit,
8137 DAG.getNode(ISD::SHL, dl, DstVT, R,
8138 DAG.getZExtOrTrunc(
8139 DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
8140 dl, IntShVT)),
8141 DAG.getNode(ISD::SRL, dl, DstVT, R,
8142 DAG.getZExtOrTrunc(
8143 DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
8144 dl, IntShVT)),
8145 ISD::SETGT);
8146
// Conditionally negate: (R ^ Sign) - Sign is -R when Sign is -1, R when 0.
8147 SDValue Ret = DAG.getNode(ISD::SUB, dl, DstVT,
8148 DAG.getNode(ISD::XOR, dl, DstVT, R, Sign), Sign);
8149
// A negative unbiased exponent means |Src| < 1, so the result is 0.
8150 Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT),
8151 DAG.getConstant(0, dl, DstVT), Ret, ISD::SETLT);
8152 return true;
8153}
8154
// Expand FP_TO_UINT (and its strict variant) in terms of FP_TO_SINT: values
// below the destination sign-mask convert directly; larger values are offset
// by the sign-mask before conversion and fixed up afterwards. Returns true
// and sets Result (and Chain for strict nodes) on success.
// NOTE(review): the first line of the signature (Node and the Result output
// reference) is elided in this view of the source (line 8155).
8156 SDValue &Chain,
8157 SelectionDAG &DAG) const {
8158 SDLoc dl(SDValue(Node, 0));
8159 unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
8160 SDValue Src = Node->getOperand(OpNo);
8161
8162 EVT SrcVT = Src.getValueType();
8163 EVT DstVT = Node->getValueType(0);
8164 EVT SetCCVT =
8165 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
8166 EVT DstSetCCVT =
8167 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
8168
8169 // Only expand vector types if we have the appropriate vector bit operations.
// NOTE(review): part of this legality condition (line 8173) is elided in this
// view of the source.
8170 unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
8172 if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) ||
8174 return false;
8175
8176 // If the maximum float value is smaller then the signed integer range,
8177 // the destination signmask can't be represented by the float, so we can
8178 // just use FP_TO_SINT directly.
// NOTE(review): the opStatus comparison on line 8182 is elided in this view
// of the source; the conversion result is checked against the line below.
8179 const fltSemantics &APFSem = DAG.EVTToAPFloatSemantics(SrcVT);
8180 APFloat APF(APFSem, APInt::getZero(SrcVT.getScalarSizeInBits()));
8181 APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());
8183 APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
8184 if (Node->isStrictFPOpcode()) {
8185 Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
8186 { Node->getOperand(0), Src });
8187 Chain = Result.getValue(1);
8188 } else
8189 Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
8190 return true;
8191 }
8192
8193 // Don't expand it if there isn't cheap fsub instruction.
8194 if (!isOperationLegalOrCustom(
8195 Node->isStrictFPOpcode() ? ISD::STRICT_FSUB : ISD::FSUB, SrcVT))
8196 return false;
8197
8198 SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
8199 SDValue Sel;
8200
8201 if (Node->isStrictFPOpcode()) {
// Signaling compare: strict FP semantics require raising invalid on NaN.
8202 Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
8203 Node->getOperand(0), /*IsSignaling*/ true);
8204 Chain = Sel.getValue(1);
8205 } else {
8206 Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
8207 }
8208
8209 bool Strict = Node->isStrictFPOpcode() ||
8210 shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);
8211
8212 if (Strict) {
8213 // Expand based on maximum range of FP_TO_SINT, if the value exceeds the
8214 // signmask then offset (the result of which should be fully representable).
8215 // Sel = Src < 0x8000000000000000
8216 // FltOfs = select Sel, 0, 0x8000000000000000
8217 // IntOfs = select Sel, 0, 0x8000000000000000
8218 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
8219
8220 // TODO: Should any fast-math-flags be set for the FSUB?
8221 SDValue FltOfs = DAG.getSelect(dl, SrcVT, Sel,
8222 DAG.getConstantFP(0.0, dl, SrcVT), Cst);
8223 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8224 SDValue IntOfs = DAG.getSelect(dl, DstVT, Sel,
8225 DAG.getConstant(0, dl, DstVT),
8226 DAG.getConstant(SignMask, dl, DstVT));
8227 SDValue SInt;
8228 if (Node->isStrictFPOpcode()) {
8229 SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
8230 { Chain, Src, FltOfs });
8231 SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
8232 { Val.getValue(1), Val });
8233 Chain = SInt.getValue(1);
8234 } else {
8235 SDValue Val = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, FltOfs);
8236 SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val);
8237 }
// XOR with the sign-mask re-adds the offset for the large-value path.
8238 Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
8239 } else {
8240 // Expand based on maximum range of FP_TO_SINT:
8241 // True = fp_to_sint(Src)
8242 // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
8243 // Result = select (Src < 0x8000000000000000), True, False
8244
8245 SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
8246 // TODO: Should any fast-math-flags be set for the FSUB?
8247 SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT,
8248 DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
8249 False = DAG.getNode(ISD::XOR, dl, DstVT, False,
8250 DAG.getConstant(SignMask, dl, DstVT));
8251 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8252 Result = DAG.getSelect(dl, DstVT, Sel, True, False);
8253 }
8254 return true;
8255}
8256
// Expand unsigned i64 -> f64 conversion without a native UINT_TO_FP, by
// splitting the integer into 32-bit halves and combining them as doubles
// (the compiler-rt __floatundidf algorithm). Returns true and sets Result
// on success.
// NOTE(review): the first line of the signature (Node and the Result output
// reference) is elided in this view of the source (line 8257).
8258 SDValue &Chain,
8259 SelectionDAG &DAG) const {
8260 // This transform is not correct for converting 0 when rounding mode is set
8261 // to round toward negative infinity which will produce -0.0. So disable under
8262 // strictfp.
8263 if (Node->isStrictFPOpcode())
8264 return false;
8265
8266 SDValue Src = Node->getOperand(0);
8267 EVT SrcVT = Src.getValueType();
8268 EVT DstVT = Node->getValueType(0);
8269
8270 if (SrcVT.getScalarType() != MVT::i64 || DstVT.getScalarType() != MVT::f64)
8271 return false;
8272
8273 // Only expand vector types if we have the appropriate vector bit operations.
// NOTE(review): the remaining legality checks of this condition (lines
// 8275-8278) are elided in this view of the source.
8274 if (SrcVT.isVector() && (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
8279 return false;
8280
8281 SDLoc dl(SDValue(Node, 0));
8282 EVT ShiftVT = getShiftAmountTy(SrcVT, DAG.getDataLayout());
8283
8284 // Implementation of unsigned i64 to f64 following the algorithm in
8285 // __floatundidf in compiler_rt. This implementation performs rounding
8286 // correctly in all rounding modes with the exception of converting 0
8287 // when rounding toward negative infinity. In that case the fsub will produce
8288 // -0.0. This will be added to +0.0 and produce -0.0 which is incorrect.
// 0x4330... is 2^52 and 0x4530... is 2^84 as double bit patterns; OR'ing the
// halves into those exponents makes them exact doubles: Lo + 2^52 and
// (Hi >> 32) scaled by 2^32 plus 2^84.
8289 SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
8290 SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
8291 llvm::bit_cast<double>(UINT64_C(0x4530000000100000)), dl, DstVT);
8292 SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT);
8293 SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT);
8294 SDValue HiShift = DAG.getConstant(32, dl, ShiftVT);
8295
8296 SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask);
8297 SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift);
8298 SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52);
8299 SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
8300 SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
8301 SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
// Subtracting (2^84 + 2^52) removes both exponent biases in one operation.
8302 SDValue HiSub =
8303 DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
8304 Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
8305 return true;
8306}
8307
// Lower FMINNUM/FMAXNUM (or their strict variants) to a compare+select when
// the node is known to have no NaN inputs; returns a null SDValue otherwise.
// NOTE(review): the line carrying the function name and first parameter is
// elided in this view of the source (line 8309).
8308SDValue
8310 SelectionDAG &DAG) const {
8311 unsigned Opcode = Node->getOpcode();
8312 assert((Opcode == ISD::FMINNUM || Opcode == ISD::FMAXNUM ||
8313 Opcode == ISD::STRICT_FMINNUM || Opcode == ISD::STRICT_FMAXNUM) &&
8314 "Wrong opcode");
8315
// Without the no-NaNs guarantee a plain ordered compare would pick the wrong
// operand for NaN inputs, so only expand under that flag.
8316 if (Node->getFlags().hasNoNaNs()) {
8317 ISD::CondCode Pred = Opcode == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
8318 SDValue Op1 = Node->getOperand(0);
8319 SDValue Op2 = Node->getOperand(1);
8320 SDValue SelCC = DAG.getSelectCC(SDLoc(Node), Op1, Op2, Op1, Op2, Pred);
8321 // Copy FMF flags, but always set the no-signed-zeros flag
8322 // as this is implied by the FMINNUM/FMAXNUM semantics.
8323 SDNodeFlags Flags = Node->getFlags();
8324 Flags.setNoSignedZeros(true);
8325 SelCC->setFlags(Flags);
8326 return SelCC;
8327 }
8328
8329 return SDValue();
8330}
8331
// Expand FMINNUM/FMAXNUM: prefer the *_IEEE form (quieting sNaN operands via
// FCANONICALIZE first), then FMINIMUM/FMAXIMUM when NaNs/zero-sign issues are
// provably absent, and finally a compare+select under no-NaNs.
// NOTE(review): the signature line (line 8332) is elided in this view of the
// source.
8333 SelectionDAG &DAG) const {
8334 SDLoc dl(Node);
8335 unsigned NewOp = Node->getOpcode() == ISD::FMINNUM ?
// NOTE(review): the second half of this ternary (line 8336, presumably the
// *_IEEE opcodes) is elided in this view of the source.
8337 EVT VT = Node->getValueType(0);
8338
8339 if (VT.isScalableVector())
8341 "Expanding fminnum/fmaxnum for scalable vectors is undefined.");
8342
8343 if (isOperationLegalOrCustom(NewOp, VT)) {
8344 SDValue Quiet0 = Node->getOperand(0);
8345 SDValue Quiet1 = Node->getOperand(1);
8346
8347 if (!Node->getFlags().hasNoNaNs()) {
8348 // Insert canonicalizes if it's possible we need to quiet to get correct
8349 // sNaN behavior.
8350 if (!DAG.isKnownNeverSNaN(Quiet0)) {
8351 Quiet0 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet0,
8352 Node->getFlags());
8353 }
8354 if (!DAG.isKnownNeverSNaN(Quiet1)) {
8355 Quiet1 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet1,
8356 Node->getFlags());
8357 }
8358 }
8359
8360 return DAG.getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags());
8361 }
8362
8363 // If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that
8364 // instead if there are no NaNs and there can't be an incompatible zero
8365 // compare: at least one operand isn't +/-0, or there are no signed-zeros.
8366 if ((Node->getFlags().hasNoNaNs() ||
8367 (DAG.isKnownNeverNaN(Node->getOperand(0)) &&
8368 DAG.isKnownNeverNaN(Node->getOperand(1)))) &&
8369 (Node->getFlags().hasNoSignedZeros() ||
8370 DAG.isKnownNeverZeroFloat(Node->getOperand(0)) ||
8371 DAG.isKnownNeverZeroFloat(Node->getOperand(1)))) {
8372 unsigned IEEE2018Op =
8373 Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
8374 if (isOperationLegalOrCustom(IEEE2018Op, VT))
8375 return DAG.getNode(IEEE2018Op, dl, VT, Node->getOperand(0),
8376 Node->getOperand(1), Node->getFlags());
8377 }
8378
// Last resort: compare+select, valid only when the node has no-NaNs set.
8379 if (SDValue SelCC = createSelectForFMINNUM_FMAXNUM(Node, DAG))
8380 return SelCC;
8381
8382 return SDValue();
8383}
8384
// Expand FMINIMUM/FMAXIMUM (IEEE-754 2019 semantics: NaNs propagate and
// -0.0 < +0.0) from whatever min/max or compare+select primitives the target
// provides, patching up NaN propagation and signed-zero ordering afterwards.
// NOTE(review): the signature line (line 8385) is elided in this view of the
// source.
8386 SelectionDAG &DAG) const {
8387 SDLoc DL(N);
8388 SDValue LHS = N->getOperand(0);
8389 SDValue RHS = N->getOperand(1);
8390 unsigned Opc = N->getOpcode();
8391 EVT VT = N->getValueType(0);
8392 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
8393 bool IsMax = Opc == ISD::FMAXIMUM;
8394
// NOTE(review): the vector-legality part of this condition (line 8396) is
// elided in this view of the source.
8395 if (VT.isVector() &&
8397 return SDValue();
8398
8399 // First, implement comparison not propagating NaN. If no native fmin or fmax
8400 // available, use plain select with setcc instead.
// NOTE(review): the declaration of MinMax (line 8401) is elided in this view
// of the source.
8402 unsigned CompOpcIeee = IsMax ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
8403 unsigned CompOpc = IsMax ? ISD::FMAXNUM : ISD::FMINNUM;
8404
8405 // FIXME: We should probably define fminnum/fmaxnum variants with correct
8406 // signed zero behavior.
8407 bool MinMaxMustRespectOrderedZero = false;
8408
8409 if (isOperationLegalOrCustom(CompOpcIeee, VT)) {
8410 MinMax = DAG.getNode(CompOpcIeee, DL, VT, LHS, RHS);
8411 MinMaxMustRespectOrderedZero = true;
8412 } else if (isOperationLegalOrCustom(CompOpc, VT)) {
8413 MinMax = DAG.getNode(CompOpc, DL, VT, LHS, RHS);
8414 } else {
8415 // NaN (if exists) will be propagated later, so orderness doesn't matter.
8416 SDValue Compare =
8417 DAG.getSetCC(DL, CCVT, LHS, RHS, IsMax ? ISD::SETGT : ISD::SETLT);
8418 MinMax = DAG.getSelect(DL, VT, Compare, LHS, RHS);
8419 }
8420
8421 // Propagate any NaN of both operands
8422 if (!N->getFlags().hasNoNaNs() &&
8423 (!DAG.isKnownNeverNaN(RHS) || !DAG.isKnownNeverNaN(LHS))) {
// NOTE(review): part of the ConstantFP::get call (line 8425) is elided in
// this view of the source.
8424 ConstantFP *FPNaN = ConstantFP::get(
8426 MinMax = DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, LHS, RHS, ISD::SETUO),
8427 DAG.getConstantFP(*FPNaN, DL, VT), MinMax);
8428 }
8429
8430 // fminimum/fmaximum requires -0.0 less than +0.0
// NOTE(review): the rest of this zero-sign guard (line 8432) is elided in
// this view of the source.
8431 if (!MinMaxMustRespectOrderedZero && !N->getFlags().hasNoSignedZeros() &&
8433 SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
8434 DAG.getConstantFP(0.0, DL, VT), ISD::SETEQ);
// When the preliminary result is zero, prefer the operand that is the
// "winning" signed zero: +0.0 for max, -0.0 for min.
8435 SDValue TestZero =
8436 DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
8437 SDValue LCmp = DAG.getSelect(
8438 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHS, TestZero), LHS,
8439 MinMax);
8440 SDValue RCmp = DAG.getSelect(
8441 DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, RHS, TestZero), RHS,
8442 LCmp);
8443 MinMax = DAG.getSelect(DL, VT, IsZero, RCmp, MinMax);
8444 }
8445
8446 return MinMax;
8447}
8448
8449/// Returns a true value if if this FPClassTest can be performed with an ordered
8450/// fcmp to 0, and a false value if it's an unordered fcmp to 0. Returns
8451/// std::nullopt if it cannot be performed as a compare with 0.
8452static std::optional<bool> isFCmpEqualZero(FPClassTest Test,
8453 const fltSemantics &Semantics,
8454 const MachineFunction &MF) {
8455 FPClassTest OrderedMask = Test & ~fcNan;
8456 FPClassTest NanTest = Test & fcNan;
8457 bool IsOrdered = NanTest == fcNone;
8458 bool IsUnordered = NanTest == fcNan;
8459
8460 // Skip cases that are testing for only a qnan or snan.
8461 if (!IsOrdered && !IsUnordered)
8462 return std::nullopt;
8463
8464 if (OrderedMask == fcZero &&
8465 MF.getDenormalMode(Semantics).Input == DenormalMode::IEEE)
8466 return IsOrdered;
8467 if (OrderedMask == (fcZero | fcSubnormal) &&
8468 MF.getDenormalMode(Semantics).inputsAreZero())
8469 return IsOrdered;
8470 return std::nullopt;
8471}
8472
// Expand ISD::IS_FPCLASS: test whether Op belongs to any of the FP classes in
// Test. Uses cheap float compares when FP exceptions may be ignored, and
// otherwise bit-tests the integer representation (with special handling for
// ppcf128 and the explicit integer bit of x86 f80).
// NOTE(review): the first two signature lines (ResultVT, Op, Test, Flags;
// lines 8473-8474) are elided in this view of the source.
8475 const SDLoc &DL,
8476 SelectionDAG &DAG) const {
8477 EVT OperandVT = Op.getValueType();
8478 assert(OperandVT.isFloatingPoint());
8479
8480 // Degenerated cases.
8481 if (Test == fcNone)
8482 return DAG.getBoolConstant(false, DL, ResultVT, OperandVT);
8483 if ((Test & fcAllFlags) == fcAllFlags)
8484 return DAG.getBoolConstant(true, DL, ResultVT, OperandVT);
8485
8486 // PPC double double is a pair of doubles, of which the higher part determines
8487 // the value class.
8488 if (OperandVT == MVT::ppcf128) {
8489 Op = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::f64, Op,
8490 DAG.getConstant(1, DL, MVT::i32));
8491 OperandVT = MVT::f64;
8492 }
8493
8494 // Some checks may be represented as inversion of simpler check, for example
8495 // "inf|normal|subnormal|zero" => !"nan".
8496 bool IsInverted = false;
8497 if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test)) {
8498 IsInverted = true;
8499 Test = InvertedCheck;
8500 }
8501
8502 // Floating-point type properties.
8503 EVT ScalarFloatVT = OperandVT.getScalarType();
8504 const Type *FloatTy = ScalarFloatVT.getTypeForEVT(*DAG.getContext());
8505 const llvm::fltSemantics &Semantics = FloatTy->getFltSemantics();
8506 bool IsF80 = (ScalarFloatVT == MVT::f80);
8507
8508 // Some checks can be implemented using float comparisons, if floating point
8509 // exceptions are ignored.
// NOTE(review): part of this condition (line 8511, presumably a SETCC
// legality check) is elided in this view of the source.
8510 if (Flags.hasNoFPExcept() &&
8512 ISD::CondCode OrderedCmpOpcode = IsInverted ? ISD::SETUNE : ISD::SETOEQ;
8513 ISD::CondCode UnorderedCmpOpcode = IsInverted ? ISD::SETONE : ISD::SETUEQ;
8514
8515 if (std::optional<bool> IsCmp0 =
8516 isFCmpEqualZero(Test, Semantics, DAG.getMachineFunction());
8517 IsCmp0 && (isCondCodeLegalOrCustom(
8518 *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode,
8519 OperandVT.getScalarType().getSimpleVT()))) {
8520
8521 // If denormals could be implicitly treated as 0, this is not equivalent
8522 // to a compare with 0 since it will also be true for denormals.
8523 return DAG.getSetCC(DL, ResultVT, Op,
8524 DAG.getConstantFP(0.0, DL, OperandVT),
8525 *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode);
8526 }
8527
// NOTE(review): part of this condition (line 8529) is elided in this view of
// the source. "x != x" is the classic NaN test.
8528 if (Test == fcNan &&
8530 OperandVT.getScalarType().getSimpleVT())) {
8531 return DAG.getSetCC(DL, ResultVT, Op, Op,
8532 IsInverted ? ISD::SETO : ISD::SETUO);
8533 }
8534
// NOTE(review): parts of this condition (lines 8536 and 8538) are elided in
// this view of the source.
8535 if (Test == fcInf &&
8537 OperandVT.getScalarType().getSimpleVT()) &&
8539 // isinf(x) --> fabs(x) == inf
8540 SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
8541 SDValue Inf =
8542 DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
8543 return DAG.getSetCC(DL, ResultVT, Abs, Inf,
8544 IsInverted ? ISD::SETUNE : ISD::SETOEQ);
8545 }
8546 }
8547
8548 // In the general case use integer operations.
8549 unsigned BitSize = OperandVT.getScalarSizeInBits();
8550 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), BitSize);
8551 if (OperandVT.isVector())
8552 IntVT = EVT::getVectorVT(*DAG.getContext(), IntVT,
8553 OperandVT.getVectorElementCount());
8554 SDValue OpAsInt = DAG.getBitcast(IntVT, Op);
8555
8556 // Various masks.
8557 APInt SignBit = APInt::getSignMask(BitSize);
8558 APInt ValueMask = APInt::getSignedMaxValue(BitSize); // All bits but sign.
8559 APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
8560 const unsigned ExplicitIntBitInF80 = 63;
8561 APInt ExpMask = Inf;
8562 if (IsF80)
8563 ExpMask.clearBit(ExplicitIntBitInF80);
8564 APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
8565 APInt QNaNBitMask =
8566 APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
8567 APInt InvertionMask = APInt::getAllOnes(ResultVT.getScalarSizeInBits());
8568
8569 SDValue ValueMaskV = DAG.getConstant(ValueMask, DL, IntVT);
8570 SDValue SignBitV = DAG.getConstant(SignBit, DL, IntVT);
8571 SDValue ExpMaskV = DAG.getConstant(ExpMask, DL, IntVT);
8572 SDValue ZeroV = DAG.getConstant(0, DL, IntVT);
8573 SDValue InfV = DAG.getConstant(Inf, DL, IntVT);
8574 SDValue ResultInvertionMask = DAG.getConstant(InvertionMask, DL, ResultVT);
8575
8576 SDValue Res;
// OR each class's partial test into the accumulated result.
8577 const auto appendResult = [&](SDValue PartialRes) {
8578 if (PartialRes) {
8579 if (Res)
8580 Res = DAG.getNode(ISD::OR, DL, ResultVT, Res, PartialRes);
8581 else
8582 Res = PartialRes;
8583 }
8584 };
8585
8586 SDValue IntBitIsSetV; // Explicit integer bit in f80 mantissa is set.
// Lazily materialize the f80 explicit-integer-bit test; cached so it is
// built at most once.
8587 const auto getIntBitIsSet = [&]() -> SDValue {
8588 if (!IntBitIsSetV) {
8589 APInt IntBitMask(BitSize, 0);
8590 IntBitMask.setBit(ExplicitIntBitInF80);
8591 SDValue IntBitMaskV = DAG.getConstant(IntBitMask, DL, IntVT);
8592 SDValue IntBitV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, IntBitMaskV);
8593 IntBitIsSetV = DAG.getSetCC(DL, ResultVT, IntBitV, ZeroV, ISD::SETNE);
8594 }
8595 return IntBitIsSetV;
8596 };
8597
8598 // Split the value into sign bit and absolute value.
8599 SDValue AbsV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ValueMaskV);
8600 SDValue SignV = DAG.getSetCC(DL, ResultVT, OpAsInt,
8601 DAG.getConstant(0.0, DL, IntVT), ISD::SETLT);
8602
8603 // Tests that involve more than one class should be processed first.
8604 SDValue PartialRes;
8605
8606 if (IsF80)
8607 ; // Detect finite numbers of f80 by checking individual classes because
8608 // they have different settings of the explicit integer bit.
8609 else if ((Test & fcFinite) == fcFinite) {
8610 // finite(V) ==> abs(V) < exp_mask
8611 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
8612 Test &= ~fcFinite;
8613 } else if ((Test & fcFinite) == fcPosFinite) {
8614 // finite(V) && V > 0 ==> V < exp_mask
8615 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ExpMaskV, ISD::SETULT);
8616 Test &= ~fcPosFinite;
8617 } else if ((Test & fcFinite) == fcNegFinite) {
8618 // finite(V) && V < 0 ==> abs(V) < exp_mask && signbit == 1
8619 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
8620 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
8621 Test &= ~fcNegFinite;
8622 }
8623 appendResult(PartialRes);
8624
8625 if (FPClassTest PartialCheck = Test & (fcZero | fcSubnormal)) {
8626 // fcZero | fcSubnormal => test all exponent bits are 0
8627 // TODO: Handle sign bit specific cases
8628 if (PartialCheck == (fcZero | fcSubnormal)) {
8629 SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ExpMaskV);
8630 SDValue ExpIsZero =
8631 DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
8632 appendResult(ExpIsZero);
8633 Test &= ~PartialCheck & fcAllFlags;
8634 }
8635 }
8636
8637 // Check for individual classes.
8638
8639 if (unsigned PartialCheck = Test & fcZero) {
8640 if (PartialCheck == fcPosZero)
8641 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ZeroV, ISD::SETEQ)
8642 else if (PartialCheck == fcZero)
8643 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ZeroV, ISD::SETEQ);
8644 else // ISD::fcNegZero
8645 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, SignBitV, ISD::SETEQ);
8646 appendResult(PartialRes);
8647 }
8648
8649 if (unsigned PartialCheck = Test & fcSubnormal) {
8650 // issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set)
8651 // issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set)
8652 SDValue V = (PartialCheck == fcPosSubnormal) ? OpAsInt : AbsV;
8653 SDValue MantissaV = DAG.getConstant(AllOneMantissa, DL, IntVT);
8654 SDValue VMinusOneV =
8655 DAG.getNode(ISD::SUB, DL, IntVT, V, DAG.getConstant(1, DL, IntVT));
8656 PartialRes = DAG.getSetCC(DL, ResultVT, VMinusOneV, MantissaV, ISD::SETULT);
8657 if (PartialCheck == fcNegSubnormal)
8658 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
8659 appendResult(PartialRes);
8660 }
8661
8662 if (unsigned PartialCheck = Test & fcInf) {
8663 if (PartialCheck == fcPosInf)
8664 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, InfV, ISD::SETEQ);
8665 else if (PartialCheck == fcInf)
8666 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETEQ);
8667 else { // ISD::fcNegInf
8668 APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
8669 SDValue NegInfV = DAG.getConstant(NegInf, DL, IntVT);
8670 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, NegInfV, ISD::SETEQ);
8671 }
8672 appendResult(PartialRes);
8673 }
8674
8675 if (unsigned PartialCheck = Test & fcNan) {
8676 APInt InfWithQnanBit = Inf | QNaNBitMask;
8677 SDValue InfWithQnanBitV = DAG.getConstant(InfWithQnanBit, DL, IntVT);
8678 if (PartialCheck == fcNan) {
8679 // isnan(V) ==> abs(V) > int(inf)
8680 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
8681 if (IsF80) {
8682 // Recognize unsupported values as NaNs for compatibility with glibc.
8683 // In them (exp(V)==0) == int_bit.
8684 SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, AbsV, ExpMaskV);
8685 SDValue ExpIsZero =
8686 DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
8687 SDValue IsPseudo =
8688 DAG.getSetCC(DL, ResultVT, getIntBitIsSet(), ExpIsZero, ISD::SETEQ);
8689 PartialRes = DAG.getNode(ISD::OR, DL, ResultVT, PartialRes, IsPseudo);
8690 }
8691 } else if (PartialCheck == fcQNan) {
8692 // isquiet(V) ==> abs(V) >= (unsigned(Inf) | quiet_bit)
8693 PartialRes =
8694 DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETGE);
8695 } else { // ISD::fcSNan
8696 // issignaling(V) ==> abs(V) > unsigned(Inf) &&
8697 // abs(V) < (unsigned(Inf) | quiet_bit)
8698 SDValue IsNan = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
8699 SDValue IsNotQnan =
8700 DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETLT);
8701 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, IsNan, IsNotQnan);
8702 }
8703 appendResult(PartialRes);
8704 }
8705
8706 if (unsigned PartialCheck = Test & fcNormal) {
8707 // isnormal(V) ==> (0 < exp < max_exp) ==> (unsigned(exp-1) < (max_exp-1))
8708 APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
8709 SDValue ExpLSBV = DAG.getConstant(ExpLSB, DL, IntVT);
8710 SDValue ExpMinus1 = DAG.getNode(ISD::SUB, DL, IntVT, AbsV, ExpLSBV);
8711 APInt ExpLimit = ExpMask - ExpLSB;
8712 SDValue ExpLimitV = DAG.getConstant(ExpLimit, DL, IntVT);
8713 PartialRes = DAG.getSetCC(DL, ResultVT, ExpMinus1, ExpLimitV, ISD::SETULT);
8714 if (PartialCheck == fcNegNormal)
8715 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
8716 else if (PartialCheck == fcPosNormal) {
8717 SDValue PosSignV =
8718 DAG.getNode(ISD::XOR, DL, ResultVT, SignV, ResultInvertionMask);
8719 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, PosSignV);
8720 }
8721 if (IsF80)
8722 PartialRes =
8723 DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, getIntBitIsSet());
8724 appendResult(PartialRes);
8725 }
8726
8727 if (!Res)
8728 return DAG.getConstant(IsInverted, DL, ResultVT);
8729 if (IsInverted)
8730 Res = DAG.getNode(ISD::XOR, DL, ResultVT, Res, ResultInvertionMask);
8731 return Res;
8732}
8733
8734// Only expand vector types if we have the appropriate vector bit operations.
// Helper for the CTPOP expansion: returns true when the vector ADD/MUL (and,
// per the elided lines, presumably the SUB/SRL/AND building blocks) needed by
// the bit-twiddling popcount sequence are legal or custom for VT.
// NOTE(review): lines 8739-8740 and 8742 of this condition are elided in this
// view of the source.
8735static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) {
8736 assert(VT.isVector() && "Expected vector type");
8737 unsigned Len = VT.getScalarSizeInBits();
8738 return TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
8741 (Len == 8 || TLI.isOperationLegalOrCustom(ISD::MUL, VT)) &&
8743}
8744
// Expand ISD::CTPOP using the parallel bit-counting ("SWAR") algorithm from
// the Stanford bit-twiddling hacks page. Supports byte-multiple widths up to
// 128 bits; returns a null SDValue when expansion is not possible.
// NOTE(review): the signature line (line 8745) is elided in this view of the
// source.
8746 SDLoc dl(Node);
8747 EVT VT = Node->getValueType(0);
8748 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
8749 SDValue Op = Node->getOperand(0);
8750 unsigned Len = VT.getScalarSizeInBits();
8751 assert(VT.isInteger() && "CTPOP not implemented for this type.");
8752
8753 // TODO: Add support for irregular type lengths.
8754 if (!(Len <= 128 && Len % 8 == 0))
8755 return SDValue();
8756
8757 // Only expand vector types if we have the appropriate vector bit operations.
8758 if (VT.isVector() && !canExpandVectorCTPOP(*this, VT))
8759 return SDValue();
8760
8761 // This is the "best" algorithm from
8762 // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
// Repeating byte masks, splatted across the full width.
8763 SDValue Mask55 =
8764 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
8765 SDValue Mask33 =
8766 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
8767 SDValue Mask0F =
8768 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
8769
8770 // v = v - ((v >> 1) & 0x55555555...)
8771 Op = DAG.getNode(ISD::SUB, dl, VT, Op,
8772 DAG.getNode(ISD::AND, dl, VT,
8773 DAG.getNode(ISD::SRL, dl, VT, Op,
8774 DAG.getConstant(1, dl, ShVT)),
8775 Mask55));
8776 // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
8777 Op = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
8778 DAG.getNode(ISD::AND, dl, VT,
8779 DAG.getNode(ISD::SRL, dl, VT, Op,
8780 DAG.getConstant(2, dl, ShVT)),
8781 Mask33));
8782 // v = (v + (v >> 4)) & 0x0F0F0F0F...
8783 Op = DAG.getNode(ISD::AND, dl, VT,
8784 DAG.getNode(ISD::ADD, dl, VT, Op,
8785 DAG.getNode(ISD::SRL, dl, VT, Op,
8786 DAG.getConstant(4, dl, ShVT))),
8787 Mask0F);
8788
// Each byte of Op now holds its own popcount (0..8).
8789 if (Len <= 8)
8790 return Op;
8791
8792 // Avoid the multiply if we only have 2 bytes to add.
8793 // TODO: Only doing this for scalars because vectors weren't as obviously
8794 // improved.
8795 if (Len == 16 && !VT.isVector()) {
8796 // v = (v + (v >> 8)) & 0x00FF;
8797 return DAG.getNode(ISD::AND, dl, VT,
8798 DAG.getNode(ISD::ADD, dl, VT, Op,
8799 DAG.getNode(ISD::SRL, dl, VT, Op,
8800 DAG.getConstant(8, dl, ShVT))),
8801 DAG.getConstant(0xFF, dl, VT));
8802 }
8803
8804 // v = (v * 0x01010101...) >> (Len - 8)
// NOTE(review): the condition guarding the multiply path (lines 8806-8807,
// presumably a MUL legality check) is elided in this view of the source;
// the else-branch sums the per-byte counts with shifts and adds instead.
8805 SDValue V;
8808 SDValue Mask01 =
8809 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
8810 V = DAG.getNode(ISD::MUL, dl, VT, Op, Mask01);
8811 } else {
8812 V = Op;
8813 for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
8814 SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
8815 V = DAG.getNode(ISD::ADD, dl, VT, V,
8816 DAG.getNode(ISD::SHL, dl, VT, V, ShiftC));
8817 }
8818 }
// The total count accumulates in the top byte; shift it down.
8819 return DAG.getNode(ISD::SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT));
8820}
8821
// VP (vector-predicated) variant of expandCTPOP: same bithacks algorithm but
// every node carries the (Mask, VL) predication operands.
// NOTE(review): lossy doxygen extraction — the signature line (orig. 8822)
// was dropped; presumably
// `SDValue TargetLowering::expandVPCTPOP(SDNode *Node, SelectionDAG &DAG) const`.
8823 SDLoc dl(Node);
8824 EVT VT = Node->getValueType(0);
8825 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
8826 SDValue Op = Node->getOperand(0);
8827 SDValue Mask = Node->getOperand(1);
8828 SDValue VL = Node->getOperand(2);
8829 unsigned Len = VT.getScalarSizeInBits();
8830 assert(VT.isInteger() && "VP_CTPOP not implemented for this type.");
8831
8832 // TODO: Add support for irregular type lengths.
8833 if (!(Len <= 128 && Len % 8 == 0))
8834 return SDValue();
8835
8836 // This is same algorithm of expandCTPOP from
8837 // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
8838 SDValue Mask55 =
8839 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
8840 SDValue Mask33 =
8841 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
8842 SDValue Mask0F =
8843 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
8844
8845 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5;
8846
8847 // v = v - ((v >> 1) & 0x55555555...)
8848 Tmp1 = DAG.getNode(ISD::VP_AND, dl, VT,
8849 DAG.getNode(ISD::VP_LSHR, dl, VT, Op,
8850 DAG.getConstant(1, dl, ShVT), Mask, VL),
8851 Mask55, Mask, VL);
8852 Op = DAG.getNode(ISD::VP_SUB, dl, VT, Op, Tmp1, Mask, VL);
8853
8854 // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
8855 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Op, Mask33, Mask, VL);
8856 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT,
8857 DAG.getNode(ISD::VP_LSHR, dl, VT, Op,
8858 DAG.getConstant(2, dl, ShVT), Mask, VL),
8859 Mask33, Mask, VL);
8860 Op = DAG.getNode(ISD::VP_ADD, dl, VT, Tmp2, Tmp3, Mask, VL);
8861
8862 // v = (v + (v >> 4)) & 0x0F0F0F0F...
// NOTE(review): the line below ends with a comma operator (`,`) rather than
// `;` — syntactically valid and behavior-equivalent here, but it looks like a
// typo worth normalizing upstream.
8863 Tmp4 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(4, dl, ShVT),
8864 Mask, VL),
8865 Tmp5 = DAG.getNode(ISD::VP_ADD, dl, VT, Op, Tmp4, Mask, VL);
8866 Op = DAG.getNode(ISD::VP_AND, dl, VT, Tmp5, Mask0F, Mask, VL);
8867
8868 if (Len <= 8)
8869 return Op;
8870
8871 // v = (v * 0x01010101...) >> (Len - 8)
8872 SDValue V;
// NOTE(review): orig. line 8873 was dropped — presumably the start of an
// `if (isOperationLegalOrCustomOrPromote(` legality check continued below.
8874 ISD::VP_MUL, getTypeToTransformTo(*DAG.getContext(), VT))) {
8875 SDValue Mask01 =
8876 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
8877 V = DAG.getNode(ISD::VP_MUL, dl, VT, Op, Mask01, Mask, VL);
8878 } else {
// No VP multiply: accumulate byte counts with a shift/add ladder.
8879 V = Op;
8880 for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
8881 SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
8882 V = DAG.getNode(ISD::VP_ADD, dl, VT, V,
8883 DAG.getNode(ISD::VP_SHL, dl, VT, V, ShiftC, Mask, VL),
8884 Mask, VL);
8885 }
8886 }
8887 return DAG.getNode(ISD::VP_LSHR, dl, VT, V,
8888 DAG.getConstant(Len - 8, dl, ShVT), Mask, VL);
8889}
8890
// Expand ISD::CTLZ / CTLZ_ZERO_UNDEF: prefer the other CTLZ flavor when legal,
// else smear the leading one bit right and count set bits of the complement
// ("Hacker's Delight").
// NOTE(review): lossy doxygen extraction — the signature line (orig. 8891) and
// several condition lines were dropped; confirm all `if` conditions upstream.
8892 SDLoc dl(Node);
8893 EVT VT = Node->getValueType(0);
8894 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
8895 SDValue Op = Node->getOperand(0);
8896 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
8897
8898 // If the non-ZERO_UNDEF version is supported we can use that instead.
// NOTE(review): orig. line 8900 dropped — presumably the legality check for
// ISD::CTLZ completing this condition.
8899 if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
8901 return DAG.getNode(ISD::CTLZ, dl, VT, Op);
8902
8903 // If the ZERO_UNDEF version is supported use that and handle the zero case.
// NOTE(review): orig. line 8904 dropped — presumably
// `if (isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) {`.
8905 EVT SetCCVT =
8906 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
8907 SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op);
8908 SDValue Zero = DAG.getConstant(0, dl, VT);
8909 SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
// Zero input: CTLZ is defined as the full element width.
8910 return DAG.getSelect(dl, VT, SrcIsZero,
8911 DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ);
8912 }
8913
8914 // Only expand vector types if we have the appropriate vector bit operations.
8915 // This includes the operations needed to expand CTPOP if it isn't supported.
// NOTE(review): orig. lines 8917 and 8919-8920 dropped from this condition —
// verify the full guard against upstream before relying on it.
8916 if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
8918 !canExpandVectorCTPOP(*this, VT)) ||
8921 return SDValue();
8922
8923 // for now, we do this:
8924 // x = x | (x >> 1);
8925 // x = x | (x >> 2);
8926 // ...
8927 // x = x | (x >>16);
8928 // x = x | (x >>32); // for 64-bit input
8929 // return popcount(~x);
8930 //
8931 // Ref: "Hacker's Delight" by Henry Warren
8932 for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
8933 SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
8934 Op = DAG.getNode(ISD::OR, dl, VT, Op,
8935 DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));
8936 }
8937 Op = DAG.getNOT(dl, Op, VT);
8938 return DAG.getNode(ISD::CTPOP, dl, VT, Op);
8939}
8940
// VP (vector-predicated) CTLZ expansion: smear the leading one right, invert,
// and popcount — every node carries (Mask, VL).
// NOTE(review): lossy doxygen extraction — signature line (orig. 8941) dropped.
8942 SDLoc dl(Node);
8943 EVT VT = Node->getValueType(0);
8944 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
8945 SDValue Op = Node->getOperand(0);
8946 SDValue Mask = Node->getOperand(1);
8947 SDValue VL = Node->getOperand(2);
8948 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
8949
8950 // do this:
8951 // x = x | (x >> 1);
8952 // x = x | (x >> 2);
8953 // ...
8954 // x = x | (x >>16);
8955 // x = x | (x >>32); // for 64-bit input
8956 // return popcount(~x);
8957 for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
8958 SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
8959 Op = DAG.getNode(ISD::VP_OR, dl, VT, Op,
8960 DAG.getNode(ISD::VP_LSHR, dl, VT, Op, Tmp, Mask, VL), Mask,
8961 VL);
8962 }
// ~x is expressed as x XOR -1 since there is no predicated NOT helper.
8963 Op = DAG.getNode(ISD::VP_XOR, dl, VT, Op, DAG.getConstant(-1, dl, VT), Mask,
8964 VL);
8965 return DAG.getNode(ISD::VP_CTPOP, dl, VT, Op, Mask, VL);
8966}
8967
// CTTZ via a de Bruijn multiply + constant-pool table lookup (32/64-bit only):
// isolates the lowest set bit with (x & -x), multiplies by a de Bruijn
// constant, and uses the top log2(BitWidth) bits to index a byte table.
// NOTE(review): lossy doxygen extraction — the first signature line
// (orig. 8968, presumably `SDValue TargetLowering::CTTZTableLookup(SDNode
// *Node, SelectionDAG &DAG,`) was dropped.
8969 const SDLoc &DL, EVT VT, SDValue Op,
8970 unsigned BitWidth) const {
8971 if (BitWidth != 32 && BitWidth != 64)
8972 return SDValue();
8973 APInt DeBruijn = BitWidth == 32 ? APInt(32, 0x077CB531U)
8974 : APInt(64, 0x0218A392CD3D5DBFULL);
8975 const DataLayout &TD = DAG.getDataLayout();
// NOTE(review): orig. line 8977 (the PtrInfo initializer RHS, presumably a
// MachinePointerInfo::getConstantPool(...) call) was dropped.
8976 MachinePointerInfo PtrInfo =
8978 unsigned ShiftAmt = BitWidth - Log2_32(BitWidth);
8979 SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
// Lookup = ((x & -x) * DeBruijn) >> (BitWidth - log2(BitWidth))
8980 SDValue Lookup = DAG.getNode(
8981 ISD::SRL, DL, VT,
8982 DAG.getNode(ISD::MUL, DL, VT, DAG.getNode(ISD::AND, DL, VT, Op, Neg),
8983 DAG.getConstant(DeBruijn, DL, VT)),
8984 DAG.getConstant(ShiftAmt, DL, VT));
// NOTE(review): orig. lines 8985/8987 dropped — presumably the declaration of
// the `Table` byte array filled by the loop below.
8986
8988 for (unsigned i = 0; i < BitWidth; i++) {
8989 APInt Shl = DeBruijn.shl(i);
8990 APInt Lshr = Shl.lshr(ShiftAmt);
8991 Table[Lshr.getZExtValue()] = i;
8992 }
8993
8994 // Create a ConstantArray in Constant Pool
8995 auto *CA = ConstantDataArray::get(*DAG.getContext(), Table);
8996 SDValue CPIdx = DAG.getConstantPool(CA, getPointerTy(TD),
8997 TD.getPrefTypeAlign(CA->getType()));
8998 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getEntryNode(),
8999 DAG.getMemBasePlusOffset(CPIdx, Lookup, DL),
9000 PtrInfo, MVT::i8);
// ZERO_UNDEF: a zero input is UB, so the raw table result is fine as-is.
9001 if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF)
9002 return ExtLoad;
9003
9004 EVT SetCCVT =
9005 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9006 SDValue Zero = DAG.getConstant(0, DL, VT);
9007 SDValue SrcIsZero = DAG.getSetCC(DL, SetCCVT, Op, Zero, ISD::SETEQ);
9008 return DAG.getSelect(DL, VT, SrcIsZero,
9009 DAG.getConstant(BitWidth, DL, VT), ExtLoad);
9010}
9011
// Expand ISD::CTTZ / CTTZ_ZERO_UNDEF: prefer the other CTTZ flavor, then a
// de Bruijn table lookup, then popcount(~x & (x-1)), or CTLZ-based counting.
// NOTE(review): lossy doxygen extraction — signature (orig. 9012) and several
// condition lines dropped; confirm the full guards upstream.
9013 SDLoc dl(Node);
9014 EVT VT = Node->getValueType(0);
9015 SDValue Op = Node->getOperand(0);
9016 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
9017
9018 // If the non-ZERO_UNDEF version is supported we can use that instead.
// NOTE(review): orig. line 9020 dropped — presumably the ISD::CTTZ legality
// check completing this condition.
9019 if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
9021 return DAG.getNode(ISD::CTTZ, dl, VT, Op);
9022
9023 // If the ZERO_UNDEF version is supported use that and handle the zero case.
// NOTE(review): orig. line 9024 dropped — presumably
// `if (isOperationLegalOrCustom(ISD::CTTZ_ZERO_UNDEF, VT)) {`.
9025 EVT SetCCVT =
9026 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9027 SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op);
9028 SDValue Zero = DAG.getConstant(0, dl, VT);
9029 SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
// Zero input: CTTZ is defined as the full element width.
9030 return DAG.getSelect(dl, VT, SrcIsZero,
9031 DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ);
9032 }
9033
9034 // Only expand vector types if we have the appropriate vector bit operations.
9035 // This includes the operations needed to expand CTPOP if it isn't supported.
// NOTE(review): orig. lines 9037-9038 and 9040-9042 dropped from this guard.
9036 if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
9039 !canExpandVectorCTPOP(*this, VT)) ||
9043 return SDValue();
9044
9045 // Emit Table Lookup if ISD::CTLZ and ISD::CTPOP are not legal.
// NOTE(review): orig. line 9047 dropped — presumably the matching
// isOperationExpand(ISD::CTLZ, VT) check closing this condition.
9046 if (!VT.isVector() && isOperationExpand(ISD::CTPOP, VT) &&
9048 if (SDValue V = CTTZTableLookup(Node, DAG, dl, VT, Op, NumBitsPerElt))
9049 return V;
9050
9051 // for now, we use: { return popcount(~x & (x - 1)); }
9052 // unless the target has ctlz but not ctpop, in which case we use:
9053 // { return 32 - nlz(~x & (x-1)); }
9054 // Ref: "Hacker's Delight" by Henry Warren
9055 SDValue Tmp = DAG.getNode(
9056 ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT),
9057 DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getConstant(1, dl, VT)));
9058
9059 // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
// NOTE(review): orig. line 9060 dropped — presumably the `if (...) {` with the
// CTLZ-legal / CTPOP-not-legal check.
9061 return DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
9062 DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
9063 }
9064
9065 return DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
9066}
9067
// VP (vector-predicated) CTTZ expansion: popcount(~x & (x - 1)) with (Mask,
// VL) carried on every node.
// NOTE(review): lossy doxygen extraction — signature line (orig. 9068) dropped.
9069 SDValue Op = Node->getOperand(0);
9070 SDValue Mask = Node->getOperand(1);
9071 SDValue VL = Node->getOperand(2);
9072 SDLoc dl(Node);
9073 EVT VT = Node->getValueType(0);
9074
9075 // Same as the vector part of expandCTTZ, use: popcount(~x & (x - 1))
9076 SDValue Not = DAG.getNode(ISD::VP_XOR, dl, VT, Op,
9077 DAG.getConstant(-1, dl, VT), Mask, VL);
9078 SDValue MinusOne = DAG.getNode(ISD::VP_SUB, dl, VT, Op,
9079 DAG.getConstant(1, dl, VT), Mask, VL);
9080 SDValue Tmp = DAG.getNode(ISD::VP_AND, dl, VT, Not, MinusOne, Mask, VL);
9081 return DAG.getNode(ISD::VP_CTPOP, dl, VT, Tmp, Mask, VL);
9082}
9083
// Expand VP_CTTZ_ELTS: index of the first active true element, computed as a
// umin-reduction over a step vector where false lanes are replaced by EVL.
// NOTE(review): lossy doxygen extraction — the first signature line
// (orig. 9084) was dropped; only the `SelectionDAG &DAG) const {` tail remains.
9085 SelectionDAG &DAG) const {
9086 // %cond = to_bool_vec %source
9087 // %splat = splat /*val=*/VL
9088 // %tz = step_vector
9089 // %v = vp.select %cond, /*true=*/tz, /*false=*/%splat
9090 // %r = vp.reduce.umin %v
9091 SDLoc DL(N);
9092 SDValue Source = N->getOperand(0);
9093 SDValue Mask = N->getOperand(1);
9094 SDValue EVL = N->getOperand(2);
9095 EVT SrcVT = Source.getValueType();
9096 EVT ResVT = N->getValueType(0);
9097 EVT ResVecVT =
9098 EVT::getVectorVT(*DAG.getContext(), ResVT, SrcVT.getVectorElementCount());
9099
9100 // Convert to boolean vector.
9101 if (SrcVT.getScalarType() != MVT::i1) {
9102 SDValue AllZero = DAG.getConstant(0, DL, SrcVT);
9103 SrcVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
9104 SrcVT.getVectorElementCount());
9105 Source = DAG.getNode(ISD::VP_SETCC, DL, SrcVT, Source, AllZero,
9106 DAG.getCondCode(ISD::SETNE), Mask, EVL);
9107 }
9108
// EVL splatted into false lanes guarantees the reduction returns EVL when no
// element is set; ExtEVL also seeds the reduction's start value.
9109 SDValue ExtEVL = DAG.getZExtOrTrunc(EVL, DL, ResVT);
9110 SDValue Splat = DAG.getSplat(ResVecVT, DL, ExtEVL);
9111 SDValue StepVec = DAG.getStepVector(DL, ResVecVT);
9112 SDValue Select =
9113 DAG.getNode(ISD::VP_SELECT, DL, ResVecVT, Source, StepVec, Splat, EVL);
9114 return DAG.getNode(ISD::VP_REDUCE_UMIN, DL, ResVT, ExtEVL, Select, Mask, EVL);
9115}
9116
// Expand ISD::ABS (or negated abs when IsNegative): try smax/umin/smin forms
// first, fall back to the classic sra/xor/sub trick.
// NOTE(review): lossy doxygen extraction — the first signature line
// (orig. 9117) and several condition continuations were dropped; confirm each
// truncated `if (...)` against upstream.
9118 bool IsNegative) const {
9119 SDLoc dl(N);
9120 EVT VT = N->getValueType(0);
9121 SDValue Op = N->getOperand(0);
9122
9123 // abs(x) -> smax(x,sub(0,x))
// NOTE(review): orig. line 9125 dropped — presumably the SMAX legality check.
9124 if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
9126 SDValue Zero = DAG.getConstant(0, dl, VT);
9127 return DAG.getNode(ISD::SMAX, dl, VT, Op,
9128 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
9129 }
9130
9131 // abs(x) -> umin(x,sub(0,x))
// NOTE(review): orig. line 9133 dropped — presumably the UMIN legality check.
9132 if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
9134 SDValue Zero = DAG.getConstant(0, dl, VT);
// Freeze so both uses of Op observe the same value (no poison divergence).
9135 Op = DAG.getFreeze(Op);
9136 return DAG.getNode(ISD::UMIN, dl, VT, Op,
9137 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
9138 }
9139
9140 // 0 - abs(x) -> smin(x, sub(0,x))
// NOTE(review): orig. line 9142 dropped — presumably the SMIN legality check.
9141 if (IsNegative && isOperationLegal(ISD::SUB, VT) &&
9143 Op = DAG.getFreeze(Op);
9144 SDValue Zero = DAG.getConstant(0, dl, VT);
9145 return DAG.getNode(ISD::SMIN, dl, VT, Op,
9146 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
9147 }
9148
9149 // Only expand vector types if we have the appropriate vector operations.
// NOTE(review): orig. lines 9151 and 9154 dropped from this guard.
9150 if (VT.isVector() &&
9152 (!IsNegative && !isOperationLegalOrCustom(ISD::ADD, VT)) ||
9153 (IsNegative && !isOperationLegalOrCustom(ISD::SUB, VT)) ||
9155 return SDValue();
9156
9157 Op = DAG.getFreeze(Op);
// Shift is all-ones when Op is negative, all-zeros otherwise.
9158 SDValue Shift = DAG.getNode(
9159 ISD::SRA, dl, VT, Op,
9160 DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, dl));
9161 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift);
9162
9163 // abs(x) -> Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
9164 if (!IsNegative)
9165 return DAG.getNode(ISD::SUB, dl, VT, Xor, Shift);
9166
9167 // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
9168 return DAG.getNode(ISD::SUB, dl, VT, Shift, Xor);
9169}
9170
// Expand ISD::ABDS/ABDU (absolute difference): max-min, then usubsat|usubsat
// for unsigned, finally a compare+select of the two subtraction orders.
// NOTE(review): lossy doxygen extraction — signature line (orig. 9171) dropped.
9172 SDLoc dl(N);
9173 EVT VT = N->getValueType(0);
// Freeze both operands: each is used multiple times below and must not
// diverge if it is poison/undef.
9174 SDValue LHS = DAG.getFreeze(N->getOperand(0));
9175 SDValue RHS = DAG.getFreeze(N->getOperand(1));
9176 bool IsSigned = N->getOpcode() == ISD::ABDS;
9177
9178 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
9179 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
9180 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
9181 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
9182 if (isOperationLegal(MaxOpc, VT) && isOperationLegal(MinOpc, VT)) {
9183 SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
9184 SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
9185 return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
9186 }
9187
9188 // abdu(lhs, rhs) -> or(usubsat(lhs,rhs), usubsat(rhs,lhs))
// One of the two saturating subtractions is zero, so OR merges them.
9189 if (!IsSigned && isOperationLegal(ISD::USUBSAT, VT))
9190 return DAG.getNode(ISD::OR, dl, VT,
9191 DAG.getNode(ISD::USUBSAT, dl, VT, LHS, RHS),
9192 DAG.getNode(ISD::USUBSAT, dl, VT, RHS, LHS));
9193
9194 // abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
9195 // abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
// NOTE(review): orig. line 9197 dropped — presumably the CondCode selection,
// e.g. `ISD::CondCode CC = IsSigned ? ISD::SETGT : ISD::SETUGT;`.
9196 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9198 SDValue Cmp = DAG.getSetCC(dl, CCVT, LHS, RHS, CC);
9199 return DAG.getSelect(dl, VT, Cmp, DAG.getNode(ISD::SUB, dl, VT, LHS, RHS),
9200 DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));
9201}
9202
// Expand ISD::BSWAP into shifts/masks/ors per scalar width (i16 uses a
// rotate; i32/i64 assemble the reversed bytes explicitly). Returns empty
// SDValue for non-simple or unsupported widths.
// NOTE(review): lossy doxygen extraction — signature line (orig. 9203) dropped.
9204 SDLoc dl(N);
9205 EVT VT = N->getValueType(0);
9206 SDValue Op = N->getOperand(0);
9207
9208 if (!VT.isSimple())
9209 return SDValue();
9210
9211 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
9212 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
9213 switch (VT.getSimpleVT().getScalarType().SimpleTy) {
9214 default:
9215 return SDValue();
9216 case MVT::i16:
9217 // Use a rotate by 8. This can be further expanded if necessary.
9218 return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
9219 case MVT::i32:
// TmpN holds what will become byte N (1-based from LSB) of the result.
9220 Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
9221 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Op,
9222 DAG.getConstant(0xFF00, dl, VT));
9223 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT));
9224 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
9225 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, dl, VT));
9226 Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
9227 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
9228 Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
9229 return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
9230 case MVT::i64:
9231 Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
9232 Tmp7 = DAG.getNode(ISD::AND, dl, VT, Op,
9233 DAG.getConstant(255ULL<<8, dl, VT));
9234 Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT));
9235 Tmp6 = DAG.getNode(ISD::AND, dl, VT, Op,
9236 DAG.getConstant(255ULL<<16, dl, VT));
9237 Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT));
9238 Tmp5 = DAG.getNode(ISD::AND, dl, VT, Op,
9239 DAG.getConstant(255ULL<<24, dl, VT));
9240 Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT));
9241 Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
9242 Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4,
9243 DAG.getConstant(255ULL<<24, dl, VT));
9244 Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
9245 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
9246 DAG.getConstant(255ULL<<16, dl, VT));
9247 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
9248 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2,
9249 DAG.getConstant(255ULL<<8, dl, VT));
9250 Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
// Combine pairwise, then quads, then the final halves (balanced OR tree).
9251 Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7);
9252 Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5);
9253 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
9254 Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
9255 Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6);
9256 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
9257 return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4);
9258 }
9259}
9260
// VP (vector-predicated) BSWAP expansion: same shift/mask/or structure as
// expandBSWAP, with (Mask, EVL) on every node. i16 uses shl/lshr/or since
// there is no predicated rotate here.
// NOTE(review): lossy doxygen extraction — signature line (orig. 9261) dropped.
9262 SDLoc dl(N);
9263 EVT VT = N->getValueType(0);
9264 SDValue Op = N->getOperand(0);
9265 SDValue Mask = N->getOperand(1);
9266 SDValue EVL = N->getOperand(2);
9267
9268 if (!VT.isSimple())
9269 return SDValue();
9270
9271 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
9272 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
9273 switch (VT.getSimpleVT().getScalarType().SimpleTy) {
9274 default:
9275 return SDValue();
9276 case MVT::i16:
9277 Tmp1 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
9278 Mask, EVL);
9279 Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
9280 Mask, EVL);
9281 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp1, Tmp2, Mask, EVL);
9282 case MVT::i32:
9283 Tmp4 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
9284 Mask, EVL);
9285 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Op, DAG.getConstant(0xFF00, dl, VT),
9286 Mask, EVL);
9287 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT),
9288 Mask, EVL);
9289 Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
9290 Mask, EVL);
9291 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
9292 DAG.getConstant(0xFF00, dl, VT), Mask, EVL);
9293 Tmp1 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
9294 Mask, EVL);
9295 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
9296 Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
9297 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
9298 case MVT::i64:
9299 Tmp8 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
9300 Mask, EVL);
9301 Tmp7 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
9302 DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
9303 Tmp7 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT),
9304 Mask, EVL);
9305 Tmp6 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
9306 DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
9307 Tmp6 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT),
9308 Mask, EVL);
9309 Tmp5 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
9310 DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
9311 Tmp5 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT),
9312 Mask, EVL);
9313 Tmp4 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
9314 Mask, EVL);
9315 Tmp4 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp4,
9316 DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
9317 Tmp3 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
9318 Mask, EVL);
9319 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp3,
9320 DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
9321 Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(40, dl, SHVT),
9322 Mask, EVL);
9323 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
9324 DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
9325 Tmp1 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
9326 Mask, EVL);
// Balanced OR tree combining the eight repositioned bytes.
9327 Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp7, Mask, EVL);
9328 Tmp6 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp6, Tmp5, Mask, EVL);
9329 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
9330 Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
9331 Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp6, Mask, EVL);
9332 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
9333 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp4, Mask, EVL);
9334 }
9335}
9336
// Expand ISD::BITREVERSE: for power-of-two widths >= 8, BSWAP then swap
// nibbles, bit-pairs and single bits with splat masks; otherwise move each
// bit individually (O(Sz) nodes).
// NOTE(review): lossy doxygen extraction — signature line (orig. 9337) dropped.
9338 SDLoc dl(N);
9339 EVT VT = N->getValueType(0);
9340 SDValue Op = N->getOperand(0);
9341 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
9342 unsigned Sz = VT.getScalarSizeInBits();
9343
9344 SDValue Tmp, Tmp2, Tmp3;
9345
9346 // If we can, perform BSWAP first and then the mask+swap the i4, then i2
9347 // and finally the i1 pairs.
9348 // TODO: We can easily support i4/i2 legal types if any target ever does.
9349 if (Sz >= 8 && isPowerOf2_32(Sz)) {
9350 // Create the masks - repeating the pattern every byte.
9351 APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
9352 APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
9353 APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
9354
9355 // BSWAP if the type is wider than a single byte.
9356 Tmp = (Sz > 8 ? DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op);
9357
9358 // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
9359 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT));
9360 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask4, dl, VT));
9361 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT));
9362 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT));
9363 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9364
9365 // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
9366 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT));
9367 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask2, dl, VT));
9368 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT));
9369 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT));
9370 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9371
9372 // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
9373 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT));
9374 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask1, dl, VT));
9375 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT));
9376 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT));
9377 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9378 return Tmp;
9379 }
9380
// Fallback for irregular widths: move bit I to mirrored position J one bit at
// a time, OR-accumulating into Tmp.
9381 Tmp = DAG.getConstant(0, dl, VT);
9382 for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) {
9383 if (I < J)
9384 Tmp2 =
9385 DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(J - I, dl, SHVT));
9386 else
9387 Tmp2 =
9388 DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT));
9389
9390 APInt Shift = APInt::getOneBitSet(Sz, J);
9391 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT));
9392 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2);
9393 }
9394
9395 return Tmp;
9396}
9397
// VP (vector-predicated) BITREVERSE expansion: BSWAP + nibble/pair/bit swaps
// with (Mask, EVL) on every node. Unlike the non-VP version there is no
// per-bit fallback — irregular widths return an empty SDValue.
// NOTE(review): lossy doxygen extraction — signature line (orig. 9398) dropped.
9399 assert(N->getOpcode() == ISD::VP_BITREVERSE);
9400
9401 SDLoc dl(N);
9402 EVT VT = N->getValueType(0);
9403 SDValue Op = N->getOperand(0);
9404 SDValue Mask = N->getOperand(1);
9405 SDValue EVL = N->getOperand(2);
9406 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
9407 unsigned Sz = VT.getScalarSizeInBits();
9408
9409 SDValue Tmp, Tmp2, Tmp3;
9410
9411 // If we can, perform BSWAP first and then the mask+swap the i4, then i2
9412 // and finally the i1 pairs.
9413 // TODO: We can easily support i4/i2 legal types if any target ever does.
9414 if (Sz >= 8 && isPowerOf2_32(Sz)) {
9415 // Create the masks - repeating the pattern every byte.
9416 APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
9417 APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
9418 APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
9419
9420 // BSWAP if the type is wider than a single byte.
9421 Tmp = (Sz > 8 ? DAG.getNode(ISD::VP_BSWAP, dl, VT, Op, Mask, EVL) : Op);
9422
9423 // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
9424 Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT),
9425 Mask, EVL);
9426 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
9427 DAG.getConstant(Mask4, dl, VT), Mask, EVL);
9428 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT),
9429 Mask, EVL);
9430 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT),
9431 Mask, EVL);
9432 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
9433
9434 // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
9435 Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT),
9436 Mask, EVL);
9437 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
9438 DAG.getConstant(Mask2, dl, VT), Mask, EVL);
9439 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT),
9440 Mask, EVL);
9441 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT),
9442 Mask, EVL);
9443 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
9444
9445 // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
9446 Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT),
9447 Mask, EVL);
9448 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
9449 DAG.getConstant(Mask1, dl, VT), Mask, EVL);
9450 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT),
9451 Mask, EVL);
9452 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT),
9453 Mask, EVL);
9454 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
9455 return Tmp;
9456 }
9457 return SDValue();
9458}
9459
// Scalarize a vector load into per-element scalar loads (byte-sized elements)
// or one wide integer load plus shift/mask extraction (sub-byte elements).
// Returns {value, chain}. Aborts via report_fatal_error on scalable vectors.
// NOTE(review): lossy doxygen extraction — the continuation of the signature
// (orig. 9461, presumably `TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,`)
// was dropped.
9460std::pair<SDValue, SDValue>
9462 SelectionDAG &DAG) const {
9463 SDLoc SL(LD);
9464 SDValue Chain = LD->getChain();
9465 SDValue BasePTR = LD->getBasePtr();
9466 EVT SrcVT = LD->getMemoryVT();
9467 EVT DstVT = LD->getValueType(0);
9468 ISD::LoadExtType ExtType = LD->getExtensionType();
9469
9470 if (SrcVT.isScalableVector())
9471 report_fatal_error("Cannot scalarize scalable vector loads");
9472
9473 unsigned NumElem = SrcVT.getVectorNumElements();
9474
9475 EVT SrcEltVT = SrcVT.getScalarType();
9476 EVT DstEltVT = DstVT.getScalarType();
9477
9478 // A vector must always be stored in memory as-is, i.e. without any padding
9479 // between the elements, since various code depend on it, e.g. in the
9480 // handling of a bitcast of a vector type to int, which may be done with a
9481 // vector store followed by an integer load. A vector that does not have
9482 // elements that are byte-sized must therefore be stored as an integer
9483 // built out of the extracted vector elements.
9484 if (!SrcEltVT.isByteSized()) {
9485 unsigned NumLoadBits = SrcVT.getStoreSizeInBits();
9486 EVT LoadVT = EVT::getIntegerVT(*DAG.getContext(), NumLoadBits);
9487
9488 unsigned NumSrcBits = SrcVT.getSizeInBits();
9489 EVT SrcIntVT = EVT::getIntegerVT(*DAG.getContext(), NumSrcBits);
9490
9491 unsigned SrcEltBits = SrcEltVT.getSizeInBits();
9492 SDValue SrcEltBitMask = DAG.getConstant(
9493 APInt::getLowBitsSet(NumLoadBits, SrcEltBits), SL, LoadVT);
9494
9495 // Load the whole vector and avoid masking off the top bits as it makes
9496 // the codegen worse.
9497 SDValue Load =
9498 DAG.getExtLoad(ISD::EXTLOAD, SL, LoadVT, Chain, BasePTR,
9499 LD->getPointerInfo(), SrcIntVT, LD->getOriginalAlign(),
9500 LD->getMemOperand()->getFlags(), LD->getAAInfo());
// NOTE(review): orig. line 9501 dropped — presumably
// `SmallVector<SDValue, 8> Vals;` used by the loop below.
9502
9503 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
// Element bit position depends on endianness of the target layout.
9504 unsigned ShiftIntoIdx =
9505 (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
9506 SDValue ShiftAmount =
9507 DAG.getShiftAmountConstant(ShiftIntoIdx * SrcEltVT.getSizeInBits(),
9508 LoadVT, SL, /*LegalTypes=*/false);
9509 SDValue ShiftedElt = DAG.getNode(ISD::SRL, SL, LoadVT, Load, ShiftAmount);
9510 SDValue Elt =
9511 DAG.getNode(ISD::AND, SL, LoadVT, ShiftedElt, SrcEltBitMask);
9512 SDValue Scalar = DAG.getNode(ISD::TRUNCATE, SL, SrcEltVT, Elt);
9513
9514 if (ExtType != ISD::NON_EXTLOAD) {
9515 unsigned ExtendOp = ISD::getExtForLoadExtType(false, ExtType);
9516 Scalar = DAG.getNode(ExtendOp, SL, DstEltVT, Scalar);
9517 }
9518
9519 Vals.push_back(Scalar);
9520 }
9521
9522 SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
9523 return std::make_pair(Value, Load.getValue(1));
9524 }
9525
9526 unsigned Stride = SrcEltVT.getSizeInBits() / 8;
9527 assert(SrcEltVT.isByteSized());
// NOTE(review): orig. line 9529 dropped — presumably
// `SmallVector<SDValue, 8> Vals;` paired with LoadChains below.
9528
9530 SmallVector<SDValue, 8> LoadChains;
9531
9532 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
9533 SDValue ScalarLoad =
9534 DAG.getExtLoad(ExtType, SL, DstEltVT, Chain, BasePTR,
9535 LD->getPointerInfo().getWithOffset(Idx * Stride),
9536 SrcEltVT, LD->getOriginalAlign(),
9537 LD->getMemOperand()->getFlags(), LD->getAAInfo());
9538
9539 BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, TypeSize::getFixed(Stride));
9540
9541 Vals.push_back(ScalarLoad.getValue(0));
9542 LoadChains.push_back(ScalarLoad.getValue(1));
9543 }
9544
// Merge all element-load chains so later users depend on every load.
9545 SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
9546 SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
9547
9548 return std::make_pair(Value, NewChain);
9549}
9550
// Scalarize a vector store into per-element truncating stores (byte-sized
// elements) or a single integer store assembled with shift/or (sub-byte
// elements). Aborts via report_fatal_error on scalable vectors.
// NOTE(review): lossy doxygen extraction — the first signature line
// (orig. 9551, presumably `SDValue TargetLowering::scalarizeVectorStore(
// StoreSDNode *ST,`) was dropped.
9552 SelectionDAG &DAG) const {
9553 SDLoc SL(ST);
9554
9555 SDValue Chain = ST->getChain();
9556 SDValue BasePtr = ST->getBasePtr();
9557 SDValue Value = ST->getValue();
9558 EVT StVT = ST->getMemoryVT();
9559
9560 if (StVT.isScalableVector())
9561 report_fatal_error("Cannot scalarize scalable vector stores");
9562
9563 // The type of the data we want to save
9564 EVT RegVT = Value.getValueType();
9565 EVT RegSclVT = RegVT.getScalarType();
9566
9567 // The type of data as saved in memory.
9568 EVT MemSclVT = StVT.getScalarType();
9569
9570 unsigned NumElem = StVT.getVectorNumElements();
9571
9572 // A vector must always be stored in memory as-is, i.e. without any padding
9573 // between the elements, since various code depend on it, e.g. in the
9574 // handling of a bitcast of a vector type to int, which may be done with a
9575 // vector store followed by an integer load. A vector that does not have
9576 // elements that are byte-sized must therefore be stored as an integer
9577 // built out of the extracted vector elements.
9578 if (!MemSclVT.isByteSized()) {
9579 unsigned NumBits = StVT.getSizeInBits();
9580 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);
9581
9582 SDValue CurrVal = DAG.getConstant(0, SL, IntVT);
9583
// OR each truncated element into its bit slot of one wide integer.
9584 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
9585 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
9586 DAG.getVectorIdxConstant(Idx, SL));
9587 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt);
9588 SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc);
// Bit position mirrors the endianness handling in scalarizeVectorLoad.
9589 unsigned ShiftIntoIdx =
9590 (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
9591 SDValue ShiftAmount =
9592 DAG.getConstant(ShiftIntoIdx * MemSclVT.getSizeInBits(), SL, IntVT);
9593 SDValue ShiftedElt =
9594 DAG.getNode(ISD::SHL, SL, IntVT, ExtElt, ShiftAmount);
9595 CurrVal = DAG.getNode(ISD::OR, SL, IntVT, CurrVal, ShiftedElt);
9596 }
9597
9598 return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(),
9599 ST->getOriginalAlign(), ST->getMemOperand()->getFlags(),
9600 ST->getAAInfo());
9601 }
9602
9603 // Store Stride in bytes
9604 unsigned Stride = MemSclVT.getSizeInBits() / 8;
9605 assert(Stride && "Zero stride!");
9606 // Extract each of the elements from the original vector and save them into
9607 // memory individually.
// NOTE(review): orig. line 9608 dropped — presumably
// `SmallVector<SDValue, 8> Stores;` used below.
9609 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
9610 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
9611 DAG.getVectorIdxConstant(Idx, SL));
9612
9613 SDValue Ptr =
9614 DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::getFixed(Idx * Stride));
9615
9616 // This scalar TruncStore may be illegal, but we legalize it later.
9617 SDValue Store = DAG.getTruncStore(
9618 Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride),
9619 MemSclVT, ST->getOriginalAlign(), ST->getMemOperand()->getFlags(),
9620 ST->getAAInfo());
9621
9622 Stores.push_back(Store);
9623 }
9624
// Tie all element stores together into one output chain.
9625 return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores);
9626}
9627
9628std::pair<SDValue, SDValue>
9630 assert(LD->getAddressingMode() == ISD::UNINDEXED &&
9631 "unaligned indexed loads not implemented!");
9632 SDValue Chain = LD->getChain();
9633 SDValue Ptr = LD->getBasePtr();
9634 EVT VT = LD->getValueType(0);
9635 EVT LoadedVT = LD->getMemoryVT();
9636 SDLoc dl(LD);
9637 auto &MF = DAG.getMachineFunction();
9638
9639 if (VT.isFloatingPoint() || VT.isVector()) {
9640 EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
9641 if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
9642 if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
9643 LoadedVT.isVector()) {
9644 // Scalarize the load and let the individual components be handled.
9645 return scalarizeVectorLoad(LD, DAG);
9646 }
9647
9648 // Expand to a (misaligned) integer load of the same size,
9649 // then bitconvert to floating point or vector.
9650 SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr,
9651 LD->getMemOperand());
9652 SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
9653 if (LoadedVT != VT)
9654 Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
9655 ISD::ANY_EXTEND, dl, VT, Result);
9656
9657 return std::make_pair(Result, newLoad.getValue(1));
9658 }
9659
9660 // Copy the value to a (aligned) stack slot using (unaligned) integer
9661 // loads and stores, then do a (aligned) load from the stack slot.
9662 MVT RegVT = getRegisterType(*DAG.getContext(), intVT);
9663 unsigned LoadedBytes = LoadedVT.getStoreSize();
9664 unsigned RegBytes = RegVT.getSizeInBits() / 8;
9665 unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
9666
9667 // Make sure the stack slot is also aligned for the register type.
9668 SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
9669 auto FrameIndex = cast<FrameIndexSDNode>(StackBase.getNode())->getIndex();
9671 SDValue StackPtr = StackBase;
9672 unsigned Offset = 0;
9673
9674 EVT PtrVT = Ptr.getValueType();
9675 EVT StackPtrVT = StackPtr.getValueType();
9676
9677 SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
9678 SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
9679
9680 // Do all but one copies using the full register width.
9681 for (unsigned i = 1; i < NumRegs; i++) {
9682 // Load one integer register's worth from the original location.
9683 SDValue Load = DAG.getLoad(
9684 RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset),
9685 LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
9686 LD->getAAInfo());
9687 // Follow the load with a store to the stack slot. Remember the store.
9688 Stores.push_back(DAG.getStore(
9689 Load.getValue(1), dl, Load, StackPtr,
9690 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset)));
9691 // Increment the pointers.
9692 Offset += RegBytes;
9693
9694 Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
9695 StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
9696 }
9697
9698 // The last copy may be partial. Do an extending load.
9699 EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
9700 8 * (LoadedBytes - Offset));
9701 SDValue Load =
9702 DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
9703 LD->getPointerInfo().getWithOffset(Offset), MemVT,
9704 LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
9705 LD->getAAInfo());
9706 // Follow the load with a store to the stack slot. Remember the store.
9707 // On big-endian machines this requires a truncating store to ensure
9708 // that the bits end up in the right place.
9709 Stores.push_back(DAG.getTruncStore(
9710 Load.getValue(1), dl, Load, StackPtr,
9711 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT));
9712
9713 // The order of the stores doesn't matter - say it with a TokenFactor.
9714 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
9715
9716 // Finally, perform the original load only redirected to the stack slot.
9717 Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
9718 MachinePointerInfo::getFixedStack(MF, FrameIndex, 0),
9719 LoadedVT);
9720
9721 // Callers expect a MERGE_VALUES node.
9722 return std::make_pair(Load, TF);
9723 }
9724
9725 assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
9726 "Unaligned load of unsupported type.");
9727
9728 // Compute the new VT that is half the size of the old one. This is an
9729 // integer MVT.
9730 unsigned NumBits = LoadedVT.getSizeInBits();
9731 EVT NewLoadedVT;
9732 NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
9733 NumBits >>= 1;
9734
9735 Align Alignment = LD->getOriginalAlign();
9736 unsigned IncrementSize = NumBits / 8;
9737 ISD::LoadExtType HiExtType = LD->getExtensionType();
9738
9739 // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
9740 if (HiExtType == ISD::NON_EXTLOAD)
9741 HiExtType = ISD::ZEXTLOAD;
9742
9743 // Load the value in two parts
9744 SDValue Lo, Hi;
9745 if (DAG.getDataLayout().isLittleEndian()) {
9746 Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
9747 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
9748 LD->getAAInfo());
9749
9750 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
9751 Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
9752 LD->getPointerInfo().getWithOffset(IncrementSize),
9753 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
9754 LD->getAAInfo());
9755 } else {
9756 Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
9757 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
9758 LD->getAAInfo());
9759
9760 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
9761 Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
9762 LD->getPointerInfo().getWithOffset(IncrementSize),
9763 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
9764 LD->getAAInfo());
9765 }
9766
9767 // aggregate the two parts
9768 SDValue ShiftAmount = DAG.getShiftAmountConstant(NumBits, VT, dl);
9769 SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
9770 Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);
9771
9772 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
9773 Hi.getValue(1));
9774
9775 return std::make_pair(Result, TF);
9776}
9777
9779 SelectionDAG &DAG) const {
9780 assert(ST->getAddressingMode() == ISD::UNINDEXED &&
9781 "unaligned indexed stores not implemented!");
9782 SDValue Chain = ST->getChain();
9783 SDValue Ptr = ST->getBasePtr();
9784 SDValue Val = ST->getValue();
9785 EVT VT = Val.getValueType();
9786 Align Alignment = ST->getOriginalAlign();
9787 auto &MF = DAG.getMachineFunction();
9788 EVT StoreMemVT = ST->getMemoryVT();
9789
9790 SDLoc dl(ST);
9791 if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) {
9792 EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
9793 if (isTypeLegal(intVT)) {
9794 if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
9795 StoreMemVT.isVector()) {
9796 // Scalarize the store and let the individual components be handled.
9797 SDValue Result = scalarizeVectorStore(ST, DAG);
9798 return Result;
9799 }
9800 // Expand to a bitconvert of the value to the integer type of the
9801 // same size, then a (misaligned) int store.
9802 // FIXME: Does not handle truncating floating point stores!
9803 SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
9804 Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
9805 Alignment, ST->getMemOperand()->getFlags());
9806 return Result;
9807 }
9808 // Do a (aligned) store to a stack slot, then copy from the stack slot
9809 // to the final destination using (unaligned) integer loads and stores.
9810 MVT RegVT = getRegisterType(
9811 *DAG.getContext(),
9812 EVT::getIntegerVT(*DAG.getContext(), StoreMemVT.getSizeInBits()));
9813 EVT PtrVT = Ptr.getValueType();
9814 unsigned StoredBytes = StoreMemVT.getStoreSize();
9815 unsigned RegBytes = RegVT.getSizeInBits() / 8;
9816 unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
9817
9818 // Make sure the stack slot is also aligned for the register type.
9819 SDValue StackPtr = DAG.CreateStackTemporary(StoreMemVT, RegVT);
9820 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
9821
9822 // Perform the original store, only redirected to the stack slot.
9823 SDValue Store = DAG.getTruncStore(
9824 Chain, dl, Val, StackPtr,
9825 MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoreMemVT);
9826
9827 EVT StackPtrVT = StackPtr.getValueType();
9828
9829 SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
9830 SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
9832 unsigned Offset = 0;
9833
9834 // Do all but one copies using the full register width.
9835 for (unsigned i = 1; i < NumRegs; i++) {
9836 // Load one integer register's worth from the stack slot.
9837 SDValue Load = DAG.getLoad(
9838 RegVT, dl, Store, StackPtr,
9839 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset));
9840 // Store it to the final location. Remember the store.
9841 Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
9842 ST->getPointerInfo().getWithOffset(Offset),
9843 ST->getOriginalAlign(),
9844 ST->getMemOperand()->getFlags()));
9845 // Increment the pointers.
9846 Offset += RegBytes;
9847 StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
9848 Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
9849 }
9850
9851 // The last store may be partial. Do a truncating store. On big-endian
9852 // machines this requires an extending load from the stack slot to ensure
9853 // that the bits are in the right place.
9854 EVT LoadMemVT =
9855 EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset));
9856
9857 // Load from the stack slot.
9858 SDValue Load = DAG.getExtLoad(
9859 ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
9860 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), LoadMemVT);
9861
9862 Stores.push_back(
9863 DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
9864 ST->getPointerInfo().getWithOffset(Offset), LoadMemVT,
9865 ST->getOriginalAlign(),
9866 ST->getMemOperand()->getFlags(), ST->getAAInfo()));
9867 // The order of the stores doesn't matter - say it with a TokenFactor.
9868 SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
9869 return Result;
9870 }
9871
9872 assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
9873 "Unaligned store of unknown type.");
9874 // Get the half-size VT
9875 EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(*DAG.getContext());
9876 unsigned NumBits = NewStoredVT.getFixedSizeInBits();
9877 unsigned IncrementSize = NumBits / 8;
9878
9879 // Divide the stored value in two parts.
9880 SDValue ShiftAmount =
9881 DAG.getShiftAmountConstant(NumBits, Val.getValueType(), dl);
9882 SDValue Lo = Val;
9883 // If Val is a constant, replace the upper bits with 0. The SRL will constant
9884 // fold and not use the upper bits. A smaller constant may be easier to
9885 // materialize.
9886 if (auto *C = dyn_cast<ConstantSDNode>(Lo); C && !C->isOpaque())
9887 Lo = DAG.getNode(
9888 ISD::AND, dl, VT, Lo,
9889 DAG.getConstant(APInt::getLowBitsSet(VT.getSizeInBits(), NumBits), dl,
9890 VT));
9891 SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);
9892
9893 // Store the two parts
9894 SDValue Store1, Store2;
9895 Store1 = DAG.getTruncStore(Chain, dl,
9896 DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
9897 Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
9898 ST->getMemOperand()->getFlags());
9899
9900 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
9901 Store2 = DAG.getTruncStore(
9902 Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
9903 ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
9904 ST->getMemOperand()->getFlags(), ST->getAAInfo());
9905
9906 SDValue Result =
9907 DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
9908 return Result;
9909}
9910
9911SDValue
9913 const SDLoc &DL, EVT DataVT,
9914 SelectionDAG &DAG,
9915 bool IsCompressedMemory) const {
9916 SDValue Increment;
9917 EVT AddrVT = Addr.getValueType();
9918 EVT MaskVT = Mask.getValueType();
9919 assert(DataVT.getVectorElementCount() == MaskVT.getVectorElementCount() &&
9920 "Incompatible types of Data and Mask");
9921 if (IsCompressedMemory) {
9922 if (DataVT.isScalableVector())
9924 "Cannot currently handle compressed memory with scalable vectors");
9925 // Incrementing the pointer according to number of '1's in the mask.
9926 EVT MaskIntVT = EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits());
9927 SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask);
9928 if (MaskIntVT.getSizeInBits() < 32) {
9929 MaskInIntReg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg);
9930 MaskIntVT = MVT::i32;
9931 }
9932
9933 // Count '1's with POPCNT.
9934 Increment = DAG.getNode(ISD::CTPOP, DL, MaskIntVT, MaskInIntReg);
9935 Increment = DAG.getZExtOrTrunc(Increment, DL, AddrVT);
9936 // Scale is an element size in bytes.
9937 SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL,
9938 AddrVT);
9939 Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale);
9940 } else if (DataVT.isScalableVector()) {
9941 Increment = DAG.getVScale(DL, AddrVT,
9942 APInt(AddrVT.getFixedSizeInBits(),
9943 DataVT.getStoreSize().getKnownMinValue()));
9944 } else
9945 Increment = DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT);
9946
9947 return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
9948}
9949
9951 EVT VecVT, const SDLoc &dl,
9952 ElementCount SubEC) {
9953 assert(!(SubEC.isScalable() && VecVT.isFixedLengthVector()) &&
9954 "Cannot index a scalable vector within a fixed-width vector");
9955
9956 unsigned NElts = VecVT.getVectorMinNumElements();
9957 unsigned NumSubElts = SubEC.getKnownMinValue();
9958 EVT IdxVT = Idx.getValueType();
9959
9960 if (VecVT.isScalableVector() && !SubEC.isScalable()) {
9961 // If this is a constant index and we know the value plus the number of the
9962 // elements in the subvector minus one is less than the minimum number of
9963 // elements then it's safe to return Idx.
9964 if (auto *IdxCst = dyn_cast<ConstantSDNode>(Idx))
9965 if (IdxCst->getZExtValue() + (NumSubElts - 1) < NElts)
9966 return Idx;
9967 SDValue VS =
9968 DAG.getVScale(dl, IdxVT, APInt(IdxVT.getFixedSizeInBits(), NElts));
9969 unsigned SubOpcode = NumSubElts <= NElts ? ISD::SUB : ISD::USUBSAT;
9970 SDValue Sub = DAG.getNode(SubOpcode, dl, IdxVT, VS,
9971 DAG.getConstant(NumSubElts, dl, IdxVT));
9972 return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx, Sub);
9973 }
9974 if (isPowerOf2_32(NElts) && NumSubElts == 1) {
9975 APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(), Log2_32(NElts));
9976 return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
9977 DAG.getConstant(Imm, dl, IdxVT));
9978 }
9979 unsigned MaxIndex = NumSubElts < NElts ? NElts - NumSubElts : 0;
9980 return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx,
9981 DAG.getConstant(MaxIndex, dl, IdxVT));
9982}
9983
9985 SDValue VecPtr, EVT VecVT,
9986 SDValue Index) const {
9987 return getVectorSubVecPointer(
9988 DAG, VecPtr, VecVT,
9990 Index);
9991}
9992
9994 SDValue VecPtr, EVT VecVT,
9995 EVT SubVecVT,
9996 SDValue Index) const {
9997 SDLoc dl(Index);
9998 // Make sure the index type is big enough to compute in.
9999 Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType());
10000
10001 EVT EltVT = VecVT.getVectorElementType();
10002
10003 // Calculate the element offset and add it to the pointer.
10004 unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size.
10005 assert(EltSize * 8 == EltVT.getFixedSizeInBits() &&
10006 "Converting bits to bytes lost precision");
10007 assert(SubVecVT.getVectorElementType() == EltVT &&
10008 "Sub-vector must be a vector with matching element type");
10009 Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl,
10010 SubVecVT.getVectorElementCount());
10011
10012 EVT IdxVT = Index.getValueType();
10013 if (SubVecVT.isScalableVector())
10014 Index =
10015 DAG.getNode(ISD::MUL, dl, IdxVT, Index,
10016 DAG.getVScale(dl, IdxVT, APInt(IdxVT.getSizeInBits(), 1)));
10017
10018 Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
10019 DAG.getConstant(EltSize, dl, IdxVT));
10020 return DAG.getMemBasePlusOffset(VecPtr, Index, dl);
10021}
10022
10023//===----------------------------------------------------------------------===//
10024// Implementation of Emulated TLS Model
10025//===----------------------------------------------------------------------===//
10026
10028 SelectionDAG &DAG) const {
10029 // Access to address of TLS varialbe xyz is lowered to a function call:
10030 // __emutls_get_address( address of global variable named "__emutls_v.xyz" )
10031 EVT PtrVT = getPointerTy(DAG.getDataLayout());
10032 PointerType *VoidPtrType = PointerType::get(*DAG.getContext(), 0);
10033 SDLoc dl(GA);
10034
10035 ArgListTy Args;
10036 ArgListEntry Entry;
10037 std::string NameString = ("__emutls_v." + GA->getGlobal()->getName()).str();
10038 Module *VariableModule = const_cast<Module*>(GA->getGlobal()->getParent());
10039 StringRef EmuTlsVarName(NameString);
10040 GlobalVariable *EmuTlsVar = VariableModule->getNamedGlobal(EmuTlsVarName);
10041 assert(EmuTlsVar && "Cannot find EmuTlsVar ");
10042 Entry.Node = DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT);
10043 Entry.Ty = VoidPtrType;
10044 Args.push_back(Entry);
10045
10046 SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT);
10047
10049 CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
10050 CLI.setLibCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args));
10051 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
10052
10053 // TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
10054 // At last for X86 targets, maybe good for other targets too?
10056 MFI.setAdjustsStack(true); // Is this only for X86 target?
10057 MFI.setHasCalls(true);
10058
10059 assert((GA->getOffset() == 0) &&
10060 "Emulated TLS must have zero offset in GlobalAddressSDNode");
10061 return CallResult.first;
10062}
10063
10065 SelectionDAG &DAG) const {
10066 assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
10067 if (!isCtlzFast())
10068 return SDValue();
10069 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
10070 SDLoc dl(Op);
10071 if (isNullConstant(Op.getOperand(1)) && CC == ISD::SETEQ) {
10072 EVT VT = Op.getOperand(0).getValueType();
10073 SDValue Zext = Op.getOperand(0);
10074 if (VT.bitsLT(MVT::i32)) {
10075 VT = MVT::i32;
10076 Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
10077 }
10078 unsigned Log2b = Log2_32(VT.getSizeInBits());
10079 SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
10080 SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
10081 DAG.getConstant(Log2b, dl, MVT::i32));
10082 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
10083 }
10084 return SDValue();
10085}
10086
10088 SDValue Op0 = Node->getOperand(0);
10089 SDValue Op1 = Node->getOperand(1);
10090 EVT VT = Op0.getValueType();
10091 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10092 unsigned Opcode = Node->getOpcode();
10093 SDLoc DL(Node);
10094
10095 // umax(x,1) --> sub(x,cmpeq(x,0)) iff cmp result is allbits
10096 if (Opcode == ISD::UMAX && llvm::isOneOrOneSplat(Op1, true) && BoolVT == VT &&
10098 Op0 = DAG.getFreeze(Op0);
10099 SDValue Zero = DAG.getConstant(0, DL, VT);
10100 return DAG.getNode(ISD::SUB, DL, VT, Op0,
10101 DAG.getSetCC(DL, VT, Op0, Zero, ISD::SETEQ));
10102 }
10103
10104 // umin(x,y) -> sub(x,usubsat(x,y))
10105 // TODO: Missing freeze(Op0)?
10106 if (Opcode == ISD::UMIN && isOperationLegal(ISD::SUB, VT) &&
10108 return DAG.getNode(ISD::SUB, DL, VT, Op0,
10109 DAG.getNode(ISD::USUBSAT, DL, VT, Op0, Op1));
10110 }
10111
10112 // umax(x,y) -> add(x,usubsat(y,x))
10113 // TODO: Missing freeze(Op0)?
10114 if (Opcode == ISD::UMAX && isOperationLegal(ISD::ADD, VT) &&
10116 return DAG.getNode(ISD::ADD, DL, VT, Op0,
10117 DAG.getNode(ISD::USUBSAT, DL, VT, Op1, Op0));
10118 }
10119
10120 // FIXME: Should really try to split the vector in case it's legal on a
10121 // subvector.
10123 return DAG.UnrollVectorOp(Node);
10124
10125 // Attempt to find an existing SETCC node that we can reuse.
10126 // TODO: Do we need a generic doesSETCCNodeExist?
10127 // TODO: Missing freeze(Op0)/freeze(Op1)?
10128 auto buildMinMax = [&](ISD::CondCode PrefCC, ISD::CondCode AltCC,
10129 ISD::CondCode PrefCommuteCC,
10130 ISD::CondCode AltCommuteCC) {
10131 SDVTList BoolVTList = DAG.getVTList(BoolVT);
10132 for (ISD::CondCode CC : {PrefCC, AltCC}) {
10133 if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
10134 {Op0, Op1, DAG.getCondCode(CC)})) {
10135 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
10136 return DAG.getSelect(DL, VT, Cond, Op0, Op1);
10137 }
10138 }
10139 for (ISD::CondCode CC : {PrefCommuteCC, AltCommuteCC}) {
10140 if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
10141 {Op0, Op1, DAG.getCondCode(CC)})) {
10142 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
10143 return DAG.getSelect(DL, VT, Cond, Op1, Op0);
10144 }
10145 }
10146 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, PrefCC);
10147 return DAG.getSelect(DL, VT, Cond, Op0, Op1);
10148 };
10149
10150 // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
10151 // -> Y = (A < B) ? B : A
10152 // -> Y = (A >= B) ? A : B
10153 // -> Y = (A <= B) ? B : A
10154 switch (Opcode) {
10155 case ISD::SMAX:
10156 return buildMinMax(ISD::SETGT, ISD::SETGE, ISD::SETLT, ISD::SETLE);
10157 case ISD::SMIN:
10158 return buildMinMax(ISD::SETLT, ISD::SETLE, ISD::SETGT, ISD::SETGE);
10159 case ISD::UMAX:
10160 return buildMinMax(ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE);
10161 case ISD::UMIN:
10162 return buildMinMax(ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE);
10163 }
10164
10165 llvm_unreachable("How did we get here?");
10166}
10167
10169 unsigned Opcode = Node->getOpcode();
10170 SDValue LHS = Node->getOperand(0);
10171 SDValue RHS = Node->getOperand(1);
10172 EVT VT = LHS.getValueType();
10173 SDLoc dl(Node);
10174
10175 assert(VT == RHS.getValueType() && "Expected operands to be the same type");
10176 assert(VT.isInteger() && "Expected operands to be integers");
10177
10178 // usub.sat(a, b) -> umax(a, b) - b
10179 if (Opcode == ISD::USUBSAT && isOperationLegal(ISD::UMAX, VT)) {
10180 SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS);
10181 return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
10182 }
10183
10184 // uadd.sat(a, b) -> umin(a, ~b) + b
10185 if (Opcode == ISD::UADDSAT && isOperationLegal(ISD::UMIN, VT)) {
10186 SDValue InvRHS = DAG.getNOT(dl, RHS, VT);
10187 SDValue Min = DAG.getNode(ISD::UMIN, dl, VT, LHS, InvRHS);
10188 return DAG.getNode(ISD::ADD, dl, VT, Min, RHS);
10189 }
10190
10191 unsigned OverflowOp;
10192 switch (Opcode) {
10193 case ISD::SADDSAT:
10194 OverflowOp = ISD::SADDO;
10195 break;
10196 case ISD::UADDSAT:
10197 OverflowOp = ISD::UADDO;
10198 break;
10199 case ISD::SSUBSAT:
10200 OverflowOp = ISD::SSUBO;
10201 break;
10202 case ISD::USUBSAT:
10203 OverflowOp = ISD::USUBO;
10204 break;
10205 default:
10206 llvm_unreachable("Expected method to receive signed or unsigned saturation "
10207 "addition or subtraction node.");
10208 }
10209
10210 // FIXME: Should really try to split the vector in case it's legal on a
10211 // subvector.
10213 return DAG.UnrollVectorOp(Node);
10214
10215 unsigned BitWidth = LHS.getScalarValueSizeInBits();
10216 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10217 SDValue Result = DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
10218 SDValue SumDiff = Result.getValue(0);
10219 SDValue Overflow = Result.getValue(1);
10220 SDValue Zero = DAG.getConstant(0, dl, VT);
10221 SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);
10222
10223 if (Opcode == ISD::UADDSAT) {
10225 // (LHS + RHS) | OverflowMask
10226 SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
10227 return DAG.getNode(ISD::OR, dl, VT, SumDiff, OverflowMask);
10228 }
10229 // Overflow ? 0xffff.... : (LHS + RHS)
10230 return DAG.getSelect(dl, VT, Overflow, AllOnes, SumDiff);
10231 }
10232
10233 if (Opcode == ISD::USUBSAT) {
10235 // (LHS - RHS) & ~OverflowMask
10236 SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
10237 SDValue Not = DAG.getNOT(dl, OverflowMask, VT);
10238 return DAG.getNode(ISD::AND, dl, VT, SumDiff, Not);
10239 }
10240 // Overflow ? 0 : (LHS - RHS)
10241 return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
10242 }
10243
10244 if (Opcode == ISD::SADDSAT || Opcode == ISD::SSUBSAT) {
10247
10248 KnownBits KnownLHS = DAG.computeKnownBits(LHS);
10249 KnownBits KnownRHS = DAG.computeKnownBits(RHS);
10250
10251 // If either of the operand signs are known, then they are guaranteed to
10252 // only saturate in one direction. If non-negative they will saturate
10253 // towards SIGNED_MAX, if negative they will saturate towards SIGNED_MIN.
10254 //
10255 // In the case of ISD::SSUBSAT, 'x - y' is equivalent to 'x + (-y)', so the
10256 // sign of 'y' has to be flipped.
10257
10258 bool LHSIsNonNegative = KnownLHS.isNonNegative();
10259 bool RHSIsNonNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNonNegative()
10260 : KnownRHS.isNegative();
10261 if (LHSIsNonNegative || RHSIsNonNegative) {
10262 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
10263 return DAG.getSelect(dl, VT, Overflow, SatMax, SumDiff);
10264 }
10265
10266 bool LHSIsNegative = KnownLHS.isNegative();
10267 bool RHSIsNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNegative()
10268 : KnownRHS.isNonNegative();
10269 if (LHSIsNegative || RHSIsNegative) {
10270 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
10271 return DAG.getSelect(dl, VT, Overflow, SatMin, SumDiff);
10272 }
10273 }
10274
10275 // Overflow ? (SumDiff >> BW) ^ MinVal : SumDiff
10277 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
10278 SDValue Shift = DAG.getNode(ISD::SRA, dl, VT, SumDiff,
10279 DAG.getConstant(BitWidth - 1, dl, VT));
10280 Result = DAG.getNode(ISD::XOR, dl, VT, Shift, SatMin);
10281 return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
10282}
10283
10285 unsigned Opcode = Node->getOpcode();
10286 bool IsSigned = Opcode == ISD::SSHLSAT;
10287 SDValue LHS = Node->getOperand(0);
10288 SDValue RHS = Node->getOperand(1);
10289 EVT VT = LHS.getValueType();
10290 SDLoc dl(Node);
10291
10292 assert((Node->getOpcode() == ISD::SSHLSAT ||
10293 Node->getOpcode() == ISD::USHLSAT) &&
10294 "Expected a SHLSAT opcode");
10295 assert(VT == RHS.getValueType() && "Expected operands to be the same type");
10296 assert(VT.isInteger() && "Expected operands to be integers");
10297
10299 return DAG.UnrollVectorOp(Node);
10300
10301 // If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate.
10302
10303 unsigned BW = VT.getScalarSizeInBits();
10304 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10305 SDValue Result = DAG.getNode(ISD::SHL, dl, VT, LHS, RHS);
10306 SDValue Orig =
10307 DAG.getNode(IsSigned ? ISD::SRA : ISD::SRL, dl, VT, Result, RHS);
10308
10309 SDValue SatVal;
10310 if (IsSigned) {
10311 SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(BW), dl, VT);
10312 SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(BW), dl, VT);
10313 SDValue Cond =
10314 DAG.getSetCC(dl, BoolVT, LHS, DAG.getConstant(0, dl, VT), ISD::SETLT);
10315 SatVal = DAG.getSelect(dl, VT, Cond, SatMin, SatMax);
10316 } else {
10317 SatVal = DAG.getConstant(APInt::getMaxValue(BW), dl, VT);
10318 }
10319 SDValue Cond = DAG.getSetCC(dl, BoolVT, LHS, Orig, ISD::SETNE);
10320 return DAG.getSelect(dl, VT, Cond, SatVal, Result);
10321}
10322
10324 bool Signed, EVT WideVT,
10325 const SDValue LL, const SDValue LH,
10326 const SDValue RL, const SDValue RH,
10327 SDValue &Lo, SDValue &Hi) const {
10328 // We can fall back to a libcall with an illegal type for the MUL if we
10329 // have a libcall big enough.
10330 // Also, we can fall back to a division in some cases, but that's a big
10331 // performance hit in the general case.
10332 RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
10333 if (WideVT == MVT::i16)
10334 LC = RTLIB::MUL_I16;
10335 else if (WideVT == MVT::i32)
10336 LC = RTLIB::MUL_I32;
10337 else if (WideVT == MVT::i64)
10338 LC = RTLIB::MUL_I64;
10339 else if (WideVT == MVT::i128)
10340 LC = RTLIB::MUL_I128;
10341
10342 if (LC == RTLIB::UNKNOWN_LIBCALL || !getLibcallName(LC)) {
10343 // We'll expand the multiplication by brute force because we have no other
10344 // options. This is a trivially-generalized version of the code from
10345 // Hacker's Delight (itself derived from Knuth's Algorithm M from section
10346 // 4.3.1).
10347 EVT VT = LL.getValueType();
10348 unsigned Bits = VT.getSizeInBits();
10349 unsigned HalfBits = Bits >> 1;
10350 SDValue Mask =
10351 DAG.getConstant(APInt::getLowBitsSet(Bits, HalfBits), dl, VT);
10352 SDValue LLL = DAG.getNode(ISD::AND, dl, VT, LL, Mask);
10353 SDValue RLL = DAG.getNode(ISD::AND, dl, VT, RL, Mask);
10354
10355 SDValue T = DAG.getNode(ISD::MUL, dl, VT, LLL, RLL);
10356 SDValue TL = DAG.getNode(ISD::AND, dl, VT, T, Mask);
10357
10358 SDValue Shift = DAG.getShiftAmountConstant(HalfBits, VT, dl);
10359 SDValue TH = DAG.getNode(ISD::SRL, dl, VT, T, Shift);
10360 SDValue LLH = DAG.getNode(ISD::SRL, dl, VT, LL, Shift);
10361 SDValue RLH = DAG.getNode(ISD::SRL, dl, VT, RL, Shift);
10362
10363 SDValue U = DAG.getNode(ISD::ADD, dl, VT,
10364 DAG.getNode(ISD::MUL, dl, VT, LLH, RLL), TH);
10365 SDValue UL = DAG.getNode(ISD::AND, dl, VT, U, Mask);
10366 SDValue UH = DAG.getNode(ISD::SRL, dl, VT, U, Shift);
10367
10368 SDValue V = DAG.getNode(ISD::ADD, dl, VT,
10369 DAG.getNode(ISD::MUL, dl, VT, LLL, RLH), UL);
10370 SDValue VH = DAG.getNode(ISD::SRL, dl, VT, V, Shift);
10371
10372 SDValue W =
10373 DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LLH, RLH),
10374 DAG.getNode(ISD::ADD, dl, VT, UH, VH));
10375 Lo = DAG.getNode(ISD::ADD, dl, VT, TL,
10376 DAG.getNode(ISD::SHL, dl, VT, V, Shift));
10377
10378 Hi = DAG.getNode(ISD::ADD, dl, VT, W,
10379 DAG.getNode(ISD::ADD, dl, VT,
10380 DAG.getNode(ISD::MUL, dl, VT, RH, LL),
10381 DAG.getNode(ISD::MUL, dl, VT, RL, LH)));
10382 } else {
10383 // Attempt a libcall.
10384 SDValue Ret;
10386 CallOptions.setSExt(Signed);
10387 CallOptions.setIsPostTypeLegalization(true);
10388 if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) {
10389 // Halves of WideVT are packed into registers in different order
10390 // depending on platform endianness. This is usually handled by
10391 // the C calling convention, but we can't defer to it in
10392 // the legalizer.
10393 SDValue Args[] = {LL, LH, RL, RH};
10394 Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
10395 } else {
10396 SDValue Args[] = {LH, LL, RH, RL};
10397 Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
10398 }
10399 assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
10400 "Ret value is a collection of constituent nodes holding result.");
10401 if (DAG.getDataLayout().isLittleEndian()) {
10402 // Same as above.
10403 Lo = Ret.getOperand(0);
10404 Hi = Ret.getOperand(1);
10405 } else {
10406 Lo = Ret.getOperand(1);
10407 Hi = Ret.getOperand(0);
10408 }
10409 }
10410}
10411
// Narrow-operand overload of forceExpandWideMUL: computes the full
// (2 * VTSize)-bit product of LHS and RHS into Lo/Hi by materializing the high
// halves of both operands and delegating to the wide-operand overload above.
// NOTE(review): the first signature line ("void TargetLowering::
// forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl," — dump line 10412)
// was dropped by the extraction; the parameter list below continues it.
// Confirm against upstream TargetLowering.cpp.
10413 bool Signed, const SDValue LHS,
10414 const SDValue RHS, SDValue &Lo,
10415 SDValue &Hi) const {
10416 EVT VT = LHS.getValueType();
10417 assert(RHS.getValueType() == VT && "Mismatching operand types");
10418
10419 SDValue HiLHS;
10420 SDValue HiRHS;
10421 if (Signed) {
10422 // The high part is obtained by SRA'ing all but one of the bits of low
10423 // part.
10424 unsigned LoSize = VT.getFixedSizeInBits();
// Arithmetic shift by (width - 1) replicates the sign bit across the whole
// value, yielding the sign-extended high half of each operand.
10425 HiLHS = DAG.getNode(
10426 ISD::SRA, dl, VT, LHS,
10427 DAG.getConstant(LoSize - 1, dl, getPointerTy(DAG.getDataLayout())));
10428 HiRHS = DAG.getNode(
10429 ISD::SRA, dl, VT, RHS,
10430 DAG.getConstant(LoSize - 1, dl, getPointerTy(DAG.getDataLayout())));
10431 } else {
// Unsigned operands zero-extend, so both high halves are constant zero.
10432 HiLHS = DAG.getConstant(0, dl, VT);
10433 HiRHS = DAG.getConstant(0, dl, VT);
10434 }
// The full product is twice as wide as the operand type.
10435 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);
10436 forceExpandWideMUL(DAG, dl, Signed, WideVT, LHS, HiLHS, RHS, HiRHS, Lo, Hi);
10437 }
10438
// Expand a fixed-point multiply ([US]MULFIX / [US]MULFIXSAT) into simpler
// nodes: a plain MUL (or [SU]MULO-based clamp for the saturating forms) when
// Scale == 0; otherwise a double-width multiply whose halves are recombined
// with FSHR and, for the saturating forms, clamped via compare+select.
// Returns SDValue() when the expansion is not possible (illegal vector case).
// NOTE(review): the extraction dropped dump line 10440 (the
// "TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG)"
// signature line) and dump line 10462 (presumably the legality check guarding
// the plain-MUL fast path below) — confirm against upstream.
10439SDValue
10441 assert((Node->getOpcode() == ISD::SMULFIX ||
10442 Node->getOpcode() == ISD::UMULFIX ||
10443 Node->getOpcode() == ISD::SMULFIXSAT ||
10444 Node->getOpcode() == ISD::UMULFIXSAT) &&
10445 "Expected a fixed point multiplication opcode");
10446
10447 SDLoc dl(Node);
10448 SDValue LHS = Node->getOperand(0);
10449 SDValue RHS = Node->getOperand(1);
10450 EVT VT = LHS.getValueType();
// Operand 2 is the compile-time fixed-point scale (number of fractional bits).
10451 unsigned Scale = Node->getConstantOperandVal(2);
10452 bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
10453 Node->getOpcode() == ISD::UMULFIXSAT);
10454 bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
10455 Node->getOpcode() == ISD::SMULFIXSAT);
10456 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10457 unsigned VTSize = VT.getScalarSizeInBits();
10458
10459 if (!Scale) {
10460 // [us]mul.fix(a, b, 0) -> mul(a, b)
10461 if (!Saturating) {
10463 return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
10464 } else if (Signed && isOperationLegalOrCustom(ISD::SMULO, VT)) {
// Saturating signed case: use SMULO's overflow flag to pick between the
// product and the appropriate saturation constant.
10465 SDValue Result =
10466 DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
10467 SDValue Product = Result.getValue(0);
10468 SDValue Overflow = Result.getValue(1);
10469 SDValue Zero = DAG.getConstant(0, dl, VT);
10470
10471 APInt MinVal = APInt::getSignedMinValue(VTSize);
10472 APInt MaxVal = APInt::getSignedMaxValue(VTSize);
10473 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
10474 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
10475 // Xor the inputs, if resulting sign bit is 0 the product will be
10476 // positive, else negative.
10477 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
10478 SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Xor, Zero, ISD::SETLT);
10479 Result = DAG.getSelect(dl, VT, ProdNeg, SatMin, SatMax);
10480 return DAG.getSelect(dl, VT, Overflow, Result, Product);
10481 } else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) {
// Saturating unsigned case: overflow always saturates to the max value.
10482 SDValue Result =
10483 DAG.getNode(ISD::UMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
10484 SDValue Product = Result.getValue(0);
10485 SDValue Overflow = Result.getValue(1);
10486
10487 APInt MaxVal = APInt::getMaxValue(VTSize);
10488 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
10489 return DAG.getSelect(dl, VT, Overflow, SatMax, Product);
10490 }
10491 }
10492
10493 assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
10494 "Expected scale to be less than the number of bits if signed or at "
10495 "most the number of bits if unsigned.");
10496 assert(LHS.getValueType() == RHS.getValueType() &&
10497 "Expected both operands to be the same type");
10498
10499 // Get the upper and lower bits of the result.
10500 SDValue Lo, Hi;
10501 unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
10502 unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
10503 if (isOperationLegalOrCustom(LoHiOp, VT)) {
10504 SDValue Result = DAG.getNode(LoHiOp, dl, DAG.getVTList(VT, VT), LHS, RHS);
10505 Lo = Result.getValue(0);
10506 Hi = Result.getValue(1);
10507 } else if (isOperationLegalOrCustom(HiOp, VT)) {
10508 Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
10509 Hi = DAG.getNode(HiOp, dl, VT, LHS, RHS);
10510 } else if (VT.isVector()) {
// No LOHI/MULH support and no scalar fallback for vectors: give up and let
// the caller unroll/legalize differently.
10511 return SDValue();
10512 } else {
10513 forceExpandWideMUL(DAG, dl, Signed, LHS, RHS, Lo, Hi);
10514 }
10515
10516 if (Scale == VTSize)
10517 // Result is just the top half since we'd be shifting by the width of the
10518 // operand. Overflow impossible so this works for both UMULFIX and
10519 // UMULFIXSAT.
10520 return Hi;
10521
10522 // The result will need to be shifted right by the scale since both operands
10523 // are scaled. The result is given to us in 2 halves, so we only want part of
10524 // both in the result.
10525 SDValue Result = DAG.getNode(ISD::FSHR, dl, VT, Hi, Lo,
10526 DAG.getShiftAmountConstant(Scale, VT, dl));
10527 if (!Saturating)
10528 return Result;
10529
10530 if (!Signed) {
10531 // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
10532 // widened multiplication) aren't all zeroes.
10533
10534 // Saturate to max if ((Hi >> Scale) != 0),
10535 // which is the same as if (Hi > ((1 << Scale) - 1))
10536 APInt MaxVal = APInt::getMaxValue(VTSize);
10537 SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale),
10538 dl, VT);
10539 Result = DAG.getSelectCC(dl, Hi, LowMask,
10540 DAG.getConstant(MaxVal, dl, VT), Result,
10541 ISD::SETUGT);
10542
10543 return Result;
10544 }
10545
10546 // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
10547 // widened multiplication) aren't all ones or all zeroes.
10548
10549 SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(VTSize), dl, VT);
10550 SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(VTSize), dl, VT);
10551
10552 if (Scale == 0) {
// Scale==0 saturating-signed path (reached when SMULO wasn't legal above):
// overflow iff Hi differs from the sign-replication of Lo.
10553 SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, Lo,
10554 DAG.getShiftAmountConstant(VTSize - 1, VT, dl));
10555 SDValue Overflow = DAG.getSetCC(dl, BoolVT, Hi, Sign, ISD::SETNE);
10556 // Saturated to SatMin if wide product is negative, and SatMax if wide
10557 // product is positive ...
10558 SDValue Zero = DAG.getConstant(0, dl, VT);
10559 SDValue ResultIfOverflow = DAG.getSelectCC(dl, Hi, Zero, SatMin, SatMax,
10560 ISD::SETLT);
10561 // ... but only if we overflowed.
10562 return DAG.getSelect(dl, VT, Overflow, ResultIfOverflow, Result);
10563 }
10564
10565 // We handled Scale==0 above so all the bits to examine is in Hi.
10566
10567 // Saturate to max if ((Hi >> (Scale - 1)) > 0),
10568 // which is the same as if (Hi > (1 << (Scale - 1)) - 1)
10569 SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale - 1),
10570 dl, VT);
10571 Result = DAG.getSelectCC(dl, Hi, LowMask, SatMax, Result, ISD::SETGT);
10572 // Saturate to min if (Hi >> (Scale - 1)) < -1),
10573 // which is the same as if (HI < (-1 << (Scale - 1))
10574 SDValue HighMask =
10575 DAG.getConstant(APInt::getHighBitsSet(VTSize, VTSize - Scale + 1),
10576 dl, VT);
10577 Result = DAG.getSelectCC(dl, Hi, HighMask, SatMin, Result, ISD::SETLT);
10578 return Result;
10579}
10580
// Expand a fixed-point divide ([US]DIVFIX[SAT]) into an ordinary integer
// division when the operand type has enough headroom to pre-scale the
// operands (shift LHS up / RHS down by a total of Scale bits). Signed
// results are corrected to round toward negative infinity. Returns SDValue()
// when there is not enough headroom and the caller must widen instead.
// NOTE(review): the extraction dropped dump line 10582 (the
// "TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,"
// signature line), dump line 10600 (the unsigned arm of the LHSLead
// conditional — presumably a countMinLeadingZeros() expression) and dump
// line 10638 (the remainder of the SDIVREM legality condition). Confirm
// against upstream TargetLowering.cpp.
10581SDValue
10583 SDValue LHS, SDValue RHS,
10584 unsigned Scale, SelectionDAG &DAG) const {
10585 assert((Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT ||
10586 Opcode == ISD::UDIVFIX || Opcode == ISD::UDIVFIXSAT) &&
10587 "Expected a fixed point division opcode");
10588
10589 EVT VT = LHS.getValueType();
10590 bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
10591 bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
10592 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10593
10594 // If there is enough room in the type to upscale the LHS or downscale the
10595 // RHS before the division, we can perform it in this type without having to
10596 // resize. For signed operations, the LHS headroom is the number of
10597 // redundant sign bits, and for unsigned ones it is the number of zeroes.
10598 // The headroom for the RHS is the number of trailing zeroes.
10599 unsigned LHSLead = Signed ? DAG.ComputeNumSignBits(LHS) - 1
10601 unsigned RHSTrail = DAG.computeKnownBits(RHS).countMinTrailingZeros();
10602
10603 // For signed saturating operations, we need to be able to detect true integer
10604 // division overflow; that is, when you have MIN / -EPS. However, this
10605 // is undefined behavior and if we emit divisions that could take such
10606 // values it may cause undesired behavior (arithmetic exceptions on x86, for
10607 // example).
10608 // Avoid this by requiring an extra bit so that we never get this case.
10609 // FIXME: This is a bit unfortunate as it means that for an 8-bit 7-scale
10610 // signed saturating division, we need to emit a whopping 32-bit division.
10611 if (LHSLead + RHSTrail < Scale + (unsigned)(Saturating && Signed))
10612 return SDValue();
10613
// Split the required Scale between an LHS up-shift and an RHS down-shift,
// preferring to consume LHS headroom first.
10614 unsigned LHSShift = std::min(LHSLead, Scale);
10615 unsigned RHSShift = Scale - LHSShift;
10616
10617 // At this point, we know that if we shift the LHS up by LHSShift and the
10618 // RHS down by RHSShift, we can emit a regular division with a final scaling
10619 // factor of Scale.
10620
10621 if (LHSShift)
10622 LHS = DAG.getNode(ISD::SHL, dl, VT, LHS,
10623 DAG.getShiftAmountConstant(LHSShift, VT, dl));
10624 if (RHSShift)
10625 RHS = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, VT, RHS,
10626 DAG.getShiftAmountConstant(RHSShift, VT, dl));
10627
10628 SDValue Quot;
10629 if (Signed) {
10630 // For signed operations, if the resulting quotient is negative and the
10631 // remainder is nonzero, subtract 1 from the quotient to round towards
10632 // negative infinity.
10633 SDValue Rem;
10634 // FIXME: Ideally we would always produce an SDIVREM here, but if the
10635 // type isn't legal, SDIVREM cannot be expanded. There is no reason why
10636 // we couldn't just form a libcall, but the type legalizer doesn't do it.
10637 if (isTypeLegal(VT) &&
10639 Quot = DAG.getNode(ISD::SDIVREM, dl,
10640 DAG.getVTList(VT, VT),
10641 LHS, RHS);
10642 Rem = Quot.getValue(1);
10643 Quot = Quot.getValue(0);
10644 } else {
10645 Quot = DAG.getNode(ISD::SDIV, dl, VT,
10646 LHS, RHS);
10647 Rem = DAG.getNode(ISD::SREM, dl, VT,
10648 LHS, RHS);
10649 }
10650 SDValue Zero = DAG.getConstant(0, dl, VT);
10651 SDValue RemNonZero = DAG.getSetCC(dl, BoolVT, Rem, Zero, ISD::SETNE);
10652 SDValue LHSNeg = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETLT);
10653 SDValue RHSNeg = DAG.getSetCC(dl, BoolVT, RHS, Zero, ISD::SETLT);
// The quotient is negative exactly when the operand signs differ.
10654 SDValue QuotNeg = DAG.getNode(ISD::XOR, dl, BoolVT, LHSNeg, RHSNeg);
10655 SDValue Sub1 = DAG.getNode(ISD::SUB, dl, VT, Quot,
10656 DAG.getConstant(1, dl, VT));
10657 Quot = DAG.getSelect(dl, VT,
10658 DAG.getNode(ISD::AND, dl, BoolVT, RemNonZero, QuotNeg),
10659 Sub1, Quot);
10660 } else
10661 Quot = DAG.getNode(ISD::UDIV, dl, VT,
10662 LHS, RHS);
10663
10664 return Quot;
10665}
10666
// Expand UADDO/USUBO into the plain ADD/SUB plus an explicit overflow flag,
// preferring UADDO_CARRY/USUBO_CARRY with a zero carry-in when the target
// supports it. Two uaddo special cases (+1 and -1) avoid materializing the
// generic compare.
// NOTE(review): the extraction dropped dump line 10667 (the
// "void TargetLowering::expandUADDSUBO(" signature line) and dump line 10707
// (presumably the ISD::CondCode CC selection used by the generic compare at
// the bottom) — confirm against upstream TargetLowering.cpp.
10668 SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
10669 SDLoc dl(Node);
10670 SDValue LHS = Node->getOperand(0);
10671 SDValue RHS = Node->getOperand(1);
10672 bool IsAdd = Node->getOpcode() == ISD::UADDO;
10673
10674 // If UADDO_CARRY/SUBO_CARRY is legal, use that instead.
10675 unsigned OpcCarry = IsAdd ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
10676 if (isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) {
10677 SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1));
10678 SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(),
10679 { LHS, RHS, CarryIn });
10680 Result = SDValue(NodeCarry.getNode(), 0);
10681 Overflow = SDValue(NodeCarry.getNode(), 1);
10682 return;
10683 }
10684
10685 Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
10686 LHS.getValueType(), LHS, RHS);
10687
10688 EVT ResultType = Node->getValueType(1);
10689 EVT SetCCType = getSetCCResultType(
10690 DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
10691 SDValue SetCC;
10692 if (IsAdd && isOneConstant(RHS)) {
10693 // Special case: uaddo X, 1 overflowed if X+1 is 0. This potential reduces
10694 // the live range of X. We assume comparing with 0 is cheap.
10695 // The general case (X + C) < C is not necessarily beneficial. Although we
10696 // reduce the live range of X, we may introduce the materialization of
10697 // constant C.
10698 SetCC =
10699 DAG.getSetCC(dl, SetCCType, Result,
10700 DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETEQ);
10701 } else if (IsAdd && isAllOnesConstant(RHS)) {
10702 // Special case: uaddo X, -1 overflows if X != 0.
10703 SetCC =
10704 DAG.getSetCC(dl, SetCCType, LHS,
10705 DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETNE);
10706 } else {
10708 SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC);
10709 }
// The setcc type may differ from the requested overflow type; normalize.
10710 Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
10711}
10712
// Expand SADDO/SSUBO into the plain ADD/SUB plus an explicit signed-overflow
// flag. If the saturating form is legal, overflow is detected by comparing
// against the saturated result; otherwise it is derived from the sign
// relationship between the result, LHS and RHS.
// NOTE(review): the extraction dropped dump line 10713 (the
// "void TargetLowering::expandSADDSUBO(" signature line) — confirm against
// upstream TargetLowering.cpp.
10714 SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
10715 SDLoc dl(Node);
10716 SDValue LHS = Node->getOperand(0);
10717 SDValue RHS = Node->getOperand(1);
10718 bool IsAdd = Node->getOpcode() == ISD::SADDO;
10719
10720 Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
10721 LHS.getValueType(), LHS, RHS);
10722
10723 EVT ResultType = Node->getValueType(1);
10724 EVT OType = getSetCCResultType(
10725 DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
10726
10727 // If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
10728 unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
10729 if (isOperationLegal(OpcSat, LHS.getValueType())) {
// Overflow occurred exactly when the wrapped result differs from the
// saturated result.
10730 SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
10731 SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
10732 Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
10733 return;
10734 }
10735
10736 SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
10737
10738 // For an addition, the result should be less than one of the operands (LHS)
10739 // if and only if the other operand (RHS) is negative, otherwise there will
10740 // be overflow.
10741 // For a subtraction, the result should be less than one of the operands
10742 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
10743 // otherwise there will be overflow.
10744 SDValue ResultLowerThanLHS = DAG.getSetCC(dl, OType, Result, LHS, ISD::SETLT);
10745 SDValue ConditionRHS =
10746 DAG.getSetCC(dl, OType, RHS, Zero, IsAdd ? ISD::SETLT : ISD::SETGT);
10747
// XOR flags overflow exactly when the observed ordering disagrees with the
// expected one described above.
10748 Overflow = DAG.getBoolExtOrTrunc(
10749 DAG.getNode(ISD::XOR, dl, OType, ConditionRHS, ResultLowerThanLHS), dl,
10750 ResultType, ResultType);
10751}
10752
// Expand SMULO/UMULO: compute the full-width product (via shift for
// power-of-two constants, MULH*/MUL_LOHI, a widened MUL, or
// forceExpandWideMUL) and derive the overflow flag from the top half.
// Returns false when no expansion is possible (illegal vector case).
// NOTE(review): the extraction dropped dump line 10753 (the
// "bool TargetLowering::expandMULO(" signature line), dump line 10782 (the
// vector widening expression assigned to WideVT) and dump lines 10787-10788
// (the initializer of the Ops[2][3] opcode table: per-signedness
// high-half-mul / mul-lohi / extend opcodes) — confirm against upstream.
10754 SDValue &Overflow, SelectionDAG &DAG) const {
10755 SDLoc dl(Node);
10756 EVT VT = Node->getValueType(0);
10757 EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10758 SDValue LHS = Node->getOperand(0);
10759 SDValue RHS = Node->getOperand(1);
10760 bool isSigned = Node->getOpcode() == ISD::SMULO;
10761
10762 // For power-of-two multiplications we can use a simpler shift expansion.
10763 if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) {
10764 const APInt &C = RHSC->getAPIntValue();
10765 // mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
10766 if (C.isPowerOf2()) {
10767 // smulo(x, signed_min) is same as umulo(x, signed_min).
10768 bool UseArithShift = isSigned && !C.isMinSignedValue();
10769 SDValue ShiftAmt = DAG.getShiftAmountConstant(C.logBase2(), VT, dl);
10770 Result = DAG.getNode(ISD::SHL, dl, VT, LHS, ShiftAmt);
// Overflow iff shifting back (arithmetic for signed, logical otherwise)
// does not reproduce the original LHS.
10771 Overflow = DAG.getSetCC(dl, SetCCVT,
10772 DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
10773 dl, VT, Result, ShiftAmt),
10774 LHS, ISD::SETNE);
10775 return true;
10776 }
10777 }
10778
10779 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
10780 if (VT.isVector())
10781 WideVT =
10783
10784 SDValue BottomHalf;
10785 SDValue TopHalf;
10786 static const unsigned Ops[2][3] =
10789 if (isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
10790 BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
10791 TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
10792 } else if (isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
10793 BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
10794 RHS);
10795 TopHalf = BottomHalf.getValue(1);
10796 } else if (isTypeLegal(WideVT)) {
// Fallback: extend both operands to double width, multiply once, and
// split the product back into its two halves.
10797 LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
10798 RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
10799 SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
10800 BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
10801 SDValue ShiftAmt =
10802 DAG.getShiftAmountConstant(VT.getScalarSizeInBits(), WideVT, dl);
10803 TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT,
10804 DAG.getNode(ISD::SRL, dl, WideVT, Mul, ShiftAmt));
10805 } else {
10806 if (VT.isVector())
10807 return false;
10808
10809 forceExpandWideMUL(DAG, dl, isSigned, LHS, RHS, BottomHalf, TopHalf);
10810 }
10811
10812 Result = BottomHalf;
10813 if (isSigned) {
// Signed overflow iff the top half differs from the sign-replication of
// the bottom half.
10814 SDValue ShiftAmt = DAG.getShiftAmountConstant(
10815 VT.getScalarSizeInBits() - 1, BottomHalf.getValueType(), dl);
10816 SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
10817 Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE);
10818 } else {
10819 Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf,
10820 DAG.getConstant(0, dl, VT), ISD::SETNE);
10821 }
10822
10823 // Truncate the result if SetCC returns a larger type than needed.
10824 EVT RType = Node->getValueType(1);
10825 if (RType.bitsLT(Overflow.getValueType()))
10826 Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow);
10827
10828 assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
10829 "Unexpected result type for S/UMULO legalization");
10830 return true;
10831}
10832
// Expand a VECREDUCE_* node into scalar operations: first halve
// power-of-two vectors with a split+binop tree while the half-width op is
// legal, then extract the remaining elements and fold them sequentially with
// the base (scalar) opcode.
// NOTE(review): the extraction dropped dump line 10833 (the
// "SDValue TargetLowering::expandVecReduce(SDNode *Node, ...)" signature
// line), dump line 10840 (presumably the report_fatal_error( call for the
// scalable-vector case) and dump line 10860 (presumably the SmallVector
// declaration of Ops) — confirm against upstream TargetLowering.cpp.
10834 SDLoc dl(Node);
10835 unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
10836 SDValue Op = Node->getOperand(0);
10837 EVT VT = Op.getValueType();
10838
// Scalable vectors cannot be reduced by element extraction.
10839 if (VT.isScalableVector())
10841 "Expanding reductions for scalable vectors is undefined.");
10842
10843 // Try to use a shuffle reduction for power of two vectors.
10844 if (VT.isPow2VectorType()) {
10845 while (VT.getVectorNumElements() > 1) {
10846 EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
10847 if (!isOperationLegalOrCustom(BaseOpcode, HalfVT))
10848 break;
10849
10850 SDValue Lo, Hi;
10851 std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
10852 Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi, Node->getFlags());
10853 VT = HalfVT;
10854 }
10855 }
10856
10857 EVT EltVT = VT.getVectorElementType();
10858 unsigned NumElts = VT.getVectorNumElements();
10859
10861 DAG.ExtractVectorElements(Op, Ops, 0, NumElts);
10862
// Linear left-to-right fold of the extracted scalars.
10863 SDValue Res = Ops[0];
10864 for (unsigned i = 1; i < NumElts; i++)
10865 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Node->getFlags());
10866
10867 // Result type may be wider than element type.
10868 if (EltVT != Node->getValueType(0))
10869 Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
10870 return Res;
10871}
10872
10874 SDLoc dl(Node);
10875 SDValue AccOp = Node->getOperand(0);
10876 SDValue VecOp = Node->getOperand(1);
10877 SDNodeFlags Flags = Node->getFlags();
10878
10879 EVT VT = VecOp.getValueType();
10880 EVT EltVT = VT.getVectorElementType();
10881
10882 if (VT.isScalableVector())
10884 "Expanding reductions for scalable vectors is undefined.");
10885
10886 unsigned NumElts = VT.getVectorNumElements();
10887
10889 DAG.ExtractVectorElements(VecOp, Ops, 0, NumElts);
10890
10891 unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
10892
10893 SDValue Res = AccOp;
10894 for (unsigned i = 0; i < NumElts; i++)
10895 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Flags);
10896
10897 return Res;
10898}
10899
// Expand SREM/UREM using a legal [SU]DIVREM (take result #1) or, failing
// that, a legal [SU]DIV via the identity X % Y == X - (X / Y) * Y. Returns
// false when neither divide form is available.
// NOTE(review): the extraction dropped dump line 10900 (the
// "bool TargetLowering::expandREM(SDNode *Node, SDValue &Result," signature
// line) — confirm against upstream TargetLowering.cpp.
10901 SelectionDAG &DAG) const {
10902 EVT VT = Node->getValueType(0);
10903 SDLoc dl(Node);
10904 bool isSigned = Node->getOpcode() == ISD::SREM;
10905 unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
10906 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
10907 SDValue Dividend = Node->getOperand(0);
10908 SDValue Divisor = Node->getOperand(1);
10909 if (isOperationLegalOrCustom(DivRemOpc, VT)) {
10910 SDVTList VTs = DAG.getVTList(VT, VT);
// DIVREM's second result is the remainder.
10911 Result = DAG.getNode(DivRemOpc, dl, VTs, Dividend, Divisor).getValue(1);
10912 return true;
10913 }
10914 if (isOperationLegalOrCustom(DivOpc, VT)) {
10915 // X % Y -> X-X/Y*Y
10916 SDValue Divide = DAG.getNode(DivOpc, dl, VT, Dividend, Divisor);
10917 SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Divide, Divisor);
10918 Result = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
10919 return true;
10920 }
10921 return false;
10922}
10923
10925 SelectionDAG &DAG) const {
10926 bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT;
10927 SDLoc dl(SDValue(Node, 0));
10928 SDValue Src = Node->getOperand(0);
10929
10930 // DstVT is the result type, while SatVT is the size to which we saturate
10931 EVT SrcVT = Src.getValueType();
10932 EVT DstVT = Node->getValueType(0);
10933
10934 EVT SatVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
10935 unsigned SatWidth = SatVT.getScalarSizeInBits();
10936 unsigned DstWidth = DstVT.getScalarSizeInBits();
10937 assert(SatWidth <= DstWidth &&
10938 "Expected saturation width smaller than result width");
10939
10940 // Determine minimum and maximum integer values and their corresponding
10941 // floating-point values.
10942 APInt MinInt, MaxInt;
10943 if (IsSigned) {
10944 MinInt = APInt::getSignedMinValue(SatWidth).sext(DstWidth);
10945 MaxInt = APInt::getSignedMaxValue(SatWidth).sext(DstWidth);
10946 } else {
10947 MinInt = APInt::getMinValue(SatWidth).zext(DstWidth);
10948 MaxInt = APInt::getMaxValue(SatWidth).zext(DstWidth);
10949 }
10950
10951 // We cannot risk emitting FP_TO_XINT nodes with a source VT of [b]f16, as
10952 // libcall emission cannot handle this. Large result types will fail.
10953 if (SrcVT == MVT::f16 || SrcVT == MVT::bf16) {
10954 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Src);
10955 SrcVT = Src.getValueType();
10956 }
10957
10958 APFloat MinFloat(DAG.EVTToAPFloatSemantics(SrcVT));
10959 APFloat MaxFloat(DAG.EVTToAPFloatSemantics(SrcVT));
10960
10961 APFloat::opStatus MinStatus =
10962 MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
10963 APFloat::opStatus MaxStatus =
10964 MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
10965 bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
10966 !(MaxStatus & APFloat::opStatus::opInexact);
10967
10968 SDValue MinFloatNode = DAG.getConstantFP(MinFloat, dl, SrcVT);
10969 SDValue MaxFloatNode = DAG.getConstantFP(MaxFloat, dl, SrcVT);
10970
10971 // If the integer bounds are exactly representable as floats and min/max are
10972 // legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
10973 // of comparisons and selects.
10974 bool MinMaxLegal = isOperationLegal(ISD::FMINNUM, SrcVT) &&
10976 if (AreExactFloatBounds && MinMaxLegal) {
10977 SDValue Clamped = Src;
10978
10979 // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
10980 Clamped = DAG.getNode(ISD::FMAXNUM, dl, SrcVT, Clamped, MinFloatNode);
10981 // Clamp by MaxFloat from above. NaN cannot occur.
10982 Clamped = DAG.getNode(ISD::FMINNUM, dl, SrcVT, Clamped, MaxFloatNode);
10983 // Convert clamped value to integer.
10984 SDValue FpToInt = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT,
10985 dl, DstVT, Clamped);
10986
10987 // In the unsigned case we're done, because we mapped NaN to MinFloat,
10988 // which will cast to zero.
10989 if (!IsSigned)
10990 return FpToInt;
10991
10992 // Otherwise, select 0 if Src is NaN.
10993 SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
10994 EVT SetCCVT =
10995 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
10996 SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
10997 return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, FpToInt);
10998 }
10999
11000 SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT);
11001 SDValue MaxIntNode = DAG.getConstant(MaxInt, dl, DstVT);
11002
11003 // Result of direct conversion. The assumption here is that the operation is
11004 // non-trapping and it's fine to apply it to an out-of-range value if we
11005 // select it away later.
11006 SDValue FpToInt =
11007 DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl, DstVT, Src);
11008
11009 SDValue Select = FpToInt;
11010
11011 EVT SetCCVT =
11012 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
11013
11014 // If Src ULT MinFloat, select MinInt. In particular, this also selects
11015 // MinInt if Src is NaN.
11016 SDValue ULT = DAG.getSetCC(dl, SetCCVT, Src, MinFloatNode, ISD::SETULT);
11017 Select = DAG.getSelect(dl, DstVT, ULT, MinIntNode, Select);
11018 // If Src OGT MaxFloat, select MaxInt.
11019 SDValue OGT = DAG.getSetCC(dl, SetCCVT, Src, MaxFloatNode, ISD::SETOGT);
11020 Select = DAG.getSelect(dl, DstVT, OGT, MaxIntNode, Select);
11021
11022 // In the unsigned case we are done, because we mapped NaN to MinInt, which
11023 // is already zero.
11024 if (!IsSigned)
11025 return Select;
11026
11027 // Otherwise, select 0 if Src is NaN.
11028 SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
11029 SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
11030 return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, Select);
11031}
11032
// Round Op from OperandVT to ResultVT using round-to-odd: narrow the
// magnitude, and if the narrowing was inexact and landed on an even
// significand, nudge it to the adjacent odd value. This makes a subsequent
// second rounding step (e.g. f32 -> bf16) produce the correctly-rounded
// result; see Boldo & Melquiond, "When double rounding is odd" (cited
// below). The sign bit is handled separately via integer ops so that
// -0.0/NaN signs survive.
// NOTE(review): the extraction dropped dump line 11033 (the
// "SDValue TargetLowering::expandRoundInexactToOdd(EVT ResultVT, SDValue
// Op," signature line) — confirm against upstream TargetLowering.cpp.
11034 const SDLoc &dl,
11035 SelectionDAG &DAG) const {
11036 EVT OperandVT = Op.getValueType();
11037 if (OperandVT.getScalarType() == ResultVT.getScalarType())
11038 return Op;
11039 EVT ResultIntVT = ResultVT.changeTypeToInteger();
11040 // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
11041 // can induce double-rounding which may alter the results. We can
11042 // correct for this using a trick explained in: Boldo, Sylvie, and
11043 // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
11044 // World Congress. 2005.
11045 unsigned BitSize = OperandVT.getScalarSizeInBits();
11046 EVT WideIntVT = OperandVT.changeTypeToInteger();
// Peel the sign bit off via integer AND; it is reattached at the end.
11047 SDValue OpAsInt = DAG.getBitcast(WideIntVT, Op);
11048 SDValue SignBit =
11049 DAG.getNode(ISD::AND, dl, WideIntVT, OpAsInt,
11050 DAG.getConstant(APInt::getSignMask(BitSize), dl, WideIntVT));
11051 SDValue AbsWide;
11052 if (isOperationLegalOrCustom(ISD::FABS, OperandVT)) {
11053 AbsWide = DAG.getNode(ISD::FABS, dl, OperandVT, Op);
11054 } else {
// No FABS: clear the sign bit with an integer mask instead.
11055 SDValue ClearedSign = DAG.getNode(
11056 ISD::AND, dl, WideIntVT, OpAsInt,
11057 DAG.getConstant(APInt::getSignedMaxValue(BitSize), dl, WideIntVT));
11058 AbsWide = DAG.getBitcast(OperandVT, ClearedSign);
11059 }
11060 SDValue AbsNarrow = DAG.getFPExtendOrRound(AbsWide, dl, ResultVT);
11061 SDValue AbsNarrowAsWide = DAG.getFPExtendOrRound(AbsNarrow, dl, OperandVT);
11062
11063 // We can keep the narrow value as-is if narrowing was exact (no
11064 // rounding error), the wide value was NaN (the narrow value is also
11065 // NaN and should be preserved) or if we rounded to the odd value.
11066 SDValue NarrowBits = DAG.getNode(ISD::BITCAST, dl, ResultIntVT, AbsNarrow);
11067 SDValue One = DAG.getConstant(1, dl, ResultIntVT);
11068 SDValue NegativeOne = DAG.getAllOnesConstant(dl, ResultIntVT);
11069 SDValue And = DAG.getNode(ISD::AND, dl, ResultIntVT, NarrowBits, One);
11070 EVT ResultIntVTCCVT = getSetCCResultType(
11071 DAG.getDataLayout(), *DAG.getContext(), And.getValueType());
11072 SDValue Zero = DAG.getConstant(0, dl, ResultIntVT);
11073 // The result is already odd so we don't need to do anything.
11074 SDValue AlreadyOdd = DAG.getSetCC(dl, ResultIntVTCCVT, And, Zero, ISD::SETNE);
11075
11076 EVT WideSetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
11077 AbsWide.getValueType());
11078 // We keep results which are exact, odd or NaN.
// SETUEQ is true for equal *or* unordered, covering both the exact and the
// NaN cases in one compare.
11079 SDValue KeepNarrow =
11080 DAG.getSetCC(dl, WideSetCCVT, AbsWide, AbsNarrowAsWide, ISD::SETUEQ);
11081 KeepNarrow = DAG.getNode(ISD::OR, dl, WideSetCCVT, KeepNarrow, AlreadyOdd);
11082 // We morally performed a round-down if AbsNarrow is smaller than
11083 // AbsWide.
11084 SDValue NarrowIsRd =
11085 DAG.getSetCC(dl, WideSetCCVT, AbsWide, AbsNarrowAsWide, ISD::SETOGT);
11086 // If the narrow value is odd or exact, pick it.
11087 // Otherwise, narrow is even and corresponds to either the rounded-up
11088 // or rounded-down value. If narrow is the rounded-down value, we want
11089 // the rounded-up value as it will be odd.
11090 SDValue Adjust = DAG.getSelect(dl, ResultIntVT, NarrowIsRd, One, NegativeOne);
11091 SDValue Adjusted = DAG.getNode(ISD::ADD, dl, ResultIntVT, NarrowBits, Adjust);
11092 Op = DAG.getSelect(dl, ResultIntVT, KeepNarrow, NarrowBits, Adjusted);
// Shift the saved sign bit down into the narrow type's sign position and
// OR it back into the adjusted magnitude bits.
11093 int ShiftAmount = BitSize - ResultVT.getScalarSizeInBits();
11094 SDValue ShiftCnst = DAG.getShiftAmountConstant(ShiftAmount, WideIntVT, dl);
11095 SignBit = DAG.getNode(ISD::SRL, dl, WideIntVT, SignBit, ShiftCnst);
11096 SignBit = DAG.getNode(ISD::TRUNCATE, dl, ResultIntVT, SignBit);
11097 Op = DAG.getNode(ISD::OR, dl, ResultIntVT, Op, SignBit);
11098 return DAG.getNode(ISD::BITCAST, dl, ResultVT, Op);
11100
// Expand FP_ROUND to bf16 without native support: perform
// round-to-odd to f32 (avoiding double rounding; see
// expandRoundInexactToOdd), then round f32 -> bf16 with integer
// round-to-nearest-even arithmetic, quieting NaNs. Returns SDValue() for
// result types this expansion does not handle.
// NOTE(review): the extraction dropped dump line 11101 (the
// "SDValue TargetLowering::expandFP_ROUND(SDNode *Node, SelectionDAG &DAG)"
// signature line) — confirm against upstream TargetLowering.cpp.
11102 assert(Node->getOpcode() == ISD::FP_ROUND && "Unexpected opcode!");
11103 SDValue Op = Node->getOperand(0);
11104 EVT VT = Node->getValueType(0);
11105 SDLoc dl(Node);
11106 if (VT.getScalarType() == MVT::bf16) {
// Operand 1 == 1 means the input is known exact ("trunc" flag), so a
// direct conversion node suffices.
11107 if (Node->getConstantOperandVal(1) == 1) {
11108 return DAG.getNode(ISD::FP_TO_BF16, dl, VT, Node->getOperand(0));
11109 }
11110 EVT OperandVT = Op.getValueType();
11111 SDValue IsNaN = DAG.getSetCC(
11112 dl,
11113 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), OperandVT),
11114 Op, Op, ISD::SETUO);
11115
11116 // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
11117 // can induce double-rounding which may alter the results. We can
11118 // correct for this using a trick explained in: Boldo, Sylvie, and
11119 // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
11120 // World Congress. 2005.
11121 EVT F32 = VT.isVector() ? VT.changeVectorElementType(MVT::f32) : MVT::f32;
11122 EVT I32 = F32.changeTypeToInteger();
11123 Op = expandRoundInexactToOdd(F32, Op, dl, DAG);
11124 Op = DAG.getNode(ISD::BITCAST, dl, I32, Op);
11125
11126 // Conversions should set NaN's quiet bit. This also prevents NaNs from
11127 // turning into infinities.
11128 SDValue NaN =
11129 DAG.getNode(ISD::OR, dl, I32, Op, DAG.getConstant(0x400000, dl, I32));
11130
11131 // Factor in the contribution of the low 16 bits.
// Round-to-nearest-even: bias is 0x7fff plus the lowest kept bit, so ties
// round toward the even bf16 value.
11132 SDValue One = DAG.getConstant(1, dl, I32);
11133 SDValue Lsb = DAG.getNode(ISD::SRL, dl, I32, Op,
11134 DAG.getShiftAmountConstant(16, I32, dl));
11135 Lsb = DAG.getNode(ISD::AND, dl, I32, Lsb, One);
11136 SDValue RoundingBias =
11137 DAG.getNode(ISD::ADD, dl, I32, DAG.getConstant(0x7fff, dl, I32), Lsb);
11138 SDValue Add = DAG.getNode(ISD::ADD, dl, I32, Op, RoundingBias);
11139
11140 // Don't round if we had a NaN, we don't want to turn 0x7fffffff into
11141 // 0x80000000.
11142 Op = DAG.getSelect(dl, I32, IsNaN, NaN, Add);
11143
11144 // Now that we have rounded, shift the bits into position.
11145 Op = DAG.getNode(ISD::SRL, dl, I32, Op,
11146 DAG.getShiftAmountConstant(16, I32, dl));
11147 Op = DAG.getNode(ISD::BITCAST, dl, I32, Op);
11148 EVT I16 = I32.isVector() ? I32.changeVectorElementType(MVT::i16) : MVT::i16;
11149 Op = DAG.getNode(ISD::TRUNCATE, dl, I16, Op);
11150 return DAG.getNode(ISD::BITCAST, dl, VT, Op);
11151 }
11152 return SDValue();
11153}
11154
// Expand VECTOR_SPLICE for scalable vectors through the stack: store
// CONCAT(V1, V2) to a temporary slot, then load the result VT starting at the
// element offset selected by the (possibly negative) immediate. Negative Imm
// counts trailing elements of V1, clamped so the load stays inside V1:V2.
// NOTE(review): the extraction dropped dump line 11155 (the
// "SDValue TargetLowering::expandVectorSplice(SDNode *Node," signature
// line), dump line 11180 (the start of the MemVT declaration), dump line
// 11193 (the APInt byte-offset expression fed to getVScale), and dump lines
// 11203/11217/11226 (continuation lines of the getLoad / VLBytes
// expressions, presumably MachinePointerInfo / element-count operands) —
// confirm against upstream TargetLowering.cpp.
11156 SelectionDAG &DAG) const {
11157 assert(Node->getOpcode() == ISD::VECTOR_SPLICE && "Unexpected opcode!");
11158 assert(Node->getValueType(0).isScalableVector() &&
11159 "Fixed length vector types expected to use SHUFFLE_VECTOR!");
11160
11161 EVT VT = Node->getValueType(0);
11162 SDValue V1 = Node->getOperand(0);
11163 SDValue V2 = Node->getOperand(1);
11164 int64_t Imm = cast<ConstantSDNode>(Node->getOperand(2))->getSExtValue();
11165 SDLoc DL(Node);
11166
11167 // Expand through memory thusly:
11168 // Alloca CONCAT_VECTORS_TYPES(V1, V2) Ptr
11169 // Store V1, Ptr
11170 // Store V2, Ptr + sizeof(V1)
11171 // If (Imm < 0)
11172 // TrailingElts = -Imm
11173 // Ptr = Ptr + sizeof(V1) - (TrailingElts * sizeof(VT.Elt))
11174 // else
11175 // Ptr = Ptr + (Imm * sizeof(VT.Elt))
11176 // Res = Load Ptr
11177
11178 Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);
11179
11181 VT.getVectorElementCount() * 2);
11182 SDValue StackPtr = DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
11183 EVT PtrVT = StackPtr.getValueType();
11184 auto &MF = DAG.getMachineFunction();
11185 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
11186 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
11187
11188 // Store the lo part of CONCAT_VECTORS(V1, V2)
11189 SDValue StoreV1 = DAG.getStore(DAG.getEntryNode(), DL, V1, StackPtr, PtrInfo);
11190 // Store the hi part of CONCAT_VECTORS(V1, V2)
// The byte offset of V2 is scalable, hence the VSCALE-based address math.
11191 SDValue OffsetToV2 = DAG.getVScale(
11192 DL, PtrVT,
11194 SDValue StackPtr2 = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, OffsetToV2);
11195 SDValue StoreV2 = DAG.getStore(StoreV1, DL, V2, StackPtr2, PtrInfo);
11196
11197 if (Imm >= 0) {
11198 // Load back the required element. getVectorElementPointer takes care of
11199 // clamping the index if it's out-of-bounds.
11200 StackPtr = getVectorElementPointer(DAG, StackPtr, VT, Node->getOperand(2));
11201 // Load the spliced result
11202 return DAG.getLoad(VT, DL, StoreV2, StackPtr,
11204 }
11205
11206 uint64_t TrailingElts = -Imm;
11207
11208 // NOTE: TrailingElts must be clamped so as not to read outside of V1:V2.
11209 TypeSize EltByteSize = VT.getVectorElementType().getStoreSize();
11210 SDValue TrailingBytes =
11211 DAG.getConstant(TrailingElts * EltByteSize, DL, PtrVT);
11212
11213 if (TrailingElts > VT.getVectorMinNumElements()) {
// Runtime clamp: never step back past the start of V1's storage.
11214 SDValue VLBytes =
11215 DAG.getVScale(DL, PtrVT,
11216 APInt(PtrVT.getFixedSizeInBits(),
11218 TrailingBytes = DAG.getNode(ISD::UMIN, DL, PtrVT, TrailingBytes, VLBytes);
11219 }
11220
11221 // Calculate the start address of the spliced result.
11222 StackPtr2 = DAG.getNode(ISD::SUB, DL, PtrVT, StackPtr2, TrailingBytes);
11223
11224 // Load the spliced result
11225 return DAG.getLoad(VT, DL, StoreV2, StackPtr2,
11227}
11228
11230 SDValue &LHS, SDValue &RHS,
11231 SDValue &CC, SDValue Mask,
11232 SDValue EVL, bool &NeedInvert,
11233 const SDLoc &dl, SDValue &Chain,
11234 bool IsSignaling) const {
11235 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11236 MVT OpVT = LHS.getSimpleValueType();
11237 ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
11238 NeedInvert = false;
11239 assert(!EVL == !Mask && "VP Mask and EVL must either both be set or unset");
11240 bool IsNonVP = !EVL;
11241 switch (TLI.getCondCodeAction(CCCode, OpVT)) {
11242 default:
11243 llvm_unreachable("Unknown condition code action!");
11245 // Nothing to do.
11246 break;
11249 if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
11250 std::swap(LHS, RHS);
11251 CC = DAG.getCondCode(InvCC);
11252 return true;
11253 }
11254 // Swapping operands didn't work. Try inverting the condition.
11255 bool NeedSwap = false;
11256 InvCC = getSetCCInverse(CCCode, OpVT);
11257 if (!TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
11258 // If inverting the condition is not enough, try swapping operands
11259 // on top of it.
11260 InvCC = ISD::getSetCCSwappedOperands(InvCC);
11261 NeedSwap = true;
11262 }
11263 if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
11264 CC = DAG.getCondCode(InvCC);
11265 NeedInvert = true;
11266 if (NeedSwap)
11267 std::swap(LHS, RHS);
11268 return true;
11269 }
11270
11272 unsigned Opc = 0;
11273 switch (CCCode) {
11274 default:
11275 llvm_unreachable("Don't know how to expand this condition!");
11276 case ISD::SETUO:
11277 if (TLI.isCondCodeLegal(ISD::SETUNE, OpVT)) {
11278 CC1 = ISD::SETUNE;
11279 CC2 = ISD::SETUNE;
11280 Opc = ISD::OR;
11281 break;
11282 }
11283 assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) &&
11284 "If SETUE is expanded, SETOEQ or SETUNE must be legal!");
11285 NeedInvert = true;
11286 [[fallthrough]];
11287 case ISD::SETO:
11288 assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) &&
11289 "If SETO is expanded, SETOEQ must be legal!");
11290 CC1 = ISD::SETOEQ;
11291 CC2 = ISD::SETOEQ;
11292 Opc = ISD::AND;
11293 break;
11294 case ISD::SETONE:
11295 case ISD::SETUEQ:
11296 // If the SETUO or SETO CC isn't legal, we might be able to use
11297 // SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
11298 // of SETOGT/SETOLT to be legal, the other can be emulated by swapping
11299 // the operands.
11300 CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
11301 if (!TLI.isCondCodeLegal(CC2, OpVT) &&
11302 (TLI.isCondCodeLegal(ISD::SETOGT, OpVT) ||
11303 TLI.isCondCodeLegal(ISD::SETOLT, OpVT))) {
11304 CC1 = ISD::SETOGT;
11305 CC2 = ISD::SETOLT;
11306 Opc = ISD::OR;
11307 NeedInvert = ((unsigned)CCCode & 0x8U);
11308 break;
11309 }
11310 [[fallthrough]];
11311 case ISD::SETOEQ:
11312 case ISD::SETOGT:
11313 case ISD::SETOGE:
11314 case ISD::SETOLT:
11315 case ISD::SETOLE:
11316 case ISD::SETUNE:
11317 case ISD::SETUGT:
11318 case ISD::SETUGE:
11319 case ISD::SETULT:
11320 case ISD::SETULE:
11321 // If we are floating point, assign and break, otherwise fall through.
11322 if (!OpVT.isInteger()) {
11323 // We can use the 4th bit to tell if we are the unordered
11324 // or ordered version of the opcode.
11325 CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
11326 Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
11327 CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
11328 break;
11329 }
11330 // Fallthrough if we are unsigned integer.
11331 [[fallthrough]];
11332 case ISD::SETLE:
11333 case ISD::SETGT:
11334 case ISD::SETGE:
11335 case ISD::SETLT:
11336 case ISD::SETNE:
11337 case ISD::SETEQ:
11338 // If all combinations of inverting the condition and swapping operands
11339 // didn't work then we have no means to expand the condition.
11340 llvm_unreachable("Don't know how to expand this condition!");
11341 }
11342
11343 SDValue SetCC1, SetCC2;
11344 if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
11345 // If we aren't the ordered or unorder operation,
11346 // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
11347 if (IsNonVP) {
11348 SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling);
11349 SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling);
11350 } else {
11351 SetCC1 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC1, Mask, EVL);
11352 SetCC2 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC2, Mask, EVL);
11353 }
11354 } else {
11355 // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
11356 if (IsNonVP) {
11357 SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling);
11358 SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling);
11359 } else {
11360 SetCC1 = DAG.getSetCCVP(dl, VT, LHS, LHS, CC1, Mask, EVL);
11361 SetCC2 = DAG.getSetCCVP(dl, VT, RHS, RHS, CC2, Mask, EVL);
11362 }
11363 }
11364 if (Chain)
11365 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1),
11366 SetCC2.getValue(1));
11367 if (IsNonVP)
11368 LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
11369 else {
11370 // Transform the binary opcode to the VP equivalent.
11371 assert((Opc == ISD::OR || Opc == ISD::AND) && "Unexpected opcode");
11372 Opc = Opc == ISD::OR ? ISD::VP_OR : ISD::VP_AND;
11373 LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2, Mask, EVL);
11374 }
11375 RHS = SDValue();
11376 CC = SDValue();
11377 return true;
11378 }
11379 }
11380 return false;
11381}
unsigned const MachineRegisterInfo * MRI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
amdgpu AMDGPU Register Bank Select
basic Basic Alias true
block Block Frequency Analysis
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
uint64_t Addr
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static bool isSigned(unsigned int Opcode)
static bool isUndef(ArrayRef< int > Mask)
static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, const APInt &Demanded)
Check to see if the specified operand of the specified instruction is a constant integer.
#define RegName(no)
static LVOptions Options
Definition: LVOptions.cpp:25
lazy value info
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:528
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
unsigned const TargetRegisterInfo * TRI
#define T1
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define P(N)
const char LLVMTargetMachineRef TM
const char * Passes
R600 Clause Merge
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static bool lowerImmediateIfPossible(TargetLowering::ConstraintPair &P, SDValue Op, SelectionDAG *DAG, const TargetLowering &TLI)
If we have an immediate, see if we can lower it.
static SDValue expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG)
static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG, const APInt &UndefOp0, const APInt &UndefOp1)
Given a vector binary operation and known undefined elements for each input operand,...
static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx, EVT VecVT, const SDLoc &dl, ElementCount SubEC)
static unsigned getConstraintPiority(TargetLowering::ConstraintType CT)
Return a number indicating our preference for chosing a type of constraint over another,...
static std::optional< bool > isFCmpEqualZero(FPClassTest Test, const fltSemantics &Semantics, const MachineFunction &MF)
Returns a true value if if this FPClassTest can be performed with an ordered fcmp to 0,...
static void turnVectorIntoSplatVector(MutableArrayRef< SDValue > Values, std::function< bool(SDValue)> Predicate, SDValue AlternativeReplacement=SDValue())
If all values in Values that don't match the predicate are same 'splat' value, then replace all value...
static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT)
static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static SDValue combineShiftToAVG(SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI, const APInt &DemandedBits, const APInt &DemandedElts, unsigned Depth)
static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created)
Given an exact SDIV by a constant, create a multiplication with the multiplicative inverse of the con...
static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT, SDValue N0, const APInt &C1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
This file describes how to lower LLVM code to machine code.
static int Lookup(ArrayRef< TableEntry > Table, unsigned Opcode)
static SDValue scalarizeVectorStore(StoreSDNode *Store, MVT StoreVT, SelectionDAG &DAG)
Scalarize a vector store, bitcasting to TargetVT to determine the scalar type.
Value * RHS
Value * LHS
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition: APFloat.h:1193
static APFloat getSmallestNormalized(const fltSemantics &Sem, bool Negative=false)
Returns the smallest (by magnitude) normalized finite number in the given semantics.
Definition: APFloat.h:1026
APInt bitcastToAPInt() const
Definition: APFloat.h:1210
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
Definition: APFloat.h:1006
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Definition: APFloat.h:966
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Definition: APFloat.h:977
Class for arbitrary precision integers.
Definition: APInt.h:76
APInt udiv(const APInt &RHS) const
Unsigned division operation.
Definition: APInt.cpp:1543
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:212
static void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
Definition: APInt.cpp:1728
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition: APInt.h:1385
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition: APInt.h:427
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:981
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition: APInt.h:207
bool isMinSignedValue() const
Determine if this is the smallest signed value.
Definition: APInt.h:401
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1491
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
Definition: APInt.h:1370
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition: APInt.h:1364
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:1002
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1463
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:906
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition: APInt.h:184
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1308
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:349
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1160
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition: APInt.h:236
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:358
APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition: APInt.cpp:1636
void setSignBit()
Set the sign bit to 1.
Definition: APInt.h:1318
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1439
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:187
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
Definition: APInt.h:194
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:307
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition: APInt.h:1227
void clearAllBits()
Set every bit to 0.
Definition: APInt.h:1375
APInt reverseBits() const
Definition: APInt.cpp:737
void ashrInPlace(unsigned ShiftAmt)
Arithmetic right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:812
void negate()
Negate this APInt in place.
Definition: APInt.h:1421
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1589
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition: APInt.h:1548
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:620
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:197
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1482
unsigned countLeadingZeros() const
Definition: APInt.h:1556
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition: APInt.h:334
void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition: APInt.cpp:368
unsigned logBase2() const
Definition: APInt.h:1703
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition: APInt.h:453
void setAllBits()
Set every bit to 1.
Definition: APInt.h:1297
APInt multiplicativeInverse() const
Definition: APInt.cpp:1244
bool isMaxSignedValue() const
Determine if this is the largest signed value.
Definition: APInt.h:383
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition: APInt.h:312
bool ule(const APInt &RHS) const
Unsigned less or equal comparison.
Definition: APInt.h:1128
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:954
void setBits(unsigned loBit, unsigned hiBit)
Set the bits from loBit (inclusive) to hiBit (exclusive) to 1.
Definition: APInt.h:1345
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition: APInt.h:851
APInt byteSwap() const
Definition: APInt.cpp:715
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1235
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:418
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:284
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:274
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:178
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition: APInt.h:1367
APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition: APInt.cpp:453
bool isOne() const
Determine if this is a value of 1.
Definition: APInt.h:367
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition: APInt.h:264
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:217
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1513
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:836
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:829
unsigned countr_one() const
Count the number of trailing one bits.
Definition: APInt.h:1606
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1199
void setBitVal(unsigned BitPosition, bool BitValue)
Set a given bit to a given value.
Definition: APInt.h:1321
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
bool hasAttributes() const
Return true if the builder has IR-level attributes.
Definition: Attributes.h:1075
bool contains(Attribute::AttrKind A) const
Return true if the builder has the specified attribute.
AttrBuilder & removeAttribute(Attribute::AttrKind Val)
Remove an attribute from the builder.
bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
A "pseudo-class" with methods for operating on BUILD_VECTORs.
ConstantSDNode * getConstantSplatNode(const APInt &DemandedElts, BitVector *UndefElements=nullptr) const
Returns the demanded splatted constant or null if this is not a constant splat.
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
Register getLocReg() const
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1494
This class represents a function call, abstracting a target machine's calling convention.
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
Definition: Constants.h:705
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:268
const APInt & getAPIntValue() const
This is an important base class in LLVM.
Definition: Constant.h:41
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:238
bool isBigEndian() const
Definition: DataLayout.h:239
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:874
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:340
const GlobalValue * getGlobal() const
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
std::vector< std::string > ConstraintCodeVector
Definition: InlineAsm.h:102
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:278
void emitError(uint64_t LocCookie, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
Context object for machine code objects.
Definition: MCContext.h:81
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:35
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:397
Machine Value Type.
SimpleValueType SimpleTy
bool isInteger() const
Return true if this is an integer or a vector integer type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
DenormalMode getDenormalMode(const fltSemantics &FPType) const
Returns the denormal handling type for the default rounding mode of the function.
MCSymbol * getJTISymbol(unsigned JTI, MCContext &Ctx, bool isLinkerPrivate=false) const
getJTISymbol - Return the MCSymbol for the specified non-empty jump table.
Function & getFunction()
Return the LLVM function that this machine code represents.
@ EK_GPRel32BlockAddress
EK_GPRel32BlockAddress - Each entry is an address of block, encoded with a relocation as gp-relative,...
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
@ EK_BlockAddress
EK_BlockAddress - Each entry is a plain address of block, e.g.: .word LBB123.
@ EK_GPRel64BlockAddress
EK_GPRel64BlockAddress - Each entry is an address of block, encoded with a relocation as gp-relative,...
Flags getFlags() const
Return the raw flags of the source value,.
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
unsigned getAddressSpace() const
Return the address space for the associated pointer.
Align getAlign() const
Align getOriginalAlign() const
Returns alignment and volatility of the memory access.
bool isSimple() const
Returns true if the memory operation is neither atomic or volatile.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
const GlobalVariable * getNamedGlobal(StringRef Name) const
Return the global variable in the module with the specified name, of arbitrary type.
Definition: Module.h:461
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:307
iterator end() const
Definition: ArrayRef.h:357
iterator begin() const
Definition: ArrayRef.h:356
Class to represent pointers.
Definition: DerivedTypes.h:646
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
static SDNodeIterator end(const SDNode *N)
static SDNodeIterator begin(const SDNode *N)
Represents one node in the SelectionDAG.
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
SDNodeFlags getFlags() const
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
void setFlags(SDNodeFlags NewFlags)
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
bool use_empty() const
Return true if there are no nodes using value ResNo of Node.
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:225
bool willNotOverflowAdd(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the addition of 2 nodes can never overflow.
Align getReducedAlign(EVT VT, bool UseABI)
In most cases this function returns the ABI alignment for a given type, except for illegal vector typ...
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:722
SDValue getExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT, unsigned Opcode)
Convert Op, which must be of integer type, to the integer type VT, by either any/sign/zero-extending ...
Definition: SelectionDAG.h:954
unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
bool isKnownNeverSNaN(SDValue Op, unsigned Depth=0) const
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS)
Helper function to make it easier to build Select's if you just have operands and don't want to check...
const APInt * getValidMaximumShiftAmountConstant(SDValue V, const APInt &DemandedElts) const
If a SHL/SRA/SRL node V has constant shift amounts that are all less than the element bit-width of th...
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond, const SDLoc &dl)
Constant fold a setcc to true or false.
void ExtractVectorElements(SDValue Op, SmallVectorImpl< SDValue > &Args, unsigned Start=0, unsigned Count=0, EVT EltVT=EVT())
Append the extracted elements from Start to Count out of the vector Op in Args.
SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
SDNode * isConstantIntBuildVectorOrConstantInt(SDValue N) const
Test whether the given value is a constant int or similar node.
SDValue getJumpTableDebugInfo(int JTI, SDValue Chain, const SDLoc &DL)
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getGLOBAL_OFFSET_TABLE(EVT VT)
Return a GLOBAL_OFFSET_TABLE node. This does not have a useful SDLoc.
SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
bool shouldOptForSize() const
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:478
static constexpr unsigned MaxRecursionDepth
Definition: SelectionDAG.h:448
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:828
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:472
bool doesNodeExist(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops)
Check if a node exists without modifying its flags.
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
SDValue getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, bool isTargetGA=false, unsigned TargetFlags=0)
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
Definition: SelectionDAG.h:659
std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
const APInt * getValidShiftAmountConstant(SDValue V, const APInt &DemandedElts) const
If a SHL/SRA/SRL node V has a constant or splat constant shift amount that is less than the element b...
bool isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly=false, unsigned Depth=0) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
Definition: SelectionDAG.h:862
void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
bool isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth=0) const
Test if the given value is known to have exactly one bit set.
bool isKnownNeverZero(SDValue Op, unsigned Depth=0) const
Test whether the given SDValue is known to contain non-zero value(s).
SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
static const fltSemantics & EVTToAPFloatSemantics(EVT VT)
Returns an APFloat semantics tag appropriate for the given type.
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:473
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
bool isKnownNeverZeroFloat(SDValue Op) const
Test whether the given floating point SDValue is known to never be positive or negative zero.
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
bool isKnownNeverNaN(SDValue Op, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:676
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:768
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:469
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
SDValue getCondCode(ISD::CondCode Cond)
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
Definition: SelectionDAG.h:485
SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL, bool LegalTypes=true)
SDValue getSetCCVP(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Mask, SDValue EVL)
Helper function to make it easier to build VP_SETCCs if you just have an ISD::CondCode instead of an ...
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:554
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
Definition: SelectionDAG.h:878
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
static void commuteMask(MutableArrayRef< int > Mask)
Change values in a shuffle permute mask assuming the two vector operands have swapped position.
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
This class is used to represent ISD::STORE nodes.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition: StringRef.h:563
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:257
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:131
iterator end() const
Definition: StringRef.h:113
Class to represent struct types.
Definition: DerivedTypes.h:216
void setAttributes(const CallBase *Call, unsigned ArgIdx)
Set CallLoweringInfo attribute flags based on a call instruction and called function attributes.
This base class for TargetLowering contains the SelectionDAG-independent parts that can be used from ...
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
virtual bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
virtual bool isShuffleMaskLegal(ArrayRef< int >, EVT) const
Targets can use this to indicate that they only support some VECTOR_SHUFFLE operations,...
virtual bool shouldRemoveRedundantExtend(SDValue Op) const
Return true (the default) if it is profitable to remove a sext_inreg(x) where the sext is redundant,...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const
Get the CallingConv that should be used for the specified libcall.
virtual bool isLegalICmpImmediate(int64_t) const
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
virtual bool isSafeMemOpType(MVT) const
Returns true if it's safe to use load / store of the specified type to expand memcpy / memset inline.
const TargetMachine & getTargetMachine() const
virtual bool isCtpopFast(EVT VT) const
Return true if ctpop instruction is fast.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
bool isPaddedAtMostSignificantBitsWhenStored(EVT VT) const
Indicates if any padding is guaranteed to go at the most significant bits when storing the type to me...
virtual EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &) const
Returns the target specific optimal type for load and store operations as a result of memset,...
LegalizeAction getCondCodeAction(ISD::CondCode CC, MVT VT) const
Return how the condition code should be treated: either it is legal, needs to be expanded to some oth...
virtual bool isCommutativeBinOp(unsigned Opcode) const
Returns true if the opcode is a commutative binary operation.
virtual bool isFPImmLegal(const APFloat &, EVT, bool ForCodeSize=false) const
Returns true if the target can instruction select the specified FP immediate natively.
virtual MVT::SimpleValueType getCmpLibcallReturnType() const
Return the ValueType for comparison libcalls.
virtual bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const
Should we transform the IR-optimal check for whether given truncation down into KeptBits would be trun...
bool isLegalRC(const TargetRegisterInfo &TRI, const TargetRegisterClass &RC) const
Return true if the value types that can be represented by the specified register class are all legal.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
virtual bool shouldExtendTypeInLibCall(EVT Type) const
Returns true if arguments should be extended in lib calls.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool shouldAvoidTransformToShift(EVT VT, unsigned Amount) const
Return true if creating a shift of the type by the given amount is not profitable.
virtual bool isFPExtFree(EVT DestVT, EVT SrcVT) const
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const
Return the ValueType of the result of SETCC operations.
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL, bool LegalTypes=true) const
Returns the type for the shift amount of a shift opcode.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
MVT getSimpleValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the MVT corresponding to this LLVM type. See getValueType.
BooleanContent getBooleanContents(bool isVec, bool isFloat) const
For targets without i1 registers, this gives the nature of the high-bits of boolean values held in ty...
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal on this target.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual bool isNarrowingProfitable(EVT SrcVT, EVT DestVT) const
Return true if it's profitable to narrow operations of type SrcVT to DestVT.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT) const
Return true if it is profitable to reduce a load to a smaller type.
virtual unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const
Return the maximum number of "x & (x - 1)" operations that can be done instead of deferring to a cust...
BooleanContent
Enum that describes how the target represents true/false values.
virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const
Return true if integer divide is usually cheaper than a sequence of several shifts,...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
virtual bool hasAndNotCompare(SDValue Y) const
Return true if the target should transform: (X & Y) == Y —> (~X & Y) == 0 (X & Y) !...
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
virtual bool isCtlzFast() const
Return true if ctlz instruction is fast.
virtual bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT, bool IsSigned) const
Return true if it is more correct/profitable to use strict FP_TO_INT conversion operations - canonica...
NegatibleCost
Enum that specifies when a float negation is beneficial.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
ISD::CondCode getCmpLibcallCC(RTLIB::Libcall Call) const
Get the CondCode that's to be used to test the result of the comparison libcall against zero.
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
std::vector< ArgListEntry > ArgListTy
virtual EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
bool isCondCodeLegalOrCustom(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal or custom on this target.
MVT getRegisterType(MVT VT) const
Return the type of registers that this ValueType will eventually require.
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
MulExpansionKind
Enum that specifies when a multiplication should be expanded.
static ISD::NodeType getExtendForContent(BooleanContent Content)
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT.
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions Ref: "Hacker's Delight" by Henry Warren 1...
virtual ConstraintWeight getMultipleConstraintMatchWeight(AsmOperandInfo &info, int maIndex) const
Examine constraint type and operand type and determine a weight value.
SDValue expandVPCTLZ(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTLZ/VP_CTLZ_ZERO_UNDEF nodes.
bool expandMULO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]MULO.
bool expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, SelectionDAG &DAG, MulExpansionKind Kind, SDValue LL=SDValue(), SDValue LH=SDValue(), SDValue RL=SDValue(), SDValue RH=SDValue()) const
Expand a MUL into two nodes.
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
virtual bool isUsedByReturnOnly(SDNode *, SDValue &) const
Return true if result of the specified node is used by a return node only.
virtual void computeKnownBitsForFrameIndex(int FIOp, KnownBits &Known, const MachineFunction &MF) const
Determine which of the bits of FrameIndex FIOp are known to be 0.
SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const
virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const
This method can be implemented by targets that want to expose additional information about sign bits ...
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
virtual unsigned computeNumSignBitsForTargetInstr(GISelKnownBits &Analysis, Register R, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
This method can be implemented by targets that want to expose additional information about sign bits ...
SDValue expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const
Expand VP_BSWAP nodes.
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
SDValue expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const
Expand a VECREDUCE_SEQ_* into an explicit ordered calculation.
SDValue expandCTLZ(SDNode *N, SelectionDAG &DAG) const
Expand CTLZ/CTLZ_ZERO_UNDEF nodes.
SDValue expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const
Expand BITREVERSE nodes.
SDValue expandCTTZ(SDNode *N, SelectionDAG &DAG) const
Expand CTTZ/CTTZ_ZERO_UNDEF nodes.
virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const
Expands target specific indirect branch for the case of JumpTable expansion.
SDValue expandABD(SDNode *N, SelectionDAG &DAG) const
Expand ABDS/ABDU nodes.
virtual Align computeKnownAlignForTargetInstr(GISelKnownBits &Analysis, Register R, const MachineRegisterInfo &MRI, unsigned Depth=0) const
Determine the known alignment for the pointer value R.
std::vector< AsmOperandInfo > AsmOperandInfoVector
SDValue expandShlSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]SHLSAT.
SDValue expandIS_FPCLASS(EVT ResultVT, SDValue Op, FPClassTest Test, SDNodeFlags Flags, const SDLoc &DL, SelectionDAG &DAG) const
Expand check for floating point class.
SDValue expandFP_TO_INT_SAT(SDNode *N, SelectionDAG &DAG) const
Expand FP_TO_[US]INT_SAT into FP_TO_[US]INT and selects or min/max.
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "look through" ops that don't contri...
SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const
Expands an unaligned store to 2 half-size stores for integer values, and possibly more for vectors.
SDValue SimplifyMultipleUseDemandedVectorElts(SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
Helper wrapper around SimplifyMultipleUseDemandedBits, demanding all bits from only some vector eleme...
virtual bool findOptimalMemOpLowering(std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes) const
Determines the optimal series of memory ops to replace the memset / memcpy.
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
void expandSADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::S(ADD|SUB)O.
SDValue expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const
Expand VP_BITREVERSE nodes.
SDValue expandABS(SDNode *N, SelectionDAG &DAG, bool IsNegative=false) const
Expand ABS nodes.
SDValue expandVecReduce(SDNode *Node, SelectionDAG &DAG) const
Expand a VECREDUCE_* into an explicit calculation.
bool ShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const
Check to see if the specified operand of the specified instruction is a constant integer.
SDValue expandVPCTTZElements(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTTZ_ELTS/VP_CTTZ_ELTS_ZERO_UNDEF nodes.
virtual const char * getTargetNodeName(unsigned Opcode) const
This method returns the name of a target specific DAG node.
bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const
Expand float to UINT conversion.
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
virtual bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded vector elements, returning true on success...
bool expandREM(SDNode *Node, SDValue &Result, SelectionDAG &DAG) const
Expand an SREM or UREM using SDIV/UDIV or SDIVREM/UDIVREM, if legal.
std::pair< SDValue, SDValue > expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Expands an unaligned load to 2 half-size loads for an integer, and possibly more for vectors.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
SDValue expandVectorSplice(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::VECTOR_SPLICE.
virtual const char * LowerXConstraint(EVT ConstraintVT) const
Try to replace an X constraint, which matches anything, with another that has more specific requireme...
SDValue expandCTPOP(SDNode *N, SelectionDAG &DAG) const
Expand CTPOP nodes.
SDValue expandBSWAP(SDNode *N, SelectionDAG &DAG) const
Expand BSWAP nodes.
SDValue expandFMINIMUM_FMAXIMUM(SDNode *N, SelectionDAG &DAG) const
Expand fminimum/fmaximum into multiple comparisons with selects.
SDValue CTTZTableLookup(SDNode *N, SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op, unsigned NumBitsPerElt) const
Expand CTTZ via Table Lookup.
virtual bool isKnownNeverNaNForTargetNode(SDValue Op, const SelectionDAG &DAG, bool SNaN=false, unsigned Depth=0) const
If SNaN is false,.
bool expandDIVREMByConstant(SDNode *N, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, SDValue LL=SDValue(), SDValue LH=SDValue()) const
Attempt to expand an n-bit div/rem/divrem by constant using a n/2-bit urem by constant and other arit...
SDValue getVectorSubVecPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, EVT SubVecVT, SDValue Index) const
Get a pointer to a sub-vector of type SubVecVT at index Idx located in memory for a vector of type Ve...
virtual void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool isPositionIndependent() const
std::pair< StringRef, TargetLowering::ConstraintType > ConstraintPair
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
ConstraintGroup getConstraintPreferences(AsmOperandInfo &OpInfo) const
Given an OpInfo with list of constraints codes as strings, return a sorted Vector of pairs of constra...
bool expandFP_TO_SINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const
Expand float(f32) to SINT(i64) conversion.
virtual SDValue SimplifyMultipleUseDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth) const
More limited version of SimplifyDemandedBits that can be used to "look through" ops that don't contri...
virtual SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Glue, const SDLoc &DL, const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const
SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, SDValue N1, MutableArrayRef< int > Mask, SelectionDAG &DAG) const
Tries to build a legal vector shuffle using the provided parameters or equivalent variations.
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
std::pair< SDValue, SDValue > scalarizeVectorLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Turn load of vector type into a load of the individual elements.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
void forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl, bool Signed, EVT WideVT, const SDValue LL, const SDValue LH, const SDValue RL, const SDValue RH, SDValue &Lo, SDValue &Hi) const
forceExpandWideMUL - Unconditionally expand a MUL into either a libcall or brute force via a wide mul...
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
virtual bool isDesirableToCommuteXorWithShift(const SDNode *N) const
Return true if it is profitable to combine an XOR of a logical shift to create a logical shift of NOT...
TargetLowering(const TargetLowering &)=delete
virtual bool shouldSimplifyDemandedVectorElts(SDValue Op, const TargetLoweringOpt &TLO) const
Return true if the target supports simplifying demanded vector elements by converting them to undefs.
bool isConstFalseVal(SDValue N) const
Return if the N is a constant or constant vector equal to the false value from getBooleanContents().
SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::UDIV node expressing a divide by constant, return a DAG expression to select that will ...
SDValue IncrementMemoryAddress(SDValue Addr, SDValue Mask, const SDLoc &DL, EVT DataVT, SelectionDAG &DAG, bool IsCompressedMemory) const
Increments memory address Addr according to the type of the value DataVT that should be stored.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const
Check whether a given call node is in tail position within its function.
virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL, const TargetRegisterInfo *TRI, const CallBase &Call) const
Split up the constraint string from the inline assembly value into the specific constraints and their...
virtual bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts, APInt &UndefElts, const SelectionDAG &DAG, unsigned Depth=0) const
Return true if vector Op has the same value across all DemandedElts, indicating any elements which ma...
SDValue expandRoundInexactToOdd(EVT ResultVT, SDValue Op, const SDLoc &DL, SelectionDAG &DAG) const
Truncate Op to ResultVT.
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, DAGCombinerInfo &DCI, const SDLoc &dl) const
Try to simplify a setcc built with the specified operands and cc.
SDValue expandFunnelShift(SDNode *N, SelectionDAG &DAG) const
Expand funnel shift.
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const
Return true if folding a constant offset with the given GlobalAddress is legal.
bool LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, SDValue Mask, SDValue EVL, bool &NeedInvert, const SDLoc &dl, SDValue &Chain, bool IsSignaling=false) const
Legalize a SETCC or VP_SETCC with given LHS and RHS and condition code CC on the current target.
bool isExtendedTrueVal(const ConstantSDNode *N, EVT VT, bool SExt) const
Return if N is a True value when extended to VT.
bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth, const APInt &DemandedBits, TargetLoweringOpt &TLO) const
Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
bool isConstTrueVal(SDValue N) const
Return if the N is a constant or constant vector equal to the true value from getBooleanContents().
SDValue expandVPCTPOP(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTPOP nodes.
SDValue expandFixedPointDiv(unsigned Opcode, const SDLoc &dl, SDValue LHS, SDValue RHS, unsigned Scale, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]DIVFIX[SAT].
SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index) const
Get a pointer to vector element Idx located in memory for a vector of type VecVT starting at a base address of VecPtr.
virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo, SDValue Op, SelectionDAG *DAG=nullptr) const
Determines the constraint code and constraint type to use for the specific AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
virtual void CollectTargetIntrinsicOperands(const CallInst &I, SmallVectorImpl< SDValue > &Ops, SelectionDAG &DAG) const
SDValue expandVPCTTZ(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTTZ/VP_CTTZ_ZERO_UNDEF nodes.
virtual const Constant * getTargetConstantFromLoad(LoadSDNode *LD) const
This method returns the constant pool value that will be loaded by LD.
SDValue expandFP_ROUND(SDNode *Node, SelectionDAG &DAG) const
Expand round(fp) to fp conversion.
SDValue createSelectForFMINNUM_FMAXNUM(SDNode *Node, SelectionDAG &DAG) const
Try to convert the fminnum/fmaxnum to a compare/select sequence.
SDValue expandROT(SDNode *N, bool AllowVectorOps, SelectionDAG &DAG) const
Expand rotations.
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const
Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs.
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset.
virtual bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, unsigned Depth) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false, does not have undef bits.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
void expandShiftParts(SDNode *N, SDValue &Lo, SDValue &Hi, SelectionDAG &DAG) const
Expand shift-by-parts.
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
This method will be invoked for all target nodes and for any target-independent nodes that the target has registered with invoke it for.
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
SDValue expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[U|S]MULFIX[SAT].
SDValue expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][MIN|MAX].
virtual void computeKnownBitsForTargetInstr(GISelKnownBits &Analysis, Register R, KnownBits &Known, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
Determine which of the bits specified in Mask are known to be either zero or one and return them in the Known bitset.
void expandUADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::U(ADD|SUB)O.
SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::SDIV node expressing a divide by constant, return a DAG expression to select that will generate the same value by multiplying by a magic number.
virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
virtual SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SREM lowering for power-of-2 denominators.
bool expandUINT_TO_FP(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const
Expand UINT(i64) to double(f64) conversion.
bool expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl, SDValue LHS, SDValue RHS, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, MulExpansionKind Kind, SDValue LL=SDValue(), SDValue LH=SDValue(), SDValue RL=SDValue(), SDValue RH=SDValue()) const
Expand a MUL or [US]MUL_LOHI of n-bit values into two or four nodes, respectively, each computing an n/2-bit part of the result.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:76
bool isPositionIndependent() const
const Triple & getTargetTriple() const
TargetOptions Options
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDesc objects that represent all of the machine registers that the target has.
iterator_range< regclass_iterator > regclasses() const
virtual StringRef getRegAsmName(MCRegister Reg) const
Return the assembly name for Reg.
bool isTypeLegalForClass(const TargetRegisterClass &RC, MVT T) const
Return true if the given TargetRegisterClass has the ValueType T.
bool isOSBinFormatCOFF() const
Tests whether the OS uses the COFF binary format.
Definition: Triple.h:724
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:342
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
const fltSemantics & getFltSemantics() const
bool isSingleValueType() const
Return true if the type is a valid type for a register in codegen.
Definition: Type.h:287
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition: Type.h:302
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:129
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:199
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth, bool MatchAllBits=false)
Splat/Merge neighboring bits to widen/narrow the bitmask represented by.
Definition: APInt.cpp:2978
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:751
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:237
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:724
@ FGETSIGN
INT = FGETSIGN(FP) - Return the sign bit of the specified floating point value as an integer 0/1 valu...
Definition: ISDOpcodes.h:498
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2*N], and return the full value as two results, each of type iN.
Definition: ISDOpcodes.h:251
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:560
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:715
@ SMULFIX
RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on 2 integers with the same...
Definition: ISDOpcodes.h:368
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:270
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition: ISDOpcodes.h:488
@ FMAXNUM_IEEE
Definition: ISDOpcodes.h:986
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:240
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1038
@ SMULFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition: ISDOpcodes.h:374
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:784
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:484
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:199
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:544
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:391
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition: ISDOpcodes.h:821
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:256
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:904
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:230
@ SDIVFIX
RESULT = [US]DIVFIX(LHS, RHS, SCALE) - Perform fixed point division on 2 integers with the same width...
Definition: ISDOpcodes.h:381
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1407
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:775
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition: ISDOpcodes.h:663
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:621
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:723
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:931
@ SSUBO
Same for subtraction.
Definition: ISDOpcodes.h:328
@ BRIND
BRIND - Indirect branch.
Definition: ISDOpcodes.h:1059
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition: ISDOpcodes.h:501
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition: ISDOpcodes.h:508
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:350
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:728
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:223
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:628
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defined outside of the scope of this SelectionDAG.
Definition: ISDOpcodes.h:209
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:324
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:652
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:706
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:601
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:574
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
Definition: ISDOpcodes.h:985
@ STRICT_FMAXNUM
Definition: ISDOpcodes.h:425
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:536
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:781
@ STRICT_FMINNUM
Definition: ISDOpcodes.h:426
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:743
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:972
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
Definition: ISDOpcodes.h:360
@ SMULO
Same for multiplication.
Definition: ISDOpcodes.h:332
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition: ISDOpcodes.h:810
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:799
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:675
@ SDIVFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition: ISDOpcodes.h:387
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:889
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:737
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:304
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:443
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:991
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:837
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition: ISDOpcodes.h:158
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:681
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:184
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition: ISDOpcodes.h:658
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:280
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:525
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ VECTOR_SPLICE
VECTOR_SPLICE(VEC1, VEC2, IMM) - Returns a subvector of the same type as VEC1/VEC2 from CONCAT_VECTOR...
Definition: ISDOpcodes.h:613
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:870
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition: ISDOpcodes.h:832
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition: ISDOpcodes.h:856
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:787
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:764
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:341
@ AssertZext
Definition: ISDOpcodes.h:62
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:192
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:516
bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
NodeType getExtForLoadExtType(bool IsFP, LoadExtType)
bool matchUnaryPredicate(SDValue Op, std::function< bool(ConstantSDNode *)> Match, bool AllowUndefs=false)
Hook for matching ConstantSDNode predicate.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isTrueWhenEqual(CondCode Cond)
Return true if the specified condition returns true if the two operands to the condition are equal.
Definition: ISDOpcodes.h:1588
unsigned getUnorderedFlavor(CondCode Cond)
This function returns 0 if the condition is always false if an operand is a NaN, 1 if the condition i...
Definition: ISDOpcodes.h:1593
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
Definition: ISDOpcodes.h:1563
bool matchBinaryPredicate(SDValue LHS, SDValue RHS, std::function< bool(ConstantSDNode *, ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTypeMismatch=false)
Attempt to match a binary predicate against a pair of scalar/splat constants or every element of a pa...
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1530
NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1510
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:456
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
FPClassTest invertFPClassTestIfSimpler(FPClassTest Test)
Evaluates if the specified FP class test is better performed as the inverse (i.e.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition: bit.h:342
ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
bool getShuffleDemandedElts(int SrcWidth, ArrayRef< int > Mask, const APInt &DemandedElts, APInt &DemandedLHS, APInt &DemandedRHS, bool AllowUndefElts=false)
Transform a shuffle mask's output demanded element mask into demanded element masks for the 2 operand...
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:324
bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:275
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
auto find_if_not(R &&Range, UnaryPredicate P)
Definition: STLExtras.h:1754
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
@ Other
Any other memory.
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ Add
Sum of integers.
DWARFExpression::Operation Op
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isConstFalseVal(const TargetLowering &TLI, int64_t Val, bool IsVector, bool IsFP)
Definition: Utils.cpp:1587
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
APFloat neg(APFloat X)
Returns the negated value of the argument.
Definition: APFloat.h:1387
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the largest uint64_t less than or equal to Value and is Skew mod Align.
Definition: MathExtras.h:439
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition: MathExtras.h:360
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:230
static constexpr roundingMode rmTowardZero
Definition: APFloat.h:234
opStatus
IEEE-754R 7: Default exception handling.
Definition: APFloat.h:246
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Represent subnormal handling kind for floating point instruction inputs and outputs.
DenormalModeKind Input
Denormal treatment kind for floating point instruction inputs in the default floating-point environme...
@ PreserveSign
The sign of a flushed-to-zero number is preserved in the sign of 0.
@ PositiveZero
Denormals are flushed to positive zero.
@ IEEE
IEEE-754 denormal numbers preserved.
constexpr bool inputsAreZero() const
Return true if input denormals must be implicitly treated as 0.
Extended Value Type.
Definition: ValueTypes.h:34
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:380
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:136
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:73
EVT changeTypeToInteger() const
Return the type converted to an equivalently sized integer or vector with integer element type.
Definition: ValueTypes.h:120
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:274
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:290
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:146
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:340
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:358
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition: ValueTypes.h:233
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition: ValueTypes.h:349
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:370
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
Definition: ValueTypes.h:415
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
Definition: ValueTypes.h:455
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
Definition: ValueTypes.h:397
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:306
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:64
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:366
bool isFixedLengthVector() const
Definition: ValueTypes.h:177
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:167
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:313
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:202
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition: ValueTypes.h:173
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:318
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition: ValueTypes.h:101
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:326
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:298
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition: ValueTypes.h:438
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:151
ConstraintPrefix Type
Type - The basic type of the constraint: input/output/clobber/label.
Definition: InlineAsm.h:126
int MatchingInput
MatchingInput - If this is not -1, this is an output constraint where an input constraint is required...
Definition: InlineAsm.h:136
ConstraintCodeVector Codes
Code - The constraint code, either the register name (in braces) or the constraint letter/number.
Definition: InlineAsm.h:154
SubConstraintInfoVector multipleAlternatives
multipleAlternatives - If there are multiple alternative constraints, this array will contain them.
Definition: InlineAsm.h:161
bool isIndirect
isIndirect - True if this operand is an indirect operand.
Definition: InlineAsm.h:150
bool hasMatchingInput() const
hasMatchingInput - Return true if this is an output constraint that has a matching input constraint.
Definition: InlineAsm.h:140
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
Definition: KnownBits.h:297
KnownBits anyextOrTrunc(unsigned BitWidth) const
Return known bits for an "any" extension or truncation of the value we're tracking.
Definition: KnownBits.h:182
unsigned countMinSignBits() const
Returns the number of times the sign bit is replicated into the other bits.
Definition: KnownBits.h:251
static KnownBits smax(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for smax(LHS, RHS).
Definition: KnownBits.cpp:208
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition: KnownBits.h:104
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition: KnownBits.h:238
bool isUnknown() const
Returns true if we don't know any bits.
Definition: KnownBits.h:63
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
Definition: KnownBits.h:157
bool hasConflict() const
Returns true if there is conflicting information.
Definition: KnownBits.h:47
static std::optional< bool > sge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGE result.
Definition: KnownBits.cpp:542
unsigned countMaxPopulation() const
Returns the maximum number of bits that could be one.
Definition: KnownBits.h:285
KnownBits concat(const KnownBits &Lo) const
Concatenate the bits from Lo onto the bottom of *this.
Definition: KnownBits.h:229
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:40
static KnownBits umax(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for umax(LHS, RHS).
Definition: KnownBits.cpp:184
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition: KnownBits.h:168
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:71
KnownBits unionWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for either this or RHS or both.
Definition: KnownBits.h:317
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition: KnownBits.h:307
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition: KnownBits.h:176
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition: KnownBits.h:244
static KnownBits smin(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for smin(LHS, RHS).
Definition: KnownBits.cpp:221
static std::optional< bool > ugt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGT result.
Definition: KnownBits.cpp:508
static std::optional< bool > slt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLT result.
Definition: KnownBits.cpp:548
static KnownBits computeForAddSub(bool Add, bool NSW, bool NUW, const KnownBits &LHS, const KnownBits &RHS)
Compute known bits resulting from adding LHS and RHS.
Definition: KnownBits.cpp:57
static std::optional< bool > ult(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULT result.
Definition: KnownBits.cpp:524
static std::optional< bool > ule(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULE result.
Definition: KnownBits.cpp:528
bool isNegative() const
Returns true if this value is known to be negative.
Definition: KnownBits.h:101
static KnownBits mul(const KnownBits &LHS, const KnownBits &RHS, bool NoUndefSelfMultiply=false)
Compute known bits resulting from multiplying LHS and RHS.
Definition: KnownBits.cpp:777
KnownBits anyext(unsigned BitWidth) const
Return known bits for an "any" extension of the value we're tracking, where we don't know anything about the extended bits.
Definition: KnownBits.h:163
static std::optional< bool > sle(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLE result.
Definition: KnownBits.cpp:552
static std::optional< bool > sgt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGT result.
Definition: KnownBits.cpp:532
unsigned countMinPopulation() const
Returns the number of bits known to be one.
Definition: KnownBits.h:282
static std::optional< bool > uge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGE result.
Definition: KnownBits.cpp:518
static KnownBits umin(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for umin(LHS, RHS).
Definition: KnownBits.cpp:202
This class contains a discriminated union of information about pointers in memory operands, relating them back to the original IR-level pointer.
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasNoUnsignedWrap() const
bool hasNoSignedWrap() const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
Magic data for optimising signed division by a constant.
static SignedDivisionByConstantInfo get(const APInt &D)
Calculate the magic numbers required to implement a signed integer division by a constant as a sequence of multiplies, adds and shifts.
This contains information for each constraint that we are lowering.
MVT ConstraintVT
The ValueType for the operand value.
TargetLowering::ConstraintType ConstraintType
Information about the constraint code, e.g.
std::string ConstraintCode
This contains the actual string for the code, like "m".
Value * CallOperandVal
If this is the result output operand or a clobber, this is null, otherwise it is the incoming operand number.
unsigned getMatchedOperand() const
If this is an input matching constraint, this method returns the output operand it matches.
bool isMatchingInputConstraint() const
Return true if this is an input operand that is a matching constraint like "4".
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setIsPostTypeLegalization(bool Value=true)
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
CallLoweringInfo & setDiscardResult(bool Value=true)
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
CallLoweringInfo & setNoReturn(bool Value=true)
CallLoweringInfo & setChain(SDValue InChain)
void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setIsPostTypeLegalization(bool Value=true)
MakeLibCallOptions & setSExt(bool Value=true)
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT, bool Value=true)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetLowering to its clients that want to combine.
bool CombineTo(SDValue O, SDValue N)
Magic data for optimising unsigned division by a constant.
static UnsignedDivisionByConstantInfo get(const APInt &D, unsigned LeadingZeros=0, bool AllowEvenDivisorOptimization=true)
Calculate the magic numbers required to implement an unsigned integer division by a constant as a sequence of multiplies, adds and shifts.