LLVM 19.0.0git
TargetLowering.cpp
Go to the documentation of this file.
1//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This implements the TargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
14#include "llvm/ADT/STLExtras.h"
25#include "llvm/IR/DataLayout.h"
28#include "llvm/IR/LLVMContext.h"
29#include "llvm/MC/MCAsmInfo.h"
30#include "llvm/MC/MCExpr.h"
36#include <cctype>
37using namespace llvm;
38
39/// NOTE: The TargetMachine owns TLOF.
41 : TargetLoweringBase(tm) {}
42
/// Return a human-readable name for the given target-specific DAG node
/// opcode, or nullptr when the opcode is unknown. The base implementation
/// knows no target nodes; targets override this for debug printing.
const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
  return nullptr;
}
46
49}
50
51/// Check whether a given call node is in tail position within its function. If
52/// so, it sets Chain to the input chain of the tail call.
54 SDValue &Chain) const {
56
57 // First, check if tail calls have been disabled in this function.
58 if (F.getFnAttribute("disable-tail-calls").getValueAsBool())
59 return false;
60
61 // Conservatively require the attributes of the call to match those of
62 // the return. Ignore following attributes because they don't affect the
63 // call sequence.
64 AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs());
65 for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
66 Attribute::DereferenceableOrNull, Attribute::NoAlias,
67 Attribute::NonNull, Attribute::NoUndef})
68 CallerAttrs.removeAttribute(Attr);
69
70 if (CallerAttrs.hasAttributes())
71 return false;
72
73 // It's not safe to eliminate the sign / zero extension of the return value.
74 if (CallerAttrs.contains(Attribute::ZExt) ||
75 CallerAttrs.contains(Attribute::SExt))
76 return false;
77
78 // Check if the only use is a function return node.
79 return isUsedByReturnOnly(Node, Chain);
80}
81
83 const uint32_t *CallerPreservedMask,
84 const SmallVectorImpl<CCValAssign> &ArgLocs,
85 const SmallVectorImpl<SDValue> &OutVals) const {
86 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
87 const CCValAssign &ArgLoc = ArgLocs[I];
88 if (!ArgLoc.isRegLoc())
89 continue;
90 MCRegister Reg = ArgLoc.getLocReg();
91 // Only look at callee saved registers.
92 if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
93 continue;
94 // Check that we pass the value used for the caller.
95 // (We look for a CopyFromReg reading a virtual register that is used
96 // for the function live-in value of register Reg)
97 SDValue Value = OutVals[I];
98 if (Value->getOpcode() == ISD::AssertZext)
99 Value = Value.getOperand(0);
100 if (Value->getOpcode() != ISD::CopyFromReg)
101 return false;
102 Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
103 if (MRI.getLiveInPhysReg(ArgReg) != Reg)
104 return false;
105 }
106 return true;
107}
108
109/// Set CallLoweringInfo attribute flags based on a call instruction
110/// and called function attributes.
112 unsigned ArgIdx) {
113 IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
114 IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
115 IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
116 IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
117 IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
118 IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
119 IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
120 IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
121 IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
122 IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
123 IsSwiftAsync = Call->paramHasAttr(ArgIdx, Attribute::SwiftAsync);
124 IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
125 Alignment = Call->getParamStackAlign(ArgIdx);
126 IndirectType = nullptr;
128 "multiple ABI attributes?");
129 if (IsByVal) {
130 IndirectType = Call->getParamByValType(ArgIdx);
131 if (!Alignment)
132 Alignment = Call->getParamAlign(ArgIdx);
133 }
134 if (IsPreallocated)
135 IndirectType = Call->getParamPreallocatedType(ArgIdx);
136 if (IsInAlloca)
137 IndirectType = Call->getParamInAllocaType(ArgIdx);
138 if (IsSRet)
139 IndirectType = Call->getParamStructRetType(ArgIdx);
140}
141
142/// Generate a libcall taking the given operands as arguments and returning a
143/// result of type RetVT.
144std::pair<SDValue, SDValue>
147 MakeLibCallOptions CallOptions,
148 const SDLoc &dl,
149 SDValue InChain) const {
150 if (!InChain)
151 InChain = DAG.getEntryNode();
152
154 Args.reserve(Ops.size());
155
157 for (unsigned i = 0; i < Ops.size(); ++i) {
158 SDValue NewOp = Ops[i];
159 Entry.Node = NewOp;
160 Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
161 Entry.IsSExt = shouldSignExtendTypeInLibCall(NewOp.getValueType(),
162 CallOptions.IsSExt);
163 Entry.IsZExt = !Entry.IsSExt;
164
165 if (CallOptions.IsSoften &&
167 Entry.IsSExt = Entry.IsZExt = false;
168 }
169 Args.push_back(Entry);
170 }
171
172 if (LC == RTLIB::UNKNOWN_LIBCALL)
173 report_fatal_error("Unsupported library call operation!");
176
177 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
179 bool signExtend = shouldSignExtendTypeInLibCall(RetVT, CallOptions.IsSExt);
180 bool zeroExtend = !signExtend;
181
182 if (CallOptions.IsSoften &&
184 signExtend = zeroExtend = false;
185 }
186
187 CLI.setDebugLoc(dl)
188 .setChain(InChain)
189 .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
190 .setNoReturn(CallOptions.DoesNotReturn)
193 .setSExtResult(signExtend)
194 .setZExtResult(zeroExtend);
195 return LowerCallTo(CLI);
196}
197
199 std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
200 unsigned SrcAS, const AttributeList &FuncAttributes) const {
201 if (Limit != ~unsigned(0) && Op.isMemcpyWithFixedDstAlign() &&
202 Op.getSrcAlign() < Op.getDstAlign())
203 return false;
204
205 EVT VT = getOptimalMemOpType(Op, FuncAttributes);
206
207 if (VT == MVT::Other) {
208 // Use the largest integer type whose alignment constraints are satisfied.
209 // We only need to check DstAlign here as SrcAlign is always greater or
210 // equal to DstAlign (or zero).
211 VT = MVT::i64;
212 if (Op.isFixedDstAlign())
213 while (Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
214 !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign()))
216 assert(VT.isInteger());
217
218 // Find the largest legal integer type.
219 MVT LVT = MVT::i64;
220 while (!isTypeLegal(LVT))
221 LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
222 assert(LVT.isInteger());
223
224 // If the type we've chosen is larger than the largest legal integer type
225 // then use that instead.
226 if (VT.bitsGT(LVT))
227 VT = LVT;
228 }
229
230 unsigned NumMemOps = 0;
231 uint64_t Size = Op.size();
232 while (Size) {
233 unsigned VTSize = VT.getSizeInBits() / 8;
234 while (VTSize > Size) {
235 // For now, only use non-vector load / store's for the left-over pieces.
236 EVT NewVT = VT;
237 unsigned NewVTSize;
238
239 bool Found = false;
240 if (VT.isVector() || VT.isFloatingPoint()) {
241 NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
244 Found = true;
245 else if (NewVT == MVT::i64 &&
247 isSafeMemOpType(MVT::f64)) {
248 // i64 is usually not legal on 32-bit targets, but f64 may be.
249 NewVT = MVT::f64;
250 Found = true;
251 }
252 }
253
254 if (!Found) {
255 do {
256 NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
257 if (NewVT == MVT::i8)
258 break;
259 } while (!isSafeMemOpType(NewVT.getSimpleVT()));
260 }
261 NewVTSize = NewVT.getSizeInBits() / 8;
262
263 // If the new VT cannot cover all of the remaining bits, then consider
264 // issuing a (or a pair of) unaligned and overlapping load / store.
265 unsigned Fast;
266 if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
268 VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
270 Fast)
271 VTSize = Size;
272 else {
273 VT = NewVT;
274 VTSize = NewVTSize;
275 }
276 }
277
278 if (++NumMemOps > Limit)
279 return false;
280
281 MemOps.push_back(VT);
282 Size -= VTSize;
283 }
284
285 return true;
286}
287
288/// Soften the operands of a comparison. This code is shared among BR_CC,
289/// SELECT_CC, and SETCC handlers.
291 SDValue &NewLHS, SDValue &NewRHS,
292 ISD::CondCode &CCCode,
293 const SDLoc &dl, const SDValue OldLHS,
294 const SDValue OldRHS) const {
295 SDValue Chain;
296 return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
297 OldRHS, Chain);
298}
299
301 SDValue &NewLHS, SDValue &NewRHS,
302 ISD::CondCode &CCCode,
303 const SDLoc &dl, const SDValue OldLHS,
304 const SDValue OldRHS,
305 SDValue &Chain,
306 bool IsSignaling) const {
307 // FIXME: Currently we cannot really respect all IEEE predicates due to libgcc
308 // not supporting it. We can update this code when libgcc provides such
309 // functions.
310
311 assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
312 && "Unsupported setcc type!");
313
314 // Expand into one or more soft-fp libcall(s).
315 RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
316 bool ShouldInvertCC = false;
317 switch (CCCode) {
318 case ISD::SETEQ:
319 case ISD::SETOEQ:
320 LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
321 (VT == MVT::f64) ? RTLIB::OEQ_F64 :
322 (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
323 break;
324 case ISD::SETNE:
325 case ISD::SETUNE:
326 LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
327 (VT == MVT::f64) ? RTLIB::UNE_F64 :
328 (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
329 break;
330 case ISD::SETGE:
331 case ISD::SETOGE:
332 LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
333 (VT == MVT::f64) ? RTLIB::OGE_F64 :
334 (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
335 break;
336 case ISD::SETLT:
337 case ISD::SETOLT:
338 LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
339 (VT == MVT::f64) ? RTLIB::OLT_F64 :
340 (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
341 break;
342 case ISD::SETLE:
343 case ISD::SETOLE:
344 LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
345 (VT == MVT::f64) ? RTLIB::OLE_F64 :
346 (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
347 break;
348 case ISD::SETGT:
349 case ISD::SETOGT:
350 LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
351 (VT == MVT::f64) ? RTLIB::OGT_F64 :
352 (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
353 break;
354 case ISD::SETO:
355 ShouldInvertCC = true;
356 [[fallthrough]];
357 case ISD::SETUO:
358 LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
359 (VT == MVT::f64) ? RTLIB::UO_F64 :
360 (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
361 break;
362 case ISD::SETONE:
363 // SETONE = O && UNE
364 ShouldInvertCC = true;
365 [[fallthrough]];
366 case ISD::SETUEQ:
367 LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
368 (VT == MVT::f64) ? RTLIB::UO_F64 :
369 (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
370 LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
371 (VT == MVT::f64) ? RTLIB::OEQ_F64 :
372 (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
373 break;
374 default:
375 // Invert CC for unordered comparisons
376 ShouldInvertCC = true;
377 switch (CCCode) {
378 case ISD::SETULT:
379 LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
380 (VT == MVT::f64) ? RTLIB::OGE_F64 :
381 (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
382 break;
383 case ISD::SETULE:
384 LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
385 (VT == MVT::f64) ? RTLIB::OGT_F64 :
386 (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
387 break;
388 case ISD::SETUGT:
389 LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
390 (VT == MVT::f64) ? RTLIB::OLE_F64 :
391 (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
392 break;
393 case ISD::SETUGE:
394 LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
395 (VT == MVT::f64) ? RTLIB::OLT_F64 :
396 (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
397 break;
398 default: llvm_unreachable("Do not know how to soften this setcc!");
399 }
400 }
401
402 // Use the target specific return value for comparison lib calls.
404 SDValue Ops[2] = {NewLHS, NewRHS};
406 EVT OpsVT[2] = { OldLHS.getValueType(),
407 OldRHS.getValueType() };
408 CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true);
409 auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
410 NewLHS = Call.first;
411 NewRHS = DAG.getConstant(0, dl, RetVT);
412
413 CCCode = getCmpLibcallCC(LC1);
414 if (ShouldInvertCC) {
415 assert(RetVT.isInteger());
416 CCCode = getSetCCInverse(CCCode, RetVT);
417 }
418
419 if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
420 // Update Chain.
421 Chain = Call.second;
422 } else {
423 EVT SetCCVT =
424 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
425 SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
426 auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
427 CCCode = getCmpLibcallCC(LC2);
428 if (ShouldInvertCC)
429 CCCode = getSetCCInverse(CCCode, RetVT);
430 NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
431 if (Chain)
432 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
433 Call2.second);
434 NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
435 Tmp.getValueType(), Tmp, NewLHS);
436 NewRHS = SDValue();
437 }
438}
439
440/// Return the entry encoding for a jump table in the current function. The
441/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
443 // In non-pic modes, just use the address of a block.
444 if (!isPositionIndependent())
446
447 // In PIC mode, if the target supports a GPRel32 directive, use it.
448 if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr)
450
451 // Otherwise, use a label difference.
453}
454
456 SelectionDAG &DAG) const {
457 // If our PIC model is GP relative, use the global offset table as the base.
458 unsigned JTEncoding = getJumpTableEncoding();
459
463
464 return Table;
465}
466
467/// This returns the relocation base for the given PIC jumptable, the same as
468/// getPICJumpTableRelocBase, but as an MCExpr.
469const MCExpr *
471 unsigned JTI,MCContext &Ctx) const{
472 // The normal PIC reloc base is the label at the start of the jump table.
473 return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
474}
475
477 SDValue Addr, int JTI,
478 SelectionDAG &DAG) const {
479 SDValue Chain = Value;
480 // Jump table debug info is only needed if CodeView is enabled.
482 Chain = DAG.getJumpTableDebugInfo(JTI, Chain, dl);
483 }
484 return DAG.getNode(ISD::BRIND, dl, MVT::Other, Chain, Addr);
485}
486
487bool
489 const TargetMachine &TM = getTargetMachine();
490 const GlobalValue *GV = GA->getGlobal();
491
492 // If the address is not even local to this DSO we will have to load it from
493 // a got and then add the offset.
494 if (!TM.shouldAssumeDSOLocal(GV))
495 return false;
496
497 // If the code is position independent we will have to add a base register.
498 if (isPositionIndependent())
499 return false;
500
501 // Otherwise we can do it.
502 return true;
503}
504
505//===----------------------------------------------------------------------===//
506// Optimization Methods
507//===----------------------------------------------------------------------===//
508
509/// If the specified instruction has a constant integer operand and there are
510/// bits set in that constant that are not demanded, then clear those bits and
511/// return true.
513 const APInt &DemandedBits,
514 const APInt &DemandedElts,
515 TargetLoweringOpt &TLO) const {
516 SDLoc DL(Op);
517 unsigned Opcode = Op.getOpcode();
518
519 // Early-out if we've ended up calling an undemanded node, leave this to
520 // constant folding.
521 if (DemandedBits.isZero() || DemandedElts.isZero())
522 return false;
523
524 // Do target-specific constant optimization.
525 if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
526 return TLO.New.getNode();
527
528 // FIXME: ISD::SELECT, ISD::SELECT_CC
529 switch (Opcode) {
530 default:
531 break;
532 case ISD::XOR:
533 case ISD::AND:
534 case ISD::OR: {
535 auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
536 if (!Op1C || Op1C->isOpaque())
537 return false;
538
539 // If this is a 'not' op, don't touch it because that's a canonical form.
540 const APInt &C = Op1C->getAPIntValue();
541 if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(C))
542 return false;
543
544 if (!C.isSubsetOf(DemandedBits)) {
545 EVT VT = Op.getValueType();
546 SDValue NewC = TLO.DAG.getConstant(DemandedBits & C, DL, VT);
547 SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC,
548 Op->getFlags());
549 return TLO.CombineTo(Op, NewOp);
550 }
551
552 break;
553 }
554 }
555
556 return false;
557}
558
560 const APInt &DemandedBits,
561 TargetLoweringOpt &TLO) const {
562 EVT VT = Op.getValueType();
563 APInt DemandedElts = VT.isVector()
565 : APInt(1, 1);
566 return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
567}
568
569/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
570/// This uses isTruncateFree/isZExtFree and ANY_EXTEND for the widening cast,
571/// but it could be generalized for targets with other types of implicit
572/// widening casts.
574 const APInt &DemandedBits,
575 TargetLoweringOpt &TLO) const {
576 assert(Op.getNumOperands() == 2 &&
577 "ShrinkDemandedOp only supports binary operators!");
578 assert(Op.getNode()->getNumValues() == 1 &&
579 "ShrinkDemandedOp only supports nodes with one result!");
580
581 EVT VT = Op.getValueType();
582 SelectionDAG &DAG = TLO.DAG;
583 SDLoc dl(Op);
584
585 // Early return, as this function cannot handle vector types.
586 if (VT.isVector())
587 return false;
588
589 // Don't do this if the node has another user, which may require the
590 // full value.
591 if (!Op.getNode()->hasOneUse())
592 return false;
593
594 // Search for the smallest integer type with free casts to and from
595 // Op's type. For expedience, just check power-of-2 integer types.
596 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
597 unsigned DemandedSize = DemandedBits.getActiveBits();
598 for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
599 SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
600 EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
601 if (TLI.isTruncateFree(VT, SmallVT) && TLI.isZExtFree(SmallVT, VT)) {
602 // We found a type with free casts.
603 SDValue X = DAG.getNode(
604 Op.getOpcode(), dl, SmallVT,
605 DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
606 DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)));
607 assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
608 SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, VT, X);
609 return TLO.CombineTo(Op, Z);
610 }
611 }
612 return false;
613}
614
616 DAGCombinerInfo &DCI) const {
617 SelectionDAG &DAG = DCI.DAG;
618 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
619 !DCI.isBeforeLegalizeOps());
620 KnownBits Known;
621
622 bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
623 if (Simplified) {
624 DCI.AddToWorklist(Op.getNode());
626 }
627 return Simplified;
628}
629
631 const APInt &DemandedElts,
632 DAGCombinerInfo &DCI) const {
633 SelectionDAG &DAG = DCI.DAG;
634 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
635 !DCI.isBeforeLegalizeOps());
636 KnownBits Known;
637
638 bool Simplified =
639 SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO);
640 if (Simplified) {
641 DCI.AddToWorklist(Op.getNode());
643 }
644 return Simplified;
645}
646
648 KnownBits &Known,
650 unsigned Depth,
651 bool AssumeSingleUse) const {
652 EVT VT = Op.getValueType();
653
654 // Since the number of lanes in a scalable vector is unknown at compile time,
655 // we track one bit which is implicitly broadcast to all lanes. This means
656 // that all lanes in a scalable vector are considered demanded.
657 APInt DemandedElts = VT.isFixedLengthVector()
659 : APInt(1, 1);
660 return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
661 AssumeSingleUse);
662}
663
664// TODO: Under what circumstances can we create nodes? Constant folding?
666 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
667 SelectionDAG &DAG, unsigned Depth) const {
668 EVT VT = Op.getValueType();
669
670 // Limit search depth.
672 return SDValue();
673
674 // Ignore UNDEFs.
675 if (Op.isUndef())
676 return SDValue();
677
678 // Not demanding any bits/elts from Op.
679 if (DemandedBits == 0 || DemandedElts == 0)
680 return DAG.getUNDEF(VT);
681
682 bool IsLE = DAG.getDataLayout().isLittleEndian();
683 unsigned NumElts = DemandedElts.getBitWidth();
684 unsigned BitWidth = DemandedBits.getBitWidth();
685 KnownBits LHSKnown, RHSKnown;
686 switch (Op.getOpcode()) {
687 case ISD::BITCAST: {
688 if (VT.isScalableVector())
689 return SDValue();
690
691 SDValue Src = peekThroughBitcasts(Op.getOperand(0));
692 EVT SrcVT = Src.getValueType();
693 EVT DstVT = Op.getValueType();
694 if (SrcVT == DstVT)
695 return Src;
696
697 unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
698 unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
699 if (NumSrcEltBits == NumDstEltBits)
700 if (SDValue V = SimplifyMultipleUseDemandedBits(
701 Src, DemandedBits, DemandedElts, DAG, Depth + 1))
702 return DAG.getBitcast(DstVT, V);
703
704 if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) {
705 unsigned Scale = NumDstEltBits / NumSrcEltBits;
706 unsigned NumSrcElts = SrcVT.getVectorNumElements();
707 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
708 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
709 for (unsigned i = 0; i != Scale; ++i) {
710 unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
711 unsigned BitOffset = EltOffset * NumSrcEltBits;
712 APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
713 if (!Sub.isZero()) {
714 DemandedSrcBits |= Sub;
715 for (unsigned j = 0; j != NumElts; ++j)
716 if (DemandedElts[j])
717 DemandedSrcElts.setBit((j * Scale) + i);
718 }
719 }
720
721 if (SDValue V = SimplifyMultipleUseDemandedBits(
722 Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
723 return DAG.getBitcast(DstVT, V);
724 }
725
726 // TODO - bigendian once we have test coverage.
727 if (IsLE && (NumSrcEltBits % NumDstEltBits) == 0) {
728 unsigned Scale = NumSrcEltBits / NumDstEltBits;
729 unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
730 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
731 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
732 for (unsigned i = 0; i != NumElts; ++i)
733 if (DemandedElts[i]) {
734 unsigned Offset = (i % Scale) * NumDstEltBits;
735 DemandedSrcBits.insertBits(DemandedBits, Offset);
736 DemandedSrcElts.setBit(i / Scale);
737 }
738
739 if (SDValue V = SimplifyMultipleUseDemandedBits(
740 Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
741 return DAG.getBitcast(DstVT, V);
742 }
743
744 break;
745 }
746 case ISD::FREEZE: {
747 SDValue N0 = Op.getOperand(0);
748 if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, DemandedElts,
749 /*PoisonOnly=*/false))
750 return N0;
751 break;
752 }
753 case ISD::AND: {
754 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
755 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
756
757 // If all of the demanded bits are known 1 on one side, return the other.
758 // These bits cannot contribute to the result of the 'and' in this
759 // context.
760 if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
761 return Op.getOperand(0);
762 if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
763 return Op.getOperand(1);
764 break;
765 }
766 case ISD::OR: {
767 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
768 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
769
770 // If all of the demanded bits are known zero on one side, return the
771 // other. These bits cannot contribute to the result of the 'or' in this
772 // context.
773 if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
774 return Op.getOperand(0);
775 if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
776 return Op.getOperand(1);
777 break;
778 }
779 case ISD::XOR: {
780 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
781 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
782
783 // If all of the demanded bits are known zero on one side, return the
784 // other.
785 if (DemandedBits.isSubsetOf(RHSKnown.Zero))
786 return Op.getOperand(0);
787 if (DemandedBits.isSubsetOf(LHSKnown.Zero))
788 return Op.getOperand(1);
789 break;
790 }
791 case ISD::SHL: {
792 // If we are only demanding sign bits then we can use the shift source
793 // directly.
794 if (const APInt *MaxSA =
795 DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
796 SDValue Op0 = Op.getOperand(0);
797 unsigned ShAmt = MaxSA->getZExtValue();
798 unsigned NumSignBits =
799 DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
800 unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
801 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
802 return Op0;
803 }
804 break;
805 }
806 case ISD::SETCC: {
807 SDValue Op0 = Op.getOperand(0);
808 SDValue Op1 = Op.getOperand(1);
809 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
810 // If (1) we only need the sign-bit, (2) the setcc operands are the same
811 // width as the setcc result, and (3) the result of a setcc conforms to 0 or
812 // -1, we may be able to bypass the setcc.
813 if (DemandedBits.isSignMask() &&
817 // If we're testing X < 0, then this compare isn't needed - just use X!
818 // FIXME: We're limiting to integer types here, but this should also work
819 // if we don't care about FP signed-zero. The use of SETLT with FP means
820 // that we don't care about NaNs.
821 if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
823 return Op0;
824 }
825 break;
826 }
828 // If none of the extended bits are demanded, eliminate the sextinreg.
829 SDValue Op0 = Op.getOperand(0);
830 EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
831 unsigned ExBits = ExVT.getScalarSizeInBits();
832 if (DemandedBits.getActiveBits() <= ExBits &&
834 return Op0;
835 // If the input is already sign extended, just drop the extension.
836 unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
837 if (NumSignBits >= (BitWidth - ExBits + 1))
838 return Op0;
839 break;
840 }
844 if (VT.isScalableVector())
845 return SDValue();
846
847 // If we only want the lowest element and none of extended bits, then we can
848 // return the bitcasted source vector.
849 SDValue Src = Op.getOperand(0);
850 EVT SrcVT = Src.getValueType();
851 EVT DstVT = Op.getValueType();
852 if (IsLE && DemandedElts == 1 &&
853 DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
854 DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
855 return DAG.getBitcast(DstVT, Src);
856 }
857 break;
858 }
860 if (VT.isScalableVector())
861 return SDValue();
862
863 // If we don't demand the inserted element, return the base vector.
864 SDValue Vec = Op.getOperand(0);
865 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
866 EVT VecVT = Vec.getValueType();
867 if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
868 !DemandedElts[CIdx->getZExtValue()])
869 return Vec;
870 break;
871 }
873 if (VT.isScalableVector())
874 return SDValue();
875
876 SDValue Vec = Op.getOperand(0);
877 SDValue Sub = Op.getOperand(1);
878 uint64_t Idx = Op.getConstantOperandVal(2);
879 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
880 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
881 // If we don't demand the inserted subvector, return the base vector.
882 if (DemandedSubElts == 0)
883 return Vec;
884 break;
885 }
886 case ISD::VECTOR_SHUFFLE: {
888 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
889
890 // If all the demanded elts are from one operand and are inline,
891 // then we can use the operand directly.
892 bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
893 for (unsigned i = 0; i != NumElts; ++i) {
894 int M = ShuffleMask[i];
895 if (M < 0 || !DemandedElts[i])
896 continue;
897 AllUndef = false;
898 IdentityLHS &= (M == (int)i);
899 IdentityRHS &= ((M - NumElts) == i);
900 }
901
902 if (AllUndef)
903 return DAG.getUNDEF(Op.getValueType());
904 if (IdentityLHS)
905 return Op.getOperand(0);
906 if (IdentityRHS)
907 return Op.getOperand(1);
908 break;
909 }
910 default:
911 // TODO: Probably okay to remove after audit; here to reduce change size
912 // in initial enablement patch for scalable vectors
913 if (VT.isScalableVector())
914 return SDValue();
915
916 if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
917 if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
918 Op, DemandedBits, DemandedElts, DAG, Depth))
919 return V;
920 break;
921 }
922 return SDValue();
923}
924
927 unsigned Depth) const {
928 EVT VT = Op.getValueType();
929 // Since the number of lanes in a scalable vector is unknown at compile time,
930 // we track one bit which is implicitly broadcast to all lanes. This means
931 // that all lanes in a scalable vector are considered demanded.
932 APInt DemandedElts = VT.isFixedLengthVector()
934 : APInt(1, 1);
935 return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
936 Depth);
937}
938
940 SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
941 unsigned Depth) const {
942 APInt DemandedBits = APInt::getAllOnes(Op.getScalarValueSizeInBits());
943 return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
944 Depth);
945}
946
947// Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1).
948// or to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1).
950 const TargetLowering &TLI,
951 const APInt &DemandedBits,
952 const APInt &DemandedElts,
953 unsigned Depth) {
954 assert((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) &&
955 "SRL or SRA node is required here!");
956 // Is the right shift using an immediate value of 1?
957 ConstantSDNode *N1C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
958 if (!N1C || !N1C->isOne())
959 return SDValue();
960
961 // We are looking for an avgfloor
962 // add(ext, ext)
963 // or one of these as a avgceil
964 // add(add(ext, ext), 1)
965 // add(add(ext, 1), ext)
966 // add(ext, add(ext, 1))
967 SDValue Add = Op.getOperand(0);
968 if (Add.getOpcode() != ISD::ADD)
969 return SDValue();
970
971 SDValue ExtOpA = Add.getOperand(0);
972 SDValue ExtOpB = Add.getOperand(1);
973 SDValue Add2;
974 auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3, SDValue A) {
975 ConstantSDNode *ConstOp;
976 if ((ConstOp = isConstOrConstSplat(Op2, DemandedElts)) &&
977 ConstOp->isOne()) {
978 ExtOpA = Op1;
979 ExtOpB = Op3;
980 Add2 = A;
981 return true;
982 }
983 if ((ConstOp = isConstOrConstSplat(Op3, DemandedElts)) &&
984 ConstOp->isOne()) {
985 ExtOpA = Op1;
986 ExtOpB = Op2;
987 Add2 = A;
988 return true;
989 }
990 return false;
991 };
992 bool IsCeil =
993 (ExtOpA.getOpcode() == ISD::ADD &&
994 MatchOperands(ExtOpA.getOperand(0), ExtOpA.getOperand(1), ExtOpB, ExtOpA)) ||
995 (ExtOpB.getOpcode() == ISD::ADD &&
996 MatchOperands(ExtOpB.getOperand(0), ExtOpB.getOperand(1), ExtOpA, ExtOpB));
997
998 // If the shift is signed (sra):
999 // - Needs >= 2 sign bit for both operands.
1000 // - Needs >= 2 zero bits.
1001 // If the shift is unsigned (srl):
1002 // - Needs >= 1 zero bit for both operands.
1003 // - Needs 1 demanded bit zero and >= 2 sign bits.
1004 unsigned ShiftOpc = Op.getOpcode();
1005 bool IsSigned = false;
1006 unsigned KnownBits;
1007 unsigned NumSignedA = DAG.ComputeNumSignBits(ExtOpA, DemandedElts, Depth);
1008 unsigned NumSignedB = DAG.ComputeNumSignBits(ExtOpB, DemandedElts, Depth);
1009 unsigned NumSigned = std::min(NumSignedA, NumSignedB) - 1;
1010 unsigned NumZeroA =
1011 DAG.computeKnownBits(ExtOpA, DemandedElts, Depth).countMinLeadingZeros();
1012 unsigned NumZeroB =
1013 DAG.computeKnownBits(ExtOpB, DemandedElts, Depth).countMinLeadingZeros();
1014 unsigned NumZero = std::min(NumZeroA, NumZeroB);
1015
1016 switch (ShiftOpc) {
1017 default:
1018 llvm_unreachable("Unexpected ShiftOpc in combineShiftToAVG");
1019 case ISD::SRA: {
1020 if (NumZero >= 2 && NumSigned < NumZero) {
1021 IsSigned = false;
1022 KnownBits = NumZero;
1023 break;
1024 }
1025 if (NumSigned >= 1) {
1026 IsSigned = true;
1027 KnownBits = NumSigned;
1028 break;
1029 }
1030 return SDValue();
1031 }
1032 case ISD::SRL: {
1033 if (NumZero >= 1 && NumSigned < NumZero) {
1034 IsSigned = false;
1035 KnownBits = NumZero;
1036 break;
1037 }
1038 if (NumSigned >= 1 && DemandedBits.isSignBitClear()) {
1039 IsSigned = true;
1040 KnownBits = NumSigned;
1041 break;
1042 }
1043 return SDValue();
1044 }
1045 }
1046
1047 unsigned AVGOpc = IsCeil ? (IsSigned ? ISD::AVGCEILS : ISD::AVGCEILU)
1048 : (IsSigned ? ISD::AVGFLOORS : ISD::AVGFLOORU);
1049
1050 // Find the smallest power-2 type that is legal for this vector size and
1051 // operation, given the original type size and the number of known sign/zero
1052 // bits.
1053 EVT VT = Op.getValueType();
1054 unsigned MinWidth =
1055 std::max<unsigned>(VT.getScalarSizeInBits() - KnownBits, 8);
1056 EVT NVT = EVT::getIntegerVT(*DAG.getContext(), llvm::bit_ceil(MinWidth));
1057 if (VT.isVector())
1058 NVT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount());
1059 if (!TLI.isOperationLegalOrCustom(AVGOpc, NVT)) {
1060 // If we could not transform, and (both) adds are nuw/nsw, we can use the
1061 // larger type size to do the transform.
1062 if (!TLI.isOperationLegalOrCustom(AVGOpc, VT))
1063 return SDValue();
1064 if (DAG.willNotOverflowAdd(IsSigned, Add.getOperand(0),
1065 Add.getOperand(1)) &&
1066 (!Add2 || DAG.willNotOverflowAdd(IsSigned, Add2.getOperand(0),
1067 Add2.getOperand(1))))
1068 NVT = VT;
1069 else
1070 return SDValue();
1071 }
1072
1073 SDLoc DL(Op);
1074 SDValue ResultAVG =
1075 DAG.getNode(AVGOpc, DL, NVT, DAG.getExtOrTrunc(IsSigned, ExtOpA, DL, NVT),
1076 DAG.getExtOrTrunc(IsSigned, ExtOpB, DL, NVT));
1077 return DAG.getExtOrTrunc(IsSigned, ResultAVG, DL, VT);
1078}
1079
1080/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
1081/// result of Op are ever used downstream. If we can use this information to
1082/// simplify Op, create a new simplified DAG node and return true, returning the
1083/// original and new nodes in Old and New. Otherwise, analyze the expression and
1084/// return a mask of Known bits for the expression (used to simplify the
1085/// caller). The Known bits may only be accurate for those bits in the
1086/// OriginalDemandedBits and OriginalDemandedElts.
1088 SDValue Op, const APInt &OriginalDemandedBits,
1089 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
1090 unsigned Depth, bool AssumeSingleUse) const {
1091 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
1092 assert(Op.getScalarValueSizeInBits() == BitWidth &&
1093 "Mask size mismatches value type size!");
1094
1095 // Don't know anything.
1096 Known = KnownBits(BitWidth);
1097
1098 EVT VT = Op.getValueType();
1099 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
1100 unsigned NumElts = OriginalDemandedElts.getBitWidth();
1101 assert((!VT.isFixedLengthVector() || NumElts == VT.getVectorNumElements()) &&
1102 "Unexpected vector size");
1103
1104 APInt DemandedBits = OriginalDemandedBits;
1105 APInt DemandedElts = OriginalDemandedElts;
1106 SDLoc dl(Op);
1107
1108 // Undef operand.
1109 if (Op.isUndef())
1110 return false;
1111
1112 // We can't simplify target constants.
1113 if (Op.getOpcode() == ISD::TargetConstant)
1114 return false;
1115
1116 if (Op.getOpcode() == ISD::Constant) {
1117 // We know all of the bits for a constant!
1118 Known = KnownBits::makeConstant(Op->getAsAPIntVal());
1119 return false;
1120 }
1121
1122 if (Op.getOpcode() == ISD::ConstantFP) {
1123 // We know all of the bits for a floating point constant!
1125 cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt());
1126 return false;
1127 }
1128
1129 // Other users may use these bits.
1130 bool HasMultiUse = false;
1131 if (!AssumeSingleUse && !Op.getNode()->hasOneUse()) {
1133 // Limit search depth.
1134 return false;
1135 }
1136 // Allow multiple uses, just set the DemandedBits/Elts to all bits.
1138 DemandedElts = APInt::getAllOnes(NumElts);
1139 HasMultiUse = true;
1140 } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
1141 // Not demanding any bits/elts from Op.
1142 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1143 } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
1144 // Limit search depth.
1145 return false;
1146 }
1147
1148 KnownBits Known2;
1149 switch (Op.getOpcode()) {
1150 case ISD::SCALAR_TO_VECTOR: {
1151 if (VT.isScalableVector())
1152 return false;
1153 if (!DemandedElts[0])
1154 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1155
1156 KnownBits SrcKnown;
1157 SDValue Src = Op.getOperand(0);
1158 unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
1159 APInt SrcDemandedBits = DemandedBits.zext(SrcBitWidth);
1160 if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
1161 return true;
1162
1163 // Upper elements are undef, so only get the knownbits if we just demand
1164 // the bottom element.
1165 if (DemandedElts == 1)
1166 Known = SrcKnown.anyextOrTrunc(BitWidth);
1167 break;
1168 }
1169 case ISD::BUILD_VECTOR:
1170 // Collect the known bits that are shared by every demanded element.
1171 // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
1172 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1173 return false; // Don't fall through, will infinitely loop.
1174 case ISD::SPLAT_VECTOR: {
1175 SDValue Scl = Op.getOperand(0);
1176 APInt DemandedSclBits = DemandedBits.zextOrTrunc(Scl.getValueSizeInBits());
1177 KnownBits KnownScl;
1178 if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1179 return true;
1180
1181 // Implicitly truncate the bits to match the official semantics of
1182 // SPLAT_VECTOR.
1183 Known = KnownScl.trunc(BitWidth);
1184 break;
1185 }
1186 case ISD::LOAD: {
1187 auto *LD = cast<LoadSDNode>(Op);
1188 if (getTargetConstantFromLoad(LD)) {
1189 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1190 return false; // Don't fall through, will infinitely loop.
1191 }
1192 if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
1193 // If this is a ZEXTLoad and we are looking at the loaded value.
1194 EVT MemVT = LD->getMemoryVT();
1195 unsigned MemBits = MemVT.getScalarSizeInBits();
1196 Known.Zero.setBitsFrom(MemBits);
1197 return false; // Don't fall through, will infinitely loop.
1198 }
1199 break;
1200 }
1202 if (VT.isScalableVector())
1203 return false;
1204 SDValue Vec = Op.getOperand(0);
1205 SDValue Scl = Op.getOperand(1);
1206 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
1207 EVT VecVT = Vec.getValueType();
1208
1209 // If index isn't constant, assume we need all vector elements AND the
1210 // inserted element.
1211 APInt DemandedVecElts(DemandedElts);
1212 if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
1213 unsigned Idx = CIdx->getZExtValue();
1214 DemandedVecElts.clearBit(Idx);
1215
1216 // Inserted element is not required.
1217 if (!DemandedElts[Idx])
1218 return TLO.CombineTo(Op, Vec);
1219 }
1220
1221 KnownBits KnownScl;
1222 unsigned NumSclBits = Scl.getScalarValueSizeInBits();
1223 APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
1224 if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1225 return true;
1226
1227 Known = KnownScl.anyextOrTrunc(BitWidth);
1228
1229 KnownBits KnownVec;
1230 if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
1231 Depth + 1))
1232 return true;
1233
1234 if (!!DemandedVecElts)
1235 Known = Known.intersectWith(KnownVec);
1236
1237 return false;
1238 }
1239 case ISD::INSERT_SUBVECTOR: {
1240 if (VT.isScalableVector())
1241 return false;
1242 // Demand any elements from the subvector and the remainder from the src its
1243 // inserted into.
1244 SDValue Src = Op.getOperand(0);
1245 SDValue Sub = Op.getOperand(1);
1246 uint64_t Idx = Op.getConstantOperandVal(2);
1247 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
1248 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
1249 APInt DemandedSrcElts = DemandedElts;
1250 DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
1251
1252 KnownBits KnownSub, KnownSrc;
1253 if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
1254 Depth + 1))
1255 return true;
1256 if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, KnownSrc, TLO,
1257 Depth + 1))
1258 return true;
1259
1260 Known.Zero.setAllBits();
1261 Known.One.setAllBits();
1262 if (!!DemandedSubElts)
1263 Known = Known.intersectWith(KnownSub);
1264 if (!!DemandedSrcElts)
1265 Known = Known.intersectWith(KnownSrc);
1266
1267 // Attempt to avoid multi-use src if we don't need anything from it.
1268 if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() ||
1269 !DemandedSrcElts.isAllOnes()) {
1270 SDValue NewSub = SimplifyMultipleUseDemandedBits(
1271 Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
1272 SDValue NewSrc = SimplifyMultipleUseDemandedBits(
1273 Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1274 if (NewSub || NewSrc) {
1275 NewSub = NewSub ? NewSub : Sub;
1276 NewSrc = NewSrc ? NewSrc : Src;
1277 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub,
1278 Op.getOperand(2));
1279 return TLO.CombineTo(Op, NewOp);
1280 }
1281 }
1282 break;
1283 }
1285 if (VT.isScalableVector())
1286 return false;
1287 // Offset the demanded elts by the subvector index.
1288 SDValue Src = Op.getOperand(0);
1289 if (Src.getValueType().isScalableVector())
1290 break;
1291 uint64_t Idx = Op.getConstantOperandVal(1);
1292 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
1293 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
1294
1295 if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO,
1296 Depth + 1))
1297 return true;
1298
1299 // Attempt to avoid multi-use src if we don't need anything from it.
1300 if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
1301 SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
1302 Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1303 if (DemandedSrc) {
1304 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc,
1305 Op.getOperand(1));
1306 return TLO.CombineTo(Op, NewOp);
1307 }
1308 }
1309 break;
1310 }
1311 case ISD::CONCAT_VECTORS: {
1312 if (VT.isScalableVector())
1313 return false;
1314 Known.Zero.setAllBits();
1315 Known.One.setAllBits();
1316 EVT SubVT = Op.getOperand(0).getValueType();
1317 unsigned NumSubVecs = Op.getNumOperands();
1318 unsigned NumSubElts = SubVT.getVectorNumElements();
1319 for (unsigned i = 0; i != NumSubVecs; ++i) {
1320 APInt DemandedSubElts =
1321 DemandedElts.extractBits(NumSubElts, i * NumSubElts);
1322 if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
1323 Known2, TLO, Depth + 1))
1324 return true;
1325 // Known bits are shared by every demanded subvector element.
1326 if (!!DemandedSubElts)
1327 Known = Known.intersectWith(Known2);
1328 }
1329 break;
1330 }
1331 case ISD::VECTOR_SHUFFLE: {
1332 assert(!VT.isScalableVector());
1333 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
1334
1335 // Collect demanded elements from shuffle operands..
1336 APInt DemandedLHS, DemandedRHS;
1337 if (!getShuffleDemandedElts(NumElts, ShuffleMask, DemandedElts, DemandedLHS,
1338 DemandedRHS))
1339 break;
1340
1341 if (!!DemandedLHS || !!DemandedRHS) {
1342 SDValue Op0 = Op.getOperand(0);
1343 SDValue Op1 = Op.getOperand(1);
1344
1345 Known.Zero.setAllBits();
1346 Known.One.setAllBits();
1347 if (!!DemandedLHS) {
1348 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
1349 Depth + 1))
1350 return true;
1351 Known = Known.intersectWith(Known2);
1352 }
1353 if (!!DemandedRHS) {
1354 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
1355 Depth + 1))
1356 return true;
1357 Known = Known.intersectWith(Known2);
1358 }
1359
1360 // Attempt to avoid multi-use ops if we don't need anything from them.
1361 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1362 Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
1363 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1364 Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
1365 if (DemandedOp0 || DemandedOp1) {
1366 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1367 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1368 SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
1369 return TLO.CombineTo(Op, NewOp);
1370 }
1371 }
1372 break;
1373 }
1374 case ISD::AND: {
1375 SDValue Op0 = Op.getOperand(0);
1376 SDValue Op1 = Op.getOperand(1);
1377
1378 // If the RHS is a constant, check to see if the LHS would be zero without
1379 // using the bits from the RHS. Below, we use knowledge about the RHS to
1380 // simplify the LHS, here we're using information from the LHS to simplify
1381 // the RHS.
1382 if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1)) {
1383 // Do not increment Depth here; that can cause an infinite loop.
1384 KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
1385 // If the LHS already has zeros where RHSC does, this 'and' is dead.
1386 if ((LHSKnown.Zero & DemandedBits) ==
1387 (~RHSC->getAPIntValue() & DemandedBits))
1388 return TLO.CombineTo(Op, Op0);
1389
1390 // If any of the set bits in the RHS are known zero on the LHS, shrink
1391 // the constant.
1392 if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits,
1393 DemandedElts, TLO))
1394 return true;
1395
1396 // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
1397 // constant, but if this 'and' is only clearing bits that were just set by
1398 // the xor, then this 'and' can be eliminated by shrinking the mask of
1399 // the xor. For example, for a 32-bit X:
1400 // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
1401 if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
1402 LHSKnown.One == ~RHSC->getAPIntValue()) {
1403 SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
1404 return TLO.CombineTo(Op, Xor);
1405 }
1406 }
1407
1408 // AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I)
1409 // iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits).
1410 if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR && !VT.isScalableVector() &&
1411 (Op0.getOperand(0).isUndef() ||
1413 Op0->hasOneUse()) {
1414 unsigned NumSubElts =
1416 unsigned SubIdx = Op0.getConstantOperandVal(2);
1417 APInt DemandedSub =
1418 APInt::getBitsSet(NumElts, SubIdx, SubIdx + NumSubElts);
1419 KnownBits KnownSubMask =
1420 TLO.DAG.computeKnownBits(Op1, DemandedSub & DemandedElts, Depth + 1);
1421 if (DemandedBits.isSubsetOf(KnownSubMask.One)) {
1422 SDValue NewAnd =
1423 TLO.DAG.getNode(ISD::AND, dl, VT, Op0.getOperand(0), Op1);
1424 SDValue NewInsert =
1425 TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, NewAnd,
1426 Op0.getOperand(1), Op0.getOperand(2));
1427 return TLO.CombineTo(Op, NewInsert);
1428 }
1429 }
1430
1431 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1432 Depth + 1))
1433 return true;
1434 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1435 if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
1436 Known2, TLO, Depth + 1))
1437 return true;
1438 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1439
1440 // If all of the demanded bits are known one on one side, return the other.
1441 // These bits cannot contribute to the result of the 'and'.
1442 if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
1443 return TLO.CombineTo(Op, Op0);
1444 if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
1445 return TLO.CombineTo(Op, Op1);
1446 // If all of the demanded bits in the inputs are known zeros, return zero.
1447 if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1448 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
1449 // If the RHS is a constant, see if we can simplify it.
1450 if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, DemandedElts,
1451 TLO))
1452 return true;
1453 // If the operation can be done in a smaller type, do so.
1454 if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1455 return true;
1456
1457 // Attempt to avoid multi-use ops if we don't need anything from them.
1458 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1459 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1460 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1461 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1462 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1463 if (DemandedOp0 || DemandedOp1) {
1464 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1465 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1466 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1467 return TLO.CombineTo(Op, NewOp);
1468 }
1469 }
1470
1471 Known &= Known2;
1472 break;
1473 }
1474 case ISD::OR: {
1475 SDValue Op0 = Op.getOperand(0);
1476 SDValue Op1 = Op.getOperand(1);
1477 SDNodeFlags Flags = Op.getNode()->getFlags();
1478 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1479 Depth + 1)) {
1480 if (Flags.hasDisjoint()) {
1481 Flags.setDisjoint(false);
1482 Op->setFlags(Flags);
1483 }
1484 return true;
1485 }
1486 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1487 if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
1488 Known2, TLO, Depth + 1)) {
1489 if (Flags.hasDisjoint()) {
1490 Flags.setDisjoint(false);
1491 Op->setFlags(Flags);
1492 }
1493 return true;
1494 }
1495 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1496
1497 // If all of the demanded bits are known zero on one side, return the other.
1498 // These bits cannot contribute to the result of the 'or'.
1499 if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
1500 return TLO.CombineTo(Op, Op0);
1501 if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
1502 return TLO.CombineTo(Op, Op1);
1503 // If the RHS is a constant, see if we can simplify it.
1504 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1505 return true;
1506 // If the operation can be done in a smaller type, do so.
1507 if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1508 return true;
1509
1510 // Attempt to avoid multi-use ops if we don't need anything from them.
1511 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1512 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1513 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1514 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1515 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1516 if (DemandedOp0 || DemandedOp1) {
1517 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1518 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1519 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1520 return TLO.CombineTo(Op, NewOp);
1521 }
1522 }
1523
1524 // (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1|C2), (and Y, C2))
1525 // TODO: Use SimplifyMultipleUseDemandedBits to peek through masks.
1526 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::AND &&
1527 Op0->hasOneUse() && Op1->hasOneUse()) {
1528 // Attempt to match all commutations - m_c_Or would've been useful!
1529 for (int I = 0; I != 2; ++I) {
1530 SDValue X = Op.getOperand(I).getOperand(0);
1531 SDValue C1 = Op.getOperand(I).getOperand(1);
1532 SDValue Alt = Op.getOperand(1 - I).getOperand(0);
1533 SDValue C2 = Op.getOperand(1 - I).getOperand(1);
1534 if (Alt.getOpcode() == ISD::OR) {
1535 for (int J = 0; J != 2; ++J) {
1536 if (X == Alt.getOperand(J)) {
1537 SDValue Y = Alt.getOperand(1 - J);
1538 if (SDValue C12 = TLO.DAG.FoldConstantArithmetic(ISD::OR, dl, VT,
1539 {C1, C2})) {
1540 SDValue MaskX = TLO.DAG.getNode(ISD::AND, dl, VT, X, C12);
1541 SDValue MaskY = TLO.DAG.getNode(ISD::AND, dl, VT, Y, C2);
1542 return TLO.CombineTo(
1543 Op, TLO.DAG.getNode(ISD::OR, dl, VT, MaskX, MaskY));
1544 }
1545 }
1546 }
1547 }
1548 }
1549 }
1550
1551 Known |= Known2;
1552 break;
1553 }
1554 case ISD::XOR: {
1555 SDValue Op0 = Op.getOperand(0);
1556 SDValue Op1 = Op.getOperand(1);
1557
1558 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1559 Depth + 1))
1560 return true;
1561 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1562 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
1563 Depth + 1))
1564 return true;
1565 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1566
1567 // If all of the demanded bits are known zero on one side, return the other.
1568 // These bits cannot contribute to the result of the 'xor'.
1569 if (DemandedBits.isSubsetOf(Known.Zero))
1570 return TLO.CombineTo(Op, Op0);
1571 if (DemandedBits.isSubsetOf(Known2.Zero))
1572 return TLO.CombineTo(Op, Op1);
1573 // If the operation can be done in a smaller type, do so.
1574 if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1575 return true;
1576
1577 // If all of the unknown bits are known to be zero on one side or the other
1578 // turn this into an *inclusive* or.
1579 // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
1580 if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1581 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
1582
1583 ConstantSDNode *C = isConstOrConstSplat(Op1, DemandedElts);
1584 if (C) {
1585 // If one side is a constant, and all of the set bits in the constant are
1586 // also known set on the other side, turn this into an AND, as we know
1587 // the bits will be cleared.
1588 // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
1589 // NB: it is okay if more bits are known than are requested
1590 if (C->getAPIntValue() == Known2.One) {
1591 SDValue ANDC =
1592 TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
1593 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
1594 }
1595
1596 // If the RHS is a constant, see if we can change it. Don't alter a -1
1597 // constant because that's a 'not' op, and that is better for combining
1598 // and codegen.
1599 if (!C->isAllOnes() && DemandedBits.isSubsetOf(C->getAPIntValue())) {
1600 // We're flipping all demanded bits. Flip the undemanded bits too.
1601 SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
1602 return TLO.CombineTo(Op, New);
1603 }
1604
1605 unsigned Op0Opcode = Op0.getOpcode();
1606 if ((Op0Opcode == ISD::SRL || Op0Opcode == ISD::SHL) && Op0.hasOneUse()) {
1607 if (ConstantSDNode *ShiftC =
1608 isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
1609 // Don't crash on an oversized shift. We can not guarantee that a
1610 // bogus shift has been simplified to undef.
1611 if (ShiftC->getAPIntValue().ult(BitWidth)) {
1612 uint64_t ShiftAmt = ShiftC->getZExtValue();
1614 Ones = Op0Opcode == ISD::SHL ? Ones.shl(ShiftAmt)
1615 : Ones.lshr(ShiftAmt);
1616 const TargetLowering &TLI = TLO.DAG.getTargetLoweringInfo();
1617 if ((DemandedBits & C->getAPIntValue()) == (DemandedBits & Ones) &&
1618 TLI.isDesirableToCommuteXorWithShift(Op.getNode())) {
1619 // If the xor constant is a demanded mask, do a 'not' before the
1620 // shift:
1621 // xor (X << ShiftC), XorC --> (not X) << ShiftC
1622 // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
1623 SDValue Not = TLO.DAG.getNOT(dl, Op0.getOperand(0), VT);
1624 return TLO.CombineTo(Op, TLO.DAG.getNode(Op0Opcode, dl, VT, Not,
1625 Op0.getOperand(1)));
1626 }
1627 }
1628 }
1629 }
1630 }
1631
1632 // If we can't turn this into a 'not', try to shrink the constant.
1633 if (!C || !C->isAllOnes())
1634 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1635 return true;
1636
1637 // Attempt to avoid multi-use ops if we don't need anything from them.
1638 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1639 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1640 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1641 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1642 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1643 if (DemandedOp0 || DemandedOp1) {
1644 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1645 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1646 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1647 return TLO.CombineTo(Op, NewOp);
1648 }
1649 }
1650
1651 Known ^= Known2;
1652 break;
1653 }
1654 case ISD::SELECT:
1655 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1656 Known, TLO, Depth + 1))
1657 return true;
1658 if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1659 Known2, TLO, Depth + 1))
1660 return true;
1661 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1662 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1663
1664 // If the operands are constants, see if we can simplify them.
1665 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1666 return true;
1667
1668 // Only known if known in both the LHS and RHS.
1669 Known = Known.intersectWith(Known2);
1670 break;
1671 case ISD::VSELECT:
1672 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1673 Known, TLO, Depth + 1))
1674 return true;
1675 if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1676 Known2, TLO, Depth + 1))
1677 return true;
1678 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1679 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1680
1681 // Only known if known in both the LHS and RHS.
1682 Known = Known.intersectWith(Known2);
1683 break;
1684 case ISD::SELECT_CC:
1685 if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, DemandedElts,
1686 Known, TLO, Depth + 1))
1687 return true;
1688 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1689 Known2, TLO, Depth + 1))
1690 return true;
1691 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1692 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1693
1694 // If the operands are constants, see if we can simplify them.
1695 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1696 return true;
1697
1698 // Only known if known in both the LHS and RHS.
1699 Known = Known.intersectWith(Known2);
1700 break;
1701 case ISD::SETCC: {
1702 SDValue Op0 = Op.getOperand(0);
1703 SDValue Op1 = Op.getOperand(1);
1704 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
1705 // If (1) we only need the sign-bit, (2) the setcc operands are the same
1706 // width as the setcc result, and (3) the result of a setcc conforms to 0 or
1707 // -1, we may be able to bypass the setcc.
1708 if (DemandedBits.isSignMask() &&
1712 // If we're testing X < 0, then this compare isn't needed - just use X!
1713 // FIXME: We're limiting to integer types here, but this should also work
1714 // if we don't care about FP signed-zero. The use of SETLT with FP means
1715 // that we don't care about NaNs.
1716 if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
1718 return TLO.CombineTo(Op, Op0);
1719
1720 // TODO: Should we check for other forms of sign-bit comparisons?
1721 // Examples: X <= -1, X >= 0
1722 }
1723 if (getBooleanContents(Op0.getValueType()) ==
1725 BitWidth > 1)
1726 Known.Zero.setBitsFrom(1);
1727 break;
1728 }
1729 case ISD::SHL: {
1730 SDValue Op0 = Op.getOperand(0);
1731 SDValue Op1 = Op.getOperand(1);
1732 EVT ShiftVT = Op1.getValueType();
1733
1734 if (const APInt *SA =
1735 TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
1736 unsigned ShAmt = SA->getZExtValue();
1737 if (ShAmt == 0)
1738 return TLO.CombineTo(Op, Op0);
1739
1740 // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
1741 // single shift. We can do this if the bottom bits (which are shifted
1742 // out) are never demanded.
1743 // TODO - support non-uniform vector amounts.
1744 if (Op0.getOpcode() == ISD::SRL) {
1745 if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
1746 if (const APInt *SA2 =
1747 TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
1748 unsigned C1 = SA2->getZExtValue();
1749 unsigned Opc = ISD::SHL;
1750 int Diff = ShAmt - C1;
1751 if (Diff < 0) {
1752 Diff = -Diff;
1753 Opc = ISD::SRL;
1754 }
1755 SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1756 return TLO.CombineTo(
1757 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1758 }
1759 }
1760 }
1761
1762 // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
1763 // are not demanded. This will likely allow the anyext to be folded away.
1764 // TODO - support non-uniform vector amounts.
1765 if (Op0.getOpcode() == ISD::ANY_EXTEND) {
1766 SDValue InnerOp = Op0.getOperand(0);
1767 EVT InnerVT = InnerOp.getValueType();
1768 unsigned InnerBits = InnerVT.getScalarSizeInBits();
1769 if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
1770 isTypeDesirableForOp(ISD::SHL, InnerVT)) {
1771 SDValue NarrowShl = TLO.DAG.getNode(
1772 ISD::SHL, dl, InnerVT, InnerOp,
1773 TLO.DAG.getShiftAmountConstant(ShAmt, InnerVT, dl));
1774 return TLO.CombineTo(
1775 Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1776 }
1777
1778 // Repeat the SHL optimization above in cases where an extension
1779 // intervenes: (shl (anyext (shr x, c1)), c2) to
1780 // (shl (anyext x), c2-c1). This requires that the bottom c1 bits
1781 // aren't demanded (as above) and that the shifted upper c1 bits of
1782 // x aren't demanded.
1783 // TODO - support non-uniform vector amounts.
1784 if (InnerOp.getOpcode() == ISD::SRL && Op0.hasOneUse() &&
1785 InnerOp.hasOneUse()) {
1786 if (const APInt *SA2 =
1787 TLO.DAG.getValidShiftAmountConstant(InnerOp, DemandedElts)) {
1788 unsigned InnerShAmt = SA2->getZExtValue();
1789 if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
1790 DemandedBits.getActiveBits() <=
1791 (InnerBits - InnerShAmt + ShAmt) &&
1792 DemandedBits.countr_zero() >= ShAmt) {
1793 SDValue NewSA =
1794 TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, ShiftVT);
1795 SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
1796 InnerOp.getOperand(0));
1797 return TLO.CombineTo(
1798 Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
1799 }
1800 }
1801 }
1802 }
1803
1804 APInt InDemandedMask = DemandedBits.lshr(ShAmt);
1805 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1806 Depth + 1)) {
1807 SDNodeFlags Flags = Op.getNode()->getFlags();
1808 if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
1809 // Disable the nsw and nuw flags. We can no longer guarantee that we
1810 // won't wrap after simplification.
1811 Flags.setNoSignedWrap(false);
1812 Flags.setNoUnsignedWrap(false);
1813 Op->setFlags(Flags);
1814 }
1815 return true;
1816 }
1817 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1818 Known.Zero <<= ShAmt;
1819 Known.One <<= ShAmt;
1820 // low bits known zero.
1821 Known.Zero.setLowBits(ShAmt);
1822
1823 // Attempt to avoid multi-use ops if we don't need anything from them.
1824 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1825 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1826 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
1827 if (DemandedOp0) {
1828 SDValue NewOp = TLO.DAG.getNode(ISD::SHL, dl, VT, DemandedOp0, Op1);
1829 return TLO.CombineTo(Op, NewOp);
1830 }
1831 }
1832
1833 // Try shrinking the operation as long as the shift amount will still be
1834 // in range.
1835 if ((ShAmt < DemandedBits.getActiveBits()) &&
1836 ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1837 return true;
1838
1839 // Narrow shift to lower half - similar to ShrinkDemandedOp.
1840 // (shl i64:x, K) -> (i64 zero_extend (shl (i32 (trunc i64:x)), K))
1841 // Only do this if we demand the upper half so the knownbits are correct.
1842 unsigned HalfWidth = BitWidth / 2;
1843 if ((BitWidth % 2) == 0 && !VT.isVector() && ShAmt < HalfWidth &&
1844 DemandedBits.countLeadingOnes() >= HalfWidth) {
1845 EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), HalfWidth);
1846 if (isNarrowingProfitable(VT, HalfVT) &&
1847 isTypeDesirableForOp(ISD::SHL, HalfVT) &&
1848 isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
1849 (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, HalfVT))) {
1850 // If we're demanding the upper bits at all, we must ensure
1851 // that the upper bits of the shift result are known to be zero,
1852 // which is equivalent to the narrow shift being NUW.
1853 if (bool IsNUW = (Known.countMinLeadingZeros() >= HalfWidth)) {
1854 bool IsNSW = Known.countMinSignBits() > HalfWidth;
1855 SDNodeFlags Flags;
1856 Flags.setNoSignedWrap(IsNSW);
1857 Flags.setNoUnsignedWrap(IsNUW);
1858 SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
1859 SDValue NewShiftAmt = TLO.DAG.getShiftAmountConstant(
1860 ShAmt, HalfVT, dl, TLO.LegalTypes());
1861 SDValue NewShift = TLO.DAG.getNode(ISD::SHL, dl, HalfVT, NewOp,
1862 NewShiftAmt, Flags);
1863 SDValue NewExt =
1864 TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift);
1865 return TLO.CombineTo(Op, NewExt);
1866 }
1867 }
1868 }
1869 } else {
1870 // This is a variable shift, so we can't shift the demand mask by a known
1871 // amount. But if we are not demanding high bits, then we are not
1872 // demanding those bits from the pre-shifted operand either.
1873 if (unsigned CTLZ = DemandedBits.countl_zero()) {
1874 APInt DemandedFromOp(APInt::getLowBitsSet(BitWidth, BitWidth - CTLZ));
1875 if (SimplifyDemandedBits(Op0, DemandedFromOp, DemandedElts, Known, TLO,
1876 Depth + 1)) {
1877 SDNodeFlags Flags = Op.getNode()->getFlags();
1878 if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
1879 // Disable the nsw and nuw flags. We can no longer guarantee that we
1880 // won't wrap after simplification.
1881 Flags.setNoSignedWrap(false);
1882 Flags.setNoUnsignedWrap(false);
1883 Op->setFlags(Flags);
1884 }
1885 return true;
1886 }
1887 Known.resetAll();
1888 }
1889 }
1890
1891 // If we are only demanding sign bits then we can use the shift source
1892 // directly.
1893 if (const APInt *MaxSA =
1894 TLO.DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
1895 unsigned ShAmt = MaxSA->getZExtValue();
1896 unsigned NumSignBits =
1897 TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
1898 unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
1899 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
1900 return TLO.CombineTo(Op, Op0);
1901 }
1902 break;
1903 }
1904 case ISD::SRL: {
1905 SDValue Op0 = Op.getOperand(0);
1906 SDValue Op1 = Op.getOperand(1);
1907 EVT ShiftVT = Op1.getValueType();
1908
1909 // Try to match AVG patterns.
1910 if (SDValue AVG = combineShiftToAVG(Op, TLO.DAG, *this, DemandedBits,
1911 DemandedElts, Depth + 1))
1912 return TLO.CombineTo(Op, AVG);
1913
1914 if (const APInt *SA =
1915 TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
1916 unsigned ShAmt = SA->getZExtValue();
1917 if (ShAmt == 0)
1918 return TLO.CombineTo(Op, Op0);
1919
1920 // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
1921 // single shift. We can do this if the top bits (which are shifted out)
1922 // are never demanded.
1923 // TODO - support non-uniform vector amounts.
1924 if (Op0.getOpcode() == ISD::SHL) {
1925 if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
1926 if (const APInt *SA2 =
1927 TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
1928 unsigned C1 = SA2->getZExtValue();
1929 unsigned Opc = ISD::SRL;
1930 int Diff = ShAmt - C1;
1931 if (Diff < 0) {
1932 Diff = -Diff;
1933 Opc = ISD::SHL;
1934 }
1935 SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1936 return TLO.CombineTo(
1937 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1938 }
1939 }
1940 }
1941
1942 APInt InDemandedMask = (DemandedBits << ShAmt);
1943
1944 // If the shift is exact, then it does demand the low bits (and knows that
1945 // they are zero).
1946 if (Op->getFlags().hasExact())
1947 InDemandedMask.setLowBits(ShAmt);
1948
1949 // Narrow shift to lower half - similar to ShrinkDemandedOp.
1950 // (srl i64:x, K) -> (i64 zero_extend (srl (i32 (trunc i64:x)), K))
1951 if ((BitWidth % 2) == 0 && !VT.isVector()) {
1953 EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), BitWidth / 2);
1954 if (isNarrowingProfitable(VT, HalfVT) &&
1955 isTypeDesirableForOp(ISD::SRL, HalfVT) &&
1956 isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
1957 (!TLO.LegalOperations() || isOperationLegal(ISD::SRL, HalfVT)) &&
1958 ((InDemandedMask.countLeadingZeros() >= (BitWidth / 2)) ||
1959 TLO.DAG.MaskedValueIsZero(Op0, HiBits))) {
1960 SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
1961 SDValue NewShiftAmt = TLO.DAG.getShiftAmountConstant(
1962 ShAmt, HalfVT, dl, TLO.LegalTypes());
1963 SDValue NewShift =
1964 TLO.DAG.getNode(ISD::SRL, dl, HalfVT, NewOp, NewShiftAmt);
1965 return TLO.CombineTo(
1966 Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift));
1967 }
1968 }
1969
1970 // Compute the new bits that are at the top now.
1971 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1972 Depth + 1))
1973 return true;
1974 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1975 Known.Zero.lshrInPlace(ShAmt);
1976 Known.One.lshrInPlace(ShAmt);
1977 // High bits known zero.
1978 Known.Zero.setHighBits(ShAmt);
1979
1980 // Attempt to avoid multi-use ops if we don't need anything from them.
1981 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1982 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1983 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
1984 if (DemandedOp0) {
1985 SDValue NewOp = TLO.DAG.getNode(ISD::SRL, dl, VT, DemandedOp0, Op1);
1986 return TLO.CombineTo(Op, NewOp);
1987 }
1988 }
1989 } else {
1990 // Use generic knownbits computation as it has support for non-uniform
1991 // shift amounts.
1992 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1993 }
1994 break;
1995 }
1996 case ISD::SRA: {
1997 SDValue Op0 = Op.getOperand(0);
1998 SDValue Op1 = Op.getOperand(1);
1999 EVT ShiftVT = Op1.getValueType();
2000
2001 // If we only want bits that already match the signbit then we don't need
2002 // to shift.
2003 unsigned NumHiDemandedBits = BitWidth - DemandedBits.countr_zero();
2004 if (TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1) >=
2005 NumHiDemandedBits)
2006 return TLO.CombineTo(Op, Op0);
2007
2008 // If this is an arithmetic shift right and only the low-bit is set, we can
2009 // always convert this into a logical shr, even if the shift amount is
2010 // variable. The low bit of the shift cannot be an input sign bit unless
2011 // the shift amount is >= the size of the datatype, which is undefined.
2012 if (DemandedBits.isOne())
2013 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2014
2015 // Try to match AVG patterns.
2016 if (SDValue AVG = combineShiftToAVG(Op, TLO.DAG, *this, DemandedBits,
2017 DemandedElts, Depth + 1))
2018 return TLO.CombineTo(Op, AVG);
2019
2020 if (const APInt *SA =
2021 TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
2022 unsigned ShAmt = SA->getZExtValue();
2023 if (ShAmt == 0)
2024 return TLO.CombineTo(Op, Op0);
2025
2026 // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target
2027 // supports sext_inreg.
2028 if (Op0.getOpcode() == ISD::SHL) {
2029 if (const APInt *InnerSA =
2030 TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
2031 unsigned LowBits = BitWidth - ShAmt;
2032 EVT ExtVT = EVT::getIntegerVT(*TLO.DAG.getContext(), LowBits);
2033 if (VT.isVector())
2034 ExtVT = EVT::getVectorVT(*TLO.DAG.getContext(), ExtVT,
2036
2037 if (*InnerSA == ShAmt) {
2038 if (!TLO.LegalOperations() ||
2040 return TLO.CombineTo(
2041 Op, TLO.DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT,
2042 Op0.getOperand(0),
2043 TLO.DAG.getValueType(ExtVT)));
2044
2045 // Even if we can't convert to sext_inreg, we might be able to
2046 // remove this shift pair if the input is already sign extended.
2047 unsigned NumSignBits =
2048 TLO.DAG.ComputeNumSignBits(Op0.getOperand(0), DemandedElts);
2049 if (NumSignBits > ShAmt)
2050 return TLO.CombineTo(Op, Op0.getOperand(0));
2051 }
2052 }
2053 }
2054
2055 APInt InDemandedMask = (DemandedBits << ShAmt);
2056
2057 // If the shift is exact, then it does demand the low bits (and knows that
2058 // they are zero).
2059 if (Op->getFlags().hasExact())
2060 InDemandedMask.setLowBits(ShAmt);
2061
2062 // If any of the demanded bits are produced by the sign extension, we also
2063 // demand the input sign bit.
2064 if (DemandedBits.countl_zero() < ShAmt)
2065 InDemandedMask.setSignBit();
2066
2067 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
2068 Depth + 1))
2069 return true;
2070 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2071 Known.Zero.lshrInPlace(ShAmt);
2072 Known.One.lshrInPlace(ShAmt);
2073
2074 // If the input sign bit is known to be zero, or if none of the top bits
2075 // are demanded, turn this into an unsigned shift right.
2076 if (Known.Zero[BitWidth - ShAmt - 1] ||
2077 DemandedBits.countl_zero() >= ShAmt) {
2078 SDNodeFlags Flags;
2079 Flags.setExact(Op->getFlags().hasExact());
2080 return TLO.CombineTo(
2081 Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
2082 }
2083
2084 int Log2 = DemandedBits.exactLogBase2();
2085 if (Log2 >= 0) {
2086 // The bit must come from the sign.
2087 SDValue NewSA = TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, ShiftVT);
2088 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
2089 }
2090
2091 if (Known.One[BitWidth - ShAmt - 1])
2092 // New bits are known one.
2093 Known.One.setHighBits(ShAmt);
2094
2095 // Attempt to avoid multi-use ops if we don't need anything from them.
2096 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2097 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2098 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
2099 if (DemandedOp0) {
2100 SDValue NewOp = TLO.DAG.getNode(ISD::SRA, dl, VT, DemandedOp0, Op1);
2101 return TLO.CombineTo(Op, NewOp);
2102 }
2103 }
2104 }
2105 break;
2106 }
2107 case ISD::FSHL:
2108 case ISD::FSHR: {
2109 SDValue Op0 = Op.getOperand(0);
2110 SDValue Op1 = Op.getOperand(1);
2111 SDValue Op2 = Op.getOperand(2);
2112 bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
2113
2114 if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
2115 unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2116
2117 // For fshl, 0-shift returns the 1st arg.
2118 // For fshr, 0-shift returns the 2nd arg.
2119 if (Amt == 0) {
2120 if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
2121 Known, TLO, Depth + 1))
2122 return true;
2123 break;
2124 }
2125
2126 // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
2127 // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
2128 APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
2129 APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
2130 if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2131 Depth + 1))
2132 return true;
2133 if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
2134 Depth + 1))
2135 return true;
2136
2137 Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt));
2138 Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt));
2139 Known.One.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
2140 Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
2141 Known = Known.unionWith(Known2);
2142
2143 // Attempt to avoid multi-use ops if we don't need anything from them.
2144 if (!Demanded0.isAllOnes() || !Demanded1.isAllOnes() ||
2145 !DemandedElts.isAllOnes()) {
2146 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2147 Op0, Demanded0, DemandedElts, TLO.DAG, Depth + 1);
2148 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2149 Op1, Demanded1, DemandedElts, TLO.DAG, Depth + 1);
2150 if (DemandedOp0 || DemandedOp1) {
2151 DemandedOp0 = DemandedOp0 ? DemandedOp0 : Op0;
2152 DemandedOp1 = DemandedOp1 ? DemandedOp1 : Op1;
2153 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedOp0,
2154 DemandedOp1, Op2);
2155 return TLO.CombineTo(Op, NewOp);
2156 }
2157 }
2158 }
2159
2160 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2161 if (isPowerOf2_32(BitWidth)) {
2162 APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
2163 if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts,
2164 Known2, TLO, Depth + 1))
2165 return true;
2166 }
2167 break;
2168 }
2169 case ISD::ROTL:
2170 case ISD::ROTR: {
2171 SDValue Op0 = Op.getOperand(0);
2172 SDValue Op1 = Op.getOperand(1);
2173 bool IsROTL = (Op.getOpcode() == ISD::ROTL);
2174
2175 // If we're rotating an 0/-1 value, then it stays an 0/-1 value.
2176 if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1))
2177 return TLO.CombineTo(Op, Op0);
2178
2179 if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
2180 unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2181 unsigned RevAmt = BitWidth - Amt;
2182
2183 // rotl: (Op0 << Amt) | (Op0 >> (BW - Amt))
2184 // rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt)
2185 APInt Demanded0 = DemandedBits.rotr(IsROTL ? Amt : RevAmt);
2186 if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2187 Depth + 1))
2188 return true;
2189
2190 // rot*(x, 0) --> x
2191 if (Amt == 0)
2192 return TLO.CombineTo(Op, Op0);
2193
2194 // See if we don't demand either half of the rotated bits.
2195 if ((!TLO.LegalOperations() || isOperationLegal(ISD::SHL, VT)) &&
2196 DemandedBits.countr_zero() >= (IsROTL ? Amt : RevAmt)) {
2197 Op1 = TLO.DAG.getConstant(IsROTL ? Amt : RevAmt, dl, Op1.getValueType());
2198 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, Op1));
2199 }
2200 if ((!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT)) &&
2201 DemandedBits.countl_zero() >= (IsROTL ? RevAmt : Amt)) {
2202 Op1 = TLO.DAG.getConstant(IsROTL ? RevAmt : Amt, dl, Op1.getValueType());
2203 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2204 }
2205 }
2206
2207 // For pow-2 bitwidths we only demand the bottom modulo amt bits.
2208 if (isPowerOf2_32(BitWidth)) {
2209 APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
2210 if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO,
2211 Depth + 1))
2212 return true;
2213 }
2214 break;
2215 }
2216 case ISD::SMIN:
2217 case ISD::SMAX:
2218 case ISD::UMIN:
2219 case ISD::UMAX: {
2220 unsigned Opc = Op.getOpcode();
2221 SDValue Op0 = Op.getOperand(0);
2222 SDValue Op1 = Op.getOperand(1);
2223
2224 // If we're only demanding signbits, then we can simplify to OR/AND node.
2225 unsigned BitOp =
2226 (Opc == ISD::SMIN || Opc == ISD::UMAX) ? ISD::OR : ISD::AND;
2227 unsigned NumSignBits =
2228 std::min(TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1),
2229 TLO.DAG.ComputeNumSignBits(Op1, DemandedElts, Depth + 1));
2230 unsigned NumDemandedUpperBits = BitWidth - DemandedBits.countr_zero();
2231 if (NumSignBits >= NumDemandedUpperBits)
2232 return TLO.CombineTo(Op, TLO.DAG.getNode(BitOp, SDLoc(Op), VT, Op0, Op1));
2233
2234 // Check if one arg is always less/greater than (or equal) to the other arg.
2235 KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
2236 KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
2237 switch (Opc) {
2238 case ISD::SMIN:
2239 if (std::optional<bool> IsSLE = KnownBits::sle(Known0, Known1))
2240 return TLO.CombineTo(Op, *IsSLE ? Op0 : Op1);
2241 if (std::optional<bool> IsSLT = KnownBits::slt(Known0, Known1))
2242 return TLO.CombineTo(Op, *IsSLT ? Op0 : Op1);
2243 Known = KnownBits::smin(Known0, Known1);
2244 break;
2245 case ISD::SMAX:
2246 if (std::optional<bool> IsSGE = KnownBits::sge(Known0, Known1))
2247 return TLO.CombineTo(Op, *IsSGE ? Op0 : Op1);
2248 if (std::optional<bool> IsSGT = KnownBits::sgt(Known0, Known1))
2249 return TLO.CombineTo(Op, *IsSGT ? Op0 : Op1);
2250 Known = KnownBits::smax(Known0, Known1);
2251 break;
2252 case ISD::UMIN:
2253 if (std::optional<bool> IsULE = KnownBits::ule(Known0, Known1))
2254 return TLO.CombineTo(Op, *IsULE ? Op0 : Op1);
2255 if (std::optional<bool> IsULT = KnownBits::ult(Known0, Known1))
2256 return TLO.CombineTo(Op, *IsULT ? Op0 : Op1);
2257 Known = KnownBits::umin(Known0, Known1);
2258 break;
2259 case ISD::UMAX:
2260 if (std::optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
2261 return TLO.CombineTo(Op, *IsUGE ? Op0 : Op1);
2262 if (std::optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
2263 return TLO.CombineTo(Op, *IsUGT ? Op0 : Op1);
2264 Known = KnownBits::umax(Known0, Known1);
2265 break;
2266 }
2267 break;
2268 }
2269 case ISD::BITREVERSE: {
2270 SDValue Src = Op.getOperand(0);
2271 APInt DemandedSrcBits = DemandedBits.reverseBits();
2272 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2273 Depth + 1))
2274 return true;
2275 Known.One = Known2.One.reverseBits();
2276 Known.Zero = Known2.Zero.reverseBits();
2277 break;
2278 }
2279 case ISD::BSWAP: {
2280 SDValue Src = Op.getOperand(0);
2281
2282 // If the only bits demanded come from one byte of the bswap result,
2283 // just shift the input byte into position to eliminate the bswap.
2284 unsigned NLZ = DemandedBits.countl_zero();
2285 unsigned NTZ = DemandedBits.countr_zero();
2286
2287 // Round NTZ down to the next byte. If we have 11 trailing zeros, then
2288 // we need all the bits down to bit 8. Likewise, round NLZ. If we
2289 // have 14 leading zeros, round to 8.
2290 NLZ = alignDown(NLZ, 8);
2291 NTZ = alignDown(NTZ, 8);
2292 // If we need exactly one byte, we can do this transformation.
2293 if (BitWidth - NLZ - NTZ == 8) {
2294 // Replace this with either a left or right shift to get the byte into
2295 // the right place.
2296 unsigned ShiftOpcode = NLZ > NTZ ? ISD::SRL : ISD::SHL;
2297 if (!TLO.LegalOperations() || isOperationLegal(ShiftOpcode, VT)) {
2298 unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ;
2299 SDValue ShAmt = TLO.DAG.getShiftAmountConstant(ShiftAmount, VT, dl);
2300 SDValue NewOp = TLO.DAG.getNode(ShiftOpcode, dl, VT, Src, ShAmt);
2301 return TLO.CombineTo(Op, NewOp);
2302 }
2303 }
2304
2305 APInt DemandedSrcBits = DemandedBits.byteSwap();
2306 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2307 Depth + 1))
2308 return true;
2309 Known.One = Known2.One.byteSwap();
2310 Known.Zero = Known2.Zero.byteSwap();
2311 break;
2312 }
2313 case ISD::CTPOP: {
2314 // If only 1 bit is demanded, replace with PARITY as long as we're before
2315 // op legalization.
2316 // FIXME: Limit to scalars for now.
2317 if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
2318 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
2319 Op.getOperand(0)));
2320
2321 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2322 break;
2323 }
2325 SDValue Op0 = Op.getOperand(0);
2326 EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2327 unsigned ExVTBits = ExVT.getScalarSizeInBits();
2328
2329 // If we only care about the highest bit, don't bother shifting right.
2330 if (DemandedBits.isSignMask()) {
2331 unsigned MinSignedBits =
2332 TLO.DAG.ComputeMaxSignificantBits(Op0, DemandedElts, Depth + 1);
2333 bool AlreadySignExtended = ExVTBits >= MinSignedBits;
2334 // However if the input is already sign extended we expect the sign
2335 // extension to be dropped altogether later and do not simplify.
2336 if (!AlreadySignExtended) {
2337 // Compute the correct shift amount type, which must be getShiftAmountTy
2338 // for scalar types after legalization.
2339 SDValue ShiftAmt =
2340 TLO.DAG.getShiftAmountConstant(BitWidth - ExVTBits, VT, dl);
2341 return TLO.CombineTo(Op,
2342 TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
2343 }
2344 }
2345
2346 // If none of the extended bits are demanded, eliminate the sextinreg.
2347 if (DemandedBits.getActiveBits() <= ExVTBits)
2348 return TLO.CombineTo(Op, Op0);
2349
2350 APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);
2351
2352 // Since the sign extended bits are demanded, we know that the sign
2353 // bit is demanded.
2354 InputDemandedBits.setBit(ExVTBits - 1);
2355
2356 if (SimplifyDemandedBits(Op0, InputDemandedBits, DemandedElts, Known, TLO,
2357 Depth + 1))
2358 return true;
2359 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2360
2361 // If the sign bit of the input is known set or clear, then we know the
2362 // top bits of the result.
2363
2364 // If the input sign bit is known zero, convert this into a zero extension.
2365 if (Known.Zero[ExVTBits - 1])
2366 return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT));
2367
2368 APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
2369 if (Known.One[ExVTBits - 1]) { // Input sign bit known set
2370 Known.One.setBitsFrom(ExVTBits);
2371 Known.Zero &= Mask;
2372 } else { // Input sign bit unknown
2373 Known.Zero &= Mask;
2374 Known.One &= Mask;
2375 }
2376 break;
2377 }
2378 case ISD::BUILD_PAIR: {
2379 EVT HalfVT = Op.getOperand(0).getValueType();
2380 unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
2381
2382 APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
2383 APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
2384
2385 KnownBits KnownLo, KnownHi;
2386
2387 if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
2388 return true;
2389
2390 if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
2391 return true;
2392
2393 Known = KnownHi.concat(KnownLo);
2394 break;
2395 }
2397 if (VT.isScalableVector())
2398 return false;
2399 [[fallthrough]];
2400 case ISD::ZERO_EXTEND: {
2401 SDValue Src = Op.getOperand(0);
2402 EVT SrcVT = Src.getValueType();
2403 unsigned InBits = SrcVT.getScalarSizeInBits();
2404 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2405 bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
2406
2407 // If none of the top bits are demanded, convert this into an any_extend.
2408 if (DemandedBits.getActiveBits() <= InBits) {
2409 // If we only need the non-extended bits of the bottom element
2410 // then we can just bitcast to the result.
2411 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2412 VT.getSizeInBits() == SrcVT.getSizeInBits())
2413 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2414
2415 unsigned Opc =
2417 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2418 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2419 }
2420
2421 SDNodeFlags Flags = Op->getFlags();
2422 APInt InDemandedBits = DemandedBits.trunc(InBits);
2423 APInt InDemandedElts = DemandedElts.zext(InElts);
2424 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2425 Depth + 1)) {
2426 if (Flags.hasNonNeg()) {
2427 Flags.setNonNeg(false);
2428 Op->setFlags(Flags);
2429 }
2430 return true;
2431 }
2432 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2433 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2434 Known = Known.zext(BitWidth);
2435
2436 // Attempt to avoid multi-use ops if we don't need anything from them.
2437 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2438 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2439 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2440 break;
2441 }
2443 if (VT.isScalableVector())
2444 return false;
2445 [[fallthrough]];
2446 case ISD::SIGN_EXTEND: {
2447 SDValue Src = Op.getOperand(0);
2448 EVT SrcVT = Src.getValueType();
2449 unsigned InBits = SrcVT.getScalarSizeInBits();
2450 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2451 bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
2452
2453 APInt InDemandedElts = DemandedElts.zext(InElts);
2454 APInt InDemandedBits = DemandedBits.trunc(InBits);
2455
2456 // Since some of the sign extended bits are demanded, we know that the sign
2457 // bit is demanded.
2458 InDemandedBits.setBit(InBits - 1);
2459
2460 // If none of the top bits are demanded, convert this into an any_extend.
2461 if (DemandedBits.getActiveBits() <= InBits) {
2462 // If we only need the non-extended bits of the bottom element
2463 // then we can just bitcast to the result.
2464 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2465 VT.getSizeInBits() == SrcVT.getSizeInBits())
2466 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2467
2468 // Don't lose an all signbits 0/-1 splat on targets with 0/-1 booleans.
2470 TLO.DAG.ComputeNumSignBits(Src, InDemandedElts, Depth + 1) !=
2471 InBits) {
2472 unsigned Opc =
2474 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2475 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2476 }
2477 }
2478
2479 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2480 Depth + 1))
2481 return true;
2482 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2483 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2484
2485 // If the sign bit is known one, the top bits match.
2486 Known = Known.sext(BitWidth);
2487
2488 // If the sign bit is known zero, convert this to a zero extend.
2489 if (Known.isNonNegative()) {
2490 unsigned Opc =
2492 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT)) {
2493 SDNodeFlags Flags;
2494 if (!IsVecInReg)
2495 Flags.setNonNeg(true);
2496 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src, Flags));
2497 }
2498 }
2499
2500 // Attempt to avoid multi-use ops if we don't need anything from them.
2501 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2502 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2503 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2504 break;
2505 }
2507 if (VT.isScalableVector())
2508 return false;
2509 [[fallthrough]];
2510 case ISD::ANY_EXTEND: {
2511 SDValue Src = Op.getOperand(0);
2512 EVT SrcVT = Src.getValueType();
2513 unsigned InBits = SrcVT.getScalarSizeInBits();
2514 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2515 bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
2516
2517 // If we only need the bottom element then we can just bitcast.
2518 // TODO: Handle ANY_EXTEND?
2519 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2520 VT.getSizeInBits() == SrcVT.getSizeInBits())
2521 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2522
2523 APInt InDemandedBits = DemandedBits.trunc(InBits);
2524 APInt InDemandedElts = DemandedElts.zext(InElts);
2525 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2526 Depth + 1))
2527 return true;
2528 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2529 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2530 Known = Known.anyext(BitWidth);
2531
2532 // Attempt to avoid multi-use ops if we don't need anything from them.
2533 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2534 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2535 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2536 break;
2537 }
2538 case ISD::TRUNCATE: {
2539 SDValue Src = Op.getOperand(0);
2540
2541 // Simplify the input, using demanded bit information, and compute the known
2542 // zero/one bits live out.
2543 unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
2544 APInt TruncMask = DemandedBits.zext(OperandBitWidth);
2545 if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
2546 Depth + 1))
2547 return true;
2548 Known = Known.trunc(BitWidth);
2549
2550 // Attempt to avoid multi-use ops if we don't need anything from them.
2551 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2552 Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
2553 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));
2554
2555 // If the input is only used by this truncate, see if we can shrink it based
2556 // on the known demanded bits.
2557 switch (Src.getOpcode()) {
2558 default:
2559 break;
2560 case ISD::SRL:
2561 // Shrink SRL by a constant if none of the high bits shifted in are
2562 // demanded.
2563 if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
2564 // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
2565 // undesirable.
2566 break;
2567
2568 if (Src.getNode()->hasOneUse()) {
2569 const APInt *ShAmtC =
2570 TLO.DAG.getValidShiftAmountConstant(Src, DemandedElts);
2571 if (!ShAmtC || ShAmtC->uge(BitWidth))
2572 break;
2573 uint64_t ShVal = ShAmtC->getZExtValue();
2574
2575 APInt HighBits =
2576 APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
2577 HighBits.lshrInPlace(ShVal);
2578 HighBits = HighBits.trunc(BitWidth);
2579
2580 if (!(HighBits & DemandedBits)) {
2581 // None of the shifted in bits are needed. Add a truncate of the
2582 // shift input, then shift it.
2583 SDValue NewShAmt =
2584 TLO.DAG.getShiftAmountConstant(ShVal, VT, dl, TLO.LegalTypes());
2585 SDValue NewTrunc =
2586 TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
2587 return TLO.CombineTo(
2588 Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, NewShAmt));
2589 }
2590 }
2591 break;
2592 }
2593
2594 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2595 break;
2596 }
2597 case ISD::AssertZext: {
2598 // AssertZext demands all of the high bits, plus any of the low bits
2599 // demanded by its users.
2600 EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2602 if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
2603 TLO, Depth + 1))
2604 return true;
2605 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2606
2607 Known.Zero |= ~InMask;
2608 Known.One &= (~Known.Zero);
2609 break;
2610 }
2612 SDValue Src = Op.getOperand(0);
2613 SDValue Idx = Op.getOperand(1);
2614 ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
2615 unsigned EltBitWidth = Src.getScalarValueSizeInBits();
2616
2617 if (SrcEltCnt.isScalable())
2618 return false;
2619
2620 // Demand the bits from every vector element without a constant index.
2621 unsigned NumSrcElts = SrcEltCnt.getFixedValue();
2622 APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
2623 if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
2624 if (CIdx->getAPIntValue().ult(NumSrcElts))
2625 DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
2626
2627 // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know
2628 // anything about the extended bits.
2629 APInt DemandedSrcBits = DemandedBits;
2630 if (BitWidth > EltBitWidth)
2631 DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);
2632
2633 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
2634 Depth + 1))
2635 return true;
2636
2637 // Attempt to avoid multi-use ops if we don't need anything from them.
2638 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2639 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2640 Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2641 SDValue NewOp =
2642 TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx);
2643 return TLO.CombineTo(Op, NewOp);
2644 }
2645 }
2646
2647 Known = Known2;
2648 if (BitWidth > EltBitWidth)
2649 Known = Known.anyext(BitWidth);
2650 break;
2651 }
2652 case ISD::BITCAST: {
2653 if (VT.isScalableVector())
2654 return false;
2655 SDValue Src = Op.getOperand(0);
2656 EVT SrcVT = Src.getValueType();
2657 unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
2658
2659 // If this is an FP->Int bitcast and if the sign bit is the only
2660 // thing demanded, turn this into a FGETSIGN.
2661 if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
2662 DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
2663 SrcVT.isFloatingPoint()) {
2664 bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
2665 bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
2666 if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
2667 SrcVT != MVT::f128) {
2668 // Cannot eliminate/lower SHL for f128 yet.
2669 EVT Ty = OpVTLegal ? VT : MVT::i32;
2670 // Make a FGETSIGN + SHL to move the sign bit into the appropriate
2671 // place. We expect the SHL to be eliminated by other optimizations.
2672 SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src);
2673 unsigned OpVTSizeInBits = Op.getValueSizeInBits();
2674 if (!OpVTLegal && OpVTSizeInBits > 32)
2675 Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
2676 unsigned ShVal = Op.getValueSizeInBits() - 1;
2677 SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
2678 return TLO.CombineTo(Op,
2679 TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
2680 }
2681 }
2682
2683 // Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
2684 // Demand the elt/bit if any of the original elts/bits are demanded.
2685 if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) {
2686 unsigned Scale = BitWidth / NumSrcEltBits;
2687 unsigned NumSrcElts = SrcVT.getVectorNumElements();
2688 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2689 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2690 for (unsigned i = 0; i != Scale; ++i) {
2691 unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
2692 unsigned BitOffset = EltOffset * NumSrcEltBits;
2693 APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
2694 if (!Sub.isZero()) {
2695 DemandedSrcBits |= Sub;
2696 for (unsigned j = 0; j != NumElts; ++j)
2697 if (DemandedElts[j])
2698 DemandedSrcElts.setBit((j * Scale) + i);
2699 }
2700 }
2701
2702 APInt KnownSrcUndef, KnownSrcZero;
2703 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2704 KnownSrcZero, TLO, Depth + 1))
2705 return true;
2706
2707 KnownBits KnownSrcBits;
2708 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2709 KnownSrcBits, TLO, Depth + 1))
2710 return true;
2711 } else if (IsLE && (NumSrcEltBits % BitWidth) == 0) {
2712 // TODO - bigendian once we have test coverage.
2713 unsigned Scale = NumSrcEltBits / BitWidth;
2714 unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
2715 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2716 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2717 for (unsigned i = 0; i != NumElts; ++i)
2718 if (DemandedElts[i]) {
2719 unsigned Offset = (i % Scale) * BitWidth;
2720 DemandedSrcBits.insertBits(DemandedBits, Offset);
2721 DemandedSrcElts.setBit(i / Scale);
2722 }
2723
2724 if (SrcVT.isVector()) {
2725 APInt KnownSrcUndef, KnownSrcZero;
2726 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2727 KnownSrcZero, TLO, Depth + 1))
2728 return true;
2729 }
2730
2731 KnownBits KnownSrcBits;
2732 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2733 KnownSrcBits, TLO, Depth + 1))
2734 return true;
2735
2736 // Attempt to avoid multi-use ops if we don't need anything from them.
2737 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2738 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2739 Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2740 SDValue NewOp = TLO.DAG.getBitcast(VT, DemandedSrc);
2741 return TLO.CombineTo(Op, NewOp);
2742 }
2743 }
2744 }
2745
2746 // If this is a bitcast, let computeKnownBits handle it. Only do this on a
2747 // recursive call where Known may be useful to the caller.
2748 if (Depth > 0) {
2749 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2750 return false;
2751 }
2752 break;
2753 }
2754 case ISD::MUL:
2755 if (DemandedBits.isPowerOf2()) {
2756 // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
2757 // If we demand exactly one bit N and we have "X * (C' << N)" where C' is
2758 // odd (has LSB set), then the left-shifted low bit of X is the answer.
2759 unsigned CTZ = DemandedBits.countr_zero();
2760 ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
2761 if (C && C->getAPIntValue().countr_zero() == CTZ) {
2762 SDValue AmtC = TLO.DAG.getShiftAmountConstant(CTZ, VT, dl);
2763 SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, Op.getOperand(0), AmtC);
2764 return TLO.CombineTo(Op, Shl);
2765 }
2766 }
2767 // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
2768 // X * X is odd iff X is odd.
2769 // 'Quadratic Reciprocity': X * X -> 0 for bit[1]
2770 if (Op.getOperand(0) == Op.getOperand(1) && DemandedBits.ult(4)) {
2771 SDValue One = TLO.DAG.getConstant(1, dl, VT);
2772 SDValue And1 = TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), One);
2773 return TLO.CombineTo(Op, And1);
2774 }
2775 [[fallthrough]];
2776 case ISD::ADD:
2777 case ISD::SUB: {
2778 // Add, Sub, and Mul don't demand any bits in positions beyond that
2779 // of the highest bit demanded of them.
2780 SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
2781 SDNodeFlags Flags = Op.getNode()->getFlags();
2782 unsigned DemandedBitsLZ = DemandedBits.countl_zero();
2783 APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
2784 KnownBits KnownOp0, KnownOp1;
2785 if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, KnownOp0, TLO,
2786 Depth + 1) ||
2787 SimplifyDemandedBits(Op1, LoMask, DemandedElts, KnownOp1, TLO,
2788 Depth + 1) ||
2789 // See if the operation should be performed at a smaller bit width.
2790 ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
2791 if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
2792 // Disable the nsw and nuw flags. We can no longer guarantee that we
2793 // won't wrap after simplification.
2794 Flags.setNoSignedWrap(false);
2795 Flags.setNoUnsignedWrap(false);
2796 Op->setFlags(Flags);
2797 }
2798 return true;
2799 }
2800
2801 // neg x with only low bit demanded is simply x.
2802 if (Op.getOpcode() == ISD::SUB && DemandedBits.isOne() &&
2803 isNullConstant(Op0))
2804 return TLO.CombineTo(Op, Op1);
2805
2806 // Attempt to avoid multi-use ops if we don't need anything from them.
2807 if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2808 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2809 Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2810 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2811 Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2812 if (DemandedOp0 || DemandedOp1) {
2813 Flags.setNoSignedWrap(false);
2814 Flags.setNoUnsignedWrap(false);
2815 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
2816 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
2817 SDValue NewOp =
2818 TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
2819 return TLO.CombineTo(Op, NewOp);
2820 }
2821 }
2822
2823 // If we have a constant operand, we may be able to turn it into -1 if we
2824 // do not demand the high bits. This can make the constant smaller to
2825 // encode, allow more general folding, or match specialized instruction
2826 // patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
2827 // is probably not useful (and could be detrimental).
2829 APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
2830 if (C && !C->isAllOnes() && !C->isOne() &&
2831 (C->getAPIntValue() | HighMask).isAllOnes()) {
2832 SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
2833 // Disable the nsw and nuw flags. We can no longer guarantee that we
2834 // won't wrap after simplification.
2835 Flags.setNoSignedWrap(false);
2836 Flags.setNoUnsignedWrap(false);
2837 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1, Flags);
2838 return TLO.CombineTo(Op, NewOp);
2839 }
2840
2841 // Match a multiply with a disguised negated-power-of-2 and convert to a
2842 // an equivalent shift-left amount.
2843 // Example: (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2844 auto getShiftLeftAmt = [&HighMask](SDValue Mul) -> unsigned {
2845 if (Mul.getOpcode() != ISD::MUL || !Mul.hasOneUse())
2846 return 0;
2847
2848 // Don't touch opaque constants. Also, ignore zero and power-of-2
2849 // multiplies. Those will get folded later.
2850 ConstantSDNode *MulC = isConstOrConstSplat(Mul.getOperand(1));
2851 if (MulC && !MulC->isOpaque() && !MulC->isZero() &&
2852 !MulC->getAPIntValue().isPowerOf2()) {
2853 APInt UnmaskedC = MulC->getAPIntValue() | HighMask;
2854 if (UnmaskedC.isNegatedPowerOf2())
2855 return (-UnmaskedC).logBase2();
2856 }
2857 return 0;
2858 };
2859
2860 auto foldMul = [&](ISD::NodeType NT, SDValue X, SDValue Y,
2861 unsigned ShlAmt) {
2862 SDValue ShlAmtC = TLO.DAG.getShiftAmountConstant(ShlAmt, VT, dl);
2863 SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, X, ShlAmtC);
2864 SDValue Res = TLO.DAG.getNode(NT, dl, VT, Y, Shl);
2865 return TLO.CombineTo(Op, Res);
2866 };
2867
2869 if (Op.getOpcode() == ISD::ADD) {
2870 // (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2871 if (unsigned ShAmt = getShiftLeftAmt(Op0))
2872 return foldMul(ISD::SUB, Op0.getOperand(0), Op1, ShAmt);
2873 // Op0 + (X * MulC) --> Op0 - (X << log2(-MulC))
2874 if (unsigned ShAmt = getShiftLeftAmt(Op1))
2875 return foldMul(ISD::SUB, Op1.getOperand(0), Op0, ShAmt);
2876 }
2877 if (Op.getOpcode() == ISD::SUB) {
2878 // Op0 - (X * MulC) --> Op0 + (X << log2(-MulC))
2879 if (unsigned ShAmt = getShiftLeftAmt(Op1))
2880 return foldMul(ISD::ADD, Op1.getOperand(0), Op0, ShAmt);
2881 }
2882 }
2883
2884 if (Op.getOpcode() == ISD::MUL) {
2885 Known = KnownBits::mul(KnownOp0, KnownOp1);
2886 } else { // Op.getOpcode() is either ISD::ADD or ISD::SUB.
2888 Op.getOpcode() == ISD::ADD, Flags.hasNoSignedWrap(),
2889 Flags.hasNoUnsignedWrap(), KnownOp0, KnownOp1);
2890 }
2891 break;
2892 }
2893 default:
2894 // We also ask the target about intrinsics (which could be specific to it).
2895 if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2896 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
2897 // TODO: Probably okay to remove after audit; here to reduce change size
2898 // in initial enablement patch for scalable vectors
2899 if (Op.getValueType().isScalableVector())
2900 break;
2901 if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
2902 Known, TLO, Depth))
2903 return true;
2904 break;
2905 }
2906
2907 // Just use computeKnownBits to compute output bits.
2908 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2909 break;
2910 }
2911
2912 // If we know the value of all of the demanded bits, return this as a
2913 // constant.
2914 if (!isTargetCanonicalConstantNode(Op) &&
2915 DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
2916 // Avoid folding to a constant if any OpaqueConstant is involved.
2917 const SDNode *N = Op.getNode();
2918 for (SDNode *Op :
2920 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
2921 if (C->isOpaque())
2922 return false;
2923 }
2924 if (VT.isInteger())
2925 return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
2926 if (VT.isFloatingPoint())
2927 return TLO.CombineTo(
2928 Op,
2929 TLO.DAG.getConstantFP(
2930 APFloat(TLO.DAG.EVTToAPFloatSemantics(VT), Known.One), dl, VT));
2931 }
2932
2933 // A multi use 'all demanded elts' simplify failed to find any knownbits.
2934 // Try again just for the original demanded elts.
2935 // Ensure we do this AFTER constant folding above.
2936 if (HasMultiUse && Known.isUnknown() && !OriginalDemandedElts.isAllOnes())
2937 Known = TLO.DAG.computeKnownBits(Op, OriginalDemandedElts, Depth);
2938
2939 return false;
2940}
2941
2943 const APInt &DemandedElts,
2944 DAGCombinerInfo &DCI) const {
2945 SelectionDAG &DAG = DCI.DAG;
2946 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
2947 !DCI.isBeforeLegalizeOps());
2948
2949 APInt KnownUndef, KnownZero;
2950 bool Simplified =
2951 SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
2952 if (Simplified) {
2953 DCI.AddToWorklist(Op.getNode());
2954 DCI.CommitTargetLoweringOpt(TLO);
2955 }
2956
2957 return Simplified;
2958}
2959
2960/// Given a vector binary operation and known undefined elements for each input
2961/// operand, compute whether each element of the output is undefined.
2963 const APInt &UndefOp0,
2964 const APInt &UndefOp1) {
2965 EVT VT = BO.getValueType();
2967 "Vector binop only");
2968
2969 EVT EltVT = VT.getVectorElementType();
2970 unsigned NumElts = VT.isFixedLengthVector() ? VT.getVectorNumElements() : 1;
2971 assert(UndefOp0.getBitWidth() == NumElts &&
2972 UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
2973
2974 auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
2975 const APInt &UndefVals) {
2976 if (UndefVals[Index])
2977 return DAG.getUNDEF(EltVT);
2978
2979 if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
2980 // Try hard to make sure that the getNode() call is not creating temporary
2981 // nodes. Ignore opaque integers because they do not constant fold.
2982 SDValue Elt = BV->getOperand(Index);
2983 auto *C = dyn_cast<ConstantSDNode>(Elt);
2984 if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
2985 return Elt;
2986 }
2987
2988 return SDValue();
2989 };
2990
2991 APInt KnownUndef = APInt::getZero(NumElts);
2992 for (unsigned i = 0; i != NumElts; ++i) {
2993 // If both inputs for this element are either constant or undef and match
2994 // the element type, compute the constant/undef result for this element of
2995 // the vector.
2996 // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
2997 // not handle FP constants. The code within getNode() should be refactored
2998 // to avoid the danger of creating a bogus temporary node here.
2999 SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
3000 SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
3001 if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
3002 if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
3003 KnownUndef.setBit(i);
3004 }
3005 return KnownUndef;
3006}
3007
3009 SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
3010 APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
3011 bool AssumeSingleUse) const {
3012 EVT VT = Op.getValueType();
3013 unsigned Opcode = Op.getOpcode();
3014 APInt DemandedElts = OriginalDemandedElts;
3015 unsigned NumElts = DemandedElts.getBitWidth();
3016 assert(VT.isVector() && "Expected vector op");
3017
3018 KnownUndef = KnownZero = APInt::getZero(NumElts);
3019
3020 const TargetLowering &TLI = TLO.DAG.getTargetLoweringInfo();
3021 if (!TLI.shouldSimplifyDemandedVectorElts(Op, TLO))
3022 return false;
3023
3024 // TODO: For now we assume we know nothing about scalable vectors.
3025 if (VT.isScalableVector())
3026 return false;
3027
3028 assert(VT.getVectorNumElements() == NumElts &&
3029 "Mask size mismatches value type element count!");
3030
3031 // Undef operand.
3032 if (Op.isUndef()) {
3033 KnownUndef.setAllBits();
3034 return false;
3035 }
3036
3037 // If Op has other users, assume that all elements are needed.
3038 if (!AssumeSingleUse && !Op.getNode()->hasOneUse())
3039 DemandedElts.setAllBits();
3040
3041 // Not demanding any elements from Op.
3042 if (DemandedElts == 0) {
3043 KnownUndef.setAllBits();
3044 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3045 }
3046
3047 // Limit search depth.
3049 return false;
3050
3051 SDLoc DL(Op);
3052 unsigned EltSizeInBits = VT.getScalarSizeInBits();
3053 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
3054
3055 // Helper for demanding the specified elements and all the bits of both binary
3056 // operands.
3057 auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
3058 SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op0, DemandedElts,
3059 TLO.DAG, Depth + 1);
3060 SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op1, DemandedElts,
3061 TLO.DAG, Depth + 1);
3062 if (NewOp0 || NewOp1) {
3063 SDValue NewOp =
3064 TLO.DAG.getNode(Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0,
3065 NewOp1 ? NewOp1 : Op1, Op->getFlags());
3066 return TLO.CombineTo(Op, NewOp);
3067 }
3068 return false;
3069 };
3070
3071 switch (Opcode) {
3072 case ISD::SCALAR_TO_VECTOR: {
3073 if (!DemandedElts[0]) {
3074 KnownUndef.setAllBits();
3075 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3076 }
3077 SDValue ScalarSrc = Op.getOperand(0);
3078 if (ScalarSrc.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
3079 SDValue Src = ScalarSrc.getOperand(0);
3080 SDValue Idx = ScalarSrc.getOperand(1);
3081 EVT SrcVT = Src.getValueType();
3082
3083 ElementCount SrcEltCnt = SrcVT.getVectorElementCount();
3084
3085 if (SrcEltCnt.isScalable())
3086 return false;
3087
3088 unsigned NumSrcElts = SrcEltCnt.getFixedValue();
3089 if (isNullConstant(Idx)) {
3090 APInt SrcDemandedElts = APInt::getOneBitSet(NumSrcElts, 0);
3091 APInt SrcUndef = KnownUndef.zextOrTrunc(NumSrcElts);
3092 APInt SrcZero = KnownZero.zextOrTrunc(NumSrcElts);
3093 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3094 TLO, Depth + 1))
3095 return true;
3096 }
3097 }
3098 KnownUndef.setHighBits(NumElts - 1);
3099 break;
3100 }
3101 case ISD::BITCAST: {
3102 SDValue Src = Op.getOperand(0);
3103 EVT SrcVT = Src.getValueType();
3104
3105 // We only handle vectors here.
3106 // TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits?
3107 if (!SrcVT.isVector())
3108 break;
3109
3110 // Fast handling of 'identity' bitcasts.
3111 unsigned NumSrcElts = SrcVT.getVectorNumElements();
3112 if (NumSrcElts == NumElts)
3113 return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
3114 KnownZero, TLO, Depth + 1);
3115
3116 APInt SrcDemandedElts, SrcZero, SrcUndef;
3117
3118 // Bitcast from 'large element' src vector to 'small element' vector, we
3119 // must demand a source element if any DemandedElt maps to it.
3120 if ((NumElts % NumSrcElts) == 0) {
3121 unsigned Scale = NumElts / NumSrcElts;
3122 SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3123 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3124 TLO, Depth + 1))
3125 return true;
3126
3127 // Try calling SimplifyDemandedBits, converting demanded elts to the bits
3128 // of the large element.
3129 // TODO - bigendian once we have test coverage.
3130 if (IsLE) {
3131 unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
3132 APInt SrcDemandedBits = APInt::getZero(SrcEltSizeInBits);
3133 for (unsigned i = 0; i != NumElts; ++i)
3134 if (DemandedElts[i]) {
3135 unsigned Ofs = (i % Scale) * EltSizeInBits;
3136 SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
3137 }
3138
3139 KnownBits Known;
3140 if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known,
3141 TLO, Depth + 1))
3142 return true;
3143
3144 // The bitcast has split each wide element into a number of
3145 // narrow subelements. We have just computed the Known bits
3146 // for wide elements. See if element splitting results in
3147 // some subelements being zero. Only for demanded elements!
3148 for (unsigned SubElt = 0; SubElt != Scale; ++SubElt) {
3149 if (!Known.Zero.extractBits(EltSizeInBits, SubElt * EltSizeInBits)
3150 .isAllOnes())
3151 continue;
3152 for (unsigned SrcElt = 0; SrcElt != NumSrcElts; ++SrcElt) {
3153 unsigned Elt = Scale * SrcElt + SubElt;
3154 if (DemandedElts[Elt])
3155 KnownZero.setBit(Elt);
3156 }
3157 }
3158 }
3159
3160 // If the src element is zero/undef then all the output elements will be -
3161 // only demanded elements are guaranteed to be correct.
3162 for (unsigned i = 0; i != NumSrcElts; ++i) {
3163 if (SrcDemandedElts[i]) {
3164 if (SrcZero[i])
3165 KnownZero.setBits(i * Scale, (i + 1) * Scale);
3166 if (SrcUndef[i])
3167 KnownUndef.setBits(i * Scale, (i + 1) * Scale);
3168 }
3169 }
3170 }
3171
3172 // Bitcast from 'small element' src vector to 'large element' vector, we
3173 // demand all smaller source elements covered by the larger demanded element
3174 // of this vector.
3175 if ((NumSrcElts % NumElts) == 0) {
3176 unsigned Scale = NumSrcElts / NumElts;
3177 SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3178 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3179 TLO, Depth + 1))
3180 return true;
3181
3182 // If all the src elements covering an output element are zero/undef, then
3183 // the output element will be as well, assuming it was demanded.
3184 for (unsigned i = 0; i != NumElts; ++i) {
3185 if (DemandedElts[i]) {
3186 if (SrcZero.extractBits(Scale, i * Scale).isAllOnes())
3187 KnownZero.setBit(i);
3188 if (SrcUndef.extractBits(Scale, i * Scale).isAllOnes())
3189 KnownUndef.setBit(i);
3190 }
3191 }
3192 }
3193 break;
3194 }
3195 case ISD::FREEZE: {
3196 SDValue N0 = Op.getOperand(0);
3197 if (TLO.DAG.isGuaranteedNotToBeUndefOrPoison(N0, DemandedElts,
3198 /*PoisonOnly=*/false))
3199 return TLO.CombineTo(Op, N0);
3200
3201 // TODO: Replace this with the general fold from DAGCombiner::visitFREEZE
3202 // freeze(op(x, ...)) -> op(freeze(x), ...).
3203 if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && DemandedElts == 1)
3204 return TLO.CombineTo(
3206 TLO.DAG.getFreeze(N0.getOperand(0))));
3207 break;
3208 }
3209 case ISD::BUILD_VECTOR: {
3210 // Check all elements and simplify any unused elements with UNDEF.
3211 if (!DemandedElts.isAllOnes()) {
3212 // Don't simplify BROADCASTS.
3213 if (llvm::any_of(Op->op_values(),
3214 [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
3215 SmallVector<SDValue, 32> Ops(Op->op_begin(), Op->op_end());
3216 bool Updated = false;
3217 for (unsigned i = 0; i != NumElts; ++i) {
3218 if (!DemandedElts[i] && !Ops[i].isUndef()) {
3219 Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
3220 KnownUndef.setBit(i);
3221 Updated = true;
3222 }
3223 }
3224 if (Updated)
3225 return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
3226 }
3227 }
3228 for (unsigned i = 0; i != NumElts; ++i) {
3229 SDValue SrcOp = Op.getOperand(i);
3230 if (SrcOp.isUndef()) {
3231 KnownUndef.setBit(i);
3232 } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
3234 KnownZero.setBit(i);
3235 }
3236 }
3237 break;
3238 }
3239 case ISD::CONCAT_VECTORS: {
3240 EVT SubVT = Op.getOperand(0).getValueType();
3241 unsigned NumSubVecs = Op.getNumOperands();
3242 unsigned NumSubElts = SubVT.getVectorNumElements();
3243 for (unsigned i = 0; i != NumSubVecs; ++i) {
3244 SDValue SubOp = Op.getOperand(i);
3245 APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3246 APInt SubUndef, SubZero;
3247 if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
3248 Depth + 1))
3249 return true;
3250 KnownUndef.insertBits(SubUndef, i * NumSubElts);
3251 KnownZero.insertBits(SubZero, i * NumSubElts);
3252 }
3253
3254 // Attempt to avoid multi-use ops if we don't need anything from them.
3255 if (!DemandedElts.isAllOnes()) {
3256 bool FoundNewSub = false;
3257 SmallVector<SDValue, 2> DemandedSubOps;
3258 for (unsigned i = 0; i != NumSubVecs; ++i) {
3259 SDValue SubOp = Op.getOperand(i);
3260 APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3261 SDValue NewSubOp = SimplifyMultipleUseDemandedVectorElts(
3262 SubOp, SubElts, TLO.DAG, Depth + 1);
3263 DemandedSubOps.push_back(NewSubOp ? NewSubOp : SubOp);
3264 FoundNewSub = NewSubOp ? true : FoundNewSub;
3265 }
3266 if (FoundNewSub) {
3267 SDValue NewOp =
3268 TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, DemandedSubOps);
3269 return TLO.CombineTo(Op, NewOp);
3270 }
3271 }
3272 break;
3273 }
3274 case ISD::INSERT_SUBVECTOR: {
3275 // Demand any elements from the subvector and the remainder from the src its
3276 // inserted into.
3277 SDValue Src = Op.getOperand(0);
3278 SDValue Sub = Op.getOperand(1);
3279 uint64_t Idx = Op.getConstantOperandVal(2);
3280 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
3281 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
3282 APInt DemandedSrcElts = DemandedElts;
3283 DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
3284
3285 APInt SubUndef, SubZero;
3286 if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
3287 Depth + 1))
3288 return true;
3289
3290 // If none of the src operand elements are demanded, replace it with undef.
3291 if (!DemandedSrcElts && !Src.isUndef())
3292 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
3293 TLO.DAG.getUNDEF(VT), Sub,
3294 Op.getOperand(2)));
3295
3296 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownUndef, KnownZero,
3297 TLO, Depth + 1))
3298 return true;
3299 KnownUndef.insertBits(SubUndef, Idx);
3300 KnownZero.insertBits(SubZero, Idx);
3301
3302 // Attempt to avoid multi-use ops if we don't need anything from them.
3303 if (!DemandedSrcElts.isAllOnes() || !DemandedSubElts.isAllOnes()) {
3304 SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3305 Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3306 SDValue NewSub = SimplifyMultipleUseDemandedVectorElts(
3307 Sub, DemandedSubElts, TLO.DAG, Depth + 1);
3308 if (NewSrc || NewSub) {
3309 NewSrc = NewSrc ? NewSrc : Src;
3310 NewSub = NewSub ? NewSub : Sub;
3311 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3312 NewSub, Op.getOperand(2));
3313 return TLO.CombineTo(Op, NewOp);
3314 }
3315 }
3316 break;
3317 }
3319 // Offset the demanded elts by the subvector index.
3320 SDValue Src = Op.getOperand(0);
3321 if (Src.getValueType().isScalableVector())
3322 break;
3323 uint64_t Idx = Op.getConstantOperandVal(1);
3324 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3325 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
3326
3327 APInt SrcUndef, SrcZero;
3328 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3329 Depth + 1))
3330 return true;
3331 KnownUndef = SrcUndef.extractBits(NumElts, Idx);
3332 KnownZero = SrcZero.extractBits(NumElts, Idx);
3333
3334 // Attempt to avoid multi-use ops if we don't need anything from them.
3335 if (!DemandedElts.isAllOnes()) {
3336 SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3337 Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3338 if (NewSrc) {
3339 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3340 Op.getOperand(1));
3341 return TLO.CombineTo(Op, NewOp);
3342 }
3343 }
3344 break;
3345 }
3347 SDValue Vec = Op.getOperand(0);
3348 SDValue Scl = Op.getOperand(1);
3349 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
3350
3351 // For a legal, constant insertion index, if we don't need this insertion
3352 // then strip it, else remove it from the demanded elts.
3353 if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
3354 unsigned Idx = CIdx->getZExtValue();
3355 if (!DemandedElts[Idx])
3356 return TLO.CombineTo(Op, Vec);
3357
3358 APInt DemandedVecElts(DemandedElts);
3359 DemandedVecElts.clearBit(Idx);
3360 if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
3361 KnownZero, TLO, Depth + 1))
3362 return true;
3363
3364 KnownUndef.setBitVal(Idx, Scl.isUndef());
3365
3366 KnownZero.setBitVal(Idx, isNullConstant(Scl) || isNullFPConstant(Scl));
3367 break;
3368 }
3369
3370 APInt VecUndef, VecZero;
3371 if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
3372 Depth + 1))
3373 return true;
3374 // Without knowing the insertion index we can't set KnownUndef/KnownZero.
3375 break;
3376 }
3377 case ISD::VSELECT: {
3378 SDValue Sel = Op.getOperand(0);
3379 SDValue LHS = Op.getOperand(1);
3380 SDValue RHS = Op.getOperand(2);
3381
3382 // Try to transform the select condition based on the current demanded
3383 // elements.
3384 APInt UndefSel, ZeroSel;
3385 if (SimplifyDemandedVectorElts(Sel, DemandedElts, UndefSel, ZeroSel, TLO,
3386 Depth + 1))
3387 return true;
3388
3389 // See if we can simplify either vselect operand.
3390 APInt DemandedLHS(DemandedElts);
3391 APInt DemandedRHS(DemandedElts);
3392 APInt UndefLHS, ZeroLHS;
3393 APInt UndefRHS, ZeroRHS;
3394 if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3395 Depth + 1))
3396 return true;
3397 if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3398 Depth + 1))
3399 return true;
3400
3401 KnownUndef = UndefLHS & UndefRHS;
3402 KnownZero = ZeroLHS & ZeroRHS;
3403
3404 // If we know that the selected element is always zero, we don't need the
3405 // select value element.
3406 APInt DemandedSel = DemandedElts & ~KnownZero;
3407 if (DemandedSel != DemandedElts)
3408 if (SimplifyDemandedVectorElts(Sel, DemandedSel, UndefSel, ZeroSel, TLO,
3409 Depth + 1))
3410 return true;
3411
3412 break;
3413 }
3414 case ISD::VECTOR_SHUFFLE: {
3415 SDValue LHS = Op.getOperand(0);
3416 SDValue RHS = Op.getOperand(1);
3417 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
3418
3419 // Collect demanded elements from shuffle operands..
3420 APInt DemandedLHS(NumElts, 0);
3421 APInt DemandedRHS(NumElts, 0);
3422 for (unsigned i = 0; i != NumElts; ++i) {
3423 int M = ShuffleMask[i];
3424 if (M < 0 || !DemandedElts[i])
3425 continue;
3426 assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
3427 if (M < (int)NumElts)
3428 DemandedLHS.setBit(M);
3429 else
3430 DemandedRHS.setBit(M - NumElts);
3431 }
3432
3433 // See if we can simplify either shuffle operand.
3434 APInt UndefLHS, ZeroLHS;
3435 APInt UndefRHS, ZeroRHS;
3436 if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3437 Depth + 1))
3438 return true;
3439 if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3440 Depth + 1))
3441 return true;
3442
3443 // Simplify mask using undef elements from LHS/RHS.
3444 bool Updated = false;
3445 bool IdentityLHS = true, IdentityRHS = true;
3446 SmallVector<int, 32> NewMask(ShuffleMask);
3447 for (unsigned i = 0; i != NumElts; ++i) {
3448 int &M = NewMask[i];
3449 if (M < 0)
3450 continue;
3451 if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
3452 (M >= (int)NumElts && UndefRHS[M - NumElts])) {
3453 Updated = true;
3454 M = -1;
3455 }
3456 IdentityLHS &= (M < 0) || (M == (int)i);
3457 IdentityRHS &= (M < 0) || ((M - NumElts) == i);
3458 }
3459
3460 // Update legal shuffle masks based on demanded elements if it won't reduce
3461 // to Identity which can cause premature removal of the shuffle mask.
3462 if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
3463 SDValue LegalShuffle =
3464 buildLegalVectorShuffle(VT, DL, LHS, RHS, NewMask, TLO.DAG);
3465 if (LegalShuffle)
3466 return TLO.CombineTo(Op, LegalShuffle);
3467 }
3468
3469 // Propagate undef/zero elements from LHS/RHS.
3470 for (unsigned i = 0; i != NumElts; ++i) {
3471 int M = ShuffleMask[i];
3472 if (M < 0) {
3473 KnownUndef.setBit(i);
3474 } else if (M < (int)NumElts) {
3475 if (UndefLHS[M])
3476 KnownUndef.setBit(i);
3477 if (ZeroLHS[M])
3478 KnownZero.setBit(i);
3479 } else {
3480 if (UndefRHS[M - NumElts])
3481 KnownUndef.setBit(i);
3482 if (ZeroRHS[M - NumElts])
3483 KnownZero.setBit(i);
3484 }
3485 }
3486 break;
3487 }
3491 APInt SrcUndef, SrcZero;
3492 SDValue Src = Op.getOperand(0);
3493 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3494 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts);
3495 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3496 Depth + 1))
3497 return true;
3498 KnownZero = SrcZero.zextOrTrunc(NumElts);
3499 KnownUndef = SrcUndef.zextOrTrunc(NumElts);
3500
3501 if (IsLE && Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
3502 Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
3503 DemandedSrcElts == 1) {
3504 // aext - if we just need the bottom element then we can bitcast.
3505 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
3506 }
3507
3508 if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
3509 // zext(undef) upper bits are guaranteed to be zero.
3510 if (DemandedElts.isSubsetOf(KnownUndef))
3511 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3512 KnownUndef.clearAllBits();
3513
3514 // zext - if we just need the bottom element then we can mask:
3515 // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
3516 if (IsLE && DemandedSrcElts == 1 && Src.getOpcode() == ISD::AND &&
3517 Op->isOnlyUserOf(Src.getNode()) &&
3518 Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
3519 SDLoc DL(Op);
3520 EVT SrcVT = Src.getValueType();
3521 EVT SrcSVT = SrcVT.getScalarType();
3522 SmallVector<SDValue> MaskElts;
3523 MaskElts.push_back(TLO.DAG.getAllOnesConstant(DL, SrcSVT));
3524 MaskElts.append(NumSrcElts - 1, TLO.DAG.getConstant(0, DL, SrcSVT));
3525 SDValue Mask = TLO.DAG.getBuildVector(SrcVT, DL, MaskElts);
3526 if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(
3527 ISD::AND, DL, SrcVT, {Src.getOperand(1), Mask})) {
3528 Fold = TLO.DAG.getNode(ISD::AND, DL, SrcVT, Src.getOperand(0), Fold);
3529 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Fold));
3530 }
3531 }
3532 }
3533 break;
3534 }
3535
3536 // TODO: There are more binop opcodes that could be handled here - MIN,
3537 // MAX, saturated math, etc.
3538 case ISD::ADD: {
3539 SDValue Op0 = Op.getOperand(0);
3540 SDValue Op1 = Op.getOperand(1);
3541 if (Op0 == Op1 && Op->isOnlyUserOf(Op0.getNode())) {
3542 APInt UndefLHS, ZeroLHS;
3543 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3544 Depth + 1, /*AssumeSingleUse*/ true))
3545 return true;
3546 }
3547 [[fallthrough]];
3548 }
3549 case ISD::AVGCEILS:
3550 case ISD::AVGCEILU:
3551 case ISD::AVGFLOORS:
3552 case ISD::AVGFLOORU:
3553 case ISD::OR:
3554 case ISD::XOR:
3555 case ISD::SUB:
3556 case ISD::FADD:
3557 case ISD::FSUB:
3558 case ISD::FMUL:
3559 case ISD::FDIV:
3560 case ISD::FREM: {
3561 SDValue Op0 = Op.getOperand(0);
3562 SDValue Op1 = Op.getOperand(1);
3563
3564 APInt UndefRHS, ZeroRHS;
3565 if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3566 Depth + 1))
3567 return true;
3568 APInt UndefLHS, ZeroLHS;
3569 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3570 Depth + 1))
3571 return true;
3572
3573 KnownZero = ZeroLHS & ZeroRHS;
3574 KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
3575
3576 // Attempt to avoid multi-use ops if we don't need anything from them.
3577 // TODO - use KnownUndef to relax the demandedelts?
3578 if (!DemandedElts.isAllOnes())
3579 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3580 return true;
3581 break;
3582 }
3583 case ISD::SHL:
3584 case ISD::SRL:
3585 case ISD::SRA:
3586 case ISD::ROTL:
3587 case ISD::ROTR: {
3588 SDValue Op0 = Op.getOperand(0);
3589 SDValue Op1 = Op.getOperand(1);
3590
3591 APInt UndefRHS, ZeroRHS;
3592 if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3593 Depth + 1))
3594 return true;
3595 APInt UndefLHS, ZeroLHS;
3596 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3597 Depth + 1))
3598 return true;
3599
3600 KnownZero = ZeroLHS;
3601 KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
3602
3603 // Attempt to avoid multi-use ops if we don't need anything from them.
3604 // TODO - use KnownUndef to relax the demandedelts?
3605 if (!DemandedElts.isAllOnes())
3606 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3607 return true;
3608 break;
3609 }
3610 case ISD::MUL:
3611 case ISD::MULHU:
3612 case ISD::MULHS:
3613 case ISD::AND: {
3614 SDValue Op0 = Op.getOperand(0);
3615 SDValue Op1 = Op.getOperand(1);
3616
3617 APInt SrcUndef, SrcZero;
3618 if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO,
3619 Depth + 1))
3620 return true;
3621 // If we know that a demanded element was zero in Op1 we don't need to
3622 // demand it in Op0 - its guaranteed to be zero.
3623 APInt DemandedElts0 = DemandedElts & ~SrcZero;
3624 if (SimplifyDemandedVectorElts(Op0, DemandedElts0, KnownUndef, KnownZero,
3625 TLO, Depth + 1))
3626 return true;
3627
3628 KnownUndef &= DemandedElts0;
3629 KnownZero &= DemandedElts0;
3630
3631 // If every element pair has a zero/undef then just fold to zero.
3632 // fold (and x, undef) -> 0 / (and x, 0) -> 0
3633 // fold (mul x, undef) -> 0 / (mul x, 0) -> 0
3634 if (DemandedElts.isSubsetOf(SrcZero | KnownZero | SrcUndef | KnownUndef))
3635 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3636
3637 // If either side has a zero element, then the result element is zero, even
3638 // if the other is an UNDEF.
3639 // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
3640 // and then handle 'and' nodes with the rest of the binop opcodes.
3641 KnownZero |= SrcZero;
3642 KnownUndef &= SrcUndef;
3643 KnownUndef &= ~KnownZero;
3644
3645 // Attempt to avoid multi-use ops if we don't need anything from them.
3646 if (!DemandedElts.isAllOnes())
3647 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3648 return true;
3649 break;
3650 }
3651 case ISD::TRUNCATE:
3652 case ISD::SIGN_EXTEND:
3653 case ISD::ZERO_EXTEND:
3654 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3655 KnownZero, TLO, Depth + 1))
3656 return true;
3657
3658 if (Op.getOpcode() == ISD::ZERO_EXTEND) {
3659 // zext(undef) upper bits are guaranteed to be zero.
3660 if (DemandedElts.isSubsetOf(KnownUndef))
3661 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3662 KnownUndef.clearAllBits();
3663 }
3664 break;
3665 default: {
3666 if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
3667 if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
3668 KnownZero, TLO, Depth))
3669 return true;
3670 } else {
3671 KnownBits Known;
3672 APInt DemandedBits = APInt::getAllOnes(EltSizeInBits);
3673 if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
3674 TLO, Depth, AssumeSingleUse))
3675 return true;
3676 }
3677 break;
3678 }
3679 }
3680 assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
3681
3682 // Constant fold all undef cases.
3683 // TODO: Handle zero cases as well.
3684 if (DemandedElts.isSubsetOf(KnownUndef))
3685 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3686
3687 return false;
3688}
3689
3690/// Determine which of the bits specified in Mask are known to be either zero or
3691/// one and return them in the Known.
3693 KnownBits &Known,
3694 const APInt &DemandedElts,
3695 const SelectionDAG &DAG,
3696 unsigned Depth) const {
3697 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3698 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3699 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3700 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3701 "Should use MaskedValueIsZero if you don't know whether Op"
3702 " is a target node!");
3703 Known.resetAll();
3704}
3705
3708 const APInt &DemandedElts, const MachineRegisterInfo &MRI,
3709 unsigned Depth) const {
3710 Known.resetAll();
3711}
3712
3714 const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
3715 // The low bits are known zero if the pointer is aligned.
3716 Known.Zero.setLowBits(Log2(MF.getFrameInfo().getObjectAlign(FrameIdx)));
3717}
3718
3721 unsigned Depth) const {
3722 return Align(1);
3723}
3724
3725/// This method can be implemented by targets that want to expose additional
3726/// information about sign bits to the DAG Combiner.
3728 const APInt &,
3729 const SelectionDAG &,
3730 unsigned Depth) const {
3731 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3732 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3733 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3734 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3735 "Should use ComputeNumSignBits if you don't know whether Op"
3736 " is a target node!");
3737 return 1;
3738}
3739
3741 GISelKnownBits &Analysis, Register R, const APInt &DemandedElts,
3742 const MachineRegisterInfo &MRI, unsigned Depth) const {
3743 return 1;
3744}
3745
3747 SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
3748 TargetLoweringOpt &TLO, unsigned Depth) const {
3749 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3750 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3751 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3752 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3753 "Should use SimplifyDemandedVectorElts if you don't know whether Op"
3754 " is a target node!");
3755 return false;
3756}
3757
3759 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3760 KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
3761 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3762 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3763 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3764 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3765 "Should use SimplifyDemandedBits if you don't know whether Op"
3766 " is a target node!");
3767 computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
3768 return false;
3769}
3770
3772 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3773 SelectionDAG &DAG, unsigned Depth) const {
3774 assert(
3775 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3776 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3777 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3778 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3779 "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
3780 " is a target node!");
3781 return SDValue();
3782}
3783
3784SDValue
3787 SelectionDAG &DAG) const {
3788 bool LegalMask = isShuffleMaskLegal(Mask, VT);
3789 if (!LegalMask) {
3790 std::swap(N0, N1);
3792 LegalMask = isShuffleMaskLegal(Mask, VT);
3793 }
3794
3795 if (!LegalMask)
3796 return SDValue();
3797
3798 return DAG.getVectorShuffle(VT, DL, N0, N1, Mask);
3799}
3800
3802 return nullptr;
3803}
3804
3806 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
3807 bool PoisonOnly, unsigned Depth) const {
3808 assert(
3809 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3810 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3811 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3812 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3813 "Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op"
3814 " is a target node!");
3815
3816 // If Op can't create undef/poison and none of its operands are undef/poison
3817 // then Op is never undef/poison.
3818 return !canCreateUndefOrPoisonForTargetNode(Op, DemandedElts, DAG, PoisonOnly,
3819 /*ConsiderFlags*/ true, Depth) &&
3820 all_of(Op->ops(), [&](SDValue V) {
3821 return DAG.isGuaranteedNotToBeUndefOrPoison(V, PoisonOnly,
3822 Depth + 1);
3823 });
3824}
3825
3827 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
3828 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
3829 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3830 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3831 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3832 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3833 "Should use canCreateUndefOrPoison if you don't know whether Op"
3834 " is a target node!");
3835 // Be conservative and return true.
3836 return true;
3837}
3838
3840 const SelectionDAG &DAG,
3841 bool SNaN,
3842 unsigned Depth) const {
3843 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3844 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3845 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3846 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3847 "Should use isKnownNeverNaN if you don't know whether Op"
3848 " is a target node!");
3849 return false;
3850}
3851
3853 const APInt &DemandedElts,
3854 APInt &UndefElts,
3855 const SelectionDAG &DAG,
3856 unsigned Depth) const {
3857 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3858 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3859 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3860 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3861 "Should use isSplatValue if you don't know whether Op"
3862 " is a target node!");
3863 return false;
3864}
3865
3866// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
3867// work with truncating build vectors and vectors with elements of less than
3868// 8 bits.
3870 if (!N)
3871 return false;
3872
3873 unsigned EltWidth;
3874 APInt CVal;
3875 if (ConstantSDNode *CN = isConstOrConstSplat(N, /*AllowUndefs=*/false,
3876 /*AllowTruncation=*/true)) {
3877 CVal = CN->getAPIntValue();
3878 EltWidth = N.getValueType().getScalarSizeInBits();
3879 } else
3880 return false;
3881
3882 // If this is a truncating splat, truncate the splat value.
3883 // Otherwise, we may fail to match the expected values below.
3884 if (EltWidth < CVal.getBitWidth())
3885 CVal = CVal.trunc(EltWidth);
3886
3887 switch (getBooleanContents(N.getValueType())) {
3889 return CVal[0];
3891 return CVal.isOne();
3893 return CVal.isAllOnes();
3894 }
3895
3896 llvm_unreachable("Invalid boolean contents");
3897}
3898
3900 if (!N)
3901 return false;
3902
3903 const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
3904 if (!CN) {
3905 const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
3906 if (!BV)
3907 return false;
3908
3909 // Only interested in constant splats, we don't care about undef
3910 // elements in identifying boolean constants and getConstantSplatNode
3911 // returns NULL if all ops are undef;
3912 CN = BV->getConstantSplatNode();
3913 if (!CN)
3914 return false;
3915 }
3916
3917 if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
3918 return !CN->getAPIntValue()[0];
3919
3920 return CN->isZero();
3921}
3922
3924 bool SExt) const {
3925 if (VT == MVT::i1)
3926 return N->isOne();
3927
3929 switch (Cnt) {
3931 // An extended value of 1 is always true, unless its original type is i1,
3932 // in which case it will be sign extended to -1.
3933 return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
3936 return N->isAllOnes() && SExt;
3937 }
3938 llvm_unreachable("Unexpected enumeration.");
3939}
3940
3941/// This helper function of SimplifySetCC tries to optimize the comparison when
3942/// either operand of the SetCC node is a bitwise-and instruction.
3943SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
3944 ISD::CondCode Cond, const SDLoc &DL,
3945 DAGCombinerInfo &DCI) const {
3946 if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
3947 std::swap(N0, N1);
3948
3949 SelectionDAG &DAG = DCI.DAG;
3950 EVT OpVT = N0.getValueType();
3951 if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
3952 (Cond != ISD::SETEQ && Cond != ISD::SETNE))
3953 return SDValue();
3954
3955 // (X & Y) != 0 --> zextOrTrunc(X & Y)
3956 // iff everything but LSB is known zero:
3957 if (Cond == ISD::SETNE && isNullConstant(N1) &&
3960 unsigned NumEltBits = OpVT.getScalarSizeInBits();
3961 APInt UpperBits = APInt::getHighBitsSet(NumEltBits, NumEltBits - 1);
3962 if (DAG.MaskedValueIsZero(N0, UpperBits))
3963 return DAG.getBoolExtOrTrunc(N0, DL, VT, OpVT);
3964 }
3965
3966 // Try to eliminate a power-of-2 mask constant by converting to a signbit
3967 // test in a narrow type that we can truncate to with no cost. Examples:
3968 // (i32 X & 32768) == 0 --> (trunc X to i16) >= 0
3969 // (i32 X & 32768) != 0 --> (trunc X to i16) < 0
3970 // TODO: This conservatively checks for type legality on the source and
3971 // destination types. That may inhibit optimizations, but it also
3972 // allows setcc->shift transforms that may be more beneficial.
3973 auto *AndC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
3974 if (AndC && isNullConstant(N1) && AndC->getAPIntValue().isPowerOf2() &&
3975 isTypeLegal(OpVT) && N0.hasOneUse()) {
3976 EVT NarrowVT = EVT::getIntegerVT(*DAG.getContext(),
3977 AndC->getAPIntValue().getActiveBits());
3978 if (isTruncateFree(OpVT, NarrowVT) && isTypeLegal(NarrowVT)) {
3979 SDValue Trunc = DAG.getZExtOrTrunc(N0.getOperand(0), DL, NarrowVT);
3980 SDValue Zero = DAG.getConstant(0, DL, NarrowVT);
3981 return DAG.getSetCC(DL, VT, Trunc, Zero,
3983 }
3984 }
3985
3986 // Match these patterns in any of their permutations:
3987 // (X & Y) == Y
3988 // (X & Y) != Y
3989 SDValue X, Y;
3990 if (N0.getOperand(0) == N1) {
3991 X = N0.getOperand(1);
3992 Y = N0.getOperand(0);
3993 } else if (N0.getOperand(1) == N1) {
3994 X = N0.getOperand(0);
3995 Y = N0.getOperand(1);
3996 } else {
3997 return SDValue();
3998 }
3999
4000 // TODO: We should invert (X & Y) eq/ne 0 -> (X & Y) ne/eq Y if
4001 // `isXAndYEqZeroPreferableToXAndYEqY` is false. This is a bit difficult as
4002 // its liable to create and infinite loop.
4003 SDValue Zero = DAG.getConstant(0, DL, OpVT);
4004 if (isXAndYEqZeroPreferableToXAndYEqY(Cond, OpVT) &&
4006 // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
4007 // Note that where Y is variable and is known to have at most one bit set
4008 // (for example, if it is Z & 1) we cannot do this; the expressions are not
4009 // equivalent when Y == 0.
4010 assert(OpVT.isInteger());
4012 if (DCI.isBeforeLegalizeOps() ||
4014 return DAG.getSetCC(DL, VT, N0, Zero, Cond);
4015 } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
4016 // If the target supports an 'and-not' or 'and-complement' logic operation,
4017 // try to use that to make a comparison operation more efficient.
4018 // But don't do this transform if the mask is a single bit because there are
4019 // more efficient ways to deal with that case (for example, 'bt' on x86 or
4020 // 'rlwinm' on PPC).
4021
4022 // Bail out if the compare operand that we want to turn into a zero is
4023 // already a zero (otherwise, infinite loop).
4024 if (isNullConstant(Y))
4025 return SDValue();
4026
4027 // Transform this into: ~X & Y == 0.
4028 SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
4029 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
4030 return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
4031 }
4032
4033 return SDValue();
4034}
4035
4036/// There are multiple IR patterns that could be checking whether certain
4037/// truncation of a signed number would be lossy or not. The pattern which is
4038/// best at IR level, may not lower optimally. Thus, we want to unfold it.
4039/// We are looking for the following pattern: (KeptBits is a constant)
4040/// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
4041/// KeptBits won't be bitwidth(x), that will be constant-folded to true/false.
4042/// KeptBits also can't be 1, that would have been folded to %x dstcond 0
4043/// We will unfold it into the natural trunc+sext pattern:
4044/// ((%x << C) a>> C) dstcond %x
4045/// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x)
4046SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
4047 EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
4048 const SDLoc &DL) const {
4049 // We must be comparing with a constant.
4050 ConstantSDNode *C1;
4051 if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
4052 return SDValue();
4053
4054 // N0 should be: add %x, (1 << (KeptBits-1))
4055 if (N0->getOpcode() != ISD::ADD)
4056 return SDValue();
4057
4058 // And we must be 'add'ing a constant.
4059 ConstantSDNode *C01;
4060 if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
4061 return SDValue();
4062
4063 SDValue X = N0->getOperand(0);
4064 EVT XVT = X.getValueType();
4065
4066 // Validate constants ...
4067
4068 APInt I1 = C1->getAPIntValue();
4069
4070 ISD::CondCode NewCond;
4071 if (Cond == ISD::CondCode::SETULT) {
4072 NewCond = ISD::CondCode::SETEQ;
4073 } else if (Cond == ISD::CondCode::SETULE) {
4074 NewCond = ISD::CondCode::SETEQ;
4075 // But need to 'canonicalize' the constant.
4076 I1 += 1;
4077 } else if (Cond == ISD::CondCode::SETUGT) {
4078 NewCond = ISD::CondCode::SETNE;
4079 // But need to 'canonicalize' the constant.
4080 I1 += 1;
4081 } else if (Cond == ISD::CondCode::SETUGE) {
4082 NewCond = ISD::CondCode::SETNE;
4083 } else
4084 return SDValue();
4085
4086 APInt I01 = C01->getAPIntValue();
4087
4088 auto checkConstants = [&I1, &I01]() -> bool {
4089 // Both of them must be power-of-two, and the constant from setcc is bigger.
4090 return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
4091 };
4092
4093 if (checkConstants()) {
4094 // Great, e.g. got icmp ult i16 (add i16 %x, 128), 256
4095 } else {
4096 // What if we invert constants? (and the target predicate)
4097 I1.negate();
4098 I01.negate();
4099 assert(XVT.isInteger());
4100 NewCond = getSetCCInverse(NewCond, XVT);
4101 if (!checkConstants())
4102 return SDValue();
4103 // Great, e.g. got icmp uge i16 (add i16 %x, -128), -256
4104 }
4105
4106 // They are power-of-two, so which bit is set?
4107 const unsigned KeptBits = I1.logBase2();
4108 const unsigned KeptBitsMinusOne = I01.logBase2();
4109
4110 // Magic!
4111 if (KeptBits != (KeptBitsMinusOne + 1))
4112 return SDValue();
4113 assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");
4114
4115 // We don't want to do this in every single case.
4116 SelectionDAG &DAG = DCI.DAG;
4118 XVT, KeptBits))
4119 return SDValue();
4120
4121 // Unfold into: sext_inreg(%x) cond %x
4122 // Where 'cond' will be either 'eq' or 'ne'.
4123 SDValue SExtInReg = DAG.getNode(
4125 DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), KeptBits)));
4126 return DAG.getSetCC(DL, SCCVT, SExtInReg, X, NewCond);
4127}
4128
4129// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
4130SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
4131 EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
4132 DAGCombinerInfo &DCI, const SDLoc &DL) const {
4134 "Should be a comparison with 0.");
4135 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4136 "Valid only for [in]equality comparisons.");
4137
4138 unsigned NewShiftOpcode;
4139 SDValue X, C, Y;
4140
4141 SelectionDAG &DAG = DCI.DAG;
4142 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4143
4144 // Look for '(C l>>/<< Y)'.
4145 auto Match = [&NewShiftOpcode, &X, &C, &Y, &TLI, &DAG](SDValue V) {
4146 // The shift should be one-use.
4147 if (!V.hasOneUse())
4148 return false;
4149 unsigned OldShiftOpcode = V.getOpcode();
4150 switch (OldShiftOpcode) {
4151 case ISD::SHL:
4152 NewShiftOpcode = ISD::SRL;
4153 break;
4154 case ISD::SRL:
4155 NewShiftOpcode = ISD::SHL;
4156 break;
4157 default:
4158 return false; // must be a logical shift.
4159 }
4160 // We should be shifting a constant.
4161 // FIXME: best to use isConstantOrConstantVector().
4162 C = V.getOperand(0);
4164 isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
4165 if (!CC)
4166 return false;
4167 Y = V.getOperand(1);
4168
4170 isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
4171 return TLI.shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
4172 X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
4173 };
4174
4175 // LHS of comparison should be an one-use 'and'.
4176 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
4177 return SDValue();
4178
4179 X = N0.getOperand(0);
4180 SDValue Mask = N0.getOperand(1);
4181
4182 // 'and' is commutative!
4183 if (!Match(Mask)) {
4184 std::swap(X, Mask);
4185 if (!Match(Mask))
4186 return SDValue();
4187 }
4188
4189 EVT VT = X.getValueType();
4190
4191 // Produce:
4192 // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
4193 SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
4194 SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
4195 SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
4196 return T2;
4197}
4198
4199/// Try to fold an equality comparison with a {add/sub/xor} binary operation as
4200/// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
4201/// handle the commuted versions of these patterns.
4202SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
4203 ISD::CondCode Cond, const SDLoc &DL,
4204 DAGCombinerInfo &DCI) const {
4205 unsigned BOpcode = N0.getOpcode();
4206 assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
4207 "Unexpected binop");
4208 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
4209
4210 // (X + Y) == X --> Y == 0
4211 // (X - Y) == X --> Y == 0
4212 // (X ^ Y) == X --> Y == 0
4213 SelectionDAG &DAG = DCI.DAG;
4214 EVT OpVT = N0.getValueType();
4215 SDValue X = N0.getOperand(0);
4216 SDValue Y = N0.getOperand(1);
4217 if (X == N1)
4218 return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);
4219
4220 if (Y != N1)
4221 return SDValue();
4222
4223 // (X + Y) == Y --> X == 0
4224 // (X ^ Y) == Y --> X == 0
4225 if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
4226 return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);
4227
4228 // The shift would not be valid if the operands are boolean (i1).
4229 if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
4230 return SDValue();
4231
4232 // (X - Y) == Y --> X == Y << 1
4233 SDValue One =
4234 DAG.getShiftAmountConstant(1, OpVT, DL, !DCI.isBeforeLegalize());
4235 SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
4236 if (!DCI.isCalledByLegalizer())
4237 DCI.AddToWorklist(YShl1.getNode());
4238 return DAG.getSetCC(DL, VT, X, YShl1, Cond);
4239}
4240
4242 SDValue N0, const APInt &C1,
4243 ISD::CondCode Cond, const SDLoc &dl,
4244 SelectionDAG &DAG) {
4245 // Look through truncs that don't change the value of a ctpop.
4246 // FIXME: Add vector support? Need to be careful with setcc result type below.
4247 SDValue CTPOP = N0;
4248 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
4250 CTPOP = N0.getOperand(0);
4251
4252 if (CTPOP.getOpcode() != ISD::CTPOP || !CTPOP.hasOneUse())
4253 return SDValue();
4254
4255 EVT CTVT = CTPOP.getValueType();
4256 SDValue CTOp = CTPOP.getOperand(0);
4257
4258 // Expand a power-of-2-or-zero comparison based on ctpop:
4259 // (ctpop x) u< 2 -> (x & x-1) == 0
4260 // (ctpop x) u> 1 -> (x & x-1) != 0
4261 if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
4262 // Keep the CTPOP if it is a cheap vector op.
4263 if (CTVT.isVector() && TLI.isCtpopFast(CTVT))
4264 return SDValue();
4265
4266 unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond);
4267 if (C1.ugt(CostLimit + (Cond == ISD::SETULT)))
4268 return SDValue();
4269 if (C1 == 0 && (Cond == ISD::SETULT))
4270 return SDValue(); // This is handled elsewhere.
4271
4272 unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT);
4273
4274 SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4275 SDValue Result = CTOp;
4276 for (unsigned i = 0; i < Passes; i++) {
4277 SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, Result, NegOne);
4278 Result = DAG.getNode(ISD::AND, dl, CTVT, Result, Add);
4279 }
4281 return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC);
4282 }
4283
4284 // Expand a power-of-2 comparison based on ctpop
4285 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
4286 // Keep the CTPOP if it is cheap.
4287 if (TLI.isCtpopFast(CTVT))
4288 return SDValue();
4289
4290 SDValue Zero = DAG.getConstant(0, dl, CTVT);
4291 SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4292 assert(CTVT.isInteger());
4293 SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
4294
4295 // Its not uncommon for known-never-zero X to exist in (ctpop X) eq/ne 1, so
4296 // check before emitting a potentially unnecessary op.
4297 if (DAG.isKnownNeverZero(CTOp)) {
4298 // (ctpop x) == 1 --> (x & x-1) == 0
4299 // (ctpop x) != 1 --> (x & x-1) != 0
4300 SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
4301 SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
4302 return RHS;
4303 }
4304
4305 // (ctpop x) == 1 --> (x ^ x-1) > x-1
4306 // (ctpop x) != 1 --> (x ^ x-1) <= x-1
4307 SDValue Xor = DAG.getNode(ISD::XOR, dl, CTVT, CTOp, Add);
4309 return DAG.getSetCC(dl, VT, Xor, Add, CmpCond);
4310 }
4311
4312 return SDValue();
4313}
4314
4316 ISD::CondCode Cond, const SDLoc &dl,
4317 SelectionDAG &DAG) {
4318 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4319 return SDValue();
4320
4321 auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4322 if (!C1 || !(C1->isZero() || C1->isAllOnes()))
4323 return SDValue();
4324
4325 auto getRotateSource = [](SDValue X) {
4326 if (X.getOpcode() == ISD::ROTL || X.getOpcode() == ISD::ROTR)
4327 return X.getOperand(0);
4328 return SDValue();
4329 };
4330
4331 // Peek through a rotated value compared against 0 or -1:
4332 // (rot X, Y) == 0/-1 --> X == 0/-1
4333 // (rot X, Y) != 0/-1 --> X != 0/-1
4334 if (SDValue R = getRotateSource(N0))
4335 return DAG.getSetCC(dl, VT, R, N1, Cond);
4336
4337 // Peek through an 'or' of a rotated value compared against 0:
4338 // or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0
4339 // or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0
4340 //
4341 // TODO: Add the 'and' with -1 sibling.
4342 // TODO: Recurse through a series of 'or' ops to find the rotate.
4343 EVT OpVT = N0.getValueType();
4344 if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) {
4345 if (SDValue R = getRotateSource(N0.getOperand(0))) {
4346 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(1));
4347 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4348 }
4349 if (SDValue R = getRotateSource(N0.getOperand(1))) {
4350 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(0));
4351 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4352 }
4353 }
4354
4355 return SDValue();
4356}
4357
4359 ISD::CondCode Cond, const SDLoc &dl,
4360 SelectionDAG &DAG) {
4361 // If we are testing for all-bits-clear, we might be able to do that with
4362 // less shifting since bit-order does not matter.
4363 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4364 return SDValue();
4365
4366 auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4367 if (!C1 || !C1->isZero())
4368 return SDValue();
4369
4370 if (!N0.hasOneUse() ||
4371 (N0.getOpcode() != ISD::FSHL && N0.getOpcode() != ISD::FSHR))
4372 return SDValue();
4373
4374 unsigned BitWidth = N0.getScalarValueSizeInBits();
4375 auto *ShAmtC = isConstOrConstSplat(N0.getOperand(2));
4376 if (!ShAmtC || ShAmtC->getAPIntValue().uge(BitWidth))
4377 return SDValue();
4378
4379 // Canonicalize fshr as fshl to reduce pattern-matching.
4380 unsigned ShAmt = ShAmtC->getZExtValue();
4381 if (N0.getOpcode() == ISD::FSHR)
4382 ShAmt = BitWidth - ShAmt;
4383
4384 // Match an 'or' with a specific operand 'Other' in either commuted variant.
4385 SDValue X, Y;
4386 auto matchOr = [&X, &Y](SDValue Or, SDValue Other) {
4387 if (Or.getOpcode() != ISD::OR || !Or.hasOneUse())
4388 return false;
4389 if (Or.getOperand(0) == Other) {
4390 X = Or.getOperand(0);
4391 Y = Or.getOperand(1);
4392 return true;
4393 }
4394 if (Or.getOperand(1) == Other) {
4395 X = Or.getOperand(1);
4396 Y = Or.getOperand(0);
4397 return true;
4398 }
4399 return false;
4400 };
4401
4402 EVT OpVT = N0.getValueType();
4403 EVT ShAmtVT = N0.getOperand(2).getValueType();
4404 SDValue F0 = N0.getOperand(0);
4405 SDValue F1 = N0.getOperand(1);
4406 if (matchOr(F0, F1)) {
4407 // fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0
4408 SDValue NewShAmt = DAG.getConstant(ShAmt, dl, ShAmtVT);
4409 SDValue Shift = DAG.getNode(ISD::SHL, dl, OpVT, Y, NewShAmt);
4410 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4411 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4412 }
4413 if (matchOr(F1, F0)) {
4414 // fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0
4415 SDValue NewShAmt = DAG.getConstant(BitWidth - ShAmt, dl, ShAmtVT);
4416 SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, Y, NewShAmt);
4417 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4418 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4419 }
4420
4421 return SDValue();
4422}
4423
4424/// Try to simplify a setcc built with the specified operands and cc. If it is
4425/// unable to simplify it, return a null SDValue.
4427 ISD::CondCode Cond, bool foldBooleans,
4428 DAGCombinerInfo &DCI,
4429 const SDLoc &dl) const {
4430 SelectionDAG &DAG = DCI.DAG;
4431 const DataLayout &Layout = DAG.getDataLayout();
4432 EVT OpVT = N0.getValueType();
4434
4435 // Constant fold or commute setcc.
4436 if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
4437 return Fold;
4438
4439 bool N0ConstOrSplat =
4440 isConstOrConstSplat(N0, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4441 bool N1ConstOrSplat =
4442 isConstOrConstSplat(N1, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4443
4444 // Canonicalize toward having the constant on the RHS.
4445 // TODO: Handle non-splat vector constants. All undef causes trouble.
4446 // FIXME: We can't yet fold constant scalable vector splats, so avoid an
4447 // infinite loop here when we encounter one.
4449 if (N0ConstOrSplat && !N1ConstOrSplat &&
4450 (DCI.isBeforeLegalizeOps() ||
4451 isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
4452 return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4453
4454 // If we have a subtract with the same 2 non-constant operands as this setcc
4455 // -- but in reverse order -- then try to commute the operands of this setcc
4456 // to match. A matching pair of setcc (cmp) and sub may be combined into 1
4457 // instruction on some targets.
4458 if (!N0ConstOrSplat && !N1ConstOrSplat &&
4459 (DCI.isBeforeLegalizeOps() ||
4460 isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
4461 DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N1, N0}) &&
4462 !DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N0, N1}))
4463 return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4464
4465 if (SDValue V = foldSetCCWithRotate(VT, N0, N1, Cond, dl, DAG))
4466 return V;
4467
4468 if (SDValue V = foldSetCCWithFunnelShift(VT, N0, N1, Cond, dl, DAG))
4469 return V;
4470
4471 if (auto *N1C = isConstOrConstSplat(N1)) {
4472 const APInt &C1 = N1C->getAPIntValue();
4473
4474 // Optimize some CTPOP cases.
4475 if (SDValue V = simplifySetCCWithCTPOP(*this, VT, N0, C1, Cond, dl, DAG))
4476 return V;
4477
4478 // For equality to 0 of a no-wrap multiply, decompose and test each op:
4479 // X * Y == 0 --> (X == 0) || (Y == 0)
4480 // X * Y != 0 --> (X != 0) && (Y != 0)
4481 // TODO: This bails out if minsize is set, but if the target doesn't have a
4482 // single instruction multiply for this type, it would likely be
4483 // smaller to decompose.
4484 if (C1.isZero() && (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4485 N0.getOpcode() == ISD::MUL && N0.hasOneUse() &&
4486 (N0->getFlags().hasNoUnsignedWrap() ||
4487 N0->getFlags().hasNoSignedWrap()) &&
4488 !Attr.hasFnAttr(Attribute::MinSize)) {
4489 SDValue IsXZero = DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
4490 SDValue IsYZero = DAG.getSetCC(dl, VT, N0.getOperand(1), N1, Cond);
4491 unsigned LogicOp = Cond == ISD::SETEQ ? ISD::OR : ISD::AND;
4492 return DAG.getNode(LogicOp, dl, VT, IsXZero, IsYZero);
4493 }
4494
4495 // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
4496 // equality comparison, then we're just comparing whether X itself is
4497 // zero.
4498 if (N0.getOpcode() == ISD::SRL && (C1.isZero() || C1.isOne()) &&
4499 N0.getOperand(0).getOpcode() == ISD::CTLZ &&
4500 llvm::has_single_bit<uint32_t>(N0.getScalarValueSizeInBits())) {
4501 if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) {
4502 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4503 ShAmt->getAPIntValue() == Log2_32(N0.getScalarValueSizeInBits())) {
4504 if ((C1 == 0) == (Cond == ISD::SETEQ)) {
4505 // (srl (ctlz x), 5) == 0 -> X != 0
4506 // (srl (ctlz x), 5) != 1 -> X != 0
4507 Cond = ISD::SETNE;
4508 } else {
4509 // (srl (ctlz x), 5) != 0 -> X == 0
4510 // (srl (ctlz x), 5) == 1 -> X == 0
4511 Cond = ISD::SETEQ;
4512 }
4513 SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
4514 return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), Zero,
4515 Cond);
4516 }
4517 }
4518 }
4519 }
4520
4521 // FIXME: Support vectors.
4522 if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
4523 const APInt &C1 = N1C->getAPIntValue();
4524
4525 // (zext x) == C --> x == (trunc C)
4526 // (sext x) == C --> x == (trunc C)
4527 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4528 DCI.isBeforeLegalize() && N0->hasOneUse()) {
4529 unsigned MinBits = N0.getValueSizeInBits();
4530 SDValue PreExt;
4531 bool Signed = false;
4532 if (N0->getOpcode() == ISD::ZERO_EXTEND) {
4533 // ZExt
4534 MinBits = N0->getOperand(0).getValueSizeInBits();
4535 PreExt = N0->getOperand(0);
4536 } else if (N0->getOpcode() == ISD::AND) {
4537 // DAGCombine turns costly ZExts into ANDs
4538 if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
4539 if ((C->getAPIntValue()+1).isPowerOf2()) {
4540 MinBits = C->getAPIntValue().countr_one();
4541 PreExt = N0->getOperand(0);
4542 }
4543 } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
4544 // SExt
4545 MinBits = N0->getOperand(0).getValueSizeInBits();
4546 PreExt = N0->getOperand(0);
4547 Signed = true;
4548 } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
4549 // ZEXTLOAD / SEXTLOAD
4550 if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
4551 MinBits = LN0->getMemoryVT().getSizeInBits();
4552 PreExt = N0;
4553 } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
4554 Signed = true;
4555 MinBits = LN0->getMemoryVT().getSizeInBits();
4556 PreExt = N0;
4557 }
4558 }
4559
4560 // Figure out how many bits we need to preserve this constant.
4561 unsigned ReqdBits = Signed ? C1.getSignificantBits() : C1.getActiveBits();
4562
4563 // Make sure we're not losing bits from the constant.
4564 if (MinBits > 0 &&
4565 MinBits < C1.getBitWidth() &&
4566 MinBits >= ReqdBits) {
4567 EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
4568 if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
4569 // Will get folded away.
4570 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
4571 if (MinBits == 1 && C1 == 1)
4572 // Invert the condition.
4573 return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
4575 SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
4576 return DAG.getSetCC(dl, VT, Trunc, C, Cond);
4577 }
4578
4579 // If truncating the setcc operands is not desirable, we can still
4580 // simplify the expression in some cases:
4581 // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
4582 // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
4583 // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
4584 // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
4585 // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
4586 // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
4587 SDValue TopSetCC = N0->getOperand(0);
4588 unsigned N0Opc = N0->getOpcode();
4589 bool SExt = (N0Opc == ISD::SIGN_EXTEND);
4590 if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
4591 TopSetCC.getOpcode() == ISD::SETCC &&
4592 (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
4593 (isConstFalseVal(N1) ||
4594 isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
4595
4596 bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) ||
4597 (!N1C->isZero() && Cond == ISD::SETNE);
4598
4599 if (!Inverse)
4600 return TopSetCC;
4601
4603 cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
4604 TopSetCC.getOperand(0).getValueType());
4605 return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
4606 TopSetCC.getOperand(1),
4607 InvCond);
4608 }
4609 }
4610 }
4611
4612 // If the LHS is '(and load, const)', the RHS is 0, the test is for
4613 // equality or unsigned, and all 1 bits of the const are in the same
4614 // partial word, see if we can shorten the load.
4615 if (DCI.isBeforeLegalize() &&
4617 N0.getOpcode() == ISD::AND && C1 == 0 &&
4618 N0.getNode()->hasOneUse() &&
4619 isa<LoadSDNode>(N0.getOperand(0)) &&
4620 N0.getOperand(0).getNode()->hasOneUse() &&
4621 isa<ConstantSDNode>(N0.getOperand(1))) {
4622 LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0));
4623 APInt bestMask;
4624 unsigned bestWidth = 0, bestOffset = 0;
4625 if (Lod->isSimple() && Lod->isUnindexed() &&
4626 (Lod->getMemoryVT().isByteSized() ||
4628 unsigned memWidth = Lod->getMemoryVT().getStoreSizeInBits();
4629 unsigned origWidth = N0.getValueSizeInBits();
4630 unsigned maskWidth = origWidth;
4631 // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
4632 // 8 bits, but have to be careful...
4633 if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
4634 origWidth = Lod->getMemoryVT().getSizeInBits();
4635 const APInt &Mask = N0.getConstantOperandAPInt(1);
 4636 // Only consider power-of-2 widths (and at least one byte) as candidates
4637 // for the narrowed load.
4638 for (unsigned width = 8; width < origWidth; width *= 2) {
4639 EVT newVT = EVT::getIntegerVT(*DAG.getContext(), width);
4640 if (!shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT))
4641 continue;
4642 APInt newMask = APInt::getLowBitsSet(maskWidth, width);
4643 // Avoid accessing any padding here for now (we could use memWidth
4644 // instead of origWidth here otherwise).
4645 unsigned maxOffset = origWidth - width;
4646 for (unsigned offset = 0; offset <= maxOffset; offset += 8) {
4647 if (Mask.isSubsetOf(newMask)) {
4648 unsigned ptrOffset =
4649 Layout.isLittleEndian() ? offset : memWidth - width - offset;
4650 unsigned IsFast = 0;
4651 Align NewAlign = commonAlignment(Lod->getAlign(), ptrOffset / 8);
4653 *DAG.getContext(), Layout, newVT, Lod->getAddressSpace(),
4654 NewAlign, Lod->getMemOperand()->getFlags(), &IsFast) &&
4655 IsFast) {
4656 bestOffset = ptrOffset / 8;
4657 bestMask = Mask.lshr(offset);
4658 bestWidth = width;
4659 break;
4660 }
4661 }
4662 newMask <<= 8;
4663 }
4664 if (bestWidth)
4665 break;
4666 }
4667 }
4668 if (bestWidth) {
4669 EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
4670 SDValue Ptr = Lod->getBasePtr();
4671 if (bestOffset != 0)
4672 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(bestOffset));
4673 SDValue NewLoad =
4674 DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
4675 Lod->getPointerInfo().getWithOffset(bestOffset),
4676 Lod->getOriginalAlign());
4677 SDValue And =
4678 DAG.getNode(ISD::AND, dl, newVT, NewLoad,
4679 DAG.getConstant(bestMask.trunc(bestWidth), dl, newVT));
4680 return DAG.getSetCC(dl, VT, And, DAG.getConstant(0LL, dl, newVT), Cond);
4681 }
4682 }
4683
4684 // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
4685 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
4686 unsigned InSize = N0.getOperand(0).getValueSizeInBits();
4687
4688 // If the comparison constant has bits in the upper part, the
4689 // zero-extended value could never match.
4691 C1.getBitWidth() - InSize))) {
4692 switch (Cond) {
4693 case ISD::SETUGT:
4694 case ISD::SETUGE:
4695 case ISD::SETEQ:
4696 return DAG.getConstant(0, dl, VT);
4697 case ISD::SETULT:
4698 case ISD::SETULE:
4699 case ISD::SETNE:
4700 return DAG.getConstant(1, dl, VT);
4701 case ISD::SETGT:
4702 case ISD::SETGE:
4703 // True if the sign bit of C1 is set.
4704 return DAG.getConstant(C1.isNegative(), dl, VT);
4705 case ISD::SETLT:
4706 case ISD::SETLE:
4707 // True if the sign bit of C1 isn't set.
4708 return DAG.getConstant(C1.isNonNegative(), dl, VT);
4709 default:
4710 break;
4711 }
4712 }
4713
4714 // Otherwise, we can perform the comparison with the low bits.
4715 switch (Cond) {
4716 case ISD::SETEQ:
4717 case ISD::SETNE:
4718 case ISD::SETUGT:
4719 case ISD::SETUGE:
4720 case ISD::SETULT:
4721 case ISD::SETULE: {
4722 EVT newVT = N0.getOperand(0).getValueType();
4723 if (DCI.isBeforeLegalizeOps() ||
4724 (isOperationLegal(ISD::SETCC, newVT) &&
4725 isCondCodeLegal(Cond, newVT.getSimpleVT()))) {
4726 EVT NewSetCCVT = getSetCCResultType(Layout, *DAG.getContext(), newVT);
4727 SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);
4728
4729 SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
4730 NewConst, Cond);
4731 return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
4732 }
4733 break;
4734 }
4735 default:
4736 break; // todo, be more careful with signed comparisons
4737 }
4738 } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
4739 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4740 !isSExtCheaperThanZExt(cast<VTSDNode>(N0.getOperand(1))->getVT(),
4741 OpVT)) {
4742 EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
4743 unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
4744 EVT ExtDstTy = N0.getValueType();
4745 unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
4746
4747 // If the constant doesn't fit into the number of bits for the source of
4748 // the sign extension, it is impossible for both sides to be equal.
4749 if (C1.getSignificantBits() > ExtSrcTyBits)
4750 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
4751
4752 assert(ExtDstTy == N0.getOperand(0).getValueType() &&
4753 ExtDstTy != ExtSrcTy && "Unexpected types!");
4754 APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
4755 SDValue ZextOp = DAG.getNode(ISD::AND, dl, ExtDstTy, N0.getOperand(0),
4756 DAG.getConstant(Imm, dl, ExtDstTy));
4757 if (!DCI.isCalledByLegalizer())
4758 DCI.AddToWorklist(ZextOp.getNode());
4759 // Otherwise, make this a use of a zext.
4760 return DAG.getSetCC(dl, VT, ZextOp,
4761 DAG.getConstant(C1 & Imm, dl, ExtDstTy), Cond);
4762 } else if ((N1C->isZero() || N1C->isOne()) &&
4763 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
4764 // SETCC (X), [0|1], [EQ|NE] -> X if X is known 0/1. i1 types are
4765 // excluded as they are handled below whilst checking for foldBooleans.
4766 if ((N0.getOpcode() == ISD::SETCC || VT.getScalarType() != MVT::i1) &&
4767 isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
4768 (N0.getValueType() == MVT::i1 ||
4772 bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
4773 if (TrueWhenTrue)
4774 return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
4775 // Invert the condition.
4776 if (N0.getOpcode() == ISD::SETCC) {
4777 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
4779 if (DCI.isBeforeLegalizeOps() ||
4781 return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
4782 }
4783 }
4784
4785 if ((N0.getOpcode() == ISD::XOR ||
4786 (N0.getOpcode() == ISD::AND &&
4787 N0.getOperand(0).getOpcode() == ISD::XOR &&
4788 N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
4789 isOneConstant(N0.getOperand(1))) {
4790 // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
4791 // can only do this if the top bits are known zero.
4792 unsigned BitWidth = N0.getValueSizeInBits();
4793 if (DAG.MaskedValueIsZero(N0,
4795 BitWidth-1))) {
4796 // Okay, get the un-inverted input value.
4797 SDValue Val;
4798 if (N0.getOpcode() == ISD::XOR) {
4799 Val = N0.getOperand(0);
4800 } else {
4801 assert(N0.getOpcode() == ISD::AND &&
4802 N0.getOperand(0).getOpcode() == ISD::XOR);
4803 // ((X^1)&1)^1 -> X & 1
4804 Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
4805 N0.getOperand(0).getOperand(0),
4806 N0.getOperand(1));
4807 }
4808
4809 return DAG.getSetCC(dl, VT, Val, N1,
4811 }
4812 } else if (N1C->isOne()) {
4813 SDValue Op0 = N0;
4814 if (Op0.getOpcode() == ISD::TRUNCATE)
4815 Op0 = Op0.getOperand(0);
4816
4817 if ((Op0.getOpcode() == ISD::XOR) &&
4818 Op0.getOperand(0).getOpcode() == ISD::SETCC &&
4819 Op0.getOperand(1).getOpcode() == ISD::SETCC) {
4820 SDValue XorLHS = Op0.getOperand(0);
4821 SDValue XorRHS = Op0.getOperand(1);
4822 // Ensure that the input setccs return an i1 type or 0/1 value.
4823 if (Op0.getValueType() == MVT::i1 ||
4828 // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
4830 return DAG.getSetCC(dl, VT, XorLHS, XorRHS, Cond);
4831 }
4832 }
4833 if (Op0.getOpcode() == ISD::AND && isOneConstant(Op0.getOperand(1))) {
4834 // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
4835 if (Op0.getValueType().bitsGT(VT))
4836 Op0 = DAG.getNode(ISD::AND, dl, VT,
4837 DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
4838 DAG.getConstant(1, dl, VT));
4839 else if (Op0.getValueType().bitsLT(VT))
4840 Op0 = DAG.getNode(ISD::AND, dl, VT,
4841 DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
4842 DAG.getConstant(1, dl, VT));
4843
4844 return DAG.getSetCC(dl, VT, Op0,
4845 DAG.getConstant(0, dl, Op0.getValueType()),
4847 }
4848 if (Op0.getOpcode() == ISD::AssertZext &&
4849 cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
4850 return DAG.getSetCC(dl, VT, Op0,
4851 DAG.getConstant(0, dl, Op0.getValueType()),
4853 }
4854 }
4855
4856 // Given:
4857 // icmp eq/ne (urem %x, %y), 0
4858 // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
4859 // icmp eq/ne %x, 0
4860 if (N0.getOpcode() == ISD::UREM && N1C->isZero() &&
4861 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
4862 KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));
4863 KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));
4864 if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
4865 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
4866 }
4867
4868 // Fold set_cc seteq (ashr X, BW-1), -1 -> set_cc setlt X, 0
4869 // and set_cc setne (ashr X, BW-1), -1 -> set_cc setge X, 0
4870 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4871 N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(N0.getOperand(1)) &&
4872 N0.getConstantOperandAPInt(1) == OpVT.getScalarSizeInBits() - 1 &&
4873 N1C && N1C->isAllOnes()) {
4874 return DAG.getSetCC(dl, VT, N0.getOperand(0),
4875 DAG.getConstant(0, dl, OpVT),
4877 }
4878
4879 if (SDValue V =
4880 optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
4881 return V;
4882 }
4883
4884 // These simplifications apply to splat vectors as well.
4885 // TODO: Handle more splat vector cases.
4886 if (auto *N1C = isConstOrConstSplat(N1)) {
4887 const APInt &C1 = N1C->getAPIntValue();
4888
4889 APInt MinVal, MaxVal;
4890 unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits();
4892 MinVal = APInt::getSignedMinValue(OperandBitSize);
4893 MaxVal = APInt::getSignedMaxValue(OperandBitSize);
4894 } else {
4895 MinVal = APInt::getMinValue(OperandBitSize);
4896 MaxVal = APInt::getMaxValue(OperandBitSize);
4897 }
4898
4899 // Canonicalize GE/LE comparisons to use GT/LT comparisons.
4900 if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
4901 // X >= MIN --> true
4902 if (C1 == MinVal)
4903 return DAG.getBoolConstant(true, dl, VT, OpVT);
4904
4905 if (!VT.isVector()) { // TODO: Support this for vectors.
4906 // X >= C0 --> X > (C0 - 1)
4907 APInt C = C1 - 1;
4909 if ((DCI.isBeforeLegalizeOps() ||
4910 isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
4911 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
4912 isLegalICmpImmediate(C.getSExtValue())))) {
4913 return DAG.getSetCC(dl, VT, N0,
4914 DAG.getConstant(C, dl, N1.getValueType()),
4915 NewCC);
4916 }
4917 }
4918 }
4919
4920 if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
4921 // X <= MAX --> true
4922 if (C1 == MaxVal)
4923 return DAG.getBoolConstant(true, dl, VT, OpVT);
4924
4925 // X <= C0 --> X < (C0 + 1)
4926 if (!VT.isVector()) { // TODO: Support this for vectors.
4927 APInt C = C1 + 1;
4929 if ((DCI.isBeforeLegalizeOps() ||
4930 isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
4931 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
4932 isLegalICmpImmediate(C.getSExtValue())))) {
4933 return DAG.getSetCC(dl, VT, N0,
4934 DAG.getConstant(C, dl, N1.getValueType()),
4935 NewCC);
4936 }
4937 }
4938 }
4939
4940 if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
4941 if (C1 == MinVal)
4942 return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false
4943
4944 // TODO: Support this for vectors after legalize ops.
4945 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
4946 // Canonicalize setlt X, Max --> setne X, Max
4947 if (C1 == MaxVal)
4948 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
4949
4950 // If we have setult X, 1, turn it into seteq X, 0
4951 if (C1 == MinVal+1)
4952 return DAG.getSetCC(dl, VT, N0,
4953 DAG.getConstant(MinVal, dl, N0.getValueType()),
4954 ISD::SETEQ);
4955 }
4956 }
4957
4958 if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
4959 if (C1 == MaxVal)
4960 return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false
4961
4962 // TODO: Support this for vectors after legalize ops.
4963 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
4964 // Canonicalize setgt X, Min --> setne X, Min
4965 if (C1 == MinVal)
4966 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
4967
4968 // If we have setugt X, Max-1, turn it into seteq X, Max
4969 if (C1 == MaxVal-1)
4970 return DAG.getSetCC(dl, VT, N0,
4971 DAG.getConstant(MaxVal, dl, N0.getValueType()),
4972 ISD::SETEQ);
4973 }
4974 }
4975
4976 if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
4977 // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
4978 if (C1.isZero())
4979 if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
4980 VT, N0, N1, Cond, DCI, dl))
4981 return CC;
4982
4983 // For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y).
4984 // For example, when high 32-bits of i64 X are known clear:
4985 // all bits clear: (X | (Y<<32)) == 0 --> (X | Y) == 0
4986 // all bits set: (X | (Y<<32)) == -1 --> (X & Y) == -1
4987 bool CmpZero = N1C->isZero();
4988 bool CmpNegOne = N1C->isAllOnes();
4989 if ((CmpZero || CmpNegOne) && N0.hasOneUse()) {
4990 // Match or(lo,shl(hi,bw/2)) pattern.
4991 auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
4992 unsigned EltBits = V.getScalarValueSizeInBits();
4993 if (V.getOpcode() != ISD::OR || (EltBits % 2) != 0)
4994 return false;
4995 SDValue LHS = V.getOperand(0);
4996 SDValue RHS = V.getOperand(1);
4997 APInt HiBits = APInt::getHighBitsSet(EltBits, EltBits / 2);
 4998 // Unshifted element must have zero upper bits.
4999 if (RHS.getOpcode() == ISD::SHL &&
5000 isa<ConstantSDNode>(RHS.getOperand(1)) &&
5001 RHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5002 DAG.MaskedValueIsZero(LHS, HiBits)) {
5003 Lo = LHS;
5004 Hi = RHS.getOperand(0);
5005 return true;
5006 }
5007 if (LHS.getOpcode() == ISD::SHL &&
5008 isa<ConstantSDNode>(LHS.getOperand(1)) &&
5009 LHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
5010 DAG.MaskedValueIsZero(RHS, HiBits)) {
5011 Lo = RHS;
5012 Hi = LHS.getOperand(0);
5013 return true;
5014 }
5015 return false;
5016 };
5017
5018 auto MergeConcat = [&](SDValue Lo, SDValue Hi) {
5019 unsigned EltBits = N0.getScalarValueSizeInBits();
5020 unsigned HalfBits = EltBits / 2;
5021 APInt HiBits = APInt::getHighBitsSet(EltBits, HalfBits);
5022 SDValue LoBits = DAG.getConstant(~HiBits, dl, OpVT);
5023 SDValue HiMask = DAG.getNode(ISD::AND, dl, OpVT, Hi, LoBits);
5024 SDValue NewN0 =
5025 DAG.getNode(CmpZero ? ISD::OR : ISD::AND, dl, OpVT, Lo, HiMask);
5026 SDValue NewN1 = CmpZero ? DAG.getConstant(0, dl, OpVT) : LoBits;
5027 return DAG.getSetCC(dl, VT, NewN0, NewN1, Cond);
5028 };
5029
5030 SDValue Lo, Hi;
5031 if (IsConcat(N0, Lo, Hi))
5032 return MergeConcat(Lo, Hi);
5033
5034 if (N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR) {
5035 SDValue Lo0, Lo1, Hi0, Hi1;
5036 if (IsConcat(N0.getOperand(0), Lo0, Hi0) &&
5037 IsConcat(N0.getOperand(1), Lo1, Hi1)) {
5038 return MergeConcat(DAG.getNode(N0.getOpcode(), dl, OpVT, Lo0, Lo1),
5039 DAG.getNode(N0.getOpcode(), dl, OpVT, Hi0, Hi1));
5040 }
5041 }
5042 }
5043 }
5044
5045 // If we have "setcc X, C0", check to see if we can shrink the immediate
5046 // by changing cc.
5047 // TODO: Support this for vectors after legalize ops.
5048 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5049 // SETUGT X, SINTMAX -> SETLT X, 0
5050 // SETUGE X, SINTMIN -> SETLT X, 0
5051 if ((Cond == ISD::SETUGT && C1.isMaxSignedValue()) ||
5052 (Cond == ISD::SETUGE && C1.isMinSignedValue()))
5053 return DAG.getSetCC(dl, VT, N0,
5054 DAG.getConstant(0, dl, N1.getValueType()),
5055 ISD::SETLT);
5056
5057 // SETULT X, SINTMIN -> SETGT X, -1
5058 // SETULE X, SINTMAX -> SETGT X, -1
5059 if ((Cond == ISD::SETULT && C1.isMinSignedValue()) ||
5060 (Cond == ISD::SETULE && C1.isMaxSignedValue()))
5061 return DAG.getSetCC(dl, VT, N0,
5062 DAG.getAllOnesConstant(dl, N1.getValueType()),
5063 ISD::SETGT);
5064 }
5065 }
5066
5067 // Back to non-vector simplifications.
5068 // TODO: Can we do these for vector splats?
5069 if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
5070 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5071 const APInt &C1 = N1C->getAPIntValue();
5072 EVT ShValTy = N0.getValueType();
5073
5074 // Fold bit comparisons when we can. This will result in an
5075 // incorrect value when boolean false is negative one, unless
5076 // the bitsize is 1 in which case the false value is the same
5077 // in practice regardless of the representation.
5078 if ((VT.getSizeInBits() == 1 ||
5080 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5081 (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) &&
5082 N0.getOpcode() == ISD::AND) {
5083 if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5084 if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3
5085 // Perform the xform if the AND RHS is a single bit.
5086 unsigned ShCt = AndRHS->getAPIntValue().logBase2();
5087 if (AndRHS->getAPIntValue().isPowerOf2() &&
5088 !TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) {
5089 return DAG.getNode(
5090 ISD::TRUNCATE, dl, VT,
5091 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5093 ShCt, ShValTy, dl, !DCI.isBeforeLegalize())));
5094 }
5095 } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
5096 // (X & 8) == 8 --> (X & 8) >> 3
5097 // Perform the xform if C1 is a single bit.
5098 unsigned ShCt = C1.logBase2();
5099 if (C1.isPowerOf2() &&
5100 !TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) {
5101 return DAG.getNode(
5102 ISD::TRUNCATE, dl, VT,
5103 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5105 ShCt, ShValTy, dl, !DCI.isBeforeLegalize())));
5106 }
5107 }
5108 }
5109 }
5110
5111 if (C1.getSignificantBits() <= 64 &&
5113 // (X & -256) == 256 -> (X >> 8) == 1
5114 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5115 N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
5116 if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5117 const APInt &AndRHSC = AndRHS->getAPIntValue();
5118 if (AndRHSC.isNegatedPowerOf2() && (AndRHSC & C1) == C1) {
5119 unsigned ShiftBits = AndRHSC.countr_zero();
5120 if (!TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5121 SDValue Shift = DAG.getNode(
5122 ISD::SRL, dl, ShValTy, N0.getOperand(0),
5123 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl,
5124 !DCI.isBeforeLegalize()));
5125 SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, ShValTy);
5126 return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
5127 }
5128 }
5129 }
5130 } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
5131 Cond == ISD::SETULE || Cond == ISD::SETUGT) {
5132 bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
5133 // X < 0x100000000 -> (X >> 32) < 1
5134 // X >= 0x100000000 -> (X >> 32) >= 1
5135 // X <= 0x0ffffffff -> (X >> 32) < 1
5136 // X > 0x0ffffffff -> (X >> 32) >= 1
5137 unsigned ShiftBits;
5138 APInt NewC = C1;
5139 ISD::CondCode NewCond = Cond;
5140 if (AdjOne) {
5141 ShiftBits = C1.countr_one();
5142 NewC = NewC + 1;
5143 NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
5144 } else {
5145 ShiftBits = C1.countr_zero();
5146 }
5147 NewC.lshrInPlace(ShiftBits);
5148 if (ShiftBits && NewC.getSignificantBits() <= 64 &&
5150 !TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5151 SDValue Shift =
5152 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5153 DAG.getShiftAmountConstant(ShiftBits, ShValTy, dl,
5154 !DCI.isBeforeLegalize()));
5155 SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
5156 return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
5157 }
5158 }
5159 }
5160 }
5161
5162 if (!isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1)) {
5163 auto *CFP = cast<ConstantFPSDNode>(N1);
5164 assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");
5165
5166 // Otherwise, we know the RHS is not a NaN. Simplify the node to drop the
5167 // constant if knowing that the operand is non-nan is enough. We prefer to
5168 // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
5169 // materialize 0.0.
5170 if (Cond == ISD::SETO || Cond == ISD::SETUO)
5171 return DAG.getSetCC(dl, VT, N0, N0, Cond);
5172
5173 // setcc (fneg x), C -> setcc swap(pred) x, -C
5174 if (N0.getOpcode() == ISD::FNEG) {
5176 if (DCI.isBeforeLegalizeOps() ||
5177 isCondCodeLegal(SwapCond, N0.getSimpleValueType())) {
5178 SDValue NegN1 = DAG.getNode(ISD::FNEG, dl, N0.getValueType(), N1);
5179 return DAG.getSetCC(dl, VT, N0.getOperand(0), NegN1, SwapCond);
5180 }
5181 }
5182
5183 // setueq/setoeq X, (fabs Inf) -> is_fpclass X, fcInf
5185 !isFPImmLegal(CFP->getValueAPF(), CFP->getValueType(0))) {
5186 bool IsFabs = N0.getOpcode() == ISD::FABS;
5187 SDValue Op = IsFabs ? N0.getOperand(0) : N0;
5188 if ((Cond == ISD::SETOEQ || Cond == ISD::SETUEQ) && CFP->isInfinity()) {
5189 FPClassTest Flag = CFP->isNegative() ? (IsFabs ? fcNone : fcNegInf)
5190 : (IsFabs ? fcInf : fcPosInf);
5191 if (Cond == ISD::SETUEQ)
5192 Flag |= fcNan;
5193 return DAG.getNode(ISD::IS_FPCLASS, dl, VT, Op,
5194 DAG.getTargetConstant(Flag, dl, MVT::i32));
5195 }
5196 }
5197
5198 // If the condition is not legal, see if we can find an equivalent one
5199 // which is legal.
5201 // If the comparison was an awkward floating-point == or != and one of
5202 // the comparison operands is infinity or negative infinity, convert the
5203 // condition to a less-awkward <= or >=.
5204 if (CFP->getValueAPF().isInfinity()) {
5205 bool IsNegInf = CFP->getValueAPF().isNegative();
5207 switch (Cond) {
5208 case ISD::SETOEQ: NewCond = IsNegInf ? ISD::SETOLE : ISD::SETOGE; break;
5209 case ISD::SETUEQ: NewCond = IsNegInf ? ISD::SETULE : ISD::SETUGE; break;
5210 case ISD::SETUNE: NewCond = IsNegInf ? ISD::SETUGT : ISD::SETULT; break;
5211 case ISD::SETONE: NewCond = IsNegInf ? ISD::SETOGT : ISD::SETOLT; break;
5212 default: break;
5213 }
5214 if (NewCond != ISD::SETCC_INVALID &&
5215 isCondCodeLegal(NewCond, N0.getSimpleValueType()))
5216 return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5217 }
5218 }
5219 }
5220
5221 if (N0 == N1) {
5222 // The sext(setcc()) => setcc() optimization relies on the appropriate
5223 // constant being emitted.
5224 assert(!N0.getValueType().isInteger() &&
5225 "Integer types should be handled by FoldSetCC");
5226
5227 bool EqTrue = ISD::isTrueWhenEqual(Cond);
5228 unsigned UOF = ISD::getUnorderedFlavor(Cond);
5229 if (UOF == 2) // FP operators that are undefined on NaNs.
5230 return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5231 if (UOF == unsigned(EqTrue))
5232 return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5233 // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
5234 // if it is not already.
5235 ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
5236 if (NewCond != Cond &&
5237 (DCI.isBeforeLegalizeOps() ||
5238 isCondCodeLegal(NewCond, N0.getSimpleValueType())))
5239 return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5240 }
5241
5242 // ~X > ~Y --> Y > X
5243 // ~X < ~Y --> Y < X
5244 // ~X < C --> X > ~C
5245 // ~X > C --> X < ~C
5246 if ((isSignedIntSetCC(Cond) || isUnsignedIntSetCC(Cond)) &&
5247 N0.getValueType().isInteger()) {
5248 if (isBitwiseNot(N0)) {
5249 if (isBitwiseNot(N1))
5250 return DAG.getSetCC(dl, VT, N1.getOperand(0), N0.getOperand(0), Cond);
5251
5254 SDValue Not = DAG.getNOT(dl, N1, OpVT);
5255 return DAG.getSetCC(dl, VT, Not, N0.getOperand(0), Cond);
5256 }
5257 }
5258 }
5259
5260 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5261 N0.getValueType().isInteger()) {
5262 if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
5263 N0.getOpcode() == ISD::XOR) {
5264 // Simplify (X+Y) == (X+Z) --> Y == Z
5265 if (N0.getOpcode() == N1.getOpcode()) {
5266 if (N0.getOperand(0) == N1.getOperand(0))
5267 return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
5268 if (N0.getOperand(1) == N1.getOperand(1))
5269 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
5270 if (isCommutativeBinOp(N0.getOpcode())) {
5271 // If X op Y == Y op X, try other combinations.
5272 if (N0.getOperand(0) == N1.getOperand(1))
5273 return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
5274 Cond);
5275 if (N0.getOperand(1) == N1.getOperand(0))
5276 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
5277 Cond);
5278 }
5279 }
5280
5281 // If RHS is a legal immediate value for a compare instruction, we need
5282 // to be careful about increasing register pressure needlessly.
5283 bool LegalRHSImm = false;
5284
5285 if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
5286 if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5287 // Turn (X+C1) == C2 --> X == C2-C1
5288 if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse())
5289 return DAG.getSetCC(
5290 dl, VT, N0.getOperand(0),
5291 DAG.getConstant(RHSC->getAPIntValue() - LHSR->getAPIntValue(),
5292 dl, N0.getValueType()),
5293 Cond);
5294
5295 // Turn (X^C1) == C2 --> X == C1^C2
5296 if (N0.getOpcode() == ISD::XOR && N0.getNode()->hasOneUse())
5297 return DAG.getSetCC(
5298 dl, VT, N0.getOperand(0),
5299 DAG.getConstant(LHSR->getAPIntValue() ^ RHSC->getAPIntValue(),
5300 dl, N0.getValueType()),
5301 Cond);
5302 }
5303
5304 // Turn (C1-X) == C2 --> X == C1-C2
5305 if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
5306 if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse())
5307 return DAG.getSetCC(
5308 dl, VT, N0.getOperand(1),
5309 DAG.getConstant(SUBC->getAPIntValue() - RHSC->getAPIntValue(),
5310 dl, N0.getValueType()),
5311 Cond);
5312
5313 // Could RHSC fold directly into a compare?
5314 if (RHSC->getValueType(0).getSizeInBits() <= 64)
5315 LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
5316 }
5317
5318 // (X+Y) == X --> Y == 0 and similar folds.
5319 // Don't do this if X is an immediate that can fold into a cmp
5320 // instruction and X+Y has other uses. It could be an induction variable
5321 // chain, and the transform would increase register pressure.
5322 if (!LegalRHSImm || N0.hasOneUse())
5323 if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, dl, DCI))
5324 return V;
5325 }
5326
5327 if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
5328 N1.getOpcode() == ISD::XOR)
5329 if (SDValue V = foldSetCCWithBinOp(VT, N1, N0, Cond, dl, DCI))
5330 return V;
5331
5332 if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, dl, DCI))
5333 return V;
5334 }
5335
5336 // Fold remainder of division by a constant.
5337 if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) &&
5338 N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5339 // When division is cheap or optimizing for minimum size,
5340 // fall through to DIVREM creation by skipping this fold.
5341 if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Attribute::MinSize)) {
5342 if (N0.getOpcode() == ISD::UREM) {
5343 if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
5344 return Folded;
5345 } else if (N0.getOpcode() == ISD::SREM) {
5346 if (SDValue Folded = buildSREMEqFold(VT, N0, N1, Cond, DCI, dl))
5347 return Folded;
5348 }
5349 }
5350 }
5351
5352 // Fold away ALL boolean setcc's.
5353 if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
5354 SDValue Temp;
5355 switch (Cond) {
5356 default: llvm_unreachable("Unknown integer setcc!");
5357 case ISD::SETEQ: // X == Y -> ~(X^Y)
5358 Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5359 N0 = DAG.getNOT(dl, Temp, OpVT);
5360 if (!DCI.isCalledByLegalizer())
5361 DCI.AddToWorklist(Temp.getNode());
5362 break;
5363 case ISD::SETNE: // X != Y --> (X^Y)
5364 N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5365 break;
5366 case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
5367 case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
5368 Temp = DAG.getNOT(dl, N0, OpVT);
5369 N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp);
5370 if (!DCI.isCalledByLegalizer())
5371 DCI.AddToWorklist(Temp.getNode());
5372 break;
5373 case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
5374 case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
5375 Temp = DAG.getNOT(dl, N1, OpVT);
5376 N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp);
5377 if (!DCI.isCalledByLegalizer())
5378 DCI.AddToWorklist(Temp.getNode());
5379 break;
5380 case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
5381 case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
5382 Temp = DAG.getNOT(dl, N0, OpVT);
5383 N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp);
5384 if (!DCI.isCalledByLegalizer())
5385 DCI.AddToWorklist(Temp.getNode());
5386 break;
5387 case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
5388 case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
5389 Temp = DAG.getNOT(dl, N1, OpVT);
5390 N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp);
5391 break;
5392 }
5393 if (VT.getScalarType() != MVT::i1) {
5394 if (!DCI.isCalledByLegalizer())
5395 DCI.AddToWorklist(N0.getNode());
5396 // FIXME: If running after legalize, we probably can't do this.
5398 N0 = DAG.getNode(ExtendCode, dl, VT, N0);
5399 }
5400 return N0;
5401 }
5402
5403 // Could not fold it.
5404 return SDValue();
5405}
5406
5407/// Returns true (and the GlobalValue and the offset) if the node is a
5408/// GlobalAddress + offset.
5410 int64_t &Offset) const {
5411
5412 SDNode *N = unwrapAddress(SDValue(WN, 0)).getNode();
5413
5414 if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
5415 GA = GASD->getGlobal();
5416 Offset += GASD->getOffset();
5417 return true;
5418 }
5419
5420 if (N->getOpcode() == ISD::ADD) {
5421 SDValue N1 = N->getOperand(0);
5422 SDValue N2 = N->getOperand(1);
5423 if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
5424 if (auto *V = dyn_cast<ConstantSDNode>(N2)) {
5425 Offset += V->getSExtValue();
5426 return true;
5427 }
5428 } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
5429 if (auto *V = dyn_cast<ConstantSDNode>(N1)) {
5430 Offset += V->getSExtValue();
5431 return true;
5432 }
5433 }
5434 }
5435
5436 return false;
5437}
5438
5440 DAGCombinerInfo &DCI) const {
5441 // Default implementation: no optimization.
5442 return SDValue();
5443}
5444
5445//===----------------------------------------------------------------------===//
5446// Inline Assembler Implementation Methods
5447//===----------------------------------------------------------------------===//
5448
5451 unsigned S = Constraint.size();
5452
5453 if (S == 1) {
5454 switch (Constraint[0]) {
5455 default: break;
5456 case 'r':
5457 return C_RegisterClass;
5458 case 'm': // memory
5459 case 'o': // offsetable
5460 case 'V': // not offsetable
5461 return C_Memory;
5462 case 'p': // Address.
5463 return C_Address;
5464 case 'n': // Simple Integer
5465 case 'E': // Floating Point Constant
5466 case 'F': // Floating Point Constant
5467 return C_Immediate;
5468 case 'i': // Simple Integer or Relocatable Constant
5469 case 's': // Relocatable Constant
5470 case 'X': // Allow ANY value.
5471 case 'I': // Target registers.
5472 case 'J':
5473 case 'K':
5474 case 'L':
5475 case 'M':
5476 case 'N':
5477 case 'O':
5478 case 'P':
5479 case '<':
5480 case '>':
5481 return C_Other;
5482 }
5483 }
5484
5485 if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') {
5486 if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}"
5487 return C_Memory;
5488 return C_Register;
5489 }
5490 return C_Unknown;
5491}
5492
5493/// Try to replace an X constraint, which matches anything, with another that
5494/// has more specific requirements based on the type of the corresponding
5495/// operand.
5496const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
5497 if (ConstraintVT.isInteger())
5498 return "r";
5499 if (ConstraintVT.isFloatingPoint())
5500 return "f"; // works for many targets
5501 return nullptr;
5502}
5503
5505 SDValue &Chain, SDValue &Glue, const SDLoc &DL,
5506 const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
5507 return SDValue();
5508}
5509
5510/// Lower the specified operand into the Ops vector.
5511/// If it is invalid, don't add anything to Ops.
5513 StringRef Constraint,
5514 std::vector<SDValue> &Ops,
5515 SelectionDAG &DAG) const {
5516
5517 if (Constraint.size() > 1)
5518 return;
5519
5520 char ConstraintLetter = Constraint[0];
5521 switch (ConstraintLetter) {
5522 default: break;
5523 case 'X': // Allows any operand
5524 case 'i': // Simple Integer or Relocatable Constant
5525 case 'n': // Simple Integer
5526 case 's': { // Relocatable Constant
5527
5529 uint64_t Offset = 0;
5530
5531 // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
5532 // etc., since getelementpointer is variadic. We can't use
5533 // SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible
5534 // while in this case the GA may be furthest from the root node which is
5535 // likely an ISD::ADD.
5536 while (true) {
5537 if ((C = dyn_cast<ConstantSDNode>(Op)) && ConstraintLetter != 's') {
5538 // gcc prints these as sign extended. Sign extend value to 64 bits
5539 // now; without this it would get ZExt'd later in
5540 // ScheduleDAGSDNodes::EmitNode, which is very generic.
5541 bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
5542 BooleanContent BCont = getBooleanContents(MVT::i64);
5543 ISD::NodeType ExtOpc =
5544 IsBool ? getExtendForContent(BCont) : ISD::SIGN_EXTEND;
5545 int64_t ExtVal =
5546 ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue() : C->getSExtValue();
5547 Ops.push_back(
5548 DAG.getTargetConstant(Offset + ExtVal, SDLoc(C), MVT::i64));
5549 return;
5550 }
5551 if (ConstraintLetter != 'n') {
5552 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
5553 Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
5554 GA->getValueType(0),
5555 Offset + GA->getOffset()));
5556 return;
5557 }
5558 if (const auto *BA = dyn_cast<BlockAddressSDNode>(Op)) {
5559 Ops.push_back(DAG.getTargetBlockAddress(
5560 BA->getBlockAddress(), BA->getValueType(0),
5561 Offset + BA->getOffset(), BA->getTargetFlags()));
5562 return;
5563 }
5564 if (isa<BasicBlockSDNode>(Op)) {
5565 Ops.push_back(Op);
5566 return;
5567 }
5568 }
5569 const unsigned OpCode = Op.getOpcode();
5570 if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
5571 if ((C = dyn_cast<ConstantSDNode>(Op.getOperand(0))))
5572 Op = Op.getOperand(1);
5573 // Subtraction is not commutative.
5574 else if (OpCode == ISD::ADD &&
5575 (C = dyn_cast<ConstantSDNode>(Op.getOperand(1))))
5576 Op = Op.getOperand(0);
5577 else
5578 return;
5579 Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
5580 continue;
5581 }
5582 return;
5583 }
5584 break;
5585 }
5586 }
5587}
5588
5590 const CallInst &I, SmallVectorImpl<SDValue> &Ops, SelectionDAG &DAG) const {
5591}
5592
5593std::pair<unsigned, const TargetRegisterClass *>
5595 StringRef Constraint,
5596 MVT VT) const {
5597 if (!Constraint.starts_with("{"))
5598 return std::make_pair(0u, static_cast<TargetRegisterClass *>(nullptr));
5599 assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");
5600
5601 // Remove the braces from around the name.
5602 StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
5603
5604 std::pair<unsigned, const TargetRegisterClass *> R =
5605 std::make_pair(0u, static_cast<const TargetRegisterClass *>(nullptr));
5606
5607 // Figure out which register class contains this reg.
5608 for (const TargetRegisterClass *RC : RI->regclasses()) {
5609 // If none of the value types for this register class are valid, we
5610 // can't use it. For example, 64-bit reg classes on 32-bit targets.
5611 if (!isLegalRC(*RI, *RC))
5612 continue;
5613
5614 for (const MCPhysReg &PR : *RC) {
5615 if (RegName.equals_insensitive(RI->getRegAsmName(PR))) {
5616 std::pair<unsigned, const TargetRegisterClass *> S =
5617 std::make_pair(PR, RC);
5618
5619 // If this register class has the requested value type, return it,
5620 // otherwise keep searching and return the first class found
5621 // if no other is found which explicitly has the requested type.
5622 if (RI->isTypeLegalForClass(*RC, VT))
5623 return S;
5624 if (!R.second)
5625 R = S;
5626 }
5627 }
5628 }
5629
5630 return R;
5631}
5632
5633//===----------------------------------------------------------------------===//
5634// Constraint Selection.
5635
5636/// Return true of this is an input operand that is a matching constraint like
5637/// "4".
5639 assert(!ConstraintCode.empty() && "No known constraint!");
5640 return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
5641}
5642
5643/// If this is an input matching constraint, this method returns the output
5644/// operand it matches.
5646 assert(!ConstraintCode.empty() && "No known constraint!");
5647 return atoi(ConstraintCode.c_str());
5648}
5649
5650/// Split up the constraint string from the inline assembly value into the
5651/// specific constraints and their prefixes, and also tie in the associated
5652/// operand values.
5653/// If this returns an empty vector, and if the constraint string itself
5654/// isn't empty, there was an error parsing.
5657 const TargetRegisterInfo *TRI,
5658 const CallBase &Call) const {
5659 /// Information about all of the constraints.
5660 AsmOperandInfoVector ConstraintOperands;
5661 const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
5662 unsigned maCount = 0; // Largest number of multiple alternative constraints.
5663
5664 // Do a prepass over the constraints, canonicalizing them, and building up the
5665 // ConstraintOperands list.
5666 unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
5667 unsigned ResNo = 0; // ResNo - The result number of the next output.
5668 unsigned LabelNo = 0; // LabelNo - CallBr indirect dest number.
5669
5670 for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
5671 ConstraintOperands.emplace_back(std::move(CI));
5672 AsmOperandInfo &OpInfo = ConstraintOperands.back();
5673
5674 // Update multiple alternative constraint count.
5675 if (OpInfo.multipleAlternatives.size() > maCount)
5676 maCount = OpInfo.multipleAlternatives.size();
5677
5678 OpInfo.ConstraintVT = MVT::Other;
5679
5680 // Compute the value type for each operand.
5681 switch (OpInfo.Type) {
5683 // Indirect outputs just consume an argument.
5684 if (OpInfo.isIndirect) {
5685 OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
5686 break;
5687 }
5688
5689 // The return value of the call is this value. As such, there is no
5690 // corresponding argument.
5691 assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
5692 if (StructType *STy = dyn_cast<StructType>(Call.getType())) {
5693 OpInfo.ConstraintVT =
5694 getSimpleValueType(DL, STy->getElementType(ResNo));
5695 } else {
5696 assert(ResNo == 0 && "Asm only has one result!");
5697 OpInfo.ConstraintVT =
5698 getAsmOperandValueType(DL, Call.getType()).getSimpleVT();
5699 }
5700 ++ResNo;
5701 break;
5702 case InlineAsm::isInput:
5703 OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
5704 break;
5705 case InlineAsm::isLabel:
5706 OpInfo.CallOperandVal = cast<CallBrInst>(&Call)->getIndirectDest(LabelNo);
5707 ++LabelNo;
5708 continue;
5710 // Nothing to do.
5711 break;
5712 }
5713
5714 if (OpInfo.CallOperandVal) {
5715 llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
5716 if (OpInfo.isIndirect) {
5717 OpTy = Call.getParamElementType(ArgNo);
5718 assert(OpTy && "Indirect operand must have elementtype attribute");
5719 }
5720
5721 // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
5722 if (StructType *STy = dyn_cast<StructType>(OpTy))
5723 if (STy->getNumElements() == 1)
5724 OpTy = STy->getElementType(0);
5725
5726 // If OpTy is not a single value, it may be a struct/union that we
5727 // can tile with integers.
5728 if (!OpTy->isSingleValueType() && OpTy->isSized()) {
5729 unsigned BitSize = DL.getTypeSizeInBits(OpTy);
5730 switch (BitSize) {
5731 default: break;
5732 case 1:
5733 case 8:
5734 case 16:
5735 case 32:
5736 case 64:
5737 case 128:
5738 OpTy = IntegerType::get(OpTy->getContext(), BitSize);
5739 break;
5740 }
5741 }
5742
5743 EVT VT = getAsmOperandValueType(DL, OpTy, true);
5744 OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
5745 ArgNo++;
5746 }
5747 }
5748
5749 // If we have multiple alternative constraints, select the best alternative.
5750 if (!ConstraintOperands.empty()) {
5751 if (maCount) {
5752 unsigned bestMAIndex = 0;
5753 int bestWeight = -1;
5754 // weight: -1 = invalid match, and 0 = so-so match to 5 = good match.
5755 int weight = -1;
5756 unsigned maIndex;
5757 // Compute the sums of the weights for each alternative, keeping track
5758 // of the best (highest weight) one so far.
5759 for (maIndex = 0; maIndex < maCount; ++maIndex) {
5760 int weightSum = 0;
5761 for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
5762 cIndex != eIndex; ++cIndex) {
5763 AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
5764 if (OpInfo.Type == InlineAsm::isClobber)
5765 continue;
5766
5767 // If this is an output operand with a matching input operand,
5768 // look up the matching input. If their types mismatch, e.g. one
5769 // is an integer, the other is floating point, or their sizes are
5770 // different, flag it as an maCantMatch.
5771 if (OpInfo.hasMatchingInput()) {
5772 AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
5773 if (OpInfo.ConstraintVT != Input.ConstraintVT) {
5774 if ((OpInfo.ConstraintVT.isInteger() !=
5775 Input.ConstraintVT.isInteger()) ||
5776 (OpInfo.ConstraintVT.getSizeInBits() !=
5777 Input.ConstraintVT.getSizeInBits())) {
5778 weightSum = -1; // Can't match.
5779 break;
5780 }
5781 }
5782 }
5783 weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
5784 if (weight == -1) {
5785 weightSum = -1;
5786 break;
5787 }
5788 weightSum += weight;
5789 }
5790 // Update best.
5791 if (weightSum > bestWeight) {
5792 bestWeight = weightSum;
5793 bestMAIndex = maIndex;
5794 }
5795 }
5796
5797 // Now select chosen alternative in each constraint.
5798 for (AsmOperandInfo &cInfo : ConstraintOperands)
5799 if (cInfo.Type != InlineAsm::isClobber)
5800 cInfo.selectAlternative(bestMAIndex);
5801 }
5802 }
5803
5804 // Check and hook up tied operands, choose constraint code to use.
5805 for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
5806 cIndex != eIndex; ++cIndex) {
5807 AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
5808
5809 // If this is an output operand with a matching input operand, look up the
5810 // matching input. If their types mismatch, e.g. one is an integer, the
5811 // other is floating point, or their sizes are different, flag it as an
5812 // error.
5813 if (OpInfo.hasMatchingInput()) {
5814 AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
5815
5816 if (OpInfo.ConstraintVT != Input.ConstraintVT) {
5817 std::pair<unsigned, const TargetRegisterClass *> MatchRC =
5818 getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
5819 OpInfo.ConstraintVT);
5820 std::pair<unsigned, const TargetRegisterClass *> InputRC =
5821 getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
5822 Input.ConstraintVT);
5823 if ((OpInfo.ConstraintVT.isInteger() !=
5824 Input.ConstraintVT.isInteger()) ||
5825 (MatchRC.second != InputRC.second)) {
5826 report_fatal_error("Unsupported asm: input constraint"
5827 " with a matching output constraint of"
5828 " incompatible type!");
5829 }
5830 }
5831 }
5832 }
5833
5834 return ConstraintOperands;
5835}
5836
5837/// Return a number indicating our preference for chosing a type of constraint
5838/// over another, for the purpose of sorting them. Immediates are almost always
5839/// preferrable (when they can be emitted). A higher return value means a
5840/// stronger preference for one constraint type relative to another.
5841/// FIXME: We should prefer registers over memory but doing so may lead to
5842/// unrecoverable register exhaustion later.
5843/// https://github.com/llvm/llvm-project/issues/20571
5845 switch (CT) {
5848 return 4;
5851 return 3;
5853 return 2;
5855 return 1;
5857 return 0;
5858 }
5859 llvm_unreachable("Invalid constraint type");
5860}
5861
5862/// Examine constraint type and operand type and determine a weight value.
5863/// This object must already have been set up with the operand type
5864/// and the current alternative constraint selected.
5867 AsmOperandInfo &info, int maIndex) const {
5869 if (maIndex >= (int)info.multipleAlternatives.size())
5870 rCodes = &info.Codes;
5871 else
5872 rCodes = &info.multipleAlternatives[maIndex].Codes;
5873 ConstraintWeight BestWeight = CW_Invalid;
5874
5875 // Loop over the options, keeping track of the most general one.
5876 for (const std::string &rCode : *rCodes) {
5877 ConstraintWeight weight =
5878 getSingleConstraintMatchWeight(info, rCode.c_str());
5879 if (weight > BestWeight)
5880 BestWeight = weight;
5881 }
5882
5883 return BestWeight;
5884}
5885
5886/// Examine constraint type and operand type and determine a weight value.
5887/// This object must already have been set up with the operand type
5888/// and the current alternative constraint selected.
5891 AsmOperandInfo &info, const char *constraint) const {
5892 ConstraintWeight weight = CW_Invalid;
5893 Value *CallOperandVal = info.CallOperandVal;
5894 // If we don't have a value, we can't do a match,
5895 // but allow it at the lowest weight.
5896 if (!CallOperandVal)
5897 return CW_Default;
5898 // Look at the constraint type.
5899 switch (*constraint) {
5900 case 'i': // immediate integer.
5901 case 'n': // immediate integer with a known value.
5902 if (isa<ConstantInt>(CallOperandVal))
5903 weight = CW_Constant;
5904 break;
5905 case 's': // non-explicit intregal immediate.
5906 if (isa<GlobalValue>(CallOperandVal))
5907 weight = CW_Constant;
5908 break;
5909 case 'E': // immediate float if host format.
5910 case 'F': // immediate float.
5911 if (isa<ConstantFP>(CallOperandVal))
5912 weight = CW_Constant;
5913 break;
5914 case '<': // memory operand with autodecrement.
5915 case '>': // memory operand with autoincrement.
5916 case 'm': // memory operand.
5917 case 'o': // offsettable memory operand
5918 case 'V': // non-offsettable memory operand
5919 weight = CW_Memory;
5920 break;
5921 case 'r': // general register.
5922 case 'g': // general register, memory operand or immediate integer.
5923 // note: Clang converts "g" to "imr".
5924 if (CallOperandVal->getType()->isIntegerTy())
5925 weight = CW_Register;
5926 break;
5927 case 'X': // any operand.
5928 default:
5929 weight = CW_Default;
5930 break;
5931 }
5932 return weight;
5933}
5934
5935/// If there are multiple different constraints that we could pick for this
5936/// operand (e.g. "imr") try to pick the 'best' one.
5937/// This is somewhat tricky: constraints (TargetLowering::ConstraintType) fall
5938/// into seven classes:
5939/// Register -> one specific register
5940/// RegisterClass -> a group of regs
5941/// Memory -> memory
5942/// Address -> a symbolic memory reference
5943/// Immediate -> immediate values
5944/// Other -> magic values (such as "Flag Output Operands")
5945/// Unknown -> something we don't recognize yet and can't handle
5946/// Ideally, we would pick the most specific constraint possible: if we have
5947/// something that fits into a register, we would pick it. The problem here
5948/// is that if we have something that could either be in a register or in
5949/// memory that use of the register could cause selection of *other*
5950/// operands to fail: they might only succeed if we pick memory. Because of
5951/// this the heuristic we use is:
5952///
5953/// 1) If there is an 'other' constraint, and if the operand is valid for
5954/// that constraint, use it. This makes us take advantage of 'i'
5955/// constraints when available.
5956/// 2) Otherwise, pick the most general constraint present. This prefers
5957/// 'm' over 'r', for example.
5958///
5960 TargetLowering::AsmOperandInfo &OpInfo) const {
5961 ConstraintGroup Ret;
5962
5963 Ret.reserve(OpInfo.Codes.size());
5964 for (StringRef Code : OpInfo.Codes) {
5965 TargetLowering::ConstraintType CType = getConstraintType(Code);
5966
5967 // Indirect 'other' or 'immediate' constraints are not allowed.
5968 if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory ||
5969 CType == TargetLowering::C_Register ||
5971 continue;
5972
5973 // Things with matching constraints can only be registers, per gcc
5974 // documentation. This mainly affects "g" constraints.
5975 if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
5976 continue;
5977
5978 Ret.emplace_back(Code, CType);
5979 }
5980
5981 std::stable_sort(
5982 Ret.begin(), Ret.end(), [](ConstraintPair a, ConstraintPair b) {
5983 return getConstraintPiority(a.second) > getConstraintPiority(b.second);
5984 });
5985
5986 return Ret;
5987}
5988
5989/// If we have an immediate, see if we can lower it. Return true if we can,
5990/// false otherwise.
5992 SDValue Op, SelectionDAG *DAG,
5993 const TargetLowering &TLI) {
5994
5995 assert((P.second == TargetLowering::C_Other ||
5996 P.second == TargetLowering::C_Immediate) &&
5997 "need immediate or other");
5998
5999 if (!Op.getNode())
6000 return false;
6001
6002 std::vector<SDValue> ResultOps;
6003 TLI.LowerAsmOperandForConstraint(Op, P.first, ResultOps, *DAG);
6004 return !ResultOps.empty();
6005}
6006
6007/// Determines the constraint code and constraint type to use for the specific
6008/// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
6010 SDValue Op,
6011 SelectionDAG *DAG) const {
6012 assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
6013
6014 // Single-letter constraints ('r') are very common.
6015 if (OpInfo.Codes.size() == 1) {
6016 OpInfo.ConstraintCode = OpInfo.Codes[0];
6017 OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
6018 } else {
6019 ConstraintGroup G = getConstraintPreferences(OpInfo);
6020 if (G.empty())
6021 return;
6022
6023 unsigned BestIdx = 0;
6024 for (const unsigned E = G.size();
6025 BestIdx < E && (G[BestIdx].second == TargetLowering::C_Other ||
6026 G[BestIdx].second == TargetLowering::C_Immediate);
6027 ++BestIdx) {
6028 if (lowerImmediateIfPossible(G[BestIdx], Op, DAG, *this))
6029 break;
6030 // If we're out of constraints, just pick the first one.
6031 if (BestIdx + 1 == E) {
6032 BestIdx = 0;
6033 break;
6034 }
6035 }
6036
6037 OpInfo.ConstraintCode = G[BestIdx].first;
6038 OpInfo.ConstraintType = G[BestIdx].second;
6039 }
6040
6041 // 'X' matches anything.
6042 if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
6043 // Constants are handled elsewhere. For Functions, the type here is the
6044 // type of the result, which is not what we want to look at; leave them
6045 // alone.
6046 Value *v = OpInfo.CallOperandVal;
6047 if (isa<ConstantInt>(v) || isa<Function>(v)) {
6048 return;
6049 }
6050
6051 if (isa<BasicBlock>(v) || isa<BlockAddress>(v)) {
6052 OpInfo.ConstraintCode = "i";
6053 return;
6054 }
6055
6056 // Otherwise, try to resolve it to something we know about by looking at
6057 // the actual operand type.
6058 if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
6059 OpInfo.ConstraintCode = Repl;
6060 OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
6061 }
6062 }
6063}
6064
6065/// Given an exact SDIV by a constant, create a multiplication
6066/// with the multiplicative inverse of the constant.
6068 const SDLoc &dl, SelectionDAG &DAG,
6069 SmallVectorImpl<SDNode *> &Created) {
6070 SDValue Op0 = N->getOperand(0);
6071 SDValue Op1 = N->getOperand(1);
6072 EVT VT = N->getValueType(0);
6073 EVT SVT = VT.getScalarType();
6074 EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
6075 EVT ShSVT = ShVT.getScalarType();
6076
6077 bool UseSRA = false;
6078 SmallVector<SDValue, 16> Shifts, Factors;
6079
6080 auto BuildSDIVPattern = [&](ConstantSDNode *C) {
6081 if (C->isZero())
6082 return false;
6083 APInt Divisor = C->getAPIntValue();
6084 unsigned Shift = Divisor.countr_zero();
6085 if (Shift) {
6086 Divisor.ashrInPlace(Shift);
6087 UseSRA = true;
6088 }
6089 APInt Factor = Divisor.multiplicativeInverse();
6090 Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
6091 Factors.push_back(DAG.getConstant(Factor, dl, SVT));
6092 return true;
6093 };
6094
6095 // Collect all magic values from the build vector.
6096 if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern))
6097 return SDValue();
6098
6099 SDValue Shift, Factor;
6100 if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
6101 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6102 Factor = DAG.getBuildVector(VT, dl, Factors);
6103 } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
6104 assert(Shifts.size() == 1 && Factors.size() == 1 &&
6105 "Expected matchUnaryPredicate to return one element for scalable "
6106 "vectors");
6107 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6108 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6109 } else {
6110 assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
6111 Shift = Shifts[0];
6112 Factor = Factors[0];
6113 }
6114
6115 SDValue Res = Op0;
6116
6117 // Shift the value upfront if it is even, so the LSB is one.
6118 if (UseSRA) {
6119 // TODO: For UDIV use SRL instead of SRA.
6120 SDNodeFlags Flags;
6121 Flags.setExact(true);
6122 Res = DAG.getNode(ISD::SRA, dl, VT, Res, Shift, Flags);
6123 Created.push_back(Res.getNode());
6124 }
6125
6126 return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
6127}
6128
6130 SelectionDAG &DAG,
6131 SmallVectorImpl<SDNode *> &Created) const {
6133 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6134 if (TLI.isIntDivCheap(N->getValueType(0), Attr))
6135 return SDValue(N, 0); // Lower SDIV as SDIV
6136 return SDValue();
6137}
6138
6139SDValue
6141 SelectionDAG &DAG,
6142 SmallVectorImpl<SDNode *> &Created) const {
6144 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6145 if (TLI.isIntDivCheap(N->getValueType(0), Attr))
6146 return SDValue(N, 0); // Lower SREM as SREM
6147 return SDValue();
6148}
6149
6150/// Build sdiv by power-of-2 with conditional move instructions
6151/// Ref: "Hacker's Delight" by Henry Warren 10-1
6152/// If conditional move/branch is preferred, we lower sdiv x, +/-2**k into:
6153/// bgez x, label
6154/// add x, x, 2**k-1
6155/// label:
6156/// sra res, x, k
6157/// neg res, res (when the divisor is negative)
6159 SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
6160 SmallVectorImpl<SDNode *> &Created) const {
6161 unsigned Lg2 = Divisor.countr_zero();
6162 EVT VT = N->getValueType(0);
6163
6164 SDLoc DL(N);
6165 SDValue N0 = N->getOperand(0);
6166 SDValue Zero = DAG.getConstant(0, DL, VT);
6167 APInt Lg2Mask = APInt::getLowBitsSet(VT.getSizeInBits(), Lg2);
6168 SDValue Pow2MinusOne = DAG.getConstant(Lg2Mask, DL, VT);
6169
6170 // If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
6171 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6172 SDValue Cmp = DAG.getSetCC(DL, CCVT, N0, Zero, ISD::SETLT);
6173 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
6174 SDValue CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);
6175
6176 Created.push_back(Cmp.getNode());
6177 Created.push_back(Add.getNode());
6178 Created.push_back(CMov.getNode());
6179
6180 // Divide by pow2.
6181 SDValue SRA =
6182 DAG.getNode(ISD::SRA, DL, VT, CMov, DAG.getConstant(Lg2, DL, VT));
6183
6184 // If we're dividing by a positive value, we're done. Otherwise, we must
6185 // negate the result.
6186 if (Divisor.isNonNegative())
6187 return SRA;
6188
6189 Created.push_back(SRA.getNode());
6190 return DAG.getNode(ISD::SUB, DL, VT, Zero, SRA);
6191}
6192
6193/// Given an ISD::SDIV node expressing a divide by constant,
6194/// return a DAG expression to select that will generate the same value by
6195/// multiplying by a magic number.
6196/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
// NOTE(review): extraction gap — source line 6197 (the signature line
// `SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,`) is
// missing from this listing; confirm against upstream.
6198 bool IsAfterLegalization,
6199 SmallVectorImpl<SDNode *> &Created) const {
6200 SDLoc dl(N);
6201 EVT VT = N->getValueType(0);
6202 EVT SVT = VT.getScalarType();
6203 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6204 EVT ShSVT = ShVT.getScalarType();
6205 unsigned EltBits = VT.getScalarSizeInBits();
// MulVT is only set (and used) on the illegal-type path below: a wider legal
// type in which the multiply can be performed.
6206 EVT MulVT;
6207
6208 // Check to see if we can do this.
6209 // FIXME: We should be more aggressive here.
6210 if (!isTypeLegal(VT)) {
6211 // Limit this to simple scalars for now.
6212 if (VT.isVector() || !VT.isSimple())
6213 return SDValue();
6214
6215 // If this type will be promoted to a large enough type with a legal
6216 // multiply operation, we can go ahead and do this transform.
// NOTE(review): extraction gap — source line 6217 (the condition guarding
// this early return; upstream it checks the type action is
// TypePromoteInteger) is missing from this listing; confirm upstream.
6218 return SDValue();
6219
6220 MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
6221 if (MulVT.getSizeInBits() < (2 * EltBits) ||
6222 !isOperationLegal(ISD::MUL, MulVT))
6223 return SDValue();
6224 }
6225
6226 // If the sdiv has an 'exact' bit we can use a simpler lowering.
6227 if (N->getFlags().hasExact())
6228 return BuildExactSDIV(*this, N, dl, DAG, Created);
6229
// Per-element magic constants; one entry per divisor lane.
6230 SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;
6231
// Computes the magic multiplier/shift for one constant divisor lane.
// Returns false (abandoning the fold) for a zero divisor — division by
// zero is left to be handled elsewhere.
6232 auto BuildSDIVPattern = [&](ConstantSDNode *C) {
6233 if (C->isZero())
6234 return false;
6235
6236 const APInt &Divisor = C->getAPIntValue();
// NOTE(review): extraction gap — source line 6237 (upstream:
// `SignedDivisionByConstantInfo magics = SignedDivisionByConstantInfo::get(Divisor);`,
// which declares the `magics` used below) is missing from this listing.
6238 int NumeratorFactor = 0;
6239 int ShiftMask = -1;
6240
6241 if (Divisor.isOne() || Divisor.isAllOnes()) {
6242 // If d is +1/-1, we just multiply the numerator by +1/-1.
6243 NumeratorFactor = Divisor.getSExtValue();
6244 magics.Magic = 0;
6245 magics.ShiftAmount = 0;
// ShiftMask = 0 makes the final sign-bit AND a no-op for this lane.
6246 ShiftMask = 0;
6247 } else if (Divisor.isStrictlyPositive() && magics.Magic.isNegative()) {
6248 // If d > 0 and m < 0, add the numerator.
6249 NumeratorFactor = 1;
6250 } else if (Divisor.isNegative() && magics.Magic.isStrictlyPositive()) {
6251 // If d < 0 and m > 0, subtract the numerator.
6252 NumeratorFactor = -1;
6253 }
6254
6255 MagicFactors.push_back(DAG.getConstant(magics.Magic, dl, SVT));
6256 Factors.push_back(DAG.getConstant(NumeratorFactor, dl, SVT));
6257 Shifts.push_back(DAG.getConstant(magics.ShiftAmount, dl, ShSVT));
6258 ShiftMasks.push_back(DAG.getConstant(ShiftMask, dl, SVT));
6259 return true;
6260 };
6261
6262 SDValue N0 = N->getOperand(0);
6263 SDValue N1 = N->getOperand(1);
6264
6265 // Collect the shifts / magic values from each element.
6266 if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern))
6267 return SDValue();
6268
// Re-assemble the per-lane constants into operands matching N1's shape:
// BUILD_VECTOR, SPLAT_VECTOR (scalable), or plain scalar.
6269 SDValue MagicFactor, Factor, Shift, ShiftMask;
6270 if (N1.getOpcode() == ISD::BUILD_VECTOR) {
6271 MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
6272 Factor = DAG.getBuildVector(VT, dl, Factors);
6273 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6274 ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks);
6275 } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
6276 assert(MagicFactors.size() == 1 && Factors.size() == 1 &&
6277 Shifts.size() == 1 && ShiftMasks.size() == 1 &&
6278 "Expected matchUnaryPredicate to return one element for scalable "
6279 "vectors");
6280 MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
6281 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6282 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6283 ShiftMask = DAG.getSplatVector(VT, dl, ShiftMasks[0]);
6284 } else {
6285 assert(isa<ConstantSDNode>(N1) && "Expected a constant");
6286 MagicFactor = MagicFactors[0];
6287 Factor = Factors[0];
6288 Shift = Shifts[0];
6289 ShiftMask = ShiftMasks[0];
6290 }
6291
6292 // Multiply the numerator (operand 0) by the magic value.
6293 // FIXME: We should support doing a MUL in a wider type.
// Returns the high half of the signed X*Y product, using whichever of
// MULHS / SMUL_LOHI / widened MUL+SRL is available; SDValue() if none is.
6294 auto GetMULHS = [&](SDValue X, SDValue Y) {
6295 // If the type isn't legal, use a wider mul of the type calculated
6296 // earlier.
6297 if (!isTypeLegal(VT)) {
6298 X = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, X);
6299 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, Y);
6300 Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
6301 Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
6302 DAG.getShiftAmountConstant(EltBits, MulVT, dl));
6303 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6304 }
6305
6306 if (isOperationLegalOrCustom(ISD::MULHS, VT, IsAfterLegalization))
6307 return DAG.getNode(ISD::MULHS, dl, VT, X, Y);
6308 if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT, IsAfterLegalization)) {
6309 SDValue LoHi =
6310 DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
// Result 1 of SMUL_LOHI is the high half of the product.
6311 return SDValue(LoHi.getNode(), 1);
6312 }
6313 // If type twice as wide legal, widen and use a mul plus a shift.
6314 unsigned Size = VT.getScalarSizeInBits();
6315 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), Size * 2);
6316 if (VT.isVector())
6317 WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
// NOTE(review): extraction gap — source line 6318 (the remaining argument
// of this getVectorVT call; upstream it passes VT's element count) is
// missing from this listing.
6319 if (isOperationLegalOrCustom(ISD::MUL, WideVT)) {
6320 X = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, X);
6321 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, Y);
6322 Y = DAG.getNode(ISD::MUL, dl, WideVT, X, Y);
6323 Y = DAG.getNode(ISD::SRL, dl, WideVT, Y,
6324 DAG.getShiftAmountConstant(EltBits, WideVT, dl));
6325 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6326 }
6327 return SDValue();
6328 };
6329
6330 SDValue Q = GetMULHS(N0, MagicFactor);
6331 if (!Q)
6332 return SDValue();
6333
6334 Created.push_back(Q.getNode());
6335
6336 // (Optionally) Add/subtract the numerator using Factor.
// Factor is 0/+1/-1 per lane, so this MUL+ADD either leaves Q unchanged or
// adds/subtracts the dividend, as required by the magic-number derivation.
6337 Factor = DAG.getNode(ISD::MUL, dl, VT, N0, Factor);
6338 Created.push_back(Factor.getNode());
6339 Q = DAG.getNode(ISD::ADD, dl, VT, Q, Factor);
6340 Created.push_back(Q.getNode());
6341
6342 // Shift right algebraic by shift value.
6343 Q = DAG.getNode(ISD::SRA, dl, VT, Q, Shift);
6344 Created.push_back(Q.getNode());
6345
6346 // Extract the sign bit, mask it and add it to the quotient.
6347 SDValue SignShift = DAG.getConstant(EltBits - 1, dl, ShVT);
6348 SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, SignShift);
6349 Created.push_back(T.getNode());
6350 T = DAG.getNode(ISD::AND, dl, VT, T, ShiftMask);
6351 Created.push_back(T.getNode());
6352 return DAG.getNode(ISD::ADD, dl, VT, Q, T);
6353}
6354
6355/// Given an ISD::UDIV node expressing a divide by constant,
6356/// return a DAG expression to select that will generate the same value by
6357/// multiplying by a magic number.
6358/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
// NOTE(review): extraction gap — source line 6359 (the signature line
// `SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,`) is
// missing from this listing; confirm against upstream.
6360 bool IsAfterLegalization,
6361 SmallVectorImpl<SDNode *> &Created) const {
6362 SDLoc dl(N);
6363 EVT VT = N->getValueType(0);
6364 EVT SVT = VT.getScalarType();
6365 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6366 EVT ShSVT = ShVT.getScalarType();
6367 unsigned EltBits = VT.getScalarSizeInBits();
// Wider legal type for the multiply; only set on the illegal-type path.
6368 EVT MulVT;
6369
6370 // Check to see if we can do this.
6371 // FIXME: We should be more aggressive here.
6372 if (!isTypeLegal(VT)) {
6373 // Limit this to simple scalars for now.
6374 if (VT.isVector() || !VT.isSimple())
6375 return SDValue();
6376
6377 // If this type will be promoted to a large enough type with a legal
6378 // multiply operation, we can go ahead and do this transform.
// NOTE(review): extraction gap — source line 6379 (the condition guarding
// this early return; upstream it checks for TypePromoteInteger) is missing
// from this listing.
6380 return SDValue();
6381
6382 MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
6383 if (MulVT.getSizeInBits() < (2 * EltBits) ||
6384 !isOperationLegal(ISD::MUL, MulVT))
6385 return SDValue();
6386 }
6387
6388 SDValue N0 = N->getOperand(0);
6389 SDValue N1 = N->getOperand(1);
6390
6391 // Try to use leading zeros of the dividend to reduce the multiplier and
6392 // avoid expensive fixups.
6393 // TODO: Support vectors.
6394 unsigned LeadingZeros = 0;
6395 if (!VT.isVector() && isa<ConstantSDNode>(N1)) {
6396 assert(!isOneConstant(N1) && "Unexpected divisor");
6397 LeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros();
6398 // UnsignedDivisionByConstantInfo doesn't work correctly if leading zeros in
6399 // the dividend exceeds the leading zeros for the divisor.
6400 LeadingZeros = std::min(LeadingZeros, N1->getAsAPIntVal().countl_zero());
6401 }
6402
// NPQ = the "add the numerator back" fixup path needed when the magic
// multiplier would not fit (magics.IsAdd below).
6403 bool UseNPQ = false, UsePreShift = false, UsePostShift = false;
6404 SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
6405
// Computes the unsigned magic multiplier/shifts for one divisor lane.
// Returns false for a zero divisor (division by zero is UB; left to be
// constant-folded elsewhere).
6406 auto BuildUDIVPattern = [&](ConstantSDNode *C) {
6407 if (C->isZero())
6408 return false;
6409 const APInt& Divisor = C->getAPIntValue();
6410
6411 SDValue PreShift, MagicFactor, NPQFactor, PostShift;
6412
6413 // Magic algorithm doesn't work for division by 1. We need to emit a select
6414 // at the end.
6415 if (Divisor.isOne()) {
// UNDEF lanes here are dead: the final select (at the end of the
// function) returns N0 directly for divisor == 1.
6416 PreShift = PostShift = DAG.getUNDEF(ShSVT);
6417 MagicFactor = NPQFactor = DAG.getUNDEF(SVT);
6418 } else {
// NOTE(review): extraction gap — source line 6419 (upstream:
// `UnsignedDivisionByConstantInfo magics =`, the declaration completed
// by the next line) is missing from this listing.
6420 UnsignedDivisionByConstantInfo::get(Divisor, LeadingZeros);
6421
6422 MagicFactor = DAG.getConstant(magics.Magic, dl, SVT);
6423
6424 assert(magics.PreShift < Divisor.getBitWidth() &&
6425 "We shouldn't generate an undefined shift!");
6426 assert(magics.PostShift < Divisor.getBitWidth() &&
6427 "We shouldn't generate an undefined shift!");
6428 assert((!magics.IsAdd || magics.PreShift == 0) &&
6429 "Unexpected pre-shift");
6430 PreShift = DAG.getConstant(magics.PreShift, dl, ShSVT);
6431 PostShift = DAG.getConstant(magics.PostShift, dl, ShSVT);
// NPQFactor is 2^(EltBits-1) for IsAdd lanes (acts as SRL-by-1 via
// MULHU in the vector path below), else 0 (multiplies NPQ away).
6432 NPQFactor = DAG.getConstant(
6433 magics.IsAdd ? APInt::getOneBitSet(EltBits, EltBits - 1)
6434 : APInt::getZero(EltBits),
6435 dl, SVT);
6436 UseNPQ |= magics.IsAdd;
6437 UsePreShift |= magics.PreShift != 0;
6438 UsePostShift |= magics.PostShift != 0;
6439 }
6440
6441 PreShifts.push_back(PreShift);
6442 MagicFactors.push_back(MagicFactor);
6443 NPQFactors.push_back(NPQFactor);
6444 PostShifts.push_back(PostShift);
6445 return true;
6446 };
6447
6448 // Collect the shifts/magic values from each element.
6449 if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern))
6450 return SDValue();
6451
// Re-assemble per-lane constants to match N1's shape.
6452 SDValue PreShift, PostShift, MagicFactor, NPQFactor;
6453 if (N1.getOpcode() == ISD::BUILD_VECTOR) {
6454 PreShift = DAG.getBuildVector(ShVT, dl, PreShifts);
6455 MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
6456 NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors);
6457 PostShift = DAG.getBuildVector(ShVT, dl, PostShifts);
6458 } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
6459 assert(PreShifts.size() == 1 && MagicFactors.size() == 1 &&
6460 NPQFactors.size() == 1 && PostShifts.size() == 1 &&
6461 "Expected matchUnaryPredicate to return one for scalable vectors");
6462 PreShift = DAG.getSplatVector(ShVT, dl, PreShifts[0]);
6463 MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
6464 NPQFactor = DAG.getSplatVector(VT, dl, NPQFactors[0]);
6465 PostShift = DAG.getSplatVector(ShVT, dl, PostShifts[0]);
6466 } else {
6467 assert(isa<ConstantSDNode>(N1) && "Expected a constant");
// Scalar path: NPQFactor is deliberately left unset — the scalar NPQ
// fixup below uses an explicit SRL-by-1 instead of a MULHU.
6468 PreShift = PreShifts[0];
6469 MagicFactor = MagicFactors[0];
6470 PostShift = PostShifts[0];
6471 }
6472
6473 SDValue Q = N0;
6474 if (UsePreShift) {
6475 Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
6476 Created.push_back(Q.getNode());
6477 }
6478
6479 // FIXME: We should support doing a MUL in a wider type.
// Returns the high half of the unsigned X*Y product, via MULHU /
// UMUL_LOHI / widened MUL+SRL, whichever is available; SDValue() if none.
6480 auto GetMULHU = [&](SDValue X, SDValue Y) {
6481 // If the type isn't legal, use a wider mul of the type calculated
6482 // earlier.
6483 if (!isTypeLegal(VT)) {
6484 X = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, X);
6485 Y = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, Y);
6486 Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
6487 Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
6488 DAG.getShiftAmountConstant(EltBits, MulVT, dl));
6489 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6490 }
6491
6492 if (isOperationLegalOrCustom(ISD::MULHU, VT, IsAfterLegalization))
6493 return DAG.getNode(ISD::MULHU, dl, VT, X, Y);
6494 if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT, IsAfterLegalization)) {
6495 SDValue LoHi =
6496 DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
// Result 1 of UMUL_LOHI is the high half.
6497 return SDValue(LoHi.getNode(), 1);
6498 }
6499 // If type twice as wide legal, widen and use a mul plus a shift.
6500 unsigned Size = VT.getScalarSizeInBits();
6501 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), Size * 2);
6502 if (VT.isVector())
6503 WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
// NOTE(review): extraction gap — source line 6504 (the element-count
// argument of this getVectorVT call) is missing from this listing.
6505 if (isOperationLegalOrCustom(ISD::MUL, WideVT)) {
6506 X = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, X);
6507 Y = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, Y);
6508 Y = DAG.getNode(ISD::MUL, dl, WideVT, X, Y);
6509 Y = DAG.getNode(ISD::SRL, dl, WideVT, Y,
6510 DAG.getShiftAmountConstant(EltBits, WideVT, dl));
6511 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6512 }
6513 return SDValue(); // No mulhu or equivalent
6514 };
6515
6516 // Multiply the numerator (operand 0) by the magic value.
6517 Q = GetMULHU(Q, MagicFactor);
6518 if (!Q)
6519 return SDValue();
6520
6521 Created.push_back(Q.getNode());
6522
6523 if (UseNPQ) {
// NPQ fixup ("round-up" case): Q += (N0 - Q) >> 1, per Hacker's Delight.
6524 SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N0, Q);
6525 Created.push_back(NPQ.getNode());
6526
6527 // For vectors we might have a mix of non-NPQ/NPQ paths, so use
6528 // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
6529 if (VT.isVector())
6530 NPQ = GetMULHU(NPQ, NPQFactor);
6531 else
6532 NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, dl, ShVT));
6533
6534 Created.push_back(NPQ.getNode());
6535
6536 Q = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
6537 Created.push_back(Q.getNode());
6538 }
6539
6540 if (UsePostShift) {
6541 Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
6542 Created.push_back(Q.getNode());
6543 }
6544
6545 EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6546
// Divisor lanes equal to 1 bypass the magic sequence: select N0 for them.
6547 SDValue One = DAG.getConstant(1, dl, VT);
6548 SDValue IsOne = DAG.getSetCC(dl, SetCCVT, N1, One, ISD::SETEQ);
6549 return DAG.getSelect(dl, VT, IsOne, N0, Q);
6550}
6551
6552/// If all values in Values that *don't* match the predicate are same 'splat'
6553/// value, then replace all values with that splat value.
6554/// Else, if AlternativeReplacement was provided, then replace all values that
6555/// do match predicate with AlternativeReplacement value.
6556static
// NOTE(review): extraction gap — source line 6557 (the function name and
// first parameter; upstream this helper is named `turnVectorIntoSplatValue`
// and takes the Values container) is missing from this listing; confirm
// against upstream.
6558 std::function<bool(SDValue)> Predicate,
6559 SDValue AlternativeReplacement = SDValue()) {
6560 SDValue Replacement;
6561 // Is there a value for which the Predicate does *NOT* match? What is it?
6562 auto SplatValue = llvm::find_if_not(Values, Predicate);
6563 if (SplatValue != Values.end()) {
6564 // Does Values consist only of SplatValue's and values matching Predicate?
6565 if (llvm::all_of(Values, [Predicate, SplatValue](SDValue Value) {
6566 return Value == *SplatValue || Predicate(Value);
6567 })) // Then we shall replace values matching predicate with SplatValue.
6568 Replacement = *SplatValue;
6569 }
6570 if (!Replacement) {
6571 // Oops, we did not find the "baseline" splat value.
6572 if (!AlternativeReplacement)
6573 return; // Nothing to do.
6574 // Let's replace with provided value then.
6575 Replacement = AlternativeReplacement;
6576 }
// In-place rewrite: every element matching Predicate becomes Replacement.
6577 std::replace_if(Values.begin(), Values.end(), Predicate, Replacement);
6578}
6579
6580/// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
6581/// where the divisor is constant and the comparison target is zero,
6582/// return a DAG expression that will generate the same comparison result
6583/// using only multiplications, additions and shifts/rotations.
6584/// Ref: "Hacker's Delight" 10-17.
// Thin public wrapper: delegates to prepareUREMEqFold and, on success,
// queues every newly created node on the DAGCombiner worklist.
6585SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
6586 SDValue CompTargetNode,
// NOTE(review): extraction gap — source line 6587 (upstream: the
// `ISD::CondCode Cond,` parameter) is missing from this listing.
6588 DAGCombinerInfo &DCI,
6589 const SDLoc &DL) const {
// NOTE(review): extraction gap — source line 6590 (upstream: the local
// `SmallVector<SDNode *, 5> Built;` passed as the last argument below) is
// missing from this listing.
6591 if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
6592 DCI, DL, Built)) {
6593 for (SDNode *N : Built)
6594 DCI.AddToWorklist(N);
6595 return Folded;
6596 }
6597
// Fold was not applicable; caller keeps the original UREM + SETCC.
6598 return SDValue();
6599}
6600
6601SDValue
6602TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
6603 SDValue CompTargetNode, ISD::CondCode Cond,
6604 DAGCombinerInfo &DCI, const SDLoc &DL,
6605 SmallVectorImpl<SDNode *> &Created) const {
6606 // fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q)
6607 // - D must be constant, with D = D0 * 2^K where D0 is odd
6608 // - P is the multiplicative inverse of D0 modulo 2^W
6609 // - Q = floor(((2^W) - 1) / D)
6610 // where W is the width of the common type of N and D.
6611 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
6612 "Only applicable for (in)equality comparisons.");
6613
6614 SelectionDAG &DAG = DCI.DAG;
6615
6616 EVT VT = REMNode.getValueType();
6617 EVT SVT = VT.getScalarType();
6618 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout(), !DCI.isBeforeLegalize());
6619 EVT ShSVT = ShVT.getScalarType();
6620
6621 // If MUL is unavailable, we cannot proceed in any case.
6622 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
6623 return SDValue();
6624
// Per-lane bookkeeping flags, accumulated by the lambda below; they decide
// which fixup steps (SUB, ROTR, lane repair) are emitted afterwards.
6625 bool ComparingWithAllZeros = true;
6626 bool AllComparisonsWithNonZerosAreTautological = true;
6627 bool HadTautologicalLanes = false;
6628 bool AllLanesAreTautological = true;
6629 bool HadEvenDivisor = false;
6630 bool AllDivisorsArePowerOfTwo = true;
6631 bool HadTautologicalInvertedLanes = false;
6632 SmallVector<SDValue, 16> PAmts, KAmts, QAmts, IAmts;
6633
// Computes P (multiplicative inverse), K (trailing-zero count) and Q
// (comparison bound) for one (divisor, comparison-target) lane pair.
6634 auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
6635 // Division by 0 is UB. Leave it to be constant-folded elsewhere.
6636 if (CDiv->isZero())
6637 return false;
6638
6639 const APInt &D = CDiv->getAPIntValue();
6640 const APInt &Cmp = CCmp->getAPIntValue();
6641
6642 ComparingWithAllZeros &= Cmp.isZero();
6643
6644 // x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
6645 // if C2 is not less than C1, the comparison is always false.
6646 // But we will only be able to produce the comparison that will give the
6647 // opposive tautological answer. So this lane would need to be fixed up.
6648 bool TautologicalInvertedLane = D.ule(Cmp);
6649 HadTautologicalInvertedLanes |= TautologicalInvertedLane;
6650
6651 // If all lanes are tautological (either all divisors are ones, or divisor
6652 // is not greater than the constant we are comparing with),
6653 // we will prefer to avoid the fold.
6654 bool TautologicalLane = D.isOne() || TautologicalInvertedLane;
6655 HadTautologicalLanes |= TautologicalLane;
6656 AllLanesAreTautological &= TautologicalLane;
6657
6658 // If we are comparing with non-zero, we need'll need to subtract said
6659 // comparison value from the LHS. But there is no point in doing that if
6660 // every lane where we are comparing with non-zero is tautological..
6661 if (!Cmp.isZero())
6662 AllComparisonsWithNonZerosAreTautological &= TautologicalLane;
6663
6664 // Decompose D into D0 * 2^K
6665 unsigned K = D.countr_zero();
6666 assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
6667 APInt D0 = D.lshr(K);
6668
6669 // D is even if it has trailing zeros.
6670 HadEvenDivisor |= (K != 0);
6671 // D is a power-of-two if D0 is one.
6672 // If all divisors are power-of-two, we will prefer to avoid the fold.
6673 AllDivisorsArePowerOfTwo &= D0.isOne();
6674
6675 // P = inv(D0, 2^W)
6676 // 2^W requires W + 1 bits, so we have to extend and then truncate.
6677 unsigned W = D.getBitWidth();
// NOTE(review): extraction gap — source line 6678 (upstream: the
// computation of `APInt P` as the multiplicative inverse of D0 mod 2^W)
// is missing from this listing.
6679 assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
6680
6681 // Q = floor((2^W - 1) u/ D)
6682 // R = ((2^W - 1) u% D)
6683 APInt Q, R;
// NOTE(review): extraction gap — source line 6684 (upstream:
// `APInt::udivrem(APInt::getAllOnes(W), D, Q, R);`, which fills Q and R)
// is missing from this listing.
6685
6686 // If we are comparing with zero, then that comparison constant is okay,
6687 // else it may need to be one less than that.
6688 if (Cmp.ugt(R))
6689 Q -= 1;
6690
// NOTE(review): extraction gap — source line 6691 (upstream: the first
// half of this assert, `assert(APInt::getAllOnes(ShSVT.getSizeInBits())
// .ugt(K) &&`) is missing from this listing.
6692 "We are expecting that K is always less than all-ones for ShSVT");
6693
6694 // If the lane is tautological the result can be constant-folded.
6695 if (TautologicalLane) {
6696 // Set P and K amount to a bogus values so we can try to splat them.
6697 P = 0;
6698 K = -1;
6699 // And ensure that comparison constant is tautological,
6700 // it will always compare true/false.
6701 Q = -1;
6702 }
6703
6704 PAmts.push_back(DAG.getConstant(P, DL, SVT));
6705 KAmts.push_back(
6706 DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT));
6707 QAmts.push_back(DAG.getConstant(Q, DL, SVT));
6708 return true;
6709 };
6710
6711 SDValue N = REMNode.getOperand(0);
6712 SDValue D = REMNode.getOperand(1);
6713
6714 // Collect the values from each element.
6715 if (!ISD::matchBinaryPredicate(D, CompTargetNode, BuildUREMPattern))
6716 return SDValue();
6717
6718 // If all lanes are tautological, the result can be constant-folded.
6719 if (AllLanesAreTautological)
6720 return SDValue();
6721
6722 // If this is a urem by a powers-of-two, avoid the fold since it can be
6723 // best implemented as a bit test.
6724 if (AllDivisorsArePowerOfTwo)
6725 return SDValue();
6726
// Re-assemble the per-lane constants to match D's shape.
6727 SDValue PVal, KVal, QVal;
6728 if (D.getOpcode() == ISD::BUILD_VECTOR) {
6729 if (HadTautologicalLanes) {
6730 // Try to turn PAmts into a splat, since we don't care about the values
6731 // that are currently '0'. If we can't, just keep '0'`s.
// NOTE(review): extraction gap — source line 6732 (upstream: the call
// splatting PAmts over null-constant lanes) is missing from this listing.
6733 // Try to turn KAmts into a splat, since we don't care about the values
6734 // that are currently '-1'. If we can't, change them to '0'`s.
// NOTE(review): extraction gap — source line 6735 (upstream: the first
// line of the call splatting KAmts over all-ones lanes, completed by the
// next line) is missing from this listing.
6736 DAG.getConstant(0, DL, ShSVT));
6737 }
6738
6739 PVal = DAG.getBuildVector(VT, DL, PAmts);
6740 KVal = DAG.getBuildVector(ShVT, DL, KAmts);
6741 QVal = DAG.getBuildVector(VT, DL, QAmts);
6742 } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
6743 assert(PAmts.size() == 1 && KAmts.size() == 1 && QAmts.size() == 1 &&
6744 "Expected matchBinaryPredicate to return one element for "
6745 "SPLAT_VECTORs");
6746 PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
6747 KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
6748 QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
6749 } else {
6750 PVal = PAmts[0];
6751 KVal = KAmts[0];
6752 QVal = QAmts[0];
6753 }
6754
6755 if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
6756 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::SUB, VT))
6757 return SDValue(); // FIXME: Could/should use `ISD::ADD`?
6758 assert(CompTargetNode.getValueType() == N.getValueType() &&
6759 "Expecting that the types on LHS and RHS of comparisons match.");
// Reduce the general case to comparison-with-zero: N' = N - Cmp.
6760 N = DAG.getNode(ISD::SUB, DL, VT, N, CompTargetNode);
6761 }
6762
6763 // (mul N, P)
6764 SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
6765 Created.push_back(Op0.getNode());
6766
6767 // Rotate right only if any divisor was even. We avoid rotates for all-odd
6768 // divisors as a performance improvement, since rotating by 0 is a no-op.
6769 if (HadEvenDivisor) {
6770 // We need ROTR to do this.
6771 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
6772 return SDValue();
6773 // UREM: (rotr (mul N, P), K)
6774 Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
6775 Created.push_back(Op0.getNode());
6776 }
6777
6778 // UREM: (setule/setugt (rotr (mul N, P), K), Q)
6779 SDValue NewCC =
6780 DAG.getSetCC(DL, SETCCVT, Op0, QVal,
// NOTE(review): extraction gap — source line 6781 (upstream: the condition
// argument selecting SETULE for SETEQ / SETUGT for SETNE) is missing from
// this listing.
6782 if (!HadTautologicalInvertedLanes)
6783 return NewCC;
6784
6785 // If any lanes previously compared always-false, the NewCC will give
6786 // always-true result for them, so we need to fixup those lanes.
6787 // Or the other way around for inequality predicate.
6788 assert(VT.isVector() && "Can/should only get here for vectors.");
6789 Created.push_back(NewCC.getNode());
6790
6791 // x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
6792 // if C2 is not less than C1, the comparison is always false.
6793 // But we have produced the comparison that will give the
6794 // opposive tautological answer. So these lanes would need to be fixed up.
6795 SDValue TautologicalInvertedChannels =
6796 DAG.getSetCC(DL, SETCCVT, D, CompTargetNode, ISD::SETULE);
6797 Created.push_back(TautologicalInvertedChannels.getNode());
6798
6799 // NOTE: we avoid letting illegal types through even if we're before legalize
6800 // ops – legalization has a hard time producing good code for this.
6801 if (isOperationLegalOrCustom(ISD::VSELECT, SETCCVT)) {
6802 // If we have a vector select, let's replace the comparison results in the
6803 // affected lanes with the correct tautological result.
6804 SDValue Replacement = DAG.getBoolConstant(Cond == ISD::SETEQ ? false : true,
6805 DL, SETCCVT, SETCCVT);
6806 return DAG.getNode(ISD::VSELECT, DL, SETCCVT, TautologicalInvertedChannels,
6807 Replacement, NewCC);
6808 }
6809
6810 // Else, we can just invert the comparison result in the appropriate lanes.
6811 //
6812 // NOTE: see the note above VSELECT above.
6813 if (isOperationLegalOrCustom(ISD::XOR, SETCCVT))
6814 return DAG.getNode(ISD::XOR, DL, SETCCVT, NewCC,
6815 TautologicalInvertedChannels);
6816
6817 return SDValue(); // Don't know how to lower.
6818}
6819
6820/// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
6821/// where the divisor is constant and the comparison target is zero,
6822/// return a DAG expression that will generate the same comparison result
6823/// using only multiplications, additions and shifts/rotations.
6824/// Ref: "Hacker's Delight" 10-17.
// Thin public wrapper: delegates to prepareSREMEqFold and, on success,
// queues every newly created node on the DAGCombiner worklist.
6825SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
6826 SDValue CompTargetNode,
// NOTE(review): extraction gap — source line 6827 (upstream: the
// `ISD::CondCode Cond,` parameter) is missing from this listing.
6828 DAGCombinerInfo &DCI,
6829 const SDLoc &DL) const {
// NOTE(review): extraction gap — source line 6830 (upstream: the local
// `SmallVector<SDNode *, 7> Built;` passed as the last argument below and
// bounded by the assert) is missing from this listing.
6831 if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
6832 DCI, DL, Built)) {
6833 assert(Built.size() <= 7 && "Max size prediction failed.");
6834 for (SDNode *N : Built)
6835 DCI.AddToWorklist(N);
6836 return Folded;
6837 }
6838
// Fold was not applicable; caller keeps the original SREM + SETCC.
6839 return SDValue();
6840}
6841
6842SDValue
6843TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
6844 SDValue CompTargetNode, ISD::CondCode Cond,
6845 DAGCombinerInfo &DCI, const SDLoc &DL,
6846 SmallVectorImpl<SDNode *> &Created) const {
6847 // Derived from Hacker's Delight, 2nd Edition, by Hank Warren. Section 10-17.
6848 // Fold:
6849 // (seteq/ne (srem N, D), 0)
6850 // To:
6851 // (setule/ugt (rotr (add (mul N, P), A), K), Q)
6852 //
6853 // - D must be constant, with D = D0 * 2^K where D0 is odd
6854 // - P is the multiplicative inverse of D0 modulo 2^W
6855 // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^k)))
6856 // - Q = floor((2 * A) / (2^K))
6857 // where W is the width of the common type of N and D.
6858 //
6859 // When D is a power of two (and thus D0 is 1), the normal
6860 // formula for A and Q don't apply, because the derivation
6861 // depends on D not dividing 2^(W-1), and thus theorem ZRS
6862 // does not apply. This specifically fails when N = INT_MIN.
6863 //
6864 // Instead, for power-of-two D, we use:
6865 // - A = 2^(W-1)
6866 // |-> Order-preserving map from [-2^(W-1), 2^(W-1) - 1] to [0,2^W - 1])
6867 // - Q = 2^(W-K) - 1
6868 // |-> Test that the top K bits are zero after rotation
6869 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
6870 "Only applicable for (in)equality comparisons.");
6871
6872 SelectionDAG &DAG = DCI.DAG;
6873
6874 EVT VT = REMNode.getValueType();
6875 EVT SVT = VT.getScalarType();
6876 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout(), !DCI.isBeforeLegalize());
6877 EVT ShSVT = ShVT.getScalarType();
6878
6879 // If we are after ops legalization, and MUL is unavailable, we can not
6880 // proceed.
6881 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
6882 return SDValue();
6883
6884 // TODO: Could support comparing with non-zero too.
6885 ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
6886 if (!CompTarget || !CompTarget->isZero())
6887 return SDValue();
6888
6889 bool HadIntMinDivisor = false;
6890 bool HadOneDivisor = false;
6891 bool AllDivisorsAreOnes = true;
6892 bool HadEvenDivisor = false;
6893 bool NeedToApplyOffset = false;
6894 bool AllDivisorsArePowerOfTwo = true;
6895 SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts;
6896
6897 auto BuildSREMPattern = [&](ConstantSDNode *C) {
6898 // Division by 0 is UB. Leave it to be constant-folded elsewhere.
6899 if (C->isZero())
6900 return false;
6901
6902 // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.
6903
6904 // WARNING: this fold is only valid for positive divisors!
6905 APInt D = C->getAPIntValue();
6906 if (D.isNegative())
6907 D.negate(); // `rem %X, -C` is equivalent to `rem %X, C`
6908
6909 HadIntMinDivisor |= D.isMinSignedValue();
6910
6911 // If all divisors are ones, we will prefer to avoid the fold.
6912 HadOneDivisor |= D.isOne();
6913 AllDivisorsAreOnes &= D.isOne();
6914
6915 // Decompose D into D0 * 2^K
6916 unsigned K = D.countr_zero();
6917 assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
6918 APInt D0 = D.lshr(K);
6919
6920 if (!D.isMinSignedValue()) {
6921 // D is even if it has trailing zeros; unless it's INT_MIN, in which case
6922 // we don't care about this lane in this fold, we'll special-handle it.
6923 HadEvenDivisor |= (K != 0);
6924 }
6925
6926 // D is a power-of-two if D0 is one. This includes INT_MIN.
6927 // If all divisors are power-of-two, we will prefer to avoid the fold.
6928 AllDivisorsArePowerOfTwo &= D0.isOne();
6929
6930 // P = inv(D0, 2^W)
6931 // 2^W requires W + 1 bits, so we have to extend and then truncate.
6932 unsigned W = D.getBitWidth();
6934 assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
6935
6936 // A = floor((2^(W - 1) - 1) / D0) & -2^K
6938 A.clearLowBits(K);
6939
6940 if (!D.isMinSignedValue()) {
6941 // If divisor INT_MIN, then we don't care about this lane in this fold,
6942 // we'll special-handle it.
6943 NeedToApplyOffset |= A != 0;
6944 }
6945
6946 // Q = floor((2 * A) / (2^K))
6947 APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K));
6948
6950 "We are expecting that A is always less than all-ones for SVT");
6952 "We are expecting that K is always less than all-ones for ShSVT");
6953
6954 // If D was a power of two, apply the alternate constant derivation.
6955 if (D0.isOne()) {
6956 // A = 2^(W-1)
6958 // - Q = 2^(W-K) - 1
6959 Q = APInt::getAllOnes(W - K).zext(W);
6960 }
6961
6962 // If the divisor is 1 the result can be constant-folded. Likewise, we
6963 // don't care about INT_MIN lanes, those can be set to undef if appropriate.
6964 if (D.isOne()) {
6965 // Set P, A and K to a bogus values so we can try to splat them.
6966 P = 0;
6967 A = -1;
6968 K = -1;
6969
6970 // x ?% 1 == 0 <--> true <--> x u<= -1
6971 Q = -1;
6972 }
6973
6974 PAmts.push_back(DAG.getConstant(P, DL, SVT));
6975 AAmts.push_back(DAG.getConstant(A, DL, SVT));
6976 KAmts.push_back(
6977 DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT));
6978 QAmts.push_back(DAG.getConstant(Q, DL, SVT));
6979 return true;
6980 };
6981
6982 SDValue N = REMNode.getOperand(0);
6983 SDValue D = REMNode.getOperand(1);
6984
6985 // Collect the values from each element.
6986 if (!ISD::matchUnaryPredicate(D, BuildSREMPattern))
6987 return SDValue();
6988
6989 // If this is a srem by a one, avoid the fold since it can be constant-folded.
6990 if (AllDivisorsAreOnes)
6991 return SDValue();
6992
6993 // If this is a srem by a powers-of-two (including INT_MIN), avoid the fold
6994 // since it can be best implemented as a bit test.
6995 if (AllDivisorsArePowerOfTwo)
6996 return SDValue();
6997
6998 SDValue PVal, AVal, KVal, QVal;
6999 if (D.getOpcode() == ISD::BUILD_VECTOR) {
7000 if (HadOneDivisor) {
7001 // Try to turn PAmts into a splat, since we don't care about the values
7002 // that are currently '0'. If we can't, just keep '0'`s.
7004 // Try to turn AAmts into a splat, since we don't care about the
7005 // values that are currently '-1'. If we can't, change them to '0'`s.
7007 DAG.getConstant(0, DL, SVT));
7008 // Try to turn KAmts into a splat, since we don't care about the values
7009 // that are currently '-1'. If we can't, change them to '0'`s.
7011 DAG.getConstant(0, DL, ShSVT));
7012 }
7013
7014 PVal = DAG.getBuildVector(VT, DL, PAmts);
7015 AVal = DAG.getBuildVector(VT, DL, AAmts);
7016 KVal = DAG.getBuildVector(ShVT, DL, KAmts);
7017 QVal = DAG.getBuildVector(VT, DL, QAmts);
7018 } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
7019 assert(PAmts.size() == 1 && AAmts.size() == 1 && KAmts.size() == 1 &&
7020 QAmts.size() == 1 &&
7021 "Expected matchUnaryPredicate to return one element for scalable "
7022 "vectors");
7023 PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
7024 AVal = DAG.getSplatVector(VT, DL, AAmts[0]);
7025 KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
7026 QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
7027 } else {
7028 assert(isa<ConstantSDNode>(D) && "Expected a constant");
7029 PVal = PAmts[0];
7030 AVal = AAmts[0];
7031 KVal = KAmts[0];
7032 QVal = QAmts[0];
7033 }
7034
7035 // (mul N, P)
7036 SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
7037 Created.push_back(Op0.getNode());
7038
7039 if (NeedToApplyOffset) {
7040 // We need ADD to do this.
7041 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ADD, VT))
7042 return SDValue();
7043
7044 // (add (mul N, P), A)
7045 Op0 = DAG.getNode(ISD::ADD, DL, VT, Op0, AVal);
7046 Created.push_back(Op0.getNode());
7047 }
7048
7049 // Rotate right only if any divisor was even. We avoid rotates for all-odd
7050 // divisors as a performance improvement, since rotating by 0 is a no-op.
7051 if (HadEvenDivisor) {
7052 // We need ROTR to do this.
7053 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
7054 return SDValue();
7055 // SREM: (rotr (add (mul N, P), A), K)
7056 Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
7057 Created.push_back(Op0.getNode());
7058 }
7059
7060 // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
7061 SDValue Fold =
7062 DAG.getSetCC(DL, SETCCVT, Op0, QVal,
7064
7065 // If we didn't have lanes with INT_MIN divisor, then we're done.
7066 if (!HadIntMinDivisor)
7067 return Fold;
7068
7069 // That fold is only valid for positive divisors. Which effectively means,
7070 // it is invalid for INT_MIN divisors. So if we have such a lane,
7071 // we must fix-up results for said lanes.
7072 assert(VT.isVector() && "Can/should only get here for vectors.");
7073
7074 // NOTE: we avoid letting illegal types through even if we're before legalize
7075 // ops – legalization has a hard time producing good code for the code that
7076 // follows.
7077 if (!isOperationLegalOrCustom(ISD::SETCC, SETCCVT) ||
7081 return SDValue();
7082
7083 Created.push_back(Fold.getNode());
7084
7085 SDValue IntMin = DAG.getConstant(
7087 SDValue IntMax = DAG.getConstant(
7089 SDValue Zero =
7091
7092 // Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.
7093 SDValue DivisorIsIntMin = DAG.getSetCC(DL, SETCCVT, D, IntMin, ISD::SETEQ);
7094 Created.push_back(DivisorIsIntMin.getNode());
7095
7096 // (N s% INT_MIN) ==/!= 0 <--> (N & INT_MAX) ==/!= 0
7097 SDValue Masked = DAG.getNode(ISD::AND, DL, VT, N, IntMax);
7098 Created.push_back(Masked.getNode());
7099 SDValue MaskedIsZero = DAG.getSetCC(DL, SETCCVT, Masked, Zero, Cond);
7100 Created.push_back(MaskedIsZero.getNode());
7101
7102 // To produce final result we need to blend 2 vectors: 'SetCC' and
7103 // 'MaskedIsZero'. If the divisor for channel was *NOT* INT_MIN, we pick
7104 // from 'Fold', else pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is
7105 // constant-folded, select can get lowered to a shuffle with constant mask.
7106 SDValue Blended = DAG.getNode(ISD::VSELECT, DL, SETCCVT, DivisorIsIntMin,
7107 MaskedIsZero, Fold);
7108
7109 return Blended;
7110}
7111
// Body of TargetLowering::verifyReturnAddressArgumentIsConstant (the function
// header, original lines 7112-7113, was dropped by the extraction).
// Returns true — i.e. "verification failed" — when the operand of the
// @llvm.returnaddress intrinsic is not a compile-time constant, after
// emitting a module-level diagnostic; returns false when the argument is OK.
7114 if (!isa<ConstantSDNode>(Op.getOperand(0))) {
7115 DAG.getContext()->emitError("argument to '__builtin_return_address' must "
7116 "be a constant integer");
7117 return true;
7118 }
7119
7120 return false;
7121}
7122
// Tail of TargetLowering::getSqrtInputTest (the first signature line,
// original 7123, was dropped by the extraction). Builds a SETCC that is true
// when Op should be treated as zero by a reciprocal-sqrt estimate sequence:
// - If denormal *inputs* are flushed (PreserveSign / PositiveZero), a plain
//   Op == 0.0 compare suffices, since denormals compare equal to zero after
//   flushing.
// - Otherwise denormal inputs survive, so test fabs(Op) < smallest
//   normalized value to also catch denormals that would break the estimate.
7124 const DenormalMode &Mode) const {
7125 SDLoc DL(Op);
7126 EVT VT = Op.getValueType();
7127 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7128 SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
7129
7130 // This is specifically a check for the handling of denormal inputs, not the
7131 // result.
7132 if (Mode.Input == DenormalMode::PreserveSign ||
7133 Mode.Input == DenormalMode::PositiveZero) {
7134 // Test = X == 0.0
7135 return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
7136 }
7137
7138 // Testing it with denormal inputs to avoid wrong estimate.
7139 //
7140 // Test = fabs(X) < SmallestNormal
7141 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
7142 APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
7143 SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
7144 SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
7145 return DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
7146}
7147
// TargetLowering::getNegatedExpression (the first signature line, original
// 7148, was dropped by the extraction, as were several single-line
// declarations — e.g. the NegatibleCost CostX/CostY/CostZ locals at 7254,
// 7262, 7310, 7318, 7355, 7366, 7374, and the recursion-depth guard at 7159).
// Returns a negated form of Op if one can be built no more expensively than
// Op itself, recording the relative cost in the Cost out-parameter, or an
// empty SDValue if negation is not profitable/possible.
7149 bool LegalOps, bool OptForSize,
7151 unsigned Depth) const {
7152 // fneg is removable even if it has multiple uses.
7153 if (Op.getOpcode() == ISD::FNEG || Op.getOpcode() == ISD::VP_FNEG) {
7155 return Op.getOperand(0);
7156 }
7157
7158 // Don't recurse exponentially.
7160 return SDValue();
7161
7162 // Pre-increment recursion depth for use in recursive calls.
7163 ++Depth;
7164 const SDNodeFlags Flags = Op->getFlags();
7165 const TargetOptions &Options = DAG.getTarget().Options;
7166 EVT VT = Op.getValueType();
7167 unsigned Opcode = Op.getOpcode();
7168
7169 // Don't allow anything with multiple uses unless we know it is free.
7170 if (!Op.hasOneUse() && Opcode != ISD::ConstantFP) {
7171 bool IsFreeExtend = Opcode == ISD::FP_EXTEND &&
7172 isFPExtFree(VT, Op.getOperand(0).getValueType());
7173 if (!IsFreeExtend)
7174 return SDValue();
7175 }
7176
7177 // Helper to drop a speculatively-built negated node that ended up unused.
7178 auto RemoveDeadNode = [&](SDValue N) {
7179 if (N && N.getNode()->use_empty())
7180 DAG.RemoveDeadNode(N.getNode());
7181 };
wait-marker
7182 SDLoc DL(Op);
7183
7184 // Because getNegatedExpression can delete nodes we need a handle to keep
7185 // temporary nodes alive in case the recursion manages to create an identical
7186 // node.
7187 std::list<HandleSDNode> Handles;
7188
7189 switch (Opcode) {
7190 case ISD::ConstantFP: {
7191 // Don't invert constant FP values after legalization unless the target says
7192 // the negated constant is legal.
7193 bool IsOpLegal =
7195 isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
7196 OptForSize);
7197
7198 if (LegalOps && !IsOpLegal)
7199 break;
7200
7201 APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
7202 V.changeSign();
7203 SDValue CFP = DAG.getConstantFP(V, DL, VT);
7204
7205 // If we already have the use of the negated floating constant, it is free
7206 // to negate it even it has multiple uses.
7207 if (!Op.hasOneUse() && CFP.use_empty())
7208 break;
7210 return CFP;
7211 }
7212 case ISD::BUILD_VECTOR: {
7213 // Only permit BUILD_VECTOR of constants.
7214 if (llvm::any_of(Op->op_values(), [&](SDValue N) {
7215 return !N.isUndef() && !isa<ConstantFPSDNode>(N);
7216 }))
7217 break;
7218
7219 bool IsOpLegal =
7222 llvm::all_of(Op->op_values(), [&](SDValue N) {
7223 return N.isUndef() ||
7224 isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
7225 OptForSize);
7226 });
7227
7228 if (LegalOps && !IsOpLegal)
7229 break;
7230
7232 for (SDValue C : Op->op_values()) {
7233 if (C.isUndef()) {
7234 Ops.push_back(C);
7235 continue;
7236 }
7237 APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
7238 V.changeSign();
7239 Ops.push_back(DAG.getConstantFP(V, DL, C.getValueType()));
7240 }
7242 return DAG.getBuildVector(VT, DL, Ops);
7243 }
7244 case ISD::FADD: {
7245 if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
7246 break;
7247
7248 // After operation legalization, it might not be legal to create new FSUBs.
7249 if (LegalOps && !isOperationLegalOrCustom(ISD::FSUB, VT))
7250 break;
7251 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7252
7253 // fold (fneg (fadd X, Y)) -> (fsub (fneg X), Y)
7255 SDValue NegX =
7256 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7257 // Prevent this node from being deleted by the next call.
7258 if (NegX)
7259 Handles.emplace_back(NegX);
7260
7261 // fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X)
7263 SDValue NegY =
7264 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7265
7266 // We're done with the handles.
7267 Handles.clear();
7268
7269 // Negate the X if its cost is less or equal than Y.
7270 if (NegX && (CostX <= CostY)) {
7271 Cost = CostX;
7272 SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegX, Y, Flags);
7273 if (NegY != N)
7274 RemoveDeadNode(NegY);
7275 return N;
7276 }
7277
7278 // Negate the Y if it is not expensive.
7279 if (NegY) {
7280 Cost = CostY;
7281 SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegY, X, Flags);
7282 if (NegX != N)
7283 RemoveDeadNode(NegX);
7284 return N;
7285 }
7286 break;
7287 }
7288 case ISD::FSUB: {
7289 // We can't turn -(A-B) into B-A when we honor signed zeros.
7290 if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
7291 break;
7292
7293 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7294 // fold (fneg (fsub 0, Y)) -> Y
7295 if (ConstantFPSDNode *C = isConstOrConstSplatFP(X, /*AllowUndefs*/ true))
7296 if (C->isZero()) {
7298 return Y;
7299 }
7300
7301 // fold (fneg (fsub X, Y)) -> (fsub Y, X)
7303 return DAG.getNode(ISD::FSUB, DL, VT, Y, X, Flags);
7304 }
7305 case ISD::FMUL:
7306 case ISD::FDIV: {
7307 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7308
7309 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
7311 SDValue NegX =
7312 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7313 // Prevent this node from being deleted by the next call.
7314 if (NegX)
7315 Handles.emplace_back(NegX);
7316
7317 // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
7319 SDValue NegY =
7320 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7321
7322 // We're done with the handles.
7323 Handles.clear();
7324
7325 // Negate the X if its cost is less or equal than Y.
7326 if (NegX && (CostX <= CostY)) {
7327 Cost = CostX;
7328 SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, Flags);
7329 if (NegY != N)
7330 RemoveDeadNode(NegY);
7331 return N;
7332 }
7333
7334 // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
7335 if (auto *C = isConstOrConstSplatFP(Op.getOperand(1)))
7336 if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL)
7337 break;
7338
7339 // Negate the Y if it is not expensive.
7340 if (NegY) {
7341 Cost = CostY;
7342 SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, Flags);
7343 if (NegX != N)
7344 RemoveDeadNode(NegX);
7345 return N;
7346 }
7347 break;
7348 }
7349 case ISD::FMA:
7350 case ISD::FMAD: {
7351 if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
7352 break;
7353
7354 SDValue X = Op.getOperand(0), Y = Op.getOperand(1), Z = Op.getOperand(2);
7356 SDValue NegZ =
7357 getNegatedExpression(Z, DAG, LegalOps, OptForSize, CostZ, Depth);
7358 // Give up if fail to negate the Z.
7359 if (!NegZ)
7360 break;
7361
7362 // Prevent this node from being deleted by the next two calls.
7363 Handles.emplace_back(NegZ);
7364
7365 // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
7367 SDValue NegX =
7368 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7369 // Prevent this node from being deleted by the next call.
7370 if (NegX)
7371 Handles.emplace_back(NegX);
7372
7373 // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
7375 SDValue NegY =
7376 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7377
7378 // We're done with the handles.
7379 Handles.clear();
7380
7381 // Negate the X if its cost is less or equal than Y.
7382 if (NegX && (CostX <= CostY)) {
7383 Cost = std::min(CostX, CostZ);
7384 SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, NegZ, Flags);
7385 if (NegY != N)
7386 RemoveDeadNode(NegY);
7387 return N;
7388 }
7389
7390 // Negate the Y if it is not expensive.
7391 if (NegY) {
7392 Cost = std::min(CostY, CostZ);
7393 SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, NegZ, Flags);
7394 if (NegX != N)
7395 RemoveDeadNode(NegX);
7396 return N;
7397 }
7398 break;
7399 }
7400
7401 case ISD::FP_EXTEND:
7402 case ISD::FSIN:
7403 if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
7404 OptForSize, Cost, Depth))
7405 return DAG.getNode(Opcode, DL, VT, NegV);
7406 break;
7407 case ISD::FP_ROUND:
7408 if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
7409 OptForSize, Cost, Depth))
7410 return DAG.getNode(ISD::FP_ROUND, DL, VT, NegV, Op.getOperand(1));
7411 break;
7412 case ISD::SELECT:
7413 case ISD::VSELECT: {
7414 // fold (fneg (select C, LHS, RHS)) -> (select C, (fneg LHS), (fneg RHS))
7415 // iff at least one cost is cheaper and the other is neutral/cheaper
7416 SDValue LHS = Op.getOperand(1);
7418 SDValue NegLHS =
7419 getNegatedExpression(LHS, DAG, LegalOps, OptForSize, CostLHS, Depth);
7420 if (!NegLHS || CostLHS > NegatibleCost::Neutral) {
7421 RemoveDeadNode(NegLHS);
7422 break;
7423 }
7424
7425 // Prevent this node from being deleted by the next call.
7426 Handles.emplace_back(NegLHS);
7427
7428 SDValue RHS = Op.getOperand(2);
7430 SDValue NegRHS =
7431 getNegatedExpression(RHS, DAG, LegalOps, OptForSize, CostRHS, Depth);
7432
7433 // We're done with the handles.
7434 Handles.clear();
7435
7436 if (!NegRHS || CostRHS > NegatibleCost::Neutral ||
7437 (CostLHS != NegatibleCost::Cheaper &&
7438 CostRHS != NegatibleCost::Cheaper)) {
7439 RemoveDeadNode(NegLHS);
7440 RemoveDeadNode(NegRHS);
7441 break;
7442 }
7443
7444 Cost = std::min(CostLHS, CostRHS);
7445 return DAG.getSelect(DL, VT, Op.getOperand(0), NegLHS, NegRHS);
7446 }
7447 }
7448
7449 return SDValue();
7450}
7451
7452//===----------------------------------------------------------------------===//
7453// Legalization Utilities
7454//===----------------------------------------------------------------------===//
7455
// Expand a MUL, UMUL_LOHI or SMUL_LOHI of type VT into operations on the
// half-width type HiLoVT, using whichever of MULHS/MULHU/SMUL_LOHI/UMUL_LOHI
// the target supports (or unconditionally, for MulExpansionKind::Always).
// The low/high halves of the operands may be pre-supplied in LL/LH/RL/RH;
// otherwise they are derived from LHS/RHS by truncate + shift. On success the
// half-width pieces of the product are appended to Result and true is
// returned; false means no usable expansion was found.
// NOTE(review): the extraction dropped several original lines here (7458 —
// the Result parameter, 7466/7468/7470/7472 — the legality-query halves of
// the Has* initializers, 7503, 7543-7544, 7591), so some statements below
// are visibly truncated; the surviving tokens are kept byte-identical.
7456 bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl,
7457 SDValue LHS, SDValue RHS,
7459 EVT HiLoVT, SelectionDAG &DAG,
7460 MulExpansionKind Kind, SDValue LL,
7461 SDValue LH, SDValue RL, SDValue RH) const {
7462 assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
7463 Opcode == ISD::SMUL_LOHI);
7464
7465 bool HasMULHS = (Kind == MulExpansionKind::Always) ||
7467 bool HasMULHU = (Kind == MulExpansionKind::Always) ||
7469 bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
7471 bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
7473
7474 if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
7475 return false;
7476
7477 unsigned OuterBitSize = VT.getScalarSizeInBits();
7478 unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();
7479
7480 // LL, LH, RL, and RH must be either all NULL or all set to a value.
7481 assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
7482 (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));
7483
7484 SDVTList VTs = DAG.getVTList(HiLoVT, HiLoVT);
7485 auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
7486 bool Signed) -> bool {
7487 if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
7488 Lo = DAG.getNode(Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, dl, VTs, L, R);
7489 Hi = SDValue(Lo.getNode(), 1);
7490 return true;
7491 }
7492 if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
7493 Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, L, R);
7494 Hi = DAG.getNode(Signed ? ISD::MULHS : ISD::MULHU, dl, HiLoVT, L, R);
7495 return true;
7496 }
7497 return false;
7498 };
7499
7500 SDValue Lo, Hi;
7501
7502 if (!LL.getNode() && !RL.getNode() &&
7504 LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LHS);
7505 RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RHS);
7506 }
7507
7508 if (!LL.getNode())
7509 return false;
7510
7511 APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
7512 if (DAG.MaskedValueIsZero(LHS, HighMask) &&
7513 DAG.MaskedValueIsZero(RHS, HighMask)) {
7514 // The inputs are both zero-extended.
7515 if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
7516 Result.push_back(Lo);
7517 Result.push_back(Hi);
7518 if (Opcode != ISD::MUL) {
7519 SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
7520 Result.push_back(Zero);
7521 Result.push_back(Zero);
7522 }
7523 return true;
7524 }
7525 }
7526
7527 if (!VT.isVector() && Opcode == ISD::MUL &&
7528 DAG.ComputeMaxSignificantBits(LHS) <= InnerBitSize &&
7529 DAG.ComputeMaxSignificantBits(RHS) <= InnerBitSize) {
7530 // The input values are both sign-extended.
7531 // TODO non-MUL case?
7532 if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
7533 Result.push_back(Lo);
7534 Result.push_back(Hi);
7535 return true;
7536 }
7537 }
7538
7539 unsigned ShiftAmount = OuterBitSize - InnerBitSize;
7540 SDValue Shift = DAG.getShiftAmountConstant(ShiftAmount, VT, dl);
7541
7542 if (!LH.getNode() && !RH.getNode() &&
7545 LH = DAG.getNode(ISD::SRL, dl, VT, LHS, Shift);
7546 LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
7547 RH = DAG.getNode(ISD::SRL, dl, VT, RHS, Shift);
7548 RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
7549 }
7550
7551 if (!LH.getNode())
7552 return false;
7553
7554 if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false))
7555 return false;
7556
7557 Result.push_back(Lo);
7558
7559 if (Opcode == ISD::MUL) {
7560 RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
7561 LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
7562 Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
7563 Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
7564 Result.push_back(Hi);
7565 return true;
7566 }
7567
7568 // Compute the full width result.
7569 auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
7570 Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
7571 Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
7572 Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
7573 return DAG.getNode(ISD::OR, dl, VT, Lo, Hi);
7574 };
7575
7576 SDValue Next = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
7577 if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false))
7578 return false;
7579
7580 // This is effectively the add part of a multiply-add of half-sized operands,
7581 // so it cannot overflow.
7582 Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
7583
7584 if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
7585 return false;
7586
7587 SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
7588 EVT BoolType = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7589
7590 bool UseGlue = (isOperationLegalOrCustom(ISD::ADDC, VT) &&
7592 if (UseGlue)
7593 Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
7594 Merge(Lo, Hi));
7595 else
7596 Next = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(VT, BoolType), Next,
7597 Merge(Lo, Hi), DAG.getConstant(0, dl, BoolType));
7598
7599 SDValue Carry = Next.getValue(1);
7600 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
7601 Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
7602
7603 if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
7604 return false;
7605
7606 if (UseGlue)
7607 Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
7608 Carry);
7609 else
7610 Hi = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(HiLoVT, BoolType), Hi,
7611 Zero, Carry);
7612
7613 Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
7614
7615 // For the signed case, correct the high product: subtract the other operand
7616 // where a half was negative (standard signed-from-unsigned fixup).
7617 if (Opcode == ISD::SMUL_LOHI) {
7618 SDValue NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
7619 DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RL));
7620 Next = DAG.getSelectCC(dl, LH, Zero, NextSub, Next, ISD::SETLT);
7621
7622 NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
7623 DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LL));
7624 Next = DAG.getSelectCC(dl, RH, Zero, NextSub, Next, ISD::SETLT);
7625 }
7626
7627 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
7628 Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
7629 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
7630 return true;
7631}
7630
// Tail of TargetLowering::expandMUL (the first signature line, original 7631,
// was dropped by the extraction, as was line 7635 declaring the Result
// SmallVector). Convenience wrapper over expandMUL_LOHI for a plain MUL
// node: on success the two half-width pieces are unpacked into the Lo/Hi
// out-parameters.
7632 SelectionDAG &DAG, MulExpansionKind Kind,
7633 SDValue LL, SDValue LH, SDValue RL,
7634 SDValue RH) const {
7636 bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), SDLoc(N),
7637 N->getOperand(0), N->getOperand(1), Result, HiLoVT,
7638 DAG, Kind, LL, LH, RL, RH);
7639 if (Ok) {
7640 assert(Result.size() == 2);
7641 Lo = Result[0];
7642 Hi = Result[1];
7643 }
7644 return Ok;
7645}
7646
7647// Optimize unsigned division or remainder by constants for types twice as large
7648// as a legal VT.
7649//
7650// If (1 << (BitWidth / 2)) % Constant == 1, then the remainder
7651// can be computed
7652// as:
7653// Sum += __builtin_uadd_overflow(Lo, High, &Sum);
7654// Remainder = Sum % Constant
7655// This is based on "Remainder by Summing Digits" from Hacker's Delight.
7656//
7657// For division, we can compute the remainder using the algorithm described
7658// above, subtract it from the dividend to get an exact multiple of Constant.
7659 // Then multiply that exact multiple by the multiplicative inverse modulo
7660// (1 << (BitWidth / 2)) to get the quotient.
7661
7662// If Constant is even, we can shift right the dividend and the divisor by the
7663// number of trailing zeros in Constant before applying the remainder algorithm.
7664// If we're after the quotient, we can subtract this value from the shifted
7665// dividend and multiply by the multiplicative inverse of the shifted divisor.
7666// If we want the remainder, we shift the value left by the number of trailing
7667// zeros and add the bits that were shifted out of the dividend.
// Tail of TargetLowering::expandDIVREMByConstant (the header lines, original
// 7668-7669, were dropped by the extraction). Expands an unsigned
// UDIV/UREM/UDIVREM of type VT by a constant into operations on the
// half-width type HiLoVT, using the "remainder by summing digits" identity
// (see the algorithm comment above this function). Quotient and/or remainder
// halves are appended to Result; returns false when the expansion does not
// apply (signed ops, non-constant or too-large divisor, optsize, etc.).
7670 EVT HiLoVT, SelectionDAG &DAG,
7671 SDValue LL, SDValue LH) const {
7672 unsigned Opcode = N->getOpcode();
7673 EVT VT = N->getValueType(0);
7674
7675 // TODO: Support signed division/remainder.
7676 if (Opcode == ISD::SREM || Opcode == ISD::SDIV || Opcode == ISD::SDIVREM)
7677 return false;
7678 assert(
7679 (Opcode == ISD::UREM || Opcode == ISD::UDIV || Opcode == ISD::UDIVREM) &&
7680 "Unexpected opcode");
7681
7682 auto *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
7683 if (!CN)
7684 return false;
7685
7686 APInt Divisor = CN->getAPIntValue();
7687 unsigned BitWidth = Divisor.getBitWidth();
7688 unsigned HBitWidth = BitWidth / 2;
7690 HiLoVT.getScalarSizeInBits() == HBitWidth && "Unexpected VTs");
7691
7692 // Divisor needs to be less than (1 << HBitWidth).
7693 APInt HalfMaxPlus1 = APInt::getOneBitSet(BitWidth, HBitWidth);
7694 if (Divisor.uge(HalfMaxPlus1))
7695 return false;
7696
7697 // We depend on the UREM by constant optimization in DAGCombiner that requires
7698 // high multiply.
7699 if (!isOperationLegalOrCustom(ISD::MULHU, HiLoVT) &&
7701 return false;
7702
7703 // Don't expand if optimizing for size.
7704 if (DAG.shouldOptForSize())
7705 return false;
7706
7707 // Early out for 0 or 1 divisors.
7708 if (Divisor.ule(1))
7709 return false;
7710
7711 // If the divisor is even, shift it until it becomes odd.
7712 unsigned TrailingZeros = 0;
7713 if (!Divisor[0]) {
7714 TrailingZeros = Divisor.countr_zero();
7715 Divisor.lshrInPlace(TrailingZeros);
7716 }
7717
7718 SDLoc dl(N);
7719 SDValue Sum;
7720 SDValue PartialRem;
7721
7722 // If (1 << HBitWidth) % divisor == 1, we can add the two halves together and
7723 // then add in the carry.
7724 // TODO: If we can't split it in half, we might be able to split into 3 or
7725 // more pieces using a smaller bit width.
7726 if (HalfMaxPlus1.urem(Divisor).isOne()) {
7727 assert(!LL == !LH && "Expected both input halves or no input halves!");
7728 if (!LL)
7729 std::tie(LL, LH) = DAG.SplitScalar(N->getOperand(0), dl, HiLoVT, HiLoVT);
7730
7731 // Shift the input by the number of TrailingZeros in the divisor. The
7732 // shifted out bits will be added to the remainder later.
7733 if (TrailingZeros) {
7734 // Save the shifted off bits if we need the remainder.
7735 if (Opcode != ISD::UDIV) {
7736 APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
7737 PartialRem = DAG.getNode(ISD::AND, dl, HiLoVT, LL,
7738 DAG.getConstant(Mask, dl, HiLoVT));
7739 }
7740
7741 LL = DAG.getNode(
7742 ISD::OR, dl, HiLoVT,
7743 DAG.getNode(ISD::SRL, dl, HiLoVT, LL,
7744 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl)),
7745 DAG.getNode(ISD::SHL, dl, HiLoVT, LH,
7746 DAG.getShiftAmountConstant(HBitWidth - TrailingZeros,
7747 HiLoVT, dl)));
7748 LH = DAG.getNode(ISD::SRL, dl, HiLoVT, LH,
7749 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
7750 }
7751
7752 // Use uaddo_carry if we can, otherwise use a compare to detect overflow.
7753 EVT SetCCType =
7754 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), HiLoVT);
7756 SDVTList VTList = DAG.getVTList(HiLoVT, SetCCType);
7757 Sum = DAG.getNode(ISD::UADDO, dl, VTList, LL, LH);
7758 Sum = DAG.getNode(ISD::UADDO_CARRY, dl, VTList, Sum,
7759 DAG.getConstant(0, dl, HiLoVT), Sum.getValue(1));
7760 } else {
7761 Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, LL, LH);
7762 SDValue Carry = DAG.getSetCC(dl, SetCCType, Sum, LL, ISD::SETULT);
7763 // If the boolean for the target is 0 or 1, we can add the setcc result
7764 // directly.
7765 if (getBooleanContents(HiLoVT) ==
7767 Carry = DAG.getZExtOrTrunc(Carry, dl, HiLoVT);
7768 else
7769 Carry = DAG.getSelect(dl, HiLoVT, Carry, DAG.getConstant(1, dl, HiLoVT),
7770 DAG.getConstant(0, dl, HiLoVT));
7771 Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, Sum, Carry);
7772 }
7773 }
7774
7775 // If we didn't find a sum, we can't do the expansion.
7776 if (!Sum)
7777 return false;
7778
7779 // Perform a HiLoVT urem on the Sum using truncated divisor.
7780 SDValue RemL =
7781 DAG.getNode(ISD::UREM, dl, HiLoVT, Sum,
7782 DAG.getConstant(Divisor.trunc(HBitWidth), dl, HiLoVT));
7783 SDValue RemH = DAG.getConstant(0, dl, HiLoVT);
7784
7785 if (Opcode != ISD::UREM) {
7786 // Subtract the remainder from the shifted dividend.
7787 SDValue Dividend = DAG.getNode(ISD::BUILD_PAIR, dl, VT, LL, LH);
7788 SDValue Rem = DAG.getNode(ISD::BUILD_PAIR, dl, VT, RemL, RemH);
7789
7790 Dividend = DAG.getNode(ISD::SUB, dl, VT, Dividend, Rem);
7791
7792 // Multiply by the multiplicative inverse of the divisor modulo
7793 // (1 << BitWidth).
7794 APInt MulFactor = Divisor.multiplicativeInverse();
7795
7796 SDValue Quotient = DAG.getNode(ISD::MUL, dl, VT, Dividend,
7797 DAG.getConstant(MulFactor, dl, VT));
7798
7799 // Split the quotient into low and high parts.
7800 SDValue QuotL, QuotH;
7801 std::tie(QuotL, QuotH) = DAG.SplitScalar(Quotient, dl, HiLoVT, HiLoVT);
7802 Result.push_back(QuotL);
7803 Result.push_back(QuotH);
7804 }
7805
7806 if (Opcode != ISD::UDIV) {
7807 // If we shifted the input, shift the remainder left and add the bits we
7808 // shifted off the input.
7809 if (TrailingZeros) {
7810 APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
7811 RemL = DAG.getNode(ISD::SHL, dl, HiLoVT, RemL,
7812 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
7813 RemL = DAG.getNode(ISD::ADD, dl, HiLoVT, RemL, PartialRem);
7814 }
7815 Result.push_back(RemL);
7816 Result.push_back(DAG.getConstant(0, dl, HiLoVT));
7817 }
7818
7819 return true;
7820}
7821
7822// Check that (every element of) Z is undef or not an exact multiple of BW.
7823 static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) {
// NOTE(review): the extraction dropped original line 7824, the call head of
// this return expression (presumably `return ISD::matchUnaryPredicate(` with
// AllowUndefs as the trailing `true` — confirm against the full source).
// The predicate accepts undef lanes (!C) and any constant whose value is not
// an exact multiple of the bit width BW.
7825 Z,
7826 [=](ConstantSDNode *C) { return !C || C->getAPIntValue().urem(BW) != 0; },
7827 true);
7828}
7829
// Tail of expandVPFunnelShift (the signature line, original 7830, was dropped
// by the extraction). Expands a VP_FSHL/VP_FSHR node into vector-predicated
// shift/or arithmetic, threading the Mask and VL operands through every node:
// - If the shift amount is provably non-zero mod BW, two complementary
//   shifts OR'd together suffice.
// - Otherwise use the extra-1-shift form so a zero amount never produces an
//   out-of-range (== BW) shift.
7831 EVT VT = Node->getValueType(0);
7832 SDValue ShX, ShY;
7833 SDValue ShAmt, InvShAmt;
7834 SDValue X = Node->getOperand(0);
7835 SDValue Y = Node->getOperand(1);
7836 SDValue Z = Node->getOperand(2);
7837 SDValue Mask = Node->getOperand(3);
7838 SDValue VL = Node->getOperand(4);
7839
7840 unsigned BW = VT.getScalarSizeInBits();
7841 bool IsFSHL = Node->getOpcode() == ISD::VP_FSHL;
7842 SDLoc DL(SDValue(Node, 0));
7843
7844 EVT ShVT = Z.getValueType();
7845 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
7846 // fshl: X << C | Y >> (BW - C)
7847 // fshr: X << (BW - C) | Y >> C
7848 // where C = Z % BW is not zero
7849 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
7850 ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
7851 InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitWidthC, ShAmt, Mask, VL);
7852 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt, Mask,
7853 VL);
7854 ShY = DAG.getNode(ISD::VP_LSHR, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt, Mask,
7855 VL);
7856 } else {
7857 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
7858 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
7859 SDValue BitMask = DAG.getConstant(BW - 1, DL, ShVT);
7860 if (isPowerOf2_32(BW)) {
7861 // Z % BW -> Z & (BW - 1)
7862 ShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, Z, BitMask, Mask, VL);
7863 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
7864 SDValue NotZ = DAG.getNode(ISD::VP_XOR, DL, ShVT, Z,
7865 DAG.getAllOnesConstant(DL, ShVT), Mask, VL);
7866 InvShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, NotZ, BitMask, Mask, VL);
7867 } else {
7868 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
7869 ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
7870 InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitMask, ShAmt, Mask, VL);
7871 }
7872
7873 SDValue One = DAG.getConstant(1, DL, ShVT);
7874 if (IsFSHL) {
7875 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, ShAmt, Mask, VL);
7876 SDValue ShY1 = DAG.getNode(ISD::VP_LSHR, DL, VT, Y, One, Mask, VL);
7877 ShY = DAG.getNode(ISD::VP_LSHR, DL, VT, ShY1, InvShAmt, Mask, VL);
7878 } else {
7879 SDValue ShX1 = DAG.getNode(ISD::VP_SHL, DL, VT, X, One, Mask, VL);
7880 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, ShX1, InvShAmt, Mask, VL);
7881 ShY = DAG.getNode(ISD::VP_LSHR, DL, VT, Y, ShAmt, Mask, VL);
7882 }
7883 }
7884 return DAG.getNode(ISD::VP_OR, DL, VT, ShX, ShY, Mask, VL);
7885}
7886
// Tail of TargetLowering::expandFunnelShift (the first signature line,
// original 7887, was dropped by the extraction, as were lines 7895-7897 —
// the remaining legality checks in the vector early-out). Expands FSHL/FSHR
// into shifts and an OR, preferring the reverse-direction funnel shift if
// that one is legal, and using the extra-1-shift form when the amount may be
// zero mod BW (so no single shift ever equals the bit width).
7888 SelectionDAG &DAG) const {
7889 if (Node->isVPOpcode())
7890 return expandVPFunnelShift(Node, DAG);
7891
7892 EVT VT = Node->getValueType(0);
7893
7894 if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
7898 return SDValue();
7899
7900 SDValue X = Node->getOperand(0);
7901 SDValue Y = Node->getOperand(1);
7902 SDValue Z = Node->getOperand(2);
7903
7904 unsigned BW = VT.getScalarSizeInBits();
7905 bool IsFSHL = Node->getOpcode() == ISD::FSHL;
7906 SDLoc DL(SDValue(Node, 0));
7907
7908 EVT ShVT = Z.getValueType();
7909
7910 // If a funnel shift in the other direction is more supported, use it.
7911 unsigned RevOpcode = IsFSHL ? ISD::FSHR : ISD::FSHL;
7912 if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
7913 isOperationLegalOrCustom(RevOpcode, VT) && isPowerOf2_32(BW)) {
7914 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
7915 // fshl X, Y, Z -> fshr X, Y, -Z
7916 // fshr X, Y, Z -> fshl X, Y, -Z
7917 SDValue Zero = DAG.getConstant(0, DL, ShVT);
// NOTE(review): the SUB below is created with result type VT while both
// operands are ShVT; this only type-checks when ShVT == VT here — confirm
// against the full source whether that invariant is guaranteed.
7918 Z = DAG.getNode(ISD::SUB, DL, VT, Zero, Z);
7919 } else {
7920 // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
7921 // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
7922 SDValue One = DAG.getConstant(1, DL, ShVT);
7923 if (IsFSHL) {
7924 Y = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
7925 X = DAG.getNode(ISD::SRL, DL, VT, X, One);
7926 } else {
7927 X = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
7928 Y = DAG.getNode(ISD::SHL, DL, VT, Y, One);
7929 }
7930 Z = DAG.getNOT(DL, Z, ShVT);
7931 }
7932 return DAG.getNode(RevOpcode, DL, VT, X, Y, Z);
7933 }
7934
7935 SDValue ShX, ShY;
7936 SDValue ShAmt, InvShAmt;
7937 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
7938 // fshl: X << C | Y >> (BW - C)
7939 // fshr: X << (BW - C) | Y >> C
7940 // where C = Z % BW is not zero
7941 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
7942 ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
7943 InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt);
7944 ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt);
7945 ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt);
7946 } else {
7947 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
7948 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
7949 SDValue Mask = DAG.getConstant(BW - 1, DL, ShVT);
7950 if (isPowerOf2_32(BW)) {
7951 // Z % BW -> Z & (BW - 1)
7952 ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask);
7953 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
7954 InvShAmt = DAG.getNode(ISD::AND, DL, ShVT, DAG.getNOT(DL, Z, ShVT), Mask);
7955 } else {
7956 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
7957 ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
7958 InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, Mask, ShAmt);
7959 }
7960
7961 SDValue One = DAG.getConstant(1, DL, ShVT);
7962 if (IsFSHL) {
7963 ShX = DAG.getNode(ISD::SHL, DL, VT, X, ShAmt);
7964 SDValue ShY1 = DAG.getNode(ISD::SRL, DL, VT, Y, One);
7965 ShY = DAG.getNode(ISD::SRL, DL, VT, ShY1, InvShAmt);
7966 } else {
7967 SDValue ShX1 = DAG.getNode(ISD::SHL, DL, VT, X, One);
7968 ShX = DAG.getNode(ISD::SHL, DL, VT, ShX1, InvShAmt);
7969 ShY = DAG.getNode(ISD::SRL, DL, VT, Y, ShAmt);
7970 }
7971 }
7972 return DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
7973}
7974
7975// TODO: Merge with expandFunnelShift.
// Expand ROTL/ROTR into shifts and an OR. Prefers the reverse rotate when
// that opcode is legal (rot x, c == revrot x, -c for power-of-two widths);
// otherwise builds the classic two-shift form, using masked amounts for
// power-of-two widths and the extra-1-shift form otherwise so no shift
// amount can equal the element width.
// NOTE(review): original lines 7997-8001 (the remaining legality checks in
// the !AllowVectorOps vector early-out) were dropped by the extraction.
7976 SDValue TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps,
7977 SelectionDAG &DAG) const {
7978 EVT VT = Node->getValueType(0);
7979 unsigned EltSizeInBits = VT.getScalarSizeInBits();
7980 bool IsLeft = Node->getOpcode() == ISD::ROTL;
7981 SDValue Op0 = Node->getOperand(0);
7982 SDValue Op1 = Node->getOperand(1);
7983 SDLoc DL(SDValue(Node, 0));
7984
7985 EVT ShVT = Op1.getValueType();
7986 SDValue Zero = DAG.getConstant(0, DL, ShVT);
7987
7988 // If a rotate in the other direction is more supported, use it.
7989 unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
7990 if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
7991 isOperationLegalOrCustom(RevRot, VT) && isPowerOf2_32(EltSizeInBits)) {
7992 SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
7993 return DAG.getNode(RevRot, DL, VT, Op0, Sub);
7994 }
7995
7996 if (!AllowVectorOps && VT.isVector() &&
8002 return SDValue();
8003
8004 unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
8005 unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
8006 SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
8007 SDValue ShVal;
8008 SDValue HsVal;
8009 if (isPowerOf2_32(EltSizeInBits)) {
8010 // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
8011 // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
8012 SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
8013 SDValue ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC);
8014 ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
8015 SDValue HsAmt = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC);
8016 HsVal = DAG.getNode(HsOpc, DL, VT, Op0, HsAmt);
8017 } else {
8018 // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
8019 // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
8020 SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
8021 SDValue ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Op1, BitWidthC);
8022 ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
8023 SDValue HsAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthMinusOneC, ShAmt);
8024 SDValue One = DAG.getConstant(1, DL, ShVT);
8025 HsVal =
8026 DAG.getNode(HsOpc, DL, VT, DAG.getNode(HsOpc, DL, VT, Op0, One), HsAmt);
8027 }
8028 return DAG.getNode(ISD::OR, DL, VT, ShVal, HsVal);
8029}
8030
// Expand an SHL_PARTS/SRL_PARTS/SRA_PARTS double-wide shift into an FSHL/FSHR
// plus a plain shift, selecting between the candidates for large amounts.
// Results are written to the Lo/Hi reference parameters.
// NOTE(review): the first line of this signature (upstream:
// "void TargetLowering::expandShiftParts(SDNode *Node, SDValue &Lo,
// SDValue &Hi,") appears to be missing from this copy of the file.
                                       SelectionDAG &DAG) const {
  assert(Node->getNumOperands() == 3 && "Not a double-shift!");
  EVT VT = Node->getValueType(0);
  // Width of one part; the full shifted value is 2*VTBits wide.
  unsigned VTBits = VT.getScalarSizeInBits();
  assert(isPowerOf2_32(VTBits) && "Power-of-two integer type expected");

  bool IsSHL = Node->getOpcode() == ISD::SHL_PARTS;
  bool IsSRA = Node->getOpcode() == ISD::SRA_PARTS;
  SDValue ShOpLo = Node->getOperand(0); // Low part of the input.
  SDValue ShOpHi = Node->getOperand(1); // High part of the input.
  SDValue ShAmt = Node->getOperand(2);  // Shift amount.
  EVT ShAmtVT = ShAmt.getValueType();
  EVT ShAmtCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ShAmtVT);
  SDLoc dl(Node);

  // ISD::FSHL and ISD::FSHR have defined overflow behavior but ISD::SHL and
  // ISD::SRA/L nodes haven't. Insert an AND to be safe, it's usually optimized
  // away during isel.
  SDValue SafeShAmt = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
                                  DAG.getConstant(VTBits - 1, dl, ShAmtVT));
  // Fill value for the part shifted entirely out: replicated sign of the high
  // part for arithmetic shifts, zero otherwise.
  SDValue Tmp1 = IsSRA ? DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
                                     DAG.getConstant(VTBits - 1, dl, ShAmtVT))
                       : DAG.getConstant(0, dl, VT);

  SDValue Tmp2, Tmp3;
  if (IsSHL) {
    Tmp2 = DAG.getNode(ISD::FSHL, dl, VT, ShOpHi, ShOpLo, ShAmt);
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, SafeShAmt);
  } else {
    Tmp2 = DAG.getNode(ISD::FSHR, dl, VT, ShOpHi, ShOpLo, ShAmt);
    Tmp3 = DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, SafeShAmt);
  }

  // If the shift amount is larger or equal than the width of a part we don't
  // use the result from the FSHL/FSHR. Insert a test and select the appropriate
  // values for large shift amounts.
  SDValue AndNode = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
                                DAG.getConstant(VTBits, dl, ShAmtVT));
  SDValue Cond = DAG.getSetCC(dl, ShAmtCCVT, AndNode,
                              DAG.getConstant(0, dl, ShAmtVT), ISD::SETNE);

  if (IsSHL) {
    Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
    Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
  } else {
    Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
    Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
  }
}
8082
// Expand FP_TO_SINT via integer bit manipulation on the float representation,
// following compiler-rt's fixsfdi. Writes the lowered value to the Result
// reference parameter and returns true on success, false if unsupported.
// NOTE(review): the first line of this signature (upstream:
// "bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,")
// appears to be missing from this copy of the file.
                                      SelectionDAG &DAG) const {
  unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
  SDValue Src = Node->getOperand(OpNo);
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);
  SDLoc dl(SDValue(Node, 0));

  // FIXME: Only f32 to i64 conversions are supported.
  if (SrcVT != MVT::f32 || DstVT != MVT::i64)
    return false;

  if (Node->isStrictFPOpcode())
    // When a NaN is converted to an integer a trap is allowed. We can't
    // use this expansion here because it would eliminate that trap. Other
    // traps are also allowed and cannot be eliminated. See
    // IEEE 754-2008 sec 5.8.
    return false;

  // Expand f32 -> i64 conversion
  // This algorithm comes from compiler-rt's implementation of fixsfdi:
  // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
  unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
  EVT IntVT = SrcVT.changeTypeToInteger();
  EVT IntShVT = getShiftAmountTy(IntVT, DAG.getDataLayout());

  // IEEE-754 single-precision field constants: 8-bit exponent at bit 23,
  // bias 127, 23-bit mantissa.
  SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT);
  SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT);
  SDValue Bias = DAG.getConstant(127, dl, IntVT);
  SDValue SignMask = DAG.getConstant(APInt::getSignMask(SrcEltBits), dl, IntVT);
  SDValue SignLowBit = DAG.getConstant(SrcEltBits - 1, dl, IntVT);
  SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT);

  // Reinterpret the float as its raw bit pattern.
  SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Src);

  // Unbiased exponent = ((Bits & ExponentMask) >> 23) - 127.
  SDValue ExponentBits = DAG.getNode(
      ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
      DAG.getZExtOrTrunc(ExponentLoBit, dl, IntShVT));
  SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);

  // Sign = all-ones when negative, zero when positive (arithmetic shift of
  // the isolated sign bit), then widened to the destination type.
  SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT,
                             DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
                             DAG.getZExtOrTrunc(SignLowBit, dl, IntShVT));
  Sign = DAG.getSExtOrTrunc(Sign, dl, DstVT);

  // Significand with the implicit leading 1 made explicit.
  SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
                          DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
                          DAG.getConstant(0x00800000, dl, IntVT));

  R = DAG.getZExtOrTrunc(R, dl, DstVT);

  // Scale the significand by 2^(Exponent - 23): shift left when the unbiased
  // exponent exceeds the mantissa width, otherwise shift right.
  R = DAG.getSelectCC(
      dl, Exponent, ExponentLoBit,
      DAG.getNode(ISD::SHL, dl, DstVT, R,
                  DAG.getZExtOrTrunc(
                      DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
                      dl, IntShVT)),
      DAG.getNode(ISD::SRL, dl, DstVT, R,
                  DAG.getZExtOrTrunc(
                      DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
                      dl, IntShVT)),
      ISD::SETGT);

  // Apply the sign: (R ^ Sign) - Sign negates R when Sign is all-ones.
  SDValue Ret = DAG.getNode(ISD::SUB, dl, DstVT,
                            DAG.getNode(ISD::XOR, dl, DstVT, R, Sign), Sign);

  // A negative unbiased exponent means |Src| < 1, which truncates to 0.
  Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT),
                           DAG.getConstant(0, dl, DstVT), Ret, ISD::SETLT);
  return true;
}
8153
// Expand FP_TO_UINT (and its strict variant) in terms of FP_TO_SINT by
// offsetting values at or above the destination sign-mask. Writes Result
// (and Chain for strict ops) and returns true on success.
// NOTE(review): the first signature line and several multi-line condition
// continuations below appear to be missing from this copy of the file --
// compare with upstream before compiling.
                                      SDValue &Chain,
                                      SelectionDAG &DAG) const {
  SDLoc dl(SDValue(Node, 0));
  unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
  SDValue Src = Node->getOperand(OpNo);

  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);
  EVT SetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
  EVT DstSetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);

  // Only expand vector types if we have the appropriate vector bit operations.
  unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
  if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) ||
    return false;

  // If the maximum float value is smaller then the signed integer range,
  // the destination signmask can't be represented by the float, so we can
  // just use FP_TO_SINT directly.
  const fltSemantics &APFSem = DAG.EVTToAPFloatSemantics(SrcVT);
  APFloat APF(APFSem, APInt::getZero(SrcVT.getScalarSizeInBits()));
  APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());
      APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
    if (Node->isStrictFPOpcode()) {
      Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
                           { Node->getOperand(0), Src });
      Chain = Result.getValue(1);
    } else
      Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
    return true;
  }

  // Don't expand it if there isn't cheap fsub instruction.
      Node->isStrictFPOpcode() ? ISD::STRICT_FSUB : ISD::FSUB, SrcVT))
    return false;

  SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
  SDValue Sel;

  // Sel = (Src < signmask-as-float). Strict ops use a signaling compare and
  // thread the chain through.
  if (Node->isStrictFPOpcode()) {
    Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
                       Node->getOperand(0), /*IsSignaling*/ true);
    Chain = Sel.getValue(1);
  } else {
    Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
  }

  bool Strict = Node->isStrictFPOpcode() ||
                shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);

  if (Strict) {
    // Expand based on maximum range of FP_TO_SINT, if the value exceeds the
    // signmask then offset (the result of which should be fully representable).
    // Sel = Src < 0x8000000000000000
    // FltOfs = select Sel, 0, 0x8000000000000000
    // IntOfs = select Sel, 0, 0x8000000000000000
    // Result = fp_to_sint(Src - FltOfs) ^ IntOfs

    // TODO: Should any fast-math-flags be set for the FSUB?
    SDValue FltOfs = DAG.getSelect(dl, SrcVT, Sel,
                                   DAG.getConstantFP(0.0, dl, SrcVT), Cst);
    Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
    SDValue IntOfs = DAG.getSelect(dl, DstVT, Sel,
                                   DAG.getConstant(0, dl, DstVT),
                                   DAG.getConstant(SignMask, dl, DstVT));
    SDValue SInt;
    if (Node->isStrictFPOpcode()) {
      SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
                                { Chain, Src, FltOfs });
      SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
                         { Val.getValue(1), Val });
      Chain = SInt.getValue(1);
    } else {
      SDValue Val = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, FltOfs);
      SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val);
    }
    Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
  } else {
    // Expand based on maximum range of FP_TO_SINT:
    // True = fp_to_sint(Src)
    // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
    // Result = select (Src < 0x8000000000000000), True, False

    SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
    // TODO: Should any fast-math-flags be set for the FSUB?
    SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT,
                                DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
    False = DAG.getNode(ISD::XOR, dl, DstVT, False,
                        DAG.getConstant(SignMask, dl, DstVT));
    Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
    Result = DAG.getSelect(dl, DstVT, Sel, True, False);
  }
  return true;
}
8255
// Expand unsigned i64 -> f64 conversion by splitting the source into 32-bit
// halves, building a biased double from each via bit tricks, and summing.
// Writes Result and returns true on success, false if unsupported.
// NOTE(review): the first signature line and the continuation of the
// vector-legality condition below appear to be missing from this copy of
// the file -- compare with upstream before compiling.
                                      SDValue &Chain,
                                      SelectionDAG &DAG) const {
  // This transform is not correct for converting 0 when rounding mode is set
  // to round toward negative infinity which will produce -0.0. So disable under
  // strictfp.
  if (Node->isStrictFPOpcode())
    return false;

  SDValue Src = Node->getOperand(0);
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);

  if (SrcVT.getScalarType() != MVT::i64 || DstVT.getScalarType() != MVT::f64)
    return false;

  // Only expand vector types if we have the appropriate vector bit operations.
  if (SrcVT.isVector() && (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
    return false;

  SDLoc dl(SDValue(Node, 0));
  EVT ShiftVT = getShiftAmountTy(SrcVT, DAG.getDataLayout());

  // Implementation of unsigned i64 to f64 following the algorithm in
  // __floatundidf in compiler_rt. This implementation performs rounding
  // correctly in all rounding modes with the exception of converting 0
  // when rounding toward negative infinity. In that case the fsub will produce
  // -0.0. This will be added to +0.0 and produce -0.0 which is incorrect.
  SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
  SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
      llvm::bit_cast<double>(UINT64_C(0x4530000000100000)), dl, DstVT);
  SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT);
  SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT);
  SDValue HiShift = DAG.getConstant(32, dl, ShiftVT);

  // Lo|2^52 and Hi|2^84, bitcast to double, are exact doubles whose mantissa
  // fields hold the respective 32-bit halves.
  SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask);
  SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift);
  SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52);
  SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
  SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
  SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
  // Remove the combined bias (2^84 + 2^52) from the high half, then add in
  // the low half.
  SDValue HiSub =
      DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
  Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
  return true;
}
8306
// Lower FMINNUM/FMAXNUM (or their strict variants) to a compare+select when
// the no-NaNs flag makes that semantically valid; otherwise return an empty
// SDValue.
SDValue
// NOTE(review): the middle line of this signature (upstream:
// "TargetLowering::createSelectForFMINNUM_FMAXNUM(SDNode *Node,") appears
// to be missing from this copy of the file.
                                         SelectionDAG &DAG) const {
  unsigned Opcode = Node->getOpcode();
  assert((Opcode == ISD::FMINNUM || Opcode == ISD::FMAXNUM ||
          Opcode == ISD::STRICT_FMINNUM || Opcode == ISD::STRICT_FMAXNUM) &&
         "Wrong opcode");

  // Without NaNs, min/max reduces to a plain ordered compare and select.
  if (Node->getFlags().hasNoNaNs()) {
    ISD::CondCode Pred = Opcode == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
    SDValue Op1 = Node->getOperand(0);
    SDValue Op2 = Node->getOperand(1);
    SDValue SelCC = DAG.getSelectCC(SDLoc(Node), Op1, Op2, Op1, Op2, Pred);
    // Copy FMF flags, but always set the no-signed-zeros flag
    // as this is implied by the FMINNUM/FMAXNUM semantics.
    SDNodeFlags Flags = Node->getFlags();
    Flags.setNoSignedZeros(true);
    SelCC->setFlags(Flags);
    return SelCC;
  }

  // NaNs possible: a bare select would not implement FMINNUM/FMAXNUM's
  // NaN-handling, so decline.
  return SDValue();
}
8330
// Expand FMINNUM/FMAXNUM via the IEEE variant (with sNaN quieting), via
// FMINIMUM/FMAXIMUM when NaN/zero concerns are provably absent, or via a
// compare+select; returns an empty SDValue if no strategy applies.
// NOTE(review): the first signature line, the "? :" continuation initializing
// NewOp, and the report_fatal_error call line appear to be missing from this
// copy of the file -- compare with upstream before compiling.
                                            SelectionDAG &DAG) const {
  SDLoc dl(Node);
  unsigned NewOp = Node->getOpcode() == ISD::FMINNUM ?
  EVT VT = Node->getValueType(0);

  // Scalable vectors cannot be handled by this expansion.
  if (VT.isScalableVector())
        "Expanding fminnum/fmaxnum for scalable vectors is undefined.");

  if (isOperationLegalOrCustom(NewOp, VT)) {
    SDValue Quiet0 = Node->getOperand(0);
    SDValue Quiet1 = Node->getOperand(1);

    if (!Node->getFlags().hasNoNaNs()) {
      // Insert canonicalizes if it's possible we need to quiet to get correct
      // sNaN behavior.
      if (!DAG.isKnownNeverSNaN(Quiet0)) {
        Quiet0 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet0,
                             Node->getFlags());
      }
      if (!DAG.isKnownNeverSNaN(Quiet1)) {
        Quiet1 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet1,
                             Node->getFlags());
      }
    }

    return DAG.getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags());
  }

  // If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that
  // instead if there are no NaNs and there can't be an incompatible zero
  // compare: at least one operand isn't +/-0, or there are no signed-zeros.
  if ((Node->getFlags().hasNoNaNs() ||
       (DAG.isKnownNeverNaN(Node->getOperand(0)) &&
        DAG.isKnownNeverNaN(Node->getOperand(1)))) &&
      (Node->getFlags().hasNoSignedZeros() ||
       DAG.isKnownNeverZeroFloat(Node->getOperand(0)) ||
       DAG.isKnownNeverZeroFloat(Node->getOperand(1)))) {
    unsigned IEEE2018Op =
        Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
    if (isOperationLegalOrCustom(IEEE2018Op, VT))
      return DAG.getNode(IEEE2018Op, dl, VT, Node->getOperand(0),
                         Node->getOperand(1), Node->getFlags());
  }

  // Last resort: compare+select, valid only under no-NaNs.
  if (SDValue SelCC = createSelectForFMINNUM_FMAXNUM(Node, DAG))
    return SelCC;

  return SDValue();
}
8383
8384/// Returns a true value if if this FPClassTest can be performed with an ordered
8385/// fcmp to 0, and a false value if it's an unordered fcmp to 0. Returns
8386/// std::nullopt if it cannot be performed as a compare with 0.
8387static std::optional<bool> isFCmpEqualZero(FPClassTest Test,
8388 const fltSemantics &Semantics,
8389 const MachineFunction &MF) {
8390 FPClassTest OrderedMask = Test & ~fcNan;
8391 FPClassTest NanTest = Test & fcNan;
8392 bool IsOrdered = NanTest == fcNone;
8393 bool IsUnordered = NanTest == fcNan;
8394
8395 // Skip cases that are testing for only a qnan or snan.
8396 if (!IsOrdered && !IsUnordered)
8397 return std::nullopt;
8398
8399 if (OrderedMask == fcZero &&
8400 MF.getDenormalMode(Semantics).Input == DenormalMode::IEEE)
8401 return IsOrdered;
8402 if (OrderedMask == (fcZero | fcSubnormal) &&
8403 MF.getDenormalMode(Semantics).inputsAreZero())
8404 return IsOrdered;
8405 return std::nullopt;
8406}
8407
// Expand an is_fpclass test (llvm.is.fpclass) into float compares when
// allowed, otherwise into integer bit tests on the value's representation.
// NOTE(review): the first signature lines (upstream: "SDValue
// TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op, FPClassTest
// Test, SDNodeFlags Flags,") and several multi-line condition continuations
// below appear to be missing from this copy of the file.
                                        const SDLoc &DL,
                                        SelectionDAG &DAG) const {
  EVT OperandVT = Op.getValueType();
  assert(OperandVT.isFloatingPoint());

  // Degenerated cases.
  if (Test == fcNone)
    return DAG.getBoolConstant(false, DL, ResultVT, OperandVT);
  if ((Test & fcAllFlags) == fcAllFlags)
    return DAG.getBoolConstant(true, DL, ResultVT, OperandVT);

  // PPC double double is a pair of doubles, of which the higher part determines
  // the value class.
  if (OperandVT == MVT::ppcf128) {
    Op = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::f64, Op,
                     DAG.getConstant(1, DL, MVT::i32));
    OperandVT = MVT::f64;
  }

  // Some checks may be represented as inversion of simpler check, for example
  // "inf|normal|subnormal|zero" => !"nan".
  bool IsInverted = false;
  if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test)) {
    IsInverted = true;
    Test = InvertedCheck;
  }

  // Floating-point type properties.
  EVT ScalarFloatVT = OperandVT.getScalarType();
  const Type *FloatTy = ScalarFloatVT.getTypeForEVT(*DAG.getContext());
  const llvm::fltSemantics &Semantics = FloatTy->getFltSemantics();
  // x87 extended precision has an explicit integer bit, handled specially.
  bool IsF80 = (ScalarFloatVT == MVT::f80);

  // Some checks can be implemented using float comparisons, if floating point
  // exceptions are ignored.
  // NOTE(review): the continuation of this condition is missing here.
  if (Flags.hasNoFPExcept() &&
    ISD::CondCode OrderedCmpOpcode = IsInverted ? ISD::SETUNE : ISD::SETOEQ;
    ISD::CondCode UnorderedCmpOpcode = IsInverted ? ISD::SETONE : ISD::SETUEQ;

    if (std::optional<bool> IsCmp0 =
            isFCmpEqualZero(Test, Semantics, DAG.getMachineFunction());
        IsCmp0 && (isCondCodeLegalOrCustom(
                      *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode,
                      OperandVT.getScalarType().getSimpleVT()))) {

      // If denormals could be implicitly treated as 0, this is not equivalent
      // to a compare with 0 since it will also be true for denormals.
      return DAG.getSetCC(DL, ResultVT, Op,
                          DAG.getConstantFP(0.0, DL, OperandVT),
                          *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode);
    }

    // isnan(x) --> x != x (unordered compare of x with itself).
    if (Test == fcNan &&
                                OperandVT.getScalarType().getSimpleVT())) {
      return DAG.getSetCC(DL, ResultVT, Op, Op,
                          IsInverted ? ISD::SETO : ISD::SETUO);
    }

    if (Test == fcInf &&
                                 OperandVT.getScalarType().getSimpleVT()) &&
      // isinf(x) --> fabs(x) == inf
      SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
      SDValue Inf =
          DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
      return DAG.getSetCC(DL, ResultVT, Abs, Inf,
                          IsInverted ? ISD::SETUNE : ISD::SETOEQ);
    }
  }

  // In the general case use integer operations.
  unsigned BitSize = OperandVT.getScalarSizeInBits();
  EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), BitSize);
  if (OperandVT.isVector())
    IntVT = EVT::getVectorVT(*DAG.getContext(), IntVT,
                             OperandVT.getVectorElementCount());
  SDValue OpAsInt = DAG.getBitcast(IntVT, Op);

  // Various masks.
  APInt SignBit = APInt::getSignMask(BitSize);
  APInt ValueMask = APInt::getSignedMaxValue(BitSize);     // All bits but sign.
  APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
  const unsigned ExplicitIntBitInF80 = 63;
  APInt ExpMask = Inf;
  if (IsF80)
    ExpMask.clearBit(ExplicitIntBitInF80);
  APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
  // The quiet bit is the top bit of the mantissa field.
  APInt QNaNBitMask =
      APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
  APInt InvertionMask = APInt::getAllOnes(ResultVT.getScalarSizeInBits());

  SDValue ValueMaskV = DAG.getConstant(ValueMask, DL, IntVT);
  SDValue SignBitV = DAG.getConstant(SignBit, DL, IntVT);
  SDValue ExpMaskV = DAG.getConstant(ExpMask, DL, IntVT);
  SDValue ZeroV = DAG.getConstant(0, DL, IntVT);
  SDValue InfV = DAG.getConstant(Inf, DL, IntVT);
  SDValue ResultInvertionMask = DAG.getConstant(InvertionMask, DL, ResultVT);

  SDValue Res;
  // OR a per-class partial result into the accumulated result.
  const auto appendResult = [&](SDValue PartialRes) {
    if (PartialRes) {
      if (Res)
        Res = DAG.getNode(ISD::OR, DL, ResultVT, Res, PartialRes);
      else
        Res = PartialRes;
    }
  };

  SDValue IntBitIsSetV; // Explicit integer bit in f80 mantissa is set.
  // Lazily build (and cache) the f80 explicit-integer-bit test.
  const auto getIntBitIsSet = [&]() -> SDValue {
    if (!IntBitIsSetV) {
      APInt IntBitMask(BitSize, 0);
      IntBitMask.setBit(ExplicitIntBitInF80);
      SDValue IntBitMaskV = DAG.getConstant(IntBitMask, DL, IntVT);
      SDValue IntBitV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, IntBitMaskV);
      IntBitIsSetV = DAG.getSetCC(DL, ResultVT, IntBitV, ZeroV, ISD::SETNE);
    }
    return IntBitIsSetV;
  };

  // Split the value into sign bit and absolute value.
  SDValue AbsV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ValueMaskV);
  // NOTE(review): 0.0 here is a double implicitly converted to the integer 0;
  // presumably DAG.getConstant(0, DL, IntVT) was intended -- same value,
  // clearer intent.
  SDValue SignV = DAG.getSetCC(DL, ResultVT, OpAsInt,
                               DAG.getConstant(0.0, DL, IntVT), ISD::SETLT);

  // Tests that involve more than one class should be processed first.
  SDValue PartialRes;

  if (IsF80)
    ; // Detect finite numbers of f80 by checking individual classes because
      // they have different settings of the explicit integer bit.
  else if ((Test & fcFinite) == fcFinite) {
    // finite(V) ==> abs(V) < exp_mask
    PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
    Test &= ~fcFinite;
  } else if ((Test & fcFinite) == fcPosFinite) {
    // finite(V) && V > 0 ==> V < exp_mask
    PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ExpMaskV, ISD::SETULT);
    Test &= ~fcPosFinite;
  } else if ((Test & fcFinite) == fcNegFinite) {
    // finite(V) && V < 0 ==> abs(V) < exp_mask && signbit == 1
    PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
    PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
    Test &= ~fcNegFinite;
  }
  appendResult(PartialRes);

  if (FPClassTest PartialCheck = Test & (fcZero | fcSubnormal)) {
    // fcZero | fcSubnormal => test all exponent bits are 0
    // TODO: Handle sign bit specific cases
    if (PartialCheck == (fcZero | fcSubnormal)) {
      SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ExpMaskV);
      SDValue ExpIsZero =
          DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
      appendResult(ExpIsZero);
      Test &= ~PartialCheck & fcAllFlags;
    }
  }

  // Check for individual classes.

  if (unsigned PartialCheck = Test & fcZero) {
    if (PartialCheck == fcPosZero)
      PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ZeroV, ISD::SETEQ);
    else if (PartialCheck == fcZero)
      PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ZeroV, ISD::SETEQ);
    else // ISD::fcNegZero
      PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, SignBitV, ISD::SETEQ);
    appendResult(PartialRes);
  }

  if (unsigned PartialCheck = Test & fcSubnormal) {
    // issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set)
    // issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set)
    SDValue V = (PartialCheck == fcPosSubnormal) ? OpAsInt : AbsV;
    SDValue MantissaV = DAG.getConstant(AllOneMantissa, DL, IntVT);
    SDValue VMinusOneV =
        DAG.getNode(ISD::SUB, DL, IntVT, V, DAG.getConstant(1, DL, IntVT));
    PartialRes = DAG.getSetCC(DL, ResultVT, VMinusOneV, MantissaV, ISD::SETULT);
    if (PartialCheck == fcNegSubnormal)
      PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
    appendResult(PartialRes);
  }

  if (unsigned PartialCheck = Test & fcInf) {
    if (PartialCheck == fcPosInf)
      PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, InfV, ISD::SETEQ);
    else if (PartialCheck == fcInf)
      PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETEQ);
    else { // ISD::fcNegInf
      APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
      SDValue NegInfV = DAG.getConstant(NegInf, DL, IntVT);
      PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, NegInfV, ISD::SETEQ);
    }
    appendResult(PartialRes);
  }

  if (unsigned PartialCheck = Test & fcNan) {
    APInt InfWithQnanBit = Inf | QNaNBitMask;
    SDValue InfWithQnanBitV = DAG.getConstant(InfWithQnanBit, DL, IntVT);
    if (PartialCheck == fcNan) {
      // isnan(V) ==> abs(V) > int(inf)
      PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
      if (IsF80) {
        // Recognize unsupported values as NaNs for compatibility with glibc.
        // In them (exp(V)==0) == int_bit.
        SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, AbsV, ExpMaskV);
        SDValue ExpIsZero =
            DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
        SDValue IsPseudo =
            DAG.getSetCC(DL, ResultVT, getIntBitIsSet(), ExpIsZero, ISD::SETEQ);
        PartialRes = DAG.getNode(ISD::OR, DL, ResultVT, PartialRes, IsPseudo);
      }
    } else if (PartialCheck == fcQNan) {
      // isquiet(V) ==> abs(V) >= (unsigned(Inf) | quiet_bit)
      PartialRes =
          DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETGE);
    } else { // ISD::fcSNan
      // issignaling(V) ==> abs(V) > unsigned(Inf) &&
      //                    abs(V) < (unsigned(Inf) | quiet_bit)
      SDValue IsNan = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
      SDValue IsNotQnan =
          DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETLT);
      PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, IsNan, IsNotQnan);
    }
    appendResult(PartialRes);
  }

  if (unsigned PartialCheck = Test & fcNormal) {
    // isnormal(V) ==> (0 < exp < max_exp) ==> (unsigned(exp-1) < (max_exp-1))
    APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
    SDValue ExpLSBV = DAG.getConstant(ExpLSB, DL, IntVT);
    SDValue ExpMinus1 = DAG.getNode(ISD::SUB, DL, IntVT, AbsV, ExpLSBV);
    APInt ExpLimit = ExpMask - ExpLSB;
    SDValue ExpLimitV = DAG.getConstant(ExpLimit, DL, IntVT);
    PartialRes = DAG.getSetCC(DL, ResultVT, ExpMinus1, ExpLimitV, ISD::SETULT);
    if (PartialCheck == fcNegNormal)
      PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
    else if (PartialCheck == fcPosNormal) {
      SDValue PosSignV =
          DAG.getNode(ISD::XOR, DL, ResultVT, SignV, ResultInvertionMask);
      PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, PosSignV);
    }
    if (IsF80)
      PartialRes =
          DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, getIntBitIsSet());
    appendResult(PartialRes);
  }

  // Undo the inversion applied at the top, if any.
  if (!Res)
    return DAG.getConstant(IsInverted, DL, ResultVT);
  if (IsInverted)
    Res = DAG.getNode(ISD::XOR, DL, ResultVT, Res, ResultInvertionMask);
  return Res;
}
8668
// Only expand vector types if we have the appropriate vector bit operations.
// NOTE(review): several operands of this && chain appear to be missing from
// this copy of the file; the expression below is truncated and will not
// compile as-is -- compare with upstream.
static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) {
  assert(VT.isVector() && "Expected vector type");
  unsigned Len = VT.getScalarSizeInBits();
  // ADD is always needed; MUL is only needed for the final byte-sum step,
  // which 8-bit elements skip.
  return TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
         (Len == 8 || TLI.isOperationLegalOrCustom(ISD::MUL, VT)) &&
}
8679
// Expand CTPOP using the parallel bit-counting algorithm ("best" method from
// Sean Anderson's Bit Twiddling Hacks). Returns the popcount value, or an
// empty SDValue when this type cannot be expanded here.
// NOTE(review): the signature line (upstream: "SDValue
// TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const {")
// and the multiply-legality condition near the end appear to be missing from
// this copy of the file.
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Op = Node->getOperand(0);
  unsigned Len = VT.getScalarSizeInBits();
  assert(VT.isInteger() && "CTPOP not implemented for this type.");

  // TODO: Add support for irregular type lengths.
  if (!(Len <= 128 && Len % 8 == 0))
    return SDValue();

  // Only expand vector types if we have the appropriate vector bit operations.
  if (VT.isVector() && !canExpandVectorCTPOP(*this, VT))
    return SDValue();

  // This is the "best" algorithm from
  // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
  SDValue Mask55 =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
  SDValue Mask33 =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
  SDValue Mask0F =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);

  // v = v - ((v >> 1) & 0x55555555...)
  Op = DAG.getNode(ISD::SUB, dl, VT, Op,
                   DAG.getNode(ISD::AND, dl, VT,
                               DAG.getNode(ISD::SRL, dl, VT, Op,
                                           DAG.getConstant(1, dl, ShVT)),
                               Mask55));
  // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
  Op = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
                   DAG.getNode(ISD::AND, dl, VT,
                               DAG.getNode(ISD::SRL, dl, VT, Op,
                                           DAG.getConstant(2, dl, ShVT)),
                               Mask33));
  // v = (v + (v >> 4)) & 0x0F0F0F0F...
  Op = DAG.getNode(ISD::AND, dl, VT,
                   DAG.getNode(ISD::ADD, dl, VT, Op,
                               DAG.getNode(ISD::SRL, dl, VT, Op,
                                           DAG.getConstant(4, dl, ShVT))),
                   Mask0F);

  // Each byte now holds its own popcount (0..8); done for 8-bit elements.
  if (Len <= 8)
    return Op;

  // Avoid the multiply if we only have 2 bytes to add.
  // TODO: Only doing this for scalars because vectors weren't as obviously
  // improved.
  if (Len == 16 && !VT.isVector()) {
    // v = (v + (v >> 8)) & 0x00FF;
    return DAG.getNode(ISD::AND, dl, VT,
                       DAG.getNode(ISD::ADD, dl, VT, Op,
                                   DAG.getNode(ISD::SRL, dl, VT, Op,
                                               DAG.getConstant(8, dl, ShVT))),
                       DAG.getConstant(0xFF, dl, VT));
  }

  // v = (v * 0x01010101...) >> (Len - 8)
  SDValue V;
    SDValue Mask01 =
        DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
    V = DAG.getNode(ISD::MUL, dl, VT, Op, Mask01);
  } else {
    // No multiply available: sum the per-byte counts with a shift/add tree.
    V = Op;
    for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
      SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
      V = DAG.getNode(ISD::ADD, dl, VT, V,
                      DAG.getNode(ISD::SHL, dl, VT, V, ShiftC));
    }
  }
  // The total accumulates in the top byte; shift it down.
  return DAG.getNode(ISD::SRL, dl, VT, V, DAG.getConstant(Len - 8, dl, ShVT));
}
8756
// Expand VP_CTPOP (vector-predicated popcount) with the same parallel
// bit-counting algorithm as expandCTPOP, using VP_* opcodes so the Mask and
// VL operands are honored. Returns an empty SDValue for unsupported widths.
// NOTE(review): the signature line and the start of the VP_MUL legality
// condition below appear to be missing from this copy of the file.
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Op = Node->getOperand(0);
  SDValue Mask = Node->getOperand(1); // Per-lane predicate mask.
  SDValue VL = Node->getOperand(2);   // Active vector length.
  unsigned Len = VT.getScalarSizeInBits();
  assert(VT.isInteger() && "VP_CTPOP not implemented for this type.");

  // TODO: Add support for irregular type lengths.
  if (!(Len <= 128 && Len % 8 == 0))
    return SDValue();

  // This is same algorithm of expandCTPOP from
  // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
  SDValue Mask55 =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
  SDValue Mask33 =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
  SDValue Mask0F =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);

  SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5;

  // v = v - ((v >> 1) & 0x55555555...)
  Tmp1 = DAG.getNode(ISD::VP_AND, dl, VT,
                     DAG.getNode(ISD::VP_LSHR, dl, VT, Op,
                                 DAG.getConstant(1, dl, ShVT), Mask, VL),
                     Mask55, Mask, VL);
  Op = DAG.getNode(ISD::VP_SUB, dl, VT, Op, Tmp1, Mask, VL);

  // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
  Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Op, Mask33, Mask, VL);
  Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT,
                     DAG.getNode(ISD::VP_LSHR, dl, VT, Op,
                                 DAG.getConstant(2, dl, ShVT), Mask, VL),
                     Mask33, Mask, VL);
  Op = DAG.getNode(ISD::VP_ADD, dl, VT, Tmp2, Tmp3, Mask, VL);

  // v = (v + (v >> 4)) & 0x0F0F0F0F...
  // NOTE(review): the comma operator joins these two assignments into one
  // statement; the effect is the same as two statements, but a semicolon
  // would read more clearly.
  Tmp4 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(4, dl, ShVT),
                     Mask, VL),
  Tmp5 = DAG.getNode(ISD::VP_ADD, dl, VT, Op, Tmp4, Mask, VL);
  Op = DAG.getNode(ISD::VP_AND, dl, VT, Tmp5, Mask0F, Mask, VL);

  // Each byte now holds its own popcount; done for 8-bit elements.
  if (Len <= 8)
    return Op;

  // v = (v * 0x01010101...) >> (Len - 8)
  SDValue V;
      ISD::VP_MUL, getTypeToTransformTo(*DAG.getContext(), VT))) {
    SDValue Mask01 =
        DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
    V = DAG.getNode(ISD::VP_MUL, dl, VT, Op, Mask01, Mask, VL);
  } else {
    // No VP multiply: sum the per-byte counts with a shift/add tree.
    V = Op;
    for (unsigned Shift = 8; Shift < Len; Shift *= 2) {
      SDValue ShiftC = DAG.getShiftAmountConstant(Shift, VT, dl);
      V = DAG.getNode(ISD::VP_ADD, dl, VT, V,
                      DAG.getNode(ISD::VP_SHL, dl, VT, V, ShiftC, Mask, VL),
                      Mask, VL);
    }
  }
  // The total accumulates in the top byte; shift it down.
  return DAG.getNode(ISD::VP_LSHR, dl, VT, V,
                     DAG.getConstant(Len - 8, dl, ShVT), Mask, VL);
}
8825
// Expand ISD::CTLZ / ISD::CTLZ_ZERO_UNDEF. Strategy, in order of preference:
// use the other CTLZ flavor if the target supports it; otherwise smear the
// highest set bit rightward with a shift/or ladder and count the zeros via
// CTPOP of the complement ("Hacker's Delight" nlz-from-popcount identity).
 8827  SDLoc dl(Node);
 8828  EVT VT = Node->getValueType(0);
 8829  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
 8830  SDValue Op = Node->getOperand(0);
 8831  unsigned NumBitsPerElt = VT.getScalarSizeInBits();
 8832
 8833  // If the non-ZERO_UNDEF version is supported we can use that instead.
 8834  if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
 8836  return DAG.getNode(ISD::CTLZ, dl, VT, Op);
 8837
 8838  // If the ZERO_UNDEF version is supported use that and handle the zero case.
 // (Select NumBitsPerElt when the source is zero, since ZERO_UNDEF leaves
 // that case undefined.)
 8840  EVT SetCCVT =
 8841  getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
 8842  SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op);
 8843  SDValue Zero = DAG.getConstant(0, dl, VT);
 8844  SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
 8845  return DAG.getSelect(dl, VT, SrcIsZero,
 8846  DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ);
 8847  }
 8848
 8849  // Only expand vector types if we have the appropriate vector bit operations.
 8850  // This includes the operations needed to expand CTPOP if it isn't supported.
 8851  if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
 8853  !canExpandVectorCTPOP(*this, VT)) ||
 8856  return SDValue();
 8857
 8858  // for now, we do this:
 8859  // x = x | (x >> 1);
 8860  // x = x | (x >> 2);
 8861  // ...
 8862  // x = x | (x >>16);
 8863  // x = x | (x >>32); // for 64-bit input
 8864  // return popcount(~x);
 8865  //
 8866  // Ref: "Hacker's Delight" by Henry Warren
 8867  for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
 8868  SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
 8869  Op = DAG.getNode(ISD::OR, dl, VT, Op,
 8870  DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));
 8871  }
 // After smearing, ~Op has a 1 exactly in each leading-zero position.
 8872  Op = DAG.getNOT(dl, Op, VT);
 8873  return DAG.getNode(ISD::CTPOP, dl, VT, Op);
 8874 }
8875
// Expand ISD::VP_CTLZ as VP_CTPOP of the complement of the smeared input —
// the same shift/or ladder as the vector path of expandCTLZ, but with every
// step carrying the node's Mask and VL (explicit vector length) operands.
 8877  SDLoc dl(Node);
 8878  EVT VT = Node->getValueType(0);
 8879  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
 8880  SDValue Op = Node->getOperand(0);
 8881  SDValue Mask = Node->getOperand(1);
 8882  SDValue VL = Node->getOperand(2);
 8883  unsigned NumBitsPerElt = VT.getScalarSizeInBits();
 8884
 8885  // do this:
 8886  // x = x | (x >> 1);
 8887  // x = x | (x >> 2);
 8888  // ...
 8889  // x = x | (x >>16);
 8890  // x = x | (x >>32); // for 64-bit input
 8891  // return popcount(~x);
 8892  for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
 8893  SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
 8894  Op = DAG.getNode(ISD::VP_OR, dl, VT, Op,
 8895  DAG.getNode(ISD::VP_LSHR, dl, VT, Op, Tmp, Mask, VL), Mask,
 8896  VL);
 8897  }
 // ~x expressed as x XOR all-ones, since there is no VP "NOT" helper.
 8898  Op = DAG.getNode(ISD::VP_XOR, dl, VT, Op, DAG.getConstant(-1, dl, VT), Mask,
 8899  VL);
 8900  return DAG.getNode(ISD::VP_CTPOP, dl, VT, Op, Mask, VL);
 8901 }
8902
 8904  const SDLoc &DL, EVT VT, SDValue Op,
 8905  unsigned BitWidth) const {
 // Lower count-trailing-zeros via the classic de Bruijn multiply-and-lookup
 // trick: isolate the lowest set bit with (x & -x), multiply by a de Bruijn
 // constant so the bit index lands in the top Log2(BitWidth) bits, then use
 // those bits to index a BitWidth-entry byte table in the constant pool.
 // Only 32- and 64-bit widths have a baked-in de Bruijn constant.
 8906  if (BitWidth != 32 && BitWidth != 64)
 8907  return SDValue();
 8908  APInt DeBruijn = BitWidth == 32 ? APInt(32, 0x077CB531U)
 8909  : APInt(64, 0x0218A392CD3D5DBFULL);
 8910  const DataLayout &TD = DAG.getDataLayout();
 8911  MachinePointerInfo PtrInfo =
 8913  unsigned ShiftAmt = BitWidth - Log2_32(BitWidth);
 // (x & -x) keeps only the lowest set bit of Op.
 8914  SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
 8915  SDValue Lookup = DAG.getNode(
 8916  ISD::SRL, DL, VT,
 8917  DAG.getNode(ISD::MUL, DL, VT, DAG.getNode(ISD::AND, DL, VT, Op, Neg),
 8918  DAG.getConstant(DeBruijn, DL, VT)),
 8919  DAG.getConstant(ShiftAmt, DL, VT));
 8921
 // Build the inverse permutation at compile time: entry (DeBruijn << i) >>
 // ShiftAmt maps back to bit index i.
 8923  for (unsigned i = 0; i < BitWidth; i++) {
 8924  APInt Shl = DeBruijn.shl(i);
 8925  APInt Lshr = Shl.lshr(ShiftAmt);
 8926  Table[Lshr.getZExtValue()] = i;
 8927  }
 8928
 8929  // Create a ConstantArray in Constant Pool
 8930  auto *CA = ConstantDataArray::get(*DAG.getContext(), Table);
 8931  SDValue CPIdx = DAG.getConstantPool(CA, getPointerTy(TD),
 8932  TD.getPrefTypeAlign(CA->getType()));
 // Zero-extending i8 load of Table[Lookup].
 8933  SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getEntryNode(),
 8934  DAG.getMemBasePlusOffset(CPIdx, Lookup, DL),
 8935  PtrInfo, MVT::i8);
 // ZERO_UNDEF callers don't care what a zero input produces, so the raw
 // table result is enough.
 8936  if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF)
 8937  return ExtLoad;
 8938
 // Plain CTTZ must return BitWidth for a zero input; patch it with a select.
 8939  EVT SetCCVT =
 8940  getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
 8941  SDValue Zero = DAG.getConstant(0, DL, VT);
 8942  SDValue SrcIsZero = DAG.getSetCC(DL, SetCCVT, Op, Zero, ISD::SETEQ);
 8943  return DAG.getSelect(DL, VT, SrcIsZero,
 8944  DAG.getConstant(BitWidth, DL, VT), ExtLoad);
 8945 }
8946
// Expand ISD::CTTZ / ISD::CTTZ_ZERO_UNDEF. Strategy, in order of preference:
// use the other CTTZ flavor if supported; fall back to a de Bruijn table
// lookup for scalars when neither CTPOP nor CTLZ is available; otherwise use
// the popcount identity cttz(x) = popcount(~x & (x - 1)), or
// BitWidth - ctlz(~x & (x - 1)) if only CTLZ is legal.
 8948  SDLoc dl(Node);
 8949  EVT VT = Node->getValueType(0);
 8950  SDValue Op = Node->getOperand(0);
 8951  unsigned NumBitsPerElt = VT.getScalarSizeInBits();
 8952
 8953  // If the non-ZERO_UNDEF version is supported we can use that instead.
 8954  if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
 8956  return DAG.getNode(ISD::CTTZ, dl, VT, Op);
 8957
 8958  // If the ZERO_UNDEF version is supported use that and handle the zero case.
 // (Select NumBitsPerElt when the source is zero, since ZERO_UNDEF leaves
 // that case undefined.)
 8960  EVT SetCCVT =
 8961  getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
 8962  SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op);
 8963  SDValue Zero = DAG.getConstant(0, dl, VT);
 8964  SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
 8965  return DAG.getSelect(dl, VT, SrcIsZero,
 8966  DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ);
 8967  }
 8968
 8969  // Only expand vector types if we have the appropriate vector bit operations.
 8970  // This includes the operations needed to expand CTPOP if it isn't supported.
 8971  if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
 8974  !canExpandVectorCTPOP(*this, VT)) ||
 8978  return SDValue();
 8979
 8980  // Emit Table Lookup if ISD::CTLZ and ISD::CTPOP are not legal.
 8981  if (!VT.isVector() && isOperationExpand(ISD::CTPOP, VT) &&
 8983  if (SDValue V = CTTZTableLookup(Node, DAG, dl, VT, Op, NumBitsPerElt))
 8984  return V;
 8985
 8986  // for now, we use: { return popcount(~x & (x - 1)); }
 8987  // unless the target has ctlz but not ctpop, in which case we use:
 8988  // { return 32 - nlz(~x & (x-1)); }
 8989  // Ref: "Hacker's Delight" by Henry Warren
 // ~x & (x - 1) sets exactly the bits below the lowest set bit of x.
 8990  SDValue Tmp = DAG.getNode(
 8991  ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT),
 8992  DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getConstant(1, dl, VT)));
 8993
 8994  // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
 8996  return DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
 8997  DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
 8998  }
 8999
 9000  return DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
 9001 }
9002
// Expand ISD::VP_CTTZ via the popcount identity cttz(x) = popcount(~x & (x-1)),
// threading the node's Mask and VL operands through every VP_* step.
 9004  SDValue Op = Node->getOperand(0);
 9005  SDValue Mask = Node->getOperand(1);
 9006  SDValue VL = Node->getOperand(2);
 9007  SDLoc dl(Node);
 9008  EVT VT = Node->getValueType(0);
 9009
 9010  // Same as the vector part of expandCTTZ, use: popcount(~x & (x - 1))
 // ~x written as x XOR all-ones; ~x & (x - 1) sets exactly the bits below
 // the lowest set bit of x.
 9011  SDValue Not = DAG.getNode(ISD::VP_XOR, dl, VT, Op,
 9012  DAG.getConstant(-1, dl, VT), Mask, VL);
 9013  SDValue MinusOne = DAG.getNode(ISD::VP_SUB, dl, VT, Op,
 9014  DAG.getConstant(1, dl, VT), Mask, VL);
 9015  SDValue Tmp = DAG.getNode(ISD::VP_AND, dl, VT, Not, MinusOne, Mask, VL);
 9016  return DAG.getNode(ISD::VP_CTPOP, dl, VT, Tmp, Mask, VL);
 9017 }
9018
 9020  bool IsNegative) const {
 // Expand ISD::ABS (or its negation when IsNegative). Preference order:
 // smax/umin/smin forms when those ops are legal, else the branch-free
 // sign-smear form: Y = X >> (bits-1); abs = (X ^ Y) - Y.
 9021  SDLoc dl(N);
 9022  EVT VT = N->getValueType(0);
 9023  SDValue Op = N->getOperand(0);
 9024
 9025  // abs(x) -> smax(x,sub(0,x))
 9026  if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
 9028  SDValue Zero = DAG.getConstant(0, dl, VT);
 9029  return DAG.getNode(ISD::SMAX, dl, VT, Op,
 9030  DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
 9031  }
 9032
 9033  // abs(x) -> umin(x,sub(0,x))
 9034  if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
 // Freeze Op so both uses below observe the same value even if Op is
 // poison/undef.
 9036  SDValue Zero = DAG.getConstant(0, dl, VT);
 9037  Op = DAG.getFreeze(Op);
 9038  return DAG.getNode(ISD::UMIN, dl, VT, Op,
 9039  DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
 9040  }
 9041
 9042  // 0 - abs(x) -> smin(x, sub(0,x))
 9043  if (IsNegative && isOperationLegal(ISD::SUB, VT) &&
 9045  Op = DAG.getFreeze(Op);
 9046  SDValue Zero = DAG.getConstant(0, dl, VT);
 9047  return DAG.getNode(ISD::SMIN, dl, VT, Op,
 9048  DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
 9049  }
 9050
 9051  // Only expand vector types if we have the appropriate vector operations.
 9052  if (VT.isVector() &&
 9054  (!IsNegative && !isOperationLegalOrCustom(ISD::ADD, VT)) ||
 9055  (IsNegative && !isOperationLegalOrCustom(ISD::SUB, VT)) ||
 9057  return SDValue();
 9058
 9059  Op = DAG.getFreeze(Op);
 // Shift = all-ones if Op is negative, all-zeros otherwise (arithmetic
 // shift by bits-1 smears the sign bit).
 9060  SDValue Shift = DAG.getNode(
 9061  ISD::SRA, dl, VT, Op,
 9062  DAG.getShiftAmountConstant(VT.getScalarSizeInBits() - 1, VT, dl));
 9063  SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift);
 9064
 9065  // abs(x) -> Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
 9066  if (!IsNegative)
 9067  return DAG.getNode(ISD::SUB, dl, VT, Xor, Shift);
 9068
 9069  // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
 9070  return DAG.getNode(ISD::SUB, dl, VT, Shift, Xor);
 9071 }
9072
// Expand ISD::ABDS / ISD::ABDU (signed/unsigned absolute difference).
// Preference order: max-min when both ops are legal; or-of-usubsats for the
// unsigned case; else a compare+select of the two subtraction orders.
 9074  SDLoc dl(N);
 9075  EVT VT = N->getValueType(0);
 // Freeze both operands so each is observed as a single consistent value
 // across its multiple uses below.
 9076  SDValue LHS = DAG.getFreeze(N->getOperand(0));
 9077  SDValue RHS = DAG.getFreeze(N->getOperand(1));
 9078  bool IsSigned = N->getOpcode() == ISD::ABDS;
 9079
 9080  // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
 9081  // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
 9082  unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
 9083  unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
 9084  if (isOperationLegal(MaxOpc, VT) && isOperationLegal(MinOpc, VT)) {
 9085  SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
 9086  SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
 9087  return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
 9088  }
 9089
 9090  // abdu(lhs, rhs) -> or(usubsat(lhs,rhs), usubsat(rhs,lhs))
 // Exactly one of the saturating subtractions is nonzero, so OR picks it.
 9091  if (!IsSigned && isOperationLegal(ISD::USUBSAT, VT))
 9092  return DAG.getNode(ISD::OR, dl, VT,
 9093  DAG.getNode(ISD::USUBSAT, dl, VT, LHS, RHS),
 9094  DAG.getNode(ISD::USUBSAT, dl, VT, RHS, LHS));
 9095
 9096  // abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
 9097  // abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
 9098  EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
 9100  SDValue Cmp = DAG.getSetCC(dl, CCVT, LHS, RHS, CC);
 9101  return DAG.getSelect(dl, VT, Cmp, DAG.getNode(ISD::SUB, dl, VT, LHS, RHS),
 9102  DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));
 9103 }
9104
// Expand ISD::BSWAP (byte-order reversal) with shifts, masks, and ORs.
// Handles i16 (rotate by 8), i32, and i64 scalar element types; returns
// SDValue() for anything else.
 9106  SDLoc dl(N);
 9107  EVT VT = N->getValueType(0);
 9108  SDValue Op = N->getOperand(0);
 9109
 9110  if (!VT.isSimple())
 9111  return SDValue();
 9112
 9113  EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
 9114  SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
 9115  switch (VT.getSimpleVT().getScalarType().SimpleTy) {
 9116  default:
 9117  return SDValue();
 9118  case MVT::i16:
 9119  // Use a rotate by 8. This can be further expanded if necessary.
 9120  return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
 9121  case MVT::i32:
 // TmpN holds the byte destined for position N (1-based from the LSB of
 // the result): shift each source byte to its mirrored position, masking
 // the middle two bytes with 0xFF00 before/after the 8-bit shifts.
 9122  Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
 9123  Tmp3 = DAG.getNode(ISD::AND, dl, VT, Op,
 9124  DAG.getConstant(0xFF00, dl, VT));
 9125  Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT));
 9126  Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
 9127  Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, dl, VT));
 9128  Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
 9129  Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
 9130  Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
 9131  return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
 9132  case MVT::i64:
 // Same scheme with eight bytes: mask each inner byte to isolate it, then
 // shift it to its mirrored position; the outermost bytes need no mask.
 9133  Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
 9134  Tmp7 = DAG.getNode(ISD::AND, dl, VT, Op,
 9135  DAG.getConstant(255ULL<<8, dl, VT));
 9136  Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT));
 9137  Tmp6 = DAG.getNode(ISD::AND, dl, VT, Op,
 9138  DAG.getConstant(255ULL<<16, dl, VT));
 9139  Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT));
 9140  Tmp5 = DAG.getNode(ISD::AND, dl, VT, Op,
 9141  DAG.getConstant(255ULL<<24, dl, VT));
 9142  Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT));
 9143  Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
 9144  Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4,
 9145  DAG.getConstant(255ULL<<24, dl, VT));
 9146  Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
 9147  Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
 9148  DAG.getConstant(255ULL<<16, dl, VT));
 9149  Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
 9150  Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2,
 9151  DAG.getConstant(255ULL<<8, dl, VT));
 9152  Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
 // OR the eight bytes together as a balanced tree to shorten the
 // dependency chain.
 9153  Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7);
 9154  Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5);
 9155  Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
 9156  Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
 9157  Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6);
 9158  Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
 9159  return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4);
 9160  }
 9161 }
9162
// Expand ISD::VP_BSWAP — the vector-predicated counterpart of expandBSWAP.
// Identical shift/mask/or byte-mirroring scheme, but every node carries the
// Mask and EVL (explicit vector length) operands, and i16 uses an explicit
// shl/lshr/or pair since there is no VP rotate here.
 9164  SDLoc dl(N);
 9165  EVT VT = N->getValueType(0);
 9166  SDValue Op = N->getOperand(0);
 9167  SDValue Mask = N->getOperand(1);
 9168  SDValue EVL = N->getOperand(2);
 9169
 9170  if (!VT.isSimple())
 9171  return SDValue();
 9172
 9173  EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
 9174  SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
 9175  switch (VT.getSimpleVT().getScalarType().SimpleTy) {
 9176  default:
 9177  return SDValue();
 9178  case MVT::i16:
 9179  Tmp1 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
 9180  Mask, EVL);
 9181  Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
 9182  Mask, EVL);
 9183  return DAG.getNode(ISD::VP_OR, dl, VT, Tmp1, Tmp2, Mask, EVL);
 9184  case MVT::i32:
 // TmpN holds the byte destined for position N of the result; the two
 // middle bytes are isolated with 0xFF00 before/after the 8-bit shifts.
 9185  Tmp4 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
 9186  Mask, EVL);
 9187  Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Op, DAG.getConstant(0xFF00, dl, VT),
 9188  Mask, EVL);
 9189  Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT),
 9190  Mask, EVL);
 9191  Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
 9192  Mask, EVL);
 9193  Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
 9194  DAG.getConstant(0xFF00, dl, VT), Mask, EVL);
 9195  Tmp1 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
 9196  Mask, EVL);
 9197  Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
 9198  Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
 9199  return DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
 9200  case MVT::i64:
 // Eight-byte mirror: mask each inner byte, shift to its mirrored slot,
 // then OR the pieces together as a balanced tree.
 9201  Tmp8 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
 9202  Mask, EVL);
 9203  Tmp7 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
 9204  DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
 9205  Tmp7 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT),
 9206  Mask, EVL);
 9207  Tmp6 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
 9208  DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
 9209  Tmp6 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT),
 9210  Mask, EVL);
 9211  Tmp5 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
 9212  DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
 9213  Tmp5 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT),
 9214  Mask, EVL);
 9215  Tmp4 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
 9216  Mask, EVL);
 9217  Tmp4 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp4,
 9218  DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
 9219  Tmp3 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
 9220  Mask, EVL);
 9221  Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp3,
 9222  DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
 9223  Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(40, dl, SHVT),
 9224  Mask, EVL);
 9225  Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
 9226  DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
 9227  Tmp1 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
 9228  Mask, EVL);
 9229  Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp7, Mask, EVL);
 9230  Tmp6 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp6, Tmp5, Mask, EVL);
 9231  Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
 9232  Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
 9233  Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp6, Mask, EVL);
 9234  Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
 9235  return DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp4, Mask, EVL);
 9236  }
 9237 }
9238
// Expand ISD::BITREVERSE. For power-of-two sizes >= 8 bits: BSWAP the bytes,
// then swap nibbles, bit-pairs, and single bits within each byte using
// constant masks. Otherwise fall back to moving every bit individually
// (O(Sz) shift/and/or steps).
 9240  SDLoc dl(N);
 9241  EVT VT = N->getValueType(0);
 9242  SDValue Op = N->getOperand(0);
 9243  EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
 9244  unsigned Sz = VT.getScalarSizeInBits();
 9245
 9246  SDValue Tmp, Tmp2, Tmp3;
 9247
 9248  // If we can, perform BSWAP first and then the mask+swap the i4, then i2
 9249  // and finally the i1 pairs.
 9250  // TODO: We can easily support i4/i2 legal types if any target ever does.
 9251  if (Sz >= 8 && isPowerOf2_32(Sz)) {
 9252  // Create the masks - repeating the pattern every byte.
 9253  APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
 9254  APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
 9255  APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
 9256
 9257  // BSWAP if the type is wider than a single byte.
 9258  Tmp = (Sz > 8 ? DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op);
 9259
 9260  // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
 9261  Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT));
 9262  Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask4, dl, VT));
 9263  Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT));
 9264  Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT));
 9265  Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
 9266
 9267  // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
 9268  Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT));
 9269  Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask2, dl, VT));
 9270  Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT));
 9271  Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT));
 9272  Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
 9273
 9274  // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
 9275  Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT));
 9276  Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask1, dl, VT));
 9277  Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT));
 9278  Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT));
 9279  Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
 9280  return Tmp;
 9281  }
 9282
 // Fallback for irregular sizes: route bit I of the source to bit J = Sz-1-I
 // of the result, one bit at a time.
 9283  Tmp = DAG.getConstant(0, dl, VT);
 9284  for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) {
 9285  if (I < J)
 9286  Tmp2 =
 9287  DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(J - I, dl, SHVT));
 9288  else
 9289  Tmp2 =
 9290  DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT));
 9291
 9292  APInt Shift = APInt::getOneBitSet(Sz, J);
 9293  Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT));
 9294  Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2);
 9295  }
 9296
 9297  return Tmp;
 9298 }
9299
// Expand ISD::VP_BITREVERSE — vector-predicated counterpart of
// expandBITREVERSE. Same BSWAP + nibble/pair/bit swap scheme with Mask and
// EVL threaded through each step; unlike the non-VP version there is no
// bit-at-a-time fallback, so irregular sizes return SDValue().
 9301  assert(N->getOpcode() == ISD::VP_BITREVERSE);
 9302
 9303  SDLoc dl(N);
 9304  EVT VT = N->getValueType(0);
 9305  SDValue Op = N->getOperand(0);
 9306  SDValue Mask = N->getOperand(1);
 9307  SDValue EVL = N->getOperand(2);
 9308  EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
 9309  unsigned Sz = VT.getScalarSizeInBits();
 9310
 9311  SDValue Tmp, Tmp2, Tmp3;
 9312
 9313  // If we can, perform BSWAP first and then the mask+swap the i4, then i2
 9314  // and finally the i1 pairs.
 9315  // TODO: We can easily support i4/i2 legal types if any target ever does.
 9316  if (Sz >= 8 && isPowerOf2_32(Sz)) {
 9317  // Create the masks - repeating the pattern every byte.
 9318  APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
 9319  APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
 9320  APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
 9321
 9322  // BSWAP if the type is wider than a single byte.
 9323  Tmp = (Sz > 8 ? DAG.getNode(ISD::VP_BSWAP, dl, VT, Op, Mask, EVL) : Op);
 9324
 9325  // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
 9326  Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT),
 9327  Mask, EVL);
 9328  Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
 9329  DAG.getConstant(Mask4, dl, VT), Mask, EVL);
 9330  Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT),
 9331  Mask, EVL);
 9332  Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT),
 9333  Mask, EVL);
 9334  Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
 9335
 9336  // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
 9337  Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT),
 9338  Mask, EVL);
 9339  Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
 9340  DAG.getConstant(Mask2, dl, VT), Mask, EVL);
 9341  Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT),
 9342  Mask, EVL);
 9343  Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT),
 9344  Mask, EVL);
 9345  Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
 9346
 9347  // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
 9348  Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT),
 9349  Mask, EVL);
 9350  Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
 9351  DAG.getConstant(Mask1, dl, VT), Mask, EVL);
 9352  Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT),
 9353  Mask, EVL);
 9354  Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT),
 9355  Mask, EVL);
 9356  Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
 9357  return Tmp;
 9358  }
 9359  return SDValue();
 9360 }
9361
// Scalarize a vector load into per-element operations. Two paths:
// - byte-sized elements: one (possibly extending) scalar load per element at
//   increasing offsets, chains merged with a TokenFactor;
// - sub-byte elements: one wide integer load of the whole vector, then
//   shift/mask/truncate each element out (vectors are stored in memory
//   without inter-element padding, so bit extraction is required).
// Returns {result vector, output chain}. Fatal error on scalable vectors.
 9362 std::pair<SDValue, SDValue>
 9364  SelectionDAG &DAG) const {
 9365  SDLoc SL(LD);
 9366  SDValue Chain = LD->getChain();
 9367  SDValue BasePTR = LD->getBasePtr();
 9368  EVT SrcVT = LD->getMemoryVT();
 9369  EVT DstVT = LD->getValueType(0);
 9370  ISD::LoadExtType ExtType = LD->getExtensionType();
 9371
 9372  if (SrcVT.isScalableVector())
 9373  report_fatal_error("Cannot scalarize scalable vector loads");
 9374
 9375  unsigned NumElem = SrcVT.getVectorNumElements();
 9376
 9377  EVT SrcEltVT = SrcVT.getScalarType();
 9378  EVT DstEltVT = DstVT.getScalarType();
 9379
 9380  // A vector must always be stored in memory as-is, i.e. without any padding
 9381  // between the elements, since various code depend on it, e.g. in the
 9382  // handling of a bitcast of a vector type to int, which may be done with a
 9383  // vector store followed by an integer load. A vector that does not have
 9384  // elements that are byte-sized must therefore be stored as an integer
 9385  // built out of the extracted vector elements.
 9386  if (!SrcEltVT.isByteSized()) {
 9387  unsigned NumLoadBits = SrcVT.getStoreSizeInBits();
 9388  EVT LoadVT = EVT::getIntegerVT(*DAG.getContext(), NumLoadBits);
 9389
 9390  unsigned NumSrcBits = SrcVT.getSizeInBits();
 9391  EVT SrcIntVT = EVT::getIntegerVT(*DAG.getContext(), NumSrcBits);
 9392
 9393  unsigned SrcEltBits = SrcEltVT.getSizeInBits();
 9394  SDValue SrcEltBitMask = DAG.getConstant(
 9395  APInt::getLowBitsSet(NumLoadBits, SrcEltBits), SL, LoadVT);
 9396
 9397  // Load the whole vector and avoid masking off the top bits as it makes
 9398  // the codegen worse.
 9399  SDValue Load =
 9400  DAG.getExtLoad(ISD::EXTLOAD, SL, LoadVT, Chain, BasePTR,
 9401  LD->getPointerInfo(), SrcIntVT, LD->getOriginalAlign(),
 9402  LD->getMemOperand()->getFlags(), LD->getAAInfo());
 9403
 // Peel each element out of the wide integer: shift its bits down, mask
 // to element width, truncate, and (if the original was an extload) apply
 // the matching extension. Big-endian targets index elements from the top.
 9405  for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
 9406  unsigned ShiftIntoIdx =
 9407  (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
 9408  SDValue ShiftAmount =
 9409  DAG.getShiftAmountConstant(ShiftIntoIdx * SrcEltVT.getSizeInBits(),
 9410  LoadVT, SL, /*LegalTypes=*/false);
 9411  SDValue ShiftedElt = DAG.getNode(ISD::SRL, SL, LoadVT, Load, ShiftAmount);
 9412  SDValue Elt =
 9413  DAG.getNode(ISD::AND, SL, LoadVT, ShiftedElt, SrcEltBitMask);
 9414  SDValue Scalar = DAG.getNode(ISD::TRUNCATE, SL, SrcEltVT, Elt);
 9415
 9416  if (ExtType != ISD::NON_EXTLOAD) {
 9417  unsigned ExtendOp = ISD::getExtForLoadExtType(false, ExtType);
 9418  Scalar = DAG.getNode(ExtendOp, SL, DstEltVT, Scalar);
 9419  }
 9420
 9421  Vals.push_back(Scalar);
 9422  }
 9423
 9424  SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
 9425  return std::make_pair(Value, Load.getValue(1));
 9426  }
 9427
 // Byte-sized elements: emit NumElem independent scalar loads at stride
 // SrcEltVT bytes apart.
 9428  unsigned Stride = SrcEltVT.getSizeInBits() / 8;
 9429  assert(SrcEltVT.isByteSized());
 9430
 9432  SmallVector<SDValue, 8> LoadChains;
 9433
 9434  for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
 9435  SDValue ScalarLoad =
 9436  DAG.getExtLoad(ExtType, SL, DstEltVT, Chain, BasePTR,
 9437  LD->getPointerInfo().getWithOffset(Idx * Stride),
 9438  SrcEltVT, LD->getOriginalAlign(),
 9439  LD->getMemOperand()->getFlags(), LD->getAAInfo());
 9440
 9441  BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, TypeSize::getFixed(Stride));
 9442
 9443  Vals.push_back(ScalarLoad.getValue(0));
 9444  LoadChains.push_back(ScalarLoad.getValue(1));
 9445  }
 9446
 // The loads are independent; merge their chains with a TokenFactor.
 9447  SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
 9448  SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
 9449
 9450  return std::make_pair(Value, NewChain);
 9451 }
9452
 9454  SelectionDAG &DAG) const {
 // Scalarize a vector store into per-element operations — the store-side
 // mirror of scalarizeVectorLoad. Byte-sized elements become one truncating
 // scalar store per element; sub-byte elements are packed into a single wide
 // integer (shift + OR) and stored once, since vectors must occupy memory
 // without inter-element padding. Fatal error on scalable vectors.
 9455  SDLoc SL(ST);
 9456
 9457  SDValue Chain = ST->getChain();
 9458  SDValue BasePtr = ST->getBasePtr();
 9459  SDValue Value = ST->getValue();
 9460  EVT StVT = ST->getMemoryVT();
 9461
 9462  if (StVT.isScalableVector())
 9463  report_fatal_error("Cannot scalarize scalable vector stores");
 9464
 9465  // The type of the data we want to save
 9466  EVT RegVT = Value.getValueType();
 9467  EVT RegSclVT = RegVT.getScalarType();
 9468
 9469  // The type of data as saved in memory.
 9470  EVT MemSclVT = StVT.getScalarType();
 9471
 9472  unsigned NumElem = StVT.getVectorNumElements();
 9473
 9474  // A vector must always be stored in memory as-is, i.e. without any padding
 9475  // between the elements, since various code depend on it, e.g. in the
 9476  // handling of a bitcast of a vector type to int, which may be done with a
 9477  // vector store followed by an integer load. A vector that does not have
 9478  // elements that are byte-sized must therefore be stored as an integer
 9479  // built out of the extracted vector elements.
 9480  if (!MemSclVT.isByteSized()) {
 9481  unsigned NumBits = StVT.getSizeInBits();
 9482  EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);
 9483
 9484  SDValue CurrVal = DAG.getConstant(0, SL, IntVT);
 9485
 // OR each truncated element into its bit position within the wide
 // integer; big-endian targets place element 0 at the top.
 9486  for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
 9487  SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
 9488  DAG.getVectorIdxConstant(Idx, SL));
 9489  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt);
 9490  SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc);
 9491  unsigned ShiftIntoIdx =
 9492  (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
 9493  SDValue ShiftAmount =
 9494  DAG.getConstant(ShiftIntoIdx * MemSclVT.getSizeInBits(), SL, IntVT);
 9495  SDValue ShiftedElt =
 9496  DAG.getNode(ISD::SHL, SL, IntVT, ExtElt, ShiftAmount);
 9497  CurrVal = DAG.getNode(ISD::OR, SL, IntVT, CurrVal, ShiftedElt);
 9498  }
 9499
 9500  return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(),
 9501  ST->getOriginalAlign(), ST->getMemOperand()->getFlags(),
 9502  ST->getAAInfo());
 9503  }
 9504
 9505  // Store Stride in bytes
 9506  unsigned Stride = MemSclVT.getSizeInBits() / 8;
 9507  assert(Stride && "Zero stride!");
 9508  // Extract each of the elements from the original vector and save them into
 9509  // memory individually.
 9511  for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
 9512  SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
 9513  DAG.getVectorIdxConstant(Idx, SL));
 9514
 9515  SDValue Ptr =
 9516  DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::getFixed(Idx * Stride));
 9517
 9518  // This scalar TruncStore may be illegal, but we legalize it later.
 9519  SDValue Store = DAG.getTruncStore(
 9520  Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride),
 9521  MemSclVT, ST->getOriginalAlign(), ST->getMemOperand()->getFlags(),
 9522  ST->getAAInfo());
 9523
 9524  Stores.push_back(Store);
 9525  }
 9526
 // All element stores branch off the same incoming chain, so they are
 // unordered with respect to each other; merge them with a TokenFactor.
 9527  return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores);
 9528 }
9529
9530std::pair<SDValue, SDValue>
9532 assert(LD->getAddressingMode() == ISD::UNINDEXED &&
9533 "unaligned indexed loads not implemented!");
9534 SDValue Chain = LD->getChain();
9535 SDValue Ptr = LD->getBasePtr();
9536 EVT VT = LD->getValueType(0);
9537 EVT LoadedVT = LD->getMemoryVT();
9538 SDLoc dl(LD);
9539 auto &MF = DAG.getMachineFunction();
9540
9541 if (VT.isFloatingPoint() || VT.isVector()) {
9542 EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
9543 if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
9544 if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
9545 LoadedVT.isVector()) {
9546 // Scalarize the load and let the individual components be handled.
9547 return scalarizeVectorLoad(LD, DAG);
9548 }
9549
9550 // Expand to a (misaligned) integer load of the same size,
9551 // then bitconvert to floating point or vector.
9552 SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr,
9553 LD->getMemOperand());
9554 SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
9555 if (LoadedVT != VT)
9556 Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
9557 ISD::ANY_EXTEND, dl, VT, Result);
9558
9559 return std::make_pair(Result, newLoad.getValue(1));
9560 }
9561
9562 // Copy the value to a (aligned) stack slot using (unaligned) integer
9563 // loads and stores, then do a (aligned) load from the stack slot.
9564 MVT RegVT = getRegisterType(*DAG.getContext(), intVT);
9565 unsigned LoadedBytes = LoadedVT.getStoreSize();
9566 unsigned RegBytes = RegVT.getSizeInBits() / 8;
9567 unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
9568
9569 // Make sure the stack slot is also aligned for the register type.
9570 SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
9571 auto FrameIndex = cast<FrameIndexSDNode>(StackBase.getNode())->getIndex();
9573 SDValue StackPtr = StackBase;
9574 unsigned Offset = 0;
9575
9576 EVT PtrVT = Ptr.getValueType();
9577 EVT StackPtrVT = StackPtr.getValueType();
9578
9579 SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
9580 SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
9581
9582 // Do all but one copies using the full register width.
9583 for (unsigned i = 1; i < NumRegs; i++) {
9584 // Load one integer register's worth from the original location.
9585 SDValue Load = DAG.getLoad(
9586 RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset),
9587 LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
9588 LD->getAAInfo());
9589 // Follow the load with a store to the stack slot. Remember the store.
9590 Stores.push_back(DAG.getStore(
9591 Load.getValue(1), dl, Load, StackPtr,
9592 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset)));
9593 // Increment the pointers.
9594 Offset += RegBytes;
9595
9596 Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
9597 StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
9598 }
9599
9600 // The last copy may be partial. Do an extending load.
9601 EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
9602 8 * (LoadedBytes - Offset));
9603 SDValue Load =
9604 DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
9605 LD->getPointerInfo().getWithOffset(Offset), MemVT,
9606 LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
9607 LD->getAAInfo());
9608 // Follow the load with a store to the stack slot. Remember the store.
9609 // On big-endian machines this requires a truncating store to ensure
9610 // that the bits end up in the right place.
9611 Stores.push_back(DAG.getTruncStore(
9612 Load.getValue(1), dl, Load, StackPtr,
9613 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT));
9614
9615 // The order of the stores doesn't matter - say it with a TokenFactor.
9616 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
9617
9618 // Finally, perform the original load only redirected to the stack slot.
9619 Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
9620 MachinePointerInfo::getFixedStack(MF, FrameIndex, 0),
9621 LoadedVT);
9622
9623 // Callers expect a MERGE_VALUES node.
9624 return std::make_pair(Load, TF);
9625 }
9626
9627 assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
9628 "Unaligned load of unsupported type.");
9629
9630 // Compute the new VT that is half the size of the old one. This is an
9631 // integer MVT.
9632 unsigned NumBits = LoadedVT.getSizeInBits();
9633 EVT NewLoadedVT;
9634 NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
9635 NumBits >>= 1;
9636
9637 Align Alignment = LD->getOriginalAlign();
9638 unsigned IncrementSize = NumBits / 8;
9639 ISD::LoadExtType HiExtType = LD->getExtensionType();
9640
9641 // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
9642 if (HiExtType == ISD::NON_EXTLOAD)
9643 HiExtType = ISD::ZEXTLOAD;
9644
9645 // Load the value in two parts
9646 SDValue Lo, Hi;
9647 if (DAG.getDataLayout().isLittleEndian()) {
9648 Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
9649 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
9650 LD->getAAInfo());
9651
9652 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
9653 Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
9654 LD->getPointerInfo().getWithOffset(IncrementSize),
9655 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
9656 LD->getAAInfo());
9657 } else {
9658 Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
9659 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
9660 LD->getAAInfo());
9661
9662 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
9663 Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
9664 LD->getPointerInfo().getWithOffset(IncrementSize),
9665 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
9666 LD->getAAInfo());
9667 }
9668
9669 // aggregate the two parts
9670 SDValue ShiftAmount = DAG.getShiftAmountConstant(NumBits, VT, dl);
9671 SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
9672 Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);
9673
9674 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
9675 Hi.getValue(1));
9676
9677 return std::make_pair(Result, TF);
9678}
9679
9681 SelectionDAG &DAG) const {
9682 assert(ST->getAddressingMode() == ISD::UNINDEXED &&
9683 "unaligned indexed stores not implemented!");
9684 SDValue Chain = ST->getChain();
9685 SDValue Ptr = ST->getBasePtr();
9686 SDValue Val = ST->getValue();
9687 EVT VT = Val.getValueType();
9688 Align Alignment = ST->getOriginalAlign();
9689 auto &MF = DAG.getMachineFunction();
9690 EVT StoreMemVT = ST->getMemoryVT();
9691
9692 SDLoc dl(ST);
9693 if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) {
9694 EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
9695 if (isTypeLegal(intVT)) {
9696 if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
9697 StoreMemVT.isVector()) {
9698 // Scalarize the store and let the individual components be handled.
9699 SDValue Result = scalarizeVectorStore(ST, DAG);
9700 return Result;
9701 }
9702 // Expand to a bitconvert of the value to the integer type of the
9703 // same size, then a (misaligned) int store.
9704 // FIXME: Does not handle truncating floating point stores!
9705 SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
9706 Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
9707 Alignment, ST->getMemOperand()->getFlags());
9708 return Result;
9709 }
9710 // Do a (aligned) store to a stack slot, then copy from the stack slot
9711 // to the final destination using (unaligned) integer loads and stores.
9712 MVT RegVT = getRegisterType(
9713 *DAG.getContext(),
9714 EVT::getIntegerVT(*DAG.getContext(), StoreMemVT.getSizeInBits()));
9715 EVT PtrVT = Ptr.getValueType();
9716 unsigned StoredBytes = StoreMemVT.getStoreSize();
9717 unsigned RegBytes = RegVT.getSizeInBits() / 8;
9718 unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
9719
9720 // Make sure the stack slot is also aligned for the register type.
9721 SDValue StackPtr = DAG.CreateStackTemporary(StoreMemVT, RegVT);
9722 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
9723
9724 // Perform the original store, only redirected to the stack slot.
9725 SDValue Store = DAG.getTruncStore(
9726 Chain, dl, Val, StackPtr,
9727 MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoreMemVT);
9728
9729 EVT StackPtrVT = StackPtr.getValueType();
9730
9731 SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
9732 SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
9734 unsigned Offset = 0;
9735
9736 // Do all but one copies using the full register width.
9737 for (unsigned i = 1; i < NumRegs; i++) {
9738 // Load one integer register's worth from the stack slot.
9739 SDValue Load = DAG.getLoad(
9740 RegVT, dl, Store, StackPtr,
9741 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset));
9742 // Store it to the final location. Remember the store.
9743 Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
9744 ST->getPointerInfo().getWithOffset(Offset),
9745 ST->getOriginalAlign(),
9746 ST->getMemOperand()->getFlags()));
9747 // Increment the pointers.
9748 Offset += RegBytes;
9749 StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
9750 Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
9751 }
9752
9753 // The last store may be partial. Do a truncating store. On big-endian
9754 // machines this requires an extending load from the stack slot to ensure
9755 // that the bits are in the right place.
9756 EVT LoadMemVT =
9757 EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset));
9758
9759 // Load from the stack slot.
9760 SDValue Load = DAG.getExtLoad(
9761 ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
9762 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), LoadMemVT);
9763
9764 Stores.push_back(
9765 DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
9766 ST->getPointerInfo().getWithOffset(Offset), LoadMemVT,
9767 ST->getOriginalAlign(),
9768 ST->getMemOperand()->getFlags(), ST->getAAInfo()));
9769 // The order of the stores doesn't matter - say it with a TokenFactor.
9770 SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
9771 return Result;
9772 }
9773
9774 assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
9775 "Unaligned store of unknown type.");
9776 // Get the half-size VT
9777 EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(*DAG.getContext());
9778 unsigned NumBits = NewStoredVT.getFixedSizeInBits();
9779 unsigned IncrementSize = NumBits / 8;
9780
9781 // Divide the stored value in two parts.
9782 SDValue ShiftAmount =
9783 DAG.getShiftAmountConstant(NumBits, Val.getValueType(), dl);
9784 SDValue Lo = Val;
9785 // If Val is a constant, replace the upper bits with 0. The SRL will constant
9786 // fold and not use the upper bits. A smaller constant may be easier to
9787 // materialize.
9788 if (auto *C = dyn_cast<ConstantSDNode>(Lo); C && !C->isOpaque())
9789 Lo = DAG.getNode(
9790 ISD::AND, dl, VT, Lo,
9791 DAG.getConstant(APInt::getLowBitsSet(VT.getSizeInBits(), NumBits), dl,
9792 VT));
9793 SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);
9794
9795 // Store the two parts
9796 SDValue Store1, Store2;
9797 Store1 = DAG.getTruncStore(Chain, dl,
9798 DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
9799 Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
9800 ST->getMemOperand()->getFlags());
9801
9802 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
9803 Store2 = DAG.getTruncStore(
9804 Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
9805 ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
9806 ST->getMemOperand()->getFlags(), ST->getAAInfo());
9807
9808 SDValue Result =
9809 DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
9810 return Result;
9811}
9812
9813SDValue
9815 const SDLoc &DL, EVT DataVT,
9816 SelectionDAG &DAG,
9817 bool IsCompressedMemory) const {
9818 SDValue Increment;
9819 EVT AddrVT = Addr.getValueType();
9820 EVT MaskVT = Mask.getValueType();
9821 assert(DataVT.getVectorElementCount() == MaskVT.getVectorElementCount() &&
9822 "Incompatible types of Data and Mask");
9823 if (IsCompressedMemory) {
9824 if (DataVT.isScalableVector())
9826 "Cannot currently handle compressed memory with scalable vectors");
9827 // Incrementing the pointer according to number of '1's in the mask.
9828 EVT MaskIntVT = EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits());
9829 SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask);
9830 if (MaskIntVT.getSizeInBits() < 32) {
9831 MaskInIntReg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg);
9832 MaskIntVT = MVT::i32;
9833 }
9834
9835 // Count '1's with POPCNT.
9836 Increment = DAG.getNode(ISD::CTPOP, DL, MaskIntVT, MaskInIntReg);
9837 Increment = DAG.getZExtOrTrunc(Increment, DL, AddrVT);
9838 // Scale is an element size in bytes.
9839 SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL,
9840 AddrVT);
9841 Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale);
9842 } else if (DataVT.isScalableVector()) {
9843 Increment = DAG.getVScale(DL, AddrVT,
9844 APInt(AddrVT.getFixedSizeInBits(),
9845 DataVT.getStoreSize().getKnownMinValue()));
9846 } else
9847 Increment = DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT);
9848
9849 return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
9850}
9851
9853 EVT VecVT, const SDLoc &dl,
9854 ElementCount SubEC) {
9855 assert(!(SubEC.isScalable() && VecVT.isFixedLengthVector()) &&
9856 "Cannot index a scalable vector within a fixed-width vector");
9857
9858 unsigned NElts = VecVT.getVectorMinNumElements();
9859 unsigned NumSubElts = SubEC.getKnownMinValue();
9860 EVT IdxVT = Idx.getValueType();
9861
9862 if (VecVT.isScalableVector() && !SubEC.isScalable()) {
9863 // If this is a constant index and we know the value plus the number of the
9864 // elements in the subvector minus one is less than the minimum number of
9865 // elements then it's safe to return Idx.
9866 if (auto *IdxCst = dyn_cast<ConstantSDNode>(Idx))
9867 if (IdxCst->getZExtValue() + (NumSubElts - 1) < NElts)
9868 return Idx;
9869 SDValue VS =
9870 DAG.getVScale(dl, IdxVT, APInt(IdxVT.getFixedSizeInBits(), NElts));
9871 unsigned SubOpcode = NumSubElts <= NElts ? ISD::SUB : ISD::USUBSAT;
9872 SDValue Sub = DAG.getNode(SubOpcode, dl, IdxVT, VS,
9873 DAG.getConstant(NumSubElts, dl, IdxVT));
9874 return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx, Sub);
9875 }
9876 if (isPowerOf2_32(NElts) && NumSubElts == 1) {
9877 APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(), Log2_32(NElts));
9878 return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
9879 DAG.getConstant(Imm, dl, IdxVT));
9880 }
9881 unsigned MaxIndex = NumSubElts < NElts ? NElts - NumSubElts : 0;
9882 return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx,
9883 DAG.getConstant(MaxIndex, dl, IdxVT));
9884}
9885
9887 SDValue VecPtr, EVT VecVT,
9888 SDValue Index) const {
9889 return getVectorSubVecPointer(
9890 DAG, VecPtr, VecVT,
9892 Index);
9893}
9894
9896 SDValue VecPtr, EVT VecVT,
9897 EVT SubVecVT,
9898 SDValue Index) const {
9899 SDLoc dl(Index);
9900 // Make sure the index type is big enough to compute in.
9901 Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType());
9902
9903 EVT EltVT = VecVT.getVectorElementType();
9904
9905 // Calculate the element offset and add it to the pointer.
9906 unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size.
9907 assert(EltSize * 8 == EltVT.getFixedSizeInBits() &&
9908 "Converting bits to bytes lost precision");
9909 assert(SubVecVT.getVectorElementType() == EltVT &&
9910 "Sub-vector must be a vector with matching element type");
9911 Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl,
9912 SubVecVT.getVectorElementCount());
9913
9914 EVT IdxVT = Index.getValueType();
9915 if (SubVecVT.isScalableVector())
9916 Index =
9917 DAG.getNode(ISD::MUL, dl, IdxVT, Index,
9918 DAG.getVScale(dl, IdxVT, APInt(IdxVT.getSizeInBits(), 1)));
9919
9920 Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
9921 DAG.getConstant(EltSize, dl, IdxVT));
9922 return DAG.getMemBasePlusOffset(VecPtr, Index, dl);
9923}
9924
9925//===----------------------------------------------------------------------===//
9926// Implementation of Emulated TLS Model
9927//===----------------------------------------------------------------------===//
9928
9930 SelectionDAG &DAG) const {
9931 // Access to address of TLS varialbe xyz is lowered to a function call:
9932 // __emutls_get_address( address of global variable named "__emutls_v.xyz" )
9933 EVT PtrVT = getPointerTy(DAG.getDataLayout());
9934 PointerType *VoidPtrType = PointerType::get(*DAG.getContext(), 0);
9935 SDLoc dl(GA);
9936
9937 ArgListTy Args;
9938 ArgListEntry Entry;
9939 std::string NameString = ("__emutls_v." + GA->getGlobal()->getName()).str();
9940 Module *VariableModule = const_cast<Module*>(GA->getGlobal()->getParent());
9941 StringRef EmuTlsVarName(NameString);
9942 GlobalVariable *EmuTlsVar = VariableModule->getNamedGlobal(EmuTlsVarName);
9943 assert(EmuTlsVar && "Cannot find EmuTlsVar ");
9944 Entry.Node = DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT);
9945 Entry.Ty = VoidPtrType;
9946 Args.push_back(Entry);
9947
9948 SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT);
9949
9951 CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
9952 CLI.setLibCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args));
9953 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
9954
9955 // TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
9956 // At last for X86 targets, maybe good for other targets too?
9958 MFI.setAdjustsStack(true); // Is this only for X86 target?
9959 MFI.setHasCalls(true);
9960
9961 assert((GA->getOffset() == 0) &&
9962 "Emulated TLS must have zero offset in GlobalAddressSDNode");
9963 return CallResult.first;
9964}
9965
9967 SelectionDAG &DAG) const {
9968 assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
9969 if (!isCtlzFast())
9970 return SDValue();
9971 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
9972 SDLoc dl(Op);
9973 if (isNullConstant(Op.getOperand(1)) && CC == ISD::SETEQ) {
9974 EVT VT = Op.getOperand(0).getValueType();
9975 SDValue Zext = Op.getOperand(0);
9976 if (VT.bitsLT(MVT::i32)) {
9977 VT = MVT::i32;
9978 Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
9979 }
9980 unsigned Log2b = Log2_32(VT.getSizeInBits());
9981 SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
9982 SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
9983 DAG.getConstant(Log2b, dl, MVT::i32));
9984 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
9985 }
9986 return SDValue();
9987}
9988
9990 SDValue Op0 = Node->getOperand(0);
9991 SDValue Op1 = Node->getOperand(1);
9992 EVT VT = Op0.getValueType();
9993 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9994 unsigned Opcode = Node->getOpcode();
9995 SDLoc DL(Node);
9996
9997 // umax(x,1) --> sub(x,cmpeq(x,0)) iff cmp result is allbits
9998 if (Opcode == ISD::UMAX && llvm::isOneOrOneSplat(Op1, true) && BoolVT == VT &&
10000 Op0 = DAG.getFreeze(Op0);
10001 SDValue Zero = DAG.getConstant(0, DL, VT);
10002 return DAG.getNode(ISD::SUB, DL, VT, Op0,
10003 DAG.getSetCC(DL, VT, Op0, Zero, ISD::SETEQ));
10004 }
10005
10006 // umin(x,y) -> sub(x,usubsat(x,y))
10007 // TODO: Missing freeze(Op0)?
10008 if (Opcode == ISD::UMIN && isOperationLegal(ISD::SUB, VT) &&
10010 return DAG.getNode(ISD::SUB, DL, VT, Op0,
10011 DAG.getNode(ISD::USUBSAT, DL, VT, Op0, Op1));
10012 }
10013
10014 // umax(x,y) -> add(x,usubsat(y,x))
10015 // TODO: Missing freeze(Op0)?
10016 if (Opcode == ISD::UMAX && isOperationLegal(ISD::ADD, VT) &&
10018 return DAG.getNode(ISD::ADD, DL, VT, Op0,
10019 DAG.getNode(ISD::USUBSAT, DL, VT, Op1, Op0));
10020 }
10021
10022 // FIXME: Should really try to split the vector in case it's legal on a
10023 // subvector.
10025 return DAG.UnrollVectorOp(Node);
10026
10027 // Attempt to find an existing SETCC node that we can reuse.
10028 // TODO: Do we need a generic doesSETCCNodeExist?
10029 // TODO: Missing freeze(Op0)/freeze(Op1)?
10030 auto buildMinMax = [&](ISD::CondCode PrefCC, ISD::CondCode AltCC,
10031 ISD::CondCode PrefCommuteCC,
10032 ISD::CondCode AltCommuteCC) {
10033 SDVTList BoolVTList = DAG.getVTList(BoolVT);
10034 for (ISD::CondCode CC : {PrefCC, AltCC}) {
10035 if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
10036 {Op0, Op1, DAG.getCondCode(CC)})) {
10037 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
10038 return DAG.getSelect(DL, VT, Cond, Op0, Op1);
10039 }
10040 }
10041 for (ISD::CondCode CC : {PrefCommuteCC, AltCommuteCC}) {
10042 if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
10043 {Op0, Op1, DAG.getCondCode(CC)})) {
10044 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
10045 return DAG.getSelect(DL, VT, Cond, Op1, Op0);
10046 }
10047 }
10048 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, PrefCC);
10049 return DAG.getSelect(DL, VT, Cond, Op0, Op1);
10050 };
10051
10052 // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
10053 // -> Y = (A < B) ? B : A
10054 // -> Y = (A >= B) ? A : B
10055 // -> Y = (A <= B) ? B : A
10056 switch (Opcode) {
10057 case ISD::SMAX:
10058 return buildMinMax(ISD::SETGT, ISD::SETGE, ISD::SETLT, ISD::SETLE);
10059 case ISD::SMIN:
10060 return buildMinMax(ISD::SETLT, ISD::SETLE, ISD::SETGT, ISD::SETGE);
10061 case ISD::UMAX:
10062 return buildMinMax(ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE);
10063 case ISD::UMIN:
10064 return buildMinMax(ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE);
10065 }
10066
10067 llvm_unreachable("How did we get here?");
10068}
10069
10071 unsigned Opcode = Node->getOpcode();
10072 SDValue LHS = Node->getOperand(0);
10073 SDValue RHS = Node->getOperand(1);
10074 EVT VT = LHS.getValueType();
10075 SDLoc dl(Node);
10076
10077 assert(VT == RHS.getValueType() && "Expected operands to be the same type");
10078 assert(VT.isInteger() && "Expected operands to be integers");
10079
10080 // usub.sat(a, b) -> umax(a, b) - b
10081 if (Opcode == ISD::USUBSAT && isOperationLegal(ISD::UMAX, VT)) {
10082 SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS);
10083 return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
10084 }
10085
10086 // uadd.sat(a, b) -> umin(a, ~b) + b
10087 if (Opcode == ISD::UADDSAT && isOperationLegal(ISD::UMIN, VT)) {
10088 SDValue InvRHS = DAG.getNOT(dl, RHS, VT);
10089 SDValue Min = DAG.getNode(ISD::UMIN, dl, VT, LHS, InvRHS);
10090 return DAG.getNode(ISD::ADD, dl, VT, Min, RHS);
10091 }
10092
10093 unsigned OverflowOp;
10094 switch (Opcode) {
10095 case ISD::SADDSAT:
10096 OverflowOp = ISD::SADDO;
10097 break;
10098 case ISD::UADDSAT:
10099 OverflowOp = ISD::UADDO;
10100 break;
10101 case ISD::SSUBSAT:
10102 OverflowOp = ISD::SSUBO;
10103 break;
10104 case ISD::USUBSAT:
10105 OverflowOp = ISD::USUBO;
10106 break;
10107 default:
10108 llvm_unreachable("Expected method to receive signed or unsigned saturation "
10109 "addition or subtraction node.");
10110 }
10111
10112 // FIXME: Should really try to split the vector in case it's legal on a
10113 // subvector.
10115 return DAG.UnrollVectorOp(Node);
10116
10117 unsigned BitWidth = LHS.getScalarValueSizeInBits();
10118 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10119 SDValue Result = DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
10120 SDValue SumDiff = Result.getValue(0);
10121 SDValue Overflow = Result.getValue(1);
10122 SDValue Zero = DAG.getConstant(0, dl, VT);
10123 SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);
10124
10125 if (Opcode == ISD::UADDSAT) {
10127 // (LHS + RHS) | OverflowMask
10128 SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
10129 return DAG.getNode(ISD::OR, dl, VT, SumDiff, OverflowMask);
10130 }
10131 // Overflow ? 0xffff.... : (LHS + RHS)
10132 return DAG.getSelect(dl, VT, Overflow, AllOnes, SumDiff);
10133 }
10134
10135 if (Opcode == ISD::USUBSAT) {
10137 // (LHS - RHS) & ~OverflowMask
10138 SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
10139 SDValue Not = DAG.getNOT(dl, OverflowMask, VT);
10140 return DAG.getNode(ISD::AND, dl, VT, SumDiff, Not);
10141 }
10142 // Overflow ? 0 : (LHS - RHS)
10143 return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
10144 }
10145
10146 if (Opcode == ISD::SADDSAT || Opcode == ISD::SSUBSAT) {
10149
10150 KnownBits KnownLHS = DAG.computeKnownBits(LHS);
10151 KnownBits KnownRHS = DAG.computeKnownBits(RHS);
10152
10153 // If either of the operand signs are known, then they are guaranteed to
10154 // only saturate in one direction. If non-negative they will saturate
10155 // towards SIGNED_MAX, if negative they will saturate towards SIGNED_MIN.
10156 //
10157 // In the case of ISD::SSUBSAT, 'x - y' is equivalent to 'x + (-y)', so the
10158 // sign of 'y' has to be flipped.
10159
10160 bool LHSIsNonNegative = KnownLHS.isNonNegative();
10161 bool RHSIsNonNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNonNegative()
10162 : KnownRHS.isNegative();
10163 if (LHSIsNonNegative || RHSIsNonNegative) {
10164 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
10165 return DAG.getSelect(dl, VT, Overflow, SatMax, SumDiff);
10166 }
10167
10168 bool LHSIsNegative = KnownLHS.isNegative();
10169 bool RHSIsNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNegative()
10170 : KnownRHS.isNonNegative();
10171 if (LHSIsNegative || RHSIsNegative) {
10172 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
10173 return DAG.getSelect(dl, VT, Overflow, SatMin, SumDiff);
10174 }
10175 }
10176
10177 // Overflow ? (SumDiff >> BW) ^ MinVal : SumDiff
10179 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
10180 SDValue Shift = DAG.getNode(ISD::SRA, dl, VT, SumDiff,
10181 DAG.getConstant(BitWidth - 1, dl, VT));
10182 Result = DAG.getNode(ISD::XOR, dl, VT, Shift, SatMin);
10183 return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
10184}
10185
10187 unsigned Opcode = Node->getOpcode();
10188 bool IsSigned = Opcode == ISD::SSHLSAT;
10189 SDValue LHS = Node->getOperand(0);
10190 SDValue RHS = Node->getOperand(1);
10191 EVT VT = LHS.getValueType();
10192 SDLoc dl(Node);
10193
10194 assert((Node->getOpcode() == ISD::SSHLSAT ||
10195 Node->getOpcode() == ISD::USHLSAT) &&
10196 "Expected a SHLSAT opcode");
10197 assert(VT == RHS.getValueType() && "Expected operands to be the same type");
10198 assert(VT.isInteger() && "Expected operands to be integers");
10199
10201 return DAG.UnrollVectorOp(Node);
10202
10203 // If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate.
10204
10205 unsigned BW = VT.getScalarSizeInBits();
10206 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10207 SDValue Result = DAG.getNode(ISD::SHL, dl, VT, LHS, RHS);
10208 SDValue Orig =
10209 DAG.getNode(IsSigned ? ISD::SRA : ISD::SRL, dl, VT, Result, RHS);
10210
10211 SDValue SatVal;
10212 if (IsSigned) {
10213 SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(BW), dl, VT);
10214 SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(BW), dl, VT);
10215 SDValue Cond =
10216 DAG.getSetCC(dl, BoolVT, LHS, DAG.getConstant(0, dl, VT), ISD::SETLT);
10217 SatVal = DAG.getSelect(dl, VT, Cond, SatMin, SatMax);
10218 } else {
10219 SatVal = DAG.getConstant(APInt::getMaxValue(BW), dl, VT);
10220 }
10221 SDValue Cond = DAG.getSetCC(dl, BoolVT, LHS, Orig, ISD::SETNE);
10222 return DAG.getSelect(dl, VT, Cond, SatVal, Result);
10223}
10224
10226 bool Signed, EVT WideVT,
10227 const SDValue LL, const SDValue LH,
10228 const SDValue RL, const SDValue RH,
10229 SDValue &Lo, SDValue &Hi) const {
10230 // We can fall back to a libcall with an illegal type for the MUL if we
10231 // have a libcall big enough.
10232 // Also, we can fall back to a division in some cases, but that's a big
10233 // performance hit in the general case.
10234 RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
10235 if (WideVT == MVT::i16)
10236 LC = RTLIB::MUL_I16;
10237 else if (WideVT == MVT::i32)
10238 LC = RTLIB::MUL_I32;
10239 else if (WideVT == MVT::i64)
10240 LC = RTLIB::MUL_I64;
10241 else if (WideVT == MVT::i128)
10242 LC = RTLIB::MUL_I128;
10243
10244 if (LC == RTLIB::UNKNOWN_LIBCALL || !getLibcallName(LC)) {
10245 // We'll expand the multiplication by brute force because we have no other
10246 // options. This is a trivially-generalized version of the code from
10247 // Hacker's Delight (itself derived from Knuth's Algorithm M from section
10248 // 4.3.1).
10249 EVT VT = LL.getValueType();
10250 unsigned Bits = VT.getSizeInBits();
10251 unsigned HalfBits = Bits >> 1;
10252 SDValue Mask =
10253 DAG.getConstant(APInt::getLowBitsSet(Bits, HalfBits), dl, VT);
10254 SDValue LLL = DAG.getNode(ISD::AND, dl, VT, LL, Mask);
10255 SDValue RLL = DAG.getNode(ISD::AND, dl, VT, RL, Mask);
10256
10257 SDValue T = DAG.getNode(ISD::MUL, dl, VT, LLL, RLL);
10258 SDValue TL = DAG.getNode(ISD::AND, dl, VT, T, Mask);
10259
10260 SDValue Shift = DAG.getShiftAmountConstant(HalfBits, VT, dl);
10261 SDValue TH = DAG.getNode(ISD::SRL, dl, VT, T, Shift);
10262 SDValue LLH = DAG.getNode(ISD::SRL, dl, VT, LL, Shift);
10263 SDValue RLH = DAG.getNode(ISD::SRL, dl, VT, RL, Shift);
10264
10265 SDValue U = DAG.getNode(ISD::ADD, dl, VT,
10266 DAG.getNode(ISD::MUL, dl, VT, LLH, RLL), TH);
10267 SDValue UL = DAG.getNode(ISD::AND, dl, VT, U, Mask);
10268 SDValue UH = DAG.getNode(ISD::SRL, dl, VT, U, Shift);
10269
10270 SDValue V = DAG.getNode(ISD::ADD, dl, VT,
10271 DAG.getNode(ISD::MUL, dl, VT, LLL, RLH), UL);
10272 SDValue VH = DAG.getNode(ISD::SRL, dl, VT, V, Shift);
10273
10274 SDValue W =
10275 DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::MUL, dl, VT, LLH, RLH),
10276 DAG.getNode(ISD::ADD, dl, VT, UH, VH));
10277 Lo = DAG.getNode(ISD::ADD, dl, VT, TL,
10278 DAG.getNode(ISD::SHL, dl, VT, V, Shift));
10279
10280 Hi = DAG.getNode(ISD::ADD, dl, VT, W,
10281 DAG.getNode(ISD::ADD, dl, VT,
10282 DAG.getNode(ISD::MUL, dl, VT, RH, LL),
10283 DAG.getNode(ISD::MUL, dl, VT, RL, LH)));
10284 } else {
10285 // Attempt a libcall.
10286 SDValue Ret;
10288 CallOptions.setSExt(Signed);
10289 CallOptions.setIsPostTypeLegalization(true);
10290 if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) {
10291 // Halves of WideVT are packed into registers in different order
10292 // depending on platform endianness. This is usually handled by
10293 // the C calling convention, but we can't defer to it in
10294 // the legalizer.
10295 SDValue Args[] = {LL, LH, RL, RH};
10296 Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
10297 } else {
10298 SDValue Args[] = {LH, LL, RH, RL};
10299 Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
10300 }
10301 assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
10302 "Ret value is a collection of constituent nodes holding result.");
10303 if (DAG.getDataLayout().isLittleEndian()) {
10304 // Same as above.
10305 Lo = Ret.getOperand(0);
10306 Hi = Ret.getOperand(1);
10307 } else {
10308 Lo = Ret.getOperand(1);
10309 Hi = Ret.getOperand(0);
10310 }
10311 }
10312}
10313
10315 bool Signed, const SDValue LHS,
10316 const SDValue RHS, SDValue &Lo,
10317 SDValue &Hi) const {
10318 EVT VT = LHS.getValueType();
10319 assert(RHS.getValueType() == VT && "Mismatching operand types");
10320
10321 SDValue HiLHS;
10322 SDValue HiRHS;
10323 if (Signed) {
10324 // The high part is obtained by SRA'ing all but one of the bits of low
10325 // part.
10326 unsigned LoSize = VT.getFixedSizeInBits();
10327 HiLHS = DAG.getNode(
10328 ISD::SRA, dl, VT, LHS,
10329 DAG.getConstant(LoSize - 1, dl, getPointerTy(DAG.getDataLayout())));
10330 HiRHS = DAG.getNode(
10331 ISD::SRA, dl, VT, RHS,
10332 DAG.getConstant(LoSize - 1, dl, getPointerTy(DAG.getDataLayout())));
10333 } else {
10334 HiLHS = DAG.getConstant(0, dl, VT);
10335 HiRHS = DAG.getConstant(0, dl, VT);
10336 }
10337 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);
10338 forceExpandWideMUL(DAG, dl, Signed, WideVT, LHS, HiLHS, RHS, HiRHS, Lo, Hi);
10339}
10340
10341SDValue
10343 assert((Node->getOpcode() == ISD::SMULFIX ||
10344 Node->getOpcode() == ISD::UMULFIX ||
10345 Node->getOpcode() == ISD::SMULFIXSAT ||
10346 Node->getOpcode() == ISD::UMULFIXSAT) &&
10347 "Expected a fixed point multiplication opcode");
10348
10349 SDLoc dl(Node);
10350 SDValue LHS = Node->getOperand(0);
10351 SDValue RHS = Node->getOperand(1);
10352 EVT VT = LHS.getValueType();
10353 unsigned Scale = Node->getConstantOperandVal(2);
10354 bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
10355 Node->getOpcode() == ISD::UMULFIXSAT);
10356 bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
10357 Node->getOpcode() == ISD::SMULFIXSAT);
10358 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10359 unsigned VTSize = VT.getScalarSizeInBits();
10360
10361 if (!Scale) {
10362 // [us]mul.fix(a, b, 0) -> mul(a, b)
10363 if (!Saturating) {
10365 return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
10366 } else if (Signed && isOperationLegalOrCustom(ISD::SMULO, VT)) {
10367 SDValue Result =
10368 DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
10369 SDValue Product = Result.getValue(0);
10370 SDValue Overflow = Result.getValue(1);
10371 SDValue Zero = DAG.getConstant(0, dl, VT);
10372
10373 APInt MinVal = APInt::getSignedMinValue(VTSize);
10374 APInt MaxVal = APInt::getSignedMaxValue(VTSize);
10375 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
10376 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
10377 // Xor the inputs, if resulting sign bit is 0 the product will be
10378 // positive, else negative.
10379 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
10380 SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Xor, Zero, ISD::SETLT);
10381 Result = DAG.getSelect(dl, VT, ProdNeg, SatMin, SatMax);
10382 return DAG.getSelect(dl, VT, Overflow, Result, Product);
10383 } else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) {
10384 SDValue Result =
10385 DAG.getNode(ISD::UMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
10386 SDValue Product = Result.getValue(0);
10387 SDValue Overflow = Result.getValue(1);
10388
10389 APInt MaxVal = APInt::getMaxValue(VTSize);
10390 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
10391 return DAG.getSelect(dl, VT, Overflow, SatMax, Product);
10392 }
10393 }
10394
10395 assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
10396 "Expected scale to be less than the number of bits if signed or at "
10397 "most the number of bits if unsigned.");
10398 assert(LHS.getValueType() == RHS.getValueType() &&
10399 "Expected both operands to be the same type");
10400
10401 // Get the upper and lower bits of the result.
10402 SDValue Lo, Hi;
10403 unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
10404 unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
10405 if (isOperationLegalOrCustom(LoHiOp, VT)) {
10406 SDValue Result = DAG.getNode(LoHiOp, dl, DAG.getVTList(VT, VT), LHS, RHS);
10407 Lo = Result.getValue(0);
10408 Hi = Result.getValue(1);
10409 } else if (isOperationLegalOrCustom(HiOp, VT)) {
10410 Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
10411 Hi = DAG.getNode(HiOp, dl, VT, LHS, RHS);
10412 } else if (VT.isVector()) {
10413 return SDValue();
10414 } else {
10415 forceExpandWideMUL(DAG, dl, Signed, LHS, RHS, Lo, Hi);
10416 }
10417
10418 if (Scale == VTSize)
10419 // Result is just the top half since we'd be shifting by the width of the
10420 // operand. Overflow impossible so this works for both UMULFIX and
10421 // UMULFIXSAT.
10422 return Hi;
10423
10424 // The result will need to be shifted right by the scale since both operands
10425 // are scaled. The result is given to us in 2 halves, so we only want part of
10426 // both in the result.
10427 SDValue Result = DAG.getNode(ISD::FSHR, dl, VT, Hi, Lo,
10428 DAG.getShiftAmountConstant(Scale, VT, dl));
10429 if (!Saturating)
10430 return Result;
10431
10432 if (!Signed) {
10433 // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
10434 // widened multiplication) aren't all zeroes.
10435
10436 // Saturate to max if ((Hi >> Scale) != 0),
10437 // which is the same as if (Hi > ((1 << Scale) - 1))
10438 APInt MaxVal = APInt::getMaxValue(VTSize);
10439 SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale),
10440 dl, VT);
10441 Result = DAG.getSelectCC(dl, Hi, LowMask,
10442 DAG.getConstant(MaxVal, dl, VT), Result,
10443 ISD::SETUGT);
10444
10445 return Result;
10446 }
10447
10448 // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
10449 // widened multiplication) aren't all ones or all zeroes.
10450
10451 SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(VTSize), dl, VT);
10452 SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(VTSize), dl, VT);
10453
10454 if (Scale == 0) {
10455 SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, Lo,
10456 DAG.getShiftAmountConstant(VTSize - 1, VT, dl));
10457 SDValue Overflow = DAG.getSetCC(dl, BoolVT, Hi, Sign, ISD::SETNE);
10458 // Saturated to SatMin if wide product is negative, and SatMax if wide
10459 // product is positive ...
10460 SDValue Zero = DAG.getConstant(0, dl, VT);
10461 SDValue ResultIfOverflow = DAG.getSelectCC(dl, Hi, Zero, SatMin, SatMax,
10462 ISD::SETLT);
10463 // ... but only if we overflowed.
10464 return DAG.getSelect(dl, VT, Overflow, ResultIfOverflow, Result);
10465 }
10466
10467 // We handled Scale==0 above so all the bits to examine is in Hi.
10468
10469 // Saturate to max if ((Hi >> (Scale - 1)) > 0),
10470 // which is the same as if (Hi > (1 << (Scale - 1)) - 1)
10471 SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale - 1),
10472 dl, VT);
10473 Result = DAG.getSelectCC(dl, Hi, LowMask, SatMax, Result, ISD::SETGT);
10474 // Saturate to min if (Hi >> (Scale - 1)) < -1),
10475 // which is the same as if (HI < (-1 << (Scale - 1))
10476 SDValue HighMask =
10477 DAG.getConstant(APInt::getHighBitsSet(VTSize, VTSize - Scale + 1),
10478 dl, VT);
10479 Result = DAG.getSelectCC(dl, Hi, HighMask, SatMin, Result, ISD::SETLT);
10480 return Result;
10481}
10482
10483SDValue
10485 SDValue LHS, SDValue RHS,
10486 unsigned Scale, SelectionDAG &DAG) const {
10487 assert((Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT ||
10488 Opcode == ISD::UDIVFIX || Opcode == ISD::UDIVFIXSAT) &&
10489 "Expected a fixed point division opcode");
10490
10491 EVT VT = LHS.getValueType();
10492 bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
10493 bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
10494 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10495
10496 // If there is enough room in the type to upscale the LHS or downscale the
10497 // RHS before the division, we can perform it in this type without having to
10498 // resize. For signed operations, the LHS headroom is the number of
10499 // redundant sign bits, and for unsigned ones it is the number of zeroes.
10500 // The headroom for the RHS is the number of trailing zeroes.
10501 unsigned LHSLead = Signed ? DAG.ComputeNumSignBits(LHS) - 1
10503 unsigned RHSTrail = DAG.computeKnownBits(RHS).countMinTrailingZeros();
10504
10505 // For signed saturating operations, we need to be able to detect true integer
10506 // division overflow; that is, when you have MIN / -EPS. However, this
10507 // is undefined behavior and if we emit divisions that could take such
10508 // values it may cause undesired behavior (arithmetic exceptions on x86, for
10509 // example).
10510 // Avoid this by requiring an extra bit so that we never get this case.
10511 // FIXME: This is a bit unfortunate as it means that for an 8-bit 7-scale
10512 // signed saturating division, we need to emit a whopping 32-bit division.
10513 if (LHSLead + RHSTrail < Scale + (unsigned)(Saturating && Signed))
10514 return SDValue();
10515
10516 unsigned LHSShift = std::min(LHSLead, Scale);
10517 unsigned RHSShift = Scale - LHSShift;
10518
10519 // At this point, we know that if we shift the LHS up by LHSShift and the
10520 // RHS down by RHSShift, we can emit a regular division with a final scaling
10521 // factor of Scale.
10522
10523 if (LHSShift)
10524 LHS = DAG.getNode(ISD::SHL, dl, VT, LHS,
10525 DAG.getShiftAmountConstant(LHSShift, VT, dl));
10526 if (RHSShift)
10527 RHS = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, VT, RHS,
10528 DAG.getShiftAmountConstant(RHSShift, VT, dl));
10529
10530 SDValue Quot;
10531 if (Signed) {
10532 // For signed operations, if the resulting quotient is negative and the
10533 // remainder is nonzero, subtract 1 from the quotient to round towards
10534 // negative infinity.
10535 SDValue Rem;
10536 // FIXME: Ideally we would always produce an SDIVREM here, but if the
10537 // type isn't legal, SDIVREM cannot be expanded. There is no reason why
10538 // we couldn't just form a libcall, but the type legalizer doesn't do it.
10539 if (isTypeLegal(VT) &&
10541 Quot = DAG.getNode(ISD::SDIVREM, dl,
10542 DAG.getVTList(VT, VT),
10543 LHS, RHS);
10544 Rem = Quot.getValue(1);
10545 Quot = Quot.getValue(0);
10546 } else {
10547 Quot = DAG.getNode(ISD::SDIV, dl, VT,
10548 LHS, RHS);
10549 Rem = DAG.getNode(ISD::SREM, dl, VT,
10550 LHS, RHS);
10551 }
10552 SDValue Zero = DAG.getConstant(0, dl, VT);
10553 SDValue RemNonZero = DAG.getSetCC(dl, BoolVT, Rem, Zero, ISD::SETNE);
10554 SDValue LHSNeg = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETLT);
10555 SDValue RHSNeg = DAG.getSetCC(dl, BoolVT, RHS, Zero, ISD::SETLT);
10556 SDValue QuotNeg = DAG.getNode(ISD::XOR, dl, BoolVT, LHSNeg, RHSNeg);
10557 SDValue Sub1 = DAG.getNode(ISD::SUB, dl, VT, Quot,
10558 DAG.getConstant(1, dl, VT));
10559 Quot = DAG.getSelect(dl, VT,
10560 DAG.getNode(ISD::AND, dl, BoolVT, RemNonZero, QuotNeg),
10561 Sub1, Quot);
10562 } else
10563 Quot = DAG.getNode(ISD::UDIV, dl, VT,
10564 LHS, RHS);
10565
10566 return Quot;
10567}
10568
10570 SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
10571 SDLoc dl(Node);
10572 SDValue LHS = Node->getOperand(0);
10573 SDValue RHS = Node->getOperand(1);
10574 bool IsAdd = Node->getOpcode() == ISD::UADDO;
10575
10576 // If UADDO_CARRY/SUBO_CARRY is legal, use that instead.
10577 unsigned OpcCarry = IsAdd ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
10578 if (isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) {
10579 SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1));
10580 SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(),
10581 { LHS, RHS, CarryIn });
10582 Result = SDValue(NodeCarry.getNode(), 0);
10583 Overflow = SDValue(NodeCarry.getNode(), 1);
10584 return;
10585 }
10586
10587 Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
10588 LHS.getValueType(), LHS, RHS);
10589
10590 EVT ResultType = Node->getValueType(1);
10591 EVT SetCCType = getSetCCResultType(
10592 DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
10593 SDValue SetCC;
10594 if (IsAdd && isOneConstant(RHS)) {
10595 // Special case: uaddo X, 1 overflowed if X+1 is 0. This potential reduces
10596 // the live range of X. We assume comparing with 0 is cheap.
10597 // The general case (X + C) < C is not necessarily beneficial. Although we
10598 // reduce the live range of X, we may introduce the materialization of
10599 // constant C.
10600 SetCC =
10601 DAG.getSetCC(dl, SetCCType, Result,
10602 DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETEQ);
10603 } else if (IsAdd && isAllOnesConstant(RHS)) {
10604 // Special case: uaddo X, -1 overflows if X != 0.
10605 SetCC =
10606 DAG.getSetCC(dl, SetCCType, LHS,
10607 DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETNE);
10608 } else {
10610 SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC);
10611 }
10612 Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
10613}
10614
10616 SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
10617 SDLoc dl(Node);
10618 SDValue LHS = Node->getOperand(0);
10619 SDValue RHS = Node->getOperand(1);
10620 bool IsAdd = Node->getOpcode() == ISD::SADDO;
10621
10622 Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
10623 LHS.getValueType(), LHS, RHS);
10624
10625 EVT ResultType = Node->getValueType(1);
10626 EVT OType = getSetCCResultType(
10627 DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
10628
10629 // If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
10630 unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
10631 if (isOperationLegal(OpcSat, LHS.getValueType())) {
10632 SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
10633 SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
10634 Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
10635 return;
10636 }
10637
10638 SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
10639
10640 // For an addition, the result should be less than one of the operands (LHS)
10641 // if and only if the other operand (RHS) is negative, otherwise there will
10642 // be overflow.
10643 // For a subtraction, the result should be less than one of the operands
10644 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
10645 // otherwise there will be overflow.
10646 SDValue ResultLowerThanLHS = DAG.getSetCC(dl, OType, Result, LHS, ISD::SETLT);
10647 SDValue ConditionRHS =
10648 DAG.getSetCC(dl, OType, RHS, Zero, IsAdd ? ISD::SETLT : ISD::SETGT);
10649
10650 Overflow = DAG.getBoolExtOrTrunc(
10651 DAG.getNode(ISD::XOR, dl, OType, ConditionRHS, ResultLowerThanLHS), dl,
10652 ResultType, ResultType);
10653}
10654
10656 SDValue &Overflow, SelectionDAG &DAG) const {
10657 SDLoc dl(Node);
10658 EVT VT = Node->getValueType(0);
10659 EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10660 SDValue LHS = Node->getOperand(0);
10661 SDValue RHS = Node->getOperand(1);
10662 bool isSigned = Node->getOpcode() == ISD::SMULO;
10663
10664 // For power-of-two multiplications we can use a simpler shift expansion.
10665 if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) {
10666 const APInt &C = RHSC->getAPIntValue();
10667 // mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
10668 if (C.isPowerOf2()) {
10669 // smulo(x, signed_min) is same as umulo(x, signed_min).
10670 bool UseArithShift = isSigned && !C.isMinSignedValue();
10671 SDValue ShiftAmt = DAG.getShiftAmountConstant(C.logBase2(), VT, dl);
10672 Result = DAG.getNode(ISD::SHL, dl, VT, LHS, ShiftAmt);
10673 Overflow = DAG.getSetCC(dl, SetCCVT,
10674 DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
10675 dl, VT, Result, ShiftAmt),
10676 LHS, ISD::SETNE);
10677 return true;
10678 }
10679 }
10680
10681 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
10682 if (VT.isVector())
10683 WideVT =
10685
10686 SDValue BottomHalf;
10687 SDValue TopHalf;
10688 static const unsigned Ops[2][3] =
10691 if (isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
10692 BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
10693 TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
10694 } else if (isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
10695 BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
10696 RHS);
10697 TopHalf = BottomHalf.getValue(1);
10698 } else if (isTypeLegal(WideVT)) {
10699 LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
10700 RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
10701 SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
10702 BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
10703 SDValue ShiftAmt =
10704 DAG.getShiftAmountConstant(VT.getScalarSizeInBits(), WideVT, dl);
10705 TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT,
10706 DAG.getNode(ISD::SRL, dl, WideVT, Mul, ShiftAmt));
10707 } else {
10708 if (VT.isVector())
10709 return false;
10710
10711 forceExpandWideMUL(DAG, dl, isSigned, LHS, RHS, BottomHalf, TopHalf);
10712 }
10713
10714 Result = BottomHalf;
10715 if (isSigned) {
10716 SDValue ShiftAmt = DAG.getShiftAmountConstant(
10717 VT.getScalarSizeInBits() - 1, BottomHalf.getValueType(), dl);
10718 SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
10719 Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE);
10720 } else {
10721 Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf,
10722 DAG.getConstant(0, dl, VT), ISD::SETNE);
10723 }
10724
10725 // Truncate the result if SetCC returns a larger type than needed.
10726 EVT RType = Node->getValueType(1);
10727 if (RType.bitsLT(Overflow.getValueType()))
10728 Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow);
10729
10730 assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
10731 "Unexpected result type for S/UMULO legalization");
10732 return true;
10733}
10734
10736 SDLoc dl(Node);
10737 unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
10738 SDValue Op = Node->getOperand(0);
10739 EVT VT = Op.getValueType();
10740
10741 if (VT.isScalableVector())
10743 "Expanding reductions for scalable vectors is undefined.");
10744
10745 // Try to use a shuffle reduction for power of two vectors.
10746 if (VT.isPow2VectorType()) {
10747 while (VT.getVectorNumElements() > 1) {
10748 EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
10749 if (!isOperationLegalOrCustom(BaseOpcode, HalfVT))
10750 break;
10751
10752 SDValue Lo, Hi;
10753 std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
10754 Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi, Node->getFlags());
10755 VT = HalfVT;
10756 }
10757 }
10758
10759 EVT EltVT = VT.getVectorElementType();
10760 unsigned NumElts = VT.getVectorNumElements();
10761
10763 DAG.ExtractVectorElements(Op, Ops, 0, NumElts);
10764
10765 SDValue Res = Ops[0];
10766 for (unsigned i = 1; i < NumElts; i++)
10767 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Node->getFlags());
10768
10769 // Result type may be wider than element type.
10770 if (EltVT != Node->getValueType(0))
10771 Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
10772 return Res;
10773}
10774
10776 SDLoc dl(Node);
10777 SDValue AccOp = Node->getOperand(0);
10778 SDValue VecOp = Node->getOperand(1);
10779 SDNodeFlags Flags = Node->getFlags();
10780
10781 EVT VT = VecOp.getValueType();
10782 EVT EltVT = VT.getVectorElementType();
10783
10784 if (VT.isScalableVector())
10786 "Expanding reductions for scalable vectors is undefined.");
10787
10788 unsigned NumElts = VT.getVectorNumElements();
10789
10791 DAG.ExtractVectorElements(VecOp, Ops, 0, NumElts);
10792
10793 unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
10794
10795 SDValue Res = AccOp;
10796 for (unsigned i = 0; i < NumElts; i++)
10797 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Flags);
10798
10799 return Res;
10800}
10801
10803 SelectionDAG &DAG) const {
10804 EVT VT = Node->getValueType(0);
10805 SDLoc dl(Node);
10806 bool isSigned = Node->getOpcode() == ISD::SREM;
10807 unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
10808 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
10809 SDValue Dividend = Node->getOperand(0);
10810 SDValue Divisor = Node->getOperand(1);
10811 if (isOperationLegalOrCustom(DivRemOpc, VT)) {
10812 SDVTList VTs = DAG.getVTList(VT, VT);
10813 Result = DAG.getNode(DivRemOpc, dl, VTs, Dividend, Divisor).getValue(1);
10814 return true;
10815 }
10816 if (isOperationLegalOrCustom(DivOpc, VT)) {
10817 // X % Y -> X-X/Y*Y
10818 SDValue Divide = DAG.getNode(DivOpc, dl, VT, Dividend, Divisor);
10819 SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Divide, Divisor);
10820 Result = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
10821 return true;
10822 }
10823 return false;
10824}
10825
10827 SelectionDAG &DAG) const {
10828 bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT;
10829 SDLoc dl(SDValue(Node, 0));
10830 SDValue Src = Node->getOperand(0);
10831
10832 // DstVT is the result type, while SatVT is the size to which we saturate
10833 EVT SrcVT = Src.getValueType();
10834 EVT DstVT = Node->getValueType(0);
10835
10836 EVT SatVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
10837 unsigned SatWidth = SatVT.getScalarSizeInBits();
10838 unsigned DstWidth = DstVT.getScalarSizeInBits();
10839 assert(SatWidth <= DstWidth &&
10840 "Expected saturation width smaller than result width");
10841
10842 // Determine minimum and maximum integer values and their corresponding
10843 // floating-point values.
10844 APInt MinInt, MaxInt;
10845 if (IsSigned) {
10846 MinInt = APInt::getSignedMinValue(SatWidth).sext(DstWidth);
10847 MaxInt = APInt::getSignedMaxValue(SatWidth).sext(DstWidth);
10848 } else {
10849 MinInt = APInt::getMinValue(SatWidth).zext(DstWidth);
10850 MaxInt = APInt::getMaxValue(SatWidth).zext(DstWidth);
10851 }
10852
10853 // We cannot risk emitting FP_TO_XINT nodes with a source VT of [b]f16, as
10854 // libcall emission cannot handle this. Large result types will fail.
10855 if (SrcVT == MVT::f16 || SrcVT == MVT::bf16) {
10856 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Src);
10857 SrcVT = Src.getValueType();
10858 }
10859
10860 APFloat MinFloat(DAG.EVTToAPFloatSemantics(SrcVT));
10861 APFloat MaxFloat(DAG.EVTToAPFloatSemantics(SrcVT));
10862
10863 APFloat::opStatus MinStatus =
10864 MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
10865 APFloat::opStatus MaxStatus =
10866 MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
10867 bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
10868 !(MaxStatus & APFloat::opStatus::opInexact);
10869
10870 SDValue MinFloatNode = DAG.getConstantFP(MinFloat, dl, SrcVT);
10871 SDValue MaxFloatNode = DAG.getConstantFP(MaxFloat, dl, SrcVT);
10872
10873 // If the integer bounds are exactly representable as floats and min/max are
10874 // legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
10875 // of comparisons and selects.
10876 bool MinMaxLegal = isOperationLegal(ISD::FMINNUM, SrcVT) &&
10878 if (AreExactFloatBounds && MinMaxLegal) {
10879 SDValue Clamped = Src;
10880
10881 // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
10882 Clamped = DAG.getNode(ISD::FMAXNUM, dl, SrcVT, Clamped, MinFloatNode);
10883 // Clamp by MaxFloat from above. NaN cannot occur.
10884 Clamped = DAG.getNode(ISD::FMINNUM, dl, SrcVT, Clamped, MaxFloatNode);
10885 // Convert clamped value to integer.
10886 SDValue FpToInt = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT,
10887 dl, DstVT, Clamped);
10888
10889 // In the unsigned case we're done, because we mapped NaN to MinFloat,
10890 // which will cast to zero.
10891 if (!IsSigned)
10892 return FpToInt;
10893
10894 // Otherwise, select 0 if Src is NaN.
10895 SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
10896 EVT SetCCVT =
10897 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
10898 SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
10899 return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, FpToInt);
10900 }
10901
10902 SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT);
10903 SDValue MaxIntNode = DAG.getConstant(MaxInt, dl, DstVT);
10904
10905 // Result of direct conversion. The assumption here is that the operation is
10906 // non-trapping and it's fine to apply it to an out-of-range value if we
10907 // select it away later.
10908 SDValue FpToInt =
10909 DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl, DstVT, Src);
10910
10911 SDValue Select = FpToInt;
10912
10913 EVT SetCCVT =
10914 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
10915
10916 // If Src ULT MinFloat, select MinInt. In particular, this also selects
10917 // MinInt if Src is NaN.
10918 SDValue ULT = DAG.getSetCC(dl, SetCCVT, Src, MinFloatNode, ISD::SETULT);
10919 Select = DAG.getSelect(dl, DstVT, ULT, MinIntNode, Select);
10920 // If Src OGT MaxFloat, select MaxInt.
10921 SDValue OGT = DAG.getSetCC(dl, SetCCVT, Src, MaxFloatNode, ISD::SETOGT);
10922 Select = DAG.getSelect(dl, DstVT, OGT, MaxIntNode, Select);
10923
10924 // In the unsigned case we are done, because we mapped NaN to MinInt, which
10925 // is already zero.
10926 if (!IsSigned)
10927 return Select;
10928
10929 // Otherwise, select 0 if Src is NaN.
10930 SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
10931 SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
10932 return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, Select);
10933}
10934
10936 const SDLoc &dl,
10937 SelectionDAG &DAG) const {
10938 EVT OperandVT = Op.getValueType();
10939 if (OperandVT.getScalarType() == ResultVT.getScalarType())
10940 return Op;
10941 EVT ResultIntVT = ResultVT.changeTypeToInteger();
10942 // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
10943 // can induce double-rounding which may alter the results. We can
10944 // correct for this using a trick explained in: Boldo, Sylvie, and
10945 // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
10946 // World Congress. 2005.
10947 unsigned BitSize = OperandVT.getScalarSizeInBits();
10948 EVT WideIntVT = OperandVT.changeTypeToInteger();
10949 SDValue OpAsInt = DAG.getBitcast(WideIntVT, Op);
10950 SDValue SignBit =
10951 DAG.getNode(ISD::AND, dl, WideIntVT, OpAsInt,
10952 DAG.getConstant(APInt::getSignMask(BitSize), dl, WideIntVT));
10953 SDValue AbsWide;
10954 if (isOperationLegalOrCustom(ISD::FABS, OperandVT)) {
10955 AbsWide = DAG.getNode(ISD::FABS, dl, OperandVT, Op);
10956 } else {
10957 SDValue ClearedSign = DAG.getNode(
10958 ISD::AND, dl, WideIntVT, OpAsInt,
10959 DAG.getConstant(APInt::getSignedMaxValue(BitSize), dl, WideIntVT));
10960 AbsWide = DAG.getBitcast(OperandVT, ClearedSign);
10961 }
10962 SDValue AbsNarrow = DAG.getFPExtendOrRound(AbsWide, dl, ResultVT);
10963 SDValue AbsNarrowAsWide = DAG.getFPExtendOrRound(AbsNarrow, dl, OperandVT);
10964
10965 // We can keep the narrow value as-is if narrowing was exact (no
10966 // rounding error), the wide value was NaN (the narrow value is also
10967 // NaN and should be preserved) or if we rounded to the odd value.
10968 SDValue NarrowBits = DAG.getNode(ISD::BITCAST, dl, ResultIntVT, AbsNarrow);
10969 SDValue One = DAG.getConstant(1, dl, ResultIntVT);
10970 SDValue NegativeOne = DAG.getAllOnesConstant(dl, ResultIntVT);
10971 SDValue And = DAG.getNode(ISD::AND, dl, ResultIntVT, NarrowBits, One);
10972 EVT ResultIntVTCCVT = getSetCCResultType(
10973 DAG.getDataLayout(), *DAG.getContext(), And.getValueType());
10974 SDValue Zero = DAG.getConstant(0, dl, ResultIntVT);
10975 // The result is already odd so we don't need to do anything.
10976 SDValue AlreadyOdd = DAG.getSetCC(dl, ResultIntVTCCVT, And, Zero, ISD::SETNE);
10977
10978 EVT WideSetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
10979 AbsWide.getValueType());
10980 // We keep results which are exact, odd or NaN.
10981 SDValue KeepNarrow =
10982 DAG.getSetCC(dl, WideSetCCVT, AbsWide, AbsNarrowAsWide, ISD::SETUEQ);
10983 KeepNarrow = DAG.getNode(ISD::OR, dl, WideSetCCVT, KeepNarrow, AlreadyOdd);
10984 // We morally performed a round-down if AbsNarrow is smaller than
10985 // AbsWide.
10986 SDValue NarrowIsRd =
10987 DAG.getSetCC(dl, WideSetCCVT, AbsWide, AbsNarrowAsWide, ISD::SETOGT);
10988 // If the narrow value is odd or exact, pick it.
10989 // Otherwise, narrow is even and corresponds to either the rounded-up
10990 // or rounded-down value. If narrow is the rounded-down value, we want
10991 // the rounded-up value as it will be odd.
10992 SDValue Adjust = DAG.getSelect(dl, ResultIntVT, NarrowIsRd, One, NegativeOne);
10993 SDValue Adjusted = DAG.getNode(ISD::ADD, dl, ResultIntVT, NarrowBits, Adjust);
10994 Op = DAG.getSelect(dl, ResultIntVT, KeepNarrow, NarrowBits, Adjusted);
10995 int ShiftAmount = BitSize - ResultVT.getScalarSizeInBits();
10996 SDValue ShiftCnst = DAG.getShiftAmountConstant(ShiftAmount, WideIntVT, dl);
10997 SignBit = DAG.getNode(ISD::SRL, dl, WideIntVT, SignBit, ShiftCnst);
10998 SignBit = DAG.getNode(ISD::TRUNCATE, dl, ResultIntVT, SignBit);
10999 Op = DAG.getNode(ISD::OR, dl, ResultIntVT, Op, SignBit);
11000 return DAG.getNode(ISD::BITCAST, dl, ResultVT, Op);
11001}
11002
11004 assert(Node->getOpcode() == ISD::FP_ROUND && "Unexpected opcode!");
11005 SDValue Op = Node->getOperand(0);
11006 EVT VT = Node->getValueType(0);
11007 SDLoc dl(Node);
11008 if (VT.getScalarType() == MVT::bf16) {
11009 if (Node->getConstantOperandVal(1) == 1) {
11010 return DAG.getNode(ISD::FP_TO_BF16, dl, VT, Node->getOperand(0));
11011 }
11012 EVT OperandVT = Op.getValueType();
11013 SDValue IsNaN = DAG.getSetCC(
11014 dl,
11015 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), OperandVT),
11016 Op, Op, ISD::SETUO);
11017
11018 // We are rounding binary64/binary128 -> binary32 -> bfloat16. This
11019 // can induce double-rounding which may alter the results. We can
11020 // correct for this using a trick explained in: Boldo, Sylvie, and
11021 // Guillaume Melquiond. "When double rounding is odd." 17th IMACS
11022 // World Congress. 2005.
11023 EVT F32 = VT.isVector() ? VT.changeVectorElementType(MVT::f32) : MVT::f32;
11024 EVT I32 = F32.changeTypeToInteger();
11025 Op = expandRoundInexactToOdd(F32, Op, dl, DAG);
11026 Op = DAG.getNode(ISD::BITCAST, dl, I32, Op);
11027
11028 // Conversions should set NaN's quiet bit. This also prevents NaNs from
11029 // turning into infinities.
11030 SDValue NaN =
11031 DAG.getNode(ISD::OR, dl, I32, Op, DAG.getConstant(0x400000, dl, I32));
11032
11033 // Factor in the contribution of the low 16 bits.
11034 SDValue One = DAG.getConstant(1, dl, I32);
11035 SDValue Lsb = DAG.getNode(ISD::SRL, dl, I32, Op,
11036 DAG.getShiftAmountConstant(16, I32, dl));
11037 Lsb = DAG.getNode(ISD::AND, dl, I32, Lsb, One);
11038 SDValue RoundingBias =
11039 DAG.getNode(ISD::ADD, dl, I32, DAG.getConstant(0x7fff, dl, I32), Lsb);
11040 SDValue Add = DAG.getNode(ISD::ADD, dl, I32, Op, RoundingBias);
11041
11042 // Don't round if we had a NaN, we don't want to turn 0x7fffffff into
11043 // 0x80000000.
11044 Op = DAG.getSelect(dl, I32, IsNaN, NaN, Add);
11045
11046 // Now that we have rounded, shift the bits into position.
11047 Op = DAG.getNode(ISD::SRL, dl, I32, Op,
11048 DAG.getShiftAmountConstant(16, I32, dl));
11049 Op = DAG.getNode(ISD::BITCAST, dl, I32, Op);
11050 EVT I16 = I32.isVector() ? I32.changeVectorElementType(MVT::i16) : MVT::i16;
11051 Op = DAG.getNode(ISD::TRUNCATE, dl, I16, Op);
11052 return DAG.getNode(ISD::BITCAST, dl, VT, Op);
11053 }
11054 return SDValue();
11055}
11056
11058 SelectionDAG &DAG) const {
11059 assert(Node->getOpcode() == ISD::VECTOR_SPLICE && "Unexpected opcode!");
11060 assert(Node->getValueType(0).isScalableVector() &&
11061 "Fixed length vector types expected to use SHUFFLE_VECTOR!");
11062
11063 EVT VT = Node->getValueType(0);
11064 SDValue V1 = Node->getOperand(0);
11065 SDValue V2 = Node->getOperand(1);
11066 int64_t Imm = cast<ConstantSDNode>(Node->getOperand(2))->getSExtValue();
11067 SDLoc DL(Node);
11068
11069 // Expand through memory thusly:
11070 // Alloca CONCAT_VECTORS_TYPES(V1, V2) Ptr
11071 // Store V1, Ptr
11072 // Store V2, Ptr + sizeof(V1)
11073 // If (Imm < 0)
11074 // TrailingElts = -Imm
11075 // Ptr = Ptr + sizeof(V1) - (TrailingElts * sizeof(VT.Elt))
11076 // else
11077 // Ptr = Ptr + (Imm * sizeof(VT.Elt))
11078 // Res = Load Ptr
11079
11080 Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);
11081
11083 VT.getVectorElementCount() * 2);
11084 SDValue StackPtr = DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
11085 EVT PtrVT = StackPtr.getValueType();
11086 auto &MF = DAG.getMachineFunction();
11087 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
11088 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
11089
11090 // Store the lo part of CONCAT_VECTORS(V1, V2)
11091 SDValue StoreV1 = DAG.getStore(DAG.getEntryNode(), DL, V1, StackPtr, PtrInfo);
11092 // Store the hi part of CONCAT_VECTORS(V1, V2)
11093 SDValue OffsetToV2 = DAG.getVScale(
11094 DL, PtrVT,
11096 SDValue StackPtr2 = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, OffsetToV2);
11097 SDValue StoreV2 = DAG.getStore(StoreV1, DL, V2, StackPtr2, PtrInfo);
11098
11099 if (Imm >= 0) {
11100 // Load back the required element. getVectorElementPointer takes care of
11101 // clamping the index if it's out-of-bounds.
11102 StackPtr = getVectorElementPointer(DAG, StackPtr, VT, Node->getOperand(2));
11103 // Load the spliced result
11104 return DAG.getLoad(VT, DL, StoreV2, StackPtr,
11106 }
11107
11108 uint64_t TrailingElts = -Imm;
11109
11110 // NOTE: TrailingElts must be clamped so as not to read outside of V1:V2.
11111 TypeSize EltByteSize = VT.getVectorElementType().getStoreSize();
11112 SDValue TrailingBytes =
11113 DAG.getConstant(TrailingElts * EltByteSize, DL, PtrVT);
11114
11115 if (TrailingElts > VT.getVectorMinNumElements()) {
11116 SDValue VLBytes =
11117 DAG.getVScale(DL, PtrVT,
11118 APInt(PtrVT.getFixedSizeInBits(),
11120 TrailingBytes = DAG.getNode(ISD::UMIN, DL, PtrVT, TrailingBytes, VLBytes);
11121 }
11122
11123 // Calculate the start address of the spliced result.
11124 StackPtr2 = DAG.getNode(ISD::SUB, DL, PtrVT, StackPtr2, TrailingBytes);
11125
11126 // Load the spliced result
11127 return DAG.getLoad(VT, DL, StoreV2, StackPtr2,
11129}
11130
11132 SDValue &LHS, SDValue &RHS,
11133 SDValue &CC, SDValue Mask,
11134 SDValue EVL, bool &NeedInvert,
11135 const SDLoc &dl, SDValue &Chain,
11136 bool IsSignaling) const {
11137 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11138 MVT OpVT = LHS.getSimpleValueType();
11139 ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
11140 NeedInvert = false;
11141 assert(!EVL == !Mask && "VP Mask and EVL must either both be set or unset");
11142 bool IsNonVP = !EVL;
11143 switch (TLI.getCondCodeAction(CCCode, OpVT)) {
11144 default:
11145 llvm_unreachable("Unknown condition code action!");
11147 // Nothing to do.
11148 break;
11151 if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
11152 std::swap(LHS, RHS);
11153 CC = DAG.getCondCode(InvCC);
11154 return true;
11155 }
11156 // Swapping operands didn't work. Try inverting the condition.
11157 bool NeedSwap = false;
11158 InvCC = getSetCCInverse(CCCode, OpVT);
11159 if (!TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
11160 // If inverting the condition is not enough, try swapping operands
11161 // on top of it.
11162 InvCC = ISD::getSetCCSwappedOperands(InvCC);
11163 NeedSwap = true;
11164 }
11165 if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
11166 CC = DAG.getCondCode(InvCC);
11167 NeedInvert = true;
11168 if (NeedSwap)
11169 std::swap(LHS, RHS);
11170 return true;
11171 }
11172
11174 unsigned Opc = 0;
11175 switch (CCCode) {
11176 default:
11177 llvm_unreachable("Don't know how to expand this condition!");
11178 case ISD::SETUO:
11179 if (TLI.isCondCodeLegal(ISD::SETUNE, OpVT)) {
11180 CC1 = ISD::SETUNE;
11181 CC2 = ISD::SETUNE;
11182 Opc = ISD::OR;
11183 break;
11184 }
11185 assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) &&
11186 "If SETUE is expanded, SETOEQ or SETUNE must be legal!");
11187 NeedInvert = true;
11188 [[fallthrough]];
11189 case ISD::SETO:
11190 assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) &&
11191 "If SETO is expanded, SETOEQ must be legal!");
11192 CC1 = ISD::SETOEQ;
11193 CC2 = ISD::SETOEQ;
11194 Opc = ISD::AND;
11195 break;
11196 case ISD::SETONE:
11197 case ISD::SETUEQ:
11198 // If the SETUO or SETO CC isn't legal, we might be able to use
11199 // SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
11200 // of SETOGT/SETOLT to be legal, the other can be emulated by swapping
11201 // the operands.
11202 CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
11203 if (!TLI.isCondCodeLegal(CC2, OpVT) &&
11204 (TLI.isCondCodeLegal(ISD::SETOGT, OpVT) ||
11205 TLI.isCondCodeLegal(ISD::SETOLT, OpVT))) {
11206 CC1 = ISD::SETOGT;
11207 CC2 = ISD::SETOLT;
11208 Opc = ISD::OR;
11209 NeedInvert = ((unsigned)CCCode & 0x8U);
11210 break;
11211 }
11212 [[fallthrough]];
11213 case ISD::SETOEQ:
11214 case ISD::SETOGT:
11215 case ISD::SETOGE:
11216 case ISD::SETOLT:
11217 case ISD::SETOLE:
11218 case ISD::SETUNE:
11219 case ISD::SETUGT:
11220 case ISD::SETUGE:
11221 case ISD::SETULT:
11222 case ISD::SETULE:
11223 // If we are floating point, assign and break, otherwise fall through.
11224 if (!OpVT.isInteger()) {
11225 // We can use the 4th bit to tell if we are the unordered
11226 // or ordered version of the opcode.
11227 CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
11228 Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
11229 CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
11230 break;
11231 }
11232 // Fallthrough if we are unsigned integer.
11233 [[fallthrough]];
11234 case ISD::SETLE:
11235 case ISD::SETGT:
11236 case ISD::SETGE:
11237 case ISD::SETLT:
11238 case ISD::SETNE:
11239 case ISD::SETEQ:
11240 // If all combinations of inverting the condition and swapping operands
11241 // didn't work then we have no means to expand the condition.
11242 llvm_unreachable("Don't know how to expand this condition!");
11243 }
11244
11245 SDValue SetCC1, SetCC2;
11246 if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
11247 // If we aren't the ordered or unorder operation,
11248 // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
11249 if (IsNonVP) {
11250 SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling);
11251 SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling);
11252 } else {
11253 SetCC1 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC1, Mask, EVL);
11254 SetCC2 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC2, Mask, EVL);
11255 }
11256 } else {
11257 // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
11258 if (IsNonVP) {
11259 SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling);
11260 SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling);
11261 } else {
11262 SetCC1 = DAG.getSetCCVP(dl, VT, LHS, LHS, CC1, Mask, EVL);
11263 SetCC2 = DAG.getSetCCVP(dl, VT, RHS, RHS, CC2, Mask, EVL);
11264 }
11265 }
11266 if (Chain)
11267 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1),
11268 SetCC2.getValue(1));
11269 if (IsNonVP)
11270 LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
11271 else {
11272 // Transform the binary opcode to the VP equivalent.
11273 assert((Opc == ISD::OR || Opc == ISD::AND) && "Unexpected opcode");
11274 Opc = Opc == ISD::OR ? ISD::VP_OR : ISD::VP_AND;
11275 LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2, Mask, EVL);
11276 }
11277 RHS = SDValue();
11278 CC = SDValue();
11279 return true;
11280 }
11281 }
11282 return false;
11283}
unsigned const MachineRegisterInfo * MRI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
amdgpu AMDGPU Register Bank Select
basic Basic Alias true
block Block Frequency Analysis
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
return RetTy
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
uint64_t Addr
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static bool isSigned(unsigned int Opcode)
static bool isUndef(ArrayRef< int > Mask)
static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, const APInt &Demanded)
Check to see if the specified operand of the specified instruction is a constant integer.
#define RegName(no)
static LVOptions Options
Definition: LVOptions.cpp:25
lazy value info
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:528
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
unsigned const TargetRegisterInfo * TRI
#define T1
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define P(N)
const char LLVMTargetMachineRef TM
const char * Passes
R600 Clause Merge
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static bool lowerImmediateIfPossible(TargetLowering::ConstraintPair &P, SDValue Op, SelectionDAG *DAG, const TargetLowering &TLI)
If we have an immediate, see if we can lower it.
static SDValue expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG)
static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG, const APInt &UndefOp0, const APInt &UndefOp1)
Given a vector binary operation and known undefined elements for each input operand,...
static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx, EVT VecVT, const SDLoc &dl, ElementCount SubEC)
static unsigned getConstraintPiority(TargetLowering::ConstraintType CT)
Return a number indicating our preference for choosing a type of constraint over another,...
static std::optional< bool > isFCmpEqualZero(FPClassTest Test, const fltSemantics &Semantics, const MachineFunction &MF)
Returns a true value if this FPClassTest can be performed with an ordered fcmp to 0,...
static void turnVectorIntoSplatVector(MutableArrayRef< SDValue > Values, std::function< bool(SDValue)> Predicate, SDValue AlternativeReplacement=SDValue())
If all values in Values that don't match the predicate are same 'splat' value, then replace all value...
static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT)
static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
static SDValue combineShiftToAVG(SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI, const APInt &DemandedBits, const APInt &DemandedElts, unsigned Depth)
static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created)
Given an exact SDIV by a constant, create a multiplication with the multiplicative inverse of the con...
static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT, SDValue N0, const APInt &C1, ISD::CondCode Cond, const SDLoc &dl, SelectionDAG &DAG)
This file describes how to lower LLVM code to machine code.
static int Lookup(ArrayRef< TableEntry > Table, unsigned Opcode)
static SDValue scalarizeVectorStore(StoreSDNode *Store, MVT StoreVT, SelectionDAG &DAG)
Scalarize a vector store, bitcasting to TargetVT to determine the scalar type.
Value * RHS
Value * LHS
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition: APFloat.h:1193
static APFloat getSmallestNormalized(const fltSemantics &Sem, bool Negative=false)
Returns the smallest (by magnitude) normalized finite number in the given semantics.
Definition: APFloat.h:1026
APInt bitcastToAPInt() const
Definition: APFloat.h:1210
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
Definition: APFloat.h:1006
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Definition: APFloat.h:966
Class for arbitrary precision integers.
Definition: APInt.h:76
APInt udiv(const APInt &RHS) const
Unsigned division operation.
Definition: APInt.cpp:1543
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:212
static void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
Definition: APInt.cpp:1728
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition: APInt.h:1385
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition: APInt.h:427
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:981
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition: APInt.h:207
bool isMinSignedValue() const
Determine if this is the smallest signed value.
Definition: APInt.h:401
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1491
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
Definition: APInt.h:1370
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition: APInt.h:1364
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:1002
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1463
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:906
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition: APInt.h:184
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1308
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:349
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1160
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition: APInt.h:236
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:358
APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition: APInt.cpp:1636
void setSignBit()
Set the sign bit to 1.
Definition: APInt.h:1318
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1439
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:187
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
Definition: APInt.h:194
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:307
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition: APInt.h:1227
void clearAllBits()
Set every bit to 0.
Definition: APInt.h:1375
APInt reverseBits() const
Definition: APInt.cpp:737
void ashrInPlace(unsigned ShiftAmt)
Arithmetic right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:812
void negate()
Negate this APInt in place.
Definition: APInt.h:1421
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1589
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition: APInt.h:1548
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:620
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:197
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1482
unsigned countLeadingZeros() const
Definition: APInt.h:1556
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition: APInt.h:334
void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition: APInt.cpp:368
unsigned logBase2() const
Definition: APInt.h:1703
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition: APInt.h:453
void setAllBits()
Set every bit to 1.
Definition: APInt.h:1297
APInt multiplicativeInverse() const
Definition: APInt.cpp:1244
bool isMaxSignedValue() const
Determine if this is the largest signed value.
Definition: APInt.h:383
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition: APInt.h:312
bool ule(const APInt &RHS) const
Unsigned less or equal comparison.
Definition: APInt.h:1128
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:954
void setBits(unsigned loBit, unsigned hiBit)
Set the bits from loBit (inclusive) to hiBit (exclusive) to 1.
Definition: APInt.h:1345
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition: APInt.h:851
APInt byteSwap() const
Definition: APInt.cpp:715
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1235
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:418
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:284
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:274
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:178
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition: APInt.h:1367
APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition: APInt.cpp:453
bool isOne() const
Determine if this is a value of 1.
Definition: APInt.h:367
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition: APInt.h:264
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:217
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1513
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:836
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:829
unsigned countr_one() const
Count the number of trailing one bits.
Definition: APInt.h:1606
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1199
void setBitVal(unsigned BitPosition, bool BitValue)
Set a given bit to a given value.
Definition: APInt.h:1321
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
bool hasAttributes() const
Return true if the builder has IR-level attributes.
Definition: Attributes.h:1075
bool contains(Attribute::AttrKind A) const
Return true if the builder has the specified attribute.
AttrBuilder & removeAttribute(Attribute::AttrKind Val)
Remove an attribute from the builder.
bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
A "pseudo-class" with methods for operating on BUILD_VECTORs.
ConstantSDNode * getConstantSplatNode(const APInt &DemandedElts, BitVector *UndefElements=nullptr) const
Returns the demanded splatted constant or null if this is not a constant splat.
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
Register getLocReg() const
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1494
This class represents a function call, abstracting a target machine's calling convention.
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
Definition: Constants.h:705
const APInt & getAPIntValue() const
This is an important base class in LLVM.
Definition: Constant.h:41
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:238
bool isBigEndian() const
Definition: DataLayout.h:239
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:874
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:340
const GlobalValue * getGlobal() const
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
std::vector< std::string > ConstraintCodeVector
Definition: InlineAsm.h:102
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:278
void emitError(uint64_t LocCookie, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
Context object for machine code objects.
Definition: MCContext.h:81
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:35
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:397
Machine Value Type.
SimpleValueType SimpleTy
bool isInteger() const
Return true if this is an integer or a vector integer type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
DenormalMode getDenormalMode(const fltSemantics &FPType) const
Returns the denormal handling type for the default rounding mode of the function.
MCSymbol * getJTISymbol(unsigned JTI, MCContext &Ctx, bool isLinkerPrivate=false) const
getJTISymbol - Return the MCSymbol for the specified non-empty jump table.
Function & getFunction()
Return the LLVM function that this machine code represents.
@ EK_GPRel32BlockAddress
EK_GPRel32BlockAddress - Each entry is an address of block, encoded with a relocation as gp-relative,...
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
@ EK_BlockAddress
EK_BlockAddress - Each entry is a plain address of block, e.g.: .word LBB123.
@ EK_GPRel64BlockAddress
EK_GPRel64BlockAddress - Each entry is an address of block, encoded with a relocation as gp-relative,...
Flags getFlags() const
Return the raw flags of the source value,.
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
unsigned getAddressSpace() const
Return the address space for the associated pointer.
Align getAlign() const
Align getOriginalAlign() const
Returns alignment and volatility of the memory access.
bool isSimple() const
Returns true if the memory operation is neither atomic or volatile.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
const GlobalVariable * getNamedGlobal(StringRef Name) const
Return the global variable in the module with the specified name, of arbitrary type.
Definition: Module.h:461
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:307
iterator end() const
Definition: ArrayRef.h:357
iterator begin() const
Definition: ArrayRef.h:356
Class to represent pointers.
Definition: DerivedTypes.h:646
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
static SDNodeIterator end(const SDNode *N)
static SDNodeIterator begin(const SDNode *N)
Represents one node in the SelectionDAG.
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
SDNodeFlags getFlags() const
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
void setFlags(SDNodeFlags NewFlags)
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
bool use_empty() const
Return true if there are no nodes using value ResNo of Node.
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:225
bool willNotOverflowAdd(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the addition of 2 nodes can never overflow.
Align getReducedAlign(EVT VT, bool UseABI)
In most cases this function returns the ABI alignment for a given type, except for illegal vector typ...
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:722
SDValue getExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT, unsigned Opcode)
Convert Op, which must be of integer type, to the integer type VT, by either any/sign/zero-extending ...
Definition: SelectionDAG.h:954
unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
bool isKnownNeverSNaN(SDValue Op, unsigned Depth=0) const
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS)
Helper function to make it easier to build Select's if you just have operands and don't want to check...
const APInt * getValidMaximumShiftAmountConstant(SDValue V, const APInt &DemandedElts) const
If a SHL/SRA/SRL node V has constant shift amounts that are all less than the element bit-width of th...
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond, const SDLoc &dl)
Constant fold a setcc to true or false.
void ExtractVectorElements(SDValue Op, SmallVectorImpl< SDValue > &Args, unsigned Start=0, unsigned Count=0, EVT EltVT=EVT())
Append the extracted elements from Start to Count out of the vector Op in Args.
SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
SDNode * isConstantIntBuildVectorOrConstantInt(SDValue N) const
Test whether the given value is a constant int or similar node.
SDValue getJumpTableDebugInfo(int JTI, SDValue Chain, const SDLoc &DL)
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getGLOBAL_OFFSET_TABLE(EVT VT)
Return a GLOBAL_OFFSET_TABLE node. This does not have a useful SDLoc.
bool shouldOptForSize() const
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:478
static constexpr unsigned MaxRecursionDepth
Definition: SelectionDAG.h:448
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:828
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:472
bool doesNodeExist(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops)
Check if a node exists without modifying its flags.
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
SDValue getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, bool isTargetGA=false, unsigned TargetFlags=0)
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
Definition: SelectionDAG.h:659
std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
const APInt * getValidShiftAmountConstant(SDValue V, const APInt &DemandedElts) const
If a SHL/SRA/SRL node V has a constant or splat constant shift amount that is less than the element b...
bool isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly=false, unsigned Depth=0) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
Definition: SelectionDAG.h:862
void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
bool isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth=0) const
Test if the given value is known to have exactly one bit set.
bool isKnownNeverZero(SDValue Op, unsigned Depth=0) const
Test whether the given SDValue is known to contain non-zero value(s).
SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
static const fltSemantics & EVTToAPFloatSemantics(EVT VT)
Returns an APFloat semantics tag appropriate for the given type.
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:473
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
bool isKnownNeverZeroFloat(SDValue Op) const
Test whether the given floating point SDValue is known to never be positive or negative zero.
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
bool isKnownNeverNaN(SDValue Op, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:676
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:768
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:469
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
SDValue getCondCode(ISD::CondCode Cond)
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
Definition: SelectionDAG.h:485
SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL, bool LegalTypes=true)
SDValue getSetCCVP(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Mask, SDValue EVL)
Helper function to make it easier to build VP_SETCCs if you just have an ISD::CondCode instead of an ...
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:554
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
static void commuteMask(MutableArrayRef< int > Mask)
Change values in a shuffle permute mask assuming the two vector operands have swapped position.
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
This class is used to represent ISD::STORE nodes.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition: StringRef.h:557
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:257
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:131
iterator end() const
Definition: StringRef.h:113
Class to represent struct types.
Definition: DerivedTypes.h:216
void setAttributes(const CallBase *Call, unsigned ArgIdx)
Set CallLoweringInfo attribute flags based on a call instruction and called function attributes.
This base class for TargetLowering contains the SelectionDAG-independent parts that can be used from ...
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
virtual bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
virtual bool isShuffleMaskLegal(ArrayRef< int >, EVT) const
Targets can use this to indicate that they only support some VECTOR_SHUFFLE operations,...
virtual bool shouldRemoveRedundantExtend(SDValue Op) const
Return true (the default) if it is profitable to remove a sext_inreg(x) where the sext is redundant,...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const
Get the CallingConv that should be used for the specified libcall.
virtual bool isLegalICmpImmediate(int64_t) const
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
virtual bool isSafeMemOpType(MVT) const
Returns true if it's safe to use load / store of the specified type to expand memcpy / memset inline.
const TargetMachine & getTargetMachine() const
virtual bool isCtpopFast(EVT VT) const
Return true if ctpop instruction is fast.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
bool isPaddedAtMostSignificantBitsWhenStored(EVT VT) const
Indicates if any padding is guaranteed to go at the most significant bits when storing the type to me...
virtual EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &) const
Returns the target specific optimal type for load and store operations as a result of memset,...
LegalizeAction getCondCodeAction(ISD::CondCode CC, MVT VT) const
Return how the condition code should be treated: either it is legal, needs to be expanded to some oth...
virtual bool isCommutativeBinOp(unsigned Opcode) const
Returns true if the opcode is a commutative binary operation.
virtual bool isFPImmLegal(const APFloat &, EVT, bool ForCodeSize=false) const
Returns true if the target can instruction select the specified FP immediate natively.
virtual MVT::SimpleValueType getCmpLibcallReturnType() const
Return the ValueType for comparison libcalls.
virtual bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const
Should we transform the IR-optimal check for whether given truncation down into KeptBits would be trun...
bool isLegalRC(const TargetRegisterInfo &TRI, const TargetRegisterClass &RC) const
Return true if the value types that can be represented by the specified register class are all legal.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
virtual bool shouldExtendTypeInLibCall(EVT Type) const
Returns true if arguments should be extended in lib calls.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool shouldAvoidTransformToShift(EVT VT, unsigned Amount) const
Return true if creating a shift of the type by the given amount is not profitable.
virtual bool isFPExtFree(EVT DestVT, EVT SrcVT) const
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const
Return the ValueType of the result of SETCC operations.
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL, bool LegalTypes=true) const
Returns the type for the shift amount of a shift opcode.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
MVT getSimpleValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the MVT corresponding to this LLVM type. See getValueType.
BooleanContent getBooleanContents(bool isVec, bool isFloat) const
For targets without i1 registers, this gives the nature of the high-bits of boolean values held in ty...
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal on this target.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual bool isNarrowingProfitable(EVT SrcVT, EVT DestVT) const
Return true if it's profitable to narrow operations of type SrcVT to DestVT.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT) const
Return true if it is profitable to reduce a load to a smaller type.
virtual unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const
Return the maximum number of "x & (x - 1)" operations that can be done instead of deferring to a cust...
BooleanContent
Enum that describes how the target represents true/false values.
virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const
Return true if integer divide is usually cheaper than a sequence of several shifts,...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
virtual bool hasAndNotCompare(SDValue Y) const
Return true if the target should transform: (X & Y) == Y —> (~X & Y) == 0 (X & Y) !...
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
virtual bool isCtlzFast() const
Return true if ctlz instruction is fast.
virtual bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT, bool IsSigned) const
Return true if it is more correct/profitable to use strict FP_TO_INT conversion operations - canonica...
NegatibleCost
Enum that specifies when a float negation is beneficial.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
ISD::CondCode getCmpLibcallCC(RTLIB::Libcall Call) const
Get the CondCode that's to be used to test the result of the comparison libcall against zero.
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
std::vector< ArgListEntry > ArgListTy
virtual EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
bool isCondCodeLegalOrCustom(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal or custom on this target.
MVT getRegisterType(MVT VT) const
Return the type of registers that this ValueType will eventually require.
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
MulExpansionKind
Enum that specifies when a multiplication should be expanded.
static ISD::NodeType getExtendForContent(BooleanContent Content)
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT.
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions Ref: "Hacker's Delight" by Henry Warren 1...
virtual ConstraintWeight getMultipleConstraintMatchWeight(AsmOperandInfo &info, int maIndex) const
Examine constraint type and operand type and determine a weight value.
SDValue expandVPCTLZ(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTLZ/VP_CTLZ_ZERO_UNDEF nodes.
bool expandMULO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]MULO.
bool expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, SelectionDAG &DAG, MulExpansionKind Kind, SDValue LL=SDValue(), SDValue LH=SDValue(), SDValue RL=SDValue(), SDValue RH=SDValue()) const
Expand a MUL into two nodes.
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
virtual bool isUsedByReturnOnly(SDNode *, SDValue &) const
Return true if result of the specified node is used by a return node only.
virtual void computeKnownBitsForFrameIndex(int FIOp, KnownBits &Known, const MachineFunction &MF) const
Determine which of the bits of FrameIndex FIOp are known to be 0.
SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const
virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const
This method can be implemented by targets that want to expose additional information about sign bits ...
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
virtual unsigned computeNumSignBitsForTargetInstr(GISelKnownBits &Analysis, Register R, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
This method can be implemented by targets that want to expose additional information about sign bits ...
SDValue expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const
Expand VP_BSWAP nodes.
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
SDValue expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const
Expand a VECREDUCE_SEQ_* into an explicit ordered calculation.
SDValue expandCTLZ(SDNode *N, SelectionDAG &DAG) const
Expand CTLZ/CTLZ_ZERO_UNDEF nodes.
SDValue expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const
Expand BITREVERSE nodes.
SDValue expandCTTZ(SDNode *N, SelectionDAG &DAG) const
Expand CTTZ/CTTZ_ZERO_UNDEF nodes.
virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const
Expands target specific indirect branch for the case of JumpTable expansion.
SDValue expandABD(SDNode *N, SelectionDAG &DAG) const
Expand ABDS/ABDU nodes.
virtual Align computeKnownAlignForTargetInstr(GISelKnownBits &Analysis, Register R, const MachineRegisterInfo &MRI, unsigned Depth=0) const
Determine the known alignment for the pointer value R.
std::vector< AsmOperandInfo > AsmOperandInfoVector
SDValue expandShlSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]SHLSAT.
SDValue expandIS_FPCLASS(EVT ResultVT, SDValue Op, FPClassTest Test, SDNodeFlags Flags, const SDLoc &DL, SelectionDAG &DAG) const
Expand check for floating point class.
SDValue expandFP_TO_INT_SAT(SDNode *N, SelectionDAG &DAG) const
Expand FP_TO_[US]INT_SAT into FP_TO_[US]INT and selects or min/max.
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "look through" ops that don't contri...
SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const
Expands an unaligned store to 2 half-size stores for integer values, and possibly more for vectors.
SDValue SimplifyMultipleUseDemandedVectorElts(SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
Helper wrapper around SimplifyMultipleUseDemandedBits, demanding all bits from only some vector eleme...
virtual bool findOptimalMemOpLowering(std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes) const
Determines the optimal series of memory ops to replace the memset / memcpy.
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
void expandSADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::S(ADD|SUB)O.
SDValue expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const
Expand VP_BITREVERSE nodes.
SDValue expandABS(SDNode *N, SelectionDAG &DAG, bool IsNegative=false) const
Expand ABS nodes.
SDValue expandVecReduce(SDNode *Node, SelectionDAG &DAG) const
Expand a VECREDUCE_* into an explicit calculation.
bool ShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const
Check to see if the specified operand of the specified instruction is a constant integer.
virtual const char * getTargetNodeName(unsigned Opcode) const
This method returns the name of a target specific DAG node.
bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const
Expand float to UINT conversion.
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
virtual bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded vector elements, returning true on success...
bool expandREM(SDNode *Node, SDValue &Result, SelectionDAG &DAG) const
Expand an SREM or UREM using SDIV/UDIV or SDIVREM/UDIVREM, if legal.
std::pair< SDValue, SDValue > expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Expands an unaligned load to 2 half-size loads for an integer, and possibly more for vectors.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
SDValue expandVectorSplice(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::VECTOR_SPLICE.
virtual const char * LowerXConstraint(EVT ConstraintVT) const
Try to replace an X constraint, which matches anything, with another that has more specific requireme...
SDValue expandCTPOP(SDNode *N, SelectionDAG &DAG) const
Expand CTPOP nodes.
SDValue expandBSWAP(SDNode *N, SelectionDAG &DAG) const
Expand BSWAP nodes.
SDValue CTTZTableLookup(SDNode *N, SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op, unsigned NumBitsPerElt) const
Expand CTTZ via Table Lookup.
virtual bool isKnownNeverNaNForTargetNode(SDValue Op, const SelectionDAG &DAG, bool SNaN=false, unsigned Depth=0) const
If SNaN is false, returns true if Op is known to never be any NaN; if SNaN is true, returns true if Op is known to never be a signaling NaN.
bool expandDIVREMByConstant(SDNode *N, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, SDValue LL=SDValue(), SDValue LH=SDValue()) const
Attempt to expand an n-bit div/rem/divrem by constant using a n/2-bit urem by constant and other arit...
SDValue getVectorSubVecPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, EVT SubVecVT, SDValue Index) const
Get a pointer to a sub-vector of type SubVecVT at index Idx located in memory for a vector of type Ve...
virtual void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool isPositionIndependent() const
std::pair< StringRef, TargetLowering::ConstraintType > ConstraintPair
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
ConstraintGroup getConstraintPreferences(AsmOperandInfo &OpInfo) const
Given an OpInfo with list of constraints codes as strings, return a sorted Vector of pairs of constra...
bool expandFP_TO_SINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const
Expand float(f32) to SINT(i64) conversion.
virtual SDValue SimplifyMultipleUseDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth) const
More limited version of SimplifyDemandedBits that can be used to "look through" ops that don't contri...
virtual SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Glue, const SDLoc &DL, const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const
SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, SDValue N1, MutableArrayRef< int > Mask, SelectionDAG &DAG) const
Tries to build a legal vector shuffle using the provided parameters or equivalent variations.
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
std::pair< SDValue, SDValue > scalarizeVectorLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Turn load of vector type into a load of the individual elements.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
void forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl, bool Signed, EVT WideVT, const SDValue LL, const SDValue LH, const SDValue RL, const SDValue RH, SDValue &Lo, SDValue &Hi) const
forceExpandWideMUL - Unconditionally expand a MUL into either a libcall or brute force via a wide mul...
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
virtual bool isDesirableToCommuteXorWithShift(const SDNode *N) const
Return true if it is profitable to combine an XOR of a logical shift to create a logical shift of NOT...
TargetLowering(const TargetLowering &)=delete
virtual bool shouldSimplifyDemandedVectorElts(SDValue Op, const TargetLoweringOpt &TLO) const
Return true if the target supports simplifying demanded vector elements by converting them to undefs.
bool isConstFalseVal(SDValue N) const
Return if the N is a constant or constant vector equal to the false value from getBooleanContents().
SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::UDIV node expressing a divide by constant, return a DAG expression to select that will ...
SDValue IncrementMemoryAddress(SDValue Addr, SDValue Mask, const SDLoc &DL, EVT DataVT, SelectionDAG &DAG, bool IsCompressedMemory) const
Increments memory address Addr according to the type of the value DataVT that should be stored.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const
Check whether a given call node is in tail position within its function.
virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL, const TargetRegisterInfo *TRI, const CallBase &Call) const
Split up the constraint string from the inline assembly value into the specific constraints and their...
virtual bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts, APInt &UndefElts, const SelectionDAG &DAG, unsigned Depth=0) const
Return true if vector Op has the same value across all DemandedElts, indicating any elements which ma...
SDValue expandRoundInexactToOdd(EVT ResultVT, SDValue Op, const SDLoc &DL, SelectionDAG &DAG) const
Truncate Op to ResultVT.
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, DAGCombinerInfo &DCI, const SDLoc &dl) const
Try to simplify a setcc built with the specified operands and cc.
SDValue expandFunnelShift(SDNode *N, SelectionDAG &DAG) const
Expand funnel shift.
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const
Return true if folding a constant offset with the given GlobalAddress is legal.
bool LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, SDValue Mask, SDValue EVL, bool &NeedInvert, const SDLoc &dl, SDValue &Chain, bool IsSignaling=false) const
Legalize a SETCC or VP_SETCC with given LHS and RHS and condition code CC on the current target.
bool isExtendedTrueVal(const ConstantSDNode *N, EVT VT, bool SExt) const
Return if N is a True value when extended to VT.
bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth, const APInt &DemandedBits, TargetLoweringOpt &TLO) const
Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
bool isConstTrueVal(SDValue N) const
Return if the N is a constant or constant vector equal to the true value from getBooleanContents().
SDValue expandVPCTPOP(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTPOP nodes.
SDValue expandFixedPointDiv(unsigned Opcode, const SDLoc &dl, SDValue LHS, SDValue RHS, unsigned Scale, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]DIVFIX[SAT].
SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index) const
Get a pointer to vector element Idx located in memory for a vector of type VecVT starting at a base a...
virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo, SDValue Op, SelectionDAG *DAG=nullptr) const
Determines the constraint code and constraint type to use for the specific AsmOperandInfo,...
virtual void CollectTargetIntrinsicOperands(const CallInst &I, SmallVectorImpl< SDValue > &Ops, SelectionDAG &DAG) const
SDValue expandVPCTTZ(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTTZ/VP_CTTZ_ZERO_UNDEF nodes.
virtual const Constant * getTargetConstantFromLoad(LoadSDNode *LD) const
This method returns the constant pool value that will be loaded by LD.
SDValue expandFP_ROUND(SDNode *Node, SelectionDAG &DAG) const
Expand round(fp) to fp conversion.
SDValue createSelectForFMINNUM_FMAXNUM(SDNode *Node, SelectionDAG &DAG) const
Try to convert the fminnum/fmaxnum to a compare/select sequence.
SDValue expandROT(SDNode *N, bool AllowVectorOps, SelectionDAG &DAG) const
Expand rotations.
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const
Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs.
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset.
virtual bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, unsigned Depth) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
void expandShiftParts(SDNode *N, SDValue &Lo, SDValue &Hi, SelectionDAG &DAG) const
Expand shift-by-parts.
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
This method will be invoked for all target nodes and for any target-independent nodes that the target...
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
SDValue expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[U|S]MULFIX[SAT].
SDValue expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][MIN|MAX].
virtual void computeKnownBitsForTargetInstr(GISelKnownBits &Analysis, Register R, KnownBits &Known, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
void expandUADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::U(ADD|SUB)O.
SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::SDIV node expressing a divide by constant, return a DAG expression to select that will ...
virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
virtual SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SREM lowering for power-of-2 denominators.
bool expandUINT_TO_FP(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const
Expand UINT(i64) to double(f64) conversion.
bool expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl, SDValue LHS, SDValue RHS, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, MulExpansionKind Kind, SDValue LL=SDValue(), SDValue LH=SDValue(), SDValue RL=SDValue(), SDValue RH=SDValue()) const
Expand a MUL or [US]MUL_LOHI of n-bit values into two or four nodes, respectively,...
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:76
bool isPositionIndependent() const
const Triple & getTargetTriple() const
TargetOptions Options
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
iterator_range< regclass_iterator > regclasses() const
virtual StringRef getRegAsmName(MCRegister Reg) const
Return the assembly name for Reg.
bool isTypeLegalForClass(const TargetRegisterClass &RC, MVT T) const
Return true if the given TargetRegisterClass has the ValueType T.
bool isOSBinFormatCOFF() const
Tests whether the OS uses the COFF binary format.
Definition: Triple.h:719
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:330
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
const fltSemantics & getFltSemantics() const
bool isSingleValueType() const
Return true if the type is a valid type for a register in codegen.
Definition: Type.h:287
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition: Type.h:302
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:129
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:187
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth, bool MatchAllBits=false)
Splat/Merge neighboring bits to widen/narrow the bitmask represented by.
Definition: APInt.cpp:2978
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:750
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:236
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:723
@ FGETSIGN
INT = FGETSIGN(FP) - Return the sign bit of the specified floating point value as an integer 0/1 valu...
Definition: ISDOpcodes.h:497
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:250
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:559
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:714
@ SMULFIX
RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on 2 integers with the same...
Definition: ISDOpcodes.h:367
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:269
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition: ISDOpcodes.h:487
@ FMAXNUM_IEEE
Definition: ISDOpcodes.h:985
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:239
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1037
@ SMULFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition: ISDOpcodes.h:373
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:783
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:483
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:199
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:543
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:390
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition: ISDOpcodes.h:820
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:255
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:903
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:229
@ SDIVFIX
RESULT = [US]DIVFIX(LHS, RHS, SCALE) - Perform fixed point division on 2 integers with the same width...
Definition: ISDOpcodes.h:380
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1406
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:774
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition: ISDOpcodes.h:662
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:620
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:722
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:930
@ SSUBO
Same for subtraction.
Definition: ISDOpcodes.h:327
@ BRIND
BRIND - Indirect branch.
Definition: ISDOpcodes.h:1058
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition: ISDOpcodes.h:500
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition: ISDOpcodes.h:507
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:349
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:727
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:222
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:627
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:208
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:323
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:651
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:705
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:600
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:573
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
Definition: ISDOpcodes.h:984
@ STRICT_FMAXNUM
Definition: ISDOpcodes.h:424
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:535
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:780
@ STRICT_FMINNUM
Definition: ISDOpcodes.h:425
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:742
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:971
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
Definition: ISDOpcodes.h:359
@ SMULO
Same for multiplication.
Definition: ISDOpcodes.h:331
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition: ISDOpcodes.h:809
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:798
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:674
@ SDIVFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition: ISDOpcodes.h:386
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:888
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:736
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:303
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:442
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:990
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:836
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition: ISDOpcodes.h:158
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:680
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:184
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition: ISDOpcodes.h:657
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:279
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:524
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ VECTOR_SPLICE
VECTOR_SPLICE(VEC1, VEC2, IMM) - Returns a subvector of the same type as VEC1/VEC2 from CONCAT_VECTOR...
Definition: ISDOpcodes.h:612
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:869
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition: ISDOpcodes.h:831
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition: ISDOpcodes.h:855
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:786
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:763
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:340
@ AssertZext
Definition: ISDOpcodes.h:62
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:192
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:515
bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
NodeType getExtForLoadExtType(bool IsFP, LoadExtType)
bool matchUnaryPredicate(SDValue Op, std::function< bool(ConstantSDNode *)> Match, bool AllowUndefs=false)
Hook for matching ConstantSDNode predicate.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isTrueWhenEqual(CondCode Cond)
Return true if the specified condition returns true if the two operands to the condition are equal.
Definition: ISDOpcodes.h:1587
unsigned getUnorderedFlavor(CondCode Cond)
This function returns 0 if the condition is always false if an operand is a NaN, 1 if the condition i...
Definition: ISDOpcodes.h:1592
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
Definition: ISDOpcodes.h:1562
bool matchBinaryPredicate(SDValue LHS, SDValue RHS, std::function< bool(ConstantSDNode *, ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTypeMismatch=false)
Attempt to match a binary predicate against a pair of scalar/splat constants or every element of a pa...
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1529
NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1509
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:456
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
FPClassTest invertFPClassTestIfSimpler(FPClassTest Test)
Evaluates if the specified FP class test is better performed as the inverse (i.e.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition: bit.h:342
ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
bool getShuffleDemandedElts(int SrcWidth, ArrayRef< int > Mask, const APInt &DemandedElts, APInt &DemandedLHS, APInt &DemandedRHS, bool AllowUndefElts=false)
Transform a shuffle mask's output demanded element mask into demanded element masks for the 2 operand...
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:324
bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:275
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
auto find_if_not(R &&Range, UnaryPredicate P)
Definition: STLExtras.h:1754
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
@ Other
Any other memory.
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ Add
Sum of integers.
DWARFExpression::Operation Op
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isConstFalseVal(const TargetLowering &TLI, int64_t Val, bool IsVector, bool IsFP)
Definition: Utils.cpp:1585
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
APFloat neg(APFloat X)
Returns the negated value of the argument.
Definition: APFloat.h:1387
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the largest uint64_t less than or equal to Value and is Skew mod Align.
Definition: MathExtras.h:439
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition: MathExtras.h:360
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:230
static constexpr roundingMode rmTowardZero
Definition: APFloat.h:234
opStatus
IEEE-754R 7: Default exception handling.
Definition: APFloat.h:246
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Represent subnormal handling kind for floating point instruction inputs and outputs.
DenormalModeKind Input
Denormal treatment kind for floating point instruction inputs in the default floating-point environme...
@ PreserveSign
The sign of a flushed-to-zero number is preserved in the sign of 0.
@ PositiveZero
Denormals are flushed to positive zero.
@ IEEE
IEEE-754 denormal numbers preserved.
constexpr bool inputsAreZero() const
Return true if input denormals must be implicitly treated as 0.
Extended Value Type.
Definition: ValueTypes.h:34
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:380
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:136
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:73
EVT changeTypeToInteger() const
Return the type converted to an equivalently sized integer or vector with integer element type.
Definition: ValueTypes.h:120
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:274
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:290
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:146
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:340
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:358
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition: ValueTypes.h:233
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition: ValueTypes.h:349
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:370
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
Definition: ValueTypes.h:415
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
Definition: ValueTypes.h:455
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
Definition: ValueTypes.h:397
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:306
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:64
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:366
bool isFixedLengthVector() const
Definition: ValueTypes.h:177
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:167
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:313
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:202
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition: ValueTypes.h:173
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:318
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition: ValueTypes.h:101
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:326
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:298
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition: ValueTypes.h:438
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:151
ConstraintPrefix Type
Type - The basic type of the constraint: input/output/clobber/label.
Definition: InlineAsm.h:126
int MatchingInput
MatchingInput - If this is not -1, this is an output constraint where an input constraint is required...
Definition: InlineAsm.h:136
ConstraintCodeVector Codes
Code - The constraint code, either the register name (in braces) or the constraint letter/number.
Definition: InlineAsm.h:154
SubConstraintInfoVector multipleAlternatives
multipleAlternatives - If there are multiple alternative constraints, this array will contain them.
Definition: InlineAsm.h:161
bool isIndirect
isIndirect - True if this operand is an indirect operand.
Definition: InlineAsm.h:150
bool hasMatchingInput() const
hasMatchingInput - Return true if this is an output constraint that has a matching input constraint.
Definition: InlineAsm.h:140
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
Definition: KnownBits.h:297
KnownBits anyextOrTrunc(unsigned BitWidth) const
Return known bits for an "any" extension or truncation of the value we're tracking.
Definition: KnownBits.h:182
unsigned countMinSignBits() const
Returns the number of times the sign bit is replicated into the other bits.
Definition: KnownBits.h:251
static KnownBits smax(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for smax(LHS, RHS).
Definition: KnownBits.cpp:208
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition: KnownBits.h:104
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition: KnownBits.h:238
bool isUnknown() const
Returns true if we don't know any bits.
Definition: KnownBits.h:63
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
Definition: KnownBits.h:157
bool hasConflict() const
Returns true if there is conflicting information.
Definition: KnownBits.h:47
static std::optional< bool > sge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGE result.
Definition: KnownBits.cpp:542
unsigned countMaxPopulation() const
Returns the maximum number of bits that could be one.
Definition: KnownBits.h:285
KnownBits concat(const KnownBits &Lo) const
Concatenate the bits from Lo onto the bottom of *this.
Definition: KnownBits.h:229
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:40
static KnownBits umax(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for umax(LHS, RHS).
Definition: KnownBits.cpp:184
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition: KnownBits.h:168
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:71
KnownBits unionWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for either this or RHS or both.
Definition: KnownBits.h:317
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition: KnownBits.h:307
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition: KnownBits.h:176
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition: KnownBits.h:244
static KnownBits smin(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for smin(LHS, RHS).
Definition: KnownBits.cpp:221
static std::optional< bool > ugt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGT result.
Definition: KnownBits.cpp:508
static std::optional< bool > slt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLT result.
Definition: KnownBits.cpp:548
static KnownBits computeForAddSub(bool Add, bool NSW, bool NUW, const KnownBits &LHS, const KnownBits &RHS)
Compute known bits resulting from adding LHS and RHS.
Definition: KnownBits.cpp:57
static std::optional< bool > ult(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULT result.
Definition: KnownBits.cpp:524
static std::optional< bool > ule(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULE result.
Definition: KnownBits.cpp:528
bool isNegative() const
Returns true if this value is known to be negative.
Definition: KnownBits.h:101
static KnownBits mul(const KnownBits &LHS, const KnownBits &RHS, bool NoUndefSelfMultiply=false)
Compute known bits resulting from multiplying LHS and RHS.
Definition: KnownBits.cpp:777
KnownBits anyext(unsigned BitWidth) const
Return known bits for an "any" extension of the value we're tracking, where we don't know anything ab...
Definition: KnownBits.h:163
static std::optional< bool > sle(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLE result.
Definition: KnownBits.cpp:552
static std::optional< bool > sgt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGT result.
Definition: KnownBits.cpp:532
unsigned countMinPopulation() const
Returns the number of bits known to be one.
Definition: KnownBits.h:282
static std::optional< bool > uge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGE result.
Definition: KnownBits.cpp:518
static KnownBits umin(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for umin(LHS, RHS).
Definition: KnownBits.cpp:202
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasNoUnsignedWrap() const
bool hasNoSignedWrap() const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
Magic data for optimising signed division by a constant.
static SignedDivisionByConstantInfo get(const APInt &D)
Calculate the magic numbers required to implement a signed integer division by a constant as a sequen...
This contains information for each constraint that we are lowering.
MVT ConstraintVT
The ValueType for the operand value.
TargetLowering::ConstraintType ConstraintType
Information about the constraint code, e.g.
std::string ConstraintCode
This contains the actual string for the code, like "m".
Value * CallOperandVal
If this is the result output operand or a clobber, this is null, otherwise it is the incoming operand...
unsigned getMatchedOperand() const
If this is an input matching constraint, this method returns the output operand it matches.
bool isMatchingInputConstraint() const
Return true of this is an input operand that is a matching constraint like "4".
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setIsPostTypeLegalization(bool Value=true)
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
CallLoweringInfo & setDiscardResult(bool Value=true)
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
CallLoweringInfo & setNoReturn(bool Value=true)
CallLoweringInfo & setChain(SDValue InChain)
void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setIsPostTypeLegalization(bool Value=true)
MakeLibCallOptions & setSExt(bool Value=true)
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT, bool Value=true)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
bool CombineTo(SDValue O, SDValue N)
Magic data for optimising unsigned division by a constant.
static UnsignedDivisionByConstantInfo get(const APInt &D, unsigned LeadingZeros=0, bool AllowEvenDivisorOptimization=true)
Calculate the magic numbers required to implement an unsigned integer division by a constant as a seq...